downloads: refactor Downloads::File into Danbooru::Http.

Remove the Downloads::File class. Move download methods to
Danbooru::Http instead. This means that:

* HTTParty has been replaced with http.rb for downloading files.

* Downloading is no longer tightly coupled to source strategies. Before,
  Downloads::File tried to automatically look up the source and download
  the full-size image instead if we gave it a sample URL. Now we can
  do plain downloads without source strategies altering the URL.

* The Cloudflare Polish check has been changed from checking for a
  Cloudflare IP to checking for the CF-Polished header. Looking up the
  list of Cloudflare IPs was slow and flaky during testing.

* The SSRF protection code has been factored out so it can be used for
  normal http requests, not just for downloads.

* The Webmock gem can be removed, since it was only used for stubbing
  out certain HTTParty requests in the download tests. Webmock is buggy
  and caused certain tests to fail during CI.

* The retriable gem can be removed, since we no longer automatically
  retry failed downloads. We assume that if a download fails once, then
  retrying probably won't help.
This commit is contained in:
evazion
2020-06-15 04:41:42 -05:00
committed by evazion
parent 10b7a53449
commit 26ad844bbe
15 changed files with 173 additions and 285 deletions

View File

@@ -1,8 +1,8 @@
module DownloadTestHelper
def assert_downloaded(expected_filesize, source, referer = nil)
download = Downloads::File.new(source, referer)
tempfile, strategy = download.download!
assert_equal(expected_filesize, tempfile.size, "Tested source URL: #{source}")
strategy = Sources::Strategies.find(source, referer)
file = strategy.download_file!
assert_equal(expected_filesize, file.size, "Tested source URL: #{source}")
rescue Net::OpenTimeout
skip "Remote connection to #{source} failed"
end

View File

@@ -14,10 +14,4 @@ class CloudflareServiceTest < ActiveSupport::TestCase
assert_requested(:delete, "https://api.cloudflare.com/client/v4/zones/123/purge_cache", times: 1)
end
end
context "#ips" do
should "work" do
refute_empty(@cloudflare.ips)
end
end
end

View File

@@ -4,7 +4,7 @@ class DanbooruHttpTest < ActiveSupport::TestCase
context "Danbooru::Http" do
context "#get method" do
should "work for all basic methods" do
%i[get put post delete].each do |method|
%i[get head put post delete].each do |method|
response = Danbooru::Http.send(method, "https://httpbin.org/status/200")
assert_equal(200, response.status)
end
@@ -26,8 +26,10 @@ class DanbooruHttpTest < ActiveSupport::TestCase
end
should "fail if the request takes too long to download" do
response = Danbooru::Http.timeout(1).get("https://httpbin.org/drip?duration=5&numbytes=5")
assert_equal(599, response.status)
# XXX should return status 599 instead
assert_raises(HTTP::TimeoutError) do
response = Danbooru::Http.timeout(1).get("https://httpbin.org/drip?duration=10&numbytes=10").flush
end
end
should "automatically decompress gzipped responses" do
@@ -45,5 +47,45 @@ class DanbooruHttpTest < ActiveSupport::TestCase
assert_equal(response2.body, response1.body)
end
end
context "#download method" do
should "download files" do
response, file = Danbooru::Http.download_media("https://httpbin.org/bytes/1000")
assert_equal(200, response.status)
assert_equal(1000, file.size)
end
should "follow redirects when downloading files" do
response, file = Danbooru::Http.download_media("https://httpbin.org/redirect-to?url=https://httpbin.org/bytes/1000")
assert_equal(200, response.status)
assert_equal(1000, file.size)
end
should "fail if the url points to a private IP" do
assert_raises(Danbooru::Http::DownloadError) do
Danbooru::Http.public_only.download_media("https://127.0.0.1.xip.io")
end
end
should "fail if the url redirects to a private IP" do
assert_raises(Danbooru::Http::DownloadError) do
Danbooru::Http.public_only.download_media("https://httpbin.org/redirect-to?url=https://127.0.0.1.xip.io")
end
end
should "fail if a download is too large" do
assert_raises(Danbooru::Http::FileTooLargeError) do
response, file = Danbooru::Http.max_size(500).download_media("https://httpbin.org/bytes/1000")
end
end
should "fail if a streaming download is too large" do
assert_raises(Danbooru::Http::FileTooLargeError) do
response, file = Danbooru::Http.max_size(500).download_media("https://httpbin.org/stream-bytes/1000")
end
end
end
end
end

View File

@@ -3,53 +3,27 @@ require 'test_helper'
module Downloads
class ArtStationTest < ActiveSupport::TestCase
context "a download for a (small) artstation image" do
setup do
@asset = "https://cdnb3.artstation.com/p/assets/images/images/003/716/071/small/aoi-ogata-hate-city.jpg?1476754974"
@download = Downloads::File.new(@asset)
end
should "download the /4k/ image instead" do
file, strategy = @download.download!
assert_equal(1_880_910, ::File.size(file.path))
assert_downloaded(1_880_910, "https://cdnb3.artstation.com/p/assets/images/images/003/716/071/small/aoi-ogata-hate-city.jpg?1476754974")
end
end
context "for an image where an original does not exist" do
setup do
@asset = "https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg"
@download = Downloads::File.new(@asset)
end
should "not try to download the original" do
file, strategy = @download.download!
assert_equal(483_192, ::File.size(file.path))
assert_downloaded(483_192, "https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg")
end
end
context "a download for an ArtStation image hosted on CloudFlare" do
setup do
@asset = "https://cdnb.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974"
end
should "return the original file, not the polished file" do
@asset = "https://cdnb.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974"
assert_downloaded(1_880_910, @asset)
end
should "return the original filesize, not the polished filesize" do
assert_equal(1_880_910, Downloads::File.new(@asset).size)
end
end
context "a download for a https://$artist.artstation.com/projects/$id page" do
setup do
@source = "https://dantewontdie.artstation.com/projects/YZK5q"
@download = Downloads::File.new(@source)
end
should "download the original image instead" do
file, strategy = @download.download!
assert_equal(247_350, ::File.size(file.path))
assert_downloaded(247_350, "https://dantewontdie.artstation.com/projects/YZK5q")
end
end
end

View File

@@ -1,81 +0,0 @@
require 'test_helper'
module Downloads
class FileTest < ActiveSupport::TestCase
context "A post download" do
setup do
@source = "http://www.google.com/intl/en_ALL/images/logo.gif"
@download = Downloads::File.new(@source)
end
context "for a banned IP" do
setup do
Resolv.expects(:getaddress).returns("127.0.0.1").at_least_once
end
should "not try to download the file" do
assert_raise(Downloads::File::Error) { Downloads::File.new("http://evil.com").download! }
end
should "not try to fetch the size" do
assert_raise(Downloads::File::Error) { Downloads::File.new("http://evil.com").size }
end
should "not follow redirects to banned IPs" do
url = "http://httpbin.org/redirect-to?url=http://127.0.0.1"
stub_request(:get, url).to_return(status: 301, headers: { "Location": "http://127.0.0.1" })
assert_raise(Downloads::File::Error) { Downloads::File.new(url).download! }
end
should "not follow redirects that resolve to a banned IP" do
url = "http://httpbin.org/redirect-to?url=http://127.0.0.1.nip.io"
stub_request(:get, url).to_return(status: 301, headers: { "Location": "http://127.0.0.1.xip.io" })
assert_raise(Downloads::File::Error) { Downloads::File.new(url).download! }
end
end
context "that fails" do
should "retry three times before giving up" do
HTTParty.expects(:get).times(3).raises(Errno::ETIMEDOUT)
assert_raises(Errno::ETIMEDOUT) { @download.download! }
end
should "return an uncorrupted file on the second try" do
bomb = stub("bomb")
bomb.stubs(:code).raises(IOError)
resp = stub("resp", success?: true)
chunk = stub("a")
chunk.stubs(:code).returns(200)
chunk.stubs(:size).returns(1)
chunk.stubs(:to_s).returns("a")
HTTParty.expects(:get).twice.multiple_yields(chunk, bomb).then.multiple_yields(chunk, chunk).returns(resp)
@download.stubs(:is_cloudflare?).returns(false)
tempfile, _strategy = @download.download!
assert_equal("aa", tempfile.read)
end
end
should "throw an exception when the file is larger than the maximum" do
assert_raise(Downloads::File::Error) do
@download.download!(max_size: 1)
end
end
should "store the file in the tempfile path" do
tempfile, strategy = @download.download!
assert_operator(tempfile.size, :>, 0, "should have data")
end
should "correctly save the file when following 302 redirects" do
download = Downloads::File.new("https://yande.re/post/show/578014")
file, strategy = download.download!(url: download.preview_url)
assert_equal(19134, file.size)
end
end
end
end

View File

@@ -136,18 +136,14 @@ module Downloads
end
context "An ugoira site for pixiv" do
setup do
@download = Downloads::File.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364")
@tempfile, strategy = @download.download!
@tempfile.close!
end
should "capture the data" do
assert_equal(2, @download.data[:ugoira_frame_data].size)
if @download.data[:ugoira_frame_data][0]["file"]
@strategy = Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364")
assert_equal(2, @strategy.data[:ugoira_frame_data].size)
if @strategy.data[:ugoira_frame_data][0]["file"]
assert_equal([{"file" => "000000.jpg", "delay" => 125}, {"file" => "000001.jpg", "delay" => 125}], @download.data[:ugoira_frame_data])
else
assert_equal([{"delay_msec" => 125}, {"delay_msec" => 125}], @download.data[:ugoira_frame_data])
assert_equal([{"delay_msec" => 125}, {"delay_msec" => 125}], @strategy.data[:ugoira_frame_data])
end
end
end

View File

@@ -212,7 +212,7 @@ class UploadServiceTest < ActiveSupport::TestCase
context "on timeout errors" do
setup do
@source = "https://cdn.donmai.us/original/93/f4/93f4dd66ef1eb11a89e56d31f9adc8d0.jpg"
HTTParty.stubs(:get).raises(Net::ReadTimeout)
Danbooru::Http.any_instance.stubs(:get).raises(HTTP::TimeoutError)
end
should "leave the upload in an error state" do