From e48b75c261795f93447804d897f8a2076379e37c Mon Sep 17 00:00:00 2001 From: evazion Date: Sat, 17 Feb 2018 11:02:48 -0600 Subject: [PATCH 1/3] downloads: rewrite url in `download!`, not `http_get_streaming`. Refactor Downloads::File#http_get_streaming to just download the given url, not rewrite it. Don't clobber @source or @data in `#size` either. --- app/logical/downloads/file.rb | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/app/logical/downloads/file.rb b/app/logical/downloads/file.rb index 9699b0942..28372e8be 100644 --- a/app/logical/downloads/file.rb +++ b/app/logical/downloads/file.rb @@ -26,20 +26,23 @@ module Downloads end def size - @source, headers, @data = before_download(@source, @data) + url, headers, _ = before_download(@source, @data) options = { timeout: 3, headers: headers }.deep_merge(Danbooru.config.httparty_options) - res = HTTParty.head(@source, options) + res = HTTParty.head(url, options) res.content_length end def download! + url, headers, @data = before_download(@source, @data) + ::File.open(@file_path, "wb") do |out| - @source, @data = http_get_streaming(@source, @data) do |response| + http_get_streaming(url, headers) do |response| out.write(response) end end - @downloaded_source = @source - @source = after_download(@source) + + @downloaded_source = url + @source = after_download(url) end def before_download(url, datums) @@ -67,7 +70,7 @@ module Downloads end end - def http_get_streaming(src, datums = {}, options = {}, &block) + def http_get_streaming(src, headers = {}, options = {}, &block) max_size = options[:max_size] || Danbooru.config.max_file_size max_size = nil if max_size == 0 # unlimited limit = 4 @@ -79,9 +82,6 @@ module Downloads raise Error.new("URL must be HTTP or HTTPS") end - src, headers, datums = before_download(src, datums) - url = URI.parse(src) - validate_local_hosts(url) begin @@ -96,7 +96,7 @@ module Downloads @content_type = res["Content-Type"] - return [src, datums] + return else raise Error.new("HTTP error code: #{res.code} #{res.message}") end @@ -109,8 +109,6 @@ module Downloads end end end # while - - [src, datums] end # def def fix_twitter_sources(src) From b859a1f714ba91bc2ab7b019a097685cd90d285a Mon Sep 17 00:00:00 2001 From: evazion Date: Sat, 24 Feb 2018 13:21:08 -0600 Subject: [PATCH 2/3] downloads: add tests for untested sites. --- test/test_helpers/download_helper.rb | 2 +- test/unit/downloads/moebooru_test.rb | 14 +++++++++ test/unit/downloads/nico_seiga_test.rb | 32 ++++++++++++++++++++ test/unit/downloads/nijie_test.rb | 22 ++++++++++++++ test/unit/downloads/pawoo_test.rb | 32 ++++++++++++++++++++ test/unit/downloads/twitter_test.rb | 41 ++++++++++++++++++++++++++ 6 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 test/unit/downloads/moebooru_test.rb create mode 100644 test/unit/downloads/nico_seiga_test.rb create mode 100644 test/unit/downloads/nijie_test.rb create mode 100644 test/unit/downloads/pawoo_test.rb create mode 100644 test/unit/downloads/twitter_test.rb diff --git a/test/test_helpers/download_helper.rb b/test/test_helpers/download_helper.rb index 95a843644..9097e7cea 100644 --- a/test/test_helpers/download_helper.rb +++ b/test/test_helpers/download_helper.rb @@ -15,7 +15,7 @@ module DownloadTestHelper download = Downloads::File.new(test_source, tempfile.path) rewritten_source, _, _ = download.before_download(test_source, {}) - assert_equal(expected_source, rewritten_source, "Tested source URL: #{test_source}") + assert_match(expected_source, rewritten_source, "Tested source URL: #{test_source}") end def assert_not_rewritten(source) diff --git a/test/unit/downloads/moebooru_test.rb b/test/unit/downloads/moebooru_test.rb new file mode 100644 index 000000000..2fa77eb68 --- /dev/null +++ b/test/unit/downloads/moebooru_test.rb @@ -0,0 +1,14 @@ +require "test_helper" + +module Downloads + class MoebooruTest < ActiveSupport::TestCase + context "downloading a 'https://yande.re/jpeg/:hash/:file.jpg' jpeg sample url" do + should "download the original file" do + @source = "https://yande.re/jpeg/2c6876ac2317fce617e3c5f1a642123b/yande.re%20292092%20hatsune_miku%20tid%20vocaloid.jpg" + @rewrite = "https://yande.re/image/2c6876ac2317fce617e3c5f1a642123b/yande.re%20292092%20hatsune_miku%20tid%20vocaloid.png" + assert_rewritten(@rewrite, @source) + assert_downloaded(1_050_117, @source) + end + end + end +end diff --git a/test/unit/downloads/nico_seiga_test.rb b/test/unit/downloads/nico_seiga_test.rb new file mode 100644 index 000000000..de00ed7bb --- /dev/null +++ b/test/unit/downloads/nico_seiga_test.rb @@ -0,0 +1,32 @@ +require "test_helper" + +module Downloads + class NicoSeigaTest < ActiveSupport::TestCase + context "downloading a 'http://seiga.nicovideo.jp/seiga/:id' url" do + should "download the original file" do + @source = "http://seiga.nicovideo.jp/seiga/im4937663" + @rewrite = %r!http://lohas.nicoseiga.jp/priv/\h{40}/\d+/4937663! + assert_rewritten(@rewrite, @source) + assert_downloaded(2032, @source) + end + end + + context "downloading a 'http://lohas.nicoseiga.jp/o/:hash/:id' url" do + should "download the original file" do + @source = "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663" + @rewrite = %r!http://lohas.nicoseiga.jp/priv/\h{40}/\d+/4937663! + assert_rewritten(@rewrite, @source) + assert_downloaded(2032, @source) + end + end + + context "downloading a 'https://lohas.nicoseiga.jp/thumb/:id' url" do + should "download the original file" do + @source = "https://lohas.nicoseiga.jp/thumb/4937663i" + @rewrite = %r!http://lohas.nicoseiga.jp/priv/\h{40}/\d+/4937663! + assert_rewritten(@rewrite, @source) + assert_downloaded(2032, @source) + end + end + end +end diff --git a/test/unit/downloads/nijie_test.rb b/test/unit/downloads/nijie_test.rb new file mode 100644 index 000000000..7c7b13c98 --- /dev/null +++ b/test/unit/downloads/nijie_test.rb @@ -0,0 +1,22 @@ +require "test_helper" + +module Downloads + class NijieTest < ActiveSupport::TestCase + context "downloading a 'http://nijie.info/view.php?id=:id' url" do + should "download the original file" do + @source = "http://nijie.info/view.php?id=213043" + @rewrite = "https://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg" + assert_rewritten(@rewrite, @source) + assert_downloaded(132_555, @source) + end + end + + context "downloading a 'https://pic*.nijie.info/nijie_picture/:id.jpg' url" do + should "download the original file" do + @source = "https://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg" + assert_not_rewritten(@source) + assert_downloaded(132_555, @source) + end + end + end +end diff --git a/test/unit/downloads/pawoo_test.rb b/test/unit/downloads/pawoo_test.rb new file mode 100644 index 000000000..4340e2e04 --- /dev/null +++ b/test/unit/downloads/pawoo_test.rb @@ -0,0 +1,32 @@ +require "test_helper" + +module Downloads + class PawooTest < ActiveSupport::TestCase + context "downloading a 'https://pawoo.net/web/statuses/:id' url" do + should "download the original file" do + @source = "https://pawoo.net/web/statuses/1202176" + @rewrite = "https://img.pawoo.net/media_attachments/files/000/128/953/original/4c0a06087b03343f.png" + assert_rewritten(@rewrite, @source) + assert_downloaded(7680, @source) + end + end + + context "downloading a 'https://pawoo.net/:user/:id' url" do + should "download the original file" do + @source = "https://pawoo.net/@9ed00e924818/1202176" + @rewrite = "https://img.pawoo.net/media_attachments/files/000/128/953/original/4c0a06087b03343f.png" + assert_rewritten(@rewrite, @source) + assert_downloaded(7680, @source) + end + end + + context "downloading a 'https://img.pawoo.net/media_attachments/:id/small/:file' url" do + should "download the original file" do + @source = "https://img.pawoo.net/media_attachments/files/000/128/953/small/4c0a06087b03343f.png" + @rewrite = "https://img.pawoo.net/media_attachments/files/000/128/953/original/4c0a06087b03343f.png" + assert_rewritten(@rewrite, @source) + assert_downloaded(7680, @source) + end + end + end +end diff --git a/test/unit/downloads/twitter_test.rb b/test/unit/downloads/twitter_test.rb new file mode 100644 index 000000000..a7f1e7bf7 --- /dev/null +++ b/test/unit/downloads/twitter_test.rb @@ -0,0 +1,41 @@ +require "test_helper" + +module Downloads + class TwitterTest < ActiveSupport::TestCase + context "downloading a 'https://twitter.com/:user/status/:id' url containing a video" do + should "download the largest video" do + @source = "https://twitter.com/CincinnatiZoo/status/859073537713328129" + @rewrite = "https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4" + assert_rewritten(@rewrite, @source) + assert_downloaded(8_602_983, @source) + end + end + + context "downloading a 'https://twitter.com/:user/status/:id/photo/:n' card url" do + should "download the orig file" do + @source = "https://twitter.com/masayasuf/status/870734961778630656/photo/1" + @rewrite = "https://pbs.twimg.com/media/DBV40M2UIAAHYlt.jpg:orig" + assert_rewritten(@rewrite, @source) + assert_downloaded(788_206, @source) + end + end + + context "downloading a 'https://mobile.twitter.com/:user/status/:id/photo/:n' mobile url" do + should "download the orig file" do + @source = "https://mobile.twitter.com/Strangestone/status/556440271961858051" + @rewrite = "https://pbs.twimg.com/media/B7jfc1JCcAEyeJh.png:orig" + assert_rewritten(@rewrite, @source) + assert_downloaded(280_150, @source) + end + end + + context "downloading a 'https://pbs.twimg.com/media/*:large' url" do + should "download the orig file" do + @source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large" + @rewrite = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig" + assert_rewritten(@rewrite, @source) + assert_downloaded(9800, @source) + end + end + end +end From c9eee7e4d4cd20d139caa59254ba85195dcec8a8 Mon Sep 17 00:00:00 2001 From: evazion Date: Sat, 17 Feb 2018 11:43:27 -0600 Subject: [PATCH 3/3] Fix #3528: Prevent CloudFlare from altering images. --- app/logical/downloads/file.rb | 24 +++++++++++++++++++++++- test/unit/downloads/art_station_test.rb | 7 +++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/app/logical/downloads/file.rb b/app/logical/downloads/file.rb index 28372e8be..68d8faafd 100644 --- a/app/logical/downloads/file.rb +++ b/app/logical/downloads/file.rb @@ -36,7 +36,7 @@ module Downloads url, headers, @data = before_download(@source, @data) ::File.open(@file_path, "wb") do |out| - http_get_streaming(url, headers) do |response| + http_get_streaming(uncached_url(url, headers), headers) do |response| out.write(response) end end @@ -133,5 +133,27 @@ module Downloads src end end + + private + + # Prevent Cloudflare from potentially mangling the image. See issue #3528. + def uncached_url(url, headers = {}) + url = Addressable::URI.parse(url) + + if is_cloudflare?(url, headers) + url.query_values = (url.query_values || {}).merge(danbooru_no_cache: SecureRandom.uuid) + end + + url + end + + def is_cloudflare?(url, headers = {}) + Cache.get("is_cloudflare:#{url.origin}", 4.hours) do + res = HTTParty.head(url, { headers: headers }.deep_merge(Danbooru.config.httparty_options)) + raise Error.new("HTTP error code: #{res.code} #{res.message}") unless res.success? + + res.key?("CF-Ray") + end + end end end diff --git a/test/unit/downloads/art_station_test.rb b/test/unit/downloads/art_station_test.rb index a7db904d5..077007b8d 100644 --- a/test/unit/downloads/art_station_test.rb +++ b/test/unit/downloads/art_station_test.rb @@ -28,6 +28,13 @@ module Downloads end end + context "a download for an ArtStation image hosted on CloudFlare" do + should "return the original file, not the polished file" do + @source = "https://cdnb.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974" + assert_downloaded(517_706, @source) # polished size: 502_052 + end + end + context "a download for a https://$artist.artstation.com/projects/$id page" do setup do @source = "https://dantewontdie.artstation.com/projects/YZK5q"