module Downloads
  module RewriteStrategies
    # Regexp source fragments describing the parts of a Tumblr image URL.
    # They are interpolated into Tumblr::SAMPLE_URL, so the named groups
    # declared here (:md5, :filename, :ext) are available on its MatchData.
    DOMAIN = '(data|(\d+\.)?media)\.tumblr\.com'.freeze
    MD5 = '(?<md5>[0-9a-f]{32})'.freeze
    FILENAME = '(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)'.freeze
    SIZES = '(250|400|500|500h|540|1280|raw)'.freeze
    EXT = '(?<ext>\w+)'.freeze

    # Rewrites Tumblr image URLs so that the largest available version of an
    # image is downloaded from data.tumblr.com.
    class Tumblr < Base
      # A sized Tumblr image URL: optional "<md5>/" directory, file name,
      # "_<size>" suffix and extension. Matched case-insensitively.
      SAMPLE_URL = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i

      # Size suffixes to probe, in order of preference (first hit wins).
      SAMPLE_SIZES = ["raw", 1280, 540, 500, 400, 250].freeze

      # Prefix of data.tumblr.com URLs served through the edgecast CDN.
      CDN_PREFIX = %r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/data\.tumblr\.com!i

      # Applies the CDN rewrite and then the sample rewrite to +url+.
      #
      # Returns [url, headers, data]; +headers+ and +data+ are passed through
      # unchanged.
      def rewrite(url, headers, data = {})
        url = rewrite_cdn(url)
        url = rewrite_samples(url, headers)
        [url, headers, data]
      end

      protected

      # Look for the biggest available version on data.tumblr.com. A bigger
      # version may or may not exist.
      #
      # http://40.media.tumblr.com/d8c6d49785c0842ee31ff26c010b7445/tumblr_naypopLln51tkufhoo2_500h.png
      # => http://data.tumblr.com/d8c6d49785c0842ee31ff26c010b7445/tumblr_naypopLln51tkufhoo2_raw.png
      #
      # https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
      # => http://data.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg
      #
      # https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
      # => http://data.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
      #
      # https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
      # => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png
      #
      # http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg
      # => http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
      #
      # http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
      # => http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
      #
      # Returns +url+ unchanged when it is not a sized Tumblr image URL, or
      # when none of the candidates passes http_exists?.
      def rewrite_samples(url, headers)
        match = SAMPLE_URL.match(url)
        return url unless match

        candidates = SAMPLE_SIZES.map do |size|
          # match[:dir] is nil for URLs without an md5 directory and then
          # interpolates as an empty string.
          "http://data.tumblr.com/#{match[:dir]}#{match[:filename]}_#{size}.#{match[:ext]}"
        end

        # http_exists? is defined outside this file (presumably on Base).
        # Fall back to the input so the caller never receives a nil URL.
        candidates.find { |candidate| http_exists?(candidate, headers) } || url
      end

      # https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
      # => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
      #
      # Returns a new string; the caller's +url+ is left untouched.
      def rewrite_cdn(url)
        url.sub(CDN_PREFIX, "http://data.tumblr.com")
      end
    end
  end
end
# Assertions shared by the download tests. The methods were previously
# defined inline in Downloads::PixivTest. test/test_helper.rb requires every
# file under test/helpers and includes this module into
# ActiveSupport::TestCase.
module DownloadTestHelper
  # Downloads +source+ into a temporary file and asserts that the downloaded
  # file is exactly +expected_filesize+ bytes and that no
  # Downloads::File::Error is raised while downloading.
  def assert_downloaded(expected_filesize, source)
    tempfile = Tempfile.new("danbooru-test")
    download = Downloads::File.new(source, tempfile.path)

    assert_nothing_raised(Downloads::File::Error) do
      download.download!
    end

    assert_equal(expected_filesize, tempfile.size, "Tested source URL: #{source}")
  ensure
    # Close and unlink now instead of leaving the file around until the
    # Tempfile object is garbage collected.
    tempfile.close! if tempfile
  end

  # Asserts that the before_download hook of Downloads::File rewrites
  # +test_source+ to +expected_source+. Nothing is downloaded by this helper
  # itself; the temporary file only provides a destination path.
  def assert_rewritten(expected_source, test_source)
    tempfile = Tempfile.new("danbooru-test")
    download = Downloads::File.new(test_source, tempfile.path)

    rewritten_source, _headers, _data = download.before_download(test_source, {}, {})
    assert_equal(expected_source, rewritten_source, "Tested source URL: #{test_source}")
  ensure
    tempfile.close! if tempfile
  end

  # Asserts that +source+ passes through before_download unchanged.
  def assert_not_rewritten(source)
    assert_rewritten(source, source)
  end
end
module Downloads
  # Checks that Tumblr sample URLs are rewritten to the expected
  # data.tumblr.com URL and that the download has the expected byte size.
  # NOTE(review): assert_downloaded presumably performs real HTTP requests,
  # so the expected sizes depend on the remote files staying unchanged.
  class TumblrTest < ActiveSupport::TestCase
    context "a download for a tumblr 500 sample" do
      should "instead download the raw version" do
        source = "https://24.media.tumblr.com/fc328250915434e66e8e6a92773f79d0/tumblr_mf4nshfibc1s0oswoo1_500.jpg"
        rewrite = "http://data.tumblr.com/fc328250915434e66e8e6a92773f79d0/tumblr_mf4nshfibc1s0oswoo1_raw.jpg"

        assert_rewritten(rewrite, source)
        assert_downloaded(196_617, source)
      end
    end

    context "a download for a *.media.tumblr.com/tumblr_$id_$size image without a larger size" do
      should "download the same version" do
        source = "https://25.media.tumblr.com/tumblr_lxbzel2H5y1r9yjhso1_500.jpg"
        rewrite = "http://data.tumblr.com/tumblr_lxbzel2H5y1r9yjhso1_500.jpg"

        assert_rewritten(rewrite, source)
        assert_downloaded(90_122, source)
      end
    end

    context "a download for a *.media.tumblr.com/tumblr_$id_$size image with a larger size" do
      should "download the best available version" do
        source = "https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png"
        rewrite = "http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png"

        assert_rewritten(rewrite, source)
        assert_downloaded(34_060, source)
      end
    end

    context "a download for a *.media.tumblr.com/$hash/tumblr_$id_rN_$size image" do
      should "download the best available version" do
        source = "https://33.media.tumblr.com/4b7fecf9a5a8284fbaefb051a2369b55/tumblr_npozqfwc9h1rt6u7do1_r1_500.gif"
        rewrite = "http://data.tumblr.com/4b7fecf9a5a8284fbaefb051a2369b55/tumblr_npozqfwc9h1rt6u7do1_r1_raw.gif"

        assert_rewritten(rewrite, source)
        assert_downloaded(1_234_017, source)
      end
    end

    context "a download for a *.media.tumblr.com/$hash/tumblr_inline_$id_$size image" do
      should "download the best available version" do
        source = "https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif"
        rewrite = "http://data.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif"

        assert_rewritten(rewrite, source)
        assert_downloaded(110_348, source)
      end
    end

    context "a download for a data.tumblr.com/$id_$size image with a larger size" do
      should "download the best available version" do
        source = "http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg"
        rewrite = "http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg"

        assert_rewritten(rewrite, source)
        assert_downloaded(153_885, source)
      end
    end

    context "a download for a data.tumblr.com/tumblr_$id_$size.jpg image" do
      should "download the best available version" do
        source = "http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_250.jpg"
        rewrite = "http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg"

        assert_rewritten(rewrite, source)
        assert_downloaded(296_399, source)
      end
    end

    context "a download for a gs1.wac.edgecastcdn.net image" do
      should "rewrite to the full tumblr version" do
        source = "https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png"
        rewrite = "http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png"

        # Download first, then check the rewrite (same order as before).
        assert_downloaded(34_060, source)
        assert_rewritten(rewrite, source)
      end
    end
  end
end