diff --git a/app/logical/sources/strategies/tumblr.rb b/app/logical/sources/strategies/tumblr.rb
index 49f519c43..411a9a9e3 100644
--- a/app/logical/sources/strategies/tumblr.rb
+++ b/app/logical/sources/strategies/tumblr.rb
@@ -1,12 +1,13 @@
 module Sources::Strategies
   class Tumblr < Base
+    SIZES = %w[1280 640 540 500h 500 400 250 100]
+
     BASE_URL = %r!\Ahttps?://(?:[^/]+\.)*tumblr\.com!i
     DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com}
     MD5 = %r{(?<md5>[0-9a-f]{32})}i
     FILENAME = %r{(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i
-    SIZES = %r{(?:250|400|500|500h|540|1280|raw)}i
     EXT = %r{(?<ext>\w+)}
-    IMAGE = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i
+    IMAGE = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_(?<size>\w+)\.#{EXT}\z!i
     POST = %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i
 
     def self.enabled?
@@ -132,19 +133,16 @@ module Sources::Strategies
     #
     # http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
     # => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
-    def find_largest(x)
-      if x =~ IMAGE
-        sizes = [1280, 640, 540, "500h", 500, 400, 250]
-        candidates = sizes.map do |size|
-          "https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
-        end
+    def find_largest(url, sizes: SIZES)
+      return url unless url =~ IMAGE
 
-        return candidates.find do |candidate|
-          http_exists?(candidate, headers)
-        end
+      candidates = sizes.map do |size|
+        "https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
       end
 
-      return x
+      candidates.find do |candidate|
+        http_exists?(candidate, headers)
+      end
     end
 
     def inline_images
diff --git a/test/unit/sources/tumblr_test.rb b/test/unit/sources/tumblr_test.rb
index 356270ec5..a9873e0c0 100644
--- a/test/unit/sources/tumblr_test.rb
+++ b/test/unit/sources/tumblr_test.rb
@@ -209,5 +209,19 @@ module Sources
         assert_operator(data, :<, site.to_h)
       end
     end
+
+    context "A download for a 'http://*.media.tumblr.com/$hash/tumblr_$id_$size.png' image" do
+      should "find the largest image" do
+        %w[100 250 400 500 500h 540 640 1280].each do |size|
+          page = "https://natsuki-teru.tumblr.com/post/178728919271"
+          image = "https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_#{size}.png"
+          full = "https://media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_1280.png"
+          site = Sources::Strategies.find(image, page)
+
+          assert_equal(full, site.image_url)
+          assert_equal(full, site.image_urls.second)
+        end
+      end
+    end
   end
 end