From 561571921874878e0d420756e8c8f7735e83f194 Mon Sep 17 00:00:00 2001
From: nonamethanks
Date: Fri, 14 Aug 2020 01:03:14 +0200
Subject: [PATCH] Tumblr: support highest res for new image urls

---
 app/logical/sources/strategies/tumblr.rb | 30 +++++++++++++++++++-----
 test/unit/sources/tumblr_test.rb         | 13 +++++-----
 2 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/app/logical/sources/strategies/tumblr.rb b/app/logical/sources/strategies/tumblr.rb
index e3440c910..311a5fffe 100644
--- a/app/logical/sources/strategies/tumblr.rb
+++ b/app/logical/sources/strategies/tumblr.rb
@@ -7,6 +7,8 @@
 # https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png
 #
 # https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false
+#
+# https://make-do5.tumblr.com/post/619663949657423872 (extremely high res, extractable)
 
 module Sources::Strategies
   class Tumblr < Base
@@ -26,6 +28,12 @@ module Sources::Strategies
     VIDEO = %r{\Ahttps?://(?:vtt|ve|va\.media)\.tumblr\.com/}i
     POST = %r{\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)}i
 
+    # Sent when fetching a new-style image url; the response is parsed as HTML and searched for an <img>.
+    NEW_HEADERS = {
+      "user-agent": Danbooru.config.canonical_app_name,
+      "accept": "text/html"
+    }.freeze
+
     def self.enabled?
       Danbooru.config.tumblr_consumer_key.present?
     end
@@ -161,14 +169,24 @@ module Sources::Strategies
     # http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
     # => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
     def find_largest(url, sizes: SIZES)
-      return url unless url =~ OLD_IMAGE
+      if url =~ OLD_IMAGE
+        candidates = sizes.map do |size|
+          "https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
+        end
 
-      candidates = sizes.map do |size|
-        "https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
-      end
+        candidates.find do |candidate|
+          http_exists?(candidate)
+        end
+      elsif url =~ %r{/s\d+x\d+/(\w+\.\w+)$}i
+        max_size = Integer.sqrt(Danbooru.config.max_image_resolution)
+        max_url = url.gsub(%r{/s\d+x\d+/\w+\.\w+$}i, "/s#{max_size}x#{max_size}/#{$1}")
 
-      candidates.find do |candidate|
-        http_exists?(candidate)
+        resp = Danbooru::Http.cache(1.minute).get(max_url, headers: NEW_HEADERS).parse
+        # Keep the url we were given when the page has no max-size <img>, instead of raising on nil.
+        img = resp.at("img[src*='/s#{max_size}x#{max_size}/']")
+        img ? img["src"] : url
+      else
+        url
       end
     end
 
diff --git a/test/unit/sources/tumblr_test.rb b/test/unit/sources/tumblr_test.rb
index 4dbfc88bb..c00dad23e 100644
--- a/test/unit/sources/tumblr_test.rb
+++ b/test/unit/sources/tumblr_test.rb
@@ -220,14 +220,13 @@ module Sources
 
     context "A Tumblr post with new image URLs" do
       should "return the correct image url" do
-        page_url = "https://emlan.tumblr.com/post/189469423572/kuro-attempts-to-buy-a-racy-book-at-comiket-but"
-        image1_url = "https://64.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png"
-        image2_url = "https://64.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s540x810/cd270c29db06b5e7fdcee63114fe3eb2c9c0d590.png"
-        strategy = Sources::Strategies.find(image2_url, page_url)
+        image_url = "https://64.media.tumblr.com/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s1280x1920/46f4af7ec94456f8fef380ee6311eb81178ce7e9.jpg"
+        page_url = "https://make-do5.tumblr.com/post/619663949657423872"
+        strategy = Sources::Strategies.find(image_url, page_url)
 
-        assert_equal([image1_url, image2_url], strategy.image_urls)
-        assert_equal(image2_url, strategy.image_url)
-        assert_equal("https://emlan.tumblr.com/post/189469423572", strategy.canonical_url)
+        assert_match(%r{/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s\d+x\d+/}i, strategy.image_url)
+        assert_equal(page_url, strategy.canonical_url)
+        assert_downloaded(7_428_704, strategy.image_url)
       end
     end
 