Tumblr: support highest res for new image urls

This commit is contained in:
nonamethanks
2020-08-14 01:03:14 +02:00
parent 0e41fef011
commit 5615719218
2 changed files with 27 additions and 13 deletions

View File

@@ -7,6 +7,8 @@
# https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png # https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png
# #
# https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false # https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false
#
# https://make-do5.tumblr.com/post/619663949657423872 (extremely high res, extractable)
module Sources::Strategies module Sources::Strategies
class Tumblr < Base class Tumblr < Base
@@ -26,6 +28,11 @@ module Sources::Strategies
VIDEO = %r{\Ahttps?://(?:vtt|ve|va\.media)\.tumblr\.com/}i VIDEO = %r{\Ahttps?://(?:vtt|ve|va\.media)\.tumblr\.com/}i
POST = %r{\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)}i POST = %r{\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)}i
NEW_HEADERS = {
"user-agent": Danbooru.config.canonical_app_name,
"accept": "text/html"
}
def self.enabled? def self.enabled?
Danbooru.config.tumblr_consumer_key.present? Danbooru.config.tumblr_consumer_key.present?
end end
@@ -161,14 +168,22 @@ module Sources::Strategies
# http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg # http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg # => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
def find_largest(url, sizes: SIZES) def find_largest(url, sizes: SIZES)
return url unless url =~ OLD_IMAGE if url =~ OLD_IMAGE
candidates = sizes.map do |size|
"https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
end
candidates = sizes.map do |size| candidates.find do |candidate|
"https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}" http_exists?(candidate)
end end
elsif url =~ %r{/s\d+x\d+/(\w+\.\w+)$}i
max_size = Integer.sqrt(Danbooru.config.max_image_resolution)
url = url.gsub(%r{/s\d+x\d+/\w+\.\w+$}i, "/s#{max_size}x#{max_size}/#{$1}")
candidates.find do |candidate| resp = Danbooru::Http.cache(1.minute).get(url, headers: NEW_HEADERS).parse
http_exists?(candidate) resp.at("img[src*='/s#{max_size}x#{max_size}/']")["src"]
else
url
end end
end end

View File

@@ -220,14 +220,13 @@ module Sources
context "A Tumblr post with new image URLs" do context "A Tumblr post with new image URLs" do
should "return the correct image url" do should "return the correct image url" do
page_url = "https://emlan.tumblr.com/post/189469423572/kuro-attempts-to-buy-a-racy-book-at-comiket-but" image_url = "https://64.media.tumblr.com/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s1280x1920/46f4af7ec94456f8fef380ee6311eb81178ce7e9.jpg"
image1_url = "https://64.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png" page_url = "https://make-do5.tumblr.com/post/619663949657423872"
image2_url = "https://64.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s540x810/cd270c29db06b5e7fdcee63114fe3eb2c9c0d590.png" strategy = Sources::Strategies.find(image_url, page_url)
strategy = Sources::Strategies.find(image2_url, page_url)
assert_equal([image1_url, image2_url], strategy.image_urls) assert_match(%r{/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s\d+x\d+/}i, image_url)
assert_equal(image2_url, strategy.image_url) assert_equal(page_url, strategy.canonical_url)
assert_equal("https://emlan.tumblr.com/post/189469423572", strategy.canonical_url) assert_downloaded(7_428_704, strategy.image_url)
end end
end end