Merge pull request #4585 from nonamethanks/fix_tumblr

Tumblr: support highest res for new image urls
This commit is contained in:
evazion
2020-08-17 14:11:48 -05:00
committed by GitHub
2 changed files with 27 additions and 13 deletions

View File

@@ -7,6 +7,8 @@
# https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png # https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png
# #
# https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false # https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false
#
# https://make-do5.tumblr.com/post/619663949657423872 (extremely high res, extractable)
module Sources::Strategies module Sources::Strategies
class Tumblr < Base class Tumblr < Base
@@ -26,6 +28,11 @@ module Sources::Strategies
# Matches direct video URLs served from the vtt, ve or va.media Tumblr hosts.
VIDEO = %r{\Ahttps?://(?:vtt|ve|va\.media)\.tumblr\.com/}i
# Matches a blog's post or image page URL, capturing `blog_name` and `post_id`.
POST = %r{\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)}i

# Headers sent by find_largest when it requests a new-style image URL as an
# HTML page rather than as raw image data. Frozen because the same Hash is
# handed to every request and must never be mutated in place.
NEW_HEADERS = {
  "user-agent": Danbooru.config.canonical_app_name,
  "accept": "text/html"
}.freeze
# True when Danbooru.config.tumblr_consumer_key is set to a non-blank value.
def self.enabled?
  !Danbooru.config.tumblr_consumer_key.blank?
end
@@ -161,14 +168,22 @@ module Sources::Strategies
# http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg # http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg # => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# Resolve the largest available version of a Tumblr image URL.
#
# * Old-style URLs (matching OLD_IMAGE): builds one candidate URL per entry
#   of `sizes`, in order, and returns the first for which http_exists? is
#   true, or nil when none exists.
# * New-style URLs (".../s<W>x<H>/<file>.<ext>"): rewrites the size segment
#   to a square whose side is the integer square root of
#   Danbooru.config.max_image_resolution, fetches that URL with NEW_HEADERS,
#   and returns the `src` of the <img> in the response that carries the
#   rewritten size segment. If the response has no such <img> (or it has no
#   `src`), the URL passed in is returned instead of raising.
# * Any other URL is returned unchanged.
#
# @param url [String] the image URL to upgrade
# @param sizes [Array] size suffixes to probe for old-style URLs
# @return [String, nil] the best URL found
def find_largest(url, sizes: SIZES)
  if url =~ OLD_IMAGE
    # Capture the match data now so later regex work cannot replace it.
    old = Regexp.last_match

    candidates = sizes.map do |size|
      "https://media.tumblr.com/#{old[:dir]}#{old[:filename]}_#{size}.#{old[:ext]}"
    end

    candidates.find { |candidate| http_exists?(candidate) }
  elsif url =~ %r{/s\d+x\d+/(\w+\.\w+)$}i
    filename = Regexp.last_match(1)
    max_size = Integer.sqrt(Danbooru.config.max_image_resolution)
    size_segment = "/s#{max_size}x#{max_size}/"
    sized_url = url.gsub(%r{/s\d+x\d+/\w+\.\w+$}i, "#{size_segment}#{filename}")

    page = Danbooru::Http.cache(1.minute).get(sized_url, headers: NEW_HEADERS).parse
    img = page.at("img[src*='#{size_segment}']")
    src = img && img["src"]

    # No matching <img> means the page was not what we expected; keep the
    # URL we were given rather than crashing on nil.
    src || url
  else
    url
  end
end

View File

@@ -220,14 +220,13 @@ module Sources
context "A Tumblr post with new image URLs" do
  should "return the correct image url" do
    image_url = "https://64.media.tumblr.com/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s1280x1920/46f4af7ec94456f8fef380ee6311eb81178ce7e9.jpg"
    page_url = "https://make-do5.tumblr.com/post/619663949657423872"
    strategy = Sources::Strategies.find(image_url, page_url)

    # Check the URL the strategy resolved, not the literal input above:
    # matching the input against its own path can never fail.
    assert_match(%r{/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s\d+x\d+/}i, strategy.image_url)
    assert_equal(page_url, strategy.canonical_url)
    assert_downloaded(7_428_704, strategy.image_url)
  end
end