Merge pull request #4585 from nonamethanks/fix_tumblr

Tumblr: support highest res for new image urls
This commit is contained in:
evazion
2020-08-17 14:11:48 -05:00
committed by GitHub
2 changed files with 27 additions and 13 deletions

View File

@@ -7,6 +7,8 @@
# https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png
#
# https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false
#
# https://make-do5.tumblr.com/post/619663949657423872 (extremely high res, extractable)
module Sources::Strategies
class Tumblr < Base
@@ -26,6 +28,11 @@ module Sources::Strategies
VIDEO = %r{\Ahttps?://(?:vtt|ve|va\.media)\.tumblr\.com/}i
POST = %r{\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)}i
# Request headers used when a new-style image URL is fetched as an HTML page
# (note the "accept: text/html"). Frozen so the shared constant cannot be
# mutated by code that receives it.
NEW_HEADERS = {
  "user-agent": Danbooru.config.canonical_app_name,
  "accept": "text/html"
}.freeze
# Whether this strategy can be used: true only when a Tumblr consumer key
# is present in the Danbooru configuration.
def self.enabled?
  consumer_key = Danbooru.config.tumblr_consumer_key
  consumer_key.present?
end
@@ -161,14 +168,22 @@ module Sources::Strategies
# http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# Resolve `url` to the largest available variant of a Tumblr image.
#
# NOTE(review): this listing appears to interleave the pre-change and
# post-change lines of the commit with the +/- markers stripped: the early
# `return` sits next to the new `if`, and both the `candidates` assignment and
# the `candidates.find` call show up twice. It is not runnable as displayed;
# the comments below describe what each visible piece does.
#
# url   - image URL to upgrade; matched with `=~` and rewritten with `gsub`.
# sizes - size suffixes to try, in the order given (defaults to SIZES).
#
# Result by branch:
# * OLD_IMAGE match: the first candidate URL accepted by `http_exists?`.
# * `/s<W>x<H>/<file>` match: the `src` attribute of the selected <img>.
# * otherwise: `url` unchanged.
def find_largest(url, sizes: SIZES)
return url unless url =~ OLD_IMAGE
# Old-style URL: build one candidate per size from the named captures
# (dir, filename, ext) of the OLD_IMAGE match held in `$~`.
if url =~ OLD_IMAGE
candidates = sizes.map do |size|
"https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
end
candidates = sizes.map do |size|
"https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
end
# Keep the first candidate, in `sizes` order, that http_exists? accepts.
candidates.find do |candidate|
http_exists?(candidate)
end
# New-style URL ending in /s<width>x<height>/<file>.<ext>; the file name is
# captured as $1.
elsif url =~ %r{/s\d+x\d+/(\w+\.\w+)$}i
# Integer square root of the configured max_image_resolution, used below for
# both the width and the height of the requested size.
max_size = Integer.sqrt(Danbooru.config.max_image_resolution)
# The replacement string (including $1 from the elsif match) is built before
# gsub runs, so $1 still refers to the captured file name.
url = url.gsub(%r{/s\d+x\d+/\w+\.\w+$}i, "/s#{max_size}x#{max_size}/#{$1}")
candidates.find do |candidate|
http_exists?(candidate)
# Request the rewritten URL with NEW_HEADERS (accept: text/html) and parse the
# response; presumably `cache(1.minute)` caches the response for one minute.
resp = Danbooru::Http.cache(1.minute).get(url, headers: NEW_HEADERS).parse
# Select an <img> whose src contains the requested size segment and return
# its src.
# NOTE(review): if `at` returns nil when nothing matches, `["src"]` raises
# NoMethodError - confirm whether a fallback to `url` is wanted.
resp.at("img[src*='/s#{max_size}x#{max_size}/']")["src"]
else
# Neither pattern matched: hand the URL back untouched.
url
end
end

View File

@@ -220,14 +220,13 @@ module Sources
# Checks image URL resolution for a Tumblr post that uses the new-style
# /s<W>x<H>/ image URLs.
#
# NOTE(review): this listing appears to interleave the removed (emlan post,
# image1_url/image2_url) and added (make-do5 post, image_url) lines of the
# commit with the +/- markers stripped, so `page_url` and `strategy` are
# assigned twice and assertions from both versions appear together.
context "A Tumblr post with new image URLs" do
should "return the correct image url" do
page_url = "https://emlan.tumblr.com/post/189469423572/kuro-attempts-to-buy-a-racy-book-at-comiket-but"
image1_url = "https://64.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png"
image2_url = "https://64.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s540x810/cd270c29db06b5e7fdcee63114fe3eb2c9c0d590.png"
strategy = Sources::Strategies.find(image2_url, page_url)
image_url = "https://64.media.tumblr.com/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s1280x1920/46f4af7ec94456f8fef380ee6311eb81178ce7e9.jpg"
page_url = "https://make-do5.tumblr.com/post/619663949657423872"
strategy = Sources::Strategies.find(image_url, page_url)
assert_equal([image1_url, image2_url], strategy.image_urls)
assert_equal(image2_url, strategy.image_url)
assert_equal("https://emlan.tumblr.com/post/189469423572", strategy.canonical_url)
# NOTE(review): this matches against the literal `image_url` defined above,
# which already contains these path segments, so it passes regardless of what
# the strategy returns. Presumably `strategy.image_url` was intended - confirm.
assert_match(%r{/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s\d+x\d+/}i, image_url)
assert_equal(page_url, strategy.canonical_url)
# Downloads the resolved image and checks its size against 7_428_704
# (presumably bytes - verify against the assert_downloaded helper).
assert_downloaded(7_428_704, strategy.image_url)
end
end