Merge pull request #3179 from evazion/fix-tumblr-samples

Fix #3178: Rewrite Tumblr image samples
This commit is contained in:
Albert Yi
2017-06-22 16:13:39 -07:00
committed by GitHub
5 changed files with 138 additions and 82 deletions

View File

@@ -1,45 +1,60 @@
module Downloads
module RewriteStrategies
# Regexp source fragments describing a Tumblr image URL. They are kept as
# plain strings so they can be interpolated into one pattern, and frozen so
# the shared constants cannot be modified in place.

# Image hosts: data.tumblr.com, media.tumblr.com or <digits>.media.tumblr.com.
DOMAIN = '(data|(\d+\.)?media)\.tumblr\.com'.freeze
# Directory component made of 32 hex digits (named capture `md5`).
MD5 = '(?<md5>[0-9a-f]{32})'.freeze
# File name without its size suffix, optionally prefixed with tumblr_ or
# tumblr_inline_ and optionally followed by _r<digits> (named capture `filename`).
FILENAME = '(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)'.freeze
# Size suffixes accepted after the last underscore of the file name.
SIZES = '(250|400|500|500h|540|1280|raw)'.freeze
# File extension (named capture `ext`).
EXT = '(?<ext>\w+)'.freeze
class Tumblr < Base
# Entry point: runs the URL through the rewrite helpers and returns
# [url, headers, data]. `data` defaults to an empty hash and is returned as given.
#
# NOTE(review): as shown, this body calls rewrite_cdn with two arguments
# (inside the tumblr.com check) and again with one argument, and uses both
# rewrite_thumbnails and rewrite_samples. This looks like the removed and
# added lines of a diff rendered together without +/- markers -- confirm
# which set of lines is current before relying on it.
def rewrite(url, headers, data = {})
if url =~ %r{^https?://.*tumblr\.com}
url, headers = rewrite_cdn(url, headers)
url, headers = rewrite_thumbnails(url, headers)
end
url = rewrite_cdn(url)
url = rewrite_samples(url, headers)
return [url, headers, data]
end
protected
def rewrite_thumbnails(url, headers)
if url =~ %r{^https?://.+\.tumblr\.com/(?:\w+/)?(?:tumblr_)?(\w+_)(\d+)(\..+)$}
match = $1
given_size = $2
file_ext = $3
# Look for the biggest available version on data.tumblr.com. A bigger
# version may or may not exist.
#
# http://40.media.tumblr.com/d8c6d49785c0842ee31ff26c010b7445/tumblr_naypopLln51tkufhoo2_500h.png
# => http://data.tumblr.com/d8c6d49785c0842ee31ff26c010b7445/tumblr_naypopLln51tkufhoo2_raw.png
#
# https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
# => http://data.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg
#
# https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
# => http://data.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
#
# https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
# => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png
#
# http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg
# => http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
#
# http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# => http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# For a URL matching the Tumblr image pattern, builds one data.tumblr.com
# candidate per size (keeping the matched directory, file name and extension)
# and picks the first candidate for which http_exists? is truthy. URLs that do
# not match the pattern are left as they are.
def rewrite_samples(url, headers)
if url =~ %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i
# Sizes in the order they are tried.
sizes = ["raw", 1280, 540, 500, 400, 250]
candidates = sizes.map do |size|
"http://data.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
end
# NOTE(review): `match`, `given_size` and `file_ext` are not assigned anywhere
# in this method as shown; the lines using them (and big_1280_url) look like
# leftovers of the removed rewrite_thumbnails in this diff rendering -- confirm.
big_1280_url = url.sub(match + given_size, match + "1280")
if file_ext == ".gif"
res = http_head_request(big_1280_url, headers)
# Sometimes the 1280 version of a gif is actually a static jpeg. We don't want that so we only use the 1280 version if it really is a gif.
if res.is_a?(Net::HTTPSuccess) && res["content-type"] == "image/gif"
return [big_1280_url, headers]
end
else
if http_exists?(big_1280_url, headers)
return [big_1280_url, headers]
end
# NOTE(review): Enumerable#find returns nil when no candidate passes, so
# `url` becomes nil in that case -- confirm callers handle a nil URL.
url = candidates.find do |candidate|
http_exists?(candidate, headers)
end
end
return [url, headers]
url
end
def rewrite_cdn(url, headers)
if url =~ %r{https?://gs1\.wac\.edgecastcdn\.net/8019B6/data\.tumblr\.com/}
url.sub!("gs1.wac.edgecastcdn.net/8019B6/", "")
end
return [url, headers]
# Strips the Edgecast CDN prefix so the URL points directly at data.tumblr.com.
# A URL that does not start with the CDN prefix is returned with unchanged content.
#
# https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
# => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
#
# Uses the non-destructive String#sub so the caller's string is never modified
# and frozen strings are accepted.
def rewrite_cdn(url)
  url.sub(%r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/data\.tumblr\.com!i, "http://data.tumblr.com")
end
end
end