tumblr: fix exception when parsing mangled image urls.

Fix a nil exception when trying to parse invalid URLs like `https://25.media.tumblr.com/91719d337b218681abc48cdc24e`.
This commit is contained in:
evazion
2022-09-05 14:37:36 -05:00
parent 3d5b201e0d
commit f55951ab58
2 changed files with 52 additions and 6 deletions

View File

@@ -24,21 +24,24 @@ class Source::URL::Tumblr < Source::URL
# https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
# https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
# https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_500h.png
in _, directory, file if image_url?
@directory = directory
parse_filename
# https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
# https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png
# https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
# https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# https://va.media.tumblr.com/tumblr_pgohk0TjhS1u7mrsl.mp4
in _, *directory, file if image_url?
@directory = directory.first
@filename, @old_variant_size, @extension = file.match(/(\w+?)(?:_(\d+h?|raw))?\.(\w+)\z/).captures
in _, file if image_url?
parse_filename
# https://marmaladica.tumblr.com/post/188237914346/saved
# https://emlan.tumblr.com/post/189469423572/kuro-attempts-to-buy-a-racy-book-at-comiket-but
# https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false
# https://make-do5.tumblr.com/post/619663949657423872
# http://raspdraws.tumblr.com/image/70021467381
in _, ("post" | "image"), /^\d+$/ => work_id, *rest
in _, ("post" | "image"), /^\d+$/ => work_id, *rest unless image_url?
@blog_name = subdomain unless subdomain == "www"
@work_id = work_id
@@ -73,6 +76,32 @@ class Source::URL::Tumblr < Source::URL
end
end
# Splits the URL's filename into a base name and an optional trailing size
# suffix, storing the results in @filename and @sample_size.
#
# The size suffix is the last "_"-separated word of `filename`, and only when
# that word is entirely digits (optionally followed by "h") or is exactly
# "raw". When there is no such suffix, @filename is the whole filename and
# @sample_size is nil. Does nothing when `filename` is blank.
def parse_filename
  return if filename.blank?

  case filename.split("_")
  # http://data.tumblr.com/07e7bba538046b2b586433976290ee1f/tumblr_o3gg44HcOg1r9pi29o1_raw.jpg
  # https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
  # https://media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg
  # https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
  # https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
  # https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_500h.png
  # https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
  # https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
  #
  # The alternation is grouped so that both anchors apply to each branch.
  # Ungrouped (`\A\d+h?|raw\z`), it also matches any word that merely starts
  # with a digit (e.g. a hex hash filename) or merely ends in "raw".
  in *words, /\A(?:\d+h?|raw)\z/ => size
    @filename = words.join("_")
    @sample_size = size

  # https://va.media.tumblr.com/tumblr_pgohk0TjhS1u7mrsl.mp4
  # https://66.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png
  # https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png
  else
    @filename = filename
    @sample_size = nil
  end
end
def image_url?
# http://data.tumblr.com/07e7bba538046b2b586433976290ee1f/tumblr_o3gg44HcOg1r9pi29o1_raw.jpg
# https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
@@ -81,11 +110,11 @@ class Source::URL::Tumblr < Source::URL
end
# Builds candidate image URLs on media.tumblr.com for @filename, one per size
# suffix in the list below (1280 down to 100), each optionally prefixed with
# @directory. Returns an empty array when either guard below fails.
#
# NOTE(review): this hunk appears to show the pre-change and post-change lines
# together (no diff markers are visible). The @old_variant_size guard and the
# URL line ending in @extension look like the replaced versions of the lines
# that immediately follow them — confirm against the actual file.
def variants
return [] unless @old_variant_size.present?
# Both a size suffix and a non-empty base name are needed to build a URL.
return [] unless @sample_size.present? && @filename.present?
# Left nil (interpolates as "") when there is no directory.
directory = "#{@directory}/" if @directory.present?
sizes = %w[1280 640 540 500h 500 400 250 100]
# As written, the result of this first map is discarded; only the last
# expression is the method's return value.
sizes.map { |size| "https://media.tumblr.com/#{directory}#{@filename}_#{size}.#{@extension}" }
sizes.map { |size| "https://media.tumblr.com/#{directory}#{@filename}_#{size}.#{file_ext}" }
end
def page_url

View File

@@ -267,7 +267,24 @@ module Sources
assert(Source::URL.image_url?("http://data.tumblr.com/07e7bba538046b2b586433976290ee1f/tumblr_o3gg44HcOg1r9pi29o1_raw.jpg"))
assert(Source::URL.image_url?("https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg"))
assert(Source::URL.image_url?("https://media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg"))
assert(Source::URL.image_url?("https://66.media.tumblr.com/2c6f55531618b4335c67e29157f5c1fc/tumblr_pz4a44xdVj1ssucdno1_1280.png"))
assert(Source::URL.image_url?("https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif"))
assert(Source::URL.image_url?("https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif"))
assert(Source::URL.image_url?("https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_500h.png"))
assert(Source::URL.image_url?("https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png"))
assert(Source::URL.image_url?("https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png"))
assert(Source::URL.image_url?("https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg"))
assert(Source::URL.image_url?("https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg"))
assert(Source::URL.image_url?("https://va.media.tumblr.com/tumblr_pgohk0TjhS1u7mrsl.mp4"))
assert(Source::URL.image_url?("https://66.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png"))
assert(Source::URL.image_url?("https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png"))
assert(Source::URL.image_url?("https://25.media.tumblr.com/91719d337b218681abc48cdc24e"))
refute(Source::URL.page_url?("https://25.media.tumblr.com/91719d337b218681abc48cdc24e"))
refute(Source::URL.profile_url?("https://25.media.tumblr.com/91719d337b218681abc48cdc24e"))
end
end
end