From f55951ab584a434bcaa03258cc5f5152266653e1 Mon Sep 17 00:00:00 2001 From: evazion Date: Mon, 5 Sep 2022 14:37:36 -0500 Subject: [PATCH] tumblr: fix exception when parsing mangled image urls. Fix a nil exception when trying to parse invalid URLs like `https://25.media.tumblr.com/91719d337b218681abc48cdc24e`. --- app/logical/source/url/tumblr.rb | 41 +++++++++++++++++++++++++++----- test/unit/sources/tumblr_test.rb | 17 +++++++++++++ 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/app/logical/source/url/tumblr.rb b/app/logical/source/url/tumblr.rb index ab46e79a0..77cd7049e 100644 --- a/app/logical/source/url/tumblr.rb +++ b/app/logical/source/url/tumblr.rb @@ -24,21 +24,24 @@ class Source::URL::Tumblr < Source::URL # https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif # https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif # https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_500h.png + in _, directory, file if image_url? + @directory = directory + parse_filename + # https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png # https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png # https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg # https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg # https://va.media.tumblr.com/tumblr_pgohk0TjhS1u7mrsl.mp4 - in _, *directory, file if image_url? - @directory = directory.first - @filename, @old_variant_size, @extension = file.match(/(\w+?)(?:_(\d+h?|raw))?\.(\w+)\z/).captures + in _, file if image_url? 
+ parse_filename # https://marmaladica.tumblr.com/post/188237914346/saved # https://emlan.tumblr.com/post/189469423572/kuro-attempts-to-buy-a-racy-book-at-comiket-but # https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false # https://make-do5.tumblr.com/post/619663949657423872 # http://raspdraws.tumblr.com/image/70021467381 - in _, ("post" | "image"), /^\d+$/ => work_id, *rest + in _, ("post" | "image"), /^\d+$/ => work_id, *rest unless image_url? @blog_name = subdomain unless subdomain == "www" @work_id = work_id @@ -73,6 +76,32 @@ class Source::URL::Tumblr < Source::URL end end + def parse_filename + return if filename.blank? + + case filename.split("_") + + # http://data.tumblr.com/07e7bba538046b2b586433976290ee1f/tumblr_o3gg44HcOg1r9pi29o1_raw.jpg + # https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg + # https://media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg + # https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif + # https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif + # https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_500h.png + # https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png + # https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg + in *words, /\A(?:\d+h?|raw)\z/ => size + @filename = words.join("_") + @sample_size = size + + # https://va.media.tumblr.com/tumblr_pgohk0TjhS1u7mrsl.mp4 + # https://66.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png + # https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png + else + @filename = filename + @sample_size = nil + end + end + def image_url? 
# http://data.tumblr.com/07e7bba538046b2b586433976290ee1f/tumblr_o3gg44HcOg1r9pi29o1_raw.jpg # https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg @@ -81,11 +110,11 @@ class Source::URL::Tumblr < Source::URL end def variants - return [] unless @old_variant_size.present? + return [] unless @sample_size.present? && @filename.present? directory = "#{@directory}/" if @directory.present? sizes = %w[1280 640 540 500h 500 400 250 100] - sizes.map { |size| "https://media.tumblr.com/#{directory}#{@filename}_#{size}.#{@extension}" } + sizes.map { |size| "https://media.tumblr.com/#{directory}#{@filename}_#{size}.#{file_ext}" } end def page_url diff --git a/test/unit/sources/tumblr_test.rb b/test/unit/sources/tumblr_test.rb index 31146de93..7beb232f5 100644 --- a/test/unit/sources/tumblr_test.rb +++ b/test/unit/sources/tumblr_test.rb @@ -267,7 +267,24 @@ module Sources assert(Source::URL.image_url?("http://data.tumblr.com/07e7bba538046b2b586433976290ee1f/tumblr_o3gg44HcOg1r9pi29o1_raw.jpg")) assert(Source::URL.image_url?("https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg")) + assert(Source::URL.image_url?("https://media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg")) + assert(Source::URL.image_url?("https://66.media.tumblr.com/2c6f55531618b4335c67e29157f5c1fc/tumblr_pz4a44xdVj1ssucdno1_1280.png")) + assert(Source::URL.image_url?("https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif")) + assert(Source::URL.image_url?("https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif")) + assert(Source::URL.image_url?("https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_500h.png")) + + assert(Source::URL.image_url?("https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png")) + 
assert(Source::URL.image_url?("https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png")) + assert(Source::URL.image_url?("https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg")) + assert(Source::URL.image_url?("https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg")) assert(Source::URL.image_url?("https://va.media.tumblr.com/tumblr_pgohk0TjhS1u7mrsl.mp4")) + + assert(Source::URL.image_url?("https://66.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png")) + assert(Source::URL.image_url?("https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png")) + + assert(Source::URL.image_url?("https://25.media.tumblr.com/91719d337b218681abc48cdc24e")) + refute(Source::URL.page_url?("https://25.media.tumblr.com/91719d337b218681abc48cdc24e")) + refute(Source::URL.profile_url?("https://25.media.tumblr.com/91719d337b218681abc48cdc24e")) end end end