Tumblr: handle va.media.tumblr.com video URLs.

- Source::Extractor (tumblr.rb): image_urls drops nil results from find_largest;
  find_largest falls back to the given image_url when no variant exists.
- Source::URL::Tumblr: adds video_url? (mp4/webm), builds variants on
  va.media.tumblr.com for videos, and adds the 720 size.
- Adds a test for a video post with a tumblr_*_720.mp4 URL.

NOTE(review): leading whitespace and blank context lines below were reconstructed
from the hunk line counts; verify against the target files before applying.

diff --git a/app/logical/source/extractor/tumblr.rb b/app/logical/source/extractor/tumblr.rb
index 866e0e70b..ea9a8a85c 100644
--- a/app/logical/source/extractor/tumblr.rb
+++ b/app/logical/source/extractor/tumblr.rb
@@ -29,7 +29,8 @@ class Source::Extractor
       end
 
       assets += inline_images
-      assets.map { |url| find_largest(url) }
+      assets = assets.map { |url| find_largest(url) }
+      assets.compact
     end
 
     def page_url
@@ -92,9 +93,8 @@ class Source::Extractor
       if parsed_image.full_image_url.present?
         image_url_html(parsed_image.full_image_url)&.at("img[src*='/#{parsed_image.directory}/']")&.[](:src)
       elsif parsed_image.variants.present?
-        # Look for the biggest available version on media.tumblr.com. A bigger
-        # version may or may not exist.
-        parsed_image.variants.find { |variant| http_exists?(variant) }
+        # Look for the biggest available version on media.tumblr.com. A bigger version may or may not exist.
+        parsed_image.variants.find { |variant| http_exists?(variant) } || image_url
       else
         parsed_image.original_url
       end
diff --git a/app/logical/source/url/tumblr.rb b/app/logical/source/url/tumblr.rb
index ca6450c0e..fb04c646b 100644
--- a/app/logical/source/url/tumblr.rb
+++ b/app/logical/source/url/tumblr.rb
@@ -123,12 +123,19 @@ class Source::URL::Tumblr < Source::URL
     subdomain&.ends_with?(".media") || subdomain&.in?(%w[data media])
   end
 
+  def video_url?
+    # https://va.media.tumblr.com/tumblr_rjoh0hR8Xe1teimlz_720.mp4
+    image_url? && file_ext.in?(%w[mp4 webm])
+  end
+
   def variants
     return [] unless @sample_size.present? && @filename.present?
 
     directory = "#{@directory}/" if @directory.present?
-    sizes = %w[1280 640 540 500h 500 400 250 100]
-    sizes.map { |size| "https://media.tumblr.com/#{directory}#{@filename}_#{size}.#{file_ext}" }
+    media_host = video_url? ? "va.media.tumblr.com" : "media.tumblr.com"
+    sizes = %w[1280 720 640 540 500h 500 400 250 100]
+
+    sizes.map { |size| "https://#{media_host}/#{directory}#{@filename}_#{size}.#{file_ext}" }
   end
 
   def page_url
diff --git a/test/unit/sources/tumblr_test.rb b/test/unit/sources/tumblr_test.rb
index b0493b67b..80f89bd99 100644
--- a/test/unit/sources/tumblr_test.rb
+++ b/test/unit/sources/tumblr_test.rb
@@ -96,6 +96,14 @@ module Sources
       )
     end
 
+    context "A video post with a https://va.media.tumblr.com/tumblr_*_720.mp4 URL" do
+      strategy_should_work(
+        "https://cloudstation.tumblr.com/post/697975577362251776/direct-quote-from-kaiba-post-battle-city",
+        image_urls: ["https://va.media.tumblr.com/tumblr_rjoh0hR8Xe1teimlz_720.mp4"],
+        download_size: 1_073_148,
+      )
+    end
+
     context "The source for a 'http://*.tumblr.com/post/*' answer post with inline images" do
       strategy_should_work(
         "https://noizave.tumblr.com/post/171237880542/test-ask",