diff --git a/app/logical/sources/strategies/tumblr.rb b/app/logical/sources/strategies/tumblr.rb index 107b872e1..a434321b4 100644 --- a/app/logical/sources/strategies/tumblr.rb +++ b/app/logical/sources/strategies/tumblr.rb @@ -1,3 +1,11 @@ +# https://marmaladica.tumblr.com/post/188237914346/saved +# https://66.media.tumblr.com/2c6f55531618b4335c67e29157f5c1fc/tumblr_pz4a44xdVj1ssucdno1_1280.png +# https://66.media.tumblr.com/11700cab20d65d5a6acc470e284dbd3a/tumblr_pz4a44xdVj1ssucdno2_1280.png +# +# https://emlan.tumblr.com/post/189469423572/kuro-attempts-to-buy-a-racy-book-at-comiket-but +# https://66.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png +# https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png + module Sources::Strategies class Tumblr < Base SIZES = %w[1280 640 540 500h 500 400 250 100] @@ -7,7 +15,12 @@ module Sources::Strategies MD5 = %r{(?[0-9a-f]{32})}i FILENAME = %r{(?(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i EXT = %r{(?\w+)} - IMAGE = %r!\Ahttps?://#{DOMAIN}/(?#{MD5}/)?#{FILENAME}_(?\w+)\.#{EXT}\z!i + + # old: https://66.media.tumblr.com/2c6f55531618b4335c67e29157f5c1fc/tumblr_pz4a44xdVj1ssucdno1_1280.png + # new: https://66.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png + OLD_IMAGE = %r!\Ahttps?://#{DOMAIN}/(?#{MD5}/)?#{FILENAME}_(?\w+)\.#{EXT}\z!i + + IMAGE = %r!\Ahttps?://#{DOMAIN}/!i VIDEO = %r!\Ahttps?://(?:vtt|ve\.media)\.tumblr\.com/!i POST = %r!\Ahttps?://(?[^.]+)\.tumblr\.com/(?:post|image)/(?\d+)!i @@ -138,7 +151,7 @@ module Sources::Strategies # http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg # => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg def find_largest(url, sizes: SIZES) - return url unless url =~ IMAGE + return url unless url =~ OLD_IMAGE candidates = sizes.map do |size| "https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}" diff --git a/test/unit/sources/tumblr_test.rb b/test/unit/sources/tumblr_test.rb index f13f3d245..fe29481c0 100644 --- a/test/unit/sources/tumblr_test.rb +++ b/test/unit/sources/tumblr_test.rb @@ -125,7 +125,7 @@ module Sources assert_equal(%w[ https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_orwwptNBCE1wsfqepo1_1280.png https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg - https://media.tumblr.com/d2ed224f135b0c81f812df81a0a8692d/tumblr_orwwptNBCE1wsfqepo3_1280.gif + https://media.tumblr.com/d2ed224f135b0c81f812df81a0a8692d/tumblr_orwwptNBCE1wsfqepo3_640.gif https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_inline_os3134mABB1v11u29_1280.png https://media.tumblr.com/34ed9d0ff4a21625981372291cb53040/tumblr_nv3hwpsZQY1uft51jo1_1280.gif ], site.image_urls) @@ -218,6 +218,19 @@ module Sources end end + context "A Tumblr post with new image URLs" do + should "return the correct image url" do + page_url = "https://emlan.tumblr.com/post/189469423572/kuro-attempts-to-buy-a-racy-book-at-comiket-but" + image1_url = "https://66.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png" + image2_url = "https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s540x810/cd270c29db06b5e7fdcee63114fe3eb2c9c0d590.png" + strategy = Sources::Strategies.find(image2_url, page_url) + + assert_equal([image1_url, image2_url], strategy.image_urls) + assert_equal(image2_url, strategy.image_url) + assert_equal("https://emlan.tumblr.com/post/189469423572", strategy.canonical_url) + end + end + context "A deleted tumblr post" do should "extract the info from the url" do site = Sources::Strategies.find("http://shimetsukage.tumblr.com/post/176805588268/20180809-ssb-coolboy")