Files
danbooru/test/unit/sources/tumblr_test.rb
evazion b4aea72d04 sources: remove preview_urls method from base strategy.
Remove the `preview_urls` method from strategies. The only place this was used was
when doing IQDB searches, to download the thumbnail image from the source instead of
the full image.

This wasn't worth it for a few reasons:

* Thumbnails on other sites are sometimes not the size we want, which could affect
  IQDB results.
* Grabbing thumbnails is complex for some sites. You can't always just rewrite the
  image URL. Sometimes it requires extra API calls, which can be slower than just
  grabbing the full image.
* For videos and animations, thumbnails from other sites don't always match our
  thumbnails. We do smart thumbnail generation to try to avoid blank thumbnails, which
  means we don't always pick the first frame, which could affect IQDB results.

API changes:

* /iqdb_queries?search[file_url] now downloads the URL as is without any modification.
  Before it tried to change thumbnail and sample size image URLs to the full version.

* /iqdb_queries?search[url] now returns an error if the URL is for an HTML page that
  contains multiple images. Before it would grab only the first image and silently
  ignore the rest.
2022-03-11 03:22:23 -06:00

251 lines
9.8 KiB
Ruby

require "test_helper"
module Sources
class TumblrTest < ActiveSupport::TestCase
# Skips the current test when the Tumblr strategy reports that it is not enabled.
def setup
  return if Sources::Strategies::Tumblr.enabled?

  skip "Tumblr key is not configured"
end
context "The source for a 'http://*.tumblr.com/post/*' photo post with a single image" do
  # Every test in this context inspects the strategy found for the same post URL.
  setup do
    @site = Sources::Strategies.find("https://noizave.tumblr.com/post/162206271767")
  end

  should "get the artist name" do
    assert_equal("noizave", @site.artist_name)
  end

  should "get the profile" do
    assert_equal("https://noizave.tumblr.com", @site.profile_url)
  end

  should "get the tags" do
    # The first element of each tag entry is compared as-is; the normalized list
    # is expected to collapse the three "red hair" spellings into one tag.
    raw_tag_names = @site.tags.map(&:first)

    assert_equal(["tag", "red hair", "red-hair", "red_hair"], raw_tag_names)
    assert_equal(["red_hair", "tag"], @site.normalized_tags)
  end

  should "get the commentary" do
    expected_html = <<~HTML.chomp
      <h2>header</h2>
      <hr><p>plain <b>bold</b> <i>italics</i> <strike>strike</strike></p>
      <!-- more -->
      <ol><li>one</li>
      <li>two</li>
      </ol><ul><li>one</li>
      <ul><li>two</li>
      </ul></ul><blockquote><p>quote</p></blockquote>
      <p><a href=\"http://www.google.com\">link</a></p>
    HTML

    assert_nil(@site.artist_commentary_title)
    assert_equal(expected_html, @site.artist_commentary_desc)
  end

  should "get the dtext-ified commentary" do
    expected_dtext = <<~DTEXT.chomp
      h2. header
      plain [b]bold[/b] [i]italics[/i] [s]strike[/s]
      * one
      * two
      * one
      * two
      [quote]quote[/quote]
      "link":[http://www.google.com]
    DTEXT

    assert_equal(expected_dtext, @site.dtext_artist_commentary_desc)
  end

  should "get the image url" do
    full_size = "https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"

    assert_equal([full_size], @site.image_urls)
  end

  should "get the canonical url" do
    assert_equal("https://noizave.tumblr.com/post/162206271767", @site.canonical_url)
  end

  should "get the artist" do
    # An artist record whose URL is the blog's profile should be returned by the strategy.
    CurrentUser.user = FactoryBot.create(:user)
    CurrentUser.ip_addr = "127.0.0.1"
    noizave_artist = FactoryBot.create(:artist, name: "noizave", url_string: "https://noizave.tumblr.com/")

    assert_equal([noizave_artist], @site.artists)
  end
end
context "The source for a 'http://*.tumblr.com/image/*' image page" do
  # The strategy is looked up from the /image/ page URL rather than the /post/ URL.
  setup do
    @site = Sources::Strategies.find("https://noizave.tumblr.com/image/162206271767")
  end

  should "get the image url" do
    full_size = "https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"

    assert_equal([full_size], @site.image_urls)
  end

  should "get the canonical url" do
    # The canonical URL is expected in /post/ form even though an /image/ URL was given.
    assert_equal("https://noizave.tumblr.com/post/162206271767", @site.canonical_url)
  end

  should "get the tags" do
    raw_tag_names = @site.tags.map(&:first)

    assert_equal(["tag", "red hair", "red-hair", "red_hair"], raw_tag_names)
    assert_equal(["red_hair", "tag"], @site.normalized_tags)
  end
end
context "The source for a 'http://*.media.tumblr.com/$hash/tumblr_$id_540.jpg' image" do
  # @url is the direct image URL under test; @ref is the post page that the first
  # nested context passes as the referer and that both contexts expect as canonical URL.
  setup do
    @url = "https://78.media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_540.jpg"
    @ref = "https://noizave.tumblr.com/post/162094447052"
  end

  context "with a referer" do
    should "get all the metadata" do
      strategy = Sources::Strategies.find(@url, @ref)
      tag_names = strategy.tags.map(&:first)
      full_size = "https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg"

      assert_equal("noizave", strategy.artist_name)
      assert_equal("https://noizave.tumblr.com", strategy.profile_url)
      assert_equal(["tag1", "tag2"], tag_names)
      assert_equal(@ref, strategy.canonical_url)
      assert_equal([full_size], strategy.image_urls)
    end
  end

  context "without a referer" do
    should "still find all the relevant information" do
      # Same expectations as above, but the strategy only receives the image URL.
      strategy = Sources::Strategies.find(@url)
      tag_names = strategy.tags.map(&:first)
      full_size = "https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg"

      assert_equal("noizave", strategy.artist_name)
      assert_equal("https://noizave.tumblr.com", strategy.profile_url)
      assert_equal(["tag1", "tag2"], tag_names)
      assert_equal(@ref, strategy.canonical_url)
      assert_equal([full_size], strategy.image_urls)
    end
  end
end
context "The source for a 'http://*.tumblr.com/post/*' text post with inline images" do
  setup do
    @site = Sources::Strategies.find("https://noizave.tumblr.com/post/162221502947")
  end

  should "get the image urls" do
    expected_images = [
      "https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_1280.png",
      "https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_1280.jpg",
    ]

    # Both sides are sorted, so the order in which the strategy returns the URLs is not checked.
    assert_equal(expected_images.sort, @site.image_urls.sort)
  end

  should "get the commentary" do
    # The image host's numeric subdomain is matched with \d+ instead of a fixed value.
    expected_desc = %r{<p>description</p><figure class="tmblr-full" data-orig-height="3000" data-orig-width="3000"><img src="https://\d+.media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_540.png" data-orig-height="3000" data-orig-width="3000"/></figure><figure class="tmblr-full" data-orig-height="3000" data-orig-width="3000"><img src="https://\d+.media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_540.jpg" data-orig-height="3000" data-orig-width="3000"/></figure>}

    assert_equal("test post", @site.artist_commentary_title)
    assert_match(expected_desc, @site.artist_commentary_desc)
  end
end
context "A video post with inline images" do
  should "get the video and inline images" do
    post_url = "https://noizave.tumblr.com/post/162222617101"
    strategy = Sources::Strategies.find(post_url)
    expected_media = [
      "https://va.media.tumblr.com/tumblr_os31dkexhK1wsfqep.mp4",
      "https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os31dclyCR1v11u29_1280.png",
    ]

    # Compared without sorting: the video is expected first, then the inline image.
    assert_equal(expected_media, strategy.image_urls)
    assert_equal(post_url, strategy.canonical_url)
  end
end
context "The source for a 'http://*.tumblr.com/post/*' answer post with inline images" do
  # The lookup URL carries a trailing "/test-ask" slug after the post id.
  setup do
    @site = Sources::Strategies.find("https://noizave.tumblr.com/post/171237880542/test-ask")
  end

  should "get the image urls" do
    inline_image = "https://media.tumblr.com/cb481f031010e8ddad564b2150149c9a/tumblr_inline_p4nxoyLrSh1v11u29_1280.png"

    assert_equal([inline_image], @site.image_urls)
  end

  should "get the commentary" do
    assert_equal("Anonymous asked: test ask", @site.artist_commentary_title)
    assert_match("test answer", @site.artist_commentary_desc)
  end

  should "get the canonical url" do
    # The canonical URL is expected without the slug that was part of the lookup URL.
    assert_equal("https://noizave.tumblr.com/post/171237880542", @site.canonical_url)
  end
end
context "A Tumblr post with new image URLs" do
  should "return the correct image url" do
    image_url = "https://64.media.tumblr.com/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s1280x1920/46f4af7ec94456f8fef380ee6311eb81178ce7e9.jpg"
    page_url = "https://make-do5.tumblr.com/post/619663949657423872"
    strategy = Sources::Strategies.find(image_url, page_url)

    # Inspect the URL the strategy resolved, not the `image_url` literal above:
    # matching the input string against a pattern it trivially satisfies can never
    # fail and therefore verifies nothing. `sole` raises unless exactly one image
    # URL was returned.
    resolved_url = strategy.image_urls.sole

    # The size segment is matched loosely (s<width>x<height>); presumably the resolved
    # size may differ from the input's s1280x1920 -- confirm against the strategy.
    assert_match(%r{/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s\d+x\d+/}i, resolved_url)
    assert_equal(page_url, strategy.canonical_url)
    assert_downloaded(7_428_704, resolved_url)
  end
end
context "A deleted tumblr post" do
  should "extract the info from the url" do
    # Per the context name the post no longer exists, so the expectations below are
    # limited to what can be read off the URL, plus empty image and tag lists.
    strategy = Sources::Strategies.find("http://shimetsukage.tumblr.com/post/176805588268/20180809-ssb-coolboy")
    post_url = "https://shimetsukage.tumblr.com/post/176805588268"

    assert_nothing_raised { strategy.to_h }
    assert_equal("shimetsukage", strategy.artist_name)
    assert_equal("https://shimetsukage.tumblr.com", strategy.profile_url)
    assert_equal(post_url, strategy.page_url)
    assert_equal(post_url, strategy.canonical_url)
    assert_equal([], strategy.image_urls)
    assert_equal([], strategy.tags)
  end
end
context "A download for a 'http://*.media.tumblr.com/$hash/tumblr_$id_$size.png' image" do
  should "find the largest image" do
    post_page = "https://natsuki-teru.tumblr.com/post/178728919271"
    largest = "https://media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_1280.png"
    size_suffixes = %w[100 250 400 500 500h 540 640 1280]

    # Whatever size suffix the input URL carries, the same _1280 URL is expected back.
    size_suffixes.each do |suffix|
      sized_image = "https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_#{suffix}.png"
      strategy = Sources::Strategies.find(sized_image, post_page)

      assert_equal([largest], strategy.image_urls)
    end
  end
end
context "normalizing for source" do
  should "normalize correctly" do
    post_url = "https://octrain1020.tumblr.com/post/190713122589"

    # [input source, expected normalized source] pairs, checked in this order.
    cases = [
      [post_url, post_url],
      ["https://octrain1020.tumblr.com/image/190713122589", post_url],
      ["https://octrain1020.tumblr.com/image/190713122589#asd", post_url],
      [
        "https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false",
        "https://superboin.tumblr.com/post/141169066579",
      ],
    ]

    cases.each do |source, normalized|
      assert_equal(normalized, Sources::Strategies.normalize_source(source))
    end
  end

  should "avoid normalizing unnormalizable urls" do
    # A bare blog URL has no post id, so it is expected to come back unchanged.
    blog_root = "https://octrain1020.tumblr.com/"

    assert_equal(blog_root, Sources::Strategies.normalize_source(blog_root))
  end
end
end
end