sources: remove preview_urls method from base strategy.

Remove the `preview_urls` method from strategies. The only place this was used was
when doing IQDB searches, to download the thumbnail image from the source instead of
the full image.

This wasn't worth it for a few reasons:

* Thumbnails on other sites are sometimes not the size we want, which could affect
  IQDB results.
* Grabbing thumbnails is complex for some sites. You can't always just rewrite the
  image URL. Sometimes it requires extra API calls, which can be slower than just
  grabbing the full image.
* For videos and animations, thumbnails from other sites don't always match our
  thumbnails. We do smart thumbnail generation to try to avoid blank thumbnails, so we
  don't always pick the first frame, and that mismatch could affect IQDB results.

API changes:

* /iqdb_queries?search[file_url] now downloads the URL as is without any modification.
  Before it tried to change thumbnail and sample size image URLs to the full version.

* /iqdb_queries?search[url] now returns an error if the URL is for an HTML page that
  contains multiple images. Before it would grab only the first image and silently
  ignore the rest.
This commit is contained in:
evazion
2022-03-11 02:54:26 -06:00
parent 2f61486ac6
commit b4aea72d04
18 changed files with 12 additions and 171 deletions

View File

@@ -11,10 +11,6 @@ module Sources
assert_equal(["https://cdn.artstation.com/p/assets/images/images/000/705/368/4k/jey-rain-one1.jpg?1443931773"], @site.image_urls)
end
should "get the preview url" do
assert_equal("https://cdn.artstation.com/p/assets/images/images/000/705/368/small/jey-rain-one1.jpg", @site.preview_url.sub(/\?\d+/, ""))
end
should "get the canonical url" do
assert_equal("https://jeyrain.artstation.com/projects/04XA4", @site.canonical_url)
end
@@ -47,11 +43,6 @@ module Sources
assert_equal([url], @site.image_urls)
end
should "get the preview url" do
url = "https://cdn.artstation.com/p/assets/images/images/006/066/534/small/yinan-cui-reika.jpg?1495781565"
assert_equal(url, @site.preview_url)
end
should "get the canonical url" do
assert_equal("https://dantewontdie.artstation.com/projects/YZK5q", @site.canonical_url)
end
@@ -85,11 +76,6 @@ module Sources
assert_equal([url], @site.image_urls)
end
should "get the preview url" do
url = "https://cdn.artstation.com/p/assets/images/images/000/144/922/small/cassio-yoshiyaki-cody2backup2-yoshiyaki.jpg?1406314198"
assert_equal(url, @site.preview_url)
end
should "get the tags" do
assert_equal(["Street Fighter", "Cody", "SF"].sort, @site.tags.map(&:first).sort)
assert_equal(["street_fighter", "cody", "sf"].sort, @site.normalized_tags.sort)
@@ -146,7 +132,6 @@ module Sources
site = Sources::Strategies.find(url)
assert_equal(["https://cdn.artstation.com/p/assets/covers/images/007/262/828/original/monica-kyrie-1.jpg?1504865060"], site.image_urls)
assert_equal("https://cdn.artstation.com/p/assets/covers/images/007/262/828/small/monica-kyrie-1.jpg?1504865060", site.preview_url)
end
end

View File

@@ -2,14 +2,12 @@ require "test_helper"
module Sources
class MoebooruTest < ActiveSupport::TestCase
def assert_source_data_equals(url, referer = nil, site_name: nil, image_url: nil, page_url: nil, preview_url: nil, size: nil, tags: [], profile_url: nil, **params)
def assert_source_data_equals(url, referer = nil, site_name: nil, image_url: nil, page_url: nil, size: nil, tags: [], profile_url: nil, **params)
site = Sources::Strategies.find(url)
assert_equal(site_name, site.site_name)
assert_equal([image_url], site.image_urls)
assert_equal(image_url, site.canonical_url)
assert_equal(preview_url, site.preview_url)
assert_equal([preview_url], site.preview_urls)
assert_equal(page_url, site.page_url) if page_url.present?
assert_equal(tags.sort, site.tags.map(&:first).sort)
assert_equal(profile_url.to_s, site.profile_url.to_s)
@@ -26,25 +24,15 @@ module Sources
end
end
context "A 'https://files.yande.re/preview/:hh/:hh/:file.jpg' preview url" do
should "return a non-empty list of preview_urls" do
url = "https://files.yande.re/data/preview/7c/d1/7cd124fc28203233cce3bade26651d43.jpg"
site = Sources::Strategies.find(url)
assert_equal([url], site.preview_urls)
end
end
context "Fetching data for an active yande.re .jpg post" do
should "work" do
@prev = "https://files.yande.re/data/preview/7e/cf/7ecfdead705d7b956b26b1d37b98d089.jpg"
@samp = "https://files.yande.re/sample/7ecfdead705d7b956b26b1d37b98d089/yande.re%20482880%20sample%20bayashiko%20journey_to_the_west%20sun_wukong.jpg"
@full = "https://files.yande.re/image/7ecfdead705d7b956b26b1d37b98d089/yande.re%20482880.jpg"
@page = "https://yande.re/post/show/482880"
@tags = ["bayashiko", "journey_to_the_west", "sun_wukong"]
@size = 362_554
@profile_url = "https://twitter.com/apononori"
@data = { site_name: "Yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url }
@data = { site_name: "Yande.re", image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url }
assert_source_data_equals(@samp, **@data)
assert_source_data_equals(@full, **@data)
@@ -54,14 +42,13 @@ module Sources
context "Fetching data for a deleted yande.re .png post with the post id" do
should "work" do
@prev = "https://files.yande.re/data/preview/fb/27/fb27a7ea6c48b2ef76fe915e378b9098.jpg"
@samp = "https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018%20detexted%20misaki_kurehito%20saenai_heroine_no_sodatekata%20sawamura_spencer_eriri%20thighhighs.jpg"
@jpeg = "https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018%20detexted%20misaki_kurehito%20saenai_heroine_no_sodatekata%20sawamura_spencer_eriri%20thighhighs.jpg"
@full = "https://files.yande.re/image/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018.png"
@page = "https://yande.re/post/show/398018"
@tags = ["misaki_kurehito", "saenai_heroine_no_sodatekata", "sawamura_spencer_eriri", "detexted", "thighhighs"]
@size = 9_118_998
@data = { site_name: "Yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil }
@data = { site_name: "Yande.re", image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil }
assert_source_data_equals(@samp, **@data)
assert_source_data_equals(@jpeg, **@data)
@@ -72,13 +59,12 @@ module Sources
context "Fetching data for a deleted yande.re .png post without the post id" do
should "work" do
@prev = "https://files.yande.re/data/preview/fb/27/fb27a7ea6c48b2ef76fe915e378b9098.jpg"
@samp = "https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098.jpg"
@jpeg = "https://files.yande.re/jpeg/fb27a7ea6c48b2ef76fe915e378b9098.jpg"
@full = "https://files.yande.re/image/fb27a7ea6c48b2ef76fe915e378b9098.png"
@tags = []
@size = 9_118_998
@data = { site_name: "Yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil }
@data = { site_name: "Yande.re", image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil }
assert_source_data_equals(@samp, **@data)
assert_source_data_equals(@jpeg, **@data)
@@ -90,7 +76,6 @@ module Sources
context "Konachan.com:" do
context "Fetching data for an active konachan.com .png post" do
should "work" do
@prev = "https://konachan.com/data/preview/ca/12/ca12cdb79a66d242e95a6f958341bf05.jpg"
@samp = "https://konachan.com/sample/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916%20sample.jpg"
@jpeg = "https://konachan.com/jpeg/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916%20anthropomorphism%20bed%20blonde_hair%20bow%20brown_eyes%20doll%20girls_frontline%20hara_shoutarou%20hoodie%20long_hair%20pantyhose%20scar%20skirt%20twintails.jpg"
@full = "https://konachan.com/image/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916.png"
@@ -103,7 +88,7 @@ module Sources
]
@profile_url = "https://www.pixiv.net/users/22528152"
@data = { site_name: "Konachan", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url }
@data = { site_name: "Konachan", image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url }
assert_source_data_equals(@samp, **@data)
assert_source_data_equals(@jpeg, **@data)
assert_source_data_equals(@full, **@data)

View File

@@ -55,12 +55,6 @@ module Sources
assert_equal("https://nijie.info/view.php?id=213043", @site.canonical_url)
end
should "get the preview url" do
assert_equal("https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg", @site.preview_url)
assert_equal([@site.preview_url], @site.preview_urls)
assert_downloaded(132_555, @site.preview_url)
end
should "get the profile" do
assert_equal("https://nijie.info/members.php?id=728995", @site.profile_url)
end
@@ -126,11 +120,6 @@ module Sources
assert_equal(["https://pic.nijie.net/03/nijie_picture/728995_20170505014820_0.jpg"], @site.image_urls)
end
should "get the preview urls" do
assert_equal("https://pic.nijie.net/03/__rs_l170x170/nijie_picture/728995_20170505014820_0.jpg", @site.preview_url)
assert_equal(["https://pic.nijie.net/03/__rs_l170x170/nijie_picture/728995_20170505014820_0.jpg"], @site.preview_urls)
end
should "get the canonical url" do
assert_equal("https://nijie.info/view.php?id=213043", @site.canonical_url)
end
@@ -153,11 +142,6 @@ module Sources
assert_equal(["https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg"], @site.image_urls)
end
should "get the preview urls" do
assert_equal("https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg", @site.preview_url)
assert_equal([@site.preview_url], @site.preview_urls)
end
should "get the canonical url" do
assert_equal("https://nijie.info/view.php?id=213043", @site.canonical_url)
end
@@ -222,7 +206,6 @@ module Sources
assert_equal("https://nijie.info/members.php?id=236014", site.profile_url)
assert_equal("名無しのチンポップ", site.artist_name)
assert_equal([site.url], site.image_urls)
assert_equal(["https://pic.nijie.net/03/__rs_l170x170/nijie_picture/diff/main/218856_4_236014_20170620101333.png"], site.preview_urls)
end
end
@@ -272,7 +255,6 @@ module Sources
assert_nothing_raised { site.to_h }
assert_equal("https://nijie.info/members.php?id=196201", site.profile_url)
assert_equal([site.url], site.image_urls)
assert_equal(1, site.preview_urls.size)
end
end

View File

@@ -70,10 +70,6 @@ module Sources
assert_equal(["https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"], @site.image_urls)
end
should "get the preview url" do
assert_equal("https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_250.png", @site.preview_url)
end
should "get the canonical url" do
assert_equal("https://noizave.tumblr.com/post/162206271767", @site.canonical_url)
end

View File

@@ -30,7 +30,6 @@ module Sources
should "get the correct urls" do
@site = Sources::Strategies.find("https://twitter.com/CincinnatiZoo/status/859073537713328129")
assert_equal(["https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4"], @site.image_urls)
assert_equal(["https://pbs.twimg.com/ext_tw_video_thumb/859073467769126913/pu/img/VKHGdXPsqKASBTvm.jpg:small"], @site.preview_urls)
assert_equal("https://twitter.com/CincinnatiZoo/status/859073537713328129", @site.canonical_url)
end
@@ -61,10 +60,6 @@ module Sources
should "get the image url" do
assert_equal(["https://video.twimg.com/tweet_video/EWHWVrmVcAAp4Vw.mp4"], @site.image_urls)
end
should "get the preview urls" do
assert_equal(["https://pbs.twimg.com/tweet_video_thumb/EWHWVrmVcAAp4Vw.jpg:small"], @site.preview_urls)
end
end
context "A twitter summary card from twitter with a :large image" do
@@ -76,10 +71,6 @@ module Sources
assert_equal(["https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:orig"], @site.image_urls)
end
should "get the preview url" do
assert_equal("https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:small", @site.preview_url)
end
should "get the canonical url" do
assert_equal("https://twitter.com/aranobu/status/817736083567820800", @site.canonical_url)
end
@@ -92,7 +83,6 @@ module Sources
should "get the urls" do
assert_equal(["https://pbs.twimg.com/media/B7jfc1JCcAEyeJh.png:orig"], @site.image_urls)
assert_equal("https://pbs.twimg.com/media/B7jfc1JCcAEyeJh.png:small", @site.preview_url)
assert_equal("https://twitter.com/Strangestone/status/556440271961858051", @site.page_url)
assert_equal("https://twitter.com/Strangestone/status/556440271961858051", @site.canonical_url)
end
@@ -127,7 +117,6 @@ module Sources
should "get the image urls" do
assert_equal(["https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"], @site.image_urls)
assert_equal("https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:small", @site.preview_url)
end
should "get the canonical url" do
@@ -158,7 +147,6 @@ module Sources
assert_equal("nounproject", @site.tag_name)
assert_equal("Noun Project", @site.artist_name)
assert_equal(["https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"], @site.image_urls)
assert_equal("https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:small", @site.preview_url)
end
end
@@ -169,8 +157,6 @@ module Sources
should "work" do
assert_equal(["https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"], @site.image_urls)
assert_equal("https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:small", @site.preview_url)
assert_equal(["https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:small"], @site.preview_urls)
assert(@site.artist_name.blank?)
assert(@site.profile_url.blank?)
assert(@site.artists.empty?)
@@ -188,8 +174,6 @@ module Sources
should "work" do
assert_equal(["https://pbs.twimg.com/media/EBGp2YdUYAA19Uj.jpg:orig"], @site.image_urls)
assert_equal("https://pbs.twimg.com/media/EBGp2YdUYAA19Uj.jpg:small", @site.preview_url)
assert_equal(["https://pbs.twimg.com/media/EBGp2YdUYAA19Uj.jpg:small"], @site.preview_urls)
assert_equal("https://pbs.twimg.com/media/EBGp2YdUYAA19Uj.jpg:orig", @site.canonical_url)
end