sources: remove preview_urls method from base strategy.

Remove the `preview_urls` method from strategies. The only place this was used was
when doing IQDB searches, to download the thumbnail image from the source instead of
the full image.

This wasn't worth it for a few reasons:

* Thumbnails on other sites are sometimes not the size we want, which could affect
  IQDB results.
* Grabbing thumbnails is complex for some sites. You can't always just rewrite the
  image URL. Sometimes it requires extra API calls, which can be slower than just
  grabbing the full image.
* For videos and animations, thumbnails from other sites don't always match our
  thumbnails. We do smart thumbnail generation to try to avoid blank thumbnails, which
  means we don't always pick the first frame, which could affect IQDB results.

API changes:

* /iqdb_queries?search[file_url] now downloads the URL as-is, without any modification.
  Before, it tried to change thumbnail and sample size image URLs to the full version.

* /iqdb_queries?search[url] now returns an error if the URL is for an HTML page that
  contains multiple images. Before, it would grab only the first image and silently
  ignore the rest.
This commit is contained in:
evazion
2022-03-11 02:54:26 -06:00
parent 2f61486ac6
commit b4aea72d04
18 changed files with 12 additions and 171 deletions

View File

@@ -30,11 +30,15 @@ class IqdbClient
if file.present?
file = file.tempfile
elsif url.present?
file = download(url, :preview_url)
strategy = Sources::Strategies.find(url)
raise Error, "Can't do reverse image search: #{url} has multiple images. Enter the URL of a single image." if strategy.image_urls.size > 1
download_url = strategy.image_urls.first
file = Sources::Strategies.find(download_url).download_file!(download_url)
elsif image_url.present?
file = download(image_url, :url)
file = Sources::Strategies.find(image_url).download_file!(image_url)
elsif file_url.present?
file = download(file_url, :image_url)
file = Sources::Strategies.find(file_url).download_file!(file_url)
elsif post_id.present?
file = Post.find(post_id).file(:preview)
elsif media_asset_id.present?
@@ -54,17 +58,6 @@ class IqdbClient
file.try(:close)
end
# Download an URL to a file.
# @param url [String] the URL to download
# @param type [Symbol] the type of URL to download (:preview_url or full :image_url)
# @return [MediaFile] the downloaded file
def download(url, type)
strategy = Sources::Strategies.find(url)
download_url = strategy.send(type)
file = strategy.download_file!(download_url)
file
end
# Transform the JSON returned by IQDB to add the full post data for each
# match.
# @param matches [Array<Hash>] the array of IQDB matches

View File

@@ -85,15 +85,6 @@ class Source::URL::Moebooru < Source::URL
end
end
def self.preview_image_url(site_name, md5)
case site_name
when "Yande.re"
"https://files.yande.re/data/preview/#{md5[0..1]}/#{md5[2..3]}/#{md5}.jpg"
when "Konachan"
"https://konachan.com/data/preview/#{md5[0..1]}/#{md5[2..3]}/#{md5}.jpg"
end
end
def self.full_image_url(site_name, md5, file_ext, post_id = nil)
case site_name
when "Yande.re"

View File

@@ -90,10 +90,6 @@ class Source::URL::Nijie < Source::URL
subdomain.to_s.starts_with?("pic")
end
def preview_image_url
to_s.gsub(/nijie_picture/, "__rs_l170x170/nijie_picture") if image_url?
end
def full_image_url
to_s.remove(%r{__rs_\w+/}i).gsub("http:", "https:") if image_url?
end

View File

@@ -15,10 +15,6 @@ module Sources::Strategies
@image_urls ||= image_urls_sub.map { |asset| asset_url(asset, :largest) }
end
def preview_urls
@preview_urls ||= image_urls_sub.map { |asset| asset_url(asset, :smallest) }
end
def page_url
return nil if project_id.blank?

View File

@@ -129,16 +129,6 @@ module Sources
[]
end
# A smaller representation of the image that's suitable for
# displaying previews.
def preview_urls
image_urls
end
def preview_url
preview_urls.first
end
# Whatever <tt>url</tt> is, this method should return a link to the HTML
# page containing the resource. It should not be a binary file. It will
# eventually be assigned as the source for the post, but it does not
@@ -237,10 +227,6 @@ module Sources
)
end
def file_url
image_url
end
def tags
(@tags || []).uniq
end
@@ -310,8 +296,6 @@ module Sources
},
:artists => artists.as_json(include: :sorted_urls),
:image_urls => image_urls,
:preview_url => preview_url,
:preview_urls => preview_urls,
:page_url => page_url,
:canonical_url => canonical_url,
:normalized_for_artist_finder_url => normalize_for_artist_finder,

View File

@@ -22,12 +22,6 @@ module Sources
end
end
def preview_urls
previews = [page&.at("meta[property='og:image']")&.[](:content)].compact
previews.presence || image_urls
end
def page_url
parsed_url.page_url || parsed_referer&.page_url
end

View File

@@ -20,12 +20,6 @@ module Sources
image.to_a.map { |img| URI.join(page_url, img["src"]).to_s }
end
def preview_urls
image_urls.map do
"https://thumbs.hentai-foundry.com/thumb.php?pid=#{illust_id}&size=250"
end
end
def page_url
return nil if illust_id.blank?

View File

@@ -16,11 +16,6 @@ module Sources
[Source::URL::Moebooru.full_image_url(site_name, post_md5, file_ext, post_id)]
end
def preview_urls
return image_urls if post_md5.blank?
[Source::URL::Moebooru.preview_image_url(site_name, post_md5)]
end
def page_url
return nil if post_id.blank?
"https://#{domain}/post/show/#{post_id}"

View File

@@ -39,14 +39,6 @@ module Sources
end
end
def preview_urls
if illust_id.present?
["https://lohas.nicoseiga.jp/thumb/#{illust_id}i"]
else
image_urls
end
end
def page_url
# XXX what if referer isn't from NicoSeiga?
parsed_referer&.page_url || parsed_url.page_url

View File

@@ -35,10 +35,6 @@ module Sources
images.map { |img| Source::URL.parse("https:#{img}").full_image_url }
end
def preview_urls
image_urls.map { |url| Source::URL.parse(url).preview_image_url }
end
def page_url
return nil if illust_id.blank?
"https://nijie.info/view.php?id=#{illust_id}"

View File

@@ -36,12 +36,6 @@ module Sources::Strategies
assets.map { |url| find_largest(url) }
end
def preview_urls
image_urls.map do |x|
x.sub(/_1280\.(jpg|png|gif|jpeg)\z/, '_250.\1')
end
end
def page_url
parsed_url.page_url || parsed_referer&.page_url || post_url_from_image_html&.page_url
end

View File

@@ -51,18 +51,6 @@ module Sources::Strategies
end
end
def preview_urls
if api_response.dig(:extended_entities, :media).present?
api_response.dig(:extended_entities, :media).to_a.map do |media|
media[:media_url_https] + ":small"
end
else
image_urls.map do |url|
url.gsub(/:orig\z/, ":small")
end
end
end
def page_url
return nil if status_id.blank? || tag_name.blank?
"https://twitter.com/#{tag_name}/status/#{status_id}"

View File

@@ -35,10 +35,6 @@ module Sources
end
end
def preview_urls
image_urls.map { |img| img.gsub(%r{.cn/\w+/(\w+)}, '.cn/orj360/\1') }
end
def page_url
return nil unless api_response.present?

View File

@@ -11,10 +11,6 @@ module Sources
assert_equal(["https://cdn.artstation.com/p/assets/images/images/000/705/368/4k/jey-rain-one1.jpg?1443931773"], @site.image_urls)
end
should "get the preview url" do
assert_equal("https://cdn.artstation.com/p/assets/images/images/000/705/368/small/jey-rain-one1.jpg", @site.preview_url.sub(/\?\d+/, ""))
end
should "get the canonical url" do
assert_equal("https://jeyrain.artstation.com/projects/04XA4", @site.canonical_url)
end
@@ -47,11 +43,6 @@ module Sources
assert_equal([url], @site.image_urls)
end
should "get the preview url" do
url = "https://cdn.artstation.com/p/assets/images/images/006/066/534/small/yinan-cui-reika.jpg?1495781565"
assert_equal(url, @site.preview_url)
end
should "get the canonical url" do
assert_equal("https://dantewontdie.artstation.com/projects/YZK5q", @site.canonical_url)
end
@@ -85,11 +76,6 @@ module Sources
assert_equal([url], @site.image_urls)
end
should "get the preview url" do
url = "https://cdn.artstation.com/p/assets/images/images/000/144/922/small/cassio-yoshiyaki-cody2backup2-yoshiyaki.jpg?1406314198"
assert_equal(url, @site.preview_url)
end
should "get the tags" do
assert_equal(["Street Fighter", "Cody", "SF"].sort, @site.tags.map(&:first).sort)
assert_equal(["street_fighter", "cody", "sf"].sort, @site.normalized_tags.sort)
@@ -146,7 +132,6 @@ module Sources
site = Sources::Strategies.find(url)
assert_equal(["https://cdn.artstation.com/p/assets/covers/images/007/262/828/original/monica-kyrie-1.jpg?1504865060"], site.image_urls)
assert_equal("https://cdn.artstation.com/p/assets/covers/images/007/262/828/small/monica-kyrie-1.jpg?1504865060", site.preview_url)
end
end

View File

@@ -2,14 +2,12 @@ require "test_helper"
module Sources
class MoebooruTest < ActiveSupport::TestCase
def assert_source_data_equals(url, referer = nil, site_name: nil, image_url: nil, page_url: nil, preview_url: nil, size: nil, tags: [], profile_url: nil, **params)
def assert_source_data_equals(url, referer = nil, site_name: nil, image_url: nil, page_url: nil, size: nil, tags: [], profile_url: nil, **params)
site = Sources::Strategies.find(url)
assert_equal(site_name, site.site_name)
assert_equal([image_url], site.image_urls)
assert_equal(image_url, site.canonical_url)
assert_equal(preview_url, site.preview_url)
assert_equal([preview_url], site.preview_urls)
assert_equal(page_url, site.page_url) if page_url.present?
assert_equal(tags.sort, site.tags.map(&:first).sort)
assert_equal(profile_url.to_s, site.profile_url.to_s)
@@ -26,25 +24,15 @@ module Sources
end
end
context "A 'https://files.yande.re/preview/:hh/:hh/:file.jpg' preview url" do
should "return a non-empty list of preview_urls" do
url = "https://files.yande.re/data/preview/7c/d1/7cd124fc28203233cce3bade26651d43.jpg"
site = Sources::Strategies.find(url)
assert_equal([url], site.preview_urls)
end
end
context "Fetching data for an active yande.re .jpg post" do
should "work" do
@prev = "https://files.yande.re/data/preview/7e/cf/7ecfdead705d7b956b26b1d37b98d089.jpg"
@samp = "https://files.yande.re/sample/7ecfdead705d7b956b26b1d37b98d089/yande.re%20482880%20sample%20bayashiko%20journey_to_the_west%20sun_wukong.jpg"
@full = "https://files.yande.re/image/7ecfdead705d7b956b26b1d37b98d089/yande.re%20482880.jpg"
@page = "https://yande.re/post/show/482880"
@tags = ["bayashiko", "journey_to_the_west", "sun_wukong"]
@size = 362_554
@profile_url = "https://twitter.com/apononori"
@data = { site_name: "Yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url }
@data = { site_name: "Yande.re", image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url }
assert_source_data_equals(@samp, **@data)
assert_source_data_equals(@full, **@data)
@@ -54,14 +42,13 @@ module Sources
context "Fetching data for a deleted yande.re .png post with the post id" do
should "work" do
@prev = "https://files.yande.re/data/preview/fb/27/fb27a7ea6c48b2ef76fe915e378b9098.jpg"
@samp = "https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018%20detexted%20misaki_kurehito%20saenai_heroine_no_sodatekata%20sawamura_spencer_eriri%20thighhighs.jpg"
@jpeg = "https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018%20detexted%20misaki_kurehito%20saenai_heroine_no_sodatekata%20sawamura_spencer_eriri%20thighhighs.jpg"
@full = "https://files.yande.re/image/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018.png"
@page = "https://yande.re/post/show/398018"
@tags = ["misaki_kurehito", "saenai_heroine_no_sodatekata", "sawamura_spencer_eriri", "detexted", "thighhighs"]
@size = 9_118_998
@data = { site_name: "Yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil }
@data = { site_name: "Yande.re", image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil }
assert_source_data_equals(@samp, **@data)
assert_source_data_equals(@jpeg, **@data)
@@ -72,13 +59,12 @@ module Sources
context "Fetching data for a deleted yande.re .png post without the post id" do
should "work" do
@prev = "https://files.yande.re/data/preview/fb/27/fb27a7ea6c48b2ef76fe915e378b9098.jpg"
@samp = "https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098.jpg"
@jpeg = "https://files.yande.re/jpeg/fb27a7ea6c48b2ef76fe915e378b9098.jpg"
@full = "https://files.yande.re/image/fb27a7ea6c48b2ef76fe915e378b9098.png"
@tags = []
@size = 9_118_998
@data = { site_name: "Yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil }
@data = { site_name: "Yande.re", image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil }
assert_source_data_equals(@samp, **@data)
assert_source_data_equals(@jpeg, **@data)
@@ -90,7 +76,6 @@ module Sources
context "Konachan.com:" do
context "Fetching data for an active konachan.com .png post" do
should "work" do
@prev = "https://konachan.com/data/preview/ca/12/ca12cdb79a66d242e95a6f958341bf05.jpg"
@samp = "https://konachan.com/sample/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916%20sample.jpg"
@jpeg = "https://konachan.com/jpeg/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916%20anthropomorphism%20bed%20blonde_hair%20bow%20brown_eyes%20doll%20girls_frontline%20hara_shoutarou%20hoodie%20long_hair%20pantyhose%20scar%20skirt%20twintails.jpg"
@full = "https://konachan.com/image/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916.png"
@@ -103,7 +88,7 @@ module Sources
]
@profile_url = "https://www.pixiv.net/users/22528152"
@data = { site_name: "Konachan", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url }
@data = { site_name: "Konachan", image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url }
assert_source_data_equals(@samp, **@data)
assert_source_data_equals(@jpeg, **@data)
assert_source_data_equals(@full, **@data)

View File

@@ -55,12 +55,6 @@ module Sources
assert_equal("https://nijie.info/view.php?id=213043", @site.canonical_url)
end
should "get the preview url" do
assert_equal("https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg", @site.preview_url)
assert_equal([@site.preview_url], @site.preview_urls)
assert_downloaded(132_555, @site.preview_url)
end
should "get the profile" do
assert_equal("https://nijie.info/members.php?id=728995", @site.profile_url)
end
@@ -126,11 +120,6 @@ module Sources
assert_equal(["https://pic.nijie.net/03/nijie_picture/728995_20170505014820_0.jpg"], @site.image_urls)
end
should "get the preview urls" do
assert_equal("https://pic.nijie.net/03/__rs_l170x170/nijie_picture/728995_20170505014820_0.jpg", @site.preview_url)
assert_equal(["https://pic.nijie.net/03/__rs_l170x170/nijie_picture/728995_20170505014820_0.jpg"], @site.preview_urls)
end
should "get the canonical url" do
assert_equal("https://nijie.info/view.php?id=213043", @site.canonical_url)
end
@@ -153,11 +142,6 @@ module Sources
assert_equal(["https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg"], @site.image_urls)
end
should "get the preview urls" do
assert_equal("https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg", @site.preview_url)
assert_equal([@site.preview_url], @site.preview_urls)
end
should "get the canonical url" do
assert_equal("https://nijie.info/view.php?id=213043", @site.canonical_url)
end
@@ -222,7 +206,6 @@ module Sources
assert_equal("https://nijie.info/members.php?id=236014", site.profile_url)
assert_equal("名無しのチンポップ", site.artist_name)
assert_equal([site.url], site.image_urls)
assert_equal(["https://pic.nijie.net/03/__rs_l170x170/nijie_picture/diff/main/218856_4_236014_20170620101333.png"], site.preview_urls)
end
end
@@ -272,7 +255,6 @@ module Sources
assert_nothing_raised { site.to_h }
assert_equal("https://nijie.info/members.php?id=196201", site.profile_url)
assert_equal([site.url], site.image_urls)
assert_equal(1, site.preview_urls.size)
end
end

View File

@@ -70,10 +70,6 @@ module Sources
assert_equal(["https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"], @site.image_urls)
end
should "get the preview url" do
assert_equal("https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_250.png", @site.preview_url)
end
should "get the canonical url" do
assert_equal("https://noizave.tumblr.com/post/162206271767", @site.canonical_url)
end

View File

@@ -30,7 +30,6 @@ module Sources
should "get the correct urls" do
@site = Sources::Strategies.find("https://twitter.com/CincinnatiZoo/status/859073537713328129")
assert_equal(["https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4"], @site.image_urls)
assert_equal(["https://pbs.twimg.com/ext_tw_video_thumb/859073467769126913/pu/img/VKHGdXPsqKASBTvm.jpg:small"], @site.preview_urls)
assert_equal("https://twitter.com/CincinnatiZoo/status/859073537713328129", @site.canonical_url)
end
@@ -61,10 +60,6 @@ module Sources
should "get the image url" do
assert_equal(["https://video.twimg.com/tweet_video/EWHWVrmVcAAp4Vw.mp4"], @site.image_urls)
end
should "get the preview urls" do
assert_equal(["https://pbs.twimg.com/tweet_video_thumb/EWHWVrmVcAAp4Vw.jpg:small"], @site.preview_urls)
end
end
context "A twitter summary card from twitter with a :large image" do
@@ -76,10 +71,6 @@ module Sources
assert_equal(["https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:orig"], @site.image_urls)
end
should "get the preview url" do
assert_equal("https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:small", @site.preview_url)
end
should "get the canonical url" do
assert_equal("https://twitter.com/aranobu/status/817736083567820800", @site.canonical_url)
end
@@ -92,7 +83,6 @@ module Sources
should "get the urls" do
assert_equal(["https://pbs.twimg.com/media/B7jfc1JCcAEyeJh.png:orig"], @site.image_urls)
assert_equal("https://pbs.twimg.com/media/B7jfc1JCcAEyeJh.png:small", @site.preview_url)
assert_equal("https://twitter.com/Strangestone/status/556440271961858051", @site.page_url)
assert_equal("https://twitter.com/Strangestone/status/556440271961858051", @site.canonical_url)
end
@@ -127,7 +117,6 @@ module Sources
should "get the image urls" do
assert_equal(["https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"], @site.image_urls)
assert_equal("https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:small", @site.preview_url)
end
should "get the canonical url" do
@@ -158,7 +147,6 @@ module Sources
assert_equal("nounproject", @site.tag_name)
assert_equal("Noun Project", @site.artist_name)
assert_equal(["https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"], @site.image_urls)
assert_equal("https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:small", @site.preview_url)
end
end
@@ -169,8 +157,6 @@ module Sources
should "work" do
assert_equal(["https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"], @site.image_urls)
assert_equal("https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:small", @site.preview_url)
assert_equal(["https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:small"], @site.preview_urls)
assert(@site.artist_name.blank?)
assert(@site.profile_url.blank?)
assert(@site.artists.empty?)
@@ -188,8 +174,6 @@ module Sources
should "work" do
assert_equal(["https://pbs.twimg.com/media/EBGp2YdUYAA19Uj.jpg:orig"], @site.image_urls)
assert_equal("https://pbs.twimg.com/media/EBGp2YdUYAA19Uj.jpg:small", @site.preview_url)
assert_equal(["https://pbs.twimg.com/media/EBGp2YdUYAA19Uj.jpg:small"], @site.preview_urls)
assert_equal("https://pbs.twimg.com/media/EBGp2YdUYAA19Uj.jpg:orig", @site.canonical_url)
end