artists: clean up artist finding logic.

Rename Artist#find_all_by_url to url_matches and drop previous
url_matches method, along with find_artists and search_for_profile.

Previously find_artists tried to lookup the url, referer url, and profile
url in turn until an artist match was found. This was wasteful, because
the source strategy already knows which url to lookup (usually the profile
url). If that url doesn't find a match, then the artist doesn't exist.
This commit is contained in:
evazion
2018-09-08 16:03:10 -05:00
parent 10ca4dd3ad
commit 583f8457f0
4 changed files with 14 additions and 57 deletions

View File

@@ -129,6 +129,8 @@ module Sources
normalize_for_artist_finder.present?
end
# The url to use for artist finding purposes. This will be stored in the
# artist entry. Normally this will be the profile url.
def normalize_for_artist_finder
profile_url || url
end
@@ -139,8 +141,7 @@ module Sources
end
def artists
url = profile_url.presence || image_url.presence
Artist.find_artists(url)
Artist.url_matches(normalize_for_artist_finder)
end
def file_url

View File

@@ -66,14 +66,6 @@ module Sources::Strategies
""
end
def artists
if profile_url.present?
Artist.find_artists(profile_url)
else
[]
end
end
def artist_name
return "" if api_response.blank?
api_response.attrs[:user][:screen_name]

View File

@@ -148,7 +148,7 @@ class Artist < ApplicationRecord
%r!\Ahttps?://(?:[a-zA-Z0-9_-]+\.)*#{domain}/\z!i
end)
def find_all_by_url(url)
def url_matches(url)
url = ArtistUrl.normalize(url)
artists = []
@@ -163,7 +163,7 @@ class Artist < ApplicationRecord
break if url =~ SITE_BLACKLIST_REGEXP
end
artists.inject({}) {|h, x| h[x.name] = x; h}.values.slice(0, 20)
where(id: artists.uniq(&:name).take(20))
end
end
@@ -456,40 +456,6 @@ class Artist < ApplicationRecord
end
module SearchMethods
def find_artists(url, referer_url = nil)
artists = url_matches(url).order("id desc").limit(10)
if artists.empty? && referer_url.present? && referer_url != url
artists = url_matches(referer_url).order("id desc").limit(20)
end
artists
rescue PixivApiClient::Error => e
[]
end
def url_matches(string)
matches = find_all_by_url(string).map(&:id)
if matches.any?
where("id in (?)", matches)
elsif matches = search_for_profile(string)
where("id in (?)", matches)
else
where("false")
end
end
def search_for_profile(url)
source = Sources::Strategies.find(url)
find_all_by_url(source.profile_url)
rescue Net::OpenTimeout, PixivApiClient::Error
raise if Rails.env.test?
nil
rescue Exception
nil
end
def other_names_match(string)
if string =~ /\*/ && CurrentUser.is_builder?
where("artists.other_names ILIKE ? ESCAPE E'\\\\'", string.to_escaped_for_sql_like)

View File

@@ -154,9 +154,7 @@ class ArtistTest < ActiveSupport::TestCase
should "resolve ambiguous urls" do
bobross = FactoryBot.create(:artist, :name => "bob_ross", :url_string => "http://artists.com/bobross/image.jpg")
bob = FactoryBot.create(:artist, :name => "bob", :url_string => "http://artists.com/bob/image.jpg")
matches = Artist.find_all_by_url("http://artists.com/bob/test.jpg")
assert_equal(1, matches.size)
assert_equal("bob", matches.first.name)
assert_artist_found("bob", "http://artists.com/bob/test.jpg")
end
should "parse urls" do
@@ -190,7 +188,7 @@ class ArtistTest < ActiveSupport::TestCase
should "ignore pixiv.net/ and pixiv.net/img/ url matches" do
a1 = FactoryBot.create(:artist, :name => "yomosaka", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720.png")
a2 = FactoryBot.create(:artist, :name => "niwatazumi_bf", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png")
assert_equal([], Artist.find_all_by_url("http://i2.pixiv.net/img28/img/kyang692/35563903.jpg"))
assert_artist_not_found("http://i2.pixiv.net/img28/img/kyang692/35563903.jpg")
end
should "find matches by url" do
@@ -198,11 +196,11 @@ class ArtistTest < ActiveSupport::TestCase
a2 = FactoryBot.create(:artist, :name => "subway", :url_string => "http://subway.com/x/test.jpg")
a3 = FactoryBot.create(:artist, :name => "minko", :url_string => "https://minko.com/x/test.jpg")
assert_equal(["rembrandt"], Artist.find_all_by_url("http://rembrandt.com/x/test.jpg").map(&:name))
assert_equal(["rembrandt"], Artist.find_all_by_url("http://rembrandt.com/x/another.jpg").map(&:name))
assert_equal([], Artist.find_all_by_url("http://nonexistent.com/test.jpg").map(&:name))
assert_equal(["minko"], Artist.find_all_by_url("https://minko.com/x/test.jpg").map(&:name))
assert_equal(["minko"], Artist.find_all_by_url("http://minko.com/x/test.jpg").map(&:name))
assert_artist_found("rembrandt", "http://rembrandt.com/x/test.jpg")
assert_artist_found("rembrandt", "http://rembrandt.com/x/another.jpg")
assert_artist_not_found("http://nonexistent.com/test.jpg")
assert_artist_found("minko", "https://minko.com/x/test.jpg")
assert_artist_found("minko", "http://minko.com/x/test.jpg")
end
should "be case-insensitive to domains when finding matches by url" do
@@ -212,7 +210,7 @@ class ArtistTest < ActiveSupport::TestCase
should "not find duplicates" do
FactoryBot.create(:artist, :name => "warhol", :url_string => "http://warhol.com/x/a/image.jpg\nhttp://warhol.com/x/b/image.jpg")
assert_equal(["warhol"], Artist.find_all_by_url("http://warhol.com/x/test.jpg").map(&:name))
assert_artist_found("warhol", "http://warhol.com/x/test.jpg")
end
should "not include duplicate urls" do
@@ -222,7 +220,7 @@ class ArtistTest < ActiveSupport::TestCase
should "hide deleted artists" do
FactoryBot.create(:artist, :name => "warhol", :url_string => "http://warhol.com/a/image.jpg", :is_active => false)
assert_equal([], Artist.find_all_by_url("http://warhol.com/a/image.jpg").map(&:name))
assert_artist_not_found("http://warhol.com/a/image.jpg")
end
context "when finding deviantart artists" do