artists: clean up artist finding logic.

Rename Artist#find_all_by_url to url_matches and drop previous
url_matches method, along with find_artists and search_for_profile.

Previously find_artists tried to lookup the url, referer url, and profile
url in turn until an artist match was found. This was wasteful, because
the source strategy already knows which url to lookup (usually the profile
url). If that url doesn't find a match, then the artist doesn't exist.
This commit is contained in:
evazion
2018-09-08 16:03:10 -05:00
parent 10ca4dd3ad
commit 583f8457f0
4 changed files with 14 additions and 57 deletions

View File

@@ -129,6 +129,8 @@ module Sources
normalize_for_artist_finder.present? normalize_for_artist_finder.present?
end end
# The url to use for artist finding purposes. This will be stored in the
# artist entry. Normally this will be the profile url.
def normalize_for_artist_finder def normalize_for_artist_finder
profile_url || url profile_url || url
end end
@@ -139,8 +141,7 @@ module Sources
end end
def artists def artists
url = profile_url.presence || image_url.presence Artist.url_matches(normalize_for_artist_finder)
Artist.find_artists(url)
end end
def file_url def file_url

View File

@@ -66,14 +66,6 @@ module Sources::Strategies
"" ""
end end
def artists
if profile_url.present?
Artist.find_artists(profile_url)
else
[]
end
end
def artist_name def artist_name
return "" if api_response.blank? return "" if api_response.blank?
api_response.attrs[:user][:screen_name] api_response.attrs[:user][:screen_name]

View File

@@ -148,7 +148,7 @@ class Artist < ApplicationRecord
%r!\Ahttps?://(?:[a-zA-Z0-9_-]+\.)*#{domain}/\z!i %r!\Ahttps?://(?:[a-zA-Z0-9_-]+\.)*#{domain}/\z!i
end) end)
def find_all_by_url(url) def url_matches(url)
url = ArtistUrl.normalize(url) url = ArtistUrl.normalize(url)
artists = [] artists = []
@@ -163,7 +163,7 @@ class Artist < ApplicationRecord
break if url =~ SITE_BLACKLIST_REGEXP break if url =~ SITE_BLACKLIST_REGEXP
end end
artists.inject({}) {|h, x| h[x.name] = x; h}.values.slice(0, 20) where(id: artists.uniq(&:name).take(20))
end end
end end
@@ -456,40 +456,6 @@ class Artist < ApplicationRecord
end end
module SearchMethods module SearchMethods
def find_artists(url, referer_url = nil)
artists = url_matches(url).order("id desc").limit(10)
if artists.empty? && referer_url.present? && referer_url != url
artists = url_matches(referer_url).order("id desc").limit(20)
end
artists
rescue PixivApiClient::Error => e
[]
end
def url_matches(string)
matches = find_all_by_url(string).map(&:id)
if matches.any?
where("id in (?)", matches)
elsif matches = search_for_profile(string)
where("id in (?)", matches)
else
where("false")
end
end
def search_for_profile(url)
source = Sources::Strategies.find(url)
find_all_by_url(source.profile_url)
rescue Net::OpenTimeout, PixivApiClient::Error
raise if Rails.env.test?
nil
rescue Exception
nil
end
def other_names_match(string) def other_names_match(string)
if string =~ /\*/ && CurrentUser.is_builder? if string =~ /\*/ && CurrentUser.is_builder?
where("artists.other_names ILIKE ? ESCAPE E'\\\\'", string.to_escaped_for_sql_like) where("artists.other_names ILIKE ? ESCAPE E'\\\\'", string.to_escaped_for_sql_like)

View File

@@ -154,9 +154,7 @@ class ArtistTest < ActiveSupport::TestCase
should "resolve ambiguous urls" do should "resolve ambiguous urls" do
bobross = FactoryBot.create(:artist, :name => "bob_ross", :url_string => "http://artists.com/bobross/image.jpg") bobross = FactoryBot.create(:artist, :name => "bob_ross", :url_string => "http://artists.com/bobross/image.jpg")
bob = FactoryBot.create(:artist, :name => "bob", :url_string => "http://artists.com/bob/image.jpg") bob = FactoryBot.create(:artist, :name => "bob", :url_string => "http://artists.com/bob/image.jpg")
matches = Artist.find_all_by_url("http://artists.com/bob/test.jpg") assert_artist_found("bob", "http://artists.com/bob/test.jpg")
assert_equal(1, matches.size)
assert_equal("bob", matches.first.name)
end end
should "parse urls" do should "parse urls" do
@@ -190,7 +188,7 @@ class ArtistTest < ActiveSupport::TestCase
should "ignore pixiv.net/ and pixiv.net/img/ url matches" do should "ignore pixiv.net/ and pixiv.net/img/ url matches" do
a1 = FactoryBot.create(:artist, :name => "yomosaka", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720.png") a1 = FactoryBot.create(:artist, :name => "yomosaka", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720.png")
a2 = FactoryBot.create(:artist, :name => "niwatazumi_bf", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png") a2 = FactoryBot.create(:artist, :name => "niwatazumi_bf", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png")
assert_equal([], Artist.find_all_by_url("http://i2.pixiv.net/img28/img/kyang692/35563903.jpg")) assert_artist_not_found("http://i2.pixiv.net/img28/img/kyang692/35563903.jpg")
end end
should "find matches by url" do should "find matches by url" do
@@ -198,11 +196,11 @@ class ArtistTest < ActiveSupport::TestCase
a2 = FactoryBot.create(:artist, :name => "subway", :url_string => "http://subway.com/x/test.jpg") a2 = FactoryBot.create(:artist, :name => "subway", :url_string => "http://subway.com/x/test.jpg")
a3 = FactoryBot.create(:artist, :name => "minko", :url_string => "https://minko.com/x/test.jpg") a3 = FactoryBot.create(:artist, :name => "minko", :url_string => "https://minko.com/x/test.jpg")
assert_equal(["rembrandt"], Artist.find_all_by_url("http://rembrandt.com/x/test.jpg").map(&:name)) assert_artist_found("rembrandt", "http://rembrandt.com/x/test.jpg")
assert_equal(["rembrandt"], Artist.find_all_by_url("http://rembrandt.com/x/another.jpg").map(&:name)) assert_artist_found("rembrandt", "http://rembrandt.com/x/another.jpg")
assert_equal([], Artist.find_all_by_url("http://nonexistent.com/test.jpg").map(&:name)) assert_artist_not_found("http://nonexistent.com/test.jpg")
assert_equal(["minko"], Artist.find_all_by_url("https://minko.com/x/test.jpg").map(&:name)) assert_artist_found("minko", "https://minko.com/x/test.jpg")
assert_equal(["minko"], Artist.find_all_by_url("http://minko.com/x/test.jpg").map(&:name)) assert_artist_found("minko", "http://minko.com/x/test.jpg")
end end
should "be case-insensitive to domains when finding matches by url" do should "be case-insensitive to domains when finding matches by url" do
@@ -212,7 +210,7 @@ class ArtistTest < ActiveSupport::TestCase
should "not find duplicates" do should "not find duplicates" do
FactoryBot.create(:artist, :name => "warhol", :url_string => "http://warhol.com/x/a/image.jpg\nhttp://warhol.com/x/b/image.jpg") FactoryBot.create(:artist, :name => "warhol", :url_string => "http://warhol.com/x/a/image.jpg\nhttp://warhol.com/x/b/image.jpg")
assert_equal(["warhol"], Artist.find_all_by_url("http://warhol.com/x/test.jpg").map(&:name)) assert_artist_found("warhol", "http://warhol.com/x/test.jpg")
end end
should "not include duplicate urls" do should "not include duplicate urls" do
@@ -222,7 +220,7 @@ class ArtistTest < ActiveSupport::TestCase
should "hide deleted artists" do should "hide deleted artists" do
FactoryBot.create(:artist, :name => "warhol", :url_string => "http://warhol.com/a/image.jpg", :is_active => false) FactoryBot.create(:artist, :name => "warhol", :url_string => "http://warhol.com/a/image.jpg", :is_active => false)
assert_equal([], Artist.find_all_by_url("http://warhol.com/a/image.jpg").map(&:name)) assert_artist_not_found("http://warhol.com/a/image.jpg")
end end
context "when finding deviantart artists" do context "when finding deviantart artists" do