From 583f8457f06ca8b412a0283737adfcf331e7627f Mon Sep 17 00:00:00 2001 From: evazion Date: Sat, 8 Sep 2018 16:03:10 -0500 Subject: [PATCH] artists: clean up artist finding logic. Rename Artist#find_all_by_url to url_matches and drop previous url_matches method, along with find_artists and search_for_profile. Previously find_artists tried to lookup the url, referer url, and profile url in turn until an artist match was found. This was wasteful, because the source strategy already knows which url to lookup (usually the profile url). If that url doesn't find a match, then the artist doesn't exist. --- app/logical/sources/strategies/base.rb | 5 +-- app/logical/sources/strategies/twitter.rb | 8 ----- app/models/artist.rb | 38 ++--------------------- test/unit/artist_test.rb | 20 ++++++------ 4 files changed, 14 insertions(+), 57 deletions(-) diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index 2404e2f40..9d87b2273 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -129,6 +129,8 @@ module Sources normalize_for_artist_finder.present? end + # The url to use for artist finding purposes. This will be stored in the + # artist entry. Normally this will be the profile url. def normalize_for_artist_finder profile_url || url end @@ -139,8 +141,7 @@ module Sources end def artists - url = profile_url.presence || image_url.presence - Artist.find_artists(url) + Artist.url_matches(normalize_for_artist_finder) end def file_url diff --git a/app/logical/sources/strategies/twitter.rb b/app/logical/sources/strategies/twitter.rb index 99f28fba3..b1c83a261 100644 --- a/app/logical/sources/strategies/twitter.rb +++ b/app/logical/sources/strategies/twitter.rb @@ -66,14 +66,6 @@ module Sources::Strategies "" end - def artists - if profile_url.present? - Artist.find_artists(profile_url) - else - [] - end - end - def artist_name return "" if api_response.blank? api_response.attrs[:user][:screen_name] diff --git a/app/models/artist.rb b/app/models/artist.rb index 0ef989b19..3b54ee35d 100644 --- a/app/models/artist.rb +++ b/app/models/artist.rb @@ -148,7 +148,7 @@ class Artist < ApplicationRecord %r!\Ahttps?://(?:[a-zA-Z0-9_-]+\.)*#{domain}/\z!i end) - def find_all_by_url(url) + def url_matches(url) url = ArtistUrl.normalize(url) artists = [] @@ -163,7 +163,7 @@ class Artist < ApplicationRecord break if url =~ SITE_BLACKLIST_REGEXP end - artists.inject({}) {|h, x| h[x.name] = x; h}.values.slice(0, 20) + where(id: artists.uniq(&:name).take(20)) end end @@ -456,40 +456,6 @@ class Artist < ApplicationRecord end module SearchMethods - def find_artists(url, referer_url = nil) - artists = url_matches(url).order("id desc").limit(10) - - if artists.empty? && referer_url.present? && referer_url != url - artists = url_matches(referer_url).order("id desc").limit(20) - end - - artists - rescue PixivApiClient::Error => e - [] - end - - def url_matches(string) - matches = find_all_by_url(string).map(&:id) - - if matches.any? - where("id in (?)", matches) - elsif matches = search_for_profile(string) - where("id in (?)", matches) - else - where("false") - end - end - - def search_for_profile(url) - source = Sources::Strategies.find(url) - find_all_by_url(source.profile_url) - rescue Net::OpenTimeout, PixivApiClient::Error - raise if Rails.env.test? - nil - rescue Exception - nil - end - def other_names_match(string) if string =~ /\*/ && CurrentUser.is_builder? where("artists.other_names ILIKE ? ESCAPE E'\\\\'", string.to_escaped_for_sql_like) diff --git a/test/unit/artist_test.rb b/test/unit/artist_test.rb index 74925eaa4..a6df35c0a 100644 --- a/test/unit/artist_test.rb +++ b/test/unit/artist_test.rb @@ -154,9 +154,7 @@ class ArtistTest < ActiveSupport::TestCase should "resolve ambiguous urls" do bobross = FactoryBot.create(:artist, :name => "bob_ross", :url_string => "http://artists.com/bobross/image.jpg") bob = FactoryBot.create(:artist, :name => "bob", :url_string => "http://artists.com/bob/image.jpg") - matches = Artist.find_all_by_url("http://artists.com/bob/test.jpg") - assert_equal(1, matches.size) - assert_equal("bob", matches.first.name) + assert_artist_found("bob", "http://artists.com/bob/test.jpg") end should "parse urls" do @@ -190,7 +188,7 @@ class ArtistTest < ActiveSupport::TestCase should "ignore pixiv.net/ and pixiv.net/img/ url matches" do a1 = FactoryBot.create(:artist, :name => "yomosaka", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720.png") a2 = FactoryBot.create(:artist, :name => "niwatazumi_bf", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png") - assert_equal([], Artist.find_all_by_url("http://i2.pixiv.net/img28/img/kyang692/35563903.jpg")) + assert_artist_not_found("http://i2.pixiv.net/img28/img/kyang692/35563903.jpg") end should "find matches by url" do @@ -198,11 +196,11 @@ class ArtistTest < ActiveSupport::TestCase a2 = FactoryBot.create(:artist, :name => "subway", :url_string => "http://subway.com/x/test.jpg") a3 = FactoryBot.create(:artist, :name => "minko", :url_string => "https://minko.com/x/test.jpg") - assert_equal(["rembrandt"], Artist.find_all_by_url("http://rembrandt.com/x/test.jpg").map(&:name)) - assert_equal(["rembrandt"], Artist.find_all_by_url("http://rembrandt.com/x/another.jpg").map(&:name)) - assert_equal([], Artist.find_all_by_url("http://nonexistent.com/test.jpg").map(&:name)) - assert_equal(["minko"], Artist.find_all_by_url("https://minko.com/x/test.jpg").map(&:name)) - assert_equal(["minko"], Artist.find_all_by_url("http://minko.com/x/test.jpg").map(&:name)) + assert_artist_found("rembrandt", "http://rembrandt.com/x/test.jpg") + assert_artist_found("rembrandt", "http://rembrandt.com/x/another.jpg") + assert_artist_not_found("http://nonexistent.com/test.jpg") + assert_artist_found("minko", "https://minko.com/x/test.jpg") + assert_artist_found("minko", "http://minko.com/x/test.jpg") end should "be case-insensitive to domains when finding matches by url" do @@ -212,7 +210,7 @@ class ArtistTest < ActiveSupport::TestCase should "not find duplicates" do FactoryBot.create(:artist, :name => "warhol", :url_string => "http://warhol.com/x/a/image.jpg\nhttp://warhol.com/x/b/image.jpg") - assert_equal(["warhol"], Artist.find_all_by_url("http://warhol.com/x/test.jpg").map(&:name)) + assert_artist_found("warhol", "http://warhol.com/x/test.jpg") end should "not include duplicate urls" do @@ -222,7 +220,7 @@ class ArtistTest < ActiveSupport::TestCase should "hide deleted artists" do FactoryBot.create(:artist, :name => "warhol", :url_string => "http://warhol.com/a/image.jpg", :is_active => false) - assert_equal([], Artist.find_all_by_url("http://warhol.com/a/image.jpg").map(&:name)) + assert_artist_not_found("http://warhol.com/a/image.jpg") end context "when finding deviantart artists" do