diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index 96a159612..2fdda17d9 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -97,7 +97,8 @@ module Sources [artist_name, tag_name].compact.uniq end - # A link to the artist's profile page on the site. + # A link to the artist's profile page on the site. This will be used for + # artist finding purposes, so it needs to match the URL in the artist entry. def profile_url nil end @@ -137,12 +138,6 @@ module Sources end memoize :http_downloader - # The url to use for artist finding purposes. This will be stored in the - # artist entry. Normally this will be the profile url. - def normalize_for_artist_finder - profile_url.presence || url - end - # Given a post/image url, this is the normalized url that will be displayed in a post's page in its stead. # This function should never make any network call, even indirectly. Return nil to never normalize. def normalize_for_source @@ -150,7 +145,7 @@ module Sources end def artists - ArtistFinder.find_artists(normalize_for_artist_finder.to_s) + ArtistFinder.find_artists(profile_url.to_s) end # A new artist entry with suggested defaults for when the artist doesn't @@ -234,7 +229,6 @@ module Sources :image_urls => image_urls, :page_url => page_url, :canonical_url => canonical_url, - :normalized_for_artist_finder_url => normalize_for_artist_finder, :tags => tags, :normalized_tags => normalized_tags, :translated_tags => translated_tags, diff --git a/app/logical/sources/strategies/nico_seiga.rb b/app/logical/sources/strategies/nico_seiga.rb index 5eff5caa0..31c1e0f26 100644 --- a/app/logical/sources/strategies/nico_seiga.rb +++ b/app/logical/sources/strategies/nico_seiga.rb @@ -40,10 +40,7 @@ module Sources end def profile_url - user_id = api_client&.user_id - return if user_id.blank? # artists can be anonymous - - "https://seiga.nicovideo.jp/user/illust/#{api_client.user_id}" + "https://seiga.nicovideo.jp/user/illust/#{api_client.user_id}" if api_client.user_id.present? end def artist_name diff --git a/app/logical/sources/strategies/pixiv.rb b/app/logical/sources/strategies/pixiv.rb index 0cffcccb8..75101e087 100644 --- a/app/logical/sources/strategies/pixiv.rb +++ b/app/logical/sources/strategies/pixiv.rb @@ -62,12 +62,10 @@ module Sources end def profile_url - if parsed_url.profile_url.present? - parsed_url.profile_url - elsif api_illust[:userId].present? + if api_illust[:userId].present? "https://www.pixiv.net/users/#{api_illust[:userId]}" - else - nil + elsif parsed_url.profile_url.present? + parsed_url.profile_url end end diff --git a/app/logical/sources/strategies/twitter.rb b/app/logical/sources/strategies/twitter.rb index 374f860c7..1f0bf06b2 100644 --- a/app/logical/sources/strategies/twitter.rb +++ b/app/logical/sources/strategies/twitter.rb @@ -93,10 +93,6 @@ module Sources::Strategies api_response[:full_text].to_s end - def normalize_for_artist_finder - profile_url.try(:downcase).presence || url - end - def normalize_for_source if tag_name_from_url.present? && status_id.present? "https://twitter.com/#{tag_name_from_url}/status/#{status_id}" diff --git a/app/models/artist.rb b/app/models/artist.rb index d225d12d3..fd6658bc8 100644 --- a/app/models/artist.rb +++ b/app/models/artist.rb @@ -242,7 +242,8 @@ class Artist < ApplicationRecord elsif query.include?("*") where(id: ArtistURL.where_like(:url, query).select(:artist_id)) elsif query =~ %r{\Ahttps?://}i - ArtistFinder.find_artists(query) + url = Sources::Strategies.find(query).profile_url || query + ArtistFinder.find_artists(url) else where(id: ArtistURL.where_like(:url, "*#{query}*").select(:artist_id)) end diff --git a/app/models/artist_url.rb b/app/models/artist_url.rb index dbad3882c..445bab327 100644 --- a/app/models/artist_url.rb +++ b/app/models/artist_url.rb @@ -19,27 +19,16 @@ class ArtistURL < ApplicationRecord end def self.normalize_normalized_url(url) - if url.nil? - nil - else - url = url.sub(%r{^https://}, "http://") - url = url.sub(%r{^http://blog-imgs-\d+\.fc2}, "http://blog.fc2") - url = url.sub(%r{^http://blog-imgs-\d+-\w+\.fc2}, "http://blog.fc2") - url = url.sub(%r{^http://blog\d*\.fc2\.com/(?:\w/){,3}(\w+)}, "http://\\1.blog.fc2.com") - url = url.sub(%r{^http://pictures.hentai-foundry.com//}, "http://pictures.hentai-foundry.com/") + return nil if url.nil? - # the strategy won't always work for twitter because it looks for a status - url = url.downcase if url =~ %r{^https?://(?:mobile\.)?twitter\.com}i + url = Source::URL.parse(url)&.profile_url || url + url = url.sub(%r{^https://}, "http://") + url = url.sub(%r{^http://blog-imgs-\d+\.fc2}, "http://blog.fc2") + url = url.sub(%r{^http://blog-imgs-\d+-\w+\.fc2}, "http://blog.fc2") + url = url.sub(%r{^http://blog\d*\.fc2\.com/(?:\w/){,3}(\w+)}, "http://\\1.blog.fc2.com") - url = Sources::Strategies.find(url).normalize_for_artist_finder - - # XXX the Pixiv strategy should implement normalize_for_artist_finder and return the correct url directly. - url = url.sub(%r{\Ahttps?://www\.pixiv\.net/(?:en/)?users/(\d+)\z}i, 'https://www.pixiv.net/member.php?id=\1') - - url = url.gsub(%r{/+\Z}, "") - url = url.gsub(%r{^https://}, "http://") - url + "/" - end + url = url.gsub(%r{/+\Z}, "") + url + "/" end def self.search(params = {}) @@ -67,7 +56,8 @@ class ArtistURL < ApplicationRecord elsif url.include?("*") where_ilike(attr, url) else - where(attr => normalize_normalized_url(url)) + profile_url = Sources::Strategies.find(url).profile_url || url + where(attr => normalize_normalized_url(profile_url)) end end diff --git a/test/unit/artist_test.rb b/test/unit/artist_test.rb index 97dc2328d..07a3beb82 100644 --- a/test/unit/artist_test.rb +++ b/test/unit/artist_test.rb @@ -2,14 +2,14 @@ require 'test_helper' class ArtistTest < ActiveSupport::TestCase def assert_artist_found(expected_name, source_url) - artists = ArtistFinder.find_artists(source_url).to_a + artists = Artist.search(url_matches: source_url).to_a assert_equal(1, artists.size) assert_equal(expected_name, artists.first.name, "Testing URL: #{source_url}") end def assert_artist_not_found(source_url) - artists = ArtistFinder.find_artists(source_url).to_a + artists = Artist.search(url_matches: source_url).to_a assert_equal(0, artists.size, "Testing URL: #{source_url}") end diff --git a/test/unit/artist_url_test.rb b/test/unit/artist_url_test.rb index a9ccca9f9..938fb4657 100644 --- a/test/unit/artist_url_test.rb +++ b/test/unit/artist_url_test.rb @@ -52,149 +52,97 @@ class ArtistURLTest < ActiveSupport::TestCase end should "normalise https" do - url = FactoryBot.create(:artist_url, :url => "https://google.com") + url = create(:artist_url, url: "https://google.com") assert_equal("https://google.com", url.url) assert_equal("http://google.com/", url.normalized_url) end should "normalise domains to lowercase" do - url = FactoryBot.create(:artist_url, url: "https://ArtistName.example.com") + url = create(:artist_url, url: "https://ArtistName.example.com") assert_equal("http://artistname.example.com/", url.normalized_url) end - context "normalize twitter profile urls" do - setup do - @url = FactoryBot.create(:artist_url, :url => "https://twitter.com/BLAH") - end + should "normalize ArtStation urls" do + url = create(:artist_url, url: "https://www.artstation.com/koyorin") + assert_equal("http://www.artstation.com/koyorin/", url.normalized_url) - should "downcase the url" do - assert_equal("http://twitter.com/blah/", @url.normalized_url) - end - end - - context "artstation urls" do - setup do - @urls = [ - FactoryBot.create(:artist_url, url: "https://www.artstation.com/koyorin"), - FactoryBot.create(:artist_url, url: "https://koyorin.artstation.com"), - FactoryBot.create(:artist_url, url: "https://www.artstation.com/artwork/04XA4") - ] - end - - should "normalize" do - assert_equal("http://www.artstation.com/koyorin/", @urls[0].normalized_url) - assert_equal("http://www.artstation.com/koyorin/", @urls[1].normalized_url) - assert_equal("http://www.artstation.com/jeyrain/", @urls[2].normalized_url) - end - end - - context "deviantart urls" do - setup do - @urls = [ - FactoryBot.create(:artist_url, url: "https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484"), - FactoryBot.create(:artist_url, url: "http://noizave.deviantart.com/art/test-post-please-ignore-685436408"), - FactoryBot.create(:artist_url, url: "https://www.deviantart.com/noizave") - ] - end - - should "normalize" do - assert_equal("http://www.deviantart.com/aeror404/", @urls[0].normalized_url) - assert_equal("http://www.deviantart.com/noizave/", @urls[1].normalized_url) - assert_equal("http://www.deviantart.com/noizave/", @urls[2].normalized_url) - end - end - - context "nicoseiga urls" do - setup do - @urls = [ - FactoryBot.create(:artist_url, url: "http://seiga.nicovideo.jp/user/illust/7017777"), - FactoryBot.create(:artist_url, url: "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663"), - FactoryBot.create(:artist_url, url: "http://seiga.nicovideo.jp/seiga/im4937663") - ] - end - - should "normalize" do - assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", @urls[0].normalized_url) - assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", @urls[1].normalized_url) - assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", @urls[2].normalized_url) - end + url = create(:artist_url, url: "https://koyorin.artstation.com"), + assert_equal("http://www.artstation.com/koyorin/", url.normalized_url) end should "normalize fc2 urls" do - url = FactoryBot.create(:artist_url, :url => "http://blog55.fc2.com/monet") + url = create(:artist_url, url: "http://blog55.fc2.com/monet") assert_equal("http://blog55.fc2.com/monet", url.url) assert_equal("http://monet.blog.fc2.com/", url.normalized_url) - url = FactoryBot.create(:artist_url, :url => "http://blog-imgs-55.fc2.com/monet") + url = create(:artist_url, url: "http://blog-imgs-55.fc2.com/monet") assert_equal("http://blog-imgs-55.fc2.com/monet", url.url) assert_equal("http://monet.blog.fc2.com/", url.normalized_url) end should "normalize deviant art artist urls" do - url = FactoryBot.create(:artist_url, :url => "https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484") - assert_equal("http://www.deviantart.com/aeror404/", url.normalized_url) + url = create(:artist_url, url: "https://noizave.deviantart.com") + assert_equal("http://www.deviantart.com/noizave/", url.normalized_url) end should "normalize nico seiga artist urls" do - url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/user/illust/7017777") + url = create(:artist_url, url: "http://seiga.nicovideo.jp/user/illust/7017777") assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url) - url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/seiga/im4937663") - assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url) + url = create(:artist_url, url: "http://seiga.nicovideo.jp/manga/list?user_id=23839737") + assert_equal("http://seiga.nicovideo.jp/user/illust/23839737/", url.normalized_url) + + url = create(:artist_url, url: "https://www.nicovideo.jp/user/20446930/mylist/28674289") + assert_equal("http://seiga.nicovideo.jp/user/illust/20446930/", url.normalized_url) end should "normalize hentai foundry artist urls" do - url = FactoryBot.create(:artist_url, :url => "http://pictures.hentai-foundry.com//a/AnimeFlux/219123.jpg") - assert_equal("http://www.hentai-foundry.com/user/AnimeFlux/", url.normalized_url) - end - - should "normalize pixiv urls" do - url = FactoryBot.create(:artist_url, :url => "https://i.pximg.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png") - assert_equal("https://i.pximg.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png", url.url) - assert_equal("http://www.pixiv.net/member.php?id=339253/", url.normalized_url) + url = create(:artist_url, url: "https://www.hentai-foundry.com/user/kajinman/profile") + assert_equal("http://www.hentai-foundry.com/user/kajinman/", url.normalized_url) end should "normalize pixiv stacc urls" do - url = FactoryBot.create(:artist_url, :url => "https://www.pixiv.net/stacc/evazion") - assert_equal("https://www.pixiv.net/stacc/evazion", url.url) + url = create(:artist_url, url: "https://www.pixiv.net/stacc/evazion") assert_equal("http://www.pixiv.net/stacc/evazion/", url.normalized_url) end should "normalize pixiv fanbox account urls" do - url = FactoryBot.create(:artist_url, :url => "http://www.pixiv.net/fanbox/creator/3113804") - assert_equal("http://www.pixiv.net/fanbox/creator/3113804", url.url) - assert_equal("http://drw24olf.fanbox.cc/", url.normalized_url) + url = create(:artist_url, url: "https://www.pixiv.net/fanbox/creator/3113804") + assert_equal("http://www.pixiv.net/fanbox/creator/3113804/", url.normalized_url) + + url = create(:artist_url, url: "https://omu001.fanbox.cc/posts/39714") + assert_equal("http://omu001.fanbox.cc/", url.normalized_url) end should "normalize pixiv.net/user/123 urls" do url = create(:artist_url, url: "https://www.pixiv.net/en/users/123") - assert_equal("https://www.pixiv.net/en/users/123", url.url) assert_equal("http://www.pixiv.net/member.php?id=123/", url.normalized_url) end should "normalize twitter urls" do - url = FactoryBot.create(:artist_url, :url => "https://twitter.com/aoimanabu/status/892370963630743552") - assert_equal("https://twitter.com/aoimanabu/status/892370963630743552", url.url) + url = create(:artist_url, url: "https://twitter.com/aoimanabu/status/892370963630743552") assert_equal("http://twitter.com/aoimanabu/", url.normalized_url) + + url = create(:artist_url, url: "https://twitter.com/BLAH") + assert_equal("http://twitter.com/BLAH/", url.normalized_url) end should "normalize https://twitter.com/intent/user?user_id=* urls" do - url = FactoryBot.create(:artist_url, :url => "https://twitter.com/intent/user?user_id=2784590030") - assert_equal("https://twitter.com/intent/user?user_id=2784590030", url.url) + url = create(:artist_url, url: "https://twitter.com/intent/user?user_id=2784590030") assert_equal("http://twitter.com/intent/user?user_id=2784590030/", url.normalized_url) end should "normalize nijie urls" do - url = FactoryBot.create(:artist_url, url: "https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png") + url = create(:artist_url, url: "https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png") assert_equal("http://nijie.info/members.php?id=236014/", url.normalized_url) - url = FactoryBot.create(:artist_url, url: "https://nijie.info/members.php?id=161703") + url = create(:artist_url, url: "https://nijie.info/members.php?id=161703") assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url) - url = FactoryBot.create(:artist_url, url: "https://www.nijie.info/members_illust.php?id=161703") + url = create(:artist_url, url: "https://www.nijie.info/members_illust.php?id=161703") assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url) - url = FactoryBot.create(:artist_url, url: "https://nijie.info/invalid.php") + url = create(:artist_url, url: "https://nijie.info/invalid.php") assert_equal("http://nijie.info/invalid.php/", url.normalized_url) end diff --git a/test/unit/sources/hentai_foundry_test.rb b/test/unit/sources/hentai_foundry_test.rb index 39b880d31..c381623bb 100644 --- a/test/unit/sources/hentai_foundry_test.rb +++ b/test/unit/sources/hentai_foundry_test.rb @@ -67,10 +67,6 @@ module Sources should "get the artist name" do assert_equal("Afrobull", @site.artist_name) end - - should "get the normalized url" do - assert_equal("https://www.hentai-foundry.com/user/Afrobull", @site.normalize_for_artist_finder) - end end context "A deleted picture" do