artists: change how artist urls are normalized.

Change how artist URLs are normalized in artist entries. Don't try to secretly
convert image URLs to profile URLs in artist entries. For example, if someone puts a
Pixiv image URL in an artist entry, don't secretly try to fetch the source and
convert it into a profile URL in the `normalized_url` field.

We used to do this conversion because, years ago, it was standard practice to put
image URLs in artist entries. Pixiv image URLs used to contain the artist's username,
so image URLs were put in artist entries for artist finding purposes. When Pixiv
changed its image URLs so that they no longer contained the username, we dealt with
it by adding a `normalized_url` column to artist_urls and secretly converting image
URLs to profile URLs in that field. This is no longer necessary because we don't
normally put image URLs in artist entries anymore.

Now the `profile_url` method in `Source::URL` is used to normalize URLs in artist
entries. This lets us parse various profile URL formats and normalize them into a
single canonical form.

This also removes the `normalize_for_artist_finder` method from source strategies.
Instead, the `profile_url` method is used for artist finding purposes. This means the
profile URL returned by the source strategy needs to be the same as the URL in the
artist entry in order for artist finding to work.
This commit is contained in:
evazion
2022-03-12 19:54:47 -06:00
parent 9343f7c912
commit cf8b8207e2
9 changed files with 55 additions and 135 deletions

View File

@@ -2,14 +2,14 @@ require 'test_helper'
class ArtistTest < ActiveSupport::TestCase
def assert_artist_found(expected_name, source_url)
artists = ArtistFinder.find_artists(source_url).to_a
artists = Artist.search(url_matches: source_url).to_a
assert_equal(1, artists.size)
assert_equal(expected_name, artists.first.name, "Testing URL: #{source_url}")
end
def assert_artist_not_found(source_url)
artists = ArtistFinder.find_artists(source_url).to_a
artists = Artist.search(url_matches: source_url).to_a
assert_equal(0, artists.size, "Testing URL: #{source_url}")
end

View File

@@ -52,149 +52,97 @@ class ArtistURLTest < ActiveSupport::TestCase
end
should "normalise https" do
url = FactoryBot.create(:artist_url, :url => "https://google.com")
url = create(:artist_url, url: "https://google.com")
assert_equal("https://google.com", url.url)
assert_equal("http://google.com/", url.normalized_url)
end
should "normalise domains to lowercase" do
url = FactoryBot.create(:artist_url, url: "https://ArtistName.example.com")
url = create(:artist_url, url: "https://ArtistName.example.com")
assert_equal("http://artistname.example.com/", url.normalized_url)
end
context "normalize twitter profile urls" do
setup do
@url = FactoryBot.create(:artist_url, :url => "https://twitter.com/BLAH")
end
should "normalize ArtStation urls" do
url = create(:artist_url, url: "https://www.artstation.com/koyorin")
assert_equal("http://www.artstation.com/koyorin/", url.normalized_url)
should "downcase the url" do
assert_equal("http://twitter.com/blah/", @url.normalized_url)
end
end
context "artstation urls" do
setup do
@urls = [
FactoryBot.create(:artist_url, url: "https://www.artstation.com/koyorin"),
FactoryBot.create(:artist_url, url: "https://koyorin.artstation.com"),
FactoryBot.create(:artist_url, url: "https://www.artstation.com/artwork/04XA4")
]
end
should "normalize" do
assert_equal("http://www.artstation.com/koyorin/", @urls[0].normalized_url)
assert_equal("http://www.artstation.com/koyorin/", @urls[1].normalized_url)
assert_equal("http://www.artstation.com/jeyrain/", @urls[2].normalized_url)
end
end
context "deviantart urls" do
setup do
@urls = [
FactoryBot.create(:artist_url, url: "https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484"),
FactoryBot.create(:artist_url, url: "http://noizave.deviantart.com/art/test-post-please-ignore-685436408"),
FactoryBot.create(:artist_url, url: "https://www.deviantart.com/noizave")
]
end
should "normalize" do
assert_equal("http://www.deviantart.com/aeror404/", @urls[0].normalized_url)
assert_equal("http://www.deviantart.com/noizave/", @urls[1].normalized_url)
assert_equal("http://www.deviantart.com/noizave/", @urls[2].normalized_url)
end
end
context "nicoseiga urls" do
setup do
@urls = [
FactoryBot.create(:artist_url, url: "http://seiga.nicovideo.jp/user/illust/7017777"),
FactoryBot.create(:artist_url, url: "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663"),
FactoryBot.create(:artist_url, url: "http://seiga.nicovideo.jp/seiga/im4937663")
]
end
should "normalize" do
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", @urls[0].normalized_url)
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", @urls[1].normalized_url)
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", @urls[2].normalized_url)
end
# The subdomain form of an ArtStation profile URL is expected to normalize
# to the same canonical www.artstation.com profile URL.
url = create(:artist_url, url: "https://koyorin.artstation.com")
assert_equal("http://www.artstation.com/koyorin/", url.normalized_url)
end
should "normalize fc2 urls" do
url = FactoryBot.create(:artist_url, :url => "http://blog55.fc2.com/monet")
url = create(:artist_url, url: "http://blog55.fc2.com/monet")
assert_equal("http://blog55.fc2.com/monet", url.url)
assert_equal("http://monet.blog.fc2.com/", url.normalized_url)
url = FactoryBot.create(:artist_url, :url => "http://blog-imgs-55.fc2.com/monet")
url = create(:artist_url, url: "http://blog-imgs-55.fc2.com/monet")
assert_equal("http://blog-imgs-55.fc2.com/monet", url.url)
assert_equal("http://monet.blog.fc2.com/", url.normalized_url)
end
should "normalize deviant art artist urls" do
url = FactoryBot.create(:artist_url, :url => "https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484")
assert_equal("http://www.deviantart.com/aeror404/", url.normalized_url)
url = create(:artist_url, url: "https://noizave.deviantart.com")
assert_equal("http://www.deviantart.com/noizave/", url.normalized_url)
end
should "normalize nico seiga artist urls" do
url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/user/illust/7017777")
url = create(:artist_url, url: "http://seiga.nicovideo.jp/user/illust/7017777")
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url)
url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/seiga/im4937663")
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url)
url = create(:artist_url, url: "http://seiga.nicovideo.jp/manga/list?user_id=23839737")
assert_equal("http://seiga.nicovideo.jp/user/illust/23839737/", url.normalized_url)
url = create(:artist_url, url: "https://www.nicovideo.jp/user/20446930/mylist/28674289")
assert_equal("http://seiga.nicovideo.jp/user/illust/20446930/", url.normalized_url)
end
should "normalize hentai foundry artist urls" do
url = FactoryBot.create(:artist_url, :url => "http://pictures.hentai-foundry.com//a/AnimeFlux/219123.jpg")
assert_equal("http://www.hentai-foundry.com/user/AnimeFlux/", url.normalized_url)
end
should "normalize pixiv urls" do
url = FactoryBot.create(:artist_url, :url => "https://i.pximg.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png")
assert_equal("https://i.pximg.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png", url.url)
assert_equal("http://www.pixiv.net/member.php?id=339253/", url.normalized_url)
url = create(:artist_url, url: "https://www.hentai-foundry.com/user/kajinman/profile")
assert_equal("http://www.hentai-foundry.com/user/kajinman/", url.normalized_url)
end
should "normalize pixiv stacc urls" do
url = FactoryBot.create(:artist_url, :url => "https://www.pixiv.net/stacc/evazion")
assert_equal("https://www.pixiv.net/stacc/evazion", url.url)
url = create(:artist_url, url: "https://www.pixiv.net/stacc/evazion")
assert_equal("http://www.pixiv.net/stacc/evazion/", url.normalized_url)
end
should "normalize pixiv fanbox account urls" do
url = FactoryBot.create(:artist_url, :url => "http://www.pixiv.net/fanbox/creator/3113804")
assert_equal("http://www.pixiv.net/fanbox/creator/3113804", url.url)
assert_equal("http://drw24olf.fanbox.cc/", url.normalized_url)
url = create(:artist_url, url: "https://www.pixiv.net/fanbox/creator/3113804")
assert_equal("http://www.pixiv.net/fanbox/creator/3113804/", url.normalized_url)
url = create(:artist_url, url: "https://omu001.fanbox.cc/posts/39714")
assert_equal("http://omu001.fanbox.cc/", url.normalized_url)
end
should "normalize pixiv.net/user/123 urls" do
url = create(:artist_url, url: "https://www.pixiv.net/en/users/123")
assert_equal("https://www.pixiv.net/en/users/123", url.url)
assert_equal("http://www.pixiv.net/member.php?id=123/", url.normalized_url)
end
should "normalize twitter urls" do
url = FactoryBot.create(:artist_url, :url => "https://twitter.com/aoimanabu/status/892370963630743552")
assert_equal("https://twitter.com/aoimanabu/status/892370963630743552", url.url)
url = create(:artist_url, url: "https://twitter.com/aoimanabu/status/892370963630743552")
assert_equal("http://twitter.com/aoimanabu/", url.normalized_url)
url = create(:artist_url, url: "https://twitter.com/BLAH")
assert_equal("http://twitter.com/BLAH/", url.normalized_url)
end
should "normalize https://twitter.com/intent/user?user_id=* urls" do
url = FactoryBot.create(:artist_url, :url => "https://twitter.com/intent/user?user_id=2784590030")
assert_equal("https://twitter.com/intent/user?user_id=2784590030", url.url)
url = create(:artist_url, url: "https://twitter.com/intent/user?user_id=2784590030")
assert_equal("http://twitter.com/intent/user?user_id=2784590030/", url.normalized_url)
end
should "normalize nijie urls" do
url = FactoryBot.create(:artist_url, url: "https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png")
url = create(:artist_url, url: "https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png")
assert_equal("http://nijie.info/members.php?id=236014/", url.normalized_url)
url = FactoryBot.create(:artist_url, url: "https://nijie.info/members.php?id=161703")
url = create(:artist_url, url: "https://nijie.info/members.php?id=161703")
assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url)
url = FactoryBot.create(:artist_url, url: "https://www.nijie.info/members_illust.php?id=161703")
url = create(:artist_url, url: "https://www.nijie.info/members_illust.php?id=161703")
assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url)
url = FactoryBot.create(:artist_url, url: "https://nijie.info/invalid.php")
url = create(:artist_url, url: "https://nijie.info/invalid.php")
assert_equal("http://nijie.info/invalid.php/", url.normalized_url)
end

View File

@@ -67,10 +67,6 @@ module Sources
should "get the artist name" do
assert_equal("Afrobull", @site.artist_name)
end
should "get the normalized url" do
assert_equal("https://www.hentai-foundry.com/user/Afrobull", @site.normalize_for_artist_finder)
end
end
context "A deleted picture" do