artists: change how artist urls are normalized.
Change how artist URLs are normalized in artist entries.

Don't try to secretly convert image URLs to profile URLs in artist entries. For example, if someone puts a Pixiv image URL in an artist entry, don't secretly try to fetch the source and convert it into a profile URL in the `normalized_url` field.

We did this because years ago, it was standard practice to put image URLs in artist entries. Pixiv image URLs used to contain the artist's username, so we used to put image URLs in artist entries for artist finding purposes. But Pixiv changed it so that image URLs no longer contained the username, so we dealt with it by adding a `normalized_url` column to artist_urls and secretly converting image URLs to profile URLs in this field. But this is no longer necessary because now we don't normally put image URLs in artist entries in the first place.

Now the `profile_url` method in `Source::URL` is used to normalize URLs in artist entries. This lets us parse various profile URL formats and normalize them into a single canonical form.

This also removes the `normalize_for_artist_finder` method from source strategies. Instead the `profile_url` method is used for artist finding purposes. So the profile URL returned by the source strategy needs to be the same as the URL in the artist entry in order for artist finding to work.
This commit is contained in:
@@ -2,14 +2,14 @@ require 'test_helper'
|
||||
|
||||
class ArtistTest < ActiveSupport::TestCase
|
||||
def assert_artist_found(expected_name, source_url)
|
||||
artists = ArtistFinder.find_artists(source_url).to_a
|
||||
artists = Artist.search(url_matches: source_url).to_a
|
||||
|
||||
assert_equal(1, artists.size)
|
||||
assert_equal(expected_name, artists.first.name, "Testing URL: #{source_url}")
|
||||
end
|
||||
|
||||
def assert_artist_not_found(source_url)
|
||||
artists = ArtistFinder.find_artists(source_url).to_a
|
||||
artists = Artist.search(url_matches: source_url).to_a
|
||||
assert_equal(0, artists.size, "Testing URL: #{source_url}")
|
||||
end
|
||||
|
||||
|
||||
@@ -52,149 +52,97 @@ class ArtistURLTest < ActiveSupport::TestCase
|
||||
end
|
||||
|
||||
should "normalise https" do
|
||||
url = FactoryBot.create(:artist_url, :url => "https://google.com")
|
||||
url = create(:artist_url, url: "https://google.com")
|
||||
assert_equal("https://google.com", url.url)
|
||||
assert_equal("http://google.com/", url.normalized_url)
|
||||
end
|
||||
|
||||
should "normalise domains to lowercase" do
|
||||
url = FactoryBot.create(:artist_url, url: "https://ArtistName.example.com")
|
||||
url = create(:artist_url, url: "https://ArtistName.example.com")
|
||||
assert_equal("http://artistname.example.com/", url.normalized_url)
|
||||
end
|
||||
|
||||
context "normalize twitter profile urls" do
|
||||
setup do
|
||||
@url = FactoryBot.create(:artist_url, :url => "https://twitter.com/BLAH")
|
||||
end
|
||||
should "normalize ArtStation urls" do
|
||||
url = create(:artist_url, url: "https://www.artstation.com/koyorin")
|
||||
assert_equal("http://www.artstation.com/koyorin/", url.normalized_url)
|
||||
|
||||
should "downcase the url" do
|
||||
assert_equal("http://twitter.com/blah/", @url.normalized_url)
|
||||
end
|
||||
end
|
||||
|
||||
context "artstation urls" do
|
||||
setup do
|
||||
@urls = [
|
||||
FactoryBot.create(:artist_url, url: "https://www.artstation.com/koyorin"),
|
||||
FactoryBot.create(:artist_url, url: "https://koyorin.artstation.com"),
|
||||
FactoryBot.create(:artist_url, url: "https://www.artstation.com/artwork/04XA4")
|
||||
]
|
||||
end
|
||||
|
||||
should "normalize" do
|
||||
assert_equal("http://www.artstation.com/koyorin/", @urls[0].normalized_url)
|
||||
assert_equal("http://www.artstation.com/koyorin/", @urls[1].normalized_url)
|
||||
assert_equal("http://www.artstation.com/jeyrain/", @urls[2].normalized_url)
|
||||
end
|
||||
end
|
||||
|
||||
context "deviantart urls" do
|
||||
setup do
|
||||
@urls = [
|
||||
FactoryBot.create(:artist_url, url: "https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484"),
|
||||
FactoryBot.create(:artist_url, url: "http://noizave.deviantart.com/art/test-post-please-ignore-685436408"),
|
||||
FactoryBot.create(:artist_url, url: "https://www.deviantart.com/noizave")
|
||||
]
|
||||
end
|
||||
|
||||
should "normalize" do
|
||||
assert_equal("http://www.deviantart.com/aeror404/", @urls[0].normalized_url)
|
||||
assert_equal("http://www.deviantart.com/noizave/", @urls[1].normalized_url)
|
||||
assert_equal("http://www.deviantart.com/noizave/", @urls[2].normalized_url)
|
||||
end
|
||||
end
|
||||
|
||||
context "nicoseiga urls" do
|
||||
setup do
|
||||
@urls = [
|
||||
FactoryBot.create(:artist_url, url: "http://seiga.nicovideo.jp/user/illust/7017777"),
|
||||
FactoryBot.create(:artist_url, url: "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663"),
|
||||
FactoryBot.create(:artist_url, url: "http://seiga.nicovideo.jp/seiga/im4937663")
|
||||
]
|
||||
end
|
||||
|
||||
should "normalize" do
|
||||
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", @urls[0].normalized_url)
|
||||
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", @urls[1].normalized_url)
|
||||
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", @urls[2].normalized_url)
|
||||
end
|
||||
url = create(:artist_url, url: "https://koyorin.artstation.com"),
|
||||
assert_equal("http://www.artstation.com/koyorin/", url.normalized_url)
|
||||
end
|
||||
|
||||
should "normalize fc2 urls" do
|
||||
url = FactoryBot.create(:artist_url, :url => "http://blog55.fc2.com/monet")
|
||||
url = create(:artist_url, url: "http://blog55.fc2.com/monet")
|
||||
assert_equal("http://blog55.fc2.com/monet", url.url)
|
||||
assert_equal("http://monet.blog.fc2.com/", url.normalized_url)
|
||||
|
||||
url = FactoryBot.create(:artist_url, :url => "http://blog-imgs-55.fc2.com/monet")
|
||||
url = create(:artist_url, url: "http://blog-imgs-55.fc2.com/monet")
|
||||
assert_equal("http://blog-imgs-55.fc2.com/monet", url.url)
|
||||
assert_equal("http://monet.blog.fc2.com/", url.normalized_url)
|
||||
end
|
||||
|
||||
should "normalize deviant art artist urls" do
|
||||
url = FactoryBot.create(:artist_url, :url => "https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484")
|
||||
assert_equal("http://www.deviantart.com/aeror404/", url.normalized_url)
|
||||
url = create(:artist_url, url: "https://noizave.deviantart.com")
|
||||
assert_equal("http://www.deviantart.com/noizave/", url.normalized_url)
|
||||
end
|
||||
|
||||
should "normalize nico seiga artist urls" do
|
||||
url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/user/illust/7017777")
|
||||
url = create(:artist_url, url: "http://seiga.nicovideo.jp/user/illust/7017777")
|
||||
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url)
|
||||
|
||||
url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/seiga/im4937663")
|
||||
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url)
|
||||
url = create(:artist_url, url: "http://seiga.nicovideo.jp/manga/list?user_id=23839737")
|
||||
assert_equal("http://seiga.nicovideo.jp/user/illust/23839737/", url.normalized_url)
|
||||
|
||||
url = create(:artist_url, url: "https://www.nicovideo.jp/user/20446930/mylist/28674289")
|
||||
assert_equal("http://seiga.nicovideo.jp/user/illust/20446930/", url.normalized_url)
|
||||
end
|
||||
|
||||
should "normalize hentai foundry artist urls" do
|
||||
url = FactoryBot.create(:artist_url, :url => "http://pictures.hentai-foundry.com//a/AnimeFlux/219123.jpg")
|
||||
assert_equal("http://www.hentai-foundry.com/user/AnimeFlux/", url.normalized_url)
|
||||
end
|
||||
|
||||
should "normalize pixiv urls" do
|
||||
url = FactoryBot.create(:artist_url, :url => "https://i.pximg.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png")
|
||||
assert_equal("https://i.pximg.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png", url.url)
|
||||
assert_equal("http://www.pixiv.net/member.php?id=339253/", url.normalized_url)
|
||||
url = create(:artist_url, url: "https://www.hentai-foundry.com/user/kajinman/profile")
|
||||
assert_equal("http://www.hentai-foundry.com/user/kajinman/", url.normalized_url)
|
||||
end
|
||||
|
||||
should "normalize pixiv stacc urls" do
|
||||
url = FactoryBot.create(:artist_url, :url => "https://www.pixiv.net/stacc/evazion")
|
||||
assert_equal("https://www.pixiv.net/stacc/evazion", url.url)
|
||||
url = create(:artist_url, url: "https://www.pixiv.net/stacc/evazion")
|
||||
assert_equal("http://www.pixiv.net/stacc/evazion/", url.normalized_url)
|
||||
end
|
||||
|
||||
should "normalize pixiv fanbox account urls" do
|
||||
url = FactoryBot.create(:artist_url, :url => "http://www.pixiv.net/fanbox/creator/3113804")
|
||||
assert_equal("http://www.pixiv.net/fanbox/creator/3113804", url.url)
|
||||
assert_equal("http://drw24olf.fanbox.cc/", url.normalized_url)
|
||||
url = create(:artist_url, url: "https://www.pixiv.net/fanbox/creator/3113804")
|
||||
assert_equal("http://www.pixiv.net/fanbox/creator/3113804/", url.normalized_url)
|
||||
|
||||
url = create(:artist_url, url: "https://omu001.fanbox.cc/posts/39714")
|
||||
assert_equal("http://omu001.fanbox.cc/", url.normalized_url)
|
||||
end
|
||||
|
||||
should "normalize pixiv.net/user/123 urls" do
|
||||
url = create(:artist_url, url: "https://www.pixiv.net/en/users/123")
|
||||
assert_equal("https://www.pixiv.net/en/users/123", url.url)
|
||||
assert_equal("http://www.pixiv.net/member.php?id=123/", url.normalized_url)
|
||||
end
|
||||
|
||||
should "normalize twitter urls" do
|
||||
url = FactoryBot.create(:artist_url, :url => "https://twitter.com/aoimanabu/status/892370963630743552")
|
||||
assert_equal("https://twitter.com/aoimanabu/status/892370963630743552", url.url)
|
||||
url = create(:artist_url, url: "https://twitter.com/aoimanabu/status/892370963630743552")
|
||||
assert_equal("http://twitter.com/aoimanabu/", url.normalized_url)
|
||||
|
||||
url = create(:artist_url, url: "https://twitter.com/BLAH")
|
||||
assert_equal("http://twitter.com/BLAH/", url.normalized_url)
|
||||
end
|
||||
|
||||
should "normalize https://twitter.com/intent/user?user_id=* urls" do
|
||||
url = FactoryBot.create(:artist_url, :url => "https://twitter.com/intent/user?user_id=2784590030")
|
||||
assert_equal("https://twitter.com/intent/user?user_id=2784590030", url.url)
|
||||
url = create(:artist_url, url: "https://twitter.com/intent/user?user_id=2784590030")
|
||||
assert_equal("http://twitter.com/intent/user?user_id=2784590030/", url.normalized_url)
|
||||
end
|
||||
|
||||
should "normalize nijie urls" do
|
||||
url = FactoryBot.create(:artist_url, url: "https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png")
|
||||
url = create(:artist_url, url: "https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png")
|
||||
assert_equal("http://nijie.info/members.php?id=236014/", url.normalized_url)
|
||||
|
||||
url = FactoryBot.create(:artist_url, url: "https://nijie.info/members.php?id=161703")
|
||||
url = create(:artist_url, url: "https://nijie.info/members.php?id=161703")
|
||||
assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url)
|
||||
|
||||
url = FactoryBot.create(:artist_url, url: "https://www.nijie.info/members_illust.php?id=161703")
|
||||
url = create(:artist_url, url: "https://www.nijie.info/members_illust.php?id=161703")
|
||||
assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url)
|
||||
|
||||
url = FactoryBot.create(:artist_url, url: "https://nijie.info/invalid.php")
|
||||
url = create(:artist_url, url: "https://nijie.info/invalid.php")
|
||||
assert_equal("http://nijie.info/invalid.php/", url.normalized_url)
|
||||
end
|
||||
|
||||
|
||||
@@ -67,10 +67,6 @@ module Sources
|
||||
should "get the artist name" do
|
||||
assert_equal("Afrobull", @site.artist_name)
|
||||
end
|
||||
|
||||
should "get the normalized url" do
|
||||
assert_equal("https://www.hentai-foundry.com/user/Afrobull", @site.normalize_for_artist_finder)
|
||||
end
|
||||
end
|
||||
|
||||
context "A deleted picture" do
|
||||
|
||||
Reference in New Issue
Block a user