artists: normalize urls added to artist entries.

When a URL is added to an artist entry, normalize it to a standard form.

Artist URLs have both a `url` column and a `normalized_url` column. The
`normalized_url` is used for artist finding and the `url` is the raw URL
entered by the user. Previously only the `normalized_url` field was
normalized; now the URL entered by the user is also converted to a
normalized form.

This means that if a URL like one of these is added to an artist entry:

* http://www.pixiv.net/member.php?id=1234
* http://www.pixiv.net/en/users/1234
* http://www.twitter.com/DanbooruBot/
* http://mobile.twitter.com/DanbooruBot/

It will be normalized to one of these forms:

* https://www.pixiv.net/users/1234
* https://twitter.com/DanbooruBot

This fixes problems with duplicate URLs being added to artist entries
because URLs weren't normalized to a single form.
This commit is contained in:
evazion
2022-03-18 01:47:25 -05:00
parent 455ee9a52a
commit 10dac3ee51
5 changed files with 42 additions and 16 deletions

View File

@@ -136,8 +136,7 @@ module Source
def profile_url
if user_id.present?
# "https://www.pixiv.net/users/#{user_id}"
"https://www.pixiv.net/member.php?id=#{user_id}"
"https://www.pixiv.net/users/#{user_id}"
elsif username.present?
"https://www.pixiv.net/stacc/#{username}"
end

View File

@@ -103,7 +103,7 @@ class ArtistURL < ApplicationRecord
end
def self.normalize_url(url)
Danbooru::URL.parse(url)&.to_normalized_s.presence || url
Source::URL.parse(url)&.profile_url || Danbooru::URL.parse(url)&.to_normalized_s || url
end
def url=(url)

View File

@@ -24,7 +24,7 @@ class ArtistVersionsControllerTest < ActionDispatch::IntegrationTest
should respond_to_search(name: "masao").with { [@versions[2], @versions[0]] }
should respond_to_search(name_matches: "(deleted)").with { @versions[1] }
should respond_to_search(group_name_matches: "the_best").with { @versions[2] }
should respond_to_search(urls_include_any: "https://www.deviantart.com/masao").with { @versions[2] }
should respond_to_search(urls_include_any: "https://www.deviantart.com/masao").with { [@versions[2], @versions[1], @versions[0]] }
should respond_to_search(is_deleted: "true").with { @versions[1] }
context "using includes" do

View File

@@ -183,7 +183,7 @@ class ArtistsControllerTest < ActionDispatch::IntegrationTest
should respond_to_search(has_tag: "false").with { [@banned, @deleted, @artgerm, @artist] }
should respond_to_search(has_urls: "true").with { [@artgerm, @masao] }
should respond_to_search(has_urls: "false").with { [@banned, @deleted, @artist] }
should respond_to_search(urls: {url: "http://www.pixiv.net/member.php?id=32777"}).with { @masao }
should respond_to_search(urls: {url: "https://www.pixiv.net/users/32777"}).with { @masao }
should respond_to_search(urls: {normalized_url: "http://www.deviantart.com/artgerm/"}).with { @artgerm }
end
end

View File

@@ -17,7 +17,7 @@ class ArtistURLTest < ActiveSupport::TestCase
end
should "allow urls to be marked as inactive" do
url = FactoryBot.create(:artist_url, url: "http://monet.com", is_active: false)
url = create(:artist_url, url: "http://monet.com", is_active: false)
assert_equal("http://monet.com", url.url)
assert_equal("http://monet.com/", url.normalized_url)
assert_equal("-http://monet.com", url.to_s)
@@ -59,85 +59,112 @@ class ArtistURLTest < ActiveSupport::TestCase
should "normalise domains to lowercase" do
url = create(:artist_url, url: "https://ArtistName.example.com")
assert_equal("https://artistname.example.com", url.url)
assert_equal("http://artistname.example.com/", url.normalized_url)
end
should "normalize ArtStation urls" do
url = create(:artist_url, url: "https://www.artstation.com/koyorin")
assert_equal("https://www.artstation.com/koyorin", url.url)
assert_equal("http://www.artstation.com/koyorin/", url.normalized_url)
url = create(:artist_url, url: "https://koyorin.artstation.com"),
url = create(:artist_url, url: "https://koyorin.artstation.com")
assert_equal("https://www.artstation.com/koyorin", url.url)
assert_equal("http://www.artstation.com/koyorin/", url.normalized_url)
end
should "normalize fc2 urls" do
url = create(:artist_url, url: "http://silencexs.blog106.fc2.com/")
assert_equal("http://silencexs.blog.fc2.com", url.url)
assert_equal("http://silencexs.blog.fc2.com/", url.normalized_url)
end
should "normalize deviant art artist urls" do
url = create(:artist_url, url: "https://noizave.deviantart.com")
assert_equal("https://www.deviantart.com/noizave", url.url)
assert_equal("http://www.deviantart.com/noizave/", url.normalized_url)
end
should "normalize nico seiga artist urls" do
url = create(:artist_url, url: "http://seiga.nicovideo.jp/user/illust/7017777")
assert_equal("https://seiga.nicovideo.jp/user/illust/7017777", url.url)
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url)
url = create(:artist_url, url: "http://seiga.nicovideo.jp/manga/list?user_id=23839737")
assert_equal("https://seiga.nicovideo.jp/manga/list?user_id=23839737", url.url)
assert_equal("http://seiga.nicovideo.jp/manga/list?user_id=23839737/", url.normalized_url)
url = create(:artist_url, url: "https://www.nicovideo.jp/user/20446930/mylist/28674289")
assert_equal("https://www.nicovideo.jp/user/20446930", url.url)
assert_equal("http://www.nicovideo.jp/user/20446930/", url.normalized_url)
end
should "normalize hentai foundry artist urls" do
url = create(:artist_url, url: "https://www.hentai-foundry.com/user/kajinman/profile")
url = create(:artist_url, url: "http://www.hentai-foundry.com/user/kajinman/profile")
assert_equal("https://www.hentai-foundry.com/user/kajinman", url.url)
assert_equal("http://www.hentai-foundry.com/user/kajinman/", url.normalized_url)
end
should "normalize pixiv stacc urls" do
url = create(:artist_url, url: "https://www.pixiv.net/stacc/evazion")
url = create(:artist_url, url: "http://www.pixiv.net/stacc/evazion/")
assert_equal("https://www.pixiv.net/stacc/evazion", url.url)
assert_equal("http://www.pixiv.net/stacc/evazion/", url.normalized_url)
end
should "normalize pixiv fanbox account urls" do
url = create(:artist_url, url: "https://www.pixiv.net/fanbox/creator/3113804")
url = create(:artist_url, url: "http://www.pixiv.net/fanbox/creator/3113804")
assert_equal("https://www.pixiv.net/fanbox/creator/3113804", url.url)
assert_equal("http://www.pixiv.net/fanbox/creator/3113804/", url.normalized_url)
url = create(:artist_url, url: "https://omu001.fanbox.cc/posts/39714")
url = create(:artist_url, url: "http://omu001.fanbox.cc/posts/39714")
assert_equal("https://omu001.fanbox.cc", url.url)
assert_equal("http://omu001.fanbox.cc/", url.normalized_url)
end
should "normalize pixiv.net/user/123 urls" do
url = create(:artist_url, url: "https://www.pixiv.net/en/users/123")
assert_equal("http://www.pixiv.net/member.php?id=123/", url.normalized_url)
url = create(:artist_url, url: "http://www.pixiv.net/en/users/123")
assert_equal("https://www.pixiv.net/users/123", url.url)
assert_equal("http://www.pixiv.net/users/123/", url.normalized_url)
end
should "normalize twitter urls" do
url = create(:artist_url, url: "https://twitter.com/aoimanabu/status/892370963630743552")
assert_equal("https://twitter.com/aoimanabu", url.url)
assert_equal("http://twitter.com/aoimanabu/", url.normalized_url)
url = create(:artist_url, url: "https://twitter.com/BLAH")
assert_equal("https://twitter.com/BLAH", url.url)
assert_equal("http://twitter.com/BLAH/", url.normalized_url)
end
should "normalize https://twitter.com/intent/user?user_id=* urls" do
url = create(:artist_url, url: "https://twitter.com/intent/user?user_id=2784590030")
assert_equal("https://twitter.com/intent/user?user_id=2784590030", url.url)
assert_equal("http://twitter.com/intent/user?user_id=2784590030/", url.normalized_url)
end
should "normalize nijie urls" do
url = create(:artist_url, url: "https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png")
assert_equal("https://nijie.info/members.php?id=236014", url.url)
assert_equal("http://nijie.info/members.php?id=236014/", url.normalized_url)
url = create(:artist_url, url: "https://nijie.info/members.php?id=161703")
url = create(:artist_url, url: "http://nijie.info/members.php?id=161703")
assert_equal("https://nijie.info/members.php?id=161703", url.url)
assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url)
url = create(:artist_url, url: "https://www.nijie.info/members_illust.php?id=161703")
url = create(:artist_url, url: "http://www.nijie.info/members_illust.php?id=161703")
assert_equal("https://nijie.info/members.php?id=161703", url.url)
assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url)
url = create(:artist_url, url: "https://nijie.info/invalid.php")
url = create(:artist_url, url: "http://nijie.info/invalid.php")
assert_equal("http://nijie.info/invalid.php", url.url)
assert_equal("http://nijie.info/invalid.php/", url.normalized_url)
end