wiki pages, artists: fix normalization of other names.

Fix wiki pages and artists to normalize other names more consistently
and correctly.

For wiki pages, we strip leading / trailing / repeated underscores to
fix user typos, and we normalize to NFKC form to make search more consistent.

For artists, we allow leading / trailing / repeated underscores because
some artist names have these, and we normalize to NFC form because some
artists have weird names that would be lost by NFKC form. This does make
search less consistent.
This commit is contained in:
evazion
2021-01-09 23:38:38 -06:00
parent e788d8d0b6
commit 5962152ee3
6 changed files with 98 additions and 36 deletions

View File

@@ -384,9 +384,27 @@ class ArtistTest < ActiveSupport::TestCase
end
end
should "normalize its other names" do
artist = FactoryBot.create(:artist, name: "a1", other_names: "a1 aaa aaa AAA bbb ccc_ddd")
assert_equal("aaa bbb ccc_ddd", artist.other_names_string)
context "the #normalize_other_names method" do
  # Pins how an artist's other names are normalized. As the cases below show:
  # surrounding whitespace is trimmed, inner spaces become underscores,
  # leading / trailing / repeated underscores are kept, case is kept, blank
  # entries and duplicates are dropped, and a plain string is split on
  # whitespace into separate names.
  subject { build(:artist) }

  # Surrounding whitespace (including a trailing newline) is trimmed.
  should normalize_attribute(:other_names).from([" foo"]).to(["foo"])
  should normalize_attribute(:other_names).from(["foo "]).to(["foo"])
  should normalize_attribute(:other_names).from(["foo\n"]).to(["foo"])

  # Underscores are significant in artist names and must survive untouched.
  should normalize_attribute(:other_names).from(["___foo"]).to(["___foo"])
  should normalize_attribute(:other_names).from(["foo___"]).to(["foo___"])
  should normalize_attribute(:other_names).from(["foo___bar"]).to(["foo___bar"])

  # Inner spaces become underscores; case is preserved.
  # NOTE(review): this assertion was listed twice verbatim. A repeated-whitespace
  # input may have been intended for the second copy; add it back once the
  # expected result for artists is confirmed.
  should normalize_attribute(:other_names).from(["foo bar"]).to(["foo_bar"])
  should normalize_attribute(:other_names).from([" _Foo Bar_ "]).to(["_Foo_Bar_"])
  should normalize_attribute(:other_names).from(["foo 1", "bar 2"]).to(["foo_1", "bar_2"])

  # nil, empty and whitespace-only entries are discarded.
  should normalize_attribute(:other_names).from(["foo", nil, "", " ", "bar"]).to(["foo", "bar"])
  should normalize_attribute(:other_names).from([nil, "", " "]).to([])

  # Artist names are described as being normalized to NFC (not NFKC) so that
  # unusual names are not lost. For this input both forms yield the same
  # string, so the expectation is spelled with the form that is actually meant.
  should normalize_attribute(:other_names).from(["pokémon".unicode_normalize(:nfd)]).to(["pokémon".unicode_normalize(:nfc)])

  # Duplicates collapse to a single entry.
  should normalize_attribute(:other_names).from(["foo", "foo"]).to(["foo"])

  # A string (rather than an array) is split on whitespace into names.
  should normalize_attribute(:other_names).from("foo foo").to(["foo"])
  should normalize_attribute(:other_names).from("foo bar").to(["foo", "bar"])
  should normalize_attribute(:other_names).from("_foo_ Bar").to(["_foo_", "Bar"])
end
should "search on its name should return results" do
@@ -550,7 +568,7 @@ class ArtistTest < ActiveSupport::TestCase
artist = Artist.new_with_defaults(source: source)
assert_equal("niceandcool", artist.name)
assert_equal("nice_and_cool", artist.other_names_string)
assert_equal("Nice_and_Cool niceandcool", artist.other_names_string)
assert_includes(artist.urls.map(&:url), "https://www.pixiv.net/users/906442")
assert_includes(artist.urls.map(&:url), "https://www.pixiv.net/stacc/niceandcool")
end
@@ -561,7 +579,7 @@ class ArtistTest < ActiveSupport::TestCase
artist = Artist.new_with_defaults(name: "test_artist")
assert_equal("test_artist", artist.name)
assert_equal("nice_and_cool niceandcool", artist.other_names_string)
assert_equal("Nice_and_Cool niceandcool", artist.other_names_string)
assert_includes(artist.urls.map(&:url), "https://www.pixiv.net/users/906442")
assert_includes(artist.urls.map(&:url), "https://www.pixiv.net/stacc/niceandcool")
end

View File

@@ -18,15 +18,6 @@ class WikiPageTest < ActiveSupport::TestCase
@wiki_page = FactoryBot.create(:wiki_page, :title => "HOT POTATO", :other_names => "foo*bar baz")
end
should "normalize its title" do
  # The page is created in setup with the title "HOT POTATO"; the stored
  # title is expected to come back lowercased with the space as an underscore.
  stored_title = @wiki_page.title
  assert_equal("hot_potato", stored_title)
end
should "normalize its other names" do
  # "baz" is given twice; only one copy is expected to remain, and names
  # containing punctuation or non-ASCII characters are kept as they are.
  expected_names = ["foo*bar", "baz", "加賀(艦これ)"]
  @wiki_page.update(other_names: "foo*bar baz baz 加賀(艦これ)")
  assert_equal(expected_names, @wiki_page.other_names)
end
should "search by title" do
matches = WikiPage.titled("hot potato")
assert_equal(1, matches.count)
@@ -91,6 +82,27 @@ class WikiPageTest < ActiveSupport::TestCase
end
end
context "the #normalize_other_names method" do
  # Pins how a wiki page's other names are normalized. As the cases below
  # show: surrounding whitespace is trimmed, inner spaces become underscores,
  # leading / trailing / repeated underscores are removed, case is kept,
  # blank entries and duplicates are dropped, and text is folded to NFKC.
  subject { build(:wiki_page) }

  # Surrounding whitespace (including a trailing newline) is trimmed.
  should normalize_attribute(:other_names).from([" foo"]).to(["foo"])
  should normalize_attribute(:other_names).from(["foo "]).to(["foo"])
  should normalize_attribute(:other_names).from(["foo\n"]).to(["foo"])

  # Leading and trailing underscores are treated as typos and stripped.
  should normalize_attribute(:other_names).from(["___foo"]).to(["foo"])
  should normalize_attribute(:other_names).from(["foo___"]).to(["foo"])

  # Inner spaces become underscores, and runs of separators collapse to one.
  should normalize_attribute(:other_names).from(["foo bar"]).to(["foo_bar"])
  should normalize_attribute(:other_names).from(["foo   bar"]).to(["foo_bar"])
  should normalize_attribute(:other_names).from(["foo___bar"]).to(["foo_bar"])
  should normalize_attribute(:other_names).from([" _Foo Bar_ "]).to(["Foo_Bar"])
  should normalize_attribute(:other_names).from(["foo 1", "bar 2"]).to(["foo_1", "bar_2"])

  # nil, empty and whitespace-only entries are discarded.
  should normalize_attribute(:other_names).from(["foo", nil, "", " ", "bar"]).to(["foo", "bar"])
  should normalize_attribute(:other_names).from([nil, "", " "]).to([])

  # Unicode is folded to NFKC: decomposed accents are recomposed, and
  # compatibility characters such as fullwidth letters map to plain ASCII.
  should normalize_attribute(:other_names).from(["pokémon".unicode_normalize(:nfd)]).to(["pokémon".unicode_normalize(:nfkc)])
  should normalize_attribute(:other_names).from(["ＡＢＣ"]).to(["ABC"])

  # Duplicates collapse to a single entry; punctuation and CJK text are kept.
  should normalize_attribute(:other_names).from(["foo", "foo"]).to(["foo"])
  should normalize_attribute(:other_names).from(%w[foo*bar baz baz 加賀(艦これ)]).to(%w[foo*bar baz 加賀(艦これ)])
end
context "during title validation" do
should normalize_attribute(:title).from(" foo ").to("foo")
should normalize_attribute(:title).from("~foo").to("foo")