wiki pages, artists: fix normalization of other names.
Fix wiki pages and artists to normalize other names more consistently and correctly. For wiki pages, we strip leading / trailing / repeated underscores to fix user typos, and we normalize to NFKC form to make search more consistent. For artists, we allow leading / trailing / repeated underscores because some artist names have these, and we normalize to NFC form because some artists have weird names that would be lost by NFKC form. This does make search less consistent.
This commit is contained in:
@@ -3,11 +3,13 @@ class Artist < ApplicationRecord
|
||||
class RevertError < StandardError; end
|
||||
|
||||
attr_accessor :url_string_changed
|
||||
array_attribute :other_names
|
||||
|
||||
deletable
|
||||
|
||||
before_validation :normalize_name
|
||||
before_validation :normalize_other_names
|
||||
normalize :name, :normalize_name
|
||||
normalize :other_names, :normalize_other_names
|
||||
array_attribute :other_names # XXX must come after `normalize :other_names`
|
||||
|
||||
validate :validate_tag_category
|
||||
validates :name, tag_name: true, uniqueness: true
|
||||
before_save :update_tag_category
|
||||
@@ -55,27 +57,26 @@ class Artist < ApplicationRecord
|
||||
end
|
||||
end
|
||||
|
||||
module NameMethods
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
module ClassMethods
|
||||
concerning :NameMethods do
|
||||
class_methods do
|
||||
def normalize_name(name)
|
||||
name.to_s.mb_chars.downcase.strip.gsub(/ /, '_').to_s
|
||||
end
|
||||
end
|
||||
|
||||
def normalize_name
|
||||
self.name = Artist.normalize_name(name)
|
||||
def normalize_other_names(other_names)
|
||||
other_names.map { |name| normalize_other_name(name) }.uniq.reject(&:blank?)
|
||||
end
|
||||
|
||||
# XXX Differences from wiki page other names: allow uppercase, use NFC
|
||||
# instead of NFKC, and allow repeated, leading, and trailing underscores.
|
||||
def normalize_other_name(other_name)
|
||||
other_name.to_s.unicode_normalize(:nfc).normalize_whitespace.squish.tr(" ", "_")
|
||||
end
|
||||
end
|
||||
|
||||
def pretty_name
|
||||
name.tr("_", " ")
|
||||
end
|
||||
|
||||
def normalize_other_names
|
||||
self.other_names = other_names.map { |x| Artist.normalize_name(x) }.uniq
|
||||
self.other_names -= [name]
|
||||
end
|
||||
end
|
||||
|
||||
module VersionMethods
|
||||
@@ -145,8 +146,6 @@ class Artist < ApplicationRecord
|
||||
artist = Artist.new(params)
|
||||
end
|
||||
|
||||
artist.normalize_name
|
||||
artist.normalize_other_names
|
||||
artist
|
||||
end
|
||||
end
|
||||
@@ -284,7 +283,6 @@ class Artist < ApplicationRecord
|
||||
end
|
||||
|
||||
include UrlMethods
|
||||
include NameMethods
|
||||
include VersionMethods
|
||||
extend FactoryMethods
|
||||
include TagMethods
|
||||
|
||||
@@ -3,12 +3,12 @@ class WikiPage < ApplicationRecord
|
||||
|
||||
META_WIKIS = ["list_of_", "tag_group:", "pool_group:", "howto:", "about:", "help:", "template:"]
|
||||
|
||||
before_validation :normalize_other_names
|
||||
before_save :update_dtext_links, if: :dtext_links_changed?
|
||||
after_save :create_version
|
||||
|
||||
normalize :title, :normalize_title
|
||||
normalize :body, :normalize_text
|
||||
normalize :other_names, :normalize_other_names
|
||||
|
||||
validates :title, tag_name: true, presence: true, uniqueness: true, if: :title_changed?
|
||||
validates :body, presence: true, unless: -> { is_deleted? || other_names.present? }
|
||||
@@ -153,12 +153,12 @@ class WikiPage < ApplicationRecord
|
||||
title.to_s.downcase.delete_prefix("~").gsub(/[[:space:]]+/, "_").squeeze("_").gsub(/\A_|_\z/, "")
|
||||
end
|
||||
|
||||
def normalize_other_names
|
||||
self.other_names = other_names.map { |name| WikiPage.normalize_other_name(name) }.uniq
|
||||
def self.normalize_other_names(other_names)
|
||||
other_names.map { |name| normalize_other_name(name) }.uniq.reject(&:blank?)
|
||||
end
|
||||
|
||||
def self.normalize_other_name(name)
|
||||
name.unicode_normalize(:nfkc).gsub(/[[:space:]]+/, " ").strip.tr(" ", "_")
|
||||
name.to_s.unicode_normalize(:nfkc).normalize_whitespace.gsub(/[[:space:]]+/, "_").squeeze("_").gsub(/\A_|_\z/, "")
|
||||
end
|
||||
|
||||
def category_name
|
||||
|
||||
17
script/fixes/071_renormalize_other_names_in_wiki_pages.rb
Executable file
17
script/fixes/071_renormalize_other_names_in_wiki_pages.rb
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
require_relative "../../config/environment"
|
||||
|
||||
CurrentUser.scoped(User.system) do
|
||||
WikiPage.transaction do
|
||||
WikiPage.find_each do |wp|
|
||||
old_other_names = wp.other_names
|
||||
wp.other_names = old_other_names
|
||||
|
||||
if wp.changed?
|
||||
p [wp.id, wp.changes]
|
||||
wp.save!
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
17
script/fixes/072_renormalize_other_names_in_artists.rb
Executable file
17
script/fixes/072_renormalize_other_names_in_artists.rb
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
require_relative "../../config/environment"
|
||||
|
||||
CurrentUser.scoped(User.system) do
|
||||
Artist.transaction do
|
||||
Artist.find_each do |artist|
|
||||
old_other_names = artist.other_names
|
||||
artist.other_names = old_other_names
|
||||
|
||||
if artist.changed?
|
||||
p [artist.id, artist.changes]
|
||||
artist.save!
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -384,9 +384,27 @@ class ArtistTest < ActiveSupport::TestCase
|
||||
end
|
||||
end
|
||||
|
||||
should "normalize its other names" do
|
||||
artist = FactoryBot.create(:artist, name: "a1", other_names: "a1 aaa aaa AAA bbb ccc_ddd")
|
||||
assert_equal("aaa bbb ccc_ddd", artist.other_names_string)
|
||||
context "the #normalize_other_names method" do
|
||||
subject { build(:artist) }
|
||||
|
||||
should normalize_attribute(:other_names).from([" foo"]).to(["foo"])
|
||||
should normalize_attribute(:other_names).from(["foo "]).to(["foo"])
|
||||
should normalize_attribute(:other_names).from(["___foo"]).to(["___foo"])
|
||||
should normalize_attribute(:other_names).from(["foo___"]).to(["foo___"])
|
||||
should normalize_attribute(:other_names).from(["foo\n"]).to(["foo"])
|
||||
should normalize_attribute(:other_names).from(["foo bar"]).to(["foo_bar"])
|
||||
should normalize_attribute(:other_names).from(["foo bar"]).to(["foo_bar"])
|
||||
should normalize_attribute(:other_names).from(["foo___bar"]).to(["foo___bar"])
|
||||
should normalize_attribute(:other_names).from([" _Foo Bar_ "]).to(["_Foo_Bar_"])
|
||||
should normalize_attribute(:other_names).from(["foo 1", "bar 2"]).to(["foo_1", "bar_2"])
|
||||
should normalize_attribute(:other_names).from(["foo", nil, "", " ", "bar"]).to(["foo", "bar"])
|
||||
should normalize_attribute(:other_names).from([nil, "", " "]).to([])
|
||||
should normalize_attribute(:other_names).from(["pokémon".unicode_normalize(:nfd)]).to(["pokémon".unicode_normalize(:nfkc)])
|
||||
should normalize_attribute(:other_names).from(["foo", "foo"]).to(["foo"])
|
||||
|
||||
should normalize_attribute(:other_names).from("foo foo").to(["foo"])
|
||||
should normalize_attribute(:other_names).from("foo bar").to(["foo", "bar"])
|
||||
should normalize_attribute(:other_names).from("_foo_ Bar").to(["_foo_", "Bar"])
|
||||
end
|
||||
|
||||
should "search on its name should return results" do
|
||||
@@ -550,7 +568,7 @@ class ArtistTest < ActiveSupport::TestCase
|
||||
artist = Artist.new_with_defaults(source: source)
|
||||
|
||||
assert_equal("niceandcool", artist.name)
|
||||
assert_equal("nice_and_cool", artist.other_names_string)
|
||||
assert_equal("Nice_and_Cool niceandcool", artist.other_names_string)
|
||||
assert_includes(artist.urls.map(&:url), "https://www.pixiv.net/users/906442")
|
||||
assert_includes(artist.urls.map(&:url), "https://www.pixiv.net/stacc/niceandcool")
|
||||
end
|
||||
@@ -561,7 +579,7 @@ class ArtistTest < ActiveSupport::TestCase
|
||||
artist = Artist.new_with_defaults(name: "test_artist")
|
||||
|
||||
assert_equal("test_artist", artist.name)
|
||||
assert_equal("nice_and_cool niceandcool", artist.other_names_string)
|
||||
assert_equal("Nice_and_Cool niceandcool", artist.other_names_string)
|
||||
assert_includes(artist.urls.map(&:url), "https://www.pixiv.net/users/906442")
|
||||
assert_includes(artist.urls.map(&:url), "https://www.pixiv.net/stacc/niceandcool")
|
||||
end
|
||||
|
||||
@@ -18,15 +18,6 @@ class WikiPageTest < ActiveSupport::TestCase
|
||||
@wiki_page = FactoryBot.create(:wiki_page, :title => "HOT POTATO", :other_names => "foo*bar baz")
|
||||
end
|
||||
|
||||
should "normalize its title" do
|
||||
assert_equal("hot_potato", @wiki_page.title)
|
||||
end
|
||||
|
||||
should "normalize its other names" do
|
||||
@wiki_page.update(:other_names => "foo*bar baz baz 加賀(艦これ)")
|
||||
assert_equal(%w[foo*bar baz 加賀(艦これ)], @wiki_page.other_names)
|
||||
end
|
||||
|
||||
should "search by title" do
|
||||
matches = WikiPage.titled("hot potato")
|
||||
assert_equal(1, matches.count)
|
||||
@@ -91,6 +82,27 @@ class WikiPageTest < ActiveSupport::TestCase
|
||||
end
|
||||
end
|
||||
|
||||
context "the #normalize_other_names method" do
|
||||
subject { build(:wiki_page) }
|
||||
|
||||
should normalize_attribute(:other_names).from([" foo"]).to(["foo"])
|
||||
should normalize_attribute(:other_names).from(["foo "]).to(["foo"])
|
||||
should normalize_attribute(:other_names).from(["___foo"]).to(["foo"])
|
||||
should normalize_attribute(:other_names).from(["foo___"]).to(["foo"])
|
||||
should normalize_attribute(:other_names).from(["foo\n"]).to(["foo"])
|
||||
should normalize_attribute(:other_names).from(["foo bar"]).to(["foo_bar"])
|
||||
should normalize_attribute(:other_names).from(["foo bar"]).to(["foo_bar"])
|
||||
should normalize_attribute(:other_names).from(["foo___bar"]).to(["foo_bar"])
|
||||
should normalize_attribute(:other_names).from([" _Foo Bar_ "]).to(["Foo_Bar"])
|
||||
should normalize_attribute(:other_names).from(["foo 1", "bar 2"]).to(["foo_1", "bar_2"])
|
||||
should normalize_attribute(:other_names).from(["foo", nil, "", " ", "bar"]).to(["foo", "bar"])
|
||||
should normalize_attribute(:other_names).from([nil, "", " "]).to([])
|
||||
should normalize_attribute(:other_names).from(["pokémon".unicode_normalize(:nfd)]).to(["pokémon".unicode_normalize(:nfkc)])
|
||||
should normalize_attribute(:other_names).from(["ABC"]).to(["ABC"])
|
||||
should normalize_attribute(:other_names).from(["foo", "foo"]).to(["foo"])
|
||||
should normalize_attribute(:other_names).from(%w[foo*bar baz baz 加賀(艦これ)]).to(%w[foo*bar baz 加賀(艦これ)])
|
||||
end
|
||||
|
||||
context "during title validation" do
|
||||
should normalize_attribute(:title).from(" foo ").to("foo")
|
||||
should normalize_attribute(:title).from("~foo").to("foo")
|
||||
|
||||
Reference in New Issue
Block a user