wiki pages, artists: fix normalization of other names.

Fix wiki pages and artists to normalize other names more consistently
and correctly.

For wiki pages, we strip leading / trailing / repeated underscores to
fix user typos, and we normalize to NFKC form to make search more consistent.

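For artists, we allow leading / trailing / repeated underscores because
some artist names legitimately contain them, and we normalize to NFC form
because some artists have unusual names whose distinguishing characters would
be folded away by NFKC form. The trade-off is that searching artist other
names is less consistent than searching wiki page other names.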
This commit is contained in:
evazion
2021-01-09 23:38:38 -06:00
parent e788d8d0b6
commit 5962152ee3
6 changed files with 98 additions and 36 deletions

View File

@@ -3,11 +3,13 @@ class Artist < ApplicationRecord
class RevertError < StandardError; end
attr_accessor :url_string_changed
array_attribute :other_names
deletable
before_validation :normalize_name
before_validation :normalize_other_names
normalize :name, :normalize_name
normalize :other_names, :normalize_other_names
array_attribute :other_names # XXX must come after `normalize :other_names`
validate :validate_tag_category
validates :name, tag_name: true, uniqueness: true
before_save :update_tag_category
@@ -55,27 +57,26 @@ class Artist < ApplicationRecord
end
end
module NameMethods
extend ActiveSupport::Concern
module ClassMethods
concerning :NameMethods do
class_methods do
# Canonicalizes an artist name: coerces the input to a String, lowercases it
# through the `mb_chars` wrapper, strips surrounding whitespace, and replaces
# every space character with an underscore. Always returns a plain String.
def normalize_name(name)
  lowered = name.to_s.mb_chars.downcase
  lowered.strip.gsub(/ /, '_').to_s
end
end
def normalize_name
self.name = Artist.normalize_name(name)
# Runs every entry of +other_names+ through normalize_other_name, then drops
# duplicate and blank entries. The order of first occurrences is preserved.
def normalize_other_names(other_names)
  normalized = other_names.map { |other_name| normalize_other_name(other_name) }
  normalized.uniq.reject(&:blank?)
end
# XXX Unlike wiki page other names, artist other names keep uppercase letters,
# use NFC rather than NFKC, and may contain repeated, leading, or trailing
# underscores.
#
# Coerces the input to a String, NFC-normalizes it, passes it through
# `normalize_whitespace` (defined outside this view), squishes the whitespace,
# and finally swaps every remaining space for an underscore.
def normalize_other_name(other_name)
  composed = other_name.to_s.unicode_normalize(:nfc)
  composed.normalize_whitespace.squish.tr(" ", "_")
end
end
# Returns +name+ with every underscore replaced by a space.
def pretty_name
name.tr("_", " ")
end
# Rewrites +other_names+ on this record: each entry is passed through
# Artist.normalize_name, duplicates are dropped, and any entry equal to the
# record's own +name+ is removed.
def normalize_other_names
self.other_names = other_names.map { |x| Artist.normalize_name(x) }.uniq
# Re-reads other_names after the assignment above and removes +name+ from it.
self.other_names -= [name]
end
end
module VersionMethods
@@ -145,8 +146,6 @@ class Artist < ApplicationRecord
artist = Artist.new(params)
end
artist.normalize_name
artist.normalize_other_names
artist
end
end
@@ -284,7 +283,6 @@ class Artist < ApplicationRecord
end
include UrlMethods
include NameMethods
include VersionMethods
extend FactoryMethods
include TagMethods

View File

@@ -3,12 +3,12 @@ class WikiPage < ApplicationRecord
META_WIKIS = ["list_of_", "tag_group:", "pool_group:", "howto:", "about:", "help:", "template:"]
before_validation :normalize_other_names
before_save :update_dtext_links, if: :dtext_links_changed?
after_save :create_version
normalize :title, :normalize_title
normalize :body, :normalize_text
normalize :other_names, :normalize_other_names
validates :title, tag_name: true, presence: true, uniqueness: true, if: :title_changed?
validates :body, presence: true, unless: -> { is_deleted? || other_names.present? }
@@ -153,12 +153,12 @@ class WikiPage < ApplicationRecord
title.to_s.downcase.delete_prefix("~").gsub(/[[:space:]]+/, "_").squeeze("_").gsub(/\A_|_\z/, "")
end
def normalize_other_names
self.other_names = other_names.map { |name| WikiPage.normalize_other_name(name) }.uniq
# Applies normalize_other_name to each entry of +other_names+, then removes
# duplicate and blank entries while keeping first-occurrence order.
def self.normalize_other_names(other_names)
  cleaned = other_names.map { |entry| normalize_other_name(entry) }
  cleaned.uniq.reject(&:blank?)
end
# Normalizes a single other name. The returned value comes from the last
# expression: the input is coerced to a String, NFKC-normalized, passed through
# `normalize_whitespace`, each run of whitespace becomes one underscore,
# repeated underscores are collapsed, and a leading and a trailing underscore
# are removed.
def self.normalize_other_name(name)
# NOTE(review): the result of this expression is discarded (only the last
# expression is returned), and it has no `to_s` guard. It reads like the
# pre-change line of the diff shown beside its replacement -- confirm.
name.unicode_normalize(:nfkc).gsub(/[[:space:]]+/, " ").strip.tr(" ", "_")
# `normalize_whitespace` is not defined in this view; presumably it maps
# unusual whitespace characters to plain ones -- verify against its definition.
name.to_s.unicode_normalize(:nfkc).normalize_whitespace.gsub(/[[:space:]]+/, "_").squeeze("_").gsub(/\A_|_\z/, "")
end
def category_name