Fix #4016: Translated tags failing to find some tags.
* Normalize spaces to underscores when saving other names. Preserve case, since case can be significant.
* Fix WikiPage#other_names_include to search case-insensitively (note: this prevents the query from using the index).
* Fix sources to return the raw tags in `#tags` and the normalized tags in `#normalized_tags`. The normalized tags are the tags that will be matched against other names.
This commit is contained in:
@@ -72,7 +72,7 @@ module Sources::Strategies
|
||||
|
||||
# Builds one [tag, search_url] pair per entry of api_response[:tags]
# (a nil tag list becomes an empty array through `to_a`).
# The search URL is the ArtStation search endpoint with the CGI-escaped tag.
#
# NOTE(review): two pair expressions appear back to back inside the block. This
# looks like a diff hunk showing the removed line (tag downcased, spaces turned
# into underscores) followed by its replacement (raw tag) - confirm against the
# repository before treating both as live code.
def tags
|
||||
api_response[:tags].to_a.map do |tag|
|
||||
[tag.downcase.tr(" ", "_"), "https://www.artstation.com/search?q=" + CGI.escape(tag)]
|
||||
[tag, "https://www.artstation.com/search?q=" + CGI.escape(tag)]
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -178,8 +178,16 @@ module Sources
|
||||
(@tags || []).uniq
|
||||
end
|
||||
|
||||
# Returns the de-duplicated, sorted list of normalized tag names.
#
# Each element of `tags` is destructured as a [name, url] pair; only the name
# is used and it is passed through `normalize_tag`. Duplicates are dropped
# before sorting so the sort only sees distinct values.
def normalized_tags
  tags.map { |tag, _url| normalize_tag(tag) }.uniq.sort
end
|
||||
|
||||
# Normalizes a single tag: applies WikiPage.normalize_other_name to it and
# lowercases the result.
def normalize_tag(tag)
  other_name = WikiPage.normalize_other_name(tag)
  other_name.downcase
end
|
||||
|
||||
# Translates this source's tags through `translate_tag`, de-duplicates and
# sorts the results, then drops every tag whose category is the artist category.
#
# NOTE(review): the local `translated_tags` is assigned twice in a row. This
# looks like a diff hunk showing the removed line (translating the raw names
# from `tags.map(&:first)`) followed by its replacement (translating
# `normalized_tags`) - confirm against the repository.
def translated_tags
|
||||
translated_tags = tags.map(&:first).flat_map(&method(:translate_tag)).uniq.sort
|
||||
translated_tags = normalized_tags.flat_map(&method(:translate_tag)).uniq.sort
|
||||
# Artist tags are excluded from the returned list.
translated_tags.reject { |tag| tag.category == Tag.categories.artist }
|
||||
end
|
||||
|
||||
@@ -240,6 +248,7 @@ module Sources
|
||||
:canonical_url => canonical_url,
|
||||
:normalized_for_artist_finder_url => normalize_for_artist_finder,
|
||||
:tags => tags,
|
||||
:normalized_tags => normalized_tags,
|
||||
:translated_tags => translated_tags,
|
||||
:unique_id => unique_id,
|
||||
:artist_commentary => {
|
||||
|
||||
@@ -77,7 +77,7 @@ module Sources
|
||||
|
||||
# Splits api_response[:tags] on whitespace (nil becomes "" through `to_s`) and
# builds one [tag, post_search_url] pair per tag, where the URL points at
# /post?tags=<CGI-escaped tag> on `site_name`.
#
# NOTE(review): two pair expressions appear back to back inside the block. This
# looks like a diff hunk showing the removed line (underscores turned into
# spaces) followed by its replacement (raw tag) - confirm against the repository.
def tags
|
||||
api_response[:tags].to_s.split.map do |tag|
|
||||
[tag.tr("_", " "), "https://#{site_name}/post?tags=#{CGI.escape(tag)}"]
|
||||
[tag, "https://#{site_name}/post?tags=#{CGI.escape(tag)}"]
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -160,14 +160,16 @@ module Sources
|
||||
rescue PixivApiClient::BadIDError
|
||||
[]
|
||||
end
|
||||
memoize :tags
|
||||
|
||||
# Removes a trailing "<digits>users入り" suffix from a tag, matched
# case-insensitively (e.g. "foo1000users入り" becomes "foo"). Tags without that
# suffix are returned unchanged.
#
# The pattern is anchored with \z, so it can match at most once; `sub` states
# that directly.
#
# NOTE(review): this override does not call `super`, so whatever normalization
# the parent `normalize_tag` applies is skipped here - confirm this is intended.
def normalize_tag(tag)
  tag.sub(/\d+users入り\z/i, "")
end
|
||||
|
||||
def translate_tag(tag)
|
||||
normalized_tag = tag.gsub(/\d+users入り\z/i, "")
|
||||
translated_tags = super(normalized_tag)
|
||||
translated_tags = super(tag)
|
||||
|
||||
if translated_tags.empty? && normalized_tag.include?("/")
|
||||
translated_tags = normalized_tag.split("/").flat_map { |tag| super(tag) }
|
||||
if translated_tags.empty? && tag.include?("/")
|
||||
translated_tags = tag.split("/").flat_map { |tag| super(tag) }
|
||||
end
|
||||
|
||||
translated_tags
|
||||
|
||||
@@ -105,12 +105,15 @@ module Sources::Strategies
|
||||
|
||||
# Builds one [tag, tagged_url] pair per entry of post[:tags] (a nil tag list
# becomes an empty array through `to_a`) and removes duplicate pairs.
#
# NOTE(review): the block contains two alternative bodies back to back. This
# looks like a diff hunk showing the removed lines (the comment, the `etag`
# assignment and the `etag` pair) followed by the replacement (the raw `tag`
# pair) - confirm against the repository.
def tags
|
||||
post[:tags].to_a.map do |tag|
|
||||
# normalize tags: space, underscore, and hyphen are equivalent in tumblr tags.
|
||||
etag = tag.gsub(/[ _-]/, "_")
|
||||
[etag, "https://tumblr.com/tagged/#{CGI.escape(etag)}"]
|
||||
[tag, "https://tumblr.com/tagged/#{CGI.escape(tag)}"]
|
||||
end.uniq
|
||||
end
|
||||
|
||||
# Replaces hyphens with underscores, then hands the result to the parent
# implementation of normalize_tag.
def normalize_tag(tag)
  super(tag.tr("-", "_"))
end
|
||||
|
||||
# Converts artist_commentary_desc with DText.from_html and strips leading and
# trailing whitespace from the result.
def dtext_artist_commentary_desc
  dtext = DText.from_html(artist_commentary_desc)
  dtext.strip
end
|
||||
|
||||
@@ -32,7 +32,9 @@ class WikiPage < ApplicationRecord
|
||||
end
|
||||
|
||||
# Restricts the query to wiki pages that have `name` among their other_names.
#
# NOTE(review): this looks like a diff hunk showing both sides of the change.
# The `@>` array-containment `where` appears to be the removed query, and the
# three statements after it the replacement - confirm against the repository.
def other_names_include(name)
|
||||
where("wiki_pages.other_names @> ARRAY[?]", name.unicode_normalize(:nfkc))
|
||||
# Normalize the search term the same way other names are normalized, then
# lowercase it so it can be compared against lower(other_name).
name = normalize_other_name(name).downcase
|
||||
# Unnest each page's other_names and keep rows whose lowercased name equals
# the term. The commit message notes this form cannot use the index.
subquery = WikiPage.from("unnest(other_names) AS other_name").where("lower(other_name) = ?", name)
|
||||
where(id: subquery)
|
||||
end
|
||||
|
||||
def other_names_match(name)
|
||||
@@ -146,7 +148,11 @@ class WikiPage < ApplicationRecord
|
||||
end
|
||||
|
||||
# Rewrites other_names with each name normalized and duplicates removed.
#
# NOTE(review): other_names is assigned twice in a row. This looks like a diff
# hunk showing the removed line (NFKC normalization only) followed by its
# replacement (WikiPage.normalize_other_name) - confirm against the repository.
def normalize_other_names
|
||||
self.other_names = other_names.map { |name| name.unicode_normalize(:nfkc) }.uniq
|
||||
self.other_names = other_names.map { |name| WikiPage.normalize_other_name(name) }.uniq
|
||||
end
|
||||
|
||||
# Normalizes a single other name:
#   1. apply Unicode NFKC normalization,
#   2. collapse every run of whitespace into one space,
#   3. trim the ends,
#   4. turn the remaining spaces into underscores.
# Letter case is left untouched.
def self.normalize_other_name(name)
  canonical = name.unicode_normalize(:nfkc)
  collapsed = canonical.gsub(/[[:space:]]+/, " ")
  collapsed.strip.tr(" ", "_")
end
|
||||
|
||||
def skip_secondary_validations=(value)
|
||||
|
||||
Reference in New Issue
Block a user