Fix #4016: Translated tags failing to find some tags.

* Normalize spaces to underscores when saving other names. Preserve case
  since case can be significant.

* Fix WikiPage#other_names_include to search case-insensitively (note:
  this prevents using the index).

* Fix sources to return the raw tags in `#tags` and the normalized tags
  in `#normalized_tags`. The normalized tags are the tags that will be
  matched against other names.
This commit is contained in:
evazion
2018-12-16 11:27:04 -06:00
parent a1df1abf0b
commit c700ea4b5f
9 changed files with 49 additions and 21 deletions

View File

@@ -72,7 +72,7 @@ module Sources::Strategies
# Builds one [tag, search URL] pair for each entry of api_response[:tags];
# `to_a` turns a nil tag list into an empty array.
# NOTE(review): the block contains two array literals in a row. A Ruby block
# returns only its last expression, so `map` yields the pair holding the
# unmodified tag. The downcased/underscored pair looks like the pre-change
# line of this diff hunk -- confirm against the real file.
def tags
api_response[:tags].to_a.map do |tag|
[tag.downcase.tr(" ", "_"), "https://www.artstation.com/search?q=" + CGI.escape(tag)]
[tag, "https://www.artstation.com/search?q=" + CGI.escape(tag)]
end
end

View File

@@ -178,8 +178,16 @@ module Sources
(@tags || []).uniq
end
# Runs normalize_tag over the name of every entry in `tags` and returns the
# results sorted, with duplicates removed. Only the first element of each
# entry is used; the second one (the URL) is ignored.
def normalized_tags
  names = tags.map { |tag, _url| normalize_tag(tag) }
  names.sort.uniq
end
# Canonical form of a source tag: the tag is passed through
# WikiPage.normalize_other_name and the result is lowercased.
def normalize_tag(tag)
  other_name = WikiPage.normalize_other_name(tag)
  other_name.downcase
end
# Feeds tags through translate_tag, keeps the unique results in sorted order,
# and drops every result whose category equals Tag.categories.artist.
# NOTE(review): `translated_tags` is assigned twice in a row. The second
# assignment (built from normalized_tags) overwrites the first (built from the
# raw tag names), so only the second one affects the return value. The first
# line looks like the pre-change line of this diff hunk -- confirm against the
# real file.
def translated_tags
translated_tags = tags.map(&:first).flat_map(&method(:translate_tag)).uniq.sort
translated_tags = normalized_tags.flat_map(&method(:translate_tag)).uniq.sort
translated_tags.reject { |tag| tag.category == Tag.categories.artist }
end
@@ -240,6 +248,7 @@ module Sources
:canonical_url => canonical_url,
:normalized_for_artist_finder_url => normalize_for_artist_finder,
:tags => tags,
:normalized_tags => normalized_tags,
:translated_tags => translated_tags,
:unique_id => unique_id,
:artist_commentary => {

View File

@@ -77,7 +77,7 @@ module Sources
# Splits api_response[:tags] (coerced to a string) on whitespace and pairs
# each tag with a post-search URL on site_name.
# NOTE(review): the block contains two array literals in a row. A Ruby block
# returns only its last expression, so `map` yields the pair holding the
# unmodified tag. The pair that swaps underscores for spaces looks like the
# pre-change line of this diff hunk -- confirm against the real file.
def tags
api_response[:tags].to_s.split.map do |tag|
[tag.tr("_", " "), "https://#{site_name}/post?tags=#{CGI.escape(tag)}"]
[tag, "https://#{site_name}/post?tags=#{CGI.escape(tag)}"]
end
end

View File

@@ -160,14 +160,16 @@ module Sources
rescue PixivApiClient::BadIDError
[]
end
memoize :tags
# Removes a trailing "<digits>users入り" suffix from the tag. The match is
# case-insensitive and anchored to the very end of the string; tags without
# the suffix come back unchanged.
def normalize_tag(tag)
  users_suffix = /\d+users入り\z/i
  tag.gsub(users_suffix, "")
end
def translate_tag(tag)
normalized_tag = tag.gsub(/\d+users入り\z/i, "")
translated_tags = super(normalized_tag)
translated_tags = super(tag)
if translated_tags.empty? && normalized_tag.include?("/")
translated_tags = normalized_tag.split("/").flat_map { |tag| super(tag) }
if translated_tags.empty? && tag.include?("/")
translated_tags = tag.split("/").flat_map { |tag| super(tag) }
end
translated_tags

View File

@@ -105,12 +105,15 @@ module Sources::Strategies
# Builds one [tag, tagged-page URL] pair for each entry of post[:tags]
# (`to_a` turns a nil tag list into an empty array) and removes duplicate
# pairs.
# NOTE(review): the block contains two array literals in a row. A Ruby block
# returns only its last expression, so `map` yields the pair holding the
# unmodified tag; `etag` is computed but does not reach the result. The
# `etag` lines look like the pre-change lines of this diff hunk -- confirm
# against the real file.
def tags
post[:tags].to_a.map do |tag|
# normalize tags: space, underscore, and hyphen are equivalent in tumblr tags.
etag = tag.gsub(/[ _-]/, "_")
[etag, "https://tumblr.com/tagged/#{CGI.escape(etag)}"]
[tag, "https://tumblr.com/tagged/#{CGI.escape(tag)}"]
end.uniq
end
# Replaces every hyphen in the tag with an underscore, then hands the result
# to the inherited normalize_tag.
def normalize_tag(tag)
  super(tag.tr("-", "_"))
end
# The artist commentary description converted by DText.from_html, with
# leading and trailing whitespace stripped.
def dtext_artist_commentary_desc
  dtext = DText.from_html(artist_commentary_desc)
  dtext.strip
end

View File

@@ -32,7 +32,9 @@ class WikiPage < ApplicationRecord
end
# Restricts the relation to wiki pages that have `name` among their
# other_names. The name is normalized with normalize_other_name and
# lowercased, then compared against lower(other_name) for each element
# produced by unnest(other_names).
# NOTE(review): the first `where(...)` (the array-containment query) builds a
# relation that is never used; only the final `where(id: subquery)` is
# returned. It looks like the pre-change line of this diff hunk -- confirm
# against the real file.
def other_names_include(name)
where("wiki_pages.other_names @> ARRAY[?]", name.unicode_normalize(:nfkc))
name = normalize_other_name(name).downcase
subquery = WikiPage.from("unnest(other_names) AS other_name").where("lower(other_name) = ?", name)
where(id: subquery)
end
def other_names_match(name)
@@ -146,7 +148,11 @@ class WikiPage < ApplicationRecord
end
# Rewrites other_names so that every entry is normalized and duplicates are
# removed.
# NOTE(review): `other_names` is assigned twice in a row. The second
# assignment (via WikiPage.normalize_other_name) is applied to the output of
# the first (plain NFKC normalization) and determines the stored value. The
# first line looks like the pre-change line of this diff hunk -- confirm
# against the real file.
def normalize_other_names
self.other_names = other_names.map { |name| name.unicode_normalize(:nfkc) }.uniq
self.other_names = other_names.map { |name| WikiPage.normalize_other_name(name) }.uniq
end
# Canonical form of an other name: NFKC-normalized, every run of whitespace
# collapsed to a single space, surrounding whitespace stripped, and the
# remaining spaces turned into underscores. No case folding is applied.
def self.normalize_other_name(name)
  collapsed = name.unicode_normalize(:nfkc).gsub(/[[:space:]]+/, " ")
  collapsed.strip.tr(" ", "_")
end
def skip_secondary_validations=(value)