Fix #4016: Translated tags failing to find some tags.
* Normalize spaces to underscores when saving other names. Preserve case since case can be significant.
* Fix WikiPage#other_names_include to search case-insensitively (note: this prevents using the index).
* Fix sources to return the raw tags in `#tags` and the normalized tags in `#normalized_tags`. The normalized tags are the tags that will be matched against other names.
This commit is contained in:
@@ -72,7 +72,7 @@ module Sources::Strategies
|
||||
|
||||
def tags
|
||||
api_response[:tags].to_a.map do |tag|
|
||||
[tag.downcase.tr(" ", "_"), "https://www.artstation.com/search?q=" + CGI.escape(tag)]
|
||||
[tag, "https://www.artstation.com/search?q=" + CGI.escape(tag)]
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -178,8 +178,16 @@ module Sources
|
||||
(@tags || []).uniq
|
||||
end
|
||||
|
||||
def normalized_tags
|
||||
tags.map { |tag, url| normalize_tag(tag) }.sort.uniq
|
||||
end
|
||||
|
||||
def normalize_tag(tag)
|
||||
WikiPage.normalize_other_name(tag).downcase
|
||||
end
|
||||
|
||||
def translated_tags
|
||||
translated_tags = tags.map(&:first).flat_map(&method(:translate_tag)).uniq.sort
|
||||
translated_tags = normalized_tags.flat_map(&method(:translate_tag)).uniq.sort
|
||||
translated_tags.reject { |tag| tag.category == Tag.categories.artist }
|
||||
end
|
||||
|
||||
@@ -240,6 +248,7 @@ module Sources
|
||||
:canonical_url => canonical_url,
|
||||
:normalized_for_artist_finder_url => normalize_for_artist_finder,
|
||||
:tags => tags,
|
||||
:normalized_tags => normalized_tags,
|
||||
:translated_tags => translated_tags,
|
||||
:unique_id => unique_id,
|
||||
:artist_commentary => {
|
||||
|
||||
@@ -77,7 +77,7 @@ module Sources
|
||||
|
||||
def tags
|
||||
api_response[:tags].to_s.split.map do |tag|
|
||||
[tag.tr("_", " "), "https://#{site_name}/post?tags=#{CGI.escape(tag)}"]
|
||||
[tag, "https://#{site_name}/post?tags=#{CGI.escape(tag)}"]
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -160,14 +160,16 @@ module Sources
|
||||
rescue PixivApiClient::BadIDError
|
||||
[]
|
||||
end
|
||||
memoize :tags
|
||||
|
||||
def normalize_tag(tag)
|
||||
tag.gsub(/\d+users入り\z/i, "")
|
||||
end
|
||||
|
||||
def translate_tag(tag)
|
||||
normalized_tag = tag.gsub(/\d+users入り\z/i, "")
|
||||
translated_tags = super(normalized_tag)
|
||||
translated_tags = super(tag)
|
||||
|
||||
if translated_tags.empty? && normalized_tag.include?("/")
|
||||
translated_tags = normalized_tag.split("/").flat_map { |tag| super(tag) }
|
||||
if translated_tags.empty? && tag.include?("/")
|
||||
translated_tags = tag.split("/").flat_map { |tag| super(tag) }
|
||||
end
|
||||
|
||||
translated_tags
|
||||
|
||||
@@ -105,12 +105,15 @@ module Sources::Strategies
|
||||
|
||||
def tags
|
||||
post[:tags].to_a.map do |tag|
|
||||
# normalize tags: space, underscore, and hyphen are equivalent in tumblr tags.
|
||||
etag = tag.gsub(/[ _-]/, "_")
|
||||
[etag, "https://tumblr.com/tagged/#{CGI.escape(etag)}"]
|
||||
[tag, "https://tumblr.com/tagged/#{CGI.escape(tag)}"]
|
||||
end.uniq
|
||||
end
|
||||
|
||||
def normalize_tag(tag)
|
||||
tag = tag.tr("-", "_")
|
||||
super(tag)
|
||||
end
|
||||
|
||||
def dtext_artist_commentary_desc
|
||||
DText.from_html(artist_commentary_desc).strip
|
||||
end
|
||||
|
||||
@@ -32,7 +32,9 @@ class WikiPage < ApplicationRecord
|
||||
end
|
||||
|
||||
def other_names_include(name)
|
||||
where("wiki_pages.other_names @> ARRAY[?]", name.unicode_normalize(:nfkc))
|
||||
name = normalize_other_name(name).downcase
|
||||
subquery = WikiPage.from("unnest(other_names) AS other_name").where("lower(other_name) = ?", name)
|
||||
where(id: subquery)
|
||||
end
|
||||
|
||||
def other_names_match(name)
|
||||
@@ -146,7 +148,11 @@ class WikiPage < ApplicationRecord
|
||||
end
|
||||
|
||||
def normalize_other_names
|
||||
self.other_names = other_names.map { |name| name.unicode_normalize(:nfkc) }.uniq
|
||||
self.other_names = other_names.map { |name| WikiPage.normalize_other_name(name) }.uniq
|
||||
end
|
||||
|
||||
def self.normalize_other_name(name)
|
||||
name.unicode_normalize(:nfkc).gsub(/[[:space:]]+/, " ").strip.tr(" ", "_")
|
||||
end
|
||||
|
||||
def skip_secondary_validations=(value)
|
||||
|
||||
@@ -56,7 +56,8 @@ module Sources
|
||||
end
|
||||
|
||||
should "get the tags" do
|
||||
assert_equal(%w[gantz reika], @site.tags.map(&:first))
|
||||
assert_equal(%w[gantz Reika], @site.tags.map(&:first))
|
||||
assert_equal(%w[gantz reika], @site.normalized_tags)
|
||||
end
|
||||
|
||||
should "get the artist commentary" do
|
||||
@@ -74,6 +75,11 @@ module Sources
|
||||
url = "https://cdna.artstation.com/p/assets/images/images/000/144/922/large/cassio-yoshiyaki-cody2backup2-yoshiyaki.jpg?1406314198"
|
||||
assert_equal(url, @site.image_url)
|
||||
end
|
||||
|
||||
should "get the tags" do
|
||||
assert_equal(["Street Fighter", "Cody", "SF"].sort, @site.tags.map(&:first).sort)
|
||||
assert_equal(["street_fighter", "cody", "sf"].sort, @site.normalized_tags.sort)
|
||||
end
|
||||
end
|
||||
|
||||
context "The source site for a http://cdna.artstation.com/p/assets/... url" do
|
||||
|
||||
@@ -34,7 +34,7 @@ module Sources
|
||||
@samp = "https://files.yande.re/sample/7ecfdead705d7b956b26b1d37b98d089/yande.re%20482880%20sample%20bayashiko%20journey_to_the_west%20sun_wukong.jpg"
|
||||
@full = "https://files.yande.re/image/7ecfdead705d7b956b26b1d37b98d089/yande.re%20482880.jpg"
|
||||
@page = "https://yande.re/post/show/482880"
|
||||
@tags = ["bayashiko", "journey to the west", "sun wukong"]
|
||||
@tags = ["bayashiko", "journey_to_the_west", "sun_wukong"]
|
||||
@size = 362_554
|
||||
@profile_url = "https://twitter.com/apononori"
|
||||
@data = { site_name: "yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url }
|
||||
@@ -52,7 +52,7 @@ module Sources
|
||||
@jpeg = "https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018%20detexted%20misaki_kurehito%20saenai_heroine_no_sodatekata%20sawamura_spencer_eriri%20thighhighs.jpg"
|
||||
@full = "https://files.yande.re/image/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018.png"
|
||||
@page = "https://yande.re/post/show/398018"
|
||||
@tags = ["misaki kurehito", "saenai heroine no sodatekata", "sawamura spencer eriri", "detexted", "thighhighs"]
|
||||
@tags = ["misaki_kurehito", "saenai_heroine_no_sodatekata", "sawamura_spencer_eriri", "detexted", "thighhighs"]
|
||||
@size = 9_118_998
|
||||
@data = { site_name: "yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil }
|
||||
|
||||
@@ -93,7 +93,7 @@ module Sources
|
||||
anthropomorphism bed blonde_hair bow brown_eyes doll
|
||||
girls_frontline hara_shoutarou hoodie long_hair pantyhose scar skirt
|
||||
twintails ump-45_(girls_frontline) ump-9_(girls_frontline)
|
||||
].map { |tag| tag.tr("_", " ") }
|
||||
]
|
||||
@profile_url = "https://www.pixiv.net/member.php?id=22528152"
|
||||
|
||||
@data = { site_name: "konachan.com", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url }
|
||||
|
||||
@@ -20,8 +20,9 @@ module Sources
|
||||
end
|
||||
|
||||
should "get the tags" do
|
||||
tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red_hair"]]
|
||||
assert_equal(tags, @site.tags)
|
||||
tags = ["tag", "red hair", "red-hair", "red_hair"]
|
||||
assert_equal(tags, @site.tags.map(&:first))
|
||||
assert_equal(["red_hair", "tag"], @site.normalized_tags)
|
||||
end
|
||||
|
||||
should "get the commentary" do
|
||||
@@ -100,8 +101,9 @@ module Sources
|
||||
end
|
||||
|
||||
should "get the tags" do
|
||||
tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red_hair"]]
|
||||
assert_equal(tags, @site.tags)
|
||||
tags = ["tag", "red hair", "red-hair", "red_hair"]
|
||||
assert_equal(tags, @site.tags.map(&:first))
|
||||
assert_equal(["red_hair", "tag"], @site.normalized_tags)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user