# Patch summary (descriptive header only; it sits ahead of the first "diff --git" line and is not part of any hunk).
#
# Purpose: source strategies stop normalizing tag names inside `tags`; normalization moves into new
# `normalized_tags` / `normalize_tag` methods, and tag translation now works from the normalized names.
#
# app/logical/sources/strategies/base.rb
#   - adds `normalized_tags`: passes each tag name from `tags` through `normalize_tag`, then `.sort.uniq`.
#   - adds `normalize_tag`: `WikiPage.normalize_other_name(tag).downcase`.
#   - `translated_tags` feeds `normalized_tags` (instead of `tags.map(&:first)`) into `translate_tag`.
#   - second hunk: adds a `:normalized_tags` entry between `:tags` and `:translated_tags` in the hash shown there.
# app/logical/sources/strategies/art_station.rb
#   - `tags` returns the tag text as-is (drops `downcase.tr(" ", "_")`).
# app/logical/sources/strategies/moebooru.rb
#   - `tags` returns the tag as-is (drops `tr("_", " ")`).
# app/logical/sources/strategies/tumblr.rb
#   - `tags` returns the tag as-is and builds the URL from it (drops the `gsub(/[ _-]/, "_")` rewrite).
#   - new `normalize_tag` turns "-" into "_" and then calls `super(tag)`.
# app/logical/sources/strategies/pixiv.rb
#   - drops `memoize :tags`.
#   - the `\d+users入り` suffix stripping moves out of `translate_tag` into a new `normalize_tag` override.
#   - NOTE(review): this override does not call `super`, unlike the Tumblr one; presumably the base
#     normalization (normalize_other_name + downcase) is therefore skipped for Pixiv tags. Confirm intended.
# app/models/wiki_page.rb
#   - adds `WikiPage.normalize_other_name`: NFKC-normalize, collapse whitespace runs to one space, strip,
#     then replace spaces with "_"; `normalize_other_names` now maps names through it.
#   - `other_names_include` normalizes and downcases its argument and matches `lower(other_name)` over
#     `unnest(other_names)` through a subquery, replacing the `other_names @> ARRAY[?]` containment test.
# test/unit/sources/{art_station,moebooru,tumblr}_test.rb
#   - expectations updated to the raw `tags` values and, where asserted, the new `normalized_tags`.
#
diff --git a/app/logical/sources/strategies/art_station.rb b/app/logical/sources/strategies/art_station.rb index 48b71a3d4..2a234153e 100644 --- a/app/logical/sources/strategies/art_station.rb +++ b/app/logical/sources/strategies/art_station.rb @@ -72,7 +72,7 @@ module Sources::Strategies def tags api_response[:tags].to_a.map do |tag| - [tag.downcase.tr(" ", "_"), "https://www.artstation.com/search?q=" + CGI.escape(tag)] + [tag, "https://www.artstation.com/search?q=" + CGI.escape(tag)] end end diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index c877e0d7a..822924c14 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -178,8 +178,16 @@ module Sources (@tags || []).uniq end + def normalized_tags + tags.map { |tag, url| normalize_tag(tag) }.sort.uniq + end + + def normalize_tag(tag) + WikiPage.normalize_other_name(tag).downcase + end + def translated_tags - translated_tags = tags.map(&:first).flat_map(&method(:translate_tag)).uniq.sort + translated_tags = normalized_tags.flat_map(&method(:translate_tag)).uniq.sort translated_tags.reject { |tag| tag.category == Tag.categories.artist } end @@ -240,6 +248,7 @@ module Sources :canonical_url => canonical_url, :normalized_for_artist_finder_url => normalize_for_artist_finder, :tags => tags, + :normalized_tags => normalized_tags, :translated_tags => translated_tags, :unique_id => unique_id, :artist_commentary => { diff --git a/app/logical/sources/strategies/moebooru.rb b/app/logical/sources/strategies/moebooru.rb index 028629b5b..cdeb82d9e 100644 --- a/app/logical/sources/strategies/moebooru.rb +++ b/app/logical/sources/strategies/moebooru.rb @@ -77,7 +77,7 @@ module Sources def tags api_response[:tags].to_s.split.map do |tag| - [tag.tr("_", " "), "https://#{site_name}/post?tags=#{CGI.escape(tag)}"] + [tag, "https://#{site_name}/post?tags=#{CGI.escape(tag)}"] end end diff --git a/app/logical/sources/strategies/pixiv.rb 
b/app/logical/sources/strategies/pixiv.rb index eb0c815c7..4179b2cc5 100644 --- a/app/logical/sources/strategies/pixiv.rb +++ b/app/logical/sources/strategies/pixiv.rb @@ -160,14 +160,16 @@ module Sources rescue PixivApiClient::BadIDError [] end - memoize :tags + + def normalize_tag(tag) + tag.gsub(/\d+users入り\z/i, "") + end def translate_tag(tag) - normalized_tag = tag.gsub(/\d+users入り\z/i, "") - translated_tags = super(normalized_tag) + translated_tags = super(tag) - if translated_tags.empty? && normalized_tag.include?("/") - translated_tags = normalized_tag.split("/").flat_map { |tag| super(tag) } + if translated_tags.empty? && tag.include?("/") + translated_tags = tag.split("/").flat_map { |tag| super(tag) } end translated_tags diff --git a/app/logical/sources/strategies/tumblr.rb b/app/logical/sources/strategies/tumblr.rb index a32b3a236..f1bdda16b 100644 --- a/app/logical/sources/strategies/tumblr.rb +++ b/app/logical/sources/strategies/tumblr.rb @@ -105,12 +105,15 @@ module Sources::Strategies def tags post[:tags].to_a.map do |tag| - # normalize tags: space, underscore, and hyphen are equivalent in tumblr tags. 
- etag = tag.gsub(/[ _-]/, "_") - [etag, "https://tumblr.com/tagged/#{CGI.escape(etag)}"] + [tag, "https://tumblr.com/tagged/#{CGI.escape(tag)}"] end.uniq end + def normalize_tag(tag) + tag = tag.tr("-", "_") + super(tag) + end + def dtext_artist_commentary_desc DText.from_html(artist_commentary_desc).strip end diff --git a/app/models/wiki_page.rb b/app/models/wiki_page.rb index 6a9c9cd59..b1bff5770 100644 --- a/app/models/wiki_page.rb +++ b/app/models/wiki_page.rb @@ -32,7 +32,9 @@ class WikiPage < ApplicationRecord end def other_names_include(name) - where("wiki_pages.other_names @> ARRAY[?]", name.unicode_normalize(:nfkc)) + name = normalize_other_name(name).downcase + subquery = WikiPage.from("unnest(other_names) AS other_name").where("lower(other_name) = ?", name) + where(id: subquery) end def other_names_match(name) @@ -146,7 +148,11 @@ class WikiPage < ApplicationRecord end def normalize_other_names - self.other_names = other_names.map { |name| name.unicode_normalize(:nfkc) }.uniq + self.other_names = other_names.map { |name| WikiPage.normalize_other_name(name) }.uniq + end + + def self.normalize_other_name(name) + name.unicode_normalize(:nfkc).gsub(/[[:space:]]+/, " ").strip.tr(" ", "_") end def skip_secondary_validations=(value) diff --git a/test/unit/sources/art_station_test.rb b/test/unit/sources/art_station_test.rb index 640a5a43d..9ef9ba928 100644 --- a/test/unit/sources/art_station_test.rb +++ b/test/unit/sources/art_station_test.rb @@ -56,7 +56,8 @@ module Sources end should "get the tags" do - assert_equal(%w[gantz reika], @site.tags.map(&:first)) + assert_equal(%w[gantz Reika], @site.tags.map(&:first)) + assert_equal(%w[gantz reika], @site.normalized_tags) end should "get the artist commentary" do @@ -74,6 +75,11 @@ module Sources url = "https://cdna.artstation.com/p/assets/images/images/000/144/922/large/cassio-yoshiyaki-cody2backup2-yoshiyaki.jpg?1406314198" assert_equal(url, @site.image_url) end + + should "get the tags" do + 
assert_equal(["Street Fighter", "Cody", "SF"].sort, @site.tags.map(&:first).sort) + assert_equal(["street_fighter", "cody", "sf"].sort, @site.normalized_tags.sort) + end end context "The source site for a http://cdna.artstation.com/p/assets/... url" do diff --git a/test/unit/sources/moebooru_test.rb b/test/unit/sources/moebooru_test.rb index d42cff72c..4654ea98f 100644 --- a/test/unit/sources/moebooru_test.rb +++ b/test/unit/sources/moebooru_test.rb @@ -34,7 +34,7 @@ module Sources @samp = "https://files.yande.re/sample/7ecfdead705d7b956b26b1d37b98d089/yande.re%20482880%20sample%20bayashiko%20journey_to_the_west%20sun_wukong.jpg" @full = "https://files.yande.re/image/7ecfdead705d7b956b26b1d37b98d089/yande.re%20482880.jpg" @page = "https://yande.re/post/show/482880" - @tags = ["bayashiko", "journey to the west", "sun wukong"] + @tags = ["bayashiko", "journey_to_the_west", "sun_wukong"] @size = 362_554 @profile_url = "https://twitter.com/apononori" @data = { site_name: "yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url } @@ -52,7 +52,7 @@ module Sources @jpeg = "https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018%20detexted%20misaki_kurehito%20saenai_heroine_no_sodatekata%20sawamura_spencer_eriri%20thighhighs.jpg" @full = "https://files.yande.re/image/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018.png" @page = "https://yande.re/post/show/398018" - @tags = ["misaki kurehito", "saenai heroine no sodatekata", "sawamura spencer eriri", "detexted", "thighhighs"] + @tags = ["misaki_kurehito", "saenai_heroine_no_sodatekata", "sawamura_spencer_eriri", "detexted", "thighhighs"] @size = 9_118_998 @data = { site_name: "yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil } @@ -93,7 +93,7 @@ module Sources anthropomorphism bed blonde_hair bow brown_eyes doll girls_frontline hara_shoutarou hoodie long_hair pantyhose scar 
skirt twintails ump-45_(girls_frontline) ump-9_(girls_frontline) - ].map { |tag| tag.tr("_", " ") } + ] @profile_url = "https://www.pixiv.net/member.php?id=22528152" @data = { site_name: "konachan.com", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url } diff --git a/test/unit/sources/tumblr_test.rb b/test/unit/sources/tumblr_test.rb index d5100f58f..43fb8fb7b 100644 --- a/test/unit/sources/tumblr_test.rb +++ b/test/unit/sources/tumblr_test.rb @@ -20,8 +20,9 @@ module Sources end should "get the tags" do - tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red_hair"]] - assert_equal(tags, @site.tags) + tags = ["tag", "red hair", "red-hair", "red_hair"] + assert_equal(tags, @site.tags.map(&:first)) + assert_equal(["red_hair", "tag"], @site.normalized_tags) end should "get the commentary" do @@ -100,8 +101,9 @@ module Sources end should "get the tags" do - tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red_hair"]] - assert_equal(tags, @site.tags) + tags = ["tag", "red hair", "red-hair", "red_hair"] + assert_equal(tags, @site.tags.map(&:first)) + assert_equal(["red_hair", "tag"], @site.normalized_tags) end end