From cc8986641b9f5b41a1aef27aa26345ae55ad360c Mon Sep 17 00:00:00 2001 From: evazion Date: Sun, 9 Jul 2017 11:43:55 -0500 Subject: [PATCH] Fix #3208: Fix translated tag suggestions for Pixiv. * Only suggest the Danbooru tag with the same name if there is no matching wiki other name. Example: if we have the Pixiv tag `Fate` and the Danbooru tag `fate_(series)` with other name `fate`, suggest that, not the Danbooru tag `fate`. * Don't suggest tags that are empty or whose wiki is deleted. * Only split tags on "/" if there are no other matches, and only for Pixiv. * For Pixiv, only include traditional media tags in tag list, not digital media (Photoshop, SAI). * Add some tests. --- app/logical/pixiv_api_client.rb | 18 ++++++- app/logical/sources/site.rb | 19 +------- app/logical/sources/strategies/base.rb | 17 +++++++ app/logical/sources/strategies/pixiv.rb | 11 +++++ app/models/tag.rb | 4 ++ test/unit/sources/pixiv_test.rb | 63 ++++++++++++++++++++++++- 6 files changed, 112 insertions(+), 20 deletions(-) diff --git a/app/logical/pixiv_api_client.rb b/app/logical/pixiv_api_client.rb index 199c24840..20084f7fe 100644 --- a/app/logical/pixiv_api_client.rb +++ b/app/logical/pixiv_api_client.rb @@ -3,6 +3,21 @@ class PixivApiClient CLIENT_ID = "bYGKuGVw91e0NMfPGp44euvGt59s" CLIENT_SECRET = "HP3RmkgAmEGro0gn1x9ioawQE8WMfvLXDz3ZqxpK" + # Tools to not include in the tags list. We don't tag digital media, so + # including these results in bad translated tags suggestions. + TOOLS_BLACKLIST = %w[ + Photoshop Illustrator Fireworks Flash Painter PaintShopPro pixiv\ Sketch + CLIP\ STUDIO\ PAINT IllustStudio ComicStudio RETAS\ STUDIO SAI PhotoStudio + Pixia NekoPaint PictBear openCanvas ArtRage Expression Inkscape GIMP + CGillust COMICWORKS MS_Paint EDGE AzPainter AzPainter2 AzDrawing + PicturePublisher SketchBookPro Processing 4thPaint GraphicsGale mdiapp + Paintgraphic AfterEffects drawr CLIP\ PAINT\ Lab FireAlpaca Pixelmator + AzDrawing2 MediBang\ Paint Krita ibisPaint Procreate Live2D + Lightwave3D Shade Poser STRATA AnimationMaster XSI CARRARA CINEMA4D Maya + 3dsMax Blender ZBrush Metasequoia Sunny3D Bryce Vue Hexagon\ King SketchUp + VistaPro Sculptris Comi\ Po! modo DAZ\ Studio 3D-Coat + ] + class Error < Exception ; end class WorksResponse @@ -96,7 +111,8 @@ class PixivApiClient @page_count = json["page_count"].to_i @artist_commentary_title = json["title"].to_s @artist_commentary_desc = json["caption"].to_s - @tags = [json["tags"], json["tools"]].flatten.compact.reject {|x| x =~ /^http:/} + @tags = json["tags"].reject {|x| x =~ /^http:/} + @tags += json["tools"] - TOOLS_BLACKLIST if page_count > 1 @pages = json["metadata"]["pages"].map {|x| x["image_urls"]["large"]} diff --git a/app/logical/sources/site.rb b/app/logical/sources/site.rb index 50b233eb8..0b938fc08 100644 --- a/app/logical/sources/site.rb +++ b/app/logical/sources/site.rb @@ -8,7 +8,7 @@ module Sources :file_url, :ugoira_frame_data, :ugoira_content_type, :image_urls, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, - :rewrite_thumbnails, :illust_id_from_url, :to => :strategy + :rewrite_thumbnails, :illust_id_from_url, :translate_tag, :translated_tags, :to => :strategy def self.strategies [Strategies::PixivWhitecube, Strategies::Pixiv, Strategies::NicoSeiga, Strategies::DeviantArt, Strategies::ArtStation, Strategies::Nijie, Strategies::Twitter, Strategies::Tumblr, Strategies::Pawoo] @@ -43,23 +43,6 @@ module Sources url end - def translated_tags - untranslated_tags = tags - untranslated_tags = untranslated_tags.map(&:first) - untranslated_tags += untranslated_tags.grep(/\//).map {|x| x.split(/\//)}.flatten - untranslated_tags = untranslated_tags.map do |tag| - if tag =~ /\A(\S+?)_?\d+users入り\Z/ - $1 - else - tag - end - end - untranslated_tags.reject! {|x| x.blank?} - wikis = WikiPage.title_in(untranslated_tags) - wikis += WikiPage.other_names_equal(untranslated_tags) - wikis.uniq.map{|wiki_page| [wiki_page.title, wiki_page.category_name]} - end - def to_h return { :artist_name => artist_name, diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index 31f8e3df2..356b704ca 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -80,6 +80,23 @@ module Sources (@tags || []).uniq end + def translated_tags + translated_tags = tags.map(&:first).flat_map(&method(:translate_tag)).uniq.sort + translated_tags.map { |tag| [tag.name, tag.category] } + end + + # Given a tag from the source site, should return an array of corresponding Danbooru tags. + def translate_tag(untranslated_tag) + translated_tags = Tag.where(name: WikiPage.active.other_names_equal([untranslated_tag]).uniq.select(:title)) + + if translated_tags.empty? + normalized_name = TagAlias.to_aliased([Tag.normalize_name(untranslated_tag)]) + translated_tags = Tag.nonempty.where(name: normalized_name) + end + + translated_tags + end + # Should be set to a url for sites that prevent hotlinking, or left nil for sites that don't. def fake_referer nil diff --git a/app/logical/sources/strategies/pixiv.rb b/app/logical/sources/strategies/pixiv.rb index cd0b3c3c7..a9c68e216 100644 --- a/app/logical/sources/strategies/pixiv.rb +++ b/app/logical/sources/strategies/pixiv.rb @@ -56,6 +56,17 @@ module Sources "http://www.pixiv.net/member.php?id=#{@metadata.user_id}/" end + def translate_tag(tag) + normalized_tag = tag.gsub(/\A(\S+?)_?\d+users入り\Z/i, '\1') + + translated_tags = super(normalized_tag) + if translated_tags.empty? && normalized_tag.include?("/") + translated_tags = normalized_tag.split("/").flat_map { |tag| super(tag) } + end + + translated_tags + end + def get return unless illust_id_from_url @illust_id = illust_id_from_url diff --git a/app/models/tag.rb b/app/models/tag.rb index 8a2227718..b3a08c0e5 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -787,6 +787,10 @@ class Tag < ApplicationRecord end module SearchMethods + def nonempty + where("tags.post_count > 0") + end + def name_matches(name) where("tags.name LIKE ? ESCAPE E'\\\\'", name.mb_chars.downcase.to_escaped_for_sql_like) end diff --git a/test/unit/sources/pixiv_test.rb b/test/unit/sources/pixiv_test.rb index a96540ca5..d73d21ea8 100644 --- a/test/unit/sources/pixiv_test.rb +++ b/test/unit/sources/pixiv_test.rb @@ -84,7 +84,7 @@ module Sources pixiv_tags = @site.tags.map(&:first) pixiv_links = @site.tags.map(&:last) - assert_equal(["漫画", "foo", "bar", "tag1", "tag2", "derp", "鉛筆", "色鉛筆", "シャープペンシル"], pixiv_tags) + assert_equal(%w[漫画 Fate/GrandOrder foo FOO 風景10users入り 伊19/陸奥 鉛筆], pixiv_tags) assert_contains(pixiv_links, /search\.php/) end @@ -131,6 +131,67 @@ module Sources assert_equal(dtext_desc, @site.dtext_artist_commentary_desc) end end + + context "translating the tags" do + setup do + CurrentUser.user = FactoryGirl.create(:user) + CurrentUser.ip_addr = "127.0.0.1" + + tags = { + "comic" => "漫画", + "scenery" => "風景", + "i-19_(kantai_collection)" => "伊19", + "mutsu_(kantai_collection)" => "陸奥", + "fate/grand_order" => "Fate/GrandOrder", + "fate" => "", + "foo" => "", + } + + tags.each do |tag, other_names| + FactoryGirl.create(:tag, name: tag, post_count: 1) + FactoryGirl.create(:wiki_page, title: tag, other_names: other_names) + end + + @site = get_source("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=46304614") + @tags = @site.tags.map(&:first) + @translated_tags = @site.translated_tags.map(&:first) + end + + should "get the original tags" do + assert_equal(%w[漫画 Fate/GrandOrder foo FOO 風景10users入り 伊19/陸奥 鉛筆], @tags) + end + + should "translate the tag if it matches a wiki other name" do + assert_includes(@tags, "漫画") + assert_includes(@translated_tags, "comic") + end + + should "return the same tag if it doesn't match a wiki other name but it does match a tag" do + assert_includes(@tags, "foo") + assert_includes(@translated_tags, "foo") + end + + should "not translate tags for digital media" do + assert_equal(false, @tags.include?("Photoshop")) + end + + should "normalize 10users入り tags" do + assert_includes(@tags, "風景10users入り") + assert_includes(@translated_tags, "scenery") + end + + should "split the base tag if it has no match" do + assert_includes(@tags, "伊19/陸奥") + assert_includes(@translated_tags, "i-19_(kantai_collection)") + assert_includes(@translated_tags, "mutsu_(kantai_collection)") + end + + should "not split the base tag if it has a match" do + assert_includes(@tags, "Fate/GrandOrder") + assert_includes(@translated_tags, "fate/grand_order") + assert_equal(false, @translated_tags.grep("fate").any?) + end + end end end end