Fix #3208: Fix translated tag suggestions for Pixiv.

* Only suggest the Danbooru tag with the same name if there is no
  matching wiki other name. Example: if we have the Pixiv tag `Fate` and
  the Danbooru tag `fate_(series)` with other name `fate`, suggest
  `fate_(series)`, not the Danbooru tag `fate`.

* Don't suggest tags that are empty or whose wiki is deleted.

* Only split tags on "/" if there are no other matches, and only for Pixiv.

* For Pixiv, only include traditional media tools in the tag list, not digital media tools (e.g. Photoshop, SAI).

* Add some tests.
This commit is contained in:
evazion
2017-07-09 11:43:55 -05:00
parent a860bec0a7
commit cc8986641b
6 changed files with 112 additions and 20 deletions

View File

@@ -3,6 +3,21 @@ class PixivApiClient
CLIENT_ID = "bYGKuGVw91e0NMfPGp44euvGt59s"
CLIENT_SECRET = "HP3RmkgAmEGro0gn1x9ioawQE8WMfvLXDz3ZqxpK"
# Tools to not include in the tags list. We don't tag digital media, so
# including these results in bad translated tags suggestions.
#
# Entries are matched by exact string against the "tools" array of the API
# response (the list is subtracted from it), so spelling, case and embedded
# spaces must match what Pixiv returns. Frozen so that the shared constant
# cannot be mutated by accident.
TOOLS_BLACKLIST = %w[
  Photoshop Illustrator Fireworks Flash Painter PaintShopPro pixiv\ Sketch
  CLIP\ STUDIO\ PAINT IllustStudio ComicStudio RETAS\ STUDIO SAI PhotoStudio
  Pixia NekoPaint PictBear openCanvas ArtRage Expression Inkscape GIMP
  CGillust COMICWORKS MS_Paint EDGE AzPainter AzPainter2 AzDrawing
  PicturePublisher SketchBookPro Processing 4thPaint GraphicsGale mdiapp
  Paintgraphic AfterEffects drawr CLIP\ PAINT\ Lab FireAlpaca Pixelmator
  AzDrawing2 MediBang\ Paint Krita ibisPaint Procreate Live2D
  Lightwave3D Shade Poser STRATA AnimationMaster XSI CARRARA CINEMA4D Maya
  3dsMax Blender ZBrush Metasequoia Sunny3D Bryce Vue Hexagon\ King SketchUp
  VistaPro Sculptris Comi\ Po! modo DAZ\ Studio 3D-Coat
].freeze
# Error class namespaced under this client.
# NOTE(review): this inherits from Exception rather than StandardError, so a
# bare `rescue` / `rescue => e` will NOT catch it; callers must rescue it (or
# Exception) explicitly. Confirm this is intentional before changing it.
class Error < Exception ; end
class WorksResponse
@@ -96,7 +111,8 @@ class PixivApiClient
@page_count = json["page_count"].to_i
@artist_commentary_title = json["title"].to_s
@artist_commentary_desc = json["caption"].to_s
@tags = [json["tags"], json["tools"]].flatten.compact.reject {|x| x =~ /^http:/}
@tags = json["tags"].reject {|x| x =~ /^http:/}
@tags += json["tools"] - TOOLS_BLACKLIST
if page_count > 1
@pages = json["metadata"]["pages"].map {|x| x["image_urls"]["large"]}

View File

@@ -8,7 +8,7 @@ module Sources
:file_url, :ugoira_frame_data, :ugoira_content_type, :image_urls,
:artist_commentary_title, :artist_commentary_desc,
:dtext_artist_commentary_title, :dtext_artist_commentary_desc,
:rewrite_thumbnails, :illust_id_from_url, :to => :strategy
:rewrite_thumbnails, :illust_id_from_url, :translate_tag, :translated_tags, :to => :strategy
def self.strategies
[Strategies::PixivWhitecube, Strategies::Pixiv, Strategies::NicoSeiga, Strategies::DeviantArt, Strategies::ArtStation, Strategies::Nijie, Strategies::Twitter, Strategies::Tumblr, Strategies::Pawoo]
@@ -43,23 +43,6 @@ module Sources
url
end
# Returns [wiki title, category name] pairs for every wiki page whose title
# or other name matches one of the source site's tags.
def translated_tags
  names = tags.map(&:first)

  # Tags containing "/" are additionally looked up by each slash-separated part.
  names += names.grep(/\//).flat_map { |name| name.split(/\//) }

  # Drop a trailing "<digits>users入り" suffix (and an optional "_" before it).
  users_suffix = /\A(\S+?)_?\d+users入り\Z/
  names = names.map do |name|
    (name =~ users_suffix) ? Regexp.last_match(1) : name
  end
  names = names.reject { |name| name.blank? }

  pages = WikiPage.title_in(names)
  pages += WikiPage.other_names_equal(names)
  pages.uniq.map { |page| [page.title, page.category_name] }
end
def to_h
return {
:artist_name => artist_name,

View File

@@ -80,6 +80,23 @@ module Sources
(@tags || []).uniq
end
# Translates every tag of the source into Danbooru tags via #translate_tag
# and returns the de-duplicated, sorted matches as [name, category] pairs.
def translated_tags
  source_names = tags.map(&:first)
  matches = source_names.flat_map { |name| translate_tag(name) }
  matches.uniq.sort.map { |match| [match.name, match.category] }
end
# Given a tag from the source site, should return an array of corresponding Danbooru tags.
#
# Lookup order:
# 1. Tags named after an active wiki page that lists the source tag as one of
#    its other names.
# 2. Only if (1) finds nothing: the tag with the same (normalized, de-aliased)
#    name, provided it is non-empty.
#
# Blank input yields an empty relation without querying, since a blank name
# cannot identify a tag.
def translate_tag(untranslated_tag)
  return Tag.none if untranslated_tag.blank?

  wiki_titles = WikiPage.active.other_names_equal([untranslated_tag]).uniq.select(:title)
  translated_tags = Tag.where(name: wiki_titles)

  if translated_tags.empty?
    normalized_name = TagAlias.to_aliased([Tag.normalize_name(untranslated_tag)])
    translated_tags = Tag.nonempty.where(name: normalized_name)
  end

  translated_tags
end
# Should be set to a url for sites that prevent hotlinking, or left nil for sites that don't.
def fake_referer
nil

View File

@@ -56,6 +56,17 @@ module Sources
"http://www.pixiv.net/member.php?id=#{@metadata.user_id}/"
end
# Pixiv-specific tag translation.
#
# Strips a trailing "<digits>users入り" suffix (with an optional "_" before
# it) and translates the remaining name with the inherited implementation.
# Only when that finds nothing and the name contains "/" is the name split on
# "/" and each non-empty part translated separately.
def translate_tag(tag)
  normalized_tag = tag.gsub(/\A(\S+?)_?\d+users入り\Z/i, '\1')

  translated_tags = super(normalized_tag)
  if translated_tags.empty? && normalized_tag.include?("/")
    # A leading or doubled "/" yields empty segments; they cannot name a tag.
    parts = normalized_tag.split("/").reject(&:empty?)
    translated_tags = parts.flat_map { |part| super(part) }
  end

  translated_tags
end
def get
return unless illust_id_from_url
@illust_id = illust_id_from_url

View File

@@ -787,6 +787,10 @@ class Tag < ApplicationRecord
end
module SearchMethods
# Restricts the query to tags whose post_count is greater than zero.
def nonempty
where("tags.post_count > 0")
end
# Restricts the query to tags whose name matches +name+ via SQL LIKE.
# The name is downcased and converted with to_escaped_for_sql_like before
# being bound as the LIKE pattern.
def name_matches(name)
  like_pattern = name.mb_chars.downcase.to_escaped_for_sql_like
  where("tags.name LIKE ? ESCAPE E'\\\\'", like_pattern)
end

View File

@@ -84,7 +84,7 @@ module Sources
pixiv_tags = @site.tags.map(&:first)
pixiv_links = @site.tags.map(&:last)
assert_equal(["漫画", "foo", "bar", "tag1", "tag2", "derp", "鉛筆", "色鉛筆", "シャープペンシル"], pixiv_tags)
assert_equal(%w[漫画 Fate/GrandOrder foo FOO 風景10users入り 伊19/陸奥 鉛筆], pixiv_tags)
assert_contains(pixiv_links, /search\.php/)
end
@@ -131,6 +131,67 @@ module Sources
assert_equal(dtext_desc, @site.dtext_artist_commentary_desc)
end
end
# Checks how the tags of a Pixiv post are translated into Danbooru tags.
context "translating the tags" do
  setup do
    CurrentUser.user = FactoryGirl.create(:user)
    CurrentUser.ip_addr = "127.0.0.1"

    # Danbooru tag name => other name of its wiki page ("" means none).
    # "fate" and "foo" exist only as plain tags, to exercise the same-name
    # fallback and the "don't split if the whole tag matches" rule.
    tags = {
      "comic" => "漫画",
      "scenery" => "風景",
      "i-19_(kantai_collection)" => "伊19",
      "mutsu_(kantai_collection)" => "陸奥",
      "fate/grand_order" => "Fate/GrandOrder",
      "fate" => "",
      "foo" => "",
    }

    tags.each do |tag, other_names|
      FactoryGirl.create(:tag, name: tag, post_count: 1)
      FactoryGirl.create(:wiki_page, title: tag, other_names: other_names)
    end

    @site = get_source("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=46304614")
    @tags = @site.tags.map(&:first)
    @translated_tags = @site.translated_tags.map(&:first)
  end

  should "get the original tags" do
    assert_equal(%w[漫画 Fate/GrandOrder foo FOO 風景10users入り 伊19/陸奥 鉛筆], @tags)
  end

  should "translate the tag if it matches a wiki other name" do
    assert_includes(@tags, "漫画")
    assert_includes(@translated_tags, "comic")
  end

  should "return the same tag if it doesn't match a wiki other name but it does match a tag" do
    assert_includes(@tags, "foo")
    assert_includes(@translated_tags, "foo")
  end

  should "not translate tags for digital media" do
    # Digital tools are expected to be absent from the source tag list itself.
    refute_includes(@tags, "Photoshop")
  end

  should "normalize 10users入り tags" do
    assert_includes(@tags, "風景10users入り")
    assert_includes(@translated_tags, "scenery")
  end

  should "split the base tag if it has no match" do
    assert_includes(@tags, "伊19/陸奥")
    assert_includes(@translated_tags, "i-19_(kantai_collection)")
    assert_includes(@translated_tags, "mutsu_(kantai_collection)")
  end

  should "not split the base tag if it has a match" do
    assert_includes(@tags, "Fate/GrandOrder")
    assert_includes(@translated_tags, "fate/grand_order")
    refute_includes(@translated_tags, "fate")
  end
end
end
end
end