Fix #3208: Fix translated tag suggestions for Pixiv.
* Only suggest the Danbooru tag with the same name if there is no matching wiki other name. Example: given the Pixiv tag `Fate` and the Danbooru tag `fate_(series)` with other name `fate`, suggest `fate_(series)`, not the Danbooru tag `fate`.
* Don't suggest tags that are empty or whose wiki is deleted.
* Only split tags on "/" if there are no other matches, and only for Pixiv.
* For Pixiv, only include traditional media tags in the tag list, not digital media (Photoshop, SAI).
* Add some tests.
This commit is contained in:
@@ -3,6 +3,21 @@ class PixivApiClient
|
||||
CLIENT_ID = "bYGKuGVw91e0NMfPGp44euvGt59s"
|
||||
CLIENT_SECRET = "HP3RmkgAmEGro0gn1x9ioawQE8WMfvLXDz3ZqxpK"
|
||||
|
||||
# Tools that must not be included in the tag list. We don't tag digital
# media, so including these produces bad translated tag suggestions.
#
# Entries containing spaces are written with an escaped space (`\ `) so that
# `%w[]` keeps them as a single element. The array is frozen because it is a
# shared constant that is only ever read.
TOOLS_BLACKLIST = %w[
  Photoshop Illustrator Fireworks Flash Painter PaintShopPro pixiv\ Sketch
  CLIP\ STUDIO\ PAINT IllustStudio ComicStudio RETAS\ STUDIO SAI PhotoStudio
  Pixia NekoPaint PictBear openCanvas ArtRage Expression Inkscape GIMP
  CGillust COMICWORKS MS_Paint EDGE AzPainter AzPainter2 AzDrawing
  PicturePublisher SketchBookPro Processing 4thPaint GraphicsGale mdiapp
  Paintgraphic AfterEffects drawr CLIP\ PAINT\ Lab FireAlpaca Pixelmator
  AzDrawing2 MediBang\ Paint Krita ibisPaint Procreate Live2D
  Lightwave3D Shade Poser STRATA AnimationMaster XSI CARRARA CINEMA4D Maya
  3dsMax Blender ZBrush Metasequoia Sunny3D Bryce Vue Hexagon\ King SketchUp
  VistaPro Sculptris Comi\ Po! modo DAZ\ Studio 3D-Coat
].freeze
|
||||
|
||||
# Error type for this API client.
# NOTE(review): this inherits from Exception rather than StandardError, so a
# bare `rescue` / `rescue => e` will NOT catch it. Confirm that is intentional
# before relying on generic rescue clauses around client calls.
class Error < Exception ; end
|
||||
|
||||
class WorksResponse
|
||||
@@ -96,7 +111,8 @@ class PixivApiClient
|
||||
@page_count = json["page_count"].to_i
|
||||
@artist_commentary_title = json["title"].to_s
|
||||
@artist_commentary_desc = json["caption"].to_s
|
||||
@tags = [json["tags"], json["tools"]].flatten.compact.reject {|x| x =~ /^http:/}
|
||||
@tags = json["tags"].reject {|x| x =~ /^http:/}
|
||||
@tags += json["tools"] - TOOLS_BLACKLIST
|
||||
|
||||
if page_count > 1
|
||||
@pages = json["metadata"]["pages"].map {|x| x["image_urls"]["large"]}
|
||||
|
||||
@@ -8,7 +8,7 @@ module Sources
|
||||
:file_url, :ugoira_frame_data, :ugoira_content_type, :image_urls,
|
||||
:artist_commentary_title, :artist_commentary_desc,
|
||||
:dtext_artist_commentary_title, :dtext_artist_commentary_desc,
|
||||
:rewrite_thumbnails, :illust_id_from_url, :to => :strategy
|
||||
:rewrite_thumbnails, :illust_id_from_url, :translate_tag, :translated_tags, :to => :strategy
|
||||
|
||||
def self.strategies
|
||||
[Strategies::PixivWhitecube, Strategies::Pixiv, Strategies::NicoSeiga, Strategies::DeviantArt, Strategies::ArtStation, Strategies::Nijie, Strategies::Twitter, Strategies::Tumblr, Strategies::Pawoo]
|
||||
@@ -43,23 +43,6 @@ module Sources
|
||||
url
|
||||
end
|
||||
|
||||
# Returns [wiki title, wiki category name] pairs for every wiki page whose
# title or other names match one of the source site's tags.
#
# Before the lookup, tags containing "/" are additionally split into their
# parts, and a trailing "<N>users入り" suffix is stripped from each name.
def translated_tags
  names = tags.map(&:first)

  # Keep the original "a/b" tag and also add each of its parts.
  names += names.grep(/\//).map { |name| name.split(/\//) }.flatten

  # Drop the "<N>users入り" suffix, keeping only the base tag.
  names = names.map do |name|
    name =~ /\A(\S+?)_?\d+users入り\Z/ ? $1 : name
  end
  names.reject! { |name| name.blank? }

  wikis = WikiPage.title_in(names)
  wikis += WikiPage.other_names_equal(names)
  wikis.uniq.map { |wiki| [wiki.title, wiki.category_name] }
end
|
||||
|
||||
def to_h
|
||||
return {
|
||||
:artist_name => artist_name,
|
||||
|
||||
@@ -80,6 +80,23 @@ module Sources
|
||||
(@tags || []).uniq
|
||||
end
|
||||
|
||||
# Returns [name, category] pairs for the tags that the source site's tags
# translate to. Each source tag name is passed through #translate_tag; the
# combined results are de-duplicated and sorted before being mapped.
def translated_tags
  source_names = tags.map(&:first)
  matches = source_names.flat_map { |name| translate_tag(name) }.uniq.sort
  matches.map { |match| [match.name, match.category] }
end
|
||||
|
||||
# Given a tag from the source site, should return the corresponding Danbooru tags.
#
# Lookup order:
# 1. Tags named after an active wiki page that lists the source tag among
#    its other names.
# 2. Only if (1) finds nothing: the nonempty tag whose name equals the
#    normalized, alias-resolved source tag.
def translate_tag(untranslated_tag)
  wiki_titles = WikiPage.active.other_names_equal([untranslated_tag]).uniq.select(:title)
  tags_by_other_name = Tag.where(name: wiki_titles)
  return tags_by_other_name unless tags_by_other_name.empty?

  aliased_names = TagAlias.to_aliased([Tag.normalize_name(untranslated_tag)])
  Tag.nonempty.where(name: aliased_names)
end
|
||||
|
||||
# Should be set to a url for sites that prevent hotlinking, or left nil for sites that don't.
|
||||
def fake_referer
|
||||
nil
|
||||
|
||||
@@ -56,6 +56,17 @@ module Sources
|
||||
"http://www.pixiv.net/member.php?id=#{@metadata.user_id}/"
|
||||
end
|
||||
|
||||
# Pixiv-specific tag translation.
#
# Strips a trailing "<N>users入り" suffix from the tag, then defers to the
# parent implementation. Only when that finds nothing and the tag contains a
# "/" is the tag split on "/" and each part translated on its own.
def translate_tag(tag)
  normalized_tag = tag.gsub(/\A(\S+?)_?\d+users入り\Z/i, '\1')

  translated_tags = super(normalized_tag)
  if translated_tags.empty? && normalized_tag.include?("/")
    # Block parameter is named `part` so it does not shadow the method's `tag`.
    translated_tags = normalized_tag.split("/").flat_map { |part| super(part) }
  end

  translated_tags
end
|
||||
|
||||
def get
|
||||
return unless illust_id_from_url
|
||||
@illust_id = illust_id_from_url
|
||||
|
||||
@@ -787,6 +787,10 @@ class Tag < ApplicationRecord
|
||||
end
|
||||
|
||||
module SearchMethods
|
||||
# Scope: restricts the query to tags whose post_count is greater than zero.
def nonempty
  where("tags.post_count > 0")
end
|
||||
|
||||
# Scope: restricts the query to tags whose name is LIKE the given pattern.
# The pattern is downcased first, then passed through
# `to_escaped_for_sql_like` (project helper; presumably prepares the string
# for use as a LIKE pattern with backslash as the escape character - confirm).
def name_matches(name)
  like_pattern = name.mb_chars.downcase.to_escaped_for_sql_like
  where("tags.name LIKE ? ESCAPE E'\\\\'", like_pattern)
end
|
||||
|
||||
@@ -84,7 +84,7 @@ module Sources
|
||||
pixiv_tags = @site.tags.map(&:first)
|
||||
pixiv_links = @site.tags.map(&:last)
|
||||
|
||||
assert_equal(["漫画", "foo", "bar", "tag1", "tag2", "derp", "鉛筆", "色鉛筆", "シャープペンシル"], pixiv_tags)
|
||||
assert_equal(%w[漫画 Fate/GrandOrder foo FOO 風景10users入り 伊19/陸奥 鉛筆], pixiv_tags)
|
||||
assert_contains(pixiv_links, /search\.php/)
|
||||
end
|
||||
|
||||
@@ -131,6 +131,67 @@ module Sources
|
||||
assert_equal(dtext_desc, @site.dtext_artist_commentary_desc)
|
||||
end
|
||||
end
|
||||
|
||||
context "translating the tags" do
  setup do
    CurrentUser.user = FactoryGirl.create(:user)
    CurrentUser.ip_addr = "127.0.0.1"

    # Danbooru tag name => other_names value given to that tag's wiki page.
    # An empty string means the wiki page has no other names.
    other_names_by_tag = {
      "comic" => "漫画",
      "scenery" => "風景",
      "i-19_(kantai_collection)" => "伊19",
      "mutsu_(kantai_collection)" => "陸奥",
      "fate/grand_order" => "Fate/GrandOrder",
      "fate" => "",
      "foo" => "",
    }

    # Every fixture tag gets one post (so it is nonempty) and a wiki page.
    other_names_by_tag.each do |name, other_names|
      FactoryGirl.create(:tag, name: name, post_count: 1)
      FactoryGirl.create(:wiki_page, title: name, other_names: other_names)
    end

    @site = get_source("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=46304614")
    @tags = @site.tags.map(&:first)
    @translated_tags = @site.translated_tags.map(&:first)
  end

  should "get the original tags" do
    assert_equal(%w[漫画 Fate/GrandOrder foo FOO 風景10users入り 伊19/陸奥 鉛筆], @tags)
  end

  should "translate the tag if it matches a wiki other name" do
    assert_includes(@tags, "漫画")
    assert_includes(@translated_tags, "comic")
  end

  should "return the same tag if it doesn't match a wiki other name but it does match a tag" do
    assert_includes(@tags, "foo")
    assert_includes(@translated_tags, "foo")
  end

  should "not translate tags for digital media" do
    assert_equal(false, @tags.include?("Photoshop"))
  end

  should "normalize 10users入り tags" do
    assert_includes(@tags, "風景10users入り")
    assert_includes(@translated_tags, "scenery")
  end

  should "split the base tag if it has no match" do
    assert_includes(@tags, "伊19/陸奥")
    assert_includes(@translated_tags, "i-19_(kantai_collection)")
    assert_includes(@translated_tags, "mutsu_(kantai_collection)")
  end

  should "not split the base tag if it has a match" do
    assert_includes(@tags, "Fate/GrandOrder")
    assert_includes(@translated_tags, "fate/grand_order")
    assert_equal(false, @translated_tags.grep("fate").any?)
  end
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user