Merge pull request #3429 from evazion/fix-bogus-find-artist
Fix #2696: Bogus results on non-matching URL searches for Artists
This commit is contained in:
@@ -31,6 +31,118 @@ class Artist < ApplicationRecord
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
module ClassMethods
|
||||
# Sites whose bare landing URLs must never be used as an artist match.
#
# Each entry is either a String ("host" or "host/path") or a Regexp fragment.
# SITE_BLACKLIST_REGEXP wraps every entry with an optional subdomain prefix and
# a trailing slash, so subdomains are automatically included: "twitter.com"
# matches "www.twitter.com", "mobile.twitter.com" and any other subdomain of
# "twitter.com".
#
# The trailing comments are sample URLs for the site the entry belongs to.
# The list is frozen so it cannot be mutated after the regexp has been built.
SITE_BLACKLIST = [
  "artstation.com/artist", # http://www.artstation.com/artist/serafleur/
  "www.artstation.com", # http://www.artstation.com/serafleur/
  # Matches only the cdn/cdna/cdnb hosts; an artist subdomain such as
  # http://serafleur.artstation.com is not matched by this pattern.
  /(?:cdn[ab]?)\.artstation\.com/i,
  "ask.fm", # http://ask.fm/mikuroko_396
  "bcyimg.com",
  "bcyimg.com/drawer", # https://img9.bcyimg.com/drawer/32360/post/178vu/46229ec06e8111e79558c1b725ebc9e6.jpg
  "bcy.net",
  "bcy.net/illust/detail", # https://bcy.net/illust/detail/32360/1374683
  "bcy.net/u", # http://bcy.net/u/1390261
  "behance.net", # https://www.behance.net/webang111
  "booru.org",
  "booru.org/drawfriends", # http://img.booru.org/drawfriends//images/36/de65da5f588b76bc1d9de8af976b540e2dff17e2.jpg
  "donmai.us",
  "donmai.us/users", # http://danbooru.donmai.us/users/507162/
  "derpibooru.org",
  "derpibooru.org/tags", # https://derpibooru.org/tags/artist-colon-checkerboardazn
  "deviantart.net",
  "dlsite.com",
  "doujinshi.org",
  "doujinshi.org/browse/circle", # http://www.doujinshi.org/browse/circle/65368/
  "doujinshi.org/browse/author", # http://www.doujinshi.org/browse/author/979/23/
  "doujinshi.mugimugi.org",
  "doujinshi.mugimugi.org/browse/author", # http://doujinshi.mugimugi.org/browse/author/3029/
  "doujinshi.mugimugi.org/browse/circle", # http://doujinshi.mugimugi.org/browse/circle/7210/
  # The sample URL lives on drawcrowd.com, which the original ".net" entry
  # could never match. ".net" is kept so existing behavior is unchanged.
  "drawcrowd.net",
  "drawcrowd.com", # https://drawcrowd.com/agussw
  "drawr.net", # http://drawr.net/matsu310
  "dropbox.com",
  "dropbox.com/sh", # https://www.dropbox.com/sh/gz9okupqycr2vj2/GHt_oHDKsR
  "dropbox.com/u", # http://dl.dropbox.com/u/76682289/daitoHP-WP/pict/
  "e-hentai.org", # https://e-hentai.org/tag/artist:spirale
  "e621.net",
  "e621.net/post/index/1", # https://e621.net/post/index/1/spirale
  "enty.jp", # https://enty.jp/aizawachihiro888
  "enty.jp/users", # https://enty.jp/users/3766
  "facebook.com", # https://www.facebook.com/LuutenantsLoot
  "fantia.jp", # http://fantia.jp/no100
  "fantia.jp/fanclubs", # https://fantia.jp/fanclubs/1711
  "fav.me", # http://fav.me/d9y1njg
  /blog-imgs-\d+(?:-origin)?\.fc2\.com/i,
  "furaffinity.net",
  "furaffinity.net/user", # http://www.furaffinity.net/user/achthenuts
  "gelbooru.com", # http://gelbooru.com/index.php?page=account&s=profile&uname=junou
  "inkbunny.net", # https://inkbunny.net/achthenuts
  "plus.google.com", # https://plus.google.com/111509637967078773143/posts
  "hentai-foundry.com",
  "hentai-foundry.com/pictures/user", # http://www.hentai-foundry.com/pictures/user/aaaninja/
  "hentai-foundry.com/user", # http://www.hentai-foundry.com/user/aaaninja/profile
  %r!pictures\.hentai-foundry\.com(?:/\w)?!i, # http://pictures.hentai-foundry.com/a/aaaninja/
  "i.imgur.com", # http://i.imgur.com/Ic9q3.jpg
  "instagram.com", # http://www.instagram.com/serafleur.art/
  "iwara.tv",
  "iwara.tv/users", # http://ecchi.iwara.tv/users/marumega
  "kym-cdn.com",
  "livedoor.blogimg.jp",
  "monappy.jp",
  "monappy.jp/u", # https://monappy.jp/u/abara_bone
  "mstdn.jp", # https://mstdn.jp/@oneb
  "nicoseiga.jp",
  "nicoseiga.jp/priv", # http://lohas.nicoseiga.jp/priv/2017365fb6cfbdf47ad26c7b6039feb218c5e2d4/1498430264/6820259
  "nicovideo.jp",
  "nicovideo.jp/user/illust", # http://seiga.nicovideo.jp/user/illust/29075429
  "nijie.info", # http://nijie.info/members.php?id=15235
  "patreon.com", # http://patreon.com/serafleur
  "pawoo.net", # https://pawoo.net/@148nasuka
  "pawoo.net/web/accounts", # https://pawoo.net/web/accounts/228341
  "picarto.tv", # https://picarto.tv/CheckerBoardAZN
  "picarto.tv/live", # https://www.picarto.tv/live/channel.php?watch=aaaninja
  "pictaram.com", # http://www.pictaram.com/user/5ish/3048385011/1350040096769940245_3048385011
  "pinterest.com", # http://www.pinterest.com/alexandernanitc/
  "pixiv.cc", # http://pixiv.cc/0123456789/
  "pixiv.net", # https://www.pixiv.net/member.php?id=10442390
  "pixiv.net/stacc", # https://www.pixiv.net/stacc/aaaninja2013
  "i.pximg.net",
  "plurk.com", # http://www.plurk.com/a1amorea1a1
  "privatter.net",
  "privatter.net/u", # http://privatter.net/u/saaaatonaaaa
  "rule34.paheal.net",
  "rule34.paheal.net/post/list", # http://rule34.paheal.net/post/list/Reach025/
  "sankakucomplex.com", # https://chan.sankakucomplex.com/?tags=user%3ASubridet
  "society6.com", # http://society6.com/serafleur/
  "tinami.com",
  "tinami.com/creator/profile", # http://www.tinami.com/creator/profile/29024
  "data.tumblr.com",
  /\d+\.media\.tumblr\.com/i,
  "twipple.jp",
  "twipple.jp/user", # http://p.twipple.jp/user/Type10TK
  "twitch.tv", # https://www.twitch.tv/5ish
  "twitpic.com",
  "twitpic.com/photos", # http://twitpic.com/photos/Type10TK
  "twitter.com", # https://twitter.com/akkij0358
  "ustream.tv",
  "ustream.tv/channel", # http://www.ustream.tv/channel/633b
  "ustream.tv/user", # http://www.ustream.tv/user/kazaputi
  "vk.com", # https://vk.com/id425850679
  "weibo.com", # http://www.weibo.com/5536681649
  "wp.com",
  "yande.re",
  "youtube.com",
  "youtube.com/c", # https://www.youtube.com/c/serafleurArt
  "youtube.com/channel", # https://www.youtube.com/channel/UCfrCa2Y6VulwHD3eNd3HBRA
  "youtube.com/user", # https://www.youtube.com/user/148nasuka
  "youtu.be", # http://youtu.be/gibeLKKRT-0
].freeze
|
||||
|
||||
# Single regexp matching any blacklisted site URL in its bare form:
# scheme, any number of subdomain labels, the blacklist entry, then exactly
# one trailing slash and nothing else.
SITE_BLACKLIST_REGEXP = Regexp.union(
  SITE_BLACKLIST.map { |site|
    # String entries are literal text and must be escaped; Regexp entries are
    # interpolated as-is so their own pattern syntax is preserved.
    fragment = site.is_a?(String) ? Regexp.escape(site) : site
    %r!\Ahttps?://(?:[a-zA-Z0-9_-]+\.)*#{fragment}/\z!i
  }
)
|
||||
|
||||
def find_all_by_url(url)
|
||||
url = ArtistUrl.normalize(url)
|
||||
artists = []
|
||||
@@ -42,14 +154,8 @@ class Artist < ApplicationRecord
|
||||
u = u.to_escaped_for_sql_like.gsub(/\*/, '%') + '%'
|
||||
artists += Artist.joins(:urls).where(["artists.is_active = TRUE AND artist_urls.normalized_url LIKE ? ESCAPE E'\\\\'", u]).limit(10).order("artists.name").all
|
||||
url = File.dirname(url) + "/"
|
||||
break if url =~ /pixiv\.net\/(?:img\/)?$/i
|
||||
break if url =~ /lohas\.nicoseiga\.jp\/priv\/$/i
|
||||
break if url =~ /nicovideo\.jp\/user\/illust/
|
||||
break if url =~ /(?:data|media)\.tumblr\.com\/[a-z0-9]+\/$/i
|
||||
break if url =~ /deviantart\.net\//i
|
||||
break if url =~ %r!\Ahttps?://(?:mobile\.)?twitter\.com/\Z!i
|
||||
break if url =~ %r!pawoo\.net/(?:web/)?$!i
|
||||
break if url =~ %r!\Ahttps?://(pic\d+\.)?nijie\.info/!i
|
||||
|
||||
break if url =~ SITE_BLACKLIST_REGEXP
|
||||
end
|
||||
|
||||
artists.inject({}) {|h, x| h[x.name] = x; h}.values.slice(0, 20)
|
||||
|
||||
@@ -15,6 +15,8 @@ class ArtistTest < ActiveSupport::TestCase
|
||||
|
||||
context "An artist" do
|
||||
setup do
|
||||
User.any_instance.stubs(:validate_sock_puppets).returns(true)
|
||||
|
||||
user = Timecop.travel(1.month.ago) {FactoryGirl.create(:user)}
|
||||
CurrentUser.user = user
|
||||
CurrentUser.ip_addr = "127.0.0.1"
|
||||
@@ -203,7 +205,7 @@ class ArtistTest < ActiveSupport::TestCase
|
||||
assert_artist_found("trixia", "http://trixdraws.deviantart.com/gallery/#/d722mrt")
|
||||
end
|
||||
|
||||
should_eventually "find the correct artist for image URLs" do
|
||||
should "find the correct artist for image URLs" do
|
||||
assert_artist_found("artgerm", "http://th05.deviantart.net/fs71/200H/f/2014/150/d/c/peachy_princess_by_artgerm-d7k7tmu.jpg")
|
||||
assert_artist_found("artgerm", "http://th05.deviantart.net/fs71/PRE/f/2014/150/d/c/peachy_princess_by_artgerm-d7k7tmu.jpg")
|
||||
assert_artist_found("artgerm", "http://fc06.deviantart.net/fs71/f/2014/150/d/c/peachy_princess_by_artgerm-d7k7tmu.jpg")
|
||||
@@ -212,6 +214,11 @@ class ArtistTest < ActiveSupport::TestCase
|
||||
assert_artist_found("trixia", "http://th01.deviantart.net/fs71/200H/i/2014/050/d/e/my_queen_by_trixdraws-d722mrt.jpg")
|
||||
assert_artist_found("trixia", "http://th09.deviantart.net/fs71/PRE/i/2014/050/d/e/my_queen_by_trixdraws-d722mrt.jpg")
|
||||
end
|
||||
|
||||
# Both a page URL and an image URL are expected to match no artist.
should "return nothing for unknown deviantart artists" do
  [
    "http://guweiz.deviantart.com/art/Battleship-551905391",
    "https://orig00.deviantart.net/7585/f/2015/219/a/5/battleship__by_guweiz-d94l8xb.png",
  ].each do |unknown_url|
    assert_artist_not_found(unknown_url)
  end
end
|
||||
end
|
||||
|
||||
context "when finding pixiv artists" do
|
||||
@@ -263,10 +270,17 @@ class ArtistTest < ActiveSupport::TestCase
|
||||
# Lookup of artists registered with a seiga.nicovideo.jp profile URL.
context "when finding nico seiga artists" do
  setup do
    # artist name => profile URL stored on the artist record
    {
      "osamari" => "http://seiga.nicovideo.jp/user/illust/7017777",
      "hakuro109" => "http://seiga.nicovideo.jp/user/illust/16265470",
    }.each do |artist_name, profile_url|
      FactoryGirl.create(:artist, name: artist_name, url_string: profile_url)
    end
  end

  should "find the artist by the profile" do
    # expected artist name => URL used for the search
    {
      "osamari" => "http://seiga.nicovideo.jp/seiga/im4937663",
      "hakuro109" => "http://lohas.nicoseiga.jp/priv/b9ea863e691f3a648dee5582fd6911c30dc8acab/1510092103/6424205",
    }.each do |artist_name, search_url|
      assert_artist_found(artist_name, search_url)
    end
  end

  should "return nothing for unknown nico seiga artists" do
    # URLs expected to match neither artist created in setup
    [
      "http://seiga.nicovideo.jp/seiga/im6605221",
      "http://lohas.nicoseiga.jp/priv/fd195b3405b19874c825eb4d81c9196086562c6b/1509089019/6605221",
    ].each do |unknown_url|
      assert_artist_not_found(unknown_url)
    end
  end
end
|
||||
|
||||
@@ -338,6 +352,22 @@ class ArtistTest < ActiveSupport::TestCase
|
||||
end
|
||||
end
|
||||
|
||||
# Lookup of artists registered with a *.tumblr.com blog URL.
context "when finding tumblr artists" do
  setup do
    # artist name => blog URL stored on the artist record
    {
      "ilya_kuvshinov" => "http://kuvshinov-ilya.tumblr.com",
      "j.k." => "https://jdotkdot5.tumblr.com",
    }.each do |artist_name, blog_url|
      FactoryGirl.create(:artist, name: artist_name, url_string: blog_url)
    end
  end

  should "find the artist" do
    # expected artist name => post URL used for the search
    {
      "ilya_kuvshinov" => "http://kuvshinov-ilya.tumblr.com/post/168641755845",
      "j.k." => "https://jdotkdot5.tumblr.com/post/168276640697",
    }.each do |artist_name, post_url|
      assert_artist_found(artist_name, post_url)
    end
  end

  should "return nothing for unknown tumblr artists" do
    # Blog not registered in setup, so no artist is expected to match.
    unknown_post_url = "https://peptosis.tumblr.com/post/168162082005"
    assert_artist_not_found(unknown_post_url)
  end
end
|
||||
|
||||
should "normalize its other names" do
|
||||
artist = FactoryGirl.create(:artist, :name => "a1", :other_names_comma => "aaa, bbb, ccc ddd")
|
||||
assert_equal("aaa, bbb, ccc_ddd", artist.other_names_comma)
|
||||
|
||||
Reference in New Issue
Block a user