diff --git a/app/models/artist.rb b/app/models/artist.rb index 2aa4361c9..08272f2ff 100644 --- a/app/models/artist.rb +++ b/app/models/artist.rb @@ -31,6 +31,118 @@ class Artist < ApplicationRecord extend ActiveSupport::Concern module ClassMethods + # Subdomains are automatically included. e.g., "twitter.com" matches "www.twitter.com", + # "mobile.twitter.com" and any other subdomain of "twitter.com". + SITE_BLACKLIST = [ + "artstation.com/artist", # http://www.artstation.com/artist/serafleur/ + "www.artstation.com", # http://www.artstation.com/serafleur/ + /(?:cdn[ab]?)\.artstation\.com/i, # http://serafleur.artstation.com + "ask.fm", # http://ask.fm/mikuroko_396 + "bcyimg.com", + "bcyimg.com/drawer", # https://img9.bcyimg.com/drawer/32360/post/178vu/46229ec06e8111e79558c1b725ebc9e6.jpg + "bcy.net", + "bcy.net/illust/detail", # https://bcy.net/illust/detail/32360/1374683 + "bcy.net/u", # http://bcy.net/u/1390261 + "behance.net", # "https://www.behance.net/webang111 + "booru.org", + "booru.org/drawfriends", # http://img.booru.org/drawfriends//images/36/de65da5f588b76bc1d9de8af976b540e2dff17e2.jpg + "donmai.us", + "donmai.us/users", # http://danbooru.donmai.us/users/507162/ + "derpibooru.org", + "derpibooru.org/tags", # https://derpibooru.org/tags/artist-colon-checkerboardazn + "deviantart.net", + "dlsite.com", + "doujinshi.org", + "doujinshi.org/browse/circle", # http://www.doujinshi.org/browse/circle/65368/ + "doujinshi.org/browse/author", # http://www.doujinshi.org/browse/author/979/23/ + "doujinshi.mugimugi.org", + "doujinshi.mugimugi.org/browse/author", # http://doujinshi.mugimugi.org/browse/author/3029/ + "doujinshi.mugimugi.org/browse/circle", # http://doujinshi.mugimugi.org/browse/circle/7210/ + "drawcrowd.net", # https://drawcrowd.com/agussw + "drawr.net", # http://drawr.net/matsu310 + "dropbox.com", + "dropbox.com/sh", # https://www.dropbox.com/sh/gz9okupqycr2vj2/GHt_oHDKsR + "dropbox.com/u", # http://dl.dropbox.com/u/76682289/daitoHP-WP/pict/ + "e-hentai.org", # https://e-hentai.org/tag/artist:spirale + "e621.net", + "e621.net/post/index/1", # https://e621.net/post/index/1/spirale + "enty.jp", # https://enty.jp/aizawachihiro888 + "enty.jp/users", # https://enty.jp/users/3766 + "facebook.com", # https://www.facebook.com/LuutenantsLoot + "fantia.jp", # http://fantia.jp/no100 + "fantia.jp/fanclubs", # https://fantia.jp/fanclubs/1711 + "fav.me", # http://fav.me/d9y1njg + /blog-imgs-\d+(?:-origin)?\.fc2\.com/i, + "furaffinity.net", + "furaffinity.net/user", # http://www.furaffinity.net/user/achthenuts + "gelbooru.com", # http://gelbooru.com/index.php?page=account&s=profile&uname=junou + "inkbunny.net", # https://inkbunny.net/achthenuts + "plus.google.com", # https://plus.google.com/111509637967078773143/posts + "hentai-foundry.com", + "hentai-foundry.com/pictures/user", # http://www.hentai-foundry.com/pictures/user/aaaninja/ + "hentai-foundry.com/user", # http://www.hentai-foundry.com/user/aaaninja/profile + %r!pictures\.hentai-foundry\.com(?:/\w)?!i, # http://pictures.hentai-foundry.com/a/aaaninja/ + "i.imgur.com", # http://i.imgur.com/Ic9q3.jpg + "instagram.com", # http://www.instagram.com/serafleur.art/ + "iwara.tv", + "iwara.tv/users", # http://ecchi.iwara.tv/users/marumega + "kym-cdn.com", + "livedoor.blogimg.jp", + "monappy.jp", + "monappy.jp/u", # https://monappy.jp/u/abara_bone + "mstdn.jp", # https://mstdn.jp/@oneb + "nicoseiga.jp", + "nicoseiga.jp/priv", # http://lohas.nicoseiga.jp/priv/2017365fb6cfbdf47ad26c7b6039feb218c5e2d4/1498430264/6820259 + "nicovideo.jp", + "nicovideo.jp/user/illust", # http://seiga.nicovideo.jp/user/illust/29075429 + "nijie.info", # http://nijie.info/members.php?id=15235 + "patreon.com", # http://patreon.com/serafleur + "pawoo.net", # https://pawoo.net/@148nasuka + "pawoo.net/web/accounts", # https://pawoo.net/web/accounts/228341 + "picarto.tv", # https://picarto.tv/CheckerBoardAZN + "picarto.tv/live", # https://www.picarto.tv/live/channel.php?watch=aaaninja + "pictaram.com", # http://www.pictaram.com/user/5ish/3048385011/1350040096769940245_3048385011 + "pinterest.com", # http://www.pinterest.com/alexandernanitc/ + "pixiv.cc", # http://pixiv.cc/0123456789/ + "pixiv.net", # https://www.pixiv.net/member.php?id=10442390 + "pixiv.net/stacc", # https://www.pixiv.net/stacc/aaaninja2013 + "i.pximg.net", + "plurk.com", # http://www.plurk.com/a1amorea1a1 + "privatter.net", + "privatter.net/u", # http://privatter.net/u/saaaatonaaaa + "rule34.paheal.net", + "rule34.paheal.net/post/list", # http://rule34.paheal.net/post/list/Reach025/ + "sankakucomplex.com", # https://chan.sankakucomplex.com/?tags=user%3ASubridet + "society6.com", # http://society6.com/serafleur/ + "tinami.com", + "tinami.com/creator/profile", # http://www.tinami.com/creator/profile/29024 + "data.tumblr.com", + /\d+\.media\.tumblr\.com/i, + "twipple.jp", + "twipple.jp/user", # http://p.twipple.jp/user/Type10TK + "twitch.tv", # https://www.twitch.tv/5ish + "twitpic.com", + "twitpic.com/photos", # http://twitpic.com/photos/Type10TK + "twitter.com", # https://twitter.com/akkij0358 + "ustream.tv", + "ustream.tv/channel", # http://www.ustream.tv/channel/633b + "ustream.tv/user", # http://www.ustream.tv/user/kazaputi + "vk.com", # https://vk.com/id425850679 + "weibo.com", # http://www.weibo.com/5536681649 + "wp.com", + "yande.re", + "youtube.com", + "youtube.com/c", # https://www.youtube.com/c/serafleurArt + "youtube.com/channel", # https://www.youtube.com/channel/UCfrCa2Y6VulwHD3eNd3HBRA + "youtube.com/user", # https://www.youtube.com/user/148nasuka + "youtu.be", # http://youtu.be/gibeLKKRT-0 + ] + + SITE_BLACKLIST_REGEXP = Regexp.union(SITE_BLACKLIST.map do |domain| + domain = Regexp.escape(domain) if domain.is_a?(String) + %r!\Ahttps?://(?:[a-zA-Z0-9_-]+\.)*#{domain}/\z!i + end) + def find_all_by_url(url) url = ArtistUrl.normalize(url) artists = [] @@ -42,14 +154,8 @@ class Artist < ApplicationRecord u = u.to_escaped_for_sql_like.gsub(/\*/, '%') + '%' artists += Artist.joins(:urls).where(["artists.is_active = TRUE AND artist_urls.normalized_url LIKE ? ESCAPE E'\\\\'", u]).limit(10).order("artists.name").all url = File.dirname(url) + "/" - break if url =~ /pixiv\.net\/(?:img\/)?$/i - break if url =~ /lohas\.nicoseiga\.jp\/priv\/$/i - break if url =~ /nicovideo\.jp\/user\/illust/ - break if url =~ /(?:data|media)\.tumblr\.com\/[a-z0-9]+\/$/i - break if url =~ /deviantart\.net\//i - break if url =~ %r!\Ahttps?://(?:mobile\.)?twitter\.com/\Z!i - break if url =~ %r!pawoo\.net/(?:web/)?$!i - break if url =~ %r!\Ahttps?://(pic\d+\.)?nijie\.info/!i + + break if url =~ SITE_BLACKLIST_REGEXP end artists.inject({}) {|h, x| h[x.name] = x; h}.values.slice(0, 20) diff --git a/test/unit/artist_test.rb b/test/unit/artist_test.rb index 92b21e337..3ac48dae0 100644 --- a/test/unit/artist_test.rb +++ b/test/unit/artist_test.rb @@ -15,6 +15,8 @@ class ArtistTest < ActiveSupport::TestCase context "An artist" do setup do + User.any_instance.stubs(:validate_sock_puppets).returns(true) + user = Timecop.travel(1.month.ago) {FactoryGirl.create(:user)} CurrentUser.user = user CurrentUser.ip_addr = "127.0.0.1" @@ -203,7 +205,7 @@ class ArtistTest < ActiveSupport::TestCase assert_artist_found("trixia", "http://trixdraws.deviantart.com/gallery/#/d722mrt") end - should_eventually "find the correct artist for image URLs" do + should "find the correct artist for image URLs" do assert_artist_found("artgerm", "http://th05.deviantart.net/fs71/200H/f/2014/150/d/c/peachy_princess_by_artgerm-d7k7tmu.jpg") assert_artist_found("artgerm", "http://th05.deviantart.net/fs71/PRE/f/2014/150/d/c/peachy_princess_by_artgerm-d7k7tmu.jpg") assert_artist_found("artgerm", "http://fc06.deviantart.net/fs71/f/2014/150/d/c/peachy_princess_by_artgerm-d7k7tmu.jpg") @@ -212,6 +214,11 @@ class ArtistTest < ActiveSupport::TestCase assert_artist_found("trixia", "http://th01.deviantart.net/fs71/200H/i/2014/050/d/e/my_queen_by_trixdraws-d722mrt.jpg") assert_artist_found("trixia", "http://th09.deviantart.net/fs71/PRE/i/2014/050/d/e/my_queen_by_trixdraws-d722mrt.jpg") end + + should "return nothing for unknown deviantart artists" do + assert_artist_not_found("http://guweiz.deviantart.com/art/Battleship-551905391") + assert_artist_not_found("https://orig00.deviantart.net/7585/f/2015/219/a/5/battleship__by_guweiz-d94l8xb.png") + end end context "when finding pixiv artists" do @@ -263,10 +270,17 @@ class ArtistTest < ActiveSupport::TestCase context "when finding nico seiga artists" do setup do FactoryGirl.create(:artist, :name => "osamari", :url_string => "http://seiga.nicovideo.jp/user/illust/7017777") + FactoryGirl.create(:artist, :name => "hakuro109", :url_string => "http://seiga.nicovideo.jp/user/illust/16265470") end should "find the artist by the profile" do assert_artist_found("osamari", "http://seiga.nicovideo.jp/seiga/im4937663") + assert_artist_found("hakuro109", "http://lohas.nicoseiga.jp/priv/b9ea863e691f3a648dee5582fd6911c30dc8acab/1510092103/6424205") + end + + should "return nothing for unknown nico seiga artists" do + assert_artist_not_found("http://seiga.nicovideo.jp/seiga/im6605221") + assert_artist_not_found("http://lohas.nicoseiga.jp/priv/fd195b3405b19874c825eb4d81c9196086562c6b/1509089019/6605221") end end @@ -338,6 +352,22 @@ class ArtistTest < ActiveSupport::TestCase end end + context "when finding tumblr artists" do + setup do + FactoryGirl.create(:artist, :name => "ilya_kuvshinov", :url_string => "http://kuvshinov-ilya.tumblr.com") + FactoryGirl.create(:artist, :name => "j.k.", :url_string => "https://jdotkdot5.tumblr.com") + end + + should "find the artist" do + assert_artist_found("ilya_kuvshinov", "http://kuvshinov-ilya.tumblr.com/post/168641755845") + assert_artist_found("j.k.", "https://jdotkdot5.tumblr.com/post/168276640697") + end + + should "return nothing for unknown tumblr artists" do + assert_artist_not_found("https://peptosis.tumblr.com/post/168162082005") + end + end + should "normalize its other names" do artist = FactoryGirl.create(:artist, :name => "a1", :other_names_comma => "aaa, bbb, ccc ddd") assert_equal("aaa, bbb, ccc_ddd", artist.other_names_comma)