Fix #2696: Bogus results on non-matching URL searches for Artists

This commit is contained in:
evazion
2017-12-05 22:50:47 -06:00
parent 86936f7200
commit 859fe99a80
2 changed files with 129 additions and 6 deletions

View File

@@ -34,15 +34,108 @@ class Artist < ApplicationRecord
# Subdomains are automatically included. e.g., "twitter.com" matches "www.twitter.com",
# "mobile.twitter.com" and any other subdomain of "twitter.com".
SITE_BLACKLIST = [
"artstation.com/artist", # http://www.artstation.com/artist/serafleur/
"www.artstation.com", # http://www.artstation.com/serafleur/
/(?:cdn[ab]?)\.artstation\.com/i, # http://serafleur.artstation.com
"ask.fm", # http://ask.fm/mikuroko_396
"bcyimg.com",
"bcyimg.com/drawer", # https://img9.bcyimg.com/drawer/32360/post/178vu/46229ec06e8111e79558c1b725ebc9e6.jpg
"bcy.net",
"bcy.net/illust/detail", # https://bcy.net/illust/detail/32360/1374683
"bcy.net/u", # http://bcy.net/u/1390261
"behance.net", # "https://www.behance.net/webang111
"booru.org",
"booru.org/drawfriends", # http://img.booru.org/drawfriends//images/36/de65da5f588b76bc1d9de8af976b540e2dff17e2.jpg
"donmai.us",
"donmai.us/users", # http://danbooru.donmai.us/users/507162/
"derpibooru.org",
"derpibooru.org/tags", # https://derpibooru.org/tags/artist-colon-checkerboardazn
"deviantart.net",
"dlsite.com",
"doujinshi.org",
"doujinshi.org/browse/circle", # http://www.doujinshi.org/browse/circle/65368/
"doujinshi.org/browse/author", # http://www.doujinshi.org/browse/author/979/23/
"doujinshi.mugimugi.org",
"doujinshi.mugimugi.org/browse/author", # http://doujinshi.mugimugi.org/browse/author/3029/
"doujinshi.mugimugi.org/browse/circle", # http://doujinshi.mugimugi.org/browse/circle/7210/
"drawcrowd.net", # https://drawcrowd.com/agussw
"drawr.net", # http://drawr.net/matsu310
"dropbox.com",
"dropbox.com/sh", # https://www.dropbox.com/sh/gz9okupqycr2vj2/GHt_oHDKsR
"dropbox.com/u", # http://dl.dropbox.com/u/76682289/daitoHP-WP/pict/
"e-hentai.org", # https://e-hentai.org/tag/artist:spirale
"e621.net",
"e621.net/post/index/1", # https://e621.net/post/index/1/spirale
"enty.jp", # https://enty.jp/aizawachihiro888
"enty.jp/users", # https://enty.jp/users/3766
"facebook.com", # https://www.facebook.com/LuutenantsLoot
"fantia.jp", # http://fantia.jp/no100
"fantia.jp/fanclubs", # https://fantia.jp/fanclubs/1711
"fav.me", # http://fav.me/d9y1njg
/blog-imgs-\d+(?:-origin)?\.fc2\.com/i,
"furaffinity.net",
"furaffinity.net/user", # http://www.furaffinity.net/user/achthenuts
"gelbooru.com", # http://gelbooru.com/index.php?page=account&s=profile&uname=junou
"inkbunny.net", # https://inkbunny.net/achthenuts
"plus.google.com", # https://plus.google.com/111509637967078773143/posts
"hentai-foundry.com",
"hentai-foundry.com/pictures/user", # http://www.hentai-foundry.com/pictures/user/aaaninja/
"hentai-foundry.com/user", # http://www.hentai-foundry.com/user/aaaninja/profile
%r!pictures\.hentai-foundry\.com(?:/\w)?!i, # http://pictures.hentai-foundry.com/a/aaaninja/
"i.imgur.com", # http://i.imgur.com/Ic9q3.jpg
"instagram.com", # http://www.instagram.com/serafleur.art/
"iwara.tv",
"iwara.tv/users", # http://ecchi.iwara.tv/users/marumega
"kym-cdn.com",
"livedoor.blogimg.jp",
"monappy.jp",
"monappy.jp/u", # https://monappy.jp/u/abara_bone
"mstdn.jp", # https://mstdn.jp/@oneb
"nicoseiga.jp",
/nicovideo\.jp\/user\/illust/,
"nijie.info",
"pawoo.net",
"pixiv.net",
"nicoseiga.jp/priv", # http://lohas.nicoseiga.jp/priv/2017365fb6cfbdf47ad26c7b6039feb218c5e2d4/1498430264/6820259
"nicovideo.jp",
"nicovideo.jp/user/illust", # http://seiga.nicovideo.jp/user/illust/29075429
"nijie.info", # http://nijie.info/members.php?id=15235
"patreon.com", # http://patreon.com/serafleur
"pawoo.net", # https://pawoo.net/@148nasuka
"pawoo.net/web/accounts", # https://pawoo.net/web/accounts/228341
"picarto.tv", # https://picarto.tv/CheckerBoardAZN
"picarto.tv/live", # https://www.picarto.tv/live/channel.php?watch=aaaninja
"pictaram.com", # http://www.pictaram.com/user/5ish/3048385011/1350040096769940245_3048385011
"pinterest.com", # http://www.pinterest.com/alexandernanitc/
"pixiv.cc", # http://pixiv.cc/0123456789/
"pixiv.net", # https://www.pixiv.net/member.php?id=10442390
"pixiv.net/stacc", # https://www.pixiv.net/stacc/aaaninja2013
"i.pximg.net",
"plurk.com", # http://www.plurk.com/a1amorea1a1
"privatter.net",
"privatter.net/u", # http://privatter.net/u/saaaatonaaaa
"rule34.paheal.net",
"rule34.paheal.net/post/list", # http://rule34.paheal.net/post/list/Reach025/
"sankakucomplex.com", # https://chan.sankakucomplex.com/?tags=user%3ASubridet
"society6.com", # http://society6.com/serafleur/
"tinami.com",
"tinami.com/creator/profile", # http://www.tinami.com/creator/profile/29024
"data.tumblr.com",
/\d+\.media\.tumblr\.com/i,
"twitter.com",
"twipple.jp",
"twipple.jp/user", # http://p.twipple.jp/user/Type10TK
"twitch.tv", # https://www.twitch.tv/5ish
"twitpic.com",
"twitpic.com/photos", # http://twitpic.com/photos/Type10TK
"twitter.com", # https://twitter.com/akkij0358
"ustream.tv",
"ustream.tv/channel", # http://www.ustream.tv/channel/633b
"ustream.tv/user", # http://www.ustream.tv/user/kazaputi
"vk.com", # https://vk.com/id425850679
"weibo.com", # http://www.weibo.com/5536681649
"wp.com",
"yande.re",
"youtube.com",
"youtube.com/c", # https://www.youtube.com/c/serafleurArt
"youtube.com/channel", # https://www.youtube.com/channel/UCfrCa2Y6VulwHD3eNd3HBRA
"youtube.com/user", # https://www.youtube.com/user/148nasuka
"youtu.be", # http://youtu.be/gibeLKKRT-0
]
SITE_BLACKLIST_REGEXP = Regexp.union(SITE_BLACKLIST.map do |domain|