Artist#find_all_by_url: refactor hardcoded bogus urls to blacklist.
This commit is contained in:
@@ -31,6 +31,25 @@ class Artist < ApplicationRecord
|
|||||||
extend ActiveSupport::Concern
|
extend ActiveSupport::Concern
|
||||||
|
|
||||||
module ClassMethods
|
module ClassMethods
|
||||||
|
# Sites listed here are matched by SITE_BLACKLIST_REGEXP when a URL has been
# reduced to the bare site root. Each entry is either a String, which is
# treated as a literal domain name (regexp metacharacters such as "." are
# escaped), or a Regexp, which is spliced into the final pattern unchanged and
# keeps its own flags.
#
# Subdomains are automatically included. e.g., "twitter.com" matches "www.twitter.com",
# "mobile.twitter.com" and any other subdomain of "twitter.com".
SITE_BLACKLIST = [
  "deviantart.net",
  "nicoseiga.jp",
  /nicovideo\.jp\/user\/illust/,
  "nijie.info",
  "pawoo.net",
  "pixiv.net",
  "data.tumblr.com",
  /\d+\.media\.tumblr\.com/i,
  "twitter.com",
].freeze

# Union of one fully anchored pattern per blacklist entry. A URL matches only
# when it is exactly: scheme (http or https), any number of subdomain labels,
# the blacklisted entry, and a single trailing slash -- nothing may follow.
# Frozen so the shared constant cannot be mutated at runtime.
SITE_BLACKLIST_REGEXP = Regexp.union(
  SITE_BLACKLIST.map do |site|
    # Strings are literal domains; Regexp entries are interpolated as-is.
    fragment = site.is_a?(String) ? Regexp.escape(site) : site
    %r!\Ahttps?://(?:[a-zA-Z0-9_-]+\.)*#{fragment}/\z!i
  end
).freeze
|
||||||
|
|
||||||
def find_all_by_url(url)
|
def find_all_by_url(url)
|
||||||
url = ArtistUrl.normalize(url)
|
url = ArtistUrl.normalize(url)
|
||||||
artists = []
|
artists = []
|
||||||
@@ -42,14 +61,8 @@ class Artist < ApplicationRecord
|
|||||||
u = u.to_escaped_for_sql_like.gsub(/\*/, '%') + '%'
|
u = u.to_escaped_for_sql_like.gsub(/\*/, '%') + '%'
|
||||||
artists += Artist.joins(:urls).where(["artists.is_active = TRUE AND artist_urls.normalized_url LIKE ? ESCAPE E'\\\\'", u]).limit(10).order("artists.name").all
|
artists += Artist.joins(:urls).where(["artists.is_active = TRUE AND artist_urls.normalized_url LIKE ? ESCAPE E'\\\\'", u]).limit(10).order("artists.name").all
|
||||||
url = File.dirname(url) + "/"
|
url = File.dirname(url) + "/"
|
||||||
break if url =~ /pixiv\.net\/(?:img\/)?$/i
|
|
||||||
break if url =~ /lohas\.nicoseiga\.jp\/priv\/$/i
|
break if url =~ SITE_BLACKLIST_REGEXP
|
||||||
break if url =~ /nicovideo\.jp\/user\/illust/
|
|
||||||
break if url =~ /(?:data|media)\.tumblr\.com\/[a-z0-9]+\/$/i
|
|
||||||
break if url =~ /deviantart\.net\//i
|
|
||||||
break if url =~ %r!\Ahttps?://(?:mobile\.)?twitter\.com/\Z!i
|
|
||||||
break if url =~ %r!pawoo\.net/(?:web/)?$!i
|
|
||||||
break if url =~ %r!\Ahttps?://(pic\d+\.)?nijie\.info/!i
|
|
||||||
end
|
end
|
||||||
|
|
||||||
artists.inject({}) {|h, x| h[x.name] = x; h}.values.slice(0, 20)
|
artists.inject({}) {|h, x| h[x.name] = x; h}.values.slice(0, 20)
|
||||||
|
|||||||
Reference in New Issue
Block a user