additional fixes for deviantart artist search (#3771)

This commit is contained in:
Albert Yi
2018-07-27 12:26:39 -07:00
parent 7753461f6f
commit 135b97d511
5 changed files with 63 additions and 42 deletions

View File

@@ -7,6 +7,20 @@ module Sources
url =~ /^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/
end
# Maps a DeviantArt URL onto a canonical form.
#
# * Image-server URLs (fcNN/thNN/preNN/origNN/imgNN on deviantart.net) whose
#   filename ends in "-d<id>." become "http://fav.me/d<id>".
# * "www.deviantart.com/<name>/art/..." URLs are returned unchanged.
# * "<name>.deviantart.com<rest>" URLs become
#   "http://www.deviantart.com/<name><rest>".
# * Anything else is returned unchanged.
def self.normalize(url)
  # Filename of the form "<title>_by_<artist>-d<id>.<ext>".
  return "http://fav.me/d#{Regexp.last_match(1)}" if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i

  # Filename of the form "<hex>-d<id>.<ext>".
  return "http://fav.me/d#{Regexp.last_match(1)}" if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i

  # Already a www-hosted art page: nothing to do.
  return url if url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}

  # Hosts beginning with one of these labels are never treated as artist subdomains.
  return url if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.}

  # Only "<name>.deviantart.com..." URLs are rewritten past this point.
  return url unless url =~ %r{\Ahttps?://(.+?)\.deviantart\.com(.*)}

  subdomain, remainder = Regexp.last_match.captures
  "http://www.deviantart.com/#{subdomain}#{remainder}"
end
def referer_url
if @referer_url =~ /deviantart\.com\/art\// && @url =~ /https?:\/\/(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net\//
@referer_url
@@ -63,7 +77,7 @@ module Sources
# Returns the artist's profile page URL, or "" when artist_name is blank.
def profile_url
return "" if artist_name.blank?
# NOTE(review): two URL literals follow. Only the last expression is the
# method's value, so the subdomain form on the next line has no effect as
# written -- presumably it is the pre-change line of this diff; confirm.
"https://#{artist_name}.deviantart.com"
"https://www.deviantart.com/#{artist_name}"
end
def image_url
@@ -102,22 +116,22 @@ module Sources
api_metadata[:description]
end
# True when `url` is NOT already a www.deviantart.com URL.
#
# The pattern is anchored with \A (start of string, not start of any line)
# and the dots are escaped so that only the literal host
# "www.deviantart.com" counts; an unescaped "." would also accept hosts such
# as "wwwxdeviantart.com".
def normalizable_for_artist_finder?
  url !~ %r!\Ahttps?://www\.deviantart\.com/!
end
# Truthy (match index 0) when `url` is already a www.deviantart.com URL,
# nil otherwise.
#
# The pattern is anchored with \A (start of string, not start of any line)
# and the dots are escaped so that only the literal host
# "www.deviantart.com" counts; an unescaped "." would also accept hosts such
# as "wwwxdeviantart.com".
def normalized_for_artist_finder?
  url =~ %r!\Ahttps?://www\.deviantart\.com/!
end
# Returns the value of profile_url as the form used for artist lookup.
def normalize_for_artist_finder!
profile_url
end
protected
# Memoized canonical form of `url`, cached in @normalized_url.
#
# NOTE(review): this body contains both an inline rule chain and a delegating
# call to self.class.normalize(url) -- presumably the pre- and post-change
# lines of this diff shown together; confirm which is live. As written, the
# second `||=` only runs when the first leaves @normalized_url nil (the
# chain's `else` branch).
def normalized_url
@normalized_url ||= begin
# Image-server filename of the form "<title>_by_<artist>-d<id>.<ext>".
if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i
"http://fav.me/d#{$1}"
# Image-server filename of the form "<hex>-d<id>.<ext>".
elsif url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
"http://fav.me/d#{$1}"
# A www-hosted art page is kept as is.
elsif url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
url
# "<name>.deviantart.com/<rest>" is rewritten to the www-hosted path form.
elsif url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} && url =~ %r{\Ahttps?://(.+?)\.deviantart\.com/?(.*)}
"https://www.deviantart.com/#{$1}/#{$2}"
else
nil
end
end
@normalized_url ||= self.class.normalize(url)
end
def page

View File

@@ -18,15 +18,18 @@ class ArtistUrl < ApplicationRecord
if url.nil?
nil
else
url = url.gsub(/^https:\/\//, "http://")
url = url.gsub(/^http:\/\/blog\d+\.fc2/, "http://blog.fc2")
url = url.gsub(/^http:\/\/blog-imgs-\d+\.fc2/, "http://blog.fc2")
url = url.gsub(/^http:\/\/blog-imgs-\d+-\w+\.fc2/, "http://blog.fc2")
url = url.sub(%r!(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/')
url = url.sub(%r!^https://!, "http://")
url = url.sub(%r!^http://blog\d+\.fc2!, "http://blog.fc2")
url = url.sub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2")
url = url.sub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2")
url = url.sub(%r!^(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/')
url = url.sub(%r!^http://pictures.hentai-foundry.com//!, "http://pictures.hentai-foundry.com/")
if url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.}
url = url.sub(%r{\Ahttps?://(.+?)\.deviantart\.com(.*)}, 'http://www.deviantart.com/\1\2')
end
# the strategy won't always work for twitter because it looks for a status
url = url.downcase if url =~ /https?:\/\/(?:mobile\.)?twitter\.com/
url = url.downcase if url =~ %r!^https?://(?:mobile\.)?twitter\.com!
begin
url = Sources::Site.new(url).normalize_for_artist_finder!
@@ -43,13 +46,13 @@ class ArtistUrl < ApplicationRecord
if url.nil?
nil
else
url = url.gsub(/^https:\/\//, "http://")
url = url.gsub(/^http:\/\/blog\d+\.fc2/, "http://blog.fc2")
url = url.gsub(/^http:\/\/blog-imgs-\d+\.fc2/, "http://blog.fc2")
url = url.gsub(/^http:\/\/blog-imgs-\d+-\w+\.fc2/, "http://blog.fc2")
url = url.gsub(/^http:\/\/img\d+\.pixiv\.net/, "http://img.pixiv.net")
url = url.gsub(/^http:\/\/i\d+\.pixiv\.net\/img\d+/, "http://img.pixiv.net")
url = url.gsub(/\/+\Z/, "")
url = url.gsub(%r!^https://!, "http://")
url = url.gsub(%r!^http://blog\d+\.fc2!, "http://blog.fc2")
url = url.gsub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2")
url = url.gsub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2")
url = url.gsub(%r!^http://img\d+\.pixiv\.net!, "http://img.pixiv.net")
url = url.gsub(%r!^http://i\d+\.pixiv\.net/img\d+!, "http://img.pixiv.net")
url = url.gsub(%r!/+\Z!, "")
url + "/"
end
end
@@ -59,12 +62,16 @@ class ArtistUrl < ApplicationRecord
url = File.dirname(url)
end
url = url.gsub(/^https:\/\//, "http://")
url = url.gsub(/^http:\/\/blog\d+\.fc2/, "http://blog*.fc2")
url = url.gsub(/^http:\/\/blog-imgs-\d+\.fc2/, "http://blog*.fc2")
url = url.gsub(/^http:\/\/blog-imgs-\d+-\w+\.fc2/, "http://blog*.fc2")
url = url.gsub(/^http:\/\/img\d+\.pixiv\.net/, "http://img*.pixiv.net")
url = url.gsub(/^http:\/\/i\d+\.pixiv\.net\/img\d+/, "http://*.pixiv.net/img*")
url = url.gsub(%r!^https://!, "http://")
url = url.gsub(%r!^http://blog\d+\.fc2!, "http://blog*.fc2")
url = url.gsub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog*.fc2")
url = url.gsub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog*.fc2")
url = url.gsub(%r!^http://img\d+\.pixiv\.net!, "http://img*.pixiv.net")
url = url.gsub(%r!^http://i\d+\.pixiv\.net/img\d+!, "http://*.pixiv.net/img*")
# Rewrite "<name>.deviantart.com<rest>" to "http://www.deviantart.com/<name><rest>",
# leaving hosts that start with fc./th./pre./orig./img./www. untouched.
if url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.}
  # The replacement must be single-quoted: inside double quotes "\1\2" are
  # octal escapes (bytes 0x01 and 0x02), not backreferences to the captures.
  url = url.sub(%r{\Ahttps?://(.+?)\.deviantart\.com(.*)}, 'http://www.deviantart.com/\1\2')
end
url
end
def parse_prefix