additional fixes for deviantart artist search (#3771)

This commit is contained in:
Albert Yi
2018-07-27 12:26:39 -07:00
parent 7753461f6f
commit 135b97d511
5 changed files with 63 additions and 42 deletions

View File

@@ -7,6 +7,20 @@ module Sources
url =~ /^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/
end
# Rewrites a DeviantArt URL into a canonical form.
#
# * Image-host URLs (fcNN/thNN/preNN/origNN/imgNN on deviantart.net) whose
#   file name ends in "-d<id>." become "http://fav.me/d<id>".
# * "http(s)://<name>.deviantart.com<rest>" becomes
#   "http://www.deviantart.com/<name><rest>".
# * Everything else, including URLs already on www.deviantart.com, is
#   returned unchanged.
#
# @param url [String] the URL to normalize
# @return [String] the rewritten URL, or +url+ itself when no rule applies
def self.normalize(url)
  case url
  when %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i,
       %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
    # Both file-name layouts ("title_by_artist-dID" and "hash-dID") carry the
    # deviation id in the first capture group.
    "http://fav.me/d#{Regexp.last_match(1)}"
  when %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.}
    # Hosts starting with one of these labels are never treated as an
    # artist subdomain; leave them alone.
    url
  when %r{\Ahttps?://([^/]+?)\.deviantart\.com(.*)}
    # The subdomain capture is limited to host characters ([^/]) so that a
    # ".deviantart.com" occurring later in the path or query string of some
    # other site's URL cannot trigger a rewrite.
    "http://www.deviantart.com/#{Regexp.last_match(1)}#{Regexp.last_match(2)}"
  else
    url
  end
end
def referer_url
if @referer_url =~ /deviantart\.com\/art\// && @url =~ /https?:\/\/(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net\//
@referer_url
@@ -63,7 +77,7 @@ module Sources
def profile_url
return "" if artist_name.blank?
"https://#{artist_name}.deviantart.com"
"https://www.deviantart.com/#{artist_name}"
end
def image_url
@@ -102,22 +116,22 @@ module Sources
api_metadata[:description]
end
# Reports whether +url+ does NOT already start with
# "http(s)://www.deviantart.com/".
#
# The pattern is anchored with \A (start of string, not start of any line)
# and the dots are escaped so that only the literal host name matches.
#
# @return [Boolean] true when the URL lacks the www.deviantart.com prefix
def normalizable_for_artist_finder?
  url !~ %r{\Ahttps?://www\.deviantart\.com/}
end
# Reports whether +url+ already starts with "http(s)://www.deviantart.com/".
#
# The pattern is anchored with \A (start of string, not start of any line)
# and the dots are escaped so that only the literal host name matches.
#
# @return [Integer, nil] 0 (truthy) when the prefix matches, nil otherwise
def normalized_for_artist_finder?
  url =~ %r{\Ahttps?://www\.deviantart\.com/}
end
# Returns whatever profile_url returns. This method performs no mutation
# of its own, despite the bang in its name.
def normalize_for_artist_finder!
profile_url
end
protected
def normalized_url
@normalized_url ||= begin
if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i
"http://fav.me/d#{$1}"
elsif url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
"http://fav.me/d#{$1}"
elsif url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
url
elsif url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} && url =~ %r{\Ahttps?://(.+?)\.deviantart\.com/?(.*)}
"https://www.deviantart.com/#{$1}/#{$2}"
else
nil
end
end
@normalized_url ||= self.class.normalize(url)
end
def page

View File

@@ -18,15 +18,18 @@ class ArtistUrl < ApplicationRecord
if url.nil?
nil
else
url = url.gsub(/^https:\/\//, "http://")
url = url.gsub(/^http:\/\/blog\d+\.fc2/, "http://blog.fc2")
url = url.gsub(/^http:\/\/blog-imgs-\d+\.fc2/, "http://blog.fc2")
url = url.gsub(/^http:\/\/blog-imgs-\d+-\w+\.fc2/, "http://blog.fc2")
url = url.sub(%r!(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/')
url = url.sub(%r!^https://!, "http://")
url = url.sub(%r!^http://blog\d+\.fc2!, "http://blog.fc2")
url = url.sub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2")
url = url.sub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2")
url = url.sub(%r!^(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/')
url = url.sub(%r!^http://pictures.hentai-foundry.com//!, "http://pictures.hentai-foundry.com/")
if url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.}
url = url.sub(%r{\Ahttps?://(.+?)\.deviantart\.com(.*)}, 'http://www.deviantart.com/\1\2')
end
# the strategy won't always work for twitter because it looks for a status
url = url.downcase if url =~ /https?:\/\/(?:mobile\.)?twitter\.com/
url = url.downcase if url =~ %r!^https?://(?:mobile\.)?twitter\.com!
begin
url = Sources::Site.new(url).normalize_for_artist_finder!
@@ -43,13 +46,13 @@ class ArtistUrl < ApplicationRecord
if url.nil?
nil
else
url = url.gsub(/^https:\/\//, "http://")
url = url.gsub(/^http:\/\/blog\d+\.fc2/, "http://blog.fc2")
url = url.gsub(/^http:\/\/blog-imgs-\d+\.fc2/, "http://blog.fc2")
url = url.gsub(/^http:\/\/blog-imgs-\d+-\w+\.fc2/, "http://blog.fc2")
url = url.gsub(/^http:\/\/img\d+\.pixiv\.net/, "http://img.pixiv.net")
url = url.gsub(/^http:\/\/i\d+\.pixiv\.net\/img\d+/, "http://img.pixiv.net")
url = url.gsub(/\/+\Z/, "")
url = url.gsub(%r!^https://!, "http://")
url = url.gsub(%r!^http://blog\d+\.fc2!, "http://blog.fc2")
url = url.gsub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2")
url = url.gsub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2")
url = url.gsub(%r!^http://img\d+\.pixiv\.net!, "http://img.pixiv.net")
url = url.gsub(%r!^http://i\d+\.pixiv\.net/img\d+!, "http://img.pixiv.net")
url = url.gsub(%r!/+\Z!, "")
url + "/"
end
end
@@ -59,12 +62,16 @@ class ArtistUrl < ApplicationRecord
url = File.dirname(url)
end
url = url.gsub(/^https:\/\//, "http://")
url = url.gsub(/^http:\/\/blog\d+\.fc2/, "http://blog*.fc2")
url = url.gsub(/^http:\/\/blog-imgs-\d+\.fc2/, "http://blog*.fc2")
url = url.gsub(/^http:\/\/blog-imgs-\d+-\w+\.fc2/, "http://blog*.fc2")
url = url.gsub(/^http:\/\/img\d+\.pixiv\.net/, "http://img*.pixiv.net")
url = url.gsub(/^http:\/\/i\d+\.pixiv\.net\/img\d+/, "http://*.pixiv.net/img*")
url = url.gsub(%r!^https://!, "http://")
url = url.gsub(%r!^http://blog\d+\.fc2!, "http://blog*.fc2")
url = url.gsub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog*.fc2")
url = url.gsub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog*.fc2")
url = url.gsub(%r!^http://img\d+\.pixiv\.net!, "http://img*.pixiv.net")
url = url.gsub(%r!^http://i\d+\.pixiv\.net/img\d+!, "http://*.pixiv.net/img*")
if url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.}
  # The replacement must be single-quoted: \1 and \2 have to reach String#sub
  # as backreferences. In a double-quoted literal they are octal escapes
  # (bytes 0x01 and 0x02), which would discard the captured artist name and
  # path. The subdomain becomes the first path segment on www.deviantart.com.
  url = url.sub(%r{\Ahttps?://(.+?)\.deviantart\.com(.*)}, 'http://www.deviantart.com/\1\2')
end
url
end
def parse_prefix

View File

@@ -228,10 +228,10 @@ class ArtistTest < ActiveSupport::TestCase
end
should "find the correct artist for page URLs" do
assert_artist_found("artgerm", "http://artgerm.deviantart.com/art/Peachy-Princess-Ver-2-457220550")
assert_artist_found("artgerm", "http://www.deviantart.com/artgerm/art/Peachy-Princess-Ver-2-457220550")
assert_artist_found("trixia", "http://trixdraws.deviantart.com/art/My-Queen-426745289")
assert_artist_found("trixia", "http://trixdraws.deviantart.com/gallery/#/d722mrt")
assert_artist_found("trixia", "http://www.deviantart.com/trixdraws/art/My-Queen-426745289")
assert_artist_found("trixia", "http://www.deviantart.com/trixdraws/gallery/#/d722mrt")
end
should "find the correct artist for image URLs" do
@@ -243,11 +243,6 @@ class ArtistTest < ActiveSupport::TestCase
assert_artist_found("trixia", "http://th01.deviantart.net/fs71/200H/i/2014/050/d/e/my_queen_by_trixdraws-d722mrt.jpg")
assert_artist_found("trixia", "http://th09.deviantart.net/fs71/PRE/i/2014/050/d/e/my_queen_by_trixdraws-d722mrt.jpg")
end
should "return nothing for unknown deviantart artists" do
assert_artist_not_found("http://guweiz.deviantart.com/art/Battleship-551905391")
assert_artist_not_found("https://orig00.deviantart.net/7585/f/2015/219/a/5/battleship__by_guweiz-d94l8xb.png")
end
end
context "when finding pixiv artists" do

View File

@@ -55,6 +55,11 @@ class ArtistUrlTest < ActiveSupport::TestCase
assert_equal("http://blog.fc2.com/monet/", url.normalized_url)
end
# A profile URL on an artist subdomain is stored in normalized form with the
# artist name as the first path segment on www.deviantart.com.
should "normalize deviant art artist urls" do
  artist_url = FactoryBot.create(:artist_url, url: "https://caidychen.deviantart.com/")
  assert_equal("http://www.deviantart.com/caidychen/", artist_url.normalized_url)
end
should "normalize nico seiga artist urls" do
url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/user/illust/1826959")
assert_equal("http://seiga.nicovideo.jp/user/illust/1826959/", url.normalized_url)

View File

@@ -57,7 +57,7 @@ module Sources
end
should "get the profile" do
assert_equal("https://noizave.deviantart.com", @site.profile_url)
assert_equal("https://www.deviantart.com/noizave", @site.profile_url)
end
should "get the artist name" do