diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 4d5f7cb33..1e7766d22 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -7,6 +7,20 @@ module Sources url =~ /^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/ end + def self.normalize(url) + if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i + "http://fav.me/d#{$1}" + elsif url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i + "http://fav.me/d#{$1}" + elsif url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/} + url + elsif url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} && url =~ %r{\Ahttps?://(.+?)\.deviantart\.com(.*)} + "http://www.deviantart.com/#{$1}#{$2}" + else + url + end + end + def referer_url if @referer_url =~ /deviantart\.com\/art\// && @url =~ /https?:\/\/(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net\// @referer_url @@ -63,7 +77,7 @@ module Sources def profile_url return "" if artist_name.blank? - "https://#{artist_name}.deviantart.com" + "https://www.deviantart.com/#{artist_name}" end def image_url @@ -102,22 +116,22 @@ module Sources api_metadata[:description] end + def normalizable_for_artist_finder? + url !~ %r!\Ahttps?://www\.deviantart\.com/! + end + + def normalized_for_artist_finder? + url =~ %r!\Ahttps?://www\.deviantart\.com/! + end + + def normalize_for_artist_finder! 
+ profile_url + end + protected def normalized_url - @normalized_url ||= begin - if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i - "http://fav.me/d#{$1}" - elsif url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i - "http://fav.me/d#{$1}" - elsif url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/} - url - elsif url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} && url =~ %r{\Ahttps?://(.+?)\.deviantart\.com/?(.*)} - "https://www.deviantart.com/#{$1}/#{$2}" - else - nil - end - end + @normalized_url ||= self.class.normalize(url) end def page diff --git a/app/models/artist_url.rb b/app/models/artist_url.rb index 98a2b54c7..98e47f139 100644 --- a/app/models/artist_url.rb +++ b/app/models/artist_url.rb @@ -18,15 +18,18 @@ class ArtistUrl < ApplicationRecord if url.nil? nil else - url = url.gsub(/^https:\/\//, "http://") - url = url.gsub(/^http:\/\/blog\d+\.fc2/, "http://blog.fc2") - url = url.gsub(/^http:\/\/blog-imgs-\d+\.fc2/, "http://blog.fc2") - url = url.gsub(/^http:\/\/blog-imgs-\d+-\w+\.fc2/, "http://blog.fc2") - url = url.sub(%r!(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/') + url = url.sub(%r!^https://!, "http://") + url = url.sub(%r!^http://blog\d+\.fc2!, "http://blog.fc2") + url = url.sub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2") + url = url.sub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2") + url = url.sub(%r!^(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/') url = url.sub(%r!^http://pictures.hentai-foundry.com//!, "http://pictures.hentai-foundry.com/") + if url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} + url = url.sub(%r{\Ahttps?://(.+?)\.deviantart\.com(.*)}, 'http://www.deviantart.com/\1\2') + end # the strategy won't always work for twitter because it looks for a status - url = url.downcase if url =~ /https?:\/\/(?:mobile\.)?twitter\.com/ + url = url.downcase if url =~ 
%r!^https?://(?:mobile\.)?twitter\.com! begin url = Sources::Site.new(url).normalize_for_artist_finder! @@ -43,13 +46,13 @@ class ArtistUrl < ApplicationRecord if url.nil? nil else - url = url.gsub(/^https:\/\//, "http://") - url = url.gsub(/^http:\/\/blog\d+\.fc2/, "http://blog.fc2") - url = url.gsub(/^http:\/\/blog-imgs-\d+\.fc2/, "http://blog.fc2") - url = url.gsub(/^http:\/\/blog-imgs-\d+-\w+\.fc2/, "http://blog.fc2") - url = url.gsub(/^http:\/\/img\d+\.pixiv\.net/, "http://img.pixiv.net") - url = url.gsub(/^http:\/\/i\d+\.pixiv\.net\/img\d+/, "http://img.pixiv.net") - url = url.gsub(/\/+\Z/, "") + url = url.gsub(%r!^https://!, "http://") + url = url.gsub(%r!^http://blog\d+\.fc2!, "http://blog.fc2") + url = url.gsub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2") + url = url.gsub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2") + url = url.gsub(%r!^http://img\d+\.pixiv\.net!, "http://img.pixiv.net") + url = url.gsub(%r!^http://i\d+\.pixiv\.net/img\d+!, "http://img.pixiv.net") + url = url.gsub(%r!/+\Z!, "") url + "/" end end @@ -59,12 +62,16 @@ class ArtistUrl < ApplicationRecord url = File.dirname(url) end - url = url.gsub(/^https:\/\//, "http://") - url = url.gsub(/^http:\/\/blog\d+\.fc2/, "http://blog*.fc2") - url = url.gsub(/^http:\/\/blog-imgs-\d+\.fc2/, "http://blog*.fc2") - url = url.gsub(/^http:\/\/blog-imgs-\d+-\w+\.fc2/, "http://blog*.fc2") - url = url.gsub(/^http:\/\/img\d+\.pixiv\.net/, "http://img*.pixiv.net") - url = url.gsub(/^http:\/\/i\d+\.pixiv\.net\/img\d+/, "http://*.pixiv.net/img*") + url = url.gsub(%r!^https://!, "http://") + url = url.gsub(%r!^http://blog\d+\.fc2!, "http://blog*.fc2") + url = url.gsub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog*.fc2") + url = url.gsub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog*.fc2") + url = url.gsub(%r!^http://img\d+\.pixiv\.net!, "http://img*.pixiv.net") + url = url.gsub(%r!^http://i\d+\.pixiv\.net/img\d+!, "http://*.pixiv.net/img*") + if url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} 
+ url = url.sub(%r{\Ahttps?://(.+?)\.deviantart\.com(.*)}, 'http://www.deviantart.com/\1\2') + end + url end def parse_prefix diff --git a/test/unit/artist_test.rb b/test/unit/artist_test.rb index bd1b7ba51..f28ff2c59 100644 --- a/test/unit/artist_test.rb +++ b/test/unit/artist_test.rb @@ -228,10 +228,10 @@ class ArtistTest < ActiveSupport::TestCase end should "find the correct artist for page URLs" do - assert_artist_found("artgerm", "http://artgerm.deviantart.com/art/Peachy-Princess-Ver-2-457220550") + assert_artist_found("artgerm", "http://www.deviantart.com/artgerm/art/Peachy-Princess-Ver-2-457220550") - assert_artist_found("trixia", "http://trixdraws.deviantart.com/art/My-Queen-426745289") - assert_artist_found("trixia", "http://trixdraws.deviantart.com/gallery/#/d722mrt") + assert_artist_found("trixia", "http://www.deviantart.com/trixdraws/art/My-Queen-426745289") + assert_artist_found("trixia", "http://www.deviantart.com/trixdraws/gallery/#/d722mrt") end should "find the correct artist for image URLs" do @@ -243,11 +243,6 @@ class ArtistTest < ActiveSupport::TestCase assert_artist_found("trixia", "http://th01.deviantart.net/fs71/200H/i/2014/050/d/e/my_queen_by_trixdraws-d722mrt.jpg") assert_artist_found("trixia", "http://th09.deviantart.net/fs71/PRE/i/2014/050/d/e/my_queen_by_trixdraws-d722mrt.jpg") end - - should "return nothing for unknown deviantart artists" do - assert_artist_not_found("http://guweiz.deviantart.com/art/Battleship-551905391") - assert_artist_not_found("https://orig00.deviantart.net/7585/f/2015/219/a/5/battleship__by_guweiz-d94l8xb.png") - end end context "when finding pixiv artists" do diff --git a/test/unit/artist_url_test.rb b/test/unit/artist_url_test.rb index 8cbeef412..b6f42349a 100644 --- a/test/unit/artist_url_test.rb +++ b/test/unit/artist_url_test.rb @@ -55,6 +55,11 @@ class ArtistUrlTest < ActiveSupport::TestCase assert_equal("http://blog.fc2.com/monet/", url.normalized_url) end + should "normalize deviant art artist urls" do + 
url = FactoryBot.create(:artist_url, :url => "https://caidychen.deviantart.com/") + assert_equal("http://www.deviantart.com/caidychen/", url.normalized_url) + end + should "normalize nico seiga artist urls" do url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/user/illust/1826959") assert_equal("http://seiga.nicovideo.jp/user/illust/1826959/", url.normalized_url) diff --git a/test/unit/sources/deviantart_test.rb b/test/unit/sources/deviantart_test.rb index f23059e44..1098d26e2 100644 --- a/test/unit/sources/deviantart_test.rb +++ b/test/unit/sources/deviantart_test.rb @@ -57,7 +57,7 @@ module Sources end should "get the profile" do - assert_equal("https://noizave.deviantart.com", @site.profile_url) + assert_equal("https://www.deviantart.com/noizave", @site.profile_url) end should "get the artist name" do