Perform some scheme and hostname normalization on the URL itself
- Converts the scheme and hostname to lowercase.
- Converts Unicode hostnames into Punycode.

All of this is done before the normalized URL is assigned. Additionally, this removes the dead, commented-out line for Nicoseiga.
This commit is contained in:
@@ -20,11 +20,9 @@ class ArtistUrl < ApplicationRecord
|
||||
nil
|
||||
else
|
||||
url = url.sub(%r!^https://!, "http://")
|
||||
url = url.sub(%r!^http://([^/]+)!i) { |domain| domain.downcase }
|
||||
url = url.sub(%r!^http://blog\d+\.fc2!, "http://blog.fc2")
|
||||
url = url.sub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2")
|
||||
url = url.sub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2")
|
||||
# url = url.sub(%r!^(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/')
|
||||
url = url.sub(%r!^http://pictures.hentai-foundry.com//!, "http://pictures.hentai-foundry.com/")
|
||||
|
||||
# XXX should be handled by pixiv strategy.
|
||||
@@ -105,7 +103,15 @@ class ArtistUrl < ApplicationRecord
|
||||
end
|
||||
|
||||
def normalize
  # Canonicalize the stored URL and derive normalized_url from it.
  #
  # Addressable is used to normalize the "site" part of the URL:
  # - Converts scheme and hostname to lowercase
  # - Converts unicode hostname to Punycode
  #
  # The cleaned-up value is written back to url before normalized_url is
  # computed by the class-level normalizer.
  uri = Addressable::URI.parse(url)

  # Addressable::URI.parse returns nil when given nil; skip the site rewrite
  # in that case instead of raising NoMethodError on nil.
  if uri
    uri.site = uri.normalized_site
    self.url = uri.to_s
  end

  self.normalized_url = self.class.normalize(url)
rescue Addressable::URI::InvalidURIError
  # Don't bother normalizing the URL if there are errors; url and
  # normalized_url are left untouched.
end
|
||||
|
||||
def initialize_normalized_url
|
||||
|
||||
Reference in New Issue
Block a user