Perform some scheme and hostname normalization on the URL itself
- Converts the scheme and hostname to lowercase.
- Converts Unicode hostnames into Punycode.

This all gets done before the normalized URL is assigned. Additionally, this removes the dead, commented-out line for Nicoseiga.
This commit is contained in:
@@ -20,11 +20,9 @@ class ArtistUrl < ApplicationRecord
|
|||||||
nil
|
nil
|
||||||
else
|
else
|
||||||
url = url.sub(%r!^https://!, "http://")
|
url = url.sub(%r!^https://!, "http://")
|
||||||
url = url.sub(%r!^http://([^/]+)!i) { |domain| domain.downcase }
|
|
||||||
url = url.sub(%r!^http://blog\d+\.fc2!, "http://blog.fc2")
|
url = url.sub(%r!^http://blog\d+\.fc2!, "http://blog.fc2")
|
||||||
url = url.sub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2")
|
url = url.sub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2")
|
||||||
url = url.sub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2")
|
url = url.sub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2")
|
||||||
# url = url.sub(%r!^(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/')
|
|
||||||
url = url.sub(%r!^http://pictures.hentai-foundry.com//!, "http://pictures.hentai-foundry.com/")
|
url = url.sub(%r!^http://pictures.hentai-foundry.com//!, "http://pictures.hentai-foundry.com/")
|
||||||
|
|
||||||
# XXX should be handled by pixiv strategy.
|
# XXX should be handled by pixiv strategy.
|
||||||
@@ -105,7 +103,15 @@ class ArtistUrl < ApplicationRecord
|
|||||||
end
|
end
|
||||||
|
|
||||||
def normalize
|
def normalize
|
||||||
|
# Perform some normalization with Addressable on the URL itself
|
||||||
|
# - Converts scheme and hostname to lowercase
|
||||||
|
# - Converts unicode hostname to Punycode
|
||||||
|
uri = Addressable::URI.parse(url)
|
||||||
|
uri.site = uri.normalized_site
|
||||||
|
self.url = uri.to_s
|
||||||
self.normalized_url = self.class.normalize(url)
|
self.normalized_url = self.class.normalize(url)
|
||||||
|
rescue Addressable::URI::InvalidURIError
|
||||||
|
# Don't bother normalizing the URL if there are errors
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize_normalized_url
|
def initialize_normalized_url
|
||||||
|
|||||||
Reference in New Issue
Block a user