Fix #5002: "Urls url has already been taken" when submitting duplicate urls with different capitalization
Fix URLs being normalized after checking for duplicates rather than before, which meant that URLs that differed in capitalization weren't detected as duplicates.
This commit is contained in:
@@ -149,7 +149,7 @@ module ArtistFinder
|
||||
# @param url [String] the artist profile URL
|
||||
# @return [Array<Artist>] the list of matching artists
|
||||
def find_artists(url)
|
||||
url = ArtistUrl.normalize(url)
|
||||
url = ArtistUrl.normalize_normalized_url(url)
|
||||
artists = []
|
||||
|
||||
while artists.empty? && url.size > 10
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
class ArtistUrl < ApplicationRecord
|
||||
before_validation :initialize_normalized_url, on: :create
|
||||
before_validation :normalize
|
||||
normalize :url, :normalize_url
|
||||
|
||||
validates :url, presence: true, uniqueness: { scope: :artist_id }
|
||||
validate :validate_url_format
|
||||
belongs_to :artist, :touch => true
|
||||
@@ -18,7 +18,7 @@ class ArtistUrl < ApplicationRecord
|
||||
[is_active, url]
|
||||
end
|
||||
|
||||
def self.normalize(url)
|
||||
def self.normalize_normalized_url(url)
|
||||
if url.nil?
|
||||
nil
|
||||
else
|
||||
@@ -29,7 +29,7 @@ class ArtistUrl < ApplicationRecord
|
||||
url = url.sub(%r{^http://pictures.hentai-foundry.com//}, "http://pictures.hentai-foundry.com/")
|
||||
|
||||
# the strategy won't always work for twitter because it looks for a status
|
||||
url = url.downcase if url =~ %r{^https?://(?:mobile\.)?twitter\.com}
|
||||
url = url.downcase if url =~ %r{^https?://(?:mobile\.)?twitter\.com}i
|
||||
|
||||
url = Sources::Strategies.find(url).normalize_for_artist_finder
|
||||
|
||||
@@ -67,7 +67,7 @@ class ArtistUrl < ApplicationRecord
|
||||
elsif url.include?("*")
|
||||
where_ilike(attr, url)
|
||||
else
|
||||
where(attr => normalize(url))
|
||||
where(attr => normalize_normalized_url(url))
|
||||
end
|
||||
end
|
||||
|
||||
@@ -118,20 +118,17 @@ class ArtistUrl < ApplicationRecord
|
||||
sites.index(site_name) || 1000
|
||||
end
|
||||
|
||||
def normalize
|
||||
# Perform some normalization with Addressable on the URL itself
|
||||
# - Converts scheme and hostname to downcase
|
||||
# - Converts unicode hostname to Punycode
|
||||
def self.normalize_url(url)
|
||||
uri = Addressable::URI.parse(url)
|
||||
uri.site = uri.normalized_site
|
||||
self.url = uri.to_s
|
||||
self.normalized_url = self.class.normalize(url)
|
||||
uri.to_s
|
||||
rescue Addressable::URI::InvalidURIError
|
||||
# Don't bother normalizing the URL if there is errors
|
||||
url
|
||||
end
|
||||
|
||||
def initialize_normalized_url
|
||||
self.normalized_url = url
|
||||
def url=(url)
|
||||
super(url)
|
||||
self.normalized_url = self.class.normalize_normalized_url(self.url)
|
||||
end
|
||||
|
||||
def to_s
|
||||
|
||||
@@ -158,6 +158,13 @@ class ArtistTest < ActiveSupport::TestCase
|
||||
assert_equal(old_url_ids, ArtistUrl.order("id").pluck(&:id))
|
||||
end
|
||||
|
||||
should "normalize urls before removing duplicates" do
|
||||
@artist = create(:artist, url_string: "https://Twitter.com/o8q https://twitter.com/o8q")
|
||||
|
||||
assert_equal(1, @artist.urls.count)
|
||||
assert_equal(["https://twitter.com/o8q"], @artist.urls.map(&:to_s))
|
||||
end
|
||||
|
||||
should "ignore pixiv.net/ and pixiv.net/img/ url matches" do
|
||||
a1 = FactoryBot.create(:artist, :name => "yomosaka", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720.png")
|
||||
a2 = FactoryBot.create(:artist, :name => "niwatazumi_bf", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png")
|
||||
|
||||
Reference in New Issue
Block a user