Fix #5002: "Urls url has already been taken" when submitting duplicate urls with different capitalization

Fix URLs being normalized after checking for duplicates rather than
before, which meant that URLs that differed in capitalization weren't
detected as duplicates.
This commit is contained in:
evazion
2022-02-08 17:38:47 -06:00
parent 67d834775d
commit 21c0d55aa4
3 changed files with 19 additions and 15 deletions

View File

@@ -149,7 +149,7 @@ module ArtistFinder
# @param url [String] the artist profile URL # @param url [String] the artist profile URL
# @return [Array<Artist>] the list of matching artists # @return [Array<Artist>] the list of matching artists
def find_artists(url) def find_artists(url)
url = ArtistUrl.normalize(url) url = ArtistUrl.normalize_normalized_url(url)
artists = [] artists = []
while artists.empty? && url.size > 10 while artists.empty? && url.size > 10

View File

@@ -1,8 +1,8 @@
# frozen_string_literal: true # frozen_string_literal: true
class ArtistUrl < ApplicationRecord class ArtistUrl < ApplicationRecord
before_validation :initialize_normalized_url, on: :create normalize :url, :normalize_url
before_validation :normalize
validates :url, presence: true, uniqueness: { scope: :artist_id } validates :url, presence: true, uniqueness: { scope: :artist_id }
validate :validate_url_format validate :validate_url_format
belongs_to :artist, :touch => true belongs_to :artist, :touch => true
@@ -18,7 +18,7 @@ class ArtistUrl < ApplicationRecord
[is_active, url] [is_active, url]
end end
def self.normalize(url) def self.normalize_normalized_url(url)
if url.nil? if url.nil?
nil nil
else else
@@ -29,7 +29,7 @@ class ArtistUrl < ApplicationRecord
url = url.sub(%r{^http://pictures.hentai-foundry.com//}, "http://pictures.hentai-foundry.com/") url = url.sub(%r{^http://pictures.hentai-foundry.com//}, "http://pictures.hentai-foundry.com/")
# the strategy won't always work for twitter because it looks for a status # the strategy won't always work for twitter because it looks for a status
url = url.downcase if url =~ %r{^https?://(?:mobile\.)?twitter\.com} url = url.downcase if url =~ %r{^https?://(?:mobile\.)?twitter\.com}i
url = Sources::Strategies.find(url).normalize_for_artist_finder url = Sources::Strategies.find(url).normalize_for_artist_finder
@@ -67,7 +67,7 @@ class ArtistUrl < ApplicationRecord
elsif url.include?("*") elsif url.include?("*")
where_ilike(attr, url) where_ilike(attr, url)
else else
where(attr => normalize(url)) where(attr => normalize_normalized_url(url))
end end
end end
@@ -118,20 +118,17 @@ class ArtistUrl < ApplicationRecord
sites.index(site_name) || 1000 sites.index(site_name) || 1000
end end
def normalize def self.normalize_url(url)
# Perform some normalization with Addressable on the URL itself
# - Converts scheme and hostname to downcase
# - Converts unicode hostname to Punycode
uri = Addressable::URI.parse(url) uri = Addressable::URI.parse(url)
uri.site = uri.normalized_site uri.site = uri.normalized_site
self.url = uri.to_s uri.to_s
self.normalized_url = self.class.normalize(url)
rescue Addressable::URI::InvalidURIError rescue Addressable::URI::InvalidURIError
# Don't bother normalizing the URL if there is errors url
end end
def initialize_normalized_url def url=(url)
self.normalized_url = url super(url)
self.normalized_url = self.class.normalize_normalized_url(self.url)
end end
def to_s def to_s

View File

@@ -158,6 +158,13 @@ class ArtistTest < ActiveSupport::TestCase
assert_equal(old_url_ids, ArtistUrl.order("id").pluck(&:id)) assert_equal(old_url_ids, ArtistUrl.order("id").pluck(&:id))
end end
should "normalize urls before removing duplicates" do
@artist = create(:artist, url_string: "https://Twitter.com/o8q https://twitter.com/o8q")
assert_equal(1, @artist.urls.count)
assert_equal(["https://twitter.com/o8q"], @artist.urls.map(&:to_s))
end
should "ignore pixiv.net/ and pixiv.net/img/ url matches" do should "ignore pixiv.net/ and pixiv.net/img/ url matches" do
a1 = FactoryBot.create(:artist, :name => "yomosaka", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720.png") a1 = FactoryBot.create(:artist, :name => "yomosaka", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720.png")
a2 = FactoryBot.create(:artist, :name => "niwatazumi_bf", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png") a2 = FactoryBot.create(:artist, :name => "niwatazumi_bf", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png")