From 21c0d55aa411ee526ce14386646f1a8687ed9dee Mon Sep 17 00:00:00 2001 From: evazion Date: Tue, 8 Feb 2022 17:38:47 -0600 Subject: [PATCH] Fix #5002: "Urls url has already been taken" when submitting duplicate urls with different capitalization Fix URLs being normalized after checking for duplicates rather than before, which meant that URLs that differed in capitalization weren't detected as duplicates. --- app/logical/artist_finder.rb | 2 +- app/models/artist_url.rb | 25 +++++++++++-------------- test/unit/artist_test.rb | 7 +++++++ 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/app/logical/artist_finder.rb b/app/logical/artist_finder.rb index 52d3d6e48..916bec996 100644 --- a/app/logical/artist_finder.rb +++ b/app/logical/artist_finder.rb @@ -149,7 +149,7 @@ module ArtistFinder # @param url [String] the artist profile URL # @return [Array] the list of matching artists def find_artists(url) - url = ArtistUrl.normalize(url) + url = ArtistUrl.normalize_normalized_url(url) artists = [] while artists.empty? && url.size > 10 diff --git a/app/models/artist_url.rb b/app/models/artist_url.rb index 21815f2de..3112f4328 100644 --- a/app/models/artist_url.rb +++ b/app/models/artist_url.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true class ArtistUrl < ApplicationRecord - before_validation :initialize_normalized_url, on: :create - before_validation :normalize + normalize :url, :normalize_url + validates :url, presence: true, uniqueness: { scope: :artist_id } validate :validate_url_format belongs_to :artist, :touch => true @@ -18,7 +18,7 @@ class ArtistUrl < ApplicationRecord [is_active, url] end - def self.normalize(url) + def self.normalize_normalized_url(url) if url.nil? nil else @@ -29,7 +29,7 @@ class ArtistUrl < ApplicationRecord url = url.sub(%r{^http://pictures.hentai-foundry.com//}, "http://pictures.hentai-foundry.com/") # the strategy won't always work for twitter because it looks for a status - url = url.downcase if url =~ %r{^https?://(?:mobile\.)?twitter\.com} + url = url.downcase if url =~ %r{^https?://(?:mobile\.)?twitter\.com}i url = Sources::Strategies.find(url).normalize_for_artist_finder @@ -67,7 +67,7 @@ class ArtistUrl < ApplicationRecord elsif url.include?("*") where_ilike(attr, url) else - where(attr => normalize(url)) + where(attr => normalize_normalized_url(url)) end end @@ -118,20 +118,17 @@ class ArtistUrl < ApplicationRecord sites.index(site_name) || 1000 end - def normalize - # Perform some normalization with Addressable on the URL itself - # - Converts scheme and hostname to downcase - # - Converts unicode hostname to Punycode + def self.normalize_url(url) uri = Addressable::URI.parse(url) uri.site = uri.normalized_site - self.url = uri.to_s - self.normalized_url = self.class.normalize(url) + uri.to_s rescue Addressable::URI::InvalidURIError - # Don't bother normalizing the URL if there is errors + url end - def initialize_normalized_url - self.normalized_url = url + def url=(url) + super(url) + self.normalized_url = self.class.normalize_normalized_url(self.url) end def to_s diff --git a/test/unit/artist_test.rb b/test/unit/artist_test.rb index 91a528f05..0cb9d40e4 100644 --- a/test/unit/artist_test.rb +++ b/test/unit/artist_test.rb @@ -158,6 +158,13 @@ class ArtistTest < ActiveSupport::TestCase assert_equal(old_url_ids, ArtistUrl.order("id").pluck(&:id)) end + should "normalize urls before removing duplicates" do + @artist = create(:artist, url_string: "https://Twitter.com/o8q https://twitter.com/o8q") + + assert_equal(1, @artist.urls.count) + assert_equal(["https://twitter.com/o8q"], @artist.urls.map(&:to_s)) + end + should "ignore pixiv.net/ and pixiv.net/img/ url matches" do a1 = FactoryBot.create(:artist, :name => "yomosaka", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720.png") a2 = FactoryBot.create(:artist, :name => "niwatazumi_bf", :url_string => "http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png")