Files
danbooru/app/models/artist_url.rb
evazion cf8b8207e2 artists: change how artist urls are normalized.
Change how artist URLs are normalized in artist entries. Don't try to secretly
convert image URLs to profile URLs in artist entries. For example, if someone puts a
Pixiv image URL in an artist entry, don't secretly try to fetch the source and
convert it into a profile URL in the `normalized_url` field.

We used to do this conversion because, years ago, it was standard practice to put image URLs in artist
entries. Pixiv image URLs used to contain the artist's username, so we used to put
image URLs in artist entries for artist finding purposes. But Pixiv changed it so
that image URLs no longer contained the username, so we dealt with it by adding a
`normalized_url` column to artist_urls and secretly converting image URLs to profile
URLs in this field. But this is no longer necessary because now we don't normally put
image URLs in artist entries in the first place.

Now the `profile_url` method in `Source::URL` is used to normalize URLs in artist
entries. This lets us parse various profile URL formats and normalize them into a
single canonical form.

This also removes the `normalize_for_artist_finder` method from source strategies.
Instead the `profile_url` method is used for artist finding purposes. So the profile
URL returned by the source strategy needs to be the same as the URL in the artist
entry in order for artist finding to work.
2022-03-13 03:54:17 -05:00

151 lines
4.0 KiB
Ruby

# frozen_string_literal: true
class ArtistURL < ApplicationRecord
# NOTE(review): `normalize` is not defined in this file; presumably it runs
# `self.normalize_url` on the `url` attribute -- confirm against ApplicationRecord.
normalize :url, :normalize_url
# A URL must be present and may appear only once per artist.
validates :url, presence: true, uniqueness: { scope: :artist_id }
# Custom check of the scheme and hostname (see #validate_url_format).
validate :validate_url_format
# `touch: true` bumps the owning artist's timestamp whenever this record is saved or destroyed.
belongs_to :artist, :touch => true
# Search scopes: match against the raw `url` column or the `normalized_url` column.
scope :url_matches, ->(url) { url_attribute_matches(:url, url) }
scope :normalized_url_matches, ->(url) { url_attribute_matches(:normalized_url, url) }
# Only URLs whose `is_active` flag is true.
scope :active, -> { where(is_active: true) }
# Split an optional leading "-" off a URL string.
#
# @param url [String] a URL, optionally prefixed with "-" to mark it inactive
# @return [Array(Boolean, String)] `[is_active, url_without_prefix]`; is_active
#   is false only when the "-" prefix was present
def self.parse_prefix(url)
  # The pattern always matches: both groups are allowed to be empty.
  match = url.match(/\A(-)?(.*)/)
  [match[1].nil?, match[2]]
end
# Build the value stored in `normalized_url` for a given URL.
#
# The URL is replaced by its `Source::URL` profile URL when one is available,
# then forced to http://, FC2 blog/image hosts are folded into the
# `http://<name>.blog.fc2.com` form, and the result always ends in exactly one "/".
#
# @param url [String, nil]
# @return [String, nil] nil when given nil
def self.normalize_normalized_url(url)
  return if url.nil?

  # Applied in order; each rule rewrites at most one occurrence.
  rewrite_rules = [
    [%r{^https://}, "http://"],
    [%r{^http://blog-imgs-\d+\.fc2}, "http://blog.fc2"],
    [%r{^http://blog-imgs-\d+-\w+\.fc2}, "http://blog.fc2"],
    [%r{^http://blog\d*\.fc2\.com/(?:\w/){,3}(\w+)}, "http://\\1.blog.fc2.com"],
  ]

  canonical = Source::URL.parse(url)&.profile_url || url
  rewritten = rewrite_rules.reduce(canonical) do |current, (pattern, replacement)|
    current.sub(pattern, replacement)
  end

  # Collapse any run of trailing slashes, then add a single one back.
  rewritten.gsub(%r{/+\Z}, "") + "/"
end
# Build a relation from search parameters.
#
# Starts from `search_attributes` (defined outside this file), narrows by the
# `url_matches` / `normalized_url_matches` params, then applies ordering.
#
# `params[:order]` may be `<column>` or `<column>_<asc|desc>` for one of the
# whitelisted columns, matched case-insensitively. The direction defaults to
# descending, and `id DESC` is always appended as a tie-breaker. Any other
# value falls back to `apply_default_order`.
#
# @param params [Hash] search parameters
# @return the resulting relation
def self.search(params = {})
  q = search_attributes(params, :id, :created_at, :updated_at, :url, :normalized_url, :is_active, :artist)
  q = q.url_matches(params[:url_matches])
  q = q.normalized_url_matches(params[:normalized_url_matches])

  case params[:order]
  when /\A(id|artist_id|url|normalized_url|is_active|created_at|updated_at)(?:_(asc|desc))?\z/i
    # The pattern is case-insensitive, so normalize the captures before handing
    # them to `order`: the column name is quoted (hence case-sensitive) in SQL,
    # and a mixed-case direction such as "Asc" is not an accepted direction.
    column = Regexp.last_match(1).downcase
    direction = Regexp.last_match(2)&.downcase || :desc
    q.order(column => direction).order(id: :desc)
  else
    q.apply_default_order(params)
  end
end
# Filter on a URL column using one of several matching modes, chosen by the
# shape of the search string:
#
# * blank        -> no filtering
# * `/pattern/`  -> regex match on the text between the slashes
# * contains `*` -> case-insensitive wildcard match
# * otherwise    -> equality against the normalized form of the URL's profile URL
#
# @param attr [Symbol] the column to match (e.g. :url or :normalized_url)
# @param url [String, nil] the search string
def self.url_attribute_matches(attr, url)
  return all if url.blank?
  return where_regex(attr, Regexp.last_match(1)) if url =~ %r{\A/(.*)/\z}
  return where_ilike(attr, url) if url.include?("*")

  profile_url = Sources::Strategies.find(url).profile_url || url
  where(attr => normalize_normalized_url(profile_url))
end
# The domain reported by the parsed URL.
#
# @return [String] empty when the URL could not be parsed
def domain
  parsed = parsed_url
  parsed.nil? ? "" : parsed.domain.to_s
end
# The site name reported by the parsed URL.
#
# @return [String] empty when the URL could not be parsed
def site_name
  parsed = parsed_url
  parsed.nil? ? "" : parsed.site_name.to_s
end
# A secondary URL is an artist URL that we don't normally want to display,
# usually because it's redundant with the primary profile URL.
#
# @return [Boolean] true when the URL matches any of the known secondary patterns
def secondary_url?
  secondary_patterns = [
    %r{pixiv\.net/stacc}i,
    %r{pixiv\.net/fanbox}i,
    %r{twitter\.com/intent}i,
    %r{lohas\.nicoseiga\.jp}i,
    %r{(?:www|com|dic)\.nicovideo\.jp}i,
    %r{pawoo\.net/web/accounts}i,
    %r{www\.artstation\.com}i,
    %r{blogimg\.jp}i,
    %r{image\.blog\.livedoor\.jp}i,
  ]

  candidate = url.to_s
  secondary_patterns.any? { |pattern| pattern.match?(candidate) }
end
# The sort order of sites in artist URL lists.
#
# @return [Integer] the position of `site_name` in the ranking below (lower
#   sorts first), or 1000 for a site that is not listed
def priority
  first_tier = ["Pixiv", "Twitter"]
  second_tier = [
    "ArtStation", "Baraag", "BCY", "Deviant Art", "Hentai Foundry", "Fantia", "Foundation", "Lofter",
    "Nico Seiga", "Nijie", "Pawoo", "Pixiv Fanbox", "Pixiv Sketch", "Plurk", "Tinami", "Tumblr", "Weibo",
  ]
  third_tier = [
    "Ask.fm", "Booth.pm", "Facebook", "FC2", "Gumroad", "Instagram", "Ko-fi", "Livedoor", "Mihuashi",
    "Mixi.jp", "Patreon", "Piapro.jp", "Picarto", "Privatter", "Sakura.ne.jp", "Stickam", "Skeb",
    "Twitch", "Youtube",
  ]
  fourth_tier = [
    "Amazon", "Circle.ms", "DLSite", "Doujinshi.org", "Erogamescape", "Mangaupdates", "Melonbooks",
    "Toranoana", "Wikipedia",
  ]

  ranking = first_tier + second_tier + third_tier + fourth_tier
  ranking.index(site_name) || 1000
end
# Normalize a URL string via `Danbooru::URL`.
#
# @param url [String, nil]
# @return the normalized string, or `url` unchanged when it cannot be parsed
#   or the normalized form is blank
def self.normalize_url(url)
  parsed = Danbooru::URL.parse(url)
  normalized = parsed&.to_normalized_s
  normalized.presence || url
end
# Assign the URL and refresh the values derived from it: the cached parsed
# URL and the `normalized_url` attribute.
#
# @param url [String, nil] the new URL
def url=(url)
super(url)
# Cache the parse of the value exactly as the caller passed it.
@parsed_url = Source::URL.parse(url)
# Derive normalized_url from the attribute as read back after assignment.
# NOTE(review): `self.url` may differ from the argument if the attribute writer
# transforms it -- confirm which form `@parsed_url` is meant to reflect.
self.normalized_url = self.class.normalize_normalized_url(self.url)
end
# The URL parsed by `Source::URL`, cached after the first successful parse.
# A nil result is not cached, so an unparseable URL is re-parsed on each call.
#
# @return the parsed URL object, or nil when `Source::URL.parse` returns nil
def parsed_url
  return @parsed_url if @parsed_url

  @parsed_url = Source::URL.parse(url)
end
# The URL as written in an artist entry: inactive URLs carry a leading "-".
#
# @return [String]
def to_s
  is_active? ? url : "-#{url}"
end
def validate_scheme(uri)
errors.add(:url, "'#{uri}' must begin with http:// or https:// ") unless uri.scheme.in?(%w[http https])
end
# Add a validation error on :url unless the URI has a host containing a dot.
#
# @param uri a parsed URI responding to `host` and `to_s`
def validate_hostname(uri)
  host = uri.host
  return if host&.include?(".")

  errors.add(:url, "'#{uri}' has a hostname '#{host}' that does not contain a dot")
end
# Validate that `url` parses as a URI with an http(s) scheme and a dotted
# hostname, adding errors on :url otherwise.
#
# A URL that Addressable cannot parse gets a single "is malformed" error
# instead of the scheme/hostname errors.
def validate_url_format
  uri = Addressable::URI.parse(url)
  # Addressable::URI.parse returns nil for nil input; with nothing to inspect,
  # leave the failure to the presence validation rather than raising.
  return if uri.nil?

  validate_scheme(uri)
  validate_hostname(uri)
rescue Addressable::URI::InvalidURIError => e
  # Report the raw `url`: the local `uri` is never assigned when parsing raises.
  errors.add(:url, "'#{url}' is malformed: #{e}")
end
# Names of the associations this model lists as available includes.
#
# @return [Array<Symbol>]
def self.available_includes
  %i[artist]
end
end