From 88d9fc4e5ed0de34e20b9e2a4cf9a07779068ed6 Mon Sep 17 00:00:00 2001 From: evazion Date: Fri, 29 May 2020 11:49:22 -0500 Subject: [PATCH] sources: simplify artist finder url normalization. Get rid of `normalized_for_artist_finder?` and `normalizable_for_artist_finder?`. This was legacy bullshit that was originally designed to avoid API calls when saving artist entries containing old Pixiv direct image urls that had already been normalized, or that couldn't be normalized because they had a bad id. Nowadays we store profile urls in artist entries instead of direct image urls, so we don't normally need to do any API calls to normalize the profile url. Strategies should take care to avoid triggering API calls inside `profile_url` when possible. --- app/logical/artist_finder.rb | 2 -- app/logical/sources/strategies/art_station.rb | 4 ---- app/logical/sources/strategies/base.rb | 23 +------------------ app/logical/sources/strategies/deviant_art.rb | 12 ---------- .../sources/strategies/hentai_foundry.rb | 8 ------- app/logical/sources/strategies/nico_seiga.rb | 12 ---------- app/logical/sources/strategies/null.rb | 12 ---------- app/logical/sources/strategies/pixiv.rb | 8 ------- app/logical/sources/strategies/weibo.rb | 13 ----------- app/models/artist_url.rb | 11 +-------- 10 files changed, 2 insertions(+), 103 deletions(-) diff --git a/app/logical/artist_finder.rb b/app/logical/artist_finder.rb index 4c72c2015..3cf5c9581 100644 --- a/app/logical/artist_finder.rb +++ b/app/logical/artist_finder.rb @@ -127,8 +127,6 @@ module ArtistFinder url = ArtistUrl.normalize(url) artists = [] - # return [] unless Sources::Strategies.find(url).normalized_for_artist_finder? - while artists.empty? 
&& url.size > 10 u = url.sub(/\/+$/, "") + "/" u = u.to_escaped_for_sql_like.gsub(/\*/, '%') + '%' diff --git a/app/logical/sources/strategies/art_station.rb b/app/logical/sources/strategies/art_station.rb index d6498e173..2c78826fe 100644 --- a/app/logical/sources/strategies/art_station.rb +++ b/app/logical/sources/strategies/art_station.rb @@ -87,10 +87,6 @@ module Sources::Strategies end end - def normalized_for_artist_finder? - profile_url.present? && url == profile_url - end - def normalize_for_source return if project_id.blank? diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index b2d23fa12..55bf676ff 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -127,7 +127,7 @@ module Sources # A list of all profile urls associated with the artist. These urls will # be suggested when creating a new artist. def profile_urls - [normalize_for_artist_finder] + [profile_url].compact end def artist_commentary_title @@ -150,27 +150,6 @@ module Sources end memoize :size - # Subclasses should return true only if the URL is in its final normalized form. - # - # Sources::Strategies.find("http://img.pixiv.net/img/evazion").normalized_for_artist_finder? - # => true - # Sources::Strategies.find("http://i2.pixiv.net/img18/img/evazion/14901720_m.png").normalized_for_artist_finder? - # => false - def normalized_for_artist_finder? - false - end - - # Subclasses should return true only if the URL is a valid URL that could - # be converted into normalized form. - # - # Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054").normalizable_for_artist_finder? - # => true - # Sources::Strategies.find("http://dic.pixiv.net/a/THUNDERproject").normalizable_for_artist_finder? - # => false - def normalizable_for_artist_finder? - normalize_for_artist_finder.present? - end - # The url to use for artist finding purposes. This will be stored in the # artist entry. 
Normally this will be the profile url. def normalize_for_artist_finder diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 786a37576..e43462a59 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -167,18 +167,6 @@ module Sources api_metadata[:description] end - def normalized_for_artist_finder? - url == normalize_for_artist_finder - end - - def normalizable_for_artist_finder? - normalize_for_artist_finder.present? - end - - def normalize_for_artist_finder - profile_url - end - def tags if api_metadata.blank? return [] diff --git a/app/logical/sources/strategies/hentai_foundry.rb b/app/logical/sources/strategies/hentai_foundry.rb index 24befca7f..5ab3ca2ed 100644 --- a/app/logical/sources/strategies/hentai_foundry.rb +++ b/app/logical/sources/strategies/hentai_foundry.rb @@ -104,14 +104,6 @@ module Sources DText.from_html(artist_commentary_desc).gsub(/\A[[:space:]]+|[[:space:]]+\z/, "").gsub(/\n+/, "\n") end - def normalizable_for_artist_finder? - artist_name.present? - end - - def normalized_for_artist_finder? - url =~ PROFILE_URL - end - def normalize_for_source page_url end diff --git a/app/logical/sources/strategies/nico_seiga.rb b/app/logical/sources/strategies/nico_seiga.rb index ceb93a0cb..32a3fd65f 100644 --- a/app/logical/sources/strategies/nico_seiga.rb +++ b/app/logical/sources/strategies/nico_seiga.rb @@ -122,18 +122,6 @@ module Sources api_client.desc end - def normalized_for_artist_finder? - url =~ PROFILE - end - - def normalizable_for_artist_finder? - url =~ PAGE || url =~ MANGA_PAGE || url =~ PROFILE || url =~ DIRECT1 || url =~ DIRECT2 || url =~ PAGE2 - end - - def normalize_for_artist_finder - "#{profile_url}/" - end - def normalize_for_source if illust_id.present? 
"https://seiga.nicovideo.jp/seiga/im#{illust_id}" diff --git a/app/logical/sources/strategies/null.rb b/app/logical/sources/strategies/null.rb index db0e3775d..e17c5e5c4 100644 --- a/app/logical/sources/strategies/null.rb +++ b/app/logical/sources/strategies/null.rb @@ -13,18 +13,6 @@ module Sources image_url end - def normalized_for_artist_finder? - true - end - - def normalizable_for_artist_finder? - false - end - - def normalize_for_artist_finder - url - end - def normalize_for_source case url when %r{\Ahttps?://(?:d3j5vwomefv46c|dn3pm25xmtlyu)\.cloudfront\.net/photos/large/(\d+)\.}i diff --git a/app/logical/sources/strategies/pixiv.rb b/app/logical/sources/strategies/pixiv.rb index 4b9ce625d..5f2bbf669 100644 --- a/app/logical/sources/strategies/pixiv.rb +++ b/app/logical/sources/strategies/pixiv.rb @@ -205,14 +205,6 @@ module Sources } end - def normalized_for_artist_finder? - url =~ PROFILE || url =~ STACC_PAGE - end - - def normalizable_for_artist_finder? - illust_id.present? || novel_id.present? || fanbox_id.present? || fanbox_account_id.present? - end - def normalize_for_source return if illust_id.blank? diff --git a/app/logical/sources/strategies/weibo.rb b/app/logical/sources/strategies/weibo.rb index b6710ef6b..8cad13cd1 100644 --- a/app/logical/sources/strategies/weibo.rb +++ b/app/logical/sources/strategies/weibo.rb @@ -155,19 +155,6 @@ module Sources end end - def normalized_for_artist_finder - url =~ %r{weibo\.com/(u|p)/\d+\z}i - end - - def normalizable_for_artist_finder? - artist_short_id_from_url.present? || artist_long_id.present? 
- end - - def normalize_for_artist_finder - profile_url = profile_short_url || profile_long_url - profile_url || url - end - def normalize_for_source return url if url =~ PAGE_URL_2 artist_id = artist_short_id_from_url diff --git a/app/models/artist_url.rb b/app/models/artist_url.rb index e39d3db65..b56c8ead8 100644 --- a/app/models/artist_url.rb +++ b/app/models/artist_url.rb @@ -33,16 +33,7 @@ class ArtistUrl < ApplicationRecord # the strategy won't always work for twitter because it looks for a status url = url.downcase if url =~ %r!^https?://(?:mobile\.)?twitter\.com! - begin - source = Sources::Strategies.find(url) - - if !source.normalized_for_artist_finder? && source.normalizable_for_artist_finder? - url = source.normalize_for_artist_finder - end - rescue Net::OpenTimeout, PixivApiClient::Error - raise if Rails.env.test? - end - + url = Sources::Strategies.find(url).normalize_for_artist_finder url = url.gsub(/\/+\Z/, "") url = url.gsub(%r!^https://!, "http://") url + "/"