From 9343f7c912d74162b58e1efb4416d0bdd59702a8 Mon Sep 17 00:00:00 2001 From: evazion Date: Sat, 12 Mar 2022 03:08:06 -0600 Subject: [PATCH] Source::URL: add profile_url method. Add a method for converting a source URL into a profile URL. This will be used for normalizing profile URLs in artist entries. Also add the ability to parse a few more profile URL formats. --- app/logical/source/url.rb | 21 +++++++++++++++++++++ app/logical/source/url/art_station.rb | 4 ++++ app/logical/source/url/deviant_art.rb | 4 ++++ app/logical/source/url/fanbox.rb | 15 ++++++++++++++- app/logical/source/url/fantia.rb | 4 ++++ app/logical/source/url/foundation.rb | 8 ++++++++ app/logical/source/url/hentai_foundry.rb | 4 ++++ app/logical/source/url/lofter.rb | 4 ++++ app/logical/source/url/mastodon.rb | 8 ++++++++ app/logical/source/url/newgrounds.rb | 4 ++++ app/logical/source/url/nico_seiga.rb | 19 ++++++++++++++++++- app/logical/source/url/nijie.rb | 4 ++++ app/logical/source/url/pixiv.rb | 21 ++++++++++++++------- app/logical/source/url/pixiv_sketch.rb | 4 ++++ app/logical/source/url/plurk.rb | 4 ++++ app/logical/source/url/skeb.rb | 4 ++++ app/logical/source/url/tumblr.rb | 3 +-- app/logical/source/url/twitter.rb | 23 ++++++++++++++++++++++- app/logical/source/url/weibo.rb | 20 +++++++------------- app/logical/sources/strategies/weibo.rb | 2 +- 20 files changed, 154 insertions(+), 26 deletions(-) diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb index 1ea615cc6..d5025ad61 100644 --- a/app/logical/source/url.rb +++ b/app/logical/source/url.rb @@ -110,6 +110,27 @@ module Source end end + # Convert the current URL into a profile URL, or return nil if it's not + # possible to get the profile URL from the current URL. + # + # URLs in artist entries will be normalized into this form. + # + # Some sites may have multiple profile URLs, for example if the site has + # both usernames and user IDs. 
This may return different profile URLs, + # depending on whether the current URL has the username or the user ID. + # + # Examples: + # + # * https://www.pixiv.net/member.php?id=9948 + # * https://www.pixiv.net/stacc/bkubb + # * https://twitter.com/bkub_comic + # * https://twitter.com/intent/user?user_id=889592953 + # + # @return [String, nil] + def profile_url + nil + end + protected def initialize(...) super(...) parse diff --git a/app/logical/source/url/art_station.rb b/app/logical/source/url/art_station.rb index 9dd094283..54fb76296 100644 --- a/app/logical/source/url/art_station.rb +++ b/app/logical/source/url/art_station.rb @@ -65,4 +65,8 @@ class Source::URL::ArtStation < Source::URL "https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}" end end + + def profile_url + "https://www.artstation.com/#{username}" if username.present? + end end diff --git a/app/logical/source/url/deviant_art.rb b/app/logical/source/url/deviant_art.rb index 0b32d338f..769f0c434 100644 --- a/app/logical/source/url/deviant_art.rb +++ b/app/logical/source/url/deviant_art.rb @@ -142,6 +142,10 @@ module Source end end + def profile_url + "https://www.deviantart.com/#{username}" if username.present? + end + def pretty_title title.titleize.strip.squeeze(" ").tr(" ", "-") if title.present? end diff --git a/app/logical/source/url/fanbox.rb b/app/logical/source/url/fanbox.rb index 5e2557a2f..c3f344003 100644 --- a/app/logical/source/url/fanbox.rb +++ b/app/logical/source/url/fanbox.rb @@ -41,7 +41,12 @@ class Source::URL::Fanbox < Source::URL @work_id = work_id # https://www.pixiv.net/fanbox/creator/1566167 - in "www.pixiv.net", "fanbox", "creator", user_id + # http://www.pixiv.net/fanbox/user/3410642 + in "www.pixiv.net", "fanbox", ("creator" | "user"), user_id + @user_id = user_id + + # http://www.pixiv.net/fanbox/member.php?user_id=3410642 + in "www.pixiv.net", "fanbox", "member.php" if params[:user_id].present? 
- @user_id = user_id + @user_id = params[:user_id] # https://omu001.fanbox.cc/posts/39714 @@ -71,4 +76,12 @@ class Source::URL::Fanbox < Source::URL # https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/BtxSp9MImFhnEZtjEZs2RPqL.jpeg to_s.gsub(%r{/[cw]/\w+/}, "/") if image_url? end + + def profile_url + if username.present? + "https://#{username}.fanbox.cc" + elsif user_id.present? + "https://www.pixiv.net/fanbox/creator/#{user_id}" + end + end end diff --git a/app/logical/source/url/fantia.rb b/app/logical/source/url/fantia.rb index 1dc1540be..c60c817d7 100644 --- a/app/logical/source/url/fantia.rb +++ b/app/logical/source/url/fantia.rb @@ -76,6 +76,10 @@ class Source::URL::Fantia < Source::URL end end + def profile_url + "https://fantia.jp/fanclubs/#{fanclub_id}" if fanclub_id.present? + end + def work_id @post_id || @product_id end diff --git a/app/logical/source/url/foundation.rb b/app/logical/source/url/foundation.rb index 4ae172eb6..76c3a8c0b 100644 --- a/app/logical/source/url/foundation.rb +++ b/app/logical/source/url/foundation.rb @@ -63,6 +63,14 @@ class Source::URL::Foundation < Source::URL end end + def profile_url + if username.present? + "https://foundation.app/@#{username}" + elsif user_id.present? + "https://foundation.app/#{user_id}" + end + end + def page_url return nil unless work_id.present? return nil if host == "f8n-production-collection-assets.imgix.net" && @hash.blank? diff --git a/app/logical/source/url/hentai_foundry.rb b/app/logical/source/url/hentai_foundry.rb index 6cd7e8d20..e5c37e687 100644 --- a/app/logical/source/url/hentai_foundry.rb +++ b/app/logical/source/url/hentai_foundry.rb @@ -62,4 +62,8 @@ class Source::URL::HentaiFoundry < Source::URL else end end + + def profile_url + "https://www.hentai-foundry.com/user/#{username}" if username.present? 
+ end end diff --git a/app/logical/source/url/lofter.rb b/app/logical/source/url/lofter.rb index 4445722a8..8767ec9eb 100644 --- a/app/logical/source/url/lofter.rb +++ b/app/logical/source/url/lofter.rb @@ -42,4 +42,8 @@ class Source::URL::Lofter < Source::URL def full_image_url "#{site}#{path}" if image_url? end + + def profile_url + "https://#{username}.lofter.com" if username.present? + end end diff --git a/app/logical/source/url/mastodon.rb b/app/logical/source/url/mastodon.rb index ce40b3b58..4d656ec9b 100644 --- a/app/logical/source/url/mastodon.rb +++ b/app/logical/source/url/mastodon.rb @@ -70,4 +70,12 @@ class Source::URL::Mastodon < Source::URL def image_url? full_image_url.present? end + + def profile_url + if username.present? + "https://#{host}/@#{username}" + elsif user_id.present? + "https://#{host}/web/accounts/#{user_id}" + end + end end diff --git a/app/logical/source/url/newgrounds.rb b/app/logical/source/url/newgrounds.rb index 57375abff..fbab609c0 100644 --- a/app/logical/source/url/newgrounds.rb +++ b/app/logical/source/url/newgrounds.rb @@ -58,4 +58,8 @@ class Source::URL::Newgrounds < Source::URL def image_url? url.host == "art.ngfiles.com" end + + def profile_url + "https://#{username}.newgrounds.com" if username.present? 
+ end end diff --git a/app/logical/source/url/nico_seiga.rb b/app/logical/source/url/nico_seiga.rb index 8e7079e19..4e4101dad 100644 --- a/app/logical/source/url/nico_seiga.rb +++ b/app/logical/source/url/nico_seiga.rb @@ -88,7 +88,20 @@ module Source # unhandled # https://seiga.nicovideo.jp/user/illust/456831 - in "seiga.nicovideo.jp", "user", "illust", user_id + # https://sp.seiga.nicovideo.jp/user/illust/20542122 + # https://ext.seiga.nicovideo.jp/user/illust/20542122 + in /seiga\.nicovideo\.jp$/, "user", "illust", user_id + @user_id = user_id + + # http://seiga.nicovideo.jp/manga/list?user_id=23839737 + # http://sp.seiga.nicovideo.jp/manga/list?user_id=23839737 + in /seiga\.nicovideo\.jp$/, "manga", "list" if params[:user_id].present? + @user_id = params[:user_id] + + # https://www.nicovideo.jp/user/4572975 + # https://www.nicovideo.jp/user/20446930/mylist/28674289 + # https://commons.nicovideo.jp/user/696839 + in ("commons.nicovideo.jp" | "www.nicovideo.jp"), "user", /^\d+$/ => user_id, *rest @user_id = user_id else @@ -104,5 +117,9 @@ module Source "https://seiga.nicovideo.jp/image/source/#{image_id}" end end + + def profile_url + "https://seiga.nicovideo.jp/user/illust/#{user_id}" if user_id.present? + end end end diff --git a/app/logical/source/url/nijie.rb b/app/logical/source/url/nijie.rb index dfec1ca59..658577b69 100644 --- a/app/logical/source/url/nijie.rb +++ b/app/logical/source/url/nijie.rb @@ -93,4 +93,8 @@ class Source::URL::Nijie < Source::URL def full_image_url to_s.remove(%r{__rs_\w+/}i).gsub("http:", "https:") if image_url? end + + def profile_url + "https://nijie.info/members.php?id=#{user_id}" if user_id.present? 
+ end end diff --git a/app/logical/source/url/pixiv.rb b/app/logical/source/url/pixiv.rb index a47c04f36..2887d78ec 100644 --- a/app/logical/source/url/pixiv.rb +++ b/app/logical/source/url/pixiv.rb @@ -54,13 +54,19 @@ module Source @work_id = params[:illust_id] # https://www.pixiv.net/member.php?id=339253 - in "www.pixiv.net", "member.php" if params[:id].present? + # http://www.pixiv.net/novel/member.php?id=76567 + in "www.pixiv.net", *, "member.php" if params[:id].present? @user_id = params[:id] # https://www.pixiv.net/u/9202877 # https://www.pixiv.net/users/9202877 + # https://www.pixiv.net/users/76567/novels + in "www.pixiv.net", ("u" | "users"), user_id, *rest + @user_id = user_id + # https://www.pixiv.net/en/users/9202877 - in "www.pixiv.net", *, ("u" | "users"), user_id + # https://www.pixiv.net/en/users/76567/novels + in "www.pixiv.net", _, ("u" | "users"), user_id, *rest @user_id = user_id # https://www.pixiv.net/stacc/noizave @@ -123,11 +129,12 @@ module Source end def profile_url - "https://www.pixiv.net/users/#{user_id}" if user_id.present? - end - - def stacc_url - "https://www.pixiv.net/stacc/#{username}" if username.present? + if user_id.present? + # "https://www.pixiv.net/users/#{user_id}" + "https://www.pixiv.net/member.php?id=#{user_id}" + elsif username.present? + "https://www.pixiv.net/stacc/#{username}" + end end end end diff --git a/app/logical/source/url/pixiv_sketch.rb b/app/logical/source/url/pixiv_sketch.rb index 526d31f06..06706dd6f 100644 --- a/app/logical/source/url/pixiv_sketch.rb +++ b/app/logical/source/url/pixiv_sketch.rb @@ -39,6 +39,10 @@ module Source "https://sketch.pixiv.net/items/#{work_id}" if work_id.present? end + def profile_url + "https://sketch.pixiv.net/@#{username}" if username.present? + end + def api_url # https://sketch.pixiv.net/api/items/5835314698645024323.json (won't work in the browser; use curl) "https://sketch.pixiv.net/api/items/#{work_id}.json" if work_id.present? 
diff --git a/app/logical/source/url/plurk.rb b/app/logical/source/url/plurk.rb index f77105d78..b70fc21d7 100644 --- a/app/logical/source/url/plurk.rb +++ b/app/logical/source/url/plurk.rb @@ -38,4 +38,8 @@ class Source::URL::Plurk < Source::URL def image_url? host == "images.plurk.com" end + + def profile_url + "https://www.plurk.com/#{username}" if username.present? + end end diff --git a/app/logical/source/url/skeb.rb b/app/logical/source/url/skeb.rb index 552a3982b..e4f0dbaf1 100644 --- a/app/logical/source/url/skeb.rb +++ b/app/logical/source/url/skeb.rb @@ -42,6 +42,10 @@ class Source::URL::Skeb < Source::URL end end + def profile_url + "https://skeb.jp/@#{username}" if username.present? + end + def image_url? domain.in?(%[imgix.net amazonaws.com]) end diff --git a/app/logical/source/url/tumblr.rb b/app/logical/source/url/tumblr.rb index 0f8c0e7c8..dab3e85ad 100644 --- a/app/logical/source/url/tumblr.rb +++ b/app/logical/source/url/tumblr.rb @@ -65,7 +65,6 @@ class Source::URL::Tumblr < Source::URL end def profile_url - return nil unless @blog_name.present? - "https://#{@blog_name}.tumblr.com" + "https://#{blog_name}.tumblr.com" if blog_name.present? 
end end diff --git a/app/logical/source/url/twitter.rb b/app/logical/source/url/twitter.rb index ef703e5f6..be3042a3c 100644 --- a/app/logical/source/url/twitter.rb +++ b/app/logical/source/url/twitter.rb @@ -26,7 +26,7 @@ class Source::URL::Twitter < Source::URL # https://developer.twitter.com/en/docs/developer-utilities/configuration/api-reference/get-help-configuration RESERVED_USERNAMES = %w[home i intent search] - attr_reader :status_id, :twitter_username + attr_reader :status_id, :twitter_username, :user_id def self.match?(url) url.host.in?(%w[twitter.com mobile.twitter.com pic.twitter.com pbs.twimg.com video.twimg.com t.co]) @@ -51,6 +51,18 @@ class Source::URL::Twitter < Source::URL - in "twitter.com", username, *rest - @twitter_username = username unless username.in?(RESERVED_USERNAMES) + # https://twitter.com/intent/user?user_id=1485229827984531457 + in "twitter.com", "intent", "user" if params[:user_id].present? + @user_id = params[:user_id] + + # https://twitter.com/intent/user?screen_name=ryuudog_NFT + in "twitter.com", "intent", "user" if params[:screen_name].present? + @twitter_username = params[:screen_name] + + # https://twitter.com/i/user/889592953 + in "twitter.com", "i", "user", user_id + @user_id = user_id + + in "twitter.com", username, *rest # catch-all: must stay after the /intent/user and /i/user patterns, or they never match + @twitter_username = username unless username.in?(RESERVED_USERNAMES) # https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg # https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg:small # https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb?format=jpg&name=900x900 @@ -87,4 +99,13 @@ class Source::URL::Twitter < Source::URL return nil unless @file_path.present? "#{site}/#{@file_path}:orig" end + + def profile_url + if twitter_username.present? + "https://twitter.com/#{twitter_username}" + elsif user_id.present? 
+ # "https://twitter.com/i/user/#{user_id}" + "https://twitter.com/intent/user?user_id=#{user_id}" + end + end end diff --git a/app/logical/source/url/weibo.rb b/app/logical/source/url/weibo.rb index 1df039fbe..a2d465588 100644 --- a/app/logical/source/url/weibo.rb +++ b/app/logical/source/url/weibo.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true class Source::URL::Weibo < Source::URL - attr_reader :full_image_url + attr_reader :full_image_url, :artist_short_id, :artist_long_id def self.match?(url) url.domain.in?(["weibo.com", "weibo.cn", "sinaimg.cn"]) @@ -63,18 +63,12 @@ class Source::URL::Weibo < Source::URL full_image_url.present? end - def profile_urls - [profile_short_url, profile_long_url].compact - end - - def profile_short_url - return if @artist_short_id.blank? - "https://www.weibo.com/u/#{@artist_short_id}" - end - - def profile_long_url - return if @artist_long_id.blank? - "https://www.weibo.com/p/#{@artist_long_id}" + def profile_url + if artist_short_id.present? + "https://www.weibo.com/u/#{artist_short_id}" + elsif artist_long_id.present? + "https://www.weibo.com/p/#{artist_long_id}" + end end def mobile_url diff --git a/app/logical/sources/strategies/weibo.rb b/app/logical/sources/strategies/weibo.rb index e88858a74..4f723b4b0 100644 --- a/app/logical/sources/strategies/weibo.rb +++ b/app/logical/sources/strategies/weibo.rb @@ -49,7 +49,7 @@ module Sources end def profile_urls - (parsed_url.profile_urls + parsed_referer&.profile_urls.to_a).uniq + [parsed_url.profile_url, parsed_referer&.profile_url].compact.uniq end def profile_url