Source::URL: add profile_url method.

Add a method for converting a source URL into a profile URL. This will
be used for normalizing profile URLs in artist entries.

Also add the ability to parse a few more profile URL formats.
This commit is contained in:
evazion
2022-03-12 03:08:06 -06:00
parent 787b5c8e27
commit 9343f7c912
20 changed files with 154 additions and 26 deletions

View File

@@ -110,6 +110,27 @@ module Source
end
end
# Convert the current URL into a profile URL, or return nil if it's not
# possible to get the profile URL from the current URL.
#
# URLs in artist entries will be normalized into this form.
#
# Some sites may have multiple profile URLs, for example if the site has
# both usernames and user IDs. This may return different profile URLs,
# depending on whether the current URL has the username or the user ID.
#
# This base implementation always returns nil; site-specific subclasses
# override it to build the profile URL for their site.
#
# Examples of profile URLs:
#
# * https://www.pixiv.net/member.php?id=9948
# * https://www.pixiv.net/stacc/bkubb
# * https://twitter.com/bkub_comic
# * https://twitter.com/intent/user?user_id=889592953
#
# @return [String, nil] the profile URL, or nil if it can't be determined
def profile_url
nil
end
protected def initialize(...)
super(...)
parse

View File

@@ -65,4 +65,8 @@ class Source::URL::ArtStation < Source::URL
"https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}"
end
end
# Build the ArtStation profile URL from `username`.
#
# @return [String, nil] the profile URL, or nil when no username is present
def profile_url
  return unless username.present?

  "https://www.artstation.com/#{username}"
end
end

View File

@@ -142,6 +142,10 @@ module Source
end
end
# Build the DeviantArt profile URL from `username`.
#
# @return [String, nil] the profile URL, or nil when no username is present
def profile_url
  return unless username.present?

  "https://www.deviantart.com/#{username}"
end
# Turn `title` into a dash-separated, titleized string.
#
# The title is titleized, stripped of surrounding whitespace, runs of
# spaces are collapsed to one, and each remaining space becomes a dash.
#
# @return [String, nil] the formatted title, or nil when no title is present
def pretty_title
  return unless title.present?

  spaced = title.titleize.strip.squeeze(" ")
  spaced.tr(" ", "-")
end

View File

@@ -41,7 +41,12 @@ class Source::URL::Fanbox < Source::URL
@work_id = work_id
# https://www.pixiv.net/fanbox/creator/1566167
in "www.pixiv.net", "fanbox", "creator", user_id
# http://www.pixiv.net/fanbox/user/3410642
in "www.pixiv.net", "fanbox", ("creator" | "user"), user_id
@user_id = user_id
# http://www.pixiv.net/fanbox/member.php?user_id=3410642
in "www.pixiv.net", "fanbox", "member.php" if params[:user_id].present?
@user_id = user_id
# https://omu001.fanbox.cc/posts/39714
@@ -71,4 +76,12 @@ class Source::URL::Fanbox < Source::URL
# https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/BtxSp9MImFhnEZtjEZs2RPqL.jpeg
to_s.gsub(%r{/[cw]/\w+/}, "/") if image_url?
end
# Build the Fanbox profile URL.
#
# Prefers the `<username>.fanbox.cc` form when `username` is present and
# falls back to the pixiv.net creator URL built from `user_id`.
#
# @return [String, nil] the profile URL, or nil if neither value is present
def profile_url
  return "https://#{username}.fanbox.cc" if username.present?
  return "https://www.pixiv.net/fanbox/creator/#{user_id}" if user_id.present?

  nil
end
end

View File

@@ -76,6 +76,10 @@ class Source::URL::Fantia < Source::URL
end
end
# Build the Fantia fanclub URL from `fanclub_id`.
#
# @return [String, nil] the fanclub URL, or nil when no fanclub ID is present
def profile_url
  return unless fanclub_id.present?

  "https://fantia.jp/fanclubs/#{fanclub_id}"
end
# The ID of the work this URL refers to.
#
# @return the value of @post_id when it is set, otherwise @product_id
#   (nil when neither is set)
def work_id
@post_id || @product_id
end

View File

@@ -63,6 +63,14 @@ class Source::URL::Foundation < Source::URL
end
end
# Build the Foundation profile URL.
#
# Uses the `@username` form when `username` is present, otherwise the
# bare `user_id` form.
#
# @return [String, nil] the profile URL, or nil if neither value is present
def profile_url
  return "https://foundation.app/@#{username}" if username.present?
  return "https://foundation.app/#{user_id}" if user_id.present?

  nil
end
def page_url
return nil unless work_id.present?
return nil if host == "f8n-production-collection-assets.imgix.net" && @hash.blank?

View File

@@ -62,4 +62,8 @@ class Source::URL::HentaiFoundry < Source::URL
else
end
end
# Build the Hentai Foundry user page URL from `username`.
#
# @return [String, nil] the profile URL, or nil when no username is present
def profile_url
  return unless username.present?

  "https://www.hentai-foundry.com/user/#{username}"
end
end

View File

@@ -42,4 +42,8 @@ class Source::URL::Lofter < Source::URL
# The full image URL, built by joining `site` and `path`.
#
# @return [String, nil] the URL, or nil when this is not an image URL
def full_image_url
  return unless image_url?

  "#{site}#{path}"
end
# Build the Lofter blog URL (`<username>.lofter.com`) from `username`.
#
# @return [String, nil] the profile URL, or nil when no username is present
def profile_url
  return unless username.present?

  "https://#{username}.lofter.com"
end
end

View File

@@ -70,4 +70,12 @@ class Source::URL::Mastodon < Source::URL
# Whether this URL is an image URL, i.e. whether `full_image_url` is present.
def image_url?
full_image_url.present?
end
# Build the profile URL on the current `host`.
#
# Uses the `/@username` form when `username` is present, otherwise the
# `/web/accounts/<user_id>` form.
#
# @return [String, nil] the profile URL, or nil if neither value is present
def profile_url
  return "https://#{host}/@#{username}" if username.present?
  return "https://#{host}/web/accounts/#{user_id}" if user_id.present?

  nil
end
end

View File

@@ -58,4 +58,8 @@ class Source::URL::Newgrounds < Source::URL
# Whether this URL is an image URL: true when the URL's host is
# art.ngfiles.com.
def image_url?
url.host == "art.ngfiles.com"
end
# Build the Newgrounds profile URL (`<username>.newgrounds.com`) from
# `username`.
#
# @return [String, nil] the profile URL, or nil when no username is present
def profile_url
  return unless username.present?

  "https://#{username}.newgrounds.com"
end
end

View File

@@ -88,7 +88,20 @@ module Source
# unhandled
# https://seiga.nicovideo.jp/user/illust/456831
in "seiga.nicovideo.jp", "user", "illust", user_id
# https://sp.seiga.nicovideo.jp/user/illust/20542122
# https://ext.seiga.nicovideo.jp/user/illust/20542122
in /seiga\.nicovideo\.jp$/, "user", "illust", user_id
@user_id = user_id
# http://seiga.nicovideo.jp/manga/list?user_id=23839737
# http://sp.seiga.nicovideo.jp/manga/list?user_id=23839737
in /seiga\.nicovideo\.jp$/, "manga", "list" if params[:user_id].present?
@user_id = params[:user_id]
# https://www.nicovideo.jp/user/4572975
# https://www.nicovideo.jp/user/20446930/mylist/28674289
# https://commons.nicovideo.jp/user/696839
in ("commons.nicovideo.jp" | "www.nicovideo.jp"), "user", /^\d+$/ => user_id, *rest
@user_id = user_id
else
@@ -104,5 +117,9 @@ module Source
"https://seiga.nicovideo.jp/image/source/#{image_id}"
end
end
# Build the Nico Seiga illustration profile URL from `user_id`.
#
# @return [String, nil] the profile URL, or nil when no user ID is present
def profile_url
  return unless user_id.present?

  "https://seiga.nicovideo.jp/user/illust/#{user_id}"
end
end
end

View File

@@ -93,4 +93,8 @@ class Source::URL::Nijie < Source::URL
# The full image URL for this URL.
#
# Removes any `__rs_<word>/` path segment (case-insensitive) from the URL
# string and rewrites `http:` to `https:`.
#
# @return [String, nil] the rewritten URL, or nil when this is not an image URL
def full_image_url
  return unless image_url?

  stripped = to_s.remove(%r{__rs_\w+/}i)
  stripped.gsub("http:", "https:")
end
# Build the Nijie member page URL from `user_id`.
#
# @return [String, nil] the profile URL, or nil when no user ID is present
def profile_url
  return unless user_id.present?

  "https://nijie.info/members.php?id=#{user_id}"
end
end

View File

@@ -54,13 +54,19 @@ module Source
@work_id = params[:illust_id]
# https://www.pixiv.net/member.php?id=339253
in "www.pixiv.net", "member.php" if params[:id].present?
# http://www.pixiv.net/novel/member.php?id=76567
in "www.pixiv.net", *, "member.php" if params[:id].present?
@user_id = params[:id]
# https://www.pixiv.net/u/9202877
# https://www.pixiv.net/users/9202877
# https://www.pixiv.net/users/76567/novels
in "www.pixiv.net", ("u" | "users"), user_id, *rest
@user_id = user_id
# https://www.pixiv.net/en/users/9202877
in "www.pixiv.net", *, ("u" | "users"), user_id
# https://www.pixiv.net/en/users/76567/novels
in "www.pixiv.net", _, ("u" | "users"), user_id, *rest
@user_id = user_id
# https://www.pixiv.net/stacc/noizave
@@ -123,11 +129,12 @@ module Source
end
def profile_url
"https://www.pixiv.net/users/#{user_id}" if user_id.present?
end
def stacc_url
"https://www.pixiv.net/stacc/#{username}" if username.present?
if user_id.present?
# "https://www.pixiv.net/users/#{user_id}"
"https://www.pixiv.net/member.php?id=#{user_id}"
elsif username.present?
"https://www.pixiv.net/stacc/#{username}"
end
end
end
end

View File

@@ -39,6 +39,10 @@ module Source
"https://sketch.pixiv.net/items/#{work_id}" if work_id.present?
end
# Build the Pixiv Sketch profile URL (`/@username`) from `username`.
#
# @return [String, nil] the profile URL, or nil when no username is present
def profile_url
  return unless username.present?

  "https://sketch.pixiv.net/@#{username}"
end
def api_url
# https://sketch.pixiv.net/api/items/5835314698645024323.json (won't work in the browser; use curl)
"https://sketch.pixiv.net/api/items/#{work_id}.json" if work_id.present?

View File

@@ -38,4 +38,8 @@ class Source::URL::Plurk < Source::URL
# Whether this URL is an image URL: true when `host` is images.plurk.com.
def image_url?
host == "images.plurk.com"
end
# Build the Plurk profile URL from `username`.
#
# @return [String, nil] the profile URL, or nil when no username is present
def profile_url
  return unless username.present?

  "https://www.plurk.com/#{username}"
end
end

View File

@@ -42,6 +42,10 @@ class Source::URL::Skeb < Source::URL
end
end
# Build the Skeb profile URL (`/@username`) from `username`.
#
# @return [String, nil] the profile URL, or nil when no username is present
def profile_url
  return unless username.present?

  "https://skeb.jp/@#{username}"
end
# Whether this URL is an image URL.
#
# @return [Boolean] true when `domain` is exactly imgix.net or amazonaws.com
def image_url?
  # %w[] builds an array of domains. A bare %[] literal is a single string,
  # which makes `in?` a substring test that also accepts fragments such as
  # "net" or "amazon".
  domain.in?(%w[imgix.net amazonaws.com])
end

View File

@@ -65,7 +65,6 @@ class Source::URL::Tumblr < Source::URL
end
def profile_url
return nil unless @blog_name.present?
"https://#{@blog_name}.tumblr.com"
"https://#{blog_name}.tumblr.com" if blog_name.present?
end
end

View File

@@ -26,7 +26,7 @@ class Source::URL::Twitter < Source::URL
# https://developer.twitter.com/en/docs/developer-utilities/configuration/api-reference/get-help-configuration
RESERVED_USERNAMES = %w[home i intent search]
attr_reader :status_id, :twitter_username
attr_reader :status_id, :twitter_username, :user_id
def self.match?(url)
url.host.in?(%w[twitter.com mobile.twitter.com pic.twitter.com pbs.twimg.com video.twimg.com t.co])
@@ -51,6 +51,18 @@ class Source::URL::Twitter < Source::URL
in "twitter.com", username, *rest
@twitter_username = username unless username.in?(RESERVED_USERNAMES)
# https://twitter.com/intent/user?user_id=1485229827984531457
in "twitter.com", "intent", "user" if params[:user_id].present?
@user_id = params[:user_id]
# https://twitter.com/intent/user?screen_name=ryuudog_NFT
in "twitter.com", "intent", "user" if params[:screen_name].present?
@twitter_username = params[:screen_name]
# https://twitter.com/i/user/889592953
in "twitter.com", "i", "user", user_id
@user_id = user_id
# https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg
# https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg:small
# https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb?format=jpg&name=900x900
@@ -87,4 +99,13 @@ class Source::URL::Twitter < Source::URL
return nil unless @file_path.present?
"#{site}/#{@file_path}:orig"
end
# Build the Twitter profile URL.
#
# Prefers the `twitter.com/<username>` form when `twitter_username` is
# present, and falls back to the `intent/user?user_id=` form built from
# `user_id`.
#
# @return [String, nil] the profile URL, or nil if neither value is present
def profile_url
  return "https://twitter.com/#{twitter_username}" if twitter_username.present?
  return nil unless user_id.present?

  # Alternative form: "https://twitter.com/i/user/#{user_id}"
  "https://twitter.com/intent/user?user_id=#{user_id}"
end
end

View File

@@ -1,7 +1,7 @@
# frozen_string_literal: true
class Source::URL::Weibo < Source::URL
attr_reader :full_image_url
attr_reader :full_image_url, :artist_short_id, :artist_long_id
def self.match?(url)
url.domain.in?(["weibo.com", "weibo.cn", "sinaimg.cn"])
@@ -63,18 +63,12 @@ class Source::URL::Weibo < Source::URL
full_image_url.present?
end
def profile_urls
[profile_short_url, profile_long_url].compact
end
def profile_short_url
return if @artist_short_id.blank?
"https://www.weibo.com/u/#{@artist_short_id}"
end
def profile_long_url
return if @artist_long_id.blank?
"https://www.weibo.com/p/#{@artist_long_id}"
# Build the Weibo profile URL.
#
# Prefers the `/u/<artist_short_id>` form and falls back to the
# `/p/<artist_long_id>` form.
#
# @return [String, nil] the profile URL, or nil if neither ID is present
def profile_url
  return "https://www.weibo.com/u/#{artist_short_id}" if artist_short_id.present?
  return "https://www.weibo.com/p/#{artist_long_id}" if artist_long_id.present?

  nil
end
def mobile_url

View File

@@ -49,7 +49,7 @@ module Sources
end
def profile_urls
(parsed_url.profile_urls + parsed_referer&.profile_urls.to_a).uniq
[parsed_url.profile_url, parsed_referer&.profile_url].compact.uniq
end
def profile_url