sources: factor out Source::URL::NicoSeiga.

This commit is contained in:
evazion
2022-03-10 04:53:51 -06:00
parent 34854185be
commit 43a665a66d
4 changed files with 123 additions and 79 deletions

View File

@@ -28,6 +28,7 @@ module Source
Source::URL::Lofter,
Source::URL::Mastodon,
Source::URL::Moebooru,
Source::URL::NicoSeiga,
Source::URL::Nijie,
Source::URL::Newgrounds,
Source::URL::PixivSketch,

View File

@@ -0,0 +1,108 @@
# frozen_string_literal: true
# NicoSeiga has two main page types, regular single-image illusts and mangas:
#
# * https://seiga.nicovideo.jp/seiga/im2163478
# * https://seiga.nicovideo.jp/watch/mg122274
#
# It's not possible to tell from the URL alone whether an image belongs to a regular illust or a manga:
#
# * https://lohas.nicoseiga.jp/priv/2e76be4c553c571b5a81e6ea1a69ab1367f02a41/1646904833/2163478 (page: https://seiga.nicovideo.jp/seiga/im2163478)
# * https://lohas.nicoseiga.jp/priv/49807693c31ed226818b9167e8e87561dd19a445/1646904643/4744553 (page: https://seiga.nicovideo.jp/watch/mg122274)
#
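# You can tell them apart by where the /image/source/ URL redirects (illusts go to an /o/ HTML page, manga images go straight to the /priv/ image):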
#
# * https://seiga.nicovideo.jp/image/source/2163478 (redirects to https://lohas.nicoseiga.jp/o/e69bba4bd6c1baaae460452bac3f29e7080ad723/1646902784/3521156)
# * https://seiga.nicovideo.jp/image/source/4744553 (redirects to https://lohas.nicoseiga.jp/priv/54142f438f4effb937e6b484395b478305ca17f2/1646905053/4744553)
#
# Unhandled URLs
#
# * https://www.nicovideo.jp/watch/sm36465441
# * https://www.nicovideo.jp/watch/nm20676560
# * https://lohas.nicoseiga.jp/material/5746c5/4459092
# * https://dic.nicovideo.jp/oekaki/52833.png
#
module Source
class URL::NicoSeiga < Source::URL
attr_reader :illust_id, :manga_id, :image_id, :user_id
def self.match?(url)
url.domain.in?(%w[nicovideo.jp nicoseiga.jp nicomanga.jp nimg.jp])
end
def site_name
"Nico Seiga"
end
def parse
case [host, *path_segments]
# https://seiga.nicovideo.jp/seiga/im520647 (anonymous artist)
# https://seiga.nicovideo.jp/seiga/im3521156
# https://sp.seiga.nicovideo.jp/seiga/im3521156
in /seiga\.nicovideo\.jp$/, "seiga", /^im(\d+)/ => illust_id
@illust_id = $1
@image_id = $1
# https://seiga.nicovideo.jp/watch/mg316708
# https://sp.seiga.nicovideo.jp/watch/mg316708
in /seiga\.nicovideo\.jp$/, "watch", /^mg(\d+)/ => manga_id
@manga_id = $1
# https://seiga.nicovideo.jp/image/source/3521156 (single image; page: https://seiga.nicovideo.jp/seiga/im3312222)
# https://seiga.nicovideo.jp/image/source/4744553 (manga image; page: https://seiga.nicovideo.jp/watch/mg122274)
#
# https://seiga.nicovideo.jp/image/source/3521156 redirects to the html page https://lohas.nicoseiga.jp/o/e69bba4bd6c1baaae460452bac3f29e7080ad723/1646902784/3521156, which contains the image https://lohas.nicoseiga.jp/priv/e69bba4bd6c1baaae460452bac3f29e7080ad723/1646902784/3521156.
# https://seiga.nicovideo.jp/image/source/4744553 redirects to the direct image https://lohas.nicoseiga.jp/priv/54142f438f4effb937e6b484395b478305ca17f2/1646905053/4744553
in "seiga.nicovideo.jp", "image", "source", image_id
@image_id = image_id
# https://seiga.nicovideo.jp/image/source?id=3521156 (redirects to https://lohas.nicoseiga.jp/o/75dfbf6404732969ded3937b89bc41d77420debe/1646906075/3521156)
# https://seiga.nicovideo.jp/image/redirect?id=3583893 (redirects to https://seiga.nicovideo.jp/seiga/im3583893)
in "seiga.nicovideo.jp", "image", ("redirect" | "source") if params[:id].present?
@image_id = params[:id]
# https://lohas.nicoseiga.jp/o/971eb8af9bbcde5c2e51d5ef3a2f62d6d9ff5552/1589933964/3583893 (page: https://seiga.nicovideo.jp/seiga/im3583893)
# https://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893 (page: https://seiga.nicovideo.jp/seiga/im3583893)
# https://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf (page: https://seiga.nicovideo.jp/seiga/im3521156)
in "lohas.nicoseiga.jp", ("priv" | "o"), *, /^\d+$/ => image_id
@image_id = image_id
# https://lohas.nicoseiga.jp/thumb/2163478i (page: https://seiga.nicovideo.jp/seiga/im2163478, image: https://lohas.nicoseiga.jp/priv/2e76be4c553c571b5a81e6ea1a69ab1367f02a41/1646904833/2163478)
# https://lohas.nicoseiga.jp/thumb/1591081q (page: https://seiga.nicovideo.jp/seiga/im1591081, image: https://lohas.nicoseiga.jp/priv/b6a8fc0327624e57f43c29f6e7f18797406681f7/1646904868/1591081)
in "lohas.nicoseiga.jp", "thumb", /^(\d+)[iq]$/ => image_id
@illust_id = $1
@image_id = $1
# https://lohas.nicoseiga.jp/thumb/4744553p (page: https://seiga.nicovideo.jp/watch/mg122274, image: https://lohas.nicoseiga.jp/priv/49807693c31ed226818b9167e8e87561dd19a445/1646904643/4744553)
in "lohas.nicoseiga.jp", "thumb", /^(\d+)p$/ => image_id
@illust_id = $1
# https://dcdn.cdn.nimg.jp/priv/62a56a7f67d3d3746ae5712db9cac7d465f4a339/1592186183/10466669
# https://dcdn.cdn.nimg.jp/nicoseiga/lohas/o/8ba0a9b2ea34e1ef3b5cc50785bd10cd63ec7e4a/1592187477/10466669
in "dcdn.cdn.nimg.jpg", *, /^\d+$/ => image_id
@image_id = image_id
# https://deliver.cdn.nicomanga.jp/thumb/aHR0cHM6Ly9kZWxpdmVyLmNkbi5uaWNvbWFuZ2EuanAvdGh1bWIvODEwMDk2OHA_MTU2NTY5OTg4MA.webp (page: https://seiga.nicovideo.jp/watch/mg316708, full image: https://lohas.nicoseiga.jp/priv/1f6d38ef2ba6fc9d9e27823babc4cf721cef16ec/1646906617/8100969)
in "deliver.cdn.nicomanga.jp", *rest
# unhandled
# https://seiga.nicovideo.jp/user/illust/456831
in "seiga.nicovideo.jp", "user", "illust", user_id
@user_id = user_id
else
end
end
def page_url
if illust_id.present?
"https://seiga.nicovideo.jp/seiga/im#{illust_id}"
elsif manga_id.present?
"https://seiga.nicovideo.jp/watch/mg#{manga_id}"
elsif image_id.present?
"https://seiga.nicovideo.jp/image/source/#{image_id}"
end
end
end
end

View File

@@ -1,67 +1,19 @@
# frozen_string_literal: true
# Direct URL
# * https://lohas.nicoseiga.jp/o/971eb8af9bbcde5c2e51d5ef3a2f62d6d9ff5552/1589933964/3583893
# * http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf
# * http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893
# * https://dcdn.cdn.nimg.jp/priv/62a56a7f67d3d3746ae5712db9cac7d465f4a339/1592186183/10466669
# * https://dcdn.cdn.nimg.jp/nicoseiga/lohas/o/8ba0a9b2ea34e1ef3b5cc50785bd10cd63ec7e4a/1592187477/10466669
#
# * http://lohas.nicoseiga.jp/material/5746c5/4459092
#
# (Manga direct url)
# * https://lohas.nicoseiga.jp/priv/f5b8966fd53bf7e06cccff9fbb2c4eef62877538/1590752727/8947170
#
# Samples
# * http://lohas.nicoseiga.jp/thumb/2163478i?
# * https://lohas.nicoseiga.jp/thumb/8947170p
#
## The direct urls and samples above can belong to both illust and manga.
## There's two ways to tell them apart:
## * visit the /source/ equivalent: illusts redirect to the /o/ intermediary page, manga redirect to /priv/ directly
## * try an api call: illusts will succeed, manga will fail
#
# Source Link
# * http://seiga.nicovideo.jp/image/source?id=3312222
#
# Illust Page URL
# * https://seiga.nicovideo.jp/seiga/im3521156
# * https://seiga.nicovideo.jp/seiga/im520647 (anonymous artist)
#
# Manga Page URL
# * http://seiga.nicovideo.jp/watch/mg316708
#
# Video Page URL (not supported)
# * https://www.nicovideo.jp/watch/sm36465441
#
# Oekaki
# * https://dic.nicovideo.jp/oekaki/52833.png
# @see Source::URL::NicoSeiga
module Sources
module Strategies
class NicoSeiga < Base
DIRECT = %r{\Ahttps?://lohas\.nicoseiga\.jp/(?:priv|o)/(?:\w+/\d+/)?(?<image_id>\d+)(?:\?.+)?}i
CDN_DIRECT = %r{\Ahttps?://dcdn\.cdn\.nimg\.jp/.+/\w+/\d+/(?<image_id>\d+)}i
SOURCE = %r{\Ahttps?://seiga\.nicovideo\.jp/image/source(?:/|\?id=)(?<image_id>\d+)}i
ILLUST_THUMB = %r{\Ahttps?://lohas\.nicoseiga\.jp/thumb/(?<illust_id>\d+)i}i
MANGA_THUMB = %r{\Ahttps?://lohas\.nicoseiga\.jp/thumb/(?<image_id>\d+)p}i
ILLUST_PAGE = %r{\Ahttps?://(?:sp\.)?seiga\.nicovideo\.jp/seiga/im(?<illust_id>\d+)}i
MANGA_PAGE = %r{\Ahttps?://(?:sp\.)?seiga\.nicovideo\.jp/watch/mg(?<manga_id>\d+)}i
PROFILE_PAGE = %r{\Ahttps?://seiga\.nicovideo\.jp/user/illust/(?<artist_id>\d+)}i
def self.enabled?
Danbooru.config.nico_seiga_user_session.present?
end
def domains
["nicoseiga.jp", "nicovideo.jp"]
def match?
Source::URL::NicoSeiga === parsed_url
end
def site_name
"Nico Seiga"
parsed_url.site_name
end
def image_urls
@@ -96,20 +48,15 @@ module Sources
end
def page_url
if illust_id.present?
"https://seiga.nicovideo.jp/seiga/im#{illust_id}"
elsif manga_id.present?
"https://seiga.nicovideo.jp/watch/mg#{manga_id}"
elsif image_id.present?
"https://seiga.nicovideo.jp/image/source/#{image_id}"
end
# XXX what if referer isn't from NicoSeiga?
parsed_referer&.page_url || parsed_url.page_url
end
def profile_url
user_id = api_client&.user_id
return if user_id.blank? # artists can be anonymous
"http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"
"https://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"
end
def artist_name
@@ -136,15 +83,7 @@ module Sources
end
def normalize_for_source
# There's no way to tell apart illust from manga from the direct image url alone. What's worse,
# nicoseiga itself doesn't know how to normalize back to manga, so if it's not an illust type then
# it's impossible to get the original manga page back from the image url alone.
# /source/ links on the other hand correctly redirect, hence we use them to normalize saved direct sources.
if url =~ DIRECT
"https://seiga.nicovideo.jp/image/source/#{image_id}"
else
page_url
end
page_url
end
def tag_name
@@ -165,19 +104,15 @@ module Sources
end
def image_id
image_id_from_url(url)
end
def image_id_from_url(url)
url[DIRECT, :image_id] || url[SOURCE, :image_id] || url[MANGA_THUMB, :image_id] || url[CDN_DIRECT, :image_id]
parsed_url.image_id || parsed_referer&.image_id
end
def illust_id
urls.map { |u| u[ILLUST_PAGE, :illust_id] || u[ILLUST_THUMB, :illust_id] }.compact.first
parsed_url.illust_id || parsed_referer&.illust_id
end
def manga_id
urls.compact.map { |u| u[MANGA_PAGE, :manga_id] }.compact.first
parsed_url.manga_id || parsed_referer&.manga_id
end
def api_client

View File

@@ -14,9 +14,9 @@ module Sources
end
should "get the profile" do
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @site_1.profile_url)
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @site_2.profile_url)
assert_equal("http://seiga.nicovideo.jp/user/illust/20797022", @site_3.profile_url)
assert_equal("https://seiga.nicovideo.jp/user/illust/7017777", @site_1.profile_url)
assert_equal("https://seiga.nicovideo.jp/user/illust/7017777", @site_2.profile_url)
assert_equal("https://seiga.nicovideo.jp/user/illust/20797022", @site_3.profile_url)
end
should "get the artist name" do