From 43a665a66d3c5acd7379a593ab51f15986fcbbaa Mon Sep 17 00:00:00 2001
From: evazion
Date: Thu, 10 Mar 2022 04:53:51 -0600
Subject: [PATCH] sources: factor out Source::URL::NicoSeiga.

---
 app/logical/source/url.rb                    |   1 +
 app/logical/source/url/nico_seiga.rb         | 114 +++++++++++++++++++
 app/logical/sources/strategies/nico_seiga.rb |  87 ++-------------
 test/unit/sources/nico_seiga_test.rb         |   6 +-
 4 files changed, 129 insertions(+), 79 deletions(-)
 create mode 100644 app/logical/source/url/nico_seiga.rb

diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb
index 30f2bad46..7cb054f08 100644
--- a/app/logical/source/url.rb
+++ b/app/logical/source/url.rb
@@ -28,6 +28,7 @@ module Source
       Source::URL::Lofter,
       Source::URL::Mastodon,
       Source::URL::Moebooru,
+      Source::URL::NicoSeiga,
       Source::URL::Nijie,
       Source::URL::Newgrounds,
       Source::URL::PixivSketch,
diff --git a/app/logical/source/url/nico_seiga.rb b/app/logical/source/url/nico_seiga.rb
new file mode 100644
index 000000000..8e7079e19
--- /dev/null
+++ b/app/logical/source/url/nico_seiga.rb
@@ -0,0 +1,114 @@
+# frozen_string_literal: true
+
+# NicoSeiga has two main page types, regular single-image illusts and mangas:
+#
+# * https://seiga.nicovideo.jp/seiga/im2163478
+# * https://seiga.nicovideo.jp/watch/mg122274
+#
+# It's not possible to tell from the URL alone whether an image belongs to a regular illust or a manga:
+#
+# * https://lohas.nicoseiga.jp/priv/2e76be4c553c571b5a81e6ea1a69ab1367f02a41/1646904833/2163478 (page: https://seiga.nicovideo.jp/seiga/im2163478)
+# * https://lohas.nicoseiga.jp/priv/49807693c31ed226818b9167e8e87561dd19a445/1646904643/4744553 (page: https://seiga.nicovideo.jp/watch/mg122274)
+#
+# You can tell them apart like this:
+#
+# * https://seiga.nicovideo.jp/image/source/2163478 (redirects to https://lohas.nicoseiga.jp/o/e69bba4bd6c1baaae460452bac3f29e7080ad723/1646902784/3521156)
+# * https://seiga.nicovideo.jp/image/source/4744553 (redirects to https://lohas.nicoseiga.jp/priv/54142f438f4effb937e6b484395b478305ca17f2/1646905053/4744553)
+#
+# Unhandled URLs
+#
+# * https://www.nicovideo.jp/watch/sm36465441
+# * https://www.nicovideo.jp/watch/nm20676560
+# * https://lohas.nicoseiga.jp/material/5746c5/4459092
+# * https://dic.nicovideo.jp/oekaki/52833.png
+#
+module Source
+  class URL::NicoSeiga < Source::URL
+    attr_reader :illust_id, :manga_id, :image_id, :user_id
+
+    # True when the URL's domain is one of the four domains this class parses.
+    def self.match?(url)
+      url.domain.in?(%w[nicovideo.jp nicoseiga.jp nicomanga.jp nimg.jp])
+    end
+
+    def site_name
+      "Nico Seiga"
+    end
+
+    # Pattern-matches the host and path segments against the known URL shapes below and stores whichever
+    # of @illust_id, @manga_id, @image_id, and @user_id can be read off the URL. Unrecognized URLs set nothing.
+    def parse
+      case [host, *path_segments]
+
+      # https://seiga.nicovideo.jp/seiga/im520647 (anonymous artist)
+      # https://seiga.nicovideo.jp/seiga/im3521156
+      # https://sp.seiga.nicovideo.jp/seiga/im3521156
+      in /seiga\.nicovideo\.jp$/, "seiga", /^im(\d+)/ => illust_id
+        @illust_id = $1
+        @image_id = $1
+
+      # https://seiga.nicovideo.jp/watch/mg316708
+      # https://sp.seiga.nicovideo.jp/watch/mg316708
+      in /seiga\.nicovideo\.jp$/, "watch", /^mg(\d+)/ => manga_id
+        @manga_id = $1
+
+      # https://seiga.nicovideo.jp/image/source/3521156 (single image; page: https://seiga.nicovideo.jp/seiga/im3312222)
+      # https://seiga.nicovideo.jp/image/source/4744553 (manga image; page: https://seiga.nicovideo.jp/watch/mg122274)
+      #
+      # https://seiga.nicovideo.jp/image/source/3521156 redirects to the html page https://lohas.nicoseiga.jp/o/e69bba4bd6c1baaae460452bac3f29e7080ad723/1646902784/3521156, which contains the image https://lohas.nicoseiga.jp/priv/e69bba4bd6c1baaae460452bac3f29e7080ad723/1646902784/3521156.
+      # https://seiga.nicovideo.jp/image/source/4744553 redirects to the direct image https://lohas.nicoseiga.jp/priv/54142f438f4effb937e6b484395b478305ca17f2/1646905053/4744553
+      in "seiga.nicovideo.jp", "image", "source", image_id
+        @image_id = image_id
+
+      # https://seiga.nicovideo.jp/image/source?id=3521156 (redirects to https://lohas.nicoseiga.jp/o/75dfbf6404732969ded3937b89bc41d77420debe/1646906075/3521156)
+      # https://seiga.nicovideo.jp/image/redirect?id=3583893 (redirects to https://seiga.nicovideo.jp/seiga/im3583893)
+      in "seiga.nicovideo.jp", "image", ("redirect" | "source") if params[:id].present?
+        @image_id = params[:id]
+
+      # https://lohas.nicoseiga.jp/o/971eb8af9bbcde5c2e51d5ef3a2f62d6d9ff5552/1589933964/3583893 (page: https://seiga.nicovideo.jp/seiga/im3583893)
+      # https://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893 (page: https://seiga.nicovideo.jp/seiga/im3583893)
+      # https://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf (page: https://seiga.nicovideo.jp/seiga/im3521156)
+      in "lohas.nicoseiga.jp", ("priv" | "o"), *, /^\d+$/ => image_id
+        @image_id = image_id
+
+      # https://lohas.nicoseiga.jp/thumb/2163478i (page: https://seiga.nicovideo.jp/seiga/im2163478, image: https://lohas.nicoseiga.jp/priv/2e76be4c553c571b5a81e6ea1a69ab1367f02a41/1646904833/2163478)
+      # https://lohas.nicoseiga.jp/thumb/1591081q (page: https://seiga.nicovideo.jp/seiga/im1591081, image: https://lohas.nicoseiga.jp/priv/b6a8fc0327624e57f43c29f6e7f18797406681f7/1646904868/1591081)
+      in "lohas.nicoseiga.jp", "thumb", /^(\d+)[iq]$/ => image_id
+        @illust_id = $1
+        @image_id = $1
+
+      # https://lohas.nicoseiga.jp/thumb/4744553p (page: https://seiga.nicovideo.jp/watch/mg122274, image: https://lohas.nicoseiga.jp/priv/49807693c31ed226818b9167e8e87561dd19a445/1646904643/4744553)
+      # The number in a "p" thumbnail is the id of a manga page image, not an illust id.
+      in "lohas.nicoseiga.jp", "thumb", /^(\d+)p$/ => image_id
+        @image_id = $1
+
+      # https://dcdn.cdn.nimg.jp/priv/62a56a7f67d3d3746ae5712db9cac7d465f4a339/1592186183/10466669
+      # https://dcdn.cdn.nimg.jp/nicoseiga/lohas/o/8ba0a9b2ea34e1ef3b5cc50785bd10cd63ec7e4a/1592187477/10466669
+      in "dcdn.cdn.nimg.jp", *, /^\d+$/ => image_id
+        @image_id = image_id
+
+      # https://deliver.cdn.nicomanga.jp/thumb/aHR0cHM6Ly9kZWxpdmVyLmNkbi5uaWNvbWFuZ2EuanAvdGh1bWIvODEwMDk2OHA_MTU2NTY5OTg4MA.webp (page: https://seiga.nicovideo.jp/watch/mg316708, full image: https://lohas.nicoseiga.jp/priv/1f6d38ef2ba6fc9d9e27823babc4cf721cef16ec/1646906617/8100969)
+      in "deliver.cdn.nicomanga.jp", *rest
+        # unhandled
+
+      # https://seiga.nicovideo.jp/user/illust/456831
+      in "seiga.nicovideo.jp", "user", "illust", user_id
+        @user_id = user_id
+
+      else
+      end
+    end
+
+    # Page URL built from the most specific id that was parsed: illust page, then manga page, then the
+    # /image/source/ link for a bare image id. Returns nil when none of those ids was parsed.
+    def page_url
+      if illust_id.present?
+        "https://seiga.nicovideo.jp/seiga/im#{illust_id}"
+      elsif manga_id.present?
+        "https://seiga.nicovideo.jp/watch/mg#{manga_id}"
+      elsif image_id.present?
+        "https://seiga.nicovideo.jp/image/source/#{image_id}"
+      end
+    end
+  end
+end
diff --git a/app/logical/sources/strategies/nico_seiga.rb b/app/logical/sources/strategies/nico_seiga.rb
index 4ccae57a1..6f74ef73b 100644
--- a/app/logical/sources/strategies/nico_seiga.rb
+++ b/app/logical/sources/strategies/nico_seiga.rb
@@ -1,67 +1,19 @@
 # frozen_string_literal: true
 
-# Direct URL
-# * https://lohas.nicoseiga.jp/o/971eb8af9bbcde5c2e51d5ef3a2f62d6d9ff5552/1589933964/3583893
-# * http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf
-# * http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893
-# * https://dcdn.cdn.nimg.jp/priv/62a56a7f67d3d3746ae5712db9cac7d465f4a339/1592186183/10466669
-# * https://dcdn.cdn.nimg.jp/nicoseiga/lohas/o/8ba0a9b2ea34e1ef3b5cc50785bd10cd63ec7e4a/1592187477/10466669
-#
-# * http://lohas.nicoseiga.jp/material/5746c5/4459092
-#
-# (Manga direct url)
-# * https://lohas.nicoseiga.jp/priv/f5b8966fd53bf7e06cccff9fbb2c4eef62877538/1590752727/8947170
-#
-# Samples
-# * http://lohas.nicoseiga.jp/thumb/2163478i?
-# * https://lohas.nicoseiga.jp/thumb/8947170p
-#
-## The direct urls and samples above can belong to both illust and manga.
-## There's two ways to tell them apart:
-## * visit the /source/ equivalent: illusts redirect to the /o/ intermediary page, manga redirect to /priv/ directly
-## * try an api call: illusts will succeed, manga will fail
-#
-# Source Link
-# * http://seiga.nicovideo.jp/image/source?id=3312222
-#
-# Illust Page URL
-# * https://seiga.nicovideo.jp/seiga/im3521156
-# * https://seiga.nicovideo.jp/seiga/im520647 (anonymous artist)
-#
-# Manga Page URL
-# * http://seiga.nicovideo.jp/watch/mg316708
-#
-# Video Page URL (not supported)
-# * https://www.nicovideo.jp/watch/sm36465441
-#
-# Oekaki
-# * https://dic.nicovideo.jp/oekaki/52833.png
-
+# @see Source::URL::NicoSeiga
 module Sources
   module Strategies
     class NicoSeiga < Base
-      DIRECT = %r{\Ahttps?://lohas\.nicoseiga\.jp/(?:priv|o)/(?:\w+/\d+/)?(?<image_id>\d+)(?:\?.+)?}i
-      CDN_DIRECT = %r{\Ahttps?://dcdn\.cdn\.nimg\.jp/.+/\w+/\d+/(?<image_id>\d+)}i
-      SOURCE = %r{\Ahttps?://seiga\.nicovideo\.jp/image/source(?:/|\?id=)(?<image_id>\d+)}i
-
-      ILLUST_THUMB = %r{\Ahttps?://lohas\.nicoseiga\.jp/thumb/(?<illust_id>\d+)i}i
-      MANGA_THUMB = %r{\Ahttps?://lohas\.nicoseiga\.jp/thumb/(?<image_id>\d+)p}i
-
-      ILLUST_PAGE = %r{\Ahttps?://(?:sp\.)?seiga\.nicovideo\.jp/seiga/im(?<illust_id>\d+)}i
-      MANGA_PAGE = %r{\Ahttps?://(?:sp\.)?seiga\.nicovideo\.jp/watch/mg(?<manga_id>\d+)}i
-
-      PROFILE_PAGE = %r{\Ahttps?://seiga\.nicovideo\.jp/user/illust/(?<artist_id>\d+)}i
-
       def self.enabled?
         Danbooru.config.nico_seiga_user_session.present?
       end
 
-      def domains
-        ["nicoseiga.jp", "nicovideo.jp"]
+      def match?
+        Source::URL::NicoSeiga === parsed_url
       end
 
       def site_name
-        "Nico Seiga"
+        parsed_url.site_name
       end
 
       def image_urls
@@ -96,20 +48,15 @@ module Sources
       end
 
       def page_url
-        if illust_id.present?
-          "https://seiga.nicovideo.jp/seiga/im#{illust_id}"
-        elsif manga_id.present?
-          "https://seiga.nicovideo.jp/watch/mg#{manga_id}"
-        elsif image_id.present?
-          "https://seiga.nicovideo.jp/image/source/#{image_id}"
-        end
+        # XXX what if referer isn't from NicoSeiga?
+        parsed_referer&.page_url || parsed_url.page_url
       end
 
       def profile_url
         user_id = api_client&.user_id
         return if user_id.blank? # artists can be anonymous
 
-        "http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"
+        "https://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"
       end
 
       def artist_name
@@ -136,15 +83,7 @@ module Sources
       end
 
       def normalize_for_source
-        # There's no way to tell apart illust from manga from the direct image url alone. What's worse,
-        # nicoseiga itself doesn't know how to normalize back to manga, so if it's not an illust type then
-        # it's impossible to get the original manga page back from the image url alone.
-        # /source/ links on the other hand correctly redirect, hence we use them to normalize saved direct sources.
-        if url =~ DIRECT
-          "https://seiga.nicovideo.jp/image/source/#{image_id}"
-        else
-          page_url
-        end
+        page_url
       end
 
       def tag_name
@@ -165,19 +104,15 @@ module Sources
       end
 
       def image_id
-        image_id_from_url(url)
-      end
-
-      def image_id_from_url(url)
-        url[DIRECT, :image_id] || url[SOURCE, :image_id] || url[MANGA_THUMB, :image_id] || url[CDN_DIRECT, :image_id]
+        parsed_url.image_id || parsed_referer&.image_id
       end
 
       def illust_id
-        urls.map { |u| u[ILLUST_PAGE, :illust_id] || u[ILLUST_THUMB, :illust_id] }.compact.first
+        parsed_url.illust_id || parsed_referer&.illust_id
       end
 
       def manga_id
-        urls.compact.map { |u| u[MANGA_PAGE, :manga_id] }.compact.first
+        parsed_url.manga_id || parsed_referer&.manga_id
       end
 
       def api_client
diff --git a/test/unit/sources/nico_seiga_test.rb b/test/unit/sources/nico_seiga_test.rb
index 11bd81a8c..a94a029e2 100644
--- a/test/unit/sources/nico_seiga_test.rb
+++ b/test/unit/sources/nico_seiga_test.rb
@@ -14,9 +14,9 @@ module Sources
       end
 
       should "get the profile" do
-        assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @site_1.profile_url)
-        assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @site_2.profile_url)
-        assert_equal("http://seiga.nicovideo.jp/user/illust/20797022", @site_3.profile_url)
+        assert_equal("https://seiga.nicovideo.jp/user/illust/7017777", @site_1.profile_url)
+        assert_equal("https://seiga.nicovideo.jp/user/illust/7017777", @site_2.profile_url)
+        assert_equal("https://seiga.nicovideo.jp/user/illust/20797022", @site_3.profile_url)
       end
 
       should "get the artist name" do