From 9f0e85e1b5af8b364f3612aa63f97663050fcdcb Mon Sep 17 00:00:00 2001 From: nonamethanks Date: Tue, 2 Jun 2020 12:29:04 +0200 Subject: [PATCH 1/4] Refactor nicoseiga strategy * Get rid of mechanize, fully switch to Danbooru::Http * Switch to mobile api, improving speed * Merge main and manga clients * Add full support for manga pages * Add support for anonymous and r-15 images * Don't fail when attempting to upload oekaki direct links * Various misc fixes --- app/logical/nico_seiga_api_client.rb | 136 ++++++----- app/logical/nico_seiga_manga_api_client.rb | 60 ----- app/logical/sources/strategies/nico_seiga.rb | 239 +++++++++---------- test/unit/sources/nico_seiga_manga_test.rb | 23 -- test/unit/sources/nico_seiga_test.rb | 105 ++++++-- 5 files changed, 278 insertions(+), 285 deletions(-) delete mode 100644 app/logical/nico_seiga_manga_api_client.rb delete mode 100644 test/unit/sources/nico_seiga_manga_test.rb diff --git a/app/logical/nico_seiga_api_client.rb b/app/logical/nico_seiga_api_client.rb index cb596a34e..4290dae96 100644 --- a/app/logical/nico_seiga_api_client.rb +++ b/app/logical/nico_seiga_api_client.rb @@ -1,83 +1,101 @@ class NicoSeigaApiClient extend Memoist - BASE_URL = "http://seiga.nicovideo.jp/api" - attr_reader :illust_id + XML_API = "https://seiga.nicovideo.jp/api" - def self.agent - mech = Mechanize.new - mech.redirect_ok = false - mech.keep_alive = false + def initialize(work_id:, type:) + @work_id = work_id + @work_type = type + end - session = Cache.get("nico-seiga-session") - if session - cookie = Mechanize::Cookie.new("user_session", session) - cookie.domain = ".nicovideo.jp" - cookie.path = "/" - mech.cookie_jar.add(cookie) - else - mech.get("https://account.nicovideo.jp/login") do |page| - page.form_with(:id => "login_form") do |form| - form["mail_tel"] = Danbooru.config.nico_seiga_login - form["password"] = Danbooru.config.nico_seiga_password - end.click_button - end - session = mech.cookie_jar.cookies.select {|c| c.name == 
"user_session"}.first - if session - Cache.put("nico-seiga-session", session.value, 1.week) - else - raise "Session not found" + def image_ids + if @work_type == "illust" + [api_response["id"]] + elsif @work_type == "manga" + manga_api_response.map do |x| + x["meta"]["source_url"].match(%r{/thumb/(\d+)\w}i).captures[0] end end - - # This cookie needs to be set to allow viewing of adult works - cookie = Mechanize::Cookie.new("skip_fetish_warning", "1") - cookie.domain = "seiga.nicovideo.jp" - cookie.path = "/" - mech.cookie_jar.add(cookie) - - mech.redirect_ok = true - mech - end - - def initialize(illust_id:, user_id: nil) - @illust_id = illust_id - @user_id = user_id - end - - def image_id - illust_xml["response"]["image"]["id"].to_i - end - - def user_id - @user_id || illust_xml["response"]["image"]["user_id"].to_i end def title - illust_xml["response"]["image"]["title"] + api_response["title"] end - def desc - illust_xml["response"]["image"]["description"] + def description + api_response["description"] end - def moniker - artist_xml["response"]["user"]["nickname"] + def tags + api_response.dig("tag_list", "tag").to_a.map { |t| t["name"] }.compact end - def illust_xml - get("#{BASE_URL}/illust/info?id=#{illust_id}") + def user_id + api_response["user_id"] end - def artist_xml - get("#{BASE_URL}/user/info?id=#{user_id}") + def user_name + if @work_type == "illust" + api_response["nickname"] + elsif @work_type == "manga" + user_api_response(user_id)["nickname"] + end + end + + def api_response + if @work_type == "illust" + resp = get("https://sp.seiga.nicovideo.jp/ajax/seiga/im#{@work_id}") + return {} if resp.blank? || resp.code.to_i == 404 + api_response = JSON.parse(resp)["target_image"] + + elsif @work_type == "manga" + resp = Danbooru::Http.cache(1.minute).get("#{XML_API}/theme/info?id=#{@work_id}") + return {} if resp.blank? 
|| resp.code.to_i == 404 + api_response = Hash.from_xml(resp.to_s)["response"]["theme"] + end + + api_response || {} + rescue JSON::ParserError + {} + end + + def manga_api_response + resp = get("https://ssl.seiga.nicovideo.jp/api/v1/app/manga/episodes/#{@work_id}/frames") + return {} if resp.blank? || resp.code.to_i == 404 + JSON.parse(resp)["data"]["result"] + rescue JSON::ParserError + {} + end + + def user_api_response(user_id) + resp = Danbooru::Http.cache(1.minute).get("#{XML_API}/user/info?id=#{user_id}") + return {} if resp.blank? || resp.code.to_i == 404 + Hash.from_xml(resp.to_s)["response"]["user"] end def get(url) - response = Danbooru::Http.cache(1.minute).get(url) - raise "nico seiga api call failed (code=#{response.code}, body=#{response.body})" if response.code != 200 + cookie_header = Cache.get("nicoseiga-cookie-header") || regenerate_cookie_header - Hash.from_xml(response.to_s) + resp = Danbooru::Http.headers({Cookie: cookie_header}).cache(1.minute).get(url) + + if resp.headers["Location"] =~ %r{seiga\.nicovideo\.jp/login/}i + cookie_header = regenerate_cookie_header + resp = Danbooru::Http.headers({Cookie: cookie_header}).cache(1.minute).get(url) + end + + resp end - memoize :artist_xml, :illust_xml + def regenerate_cookie_header + form = { + mail_tel: Danbooru.config.nico_seiga_login, + password: Danbooru.config.nico_seiga_password + } + resp = Danbooru::Http.post("https://account.nicovideo.jp/api/v1/login", form: form) + cookies = resp.cookies.map { |c| c.name + "=" + c.value } + cookies << "accept_fetish_warning=2" + + Cache.put("nicoseiga-cookie-header", cookies.join(";"), 1.week) + end + + memoize :api_response, :manga_api_response, :user_api_response end diff --git a/app/logical/nico_seiga_manga_api_client.rb b/app/logical/nico_seiga_manga_api_client.rb deleted file mode 100644 index 4bdb6a7f4..000000000 --- a/app/logical/nico_seiga_manga_api_client.rb +++ /dev/null @@ -1,60 +0,0 @@ -class NicoSeigaMangaApiClient - extend Memoist - BASE_URL 
= "https://seiga.nicovideo.jp/api" - attr_reader :theme_id - - def initialize(theme_id) - @theme_id = theme_id - end - - def user_id - theme_info_xml["response"]["theme"]["user_id"].to_i - end - - def title - theme_info_xml["response"]["theme"]["title"] - end - - def desc - theme_info_xml["response"]["theme"]["description"] - end - - def moniker - artist_xml["response"]["user"]["nickname"] - end - - def image_ids - images = theme_data_xml["response"]["image_list"]["image"] - images = [images] unless images.is_a?(Array) - images.map {|x| x["id"]} - end - - def tags - theme_info_xml["response"]["theme"]["tag_list"]["tag"].map {|x| x["name"]} - end - - def theme_data_xml - uri = "#{BASE_URL}/theme/data?theme_id=#{theme_id}" - body = NicoSeigaApiClient.agent.get(uri).body - Hash.from_xml(body) - end - - def theme_info_xml - uri = "#{BASE_URL}/theme/info?id=#{theme_id}" - body = NicoSeigaApiClient.agent.get(uri).body - Hash.from_xml(body) - end - - def artist_xml - get("#{BASE_URL}/user/info?id=#{user_id}") - end - - def get(url) - response = Danbooru::Http.cache(1.minute).get(url) - raise "nico seiga api call failed (code=#{response.code}, body=#{response.body})" if response.code != 200 - - Hash.from_xml(response.to_s) - end - - memoize :theme_data_xml, :theme_info_xml, :artist_xml -end diff --git a/app/logical/sources/strategies/nico_seiga.rb b/app/logical/sources/strategies/nico_seiga.rb index 32a3fd65f..3cefcface 100644 --- a/app/logical/sources/strategies/nico_seiga.rb +++ b/app/logical/sources/strategies/nico_seiga.rb @@ -1,25 +1,51 @@ -# Image Direct URL +# Direct URL # * https://lohas.nicoseiga.jp/o/971eb8af9bbcde5c2e51d5ef3a2f62d6d9ff5552/1589933964/3583893 # * http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf # * http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893 +# +# * http://lohas.nicoseiga.jp/material/5746c5/4459092 +# +# (Manga direct url) +# * 
https://lohas.nicoseiga.jp/priv/f5b8966fd53bf7e06cccff9fbb2c4eef62877538/1590752727/8947170 +# +# Samples +# * http://lohas.nicoseiga.jp/thumb/2163478i? +# * https://lohas.nicoseiga.jp/thumb/8947170p +# +## The direct urls and samples above can belong to both illust and manga. +## There's two ways to tell them apart: +## * visit the /source/ equivalent: illusts redirect to the /o/ intermediary page, manga redirect to /priv/ directly +## * try an api call: illusts will succeed, manga will fail +# +# Source Link # * http://seiga.nicovideo.jp/image/source?id=3312222 # -# Image Page URL +# Illust Page URL # * https://seiga.nicovideo.jp/seiga/im3521156 +# * https://seiga.nicovideo.jp/seiga/im520647 (anonymous artist) # # Manga Page URL # * http://seiga.nicovideo.jp/watch/mg316708 +# +# Video Page URL (not supported) +# * https://www.nicovideo.jp/watch/sm36465441 +# +# Oekaki +# * https://dic.nicovideo.jp/oekaki/52833.png module Sources module Strategies class NicoSeiga < Base - URL = %r!\Ahttps?://(?:\w+\.)?nico(?:seiga|video)\.jp! - DIRECT1 = %r!\Ahttps?://lohas\.nicoseiga\.jp/priv/[0-9a-f]+! - DIRECT2 = %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[0-9a-f]+/\d+/\d+! - DIRECT3 = %r!\Ahttps?://seiga\.nicovideo\.jp/images/source/\d+! 
- PAGE = %r!\Ahttps?://seiga\.nicovideo\.jp/seiga/im(\d+)!i - PROFILE = %r!\Ahttps?://seiga\.nicovideo\.jp/user/illust/(\d+)!i - MANGA_PAGE = %r!\Ahttps?://seiga\.nicovideo\.jp/watch/mg(\d+)!i + DIRECT = %r{\Ahttps?://lohas\.nicoseiga\.jp/(priv|o)/(?:\w+/\d+/)?(?<image_id>\d+)(?:\?.+)?}i + SOURCE = %r{\Ahttps?://seiga\.nicovideo\.jp/image/source(?:/|\?id=)(?<image_id>\d+)}i + + ILLUST_THUMB = %r{\Ahttps?://lohas\.nicoseiga\.jp/thumb/(?<illust_id>\d+)i}i + MANGA_THUMB = %r{\Ahttps?://lohas\.nicoseiga\.jp/thumb/(?<image_id>\d+)p}i + + ILLUST_PAGE = %r{\Ahttps?://(?:sp\.)?seiga\.nicovideo\.jp/seiga/im(?<illust_id>\d+)}i + MANGA_PAGE = %r{\Ahttps?://(?:sp\.)?seiga\.nicovideo\.jp/watch/mg(?<manga_id>\d+)}i + + PROFILE_PAGE = %r{\Ahttps?://seiga\.nicovideo\.jp/user/illust/(?<id>\d+)}i def domains ["nicoseiga.jp", "nicovideo.jp"] @@ -30,160 +56,125 @@ module Sources end def image_urls - if url =~ DIRECT1 - return [url] + urls = [] + return urls if api_client&.api_response.blank? + + if image_id.present? + urls << "https://seiga.nicovideo.jp/image/source/#{image_id}" + elsif illust_id.present? + urls << "https://seiga.nicovideo.jp/image/source/#{illust_id}" + elsif manga_id.present? && api_client.image_ids.present? + urls += api_client.image_ids.map { |id| "https://seiga.nicovideo.jp/image/source/#{id}" } + end + urls + end + + def image_url + return if image_urls.blank? + return url if api_client.blank? + + img = case url + when DIRECT then "https://seiga.nicovideo.jp/image/source/#{image_id_from_url(url)}" + when SOURCE then url + else image_urls.first end - if theme_id - return api_client.image_ids.map do |image_id| - "https://seiga.nicovideo.jp/image/source/#{image_id}" - end - end - - link = page.search("a#illust_link") - - if link.any? 
- image_url = "http://seiga.nicovideo.jp" + link[0]["href"] - page = agent.get(image_url) # need to follow this redirect while logged in or it won't work - - if page.is_a?(Mechanize::Image) - return [page.uri.to_s] - end - - images = page.search("div.illust_view_big").select {|x| x["data-src"] =~ /\/priv\//} - - if images.any? - return ["http://lohas.nicoseiga.jp" + images[0]["data-src"]] - end - end - - raise "image url not found for (#{url}, #{referer_url})" + resp = api_client.get(img) + resp.headers["Location"]&.gsub(%r{nicoseiga.jp/o/}i, 'nicoseiga.jp/priv/') end def page_url - [url, referer_url].each do |x| - if x =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)! - return "http://seiga.nicovideo.jp/seiga/im#{$1}" - end - - if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i - return "http://seiga.nicovideo.jp/seiga/im#{$1}" - end - - if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i - return "http://seiga.nicovideo.jp/seiga/im#{$1}" - end - - if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i - return "http://seiga.nicovideo.jp/seiga/im#{$1}" - end - - if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i - return "http://seiga.nicovideo.jp/seiga/im#{$1}" - end - - if x =~ %r{/seiga/im\d+} - return x - end - - if x =~ %r{/watch/mg\d+} - return x - end - - if x =~ %r{/image/source\?id=(\d+)} - return "http://seiga.nicovideo.jp/seiga/im#{$1}" - end + if illust_id.present? + "https://seiga.nicovideo.jp/seiga/im#{illust_id}" + elsif manga_id.present? + "https://seiga.nicovideo.jp/watch/mg#{manga_id}" + elsif image_id.present? + "https://seiga.nicovideo.jp/image/source/#{image_id}" end - - return super - end - - def canonical_url - image_url end def profile_url - if url =~ PROFILE - return url - end + user_id = api_client&.user_id + return if user_id.blank? 
# artists can be anonymous "http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}" end def artist_name - api_client.moniker + return if api_client.blank? + api_client.user_name end def artist_commentary_title + return if api_client.blank? api_client.title end def artist_commentary_desc - api_client.desc + return if api_client.blank? + api_client.description + end + + def dtext_artist_commentary_desc + DText.from_html(artist_commentary_desc).gsub(/[^\w]im(\d+)/, ' seiga #\1 ') end def normalize_for_source - if illust_id.present? - "https://seiga.nicovideo.jp/seiga/im#{illust_id}" - elsif theme_id.present? - "http://seiga.nicovideo.jp/watch/mg#{theme_id}" + # There's no way to tell apart illust from manga from the direct image url alone. What's worse, + # nicoseiga itself doesn't know how to normalize back to manga, so if it's not an illust type then + # it's impossible to get the original manga page back from the image url alone. + # /source/ links on the other hand correctly redirect, hence we use them to normalize saved direct sources. + if url =~ DIRECT + "https://seiga.nicovideo.jp/image/source/#{image_id}" + else + page_url end end def tag_name + return if api_client&.user_id.blank? "nicoseiga#{api_client.user_id}" end def tags - string = page.at("meta[name=keywords]").try(:[], "content") || "" - string.split(/,/).map do |name| - [name, "https://seiga.nicovideo.jp/tag/#{CGI.escape(name)}"] + return [] if api_client.blank? + + base_url = "https://seiga.nicovideo.jp/" + base_url += "manga/" if manga_id.present? 
+ base_url += "tag/" + + api_client.tags.map do |name| + [name, base_url + CGI.escape(name)] end end - memoize :tags + + def image_id + image_id_from_url(url) + end + + def image_id_from_url(url) + url[DIRECT, :image_id] || url[SOURCE, :image_id] || url[MANGA_THUMB, :image_id] + end + + def illust_id + urls.map { |u| u[ILLUST_PAGE, :illust_id] || u[ILLUST_THUMB, :illust_id] }.compact.first + end + + def manga_id + urls.compact.map { |u| u[MANGA_PAGE, :manga_id] }.compact.first + end def api_client - if illust_id - NicoSeigaApiClient.new(illust_id: illust_id) - elsif theme_id - NicoSeigaMangaApiClient.new(theme_id) + if illust_id.present? + NicoSeigaApiClient.new(work_id: illust_id, type: "illust") + elsif manga_id.present? + NicoSeigaApiClient.new(work_id: manga_id, type: "manga") + elsif image_id.present? + # We default to illust to attempt getting the api anyway + NicoSeigaApiClient.new(work_id: image_id, type: "illust") end end memoize :api_client - - def illust_id - if page_url =~ PAGE - return $1.to_i - end - - return nil - end - - def theme_id - if page_url =~ MANGA_PAGE - return $1.to_i - end - - return nil - end - - def page - doc = agent.get(page_url) - - if doc.search("a#link_btn_login").any? - # Session cache is invalid, clear it and log in normally. 
- Cache.delete("nico-seiga-session") - doc = agent.get(page_url) - end - - doc - end - memoize :page - - def agent - NicoSeigaApiClient.agent - end - memoize :agent end end end diff --git a/test/unit/sources/nico_seiga_manga_test.rb b/test/unit/sources/nico_seiga_manga_test.rb deleted file mode 100644 index fe5bdc3b8..000000000 --- a/test/unit/sources/nico_seiga_manga_test.rb +++ /dev/null @@ -1,23 +0,0 @@ -require 'test_helper' - -module Sources - class NicoSeigaTest < ActiveSupport::TestCase - context "The source site for nico seiga" do - setup do - @site = Sources::Strategies.find("http://seiga.nicovideo.jp/watch/mg316708", "http://seiga.nicovideo.jp/watch/mg316708") - end - - should "find the image urls" do - assert_equal(["https://seiga.nicovideo.jp/image/source/8100968", "https://seiga.nicovideo.jp/image/source/8100969", "https://seiga.nicovideo.jp/image/source/8100970", "https://seiga.nicovideo.jp/image/source/8100971", "https://seiga.nicovideo.jp/image/source/8100972", "https://seiga.nicovideo.jp/image/source/8100973", "https://seiga.nicovideo.jp/image/source/8100974", "https://seiga.nicovideo.jp/image/source/8100975"], @site.image_urls) - end - - should "find the page url" do - assert_equal("http://seiga.nicovideo.jp/watch/mg316708", @site.page_url) - end - - should "find the artist name" do - assert_not_nil(@site.artist_name) - end - end - end -end diff --git a/test/unit/sources/nico_seiga_test.rb b/test/unit/sources/nico_seiga_test.rb index 328706281..9ab04edb4 100644 --- a/test/unit/sources/nico_seiga_test.rb +++ b/test/unit/sources/nico_seiga_test.rb @@ -6,32 +6,52 @@ module Sources setup do @site_1 = Sources::Strategies.find("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663") @site_2 = Sources::Strategies.find("http://seiga.nicovideo.jp/seiga/im4937663") - @site_3 = Sources::Strategies.find("http://seiga.nicovideo.jp/watch/mg376206") + @site_3 = 
Sources::Strategies.find("https://seiga.nicovideo.jp/watch/mg470189?track=ct_episode") end should "get the profile" do assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @site_1.profile_url) assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @site_2.profile_url) + assert_equal("http://seiga.nicovideo.jp/user/illust/20797022", @site_3.profile_url) end should "get the artist name" do assert_equal("osamari", @site_1.artist_name) assert_equal("osamari", @site_2.artist_name) + assert_equal("風呂", @site_3.artist_name) end should "get the artist commentary" do assert_equal("コジコジ", @site_2.artist_commentary_title) assert_equal("コジコジのドット絵\nこんなかわいらしい容姿で毒を吐くコジコジが堪らん(切実)", @site_2.artist_commentary_desc) + + assert_equal("ハコ女子 1ハコ目", @site_3.artist_commentary_title) + assert_equal("同じクラスの箱田さんはいつもハコを被っている。しかしてその素顔は…? twitter(@hakojoshi1)にてだいたい毎日更新中。こっちだともうちょっと先まで読めるよ。", @site_3.artist_commentary_desc) end - should "get the image url" do - assert_match(/^http:\/\/lohas\.nicoseiga\.jp\/priv\//, @site_1.image_url) - assert_match(/^http:\/\/lohas\.nicoseiga\.jp\/priv\//, @site_2.image_url) + should "get the image url(s)" do + assert_match(%r{^https?://lohas\.nicoseiga\.jp/priv/}, @site_1.image_url) + assert_match(%r{^https?://lohas\.nicoseiga\.jp/priv/}, @site_2.image_url) + + expected = %w[ + https://seiga.nicovideo.jp/image/source/10315315 + https://seiga.nicovideo.jp/image/source/10315318 + https://seiga.nicovideo.jp/image/source/10315319 + https://seiga.nicovideo.jp/image/source/10315320 + https://seiga.nicovideo.jp/image/source/10315321 + https://seiga.nicovideo.jp/image/source/10315322 + https://seiga.nicovideo.jp/image/source/10315323 + https://seiga.nicovideo.jp/image/source/10315324 + https://seiga.nicovideo.jp/image/source/10315316 + ] + assert_equal(expected.sort, @site_3.image_urls.sort) + assert_match(%r{^https?://lohas\.nicoseiga\.jp/priv/}, @site_3.image_url) end should "get the canonical url" do - 
assert_match(%r!\Ahttps?://lohas\.nicoseiga\.jp/priv/\h{40}/\d+/4937663!, @site_1.canonical_url) - assert_match(%r!\Ahttps?://lohas\.nicoseiga\.jp/priv/\h{40}/\d+/4937663!, @site_2.canonical_url) + assert_equal("https://seiga.nicovideo.jp/image/source/4937663", @site_1.canonical_url) + assert_equal("https://seiga.nicovideo.jp/seiga/im4937663", @site_2.canonical_url) + assert_equal("https://seiga.nicovideo.jp/watch/mg470189", @site_3.canonical_url) end should "get the tags" do @@ -42,23 +62,70 @@ module Sources assert_not(@site_2.tags.empty?) first_tag = @site_2.tags.first assert_equal(["アニメ", "https://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag) + + assert_not(@site_3.tags.empty?) + first_tag = @site_3.tags.first + assert_equal(["4コマ漫画", "https://seiga.nicovideo.jp/manga/tag/4%E3%82%B3%E3%83%9E%E6%BC%AB%E7%94%BB"], first_tag) end should "convert a page into a json representation" do - assert_nothing_raised do - @site_1.to_h - end - assert_nothing_raised do - @site_2.to_h - end + assert_nothing_raised { @site_1.to_h } + assert_nothing_raised { @site_2.to_h } + assert_nothing_raised { @site_3.to_h } end should "work for a https://lohas.nicoseiga.jp/thumb/${id}i url" do site = Sources::Strategies.find("https://lohas.nicoseiga.jp/thumb/6844226i") - full_image_url = %r!https?://lohas.nicoseiga.jp/priv/[a-f0-9]{40}/[0-9]+/6844226! 
- assert_match(full_image_url, site.image_url) - assert_match(full_image_url, site.canonical_url) + assert_match(%r!https?://lohas.nicoseiga.jp/priv/[a-f0-9]{40}/[0-9]+/6844226!, site.image_url) + assert_match("https://seiga.nicovideo.jp/seiga/im6844226", site.canonical_url) + end + end + + context "A manga upload through bookmarklet" do + setup do + @url = "https://seiga.nicovideo.jp/image/source/9146749" + @ref = "https://seiga.nicovideo.jp/watch/mg389884" + @site = Sources::Strategies.find(@url, @ref) + end + + should "get the correct pic" do + assert_match(%r!https?://lohas.nicoseiga.jp/priv/[a-f0-9]{40}/[0-9]+/9146749!, @site.image_url) + end + + should "set the correct source" do + assert_equal(@ref, @site.canonical_url) + end + end + + context "A nicoseiga video" do + should "not raise anything" do + site = Sources::Strategies.find("https://www.nicovideo.jp/watch/sm36465441") + assert_nothing_raised { site.to_h } + end + end + + context "An anonymous picture" do + should "still work" do + site = Sources::Strategies.find("https://seiga.nicovideo.jp/seiga/im520647") + + assert_nothing_raised { site.to_h } + end + end + + context "An age-restricted picture" do + should "still work" do + site = Sources::Strategies.find("http://seiga.nicovideo.jp/seiga/im9208126") + + assert_match(%r!https?://lohas.nicoseiga.jp/priv/[a-f0-9]{40}/[0-9]+/9208126!, site.image_url) + assert_nothing_raised { site.to_h } + end + end + + context "An oekaki picture" do + should "still work" do + site = Sources::Strategies.find("https://dic.nicovideo.jp/oekaki/52833.png") + assert_nothing_raised { site.to_h } end end @@ -69,10 +136,10 @@ module Sources source3 = "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663" source4 = "http://seiga.nicovideo.jp/image/source?id=3312222" - assert_equal("https://seiga.nicovideo.jp/seiga/im3521156", Sources::Strategies.normalize_source(source1)) - assert_equal("https://seiga.nicovideo.jp/seiga/im3583893", 
Sources::Strategies.normalize_source(source2)) - assert_equal("https://seiga.nicovideo.jp/seiga/im4937663", Sources::Strategies.normalize_source(source3)) - assert_equal("https://seiga.nicovideo.jp/seiga/im3312222", Sources::Strategies.normalize_source(source4)) + assert_equal("https://seiga.nicovideo.jp/image/source/3521156", Sources::Strategies.normalize_source(source1)) + assert_equal("https://seiga.nicovideo.jp/image/source/3583893", Sources::Strategies.normalize_source(source2)) + assert_equal("https://seiga.nicovideo.jp/image/source/4937663", Sources::Strategies.normalize_source(source3)) + assert_equal("https://seiga.nicovideo.jp/image/source/3312222", Sources::Strategies.normalize_source(source4)) end should "avoid normalizing unnormalizable urls" do From 260bc997f6cdc7eb2a2861c251c11e46ee59140d Mon Sep 17 00:00:00 2001 From: nonamethanks Date: Tue, 2 Jun 2020 13:14:09 +0200 Subject: [PATCH 2/4] NicoSeiga: Add preview urls --- app/logical/sources/strategies/nico_seiga.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/app/logical/sources/strategies/nico_seiga.rb b/app/logical/sources/strategies/nico_seiga.rb index 3cefcface..385dd37f2 100644 --- a/app/logical/sources/strategies/nico_seiga.rb +++ b/app/logical/sources/strategies/nico_seiga.rb @@ -83,6 +83,19 @@ module Sources resp.headers["Location"]&.gsub(%r{nicoseiga.jp/o/}i, 'nicoseiga.jp/priv/') end + def preview_urls + if manga_id.present? + image_urls.map do |img| + id = image_id_from_url(img) + "https://lohas.nicoseiga.jp/thumb/#{id}p" + end + elsif illust_id.present? + ["https://lohas.nicoseiga.jp/thumb/#{illust_id}i"] + else + [] + end + end + def page_url if illust_id.present? 
"https://seiga.nicovideo.jp/seiga/im#{illust_id}" From 6fc4d3ec44273d264904693d7dba91f17e6285a2 Mon Sep 17 00:00:00 2001 From: nonamethanks Date: Wed, 3 Jun 2020 10:39:31 +0200 Subject: [PATCH 3/4] Nicoseiga: Add support for drm-served manga --- app/logical/nico_seiga_api_client.rb | 5 ++++- app/logical/sources/strategies/nico_seiga.rb | 9 ++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/app/logical/nico_seiga_api_client.rb b/app/logical/nico_seiga_api_client.rb index 4290dae96..d75aaade3 100644 --- a/app/logical/nico_seiga_api_client.rb +++ b/app/logical/nico_seiga_api_client.rb @@ -12,7 +12,10 @@ class NicoSeigaApiClient [api_response["id"]] elsif @work_type == "manga" manga_api_response.map do |x| - x["meta"]["source_url"].match(%r{/thumb/(\d+)\w}i).captures[0] + case x["meta"]["source_url"] + when %r{/thumb/(\d+)\w}i then Regexp.last_match(1) + when %r{nicoseiga\.cdn\.nimg\.jp/drm/image/\w+/(\d+)\w}i then Regexp.last_match(1) + end end end end diff --git a/app/logical/sources/strategies/nico_seiga.rb b/app/logical/sources/strategies/nico_seiga.rb index 385dd37f2..714763d3e 100644 --- a/app/logical/sources/strategies/nico_seiga.rb +++ b/app/logical/sources/strategies/nico_seiga.rb @@ -84,15 +84,10 @@ module Sources end def preview_urls - if manga_id.present? - image_urls.map do |img| - id = image_id_from_url(img) - "https://lohas.nicoseiga.jp/thumb/#{id}p" - end - elsif illust_id.present? + if illust_id.present? 
["https://lohas.nicoseiga.jp/thumb/#{illust_id}i"] else - [] + image_urls end end From 5b186f30720d5de7bccb7bd2112e6ad722da608d Mon Sep 17 00:00:00 2001 From: nonamethanks Date: Mon, 15 Jun 2020 04:01:34 +0200 Subject: [PATCH 4/4] Support for new nicoseiga cdn domain --- app/logical/sources/strategies/nico_seiga.rb | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/app/logical/sources/strategies/nico_seiga.rb b/app/logical/sources/strategies/nico_seiga.rb index 714763d3e..e63094bb9 100644 --- a/app/logical/sources/strategies/nico_seiga.rb +++ b/app/logical/sources/strategies/nico_seiga.rb @@ -2,6 +2,8 @@ # * https://lohas.nicoseiga.jp/o/971eb8af9bbcde5c2e51d5ef3a2f62d6d9ff5552/1589933964/3583893 # * http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf # * http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893 +# * https://dcdn.cdn.nimg.jp/priv/62a56a7f67d3d3746ae5712db9cac7d465f4a339/1592186183/10466669 +# * https://dcdn.cdn.nimg.jp/nicoseiga/lohas/o/8ba0a9b2ea34e1ef3b5cc50785bd10cd63ec7e4a/1592187477/10466669 # # * http://lohas.nicoseiga.jp/material/5746c5/4459092 # @@ -37,6 +39,7 @@ module Sources module Strategies class NicoSeiga < Base DIRECT = %r{\Ahttps?://lohas\.nicoseiga\.jp/(priv|o)/(?:\w+/\d+/)?(?<image_id>\d+)(?:\?.+)?}i + CDN_DIRECT = %r{\Ahttps?://dcdn\.cdn\.nimg\.jp/.+/\w+/\d+/(?<image_id>\d+)}i SOURCE = %r{\Ahttps?://seiga\.nicovideo\.jp/image/source(?:/|\?id=)(?<image_id>\d+)}i ILLUST_THUMB = %r{\Ahttps?://lohas\.nicoseiga\.jp/thumb/(?<illust_id>\d+)i}i @@ -74,13 +77,17 @@ module Sources return url if api_client.blank? 
img = case url - when DIRECT then "https://seiga.nicovideo.jp/image/source/#{image_id_from_url(url)}" + when DIRECT, CDN_DIRECT then "https://seiga.nicovideo.jp/image/source/#{image_id_from_url(url)}" when SOURCE then url else image_urls.first end resp = api_client.get(img) - resp.headers["Location"]&.gsub(%r{nicoseiga.jp/o/}i, 'nicoseiga.jp/priv/') + if resp.headers["Location"] =~ %r{https?://.+/(\w+/\d+/\d+)\z}i + "https://lohas.nicoseiga.jp/priv/#{$1}" + else + img + end end def preview_urls @@ -161,7 +168,7 @@ module Sources end def image_id_from_url(url) - url[DIRECT, :image_id] || url[SOURCE, :image_id] || url[MANGA_THUMB, :image_id] + url[DIRECT, :image_id] || url[SOURCE, :image_id] || url[MANGA_THUMB, :image_id] || url[CDN_DIRECT, :image_id] end def illust_id