Refactor nicoseiga strategy

* Get rid of mechanize, fully switch to Danbooru::Http
* Switch to mobile api, improving speed
* Merge main and manga clients
* Add full support for manga pages
* Add support for anonymous and r-15 images
* Don't fail when attempting to upload oekaki direct links
* Various misc fixes
This commit is contained in:
nonamethanks
2020-06-02 12:29:04 +02:00
parent 025e09ff7f
commit 9f0e85e1b5
5 changed files with 278 additions and 285 deletions

View File

@@ -1,25 +1,51 @@
# Image Direct URL
# Direct URL
# * https://lohas.nicoseiga.jp/o/971eb8af9bbcde5c2e51d5ef3a2f62d6d9ff5552/1589933964/3583893
# * http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf
# * http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893
#
# * http://lohas.nicoseiga.jp/material/5746c5/4459092
#
# (Manga direct url)
# * https://lohas.nicoseiga.jp/priv/f5b8966fd53bf7e06cccff9fbb2c4eef62877538/1590752727/8947170
#
# Samples
# * http://lohas.nicoseiga.jp/thumb/2163478i?
# * https://lohas.nicoseiga.jp/thumb/8947170p
#
## The direct urls and samples above can belong to both illust and manga.
## There are two ways to tell them apart:
## * visit the /source/ equivalent: illusts redirect to the /o/ intermediary page, manga redirect to /priv/ directly
## * try an api call: illusts will succeed, manga will fail
#
# Source Link
# * http://seiga.nicovideo.jp/image/source?id=3312222
#
# Image Page URL
# Illust Page URL
# * https://seiga.nicovideo.jp/seiga/im3521156
# * https://seiga.nicovideo.jp/seiga/im520647 (anonymous artist)
#
# Manga Page URL
# * http://seiga.nicovideo.jp/watch/mg316708
#
# Video Page URL (not supported)
# * https://www.nicovideo.jp/watch/sm36465441
#
# Oekaki
# * https://dic.nicovideo.jp/oekaki/52833.png
module Sources
module Strategies
class NicoSeiga < Base
URL = %r!\Ahttps?://(?:\w+\.)?nico(?:seiga|video)\.jp!
DIRECT1 = %r!\Ahttps?://lohas\.nicoseiga\.jp/priv/[0-9a-f]+!
DIRECT2 = %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[0-9a-f]+/\d+/\d+!
DIRECT3 = %r!\Ahttps?://seiga\.nicovideo\.jp/images/source/\d+!
PAGE = %r!\Ahttps?://seiga\.nicovideo\.jp/seiga/im(\d+)!i
PROFILE = %r!\Ahttps?://seiga\.nicovideo\.jp/user/illust/(\d+)!i
MANGA_PAGE = %r!\Ahttps?://seiga\.nicovideo\.jp/watch/mg(\d+)!i
DIRECT = %r{\Ahttps?://lohas\.nicoseiga\.jp/(priv|o)/(?:\w+/\d+/)?(?<image_id>\d+)(?:\?.+)?}i
SOURCE = %r{\Ahttps?://seiga\.nicovideo\.jp/image/source(?:/|\?id=)(?<image_id>\d+)}i
ILLUST_THUMB = %r{\Ahttps?://lohas\.nicoseiga\.jp/thumb/(?<illust_id>\d+)i}i
MANGA_THUMB = %r{\Ahttps?://lohas\.nicoseiga\.jp/thumb/(?<image_id>\d+)p}i
ILLUST_PAGE = %r{\Ahttps?://(?:sp\.)?seiga\.nicovideo\.jp/seiga/im(?<illust_id>\d+)}i
MANGA_PAGE = %r{\Ahttps?://(?:sp\.)?seiga\.nicovideo\.jp/watch/mg(?<manga_id>\d+)}i
PROFILE_PAGE = %r{\Ahttps?://seiga\.nicovideo\.jp/user/illust/(?<artist_id>\d+)}i
def domains
["nicoseiga.jp", "nicovideo.jp"]
@@ -30,160 +56,125 @@ module Sources
end
def image_urls
if url =~ DIRECT1
return [url]
urls = []
return urls if api_client&.api_response.blank?
if image_id.present?
urls << "https://seiga.nicovideo.jp/image/source/#{image_id}"
elsif illust_id.present?
urls << "https://seiga.nicovideo.jp/image/source/#{illust_id}"
elsif manga_id.present? && api_client.image_ids.present?
urls += api_client.image_ids.map { |id| "https://seiga.nicovideo.jp/image/source/#{id}" }
end
urls
end
def image_url
return if image_urls.blank?
return url if api_client.blank?
img = case url
when DIRECT then "https://seiga.nicovideo.jp/image/source/#{image_id_from_url(url)}"
when SOURCE then url
else image_urls.first
end
if theme_id
return api_client.image_ids.map do |image_id|
"https://seiga.nicovideo.jp/image/source/#{image_id}"
end
end
link = page.search("a#illust_link")
if link.any?
image_url = "http://seiga.nicovideo.jp" + link[0]["href"]
page = agent.get(image_url) # need to follow this redirect while logged in or it won't work
if page.is_a?(Mechanize::Image)
return [page.uri.to_s]
end
images = page.search("div.illust_view_big").select {|x| x["data-src"] =~ /\/priv\//}
if images.any?
return ["http://lohas.nicoseiga.jp" + images[0]["data-src"]]
end
end
raise "image url not found for (#{url}, #{referer_url})"
resp = api_client.get(img)
resp.headers["Location"]&.gsub(%r{nicoseiga.jp/o/}i, 'nicoseiga.jp/priv/')
end
def page_url
[url, referer_url].each do |x|
if x =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)!
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{/seiga/im\d+}
return x
end
if x =~ %r{/watch/mg\d+}
return x
end
if x =~ %r{/image/source\?id=(\d+)}
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if illust_id.present?
"https://seiga.nicovideo.jp/seiga/im#{illust_id}"
elsif manga_id.present?
"https://seiga.nicovideo.jp/watch/mg#{manga_id}"
elsif image_id.present?
"https://seiga.nicovideo.jp/image/source/#{image_id}"
end
return super
end
# The canonical URL of this source is whatever image_url returns.
def canonical_url
image_url
end
def profile_url
if url =~ PROFILE
return url
end
user_id = api_client&.user_id
return if user_id.blank? # artists can be anonymous
"http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"
end
def artist_name
api_client.moniker
return if api_client.blank?
api_client.user_name
end
# Title of the work as reported by the API client.
# Returns nil when no API client is available for this source.
def artist_commentary_title
  client = api_client
  client.title unless client.blank?
end
def artist_commentary_desc
api_client.desc
return if api_client.blank?
api_client.description
end
# Converts the HTML commentary description to DText, then rewrites every
# "im<digits>" reference that is preceded by a non-word character into a
# " seiga #<digits> " reference (the preceding character is consumed).
def dtext_artist_commentary_desc
  dtext = DText.from_html(artist_commentary_desc)
  dtext.gsub(/[^\w]im(\d+)/, ' seiga #\1 ')
end
def normalize_for_source
if illust_id.present?
"https://seiga.nicovideo.jp/seiga/im#{illust_id}"
elsif theme_id.present?
"http://seiga.nicovideo.jp/watch/mg#{theme_id}"
# There's no way to tell illust and manga apart from the direct image url alone. What's worse,
# nicoseiga itself doesn't know how to normalize back to manga, so if it's not an illust type then
# it's impossible to get the original manga page back from the image url alone.
# /source/ links, on the other hand, redirect correctly, hence we use them to normalize saved direct sources.
if url =~ DIRECT
"https://seiga.nicovideo.jp/image/source/#{image_id}"
else
page_url
end
end
# Artist tag name built from the API client's user id ("nicoseiga<id>").
# Returns nil when there is no API client or the user id is blank.
def tag_name
  user_id = api_client&.user_id
  "nicoseiga#{user_id}" unless user_id.blank?
end
def tags
string = page.at("meta[name=keywords]").try(:[], "content") || ""
string.split(/,/).map do |name|
[name, "https://seiga.nicovideo.jp/tag/#{CGI.escape(name)}"]
return [] if api_client.blank?
base_url = "https://seiga.nicovideo.jp/"
base_url += "manga/" if manga_id.present?
base_url += "tag/"
api_client.tags.map do |name|
[name, base_url + CGI.escape(name)]
end
end
memoize :tags
# Image id extracted from this strategy's url by image_id_from_url.
def image_id
image_id_from_url(url)
end
# Extracts the :image_id capture from the given url, trying the DIRECT,
# SOURCE and MANGA_THUMB patterns in that order.
# Returns the id as a String, or nil when none of the patterns match.
def image_id_from_url(url)
  [DIRECT, SOURCE, MANGA_THUMB].each do |pattern|
    id = url[pattern, :image_id]
    return id if id
  end
  nil
end
# Illust id taken from the first of `urls` that matches either the illust page
# pattern or the illust thumbnail pattern. Returns the id as a String, or nil
# when no url matches.
#
# nil entries in `urls` are dropped before matching, mirroring manga_id, so a
# missing url does not raise NoMethodError.
def illust_id
  urls.compact.map { |u| u[ILLUST_PAGE, :illust_id] || u[ILLUST_THUMB, :illust_id] }.compact.first
end
# Manga id taken from the first non-nil entry of `urls` that matches the manga
# page pattern. Returns the id as a String, or nil when no url matches.
def manga_id
  urls.each do |candidate|
    next if candidate.nil?

    id = candidate[MANGA_PAGE, :manga_id]
    return id unless id.nil?
  end
  nil
end
def api_client
if illust_id
NicoSeigaApiClient.new(illust_id: illust_id)
elsif theme_id
NicoSeigaMangaApiClient.new(theme_id)
if illust_id.present?
NicoSeigaApiClient.new(work_id: illust_id, type: "illust")
elsif manga_id.present?
NicoSeigaApiClient.new(work_id: manga_id, type: "manga")
elsif image_id.present?
# We default to illust to attempt getting the api anyway
NicoSeigaApiClient.new(work_id: image_id, type: "illust")
end
end
memoize :api_client
# Numeric illust id parsed from page_url using the PAGE pattern.
# Returns an Integer, or nil when page_url does not match.
def illust_id
  matched = PAGE.match(page_url)
  matched ? matched[1].to_i : nil
end
# Numeric manga (theme) id parsed from page_url using the MANGA_PAGE pattern.
# Returns an Integer, or nil when page_url does not match.
def theme_id
  return nil unless page_url =~ MANGA_PAGE

  Regexp.last_match(1).to_i
end
# Fetches page_url through the agent. If the fetched document contains the
# login button link, the cached session key is deleted and the page is fetched
# once more; the second document is returned in that case.
def page
  fetched = agent.get(page_url)
  return fetched unless fetched.search("a#link_btn_login").any?

  # Session cache is invalid, clear it and log in normally.
  Cache.delete("nico-seiga-session")
  agent.get(page_url)
end
memoize :page
# Returns the agent exposed by NicoSeigaApiClient.agent.
def agent
NicoSeigaApiClient.agent
end
memoize :agent
end
end
end