Refactor nicoseiga strategy
* Get rid of mechanize, fully switch to Danbooru::Http * Switch to mobile api, improving speed * Merge main and manga clients * Add full support for manga pages * Add support for anonymous and r-15 images * Don't fail when attempting to upload oekaki direct links * Various misc fixes
This commit is contained in:
@@ -1,25 +1,51 @@
|
||||
# Image Direct URL
|
||||
# Direct URL
|
||||
# * https://lohas.nicoseiga.jp/o/971eb8af9bbcde5c2e51d5ef3a2f62d6d9ff5552/1589933964/3583893
|
||||
# * http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf
|
||||
# * http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893
|
||||
#
|
||||
# * http://lohas.nicoseiga.jp/material/5746c5/4459092
|
||||
#
|
||||
# (Manga direct url)
|
||||
# * https://lohas.nicoseiga.jp/priv/f5b8966fd53bf7e06cccff9fbb2c4eef62877538/1590752727/8947170
|
||||
#
|
||||
# Samples
|
||||
# * http://lohas.nicoseiga.jp/thumb/2163478i?
|
||||
# * https://lohas.nicoseiga.jp/thumb/8947170p
|
||||
#
|
||||
## The direct urls and samples above can belong to both illust and manga.
|
||||
## There are two ways to tell them apart:
|
||||
## * visit the /source/ equivalent: illusts redirect to the /o/ intermediary page, manga redirect to /priv/ directly
|
||||
## * try an api call: illusts will succeed, manga will fail
|
||||
#
|
||||
# Source Link
|
||||
# * http://seiga.nicovideo.jp/image/source?id=3312222
|
||||
#
|
||||
# Image Page URL
|
||||
# Illust Page URL
|
||||
# * https://seiga.nicovideo.jp/seiga/im3521156
|
||||
# * https://seiga.nicovideo.jp/seiga/im520647 (anonymous artist)
|
||||
#
|
||||
# Manga Page URL
|
||||
# * http://seiga.nicovideo.jp/watch/mg316708
|
||||
#
|
||||
# Video Page URL (not supported)
|
||||
# * https://www.nicovideo.jp/watch/sm36465441
|
||||
#
|
||||
# Oekaki
|
||||
# * https://dic.nicovideo.jp/oekaki/52833.png
|
||||
|
||||
module Sources
|
||||
module Strategies
|
||||
class NicoSeiga < Base
|
||||
URL = %r!\Ahttps?://(?:\w+\.)?nico(?:seiga|video)\.jp!
|
||||
DIRECT1 = %r!\Ahttps?://lohas\.nicoseiga\.jp/priv/[0-9a-f]+!
|
||||
DIRECT2 = %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[0-9a-f]+/\d+/\d+!
|
||||
DIRECT3 = %r!\Ahttps?://seiga\.nicovideo\.jp/images/source/\d+!
|
||||
PAGE = %r!\Ahttps?://seiga\.nicovideo\.jp/seiga/im(\d+)!i
|
||||
PROFILE = %r!\Ahttps?://seiga\.nicovideo\.jp/user/illust/(\d+)!i
|
||||
MANGA_PAGE = %r!\Ahttps?://seiga\.nicovideo\.jp/watch/mg(\d+)!i
|
||||
DIRECT = %r{\Ahttps?://lohas\.nicoseiga\.jp/(priv|o)/(?:\w+/\d+/)?(?<image_id>\d+)(?:\?.+)?}i
|
||||
SOURCE = %r{\Ahttps?://seiga\.nicovideo\.jp/image/source(?:/|\?id=)(?<image_id>\d+)}i
|
||||
|
||||
ILLUST_THUMB = %r{\Ahttps?://lohas\.nicoseiga\.jp/thumb/(?<illust_id>\d+)i}i
|
||||
MANGA_THUMB = %r{\Ahttps?://lohas\.nicoseiga\.jp/thumb/(?<image_id>\d+)p}i
|
||||
|
||||
ILLUST_PAGE = %r{\Ahttps?://(?:sp\.)?seiga\.nicovideo\.jp/seiga/im(?<illust_id>\d+)}i
|
||||
MANGA_PAGE = %r{\Ahttps?://(?:sp\.)?seiga\.nicovideo\.jp/watch/mg(?<manga_id>\d+)}i
|
||||
|
||||
PROFILE_PAGE = %r{\Ahttps?://seiga\.nicovideo\.jp/user/illust/(?<artist_id>\d+)}i
|
||||
|
||||
def domains
|
||||
["nicoseiga.jp", "nicovideo.jp"]
|
||||
@@ -30,160 +56,125 @@ module Sources
|
||||
end
|
||||
|
||||
def image_urls
|
||||
if url =~ DIRECT1
|
||||
return [url]
|
||||
urls = []
|
||||
return urls if api_client&.api_response.blank?
|
||||
|
||||
if image_id.present?
|
||||
urls << "https://seiga.nicovideo.jp/image/source/#{image_id}"
|
||||
elsif illust_id.present?
|
||||
urls << "https://seiga.nicovideo.jp/image/source/#{illust_id}"
|
||||
elsif manga_id.present? && api_client.image_ids.present?
|
||||
urls += api_client.image_ids.map { |id| "https://seiga.nicovideo.jp/image/source/#{id}" }
|
||||
end
|
||||
urls
|
||||
end
|
||||
|
||||
def image_url
|
||||
return if image_urls.blank?
|
||||
return url if api_client.blank?
|
||||
|
||||
img = case url
|
||||
when DIRECT then "https://seiga.nicovideo.jp/image/source/#{image_id_from_url(url)}"
|
||||
when SOURCE then url
|
||||
else image_urls.first
|
||||
end
|
||||
|
||||
if theme_id
|
||||
return api_client.image_ids.map do |image_id|
|
||||
"https://seiga.nicovideo.jp/image/source/#{image_id}"
|
||||
end
|
||||
end
|
||||
|
||||
link = page.search("a#illust_link")
|
||||
|
||||
if link.any?
|
||||
image_url = "http://seiga.nicovideo.jp" + link[0]["href"]
|
||||
page = agent.get(image_url) # need to follow this redirect while logged in or it won't work
|
||||
|
||||
if page.is_a?(Mechanize::Image)
|
||||
return [page.uri.to_s]
|
||||
end
|
||||
|
||||
images = page.search("div.illust_view_big").select {|x| x["data-src"] =~ /\/priv\//}
|
||||
|
||||
if images.any?
|
||||
return ["http://lohas.nicoseiga.jp" + images[0]["data-src"]]
|
||||
end
|
||||
end
|
||||
|
||||
raise "image url not found for (#{url}, #{referer_url})"
|
||||
resp = api_client.get(img)
|
||||
resp.headers["Location"]&.gsub(%r{nicoseiga.jp/o/}i, 'nicoseiga.jp/priv/')
|
||||
end
|
||||
|
||||
def page_url
|
||||
[url, referer_url].each do |x|
|
||||
if x =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)!
|
||||
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
|
||||
end
|
||||
|
||||
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i
|
||||
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
|
||||
end
|
||||
|
||||
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i
|
||||
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
|
||||
end
|
||||
|
||||
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i
|
||||
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
|
||||
end
|
||||
|
||||
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i
|
||||
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
|
||||
end
|
||||
|
||||
if x =~ %r{/seiga/im\d+}
|
||||
return x
|
||||
end
|
||||
|
||||
if x =~ %r{/watch/mg\d+}
|
||||
return x
|
||||
end
|
||||
|
||||
if x =~ %r{/image/source\?id=(\d+)}
|
||||
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
|
||||
end
|
||||
if illust_id.present?
|
||||
"https://seiga.nicovideo.jp/seiga/im#{illust_id}"
|
||||
elsif manga_id.present?
|
||||
"https://seiga.nicovideo.jp/watch/mg#{manga_id}"
|
||||
elsif image_id.present?
|
||||
"https://seiga.nicovideo.jp/image/source/#{image_id}"
|
||||
end
|
||||
|
||||
return super
|
||||
end
|
||||
|
||||
# The canonical url of this source is whatever image_url resolves to.
def canonical_url
|
||||
image_url
|
||||
end
|
||||
|
||||
def profile_url
|
||||
if url =~ PROFILE
|
||||
return url
|
||||
end
|
||||
user_id = api_client&.user_id
|
||||
return if user_id.blank? # artists can be anonymous
|
||||
|
||||
"http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"
|
||||
end
|
||||
|
||||
def artist_name
|
||||
api_client.moniker
|
||||
return if api_client.blank?
|
||||
api_client.user_name
|
||||
end
|
||||
|
||||
# Title of the work as reported by the api client.
# Returns nil when no api client is available for this source.
def artist_commentary_title
  api_client.title if api_client.present?
end
|
||||
|
||||
def artist_commentary_desc
|
||||
api_client.desc
|
||||
return if api_client.blank?
|
||||
api_client.description
|
||||
end
|
||||
|
||||
# Runs the commentary description through DText.from_html, then rewrites every
# "im<digits>" reference that follows a non-word character into " seiga #<digits> ".
def dtext_artist_commentary_desc
  dtext = DText.from_html(artist_commentary_desc)
  dtext.gsub(/[^\w]im(\d+)/, ' seiga #\1 ')
end
|
||||
|
||||
def normalize_for_source
|
||||
if illust_id.present?
|
||||
"https://seiga.nicovideo.jp/seiga/im#{illust_id}"
|
||||
elsif theme_id.present?
|
||||
"http://seiga.nicovideo.jp/watch/mg#{theme_id}"
|
||||
# There's no way to tell an illust apart from a manga from the direct image url alone. What's worse,
|
||||
# nicoseiga itself doesn't know how to normalize back to manga, so if it's not an illust type then
|
||||
# it's impossible to get the original manga page back from the image url alone.
|
||||
# /source/ links on the other hand correctly redirect, hence we use them to normalize saved direct sources.
|
||||
if url =~ DIRECT
|
||||
"https://seiga.nicovideo.jp/image/source/#{image_id}"
|
||||
else
|
||||
page_url
|
||||
end
|
||||
end
|
||||
|
||||
# Builds the "nicoseiga<user id>" tag for the work's uploader.
# Returns nil when there is no api client or its user id is blank.
def tag_name
  if api_client&.user_id.present?
    "nicoseiga#{api_client.user_id}"
  end
end
|
||||
|
||||
def tags
|
||||
string = page.at("meta[name=keywords]").try(:[], "content") || ""
|
||||
string.split(/,/).map do |name|
|
||||
[name, "https://seiga.nicovideo.jp/tag/#{CGI.escape(name)}"]
|
||||
return [] if api_client.blank?
|
||||
|
||||
base_url = "https://seiga.nicovideo.jp/"
|
||||
base_url += "manga/" if manga_id.present?
|
||||
base_url += "tag/"
|
||||
|
||||
api_client.tags.map do |name|
|
||||
[name, base_url + CGI.escape(name)]
|
||||
end
|
||||
end
|
||||
memoize :tags
|
||||
|
||||
# Image id parsed out of `url` by image_id_from_url.
def image_id
|
||||
image_id_from_url(url)
|
||||
end
|
||||
|
||||
# Pulls the :image_id capture out of the given url, trying the DIRECT, SOURCE
# and MANGA_THUMB patterns in that order.
#
# @param url [String] the url to inspect
# @return [String, nil] the first captured id, or nil when no pattern matches
def image_id_from_url(url)
  [DIRECT, SOURCE, MANGA_THUMB].each do |pattern|
    id = url[pattern, :image_id]
    return id if id
  end

  nil
end
|
||||
|
||||
# Extracts the illust id from the first candidate url that looks like an illust
# page (ILLUST_PAGE) or an illust thumbnail (ILLUST_THUMB).
#
# Nil entries in `urls` are skipped, as manga_id already does, so a missing
# url cannot raise NoMethodError here.
#
# @return [String, nil] the captured id, or nil when no candidate url matches
def illust_id
  urls.compact.each do |candidate|
    id = candidate[ILLUST_PAGE, :illust_id] || candidate[ILLUST_THUMB, :illust_id]
    return id if id
  end

  nil
end
|
||||
|
||||
# Extracts the manga id from the first non-nil candidate url that matches MANGA_PAGE.
#
# @return [String, nil] the captured id, or nil when no candidate url matches
def manga_id
  urls.compact.each do |candidate|
    id = candidate[MANGA_PAGE, :manga_id]
    return id if id
  end

  nil
end
|
||||
|
||||
def api_client
|
||||
if illust_id
|
||||
NicoSeigaApiClient.new(illust_id: illust_id)
|
||||
elsif theme_id
|
||||
NicoSeigaMangaApiClient.new(theme_id)
|
||||
if illust_id.present?
|
||||
NicoSeigaApiClient.new(work_id: illust_id, type: "illust")
|
||||
elsif manga_id.present?
|
||||
NicoSeigaApiClient.new(work_id: manga_id, type: "manga")
|
||||
elsif image_id.present?
|
||||
# We default to illust to attempt getting the api anyway
|
||||
NicoSeigaApiClient.new(work_id: image_id, type: "illust")
|
||||
end
|
||||
end
|
||||
memoize :api_client
|
||||
|
||||
def illust_id
|
||||
if page_url =~ PAGE
|
||||
return $1.to_i
|
||||
end
|
||||
|
||||
return nil
|
||||
end
|
||||
|
||||
def theme_id
|
||||
if page_url =~ MANGA_PAGE
|
||||
return $1.to_i
|
||||
end
|
||||
|
||||
return nil
|
||||
end
|
||||
|
||||
def page
|
||||
doc = agent.get(page_url)
|
||||
|
||||
if doc.search("a#link_btn_login").any?
|
||||
# Session cache is invalid, clear it and log in normally.
|
||||
Cache.delete("nico-seiga-session")
|
||||
doc = agent.get(page_url)
|
||||
end
|
||||
|
||||
doc
|
||||
end
|
||||
memoize :page
|
||||
|
||||
def agent
|
||||
NicoSeigaApiClient.agent
|
||||
end
|
||||
memoize :agent
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user