Get rid of `normalized_for_artist_finder?` and `normalizable_for_artist_finder?`. This was legacy bullshit that was originally designed to avoid API calls when saving artist entries containing old Pixiv direct image urls that had already been normalized, or that couldn't be normalized because they were bad id. Nowadays we store profile urls in artist entries instead of direct image urls, so we don't normally need to do any API calls to normalize the profile url. Strategies should take care to avoid triggering API calls inside `profile_url` when possible.
454 lines
14 KiB
Ruby
454 lines
14 KiB
Ruby
# Pixiv
|
|
#
|
|
# * https://i.pximg.net/img-original/img/2014/10/03/18/10/20/46324488_p0.png
|
|
# * https://i-f.pximg.net/img-original/img/2020/02/19/00/40/18/79584713_p0.png
|
|
#
|
|
# * https://i.pximg.net/c/250x250_80_a2/img-master/img/2014/10/29/09/27/19/46785915_p0_square1200.jpg
|
|
# * https://i.pximg.net/img-master/img/2014/10/03/18/10/20/46324488_p0_master1200.jpg
|
|
#
|
|
# * https://tc-pximg01.techorus-cdn.com/img-original/img/2017/09/18/03/18/24/65015428_p4.png
|
|
#
|
|
# * https://www.pixiv.net/member_illust.php?mode=medium&illust_id=46324488
|
|
# * https://www.pixiv.net/member_illust.php?mode=manga&illust_id=46324488
|
|
# * https://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46324488&page=0
|
|
# * https://www.pixiv.net/en/artworks/46324488
|
|
#
|
|
# * https://www.pixiv.net/member.php?id=339253
|
|
# * https://www.pixiv.net/member_illust.php?id=339253&type=illust
|
|
# * https://www.pixiv.net/u/9202877
|
|
# * https://www.pixiv.net/stacc/noizave
|
|
# * http://www.pixiv.me/noizave
|
|
#
|
|
# Fanbox
|
|
#
|
|
# * https://fanbox.pixiv.net/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png
|
|
# * https://pixiv.pximg.net/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg
|
|
#
|
|
# * https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg
|
|
# * https://pixiv.pximg.net/c/1200x630_90_a2_g5/fanbox/public/images/post/186919/cover/VCI1Mcs2rbmWPg0mmiTisovn.jpeg
|
|
#
|
|
# * https://www.pixiv.net/fanbox/creator/1566167/post/39714
|
|
# * https://www.pixiv.net/fanbox/creator/1566167
|
|
#
|
|
# Novels
|
|
#
|
|
# * https://i.pximg.net/novel-cover-original/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42.jpg
|
|
# * https://i.pximg.net/c/600x600/novel-cover-master/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42_master1200.jpg
|
|
# * https://img-novel.pximg.net/img-novel/work_main/XtFbt7gsymsvyaG45lZ8/1554.jpg?20190107110435
|
|
#
|
|
# * https://www.pixiv.net/novel/show.php?id=10617324
|
|
# * https://novel.pixiv.net/works/1554
|
|
#
|
|
# Sketch
|
|
#
|
|
# * https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg
|
|
# * https://img-sketch.pximg.net/c!/w=540,f=webp:jpeg/uploads/medium/file/4463372/8906921629213362989.jpg
|
|
# * https://sketch.pixiv.net/items/1588346448904706151
|
|
# * https://sketch.pixiv.net/@0125840
|
|
#
|
|
|
|
module Sources
|
|
module Strategies
|
|
class Pixiv < Base
|
|
MONIKER = %r!(?:[a-zA-Z0-9_-]+)!
|
|
PROFILE = %r!\Ahttps?://www\.pixiv\.net/member\.php\?id=[0-9]+\z!
|
|
DATE = %r!(?<date>\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})!i
|
|
EXT = %r!(?:jpg|jpeg|png|gif)!i
|
|
|
|
WEB = %r!(?:\A(?:https?://)?www\.pixiv\.net)!
|
|
I12 = %r!(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)!
|
|
IMG = %r!(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)!
|
|
PXIMG = %r!(?:\A(?:https?://)?[^.]+\.pximg\.net)!
|
|
TOUCH = %r!(?:\A(?:https?://)?touch\.pixiv\.net)!
|
|
UGOIRA = %r!#{PXIMG}/img-zip-ugoira/img/#{DATE}/(?<illust_id>\d+)_ugoira1920x1080\.zip\z!i
|
|
ORIG_IMAGE = %r!#{PXIMG}/img-original/img/#{DATE}/(?<illust_id>\d+)_p(?<page>\d+)\.#{EXT}\z!i
|
|
STACC_PAGE = %r!\A#{WEB}/stacc/#{MONIKER}/?\z!i
|
|
NOVEL_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/novel/show\.php\?id=(\d+))!
|
|
FANBOX_ACCOUNT = %r!(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+\z)!
|
|
FANBOX_IMAGE = %r!(?:\Ahttps?://fanbox\.pixiv\.net/images/post/(\d+))!
|
|
FANBOX_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+/post/(\d+))!
|
|
|
|
def self.to_dtext(text)
|
|
if text.nil?
|
|
return nil
|
|
end
|
|
|
|
text = text.gsub(%r!https?://www\.pixiv\.net/member_illust\.php\?mode=medium&illust_id=([0-9]+)!i) do |match|
|
|
pixiv_id = $1
|
|
%(pixiv ##{pixiv_id} "»":[/posts?tags=pixiv:#{pixiv_id}])
|
|
end
|
|
|
|
text = text.gsub(%r!https?://www\.pixiv\.net/member\.php\?id=([0-9]+)!i) do |match|
|
|
member_id = $1
|
|
profile_url = "https://www.pixiv.net/member.php?id=#{member_id}"
|
|
search_params = {"search[url_matches]" => profile_url}.to_param
|
|
|
|
%("user/#{member_id}":[#{profile_url}] "»":[/artists?#{search_params}])
|
|
end
|
|
|
|
text = text.gsub(/\r\n|\r|\n/, "<br>")
|
|
DText.from_html(text)
|
|
end
|
|
|
|
def domains
|
|
["pixiv.net", "pximg.net"]
|
|
end
|
|
|
|
def match?
|
|
return false if parsed_url.nil?
|
|
parsed_url.domain.in?(domains) || parsed_url.host == "tc-pximg01.techorus-cdn.com"
|
|
end
|
|
|
|
def site_name
|
|
"Pixiv"
|
|
end
|
|
|
|
def image_urls
|
|
image_urls_sub
|
|
rescue PixivApiClient::BadIDError
|
|
[url]
|
|
end
|
|
|
|
def preview_urls
|
|
image_urls.map do |url|
|
|
case url
|
|
when ORIG_IMAGE
|
|
"https://i.pximg.net/c/240x240/img-master/img/#{$~[:date]}/#{$~[:illust_id]}_p#{$~[:page]}_master1200.jpg"
|
|
when UGOIRA
|
|
"https://i.pximg.net/c/240x240/img-master/img/#{$~[:date]}/#{$~[:illust_id]}_master1200.jpg"
|
|
else
|
|
url
|
|
end
|
|
end
|
|
end
|
|
|
|
def page_url
|
|
if novel_id.present?
|
|
return "https://www.pixiv.net/novel/show.php?id=#{novel_id}&mode=cover"
|
|
end
|
|
|
|
if fanbox_id.present?
|
|
return "https://www.pixiv.net/fanbox/creator/#{metadata.user_id}/post/#{fanbox_id}"
|
|
end
|
|
|
|
if fanbox_account_id.present?
|
|
return "https://www.pixiv.net/fanbox/creator/#{fanbox_account_id}"
|
|
end
|
|
|
|
if illust_id.present?
|
|
return "https://www.pixiv.net/artworks/#{illust_id}"
|
|
end
|
|
|
|
return url
|
|
rescue PixivApiClient::BadIDError
|
|
nil
|
|
end
|
|
|
|
def canonical_url
|
|
return image_url
|
|
end
|
|
|
|
def profile_url
|
|
[url, referer_url].each do |x|
|
|
if x =~ PROFILE
|
|
return x
|
|
end
|
|
end
|
|
|
|
"https://www.pixiv.net/member.php?id=#{metadata.user_id}"
|
|
rescue PixivApiClient::BadIDError
|
|
nil
|
|
end
|
|
|
|
def stacc_url
|
|
return nil if moniker.blank?
|
|
"https://www.pixiv.net/stacc/#{moniker}"
|
|
end
|
|
|
|
def profile_urls
|
|
[profile_url, stacc_url].compact
|
|
end
|
|
|
|
def artist_name
|
|
metadata.name
|
|
rescue PixivApiClient::BadIDError
|
|
nil
|
|
end
|
|
|
|
def other_names
|
|
[artist_name, moniker].compact.uniq
|
|
end
|
|
|
|
def artist_commentary_title
|
|
metadata.artist_commentary_title
|
|
rescue PixivApiClient::BadIDError
|
|
nil
|
|
end
|
|
|
|
def artist_commentary_desc
|
|
metadata.artist_commentary_desc
|
|
rescue PixivApiClient::BadIDError
|
|
nil
|
|
end
|
|
|
|
def headers
|
|
if fanbox_id.present?
|
|
# need the session to download fanbox images
|
|
return {
|
|
"Referer" => "https://www.pixiv.net/fanbox",
|
|
"Cookie" => HTTP::Cookie.cookie_value(agent.cookies)
|
|
}
|
|
end
|
|
|
|
return {
|
|
"Referer" => "https://www.pixiv.net"
|
|
}
|
|
end
|
|
|
|
def normalize_for_source
|
|
return if illust_id.blank?
|
|
|
|
"https://www.pixiv.net/artworks/#{illust_id}"
|
|
end
|
|
|
|
def tag_name
|
|
moniker
|
|
end
|
|
|
|
def tags
|
|
metadata.tags.map do |tag|
|
|
[tag, "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{{word: tag}.to_param}"]
|
|
end
|
|
rescue PixivApiClient::BadIDError
|
|
[]
|
|
end
|
|
|
|
def normalize_tag(tag)
|
|
tag.gsub(/\d+users入り\z/i, "")
|
|
end
|
|
|
|
def translate_tag(tag)
|
|
translated_tags = super(tag)
|
|
|
|
if translated_tags.empty? && tag.include?("/")
|
|
translated_tags = tag.split("/").flat_map { |tag| super(tag) }
|
|
end
|
|
|
|
translated_tags
|
|
end
|
|
|
|
def related_posts_search_query
|
|
illust_id.present? ? "pixiv:#{illust_id}" : "source:#{canonical_url}"
|
|
end
|
|
|
|
def image_urls_sub
|
|
if url =~ FANBOX_IMAGE
|
|
return [url]
|
|
end
|
|
|
|
# there's too much normalization bullshit we have to deal with
|
|
# raw urls, so just fetch the canonical url from the api every
|
|
# time.
|
|
if manga_page.present?
|
|
return [metadata.pages[manga_page]]
|
|
end
|
|
|
|
if metadata.pages.is_a?(Hash)
|
|
return [ugoira_zip_url]
|
|
end
|
|
|
|
return metadata.pages
|
|
end
|
|
|
|
# in order to prevent recursive loops, this method should not make any
|
|
# api calls and only try to extract the illust_id from the url. therefore,
|
|
# even though it makes sense to reference page_url here, it will only look
|
|
# at (url, referer_url).
|
|
def illust_id
|
|
return nil if novel_id.present? || fanbox_id.present?
|
|
|
|
parsed_urls.each do |url|
|
|
# http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
|
|
# http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
|
|
# http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
|
|
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
|
|
if url.host == "www.pixiv.net" && url.path == "/member_illust.php" && url.query_values["illust_id"].present?
|
|
return url.query_values["illust_id"].to_i
|
|
|
|
# http://www.pixiv.net/en/artworks/46324488
|
|
elsif url.host == "www.pixiv.net" && url.path =~ %r!\A/(?:en/)?artworks/(?<illust_id>\d+)!i
|
|
return $~[:illust_id].to_i
|
|
|
|
# http://www.pixiv.net/i/18557054
|
|
elsif url.host == "www.pixiv.net" && url.path =~ %r!\A/i/(?<illust_id>\d+)\z!i
|
|
return $~[:illust_id].to_i
|
|
|
|
# http://img18.pixiv.net/img/evazion/14901720.png
|
|
# http://i2.pixiv.net/img18/img/evazion/14901720.png
|
|
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
|
|
# http://i2.pixiv.net/img18/img/evazion/14901720_s.png
|
|
# http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png
|
|
# http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png
|
|
elsif url.host =~ %r!\A(?:i\d+|img\d+)\.pixiv\.net\z!i &&
|
|
url.path =~ %r!\A(?:/img\d+)?/img/#{MONIKER}/(?<illust_id>\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i
|
|
return $~[:illust_id].to_i
|
|
|
|
# http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_64x64.jpg
|
|
# http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_s.png
|
|
# http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p0_master1200.jpg
|
|
# http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p0.png
|
|
# http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip
|
|
# https://i.pximg.net/img-original/img/2014/10/03/18/10/20/46324488_p0.png
|
|
# https://i.pximg.net/img-master/img/2014/10/03/18/10/20/46324488_p0_master1200.jpg
|
|
# https://i-f.pximg.net/img-original/img/2020/02/19/00/40/18/79584713_p0.png
|
|
# https://tc-pximg01.techorus-cdn.com/img-original/img/2017/09/18/03/18/24/65015428_p4.png
|
|
#
|
|
# but not:
|
|
#
|
|
# https://i.pximg.net/novel-cover-original/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42.jpg
|
|
# https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg
|
|
elsif url.host =~ %r!\A(?:[^.]+\.pximg\.net|i\d+\.pixiv\.net|tc-pximg01\.techorus-cdn\.com)\z!i &&
|
|
url.path =~ %r!\A(/c/\w+)?/img-[a-z-]+/img/#{DATE}/(?<illust_id>\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i
|
|
return $~[:illust_id].to_i
|
|
end
|
|
end
|
|
|
|
return nil
|
|
end
|
|
memoize :illust_id
|
|
|
|
def novel_id
|
|
[url, referer_url].each do |x|
|
|
if x =~ NOVEL_PAGE
|
|
return $1
|
|
end
|
|
end
|
|
|
|
return nil
|
|
end
|
|
memoize :novel_id
|
|
|
|
def fanbox_id
|
|
[url, referer_url].each do |x|
|
|
if x =~ FANBOX_PAGE
|
|
return $1
|
|
end
|
|
|
|
if x =~ FANBOX_IMAGE
|
|
return $1
|
|
end
|
|
end
|
|
|
|
return nil
|
|
end
|
|
memoize :fanbox_id
|
|
|
|
def fanbox_account_id
|
|
[url, referer_url].each do |x|
|
|
if x =~ FANBOX_ACCOUNT
|
|
return x
|
|
end
|
|
end
|
|
|
|
return nil
|
|
end
|
|
memoize :fanbox_account_id
|
|
|
|
def agent
|
|
PixivWebAgent.build
|
|
end
|
|
memoize :agent
|
|
|
|
def metadata
|
|
if novel_id.present?
|
|
return PixivApiClient.new.novel(novel_id)
|
|
end
|
|
|
|
if fanbox_id.present?
|
|
return PixivApiClient.new.fanbox(fanbox_id)
|
|
end
|
|
|
|
return PixivApiClient.new.work(illust_id)
|
|
end
|
|
memoize :metadata
|
|
|
|
def moniker
|
|
# we can sometimes get the moniker from the url
|
|
if url =~ %r!#{IMG}/img/(#{MONIKER})!i
|
|
return $1
|
|
end
|
|
|
|
if url =~ %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i
|
|
return $1
|
|
end
|
|
|
|
if url =~ %r!#{WEB}/stacc/(#{MONIKER})/?$!i
|
|
return $1
|
|
end
|
|
|
|
return metadata.moniker
|
|
rescue PixivApiClient::BadIDError
|
|
nil
|
|
end
|
|
memoize :moniker
|
|
|
|
def data
|
|
return {
|
|
ugoira_frame_data: ugoira_frame_data
|
|
}
|
|
end
|
|
|
|
def ugoira_zip_url
|
|
if metadata.pages.is_a?(Hash) && metadata.pages["ugoira600x600"]
|
|
return metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
|
|
end
|
|
end
|
|
memoize :ugoira_zip_url
|
|
|
|
def ugoira_frame_data
|
|
return metadata.json.dig("metadata", "frames")
|
|
rescue PixivApiClient::BadIDError
|
|
nil
|
|
end
|
|
memoize :ugoira_frame_data
|
|
|
|
def ugoira_content_type
|
|
case metadata.json["image_urls"].to_s
|
|
when /\.jpg/
|
|
return "image/jpeg"
|
|
|
|
when /\.png/
|
|
return "image/png"
|
|
|
|
when /\.gif/
|
|
return "image/gif"
|
|
end
|
|
|
|
raise Sources::Error.new("content type not found for (#{url}, #{referer_url})")
|
|
end
|
|
memoize :ugoira_content_type
|
|
|
|
# Returns the current page number of the manga. This will not
|
|
# make any api calls and only looks at (url, referer_url).
|
|
def manga_page
|
|
# http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
|
|
# http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
|
|
# http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
|
|
if url =~ %r!/\d+_p(\d+)(?:_\w+)?\.#{EXT}!i
|
|
return $1.to_i
|
|
end
|
|
|
|
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46170939&page=0
|
|
[url, referer_url].each do |x|
|
|
if x =~ /page=(\d+)/i
|
|
return $1.to_i
|
|
end
|
|
end
|
|
|
|
return nil
|
|
end
|
|
memoize :manga_page
|
|
end
|
|
end
|
|
end
|