pixiv: fix API breakage.

Fix the Pixiv API no longer working by rewriting the Pixiv strategy to
use the Ajax API instead of the mobile API.

Previously we could authenticate with the mobile API by using the OAuth 2.0
grant_type=password authentication flow. This no longer works. The mobile API
now requires logging in through an HTML page, which is protected by Google
reCAPTCHA. This makes using the mobile API infeasible.

Instead we switch to the Ajax API, which only needs a PHPSESSID to
authenticate. This can be obtained by logging in manually and using the
devtools to extract the cookie.

This also temporarily removes support for Pixiv novels. Novel support should
be moved to a separate source strategy.
This commit is contained in:
evazion
2021-02-09 05:48:02 -06:00
parent 7520c4db49
commit 39cc3ed5cf
8 changed files with 458 additions and 317 deletions

View File

@@ -2,7 +2,7 @@ module Sources
module Strategies
def self.all
[
#Strategies::Pixiv,
Strategies::Pixiv,
Strategies::Fanbox,
Strategies::NicoSeiga,
Strategies::Twitter,

View File

@@ -48,27 +48,22 @@ module Sources
I12 = %r{(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)}
IMG = %r{(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)}
PXIMG = %r{(?:\A(?:https?://)?[^.]+\.pximg\.net)}
TOUCH = %r{(?:\A(?:https?://)?touch\.pixiv\.net)}
UGOIRA = %r{#{PXIMG}/img-zip-ugoira/img/#{DATE}/(?<illust_id>\d+)_ugoira1920x1080\.zip\z}i
ORIG_IMAGE = %r{#{PXIMG}/img-original/img/#{DATE}/(?<illust_id>\d+)_p(?<page>\d+)\.#{EXT}\z}i
STACC_PAGE = %r{\A#{WEB}/stacc/#{MONIKER}/?\z}i
NOVEL_PAGE = %r{(?:\Ahttps?://www\.pixiv\.net/novel/show\.php\?id=(\d+))}
def self.enabled?
Danbooru.config.pixiv_login.present? && Danbooru.config.pixiv_password.present?
Danbooru.config.pixiv_phpsessid.present?
end
def self.to_dtext(text)
if text.nil?
return nil
end
return nil if text.nil?
text = text.gsub(%r{https?://www\.pixiv\.net/member_illust\.php\?mode=medium&illust_id=([0-9]+)}i) do |_match|
text = text.gsub(%r{<a href="https?://www\.pixiv\.net/en/artworks/([0-9]+)">illust/[0-9]+</a>}i) do |_match|
pixiv_id = $1
%(pixiv ##{pixiv_id} "»":[#{Routes.posts_path(tags: "pixiv:#{pixiv_id}")}])
end
text = text.gsub(%r{https?://www\.pixiv\.net/member\.php\?id=([0-9]+)}i) do |_match|
text = text.gsub(%r{<a href="https?://www\.pixiv\.net/en/users/([0-9]+)">user/[0-9]+</a>}i) do |_match|
member_id = $1
profile_url = "https://www.pixiv.net/users/#{member_id}"
artist_search_url = Routes.artists_path(search: { url_matches: profile_url })
@@ -76,7 +71,6 @@ module Sources
%("user/#{member_id}":[#{profile_url}] "»":[#{artist_search_url}])
end
text = text.gsub(/\r\n|\r|\n/, "<br>")
DText.from_html(text)
end
@@ -95,9 +89,19 @@ module Sources
end
def image_urls
image_urls_sub
rescue PixivApiClient::BadIDError
[url]
if is_ugoira?
[api_ugoira[:originalSrc]]
elsif manga_page.present? && original_urls.present?
[original_urls[manga_page]]
elsif original_urls.present?
original_urls
else
[url]
end
end
# Collects the "original" image URL of every page returned by api_pages,
# preserving page order. A page lacking a urls/original entry yields nil.
def original_urls
  api_pages.map do |entry|
    entry.dig("urls", "original")
  end
end
def preview_urls
@@ -114,17 +118,8 @@ module Sources
end
def page_url
if novel_id.present?
return "https://www.pixiv.net/novel/show.php?id=#{novel_id}&mode=cover"
end
if illust_id.present?
return "https://www.pixiv.net/artworks/#{illust_id}"
end
url
rescue PixivApiClient::BadIDError
nil
return nil if illust_id.blank?
"https://www.pixiv.net/artworks/#{illust_id}"
end
def canonical_url
@@ -132,15 +127,15 @@ module Sources
end
def profile_url
[url, referer_url].each do |x|
if x =~ PROFILE
return x
end
end
url = urls.find { |url| url.match?(PROFILE) }
"https://www.pixiv.net/users/#{metadata.user_id}"
rescue PixivApiClient::BadIDError
nil
if url.present?
url
elsif api_illust[:userId].present?
"https://www.pixiv.net/users/#{api_illust[:userId]}"
else
nil
end
end
def stacc_url
@@ -153,9 +148,7 @@ module Sources
end
def artist_name
metadata.name
rescue PixivApiClient::BadIDError
nil
api_illust[:userName]
end
def other_names
@@ -163,15 +156,11 @@ module Sources
end
def artist_commentary_title
metadata.artist_commentary_title
rescue PixivApiClient::BadIDError
nil
api_illust[:title]
end
def artist_commentary_desc
metadata.artist_commentary_desc
rescue PixivApiClient::BadIDError
nil
api_illust[:description]
end
def headers
@@ -179,8 +168,7 @@ module Sources
end
def normalize_for_source
return if illust_id.blank?
return nil if illust_id.blank?
"https://www.pixiv.net/artworks/#{illust_id}"
end
@@ -189,11 +177,10 @@ module Sources
end
def tags
metadata.tags.map do |tag|
api_illust.dig(:tags, :tags).to_a.map do |item|
tag = item[:tag]
[tag, "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{{word: tag}.to_param}"]
end
rescue PixivApiClient::BadIDError
[]
end
def normalize_tag(tag)
@@ -214,28 +201,12 @@ module Sources
illust_id.present? ? "pixiv:#{illust_id}" : "source:#{canonical_url}"
end
def image_urls_sub
# there's too much normalization bullshit we have to deal with
# raw urls, so just fetch the canonical url from the api every
# time.
if manga_page.present?
return [metadata.pages[manga_page]]
end
if metadata.pages.is_a?(Hash)
return [ugoira_zip_url]
end
metadata.pages
# Tells whether this work is a ugoira (animation).
#
# Truthy when the url itself matches the UGOIRA zip pattern, or when the
# illust's original image URL reported by the API contains "ugoira", e.g.
# https://i.pximg.net/img-original/img/2019/05/27/17/59/33/74932152_ugoira0.jpg
# Returns nil when the API data has no original URL and the url did not match.
def is_ugoira?
  return true if url.match?(UGOIRA)

  original_src = api_illust.dig(:urls, :original)
  original_src&.match?(/ugoira/)
end
# in order to prevent recursive loops, this method should not make any
# api calls and only try to extract the illust_id from the url. therefore,
# even though it makes sense to reference page_url here, it will only look
# at (url, referer_url).
def illust_id
return nil if novel_id.present?
parsed_urls.each do |url|
# http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
@@ -284,27 +255,22 @@ module Sources
nil
end
memoize :illust_id
def novel_id
[url, referer_url].each do |x|
if x =~ NOVEL_PAGE
return $1
end
end
nil
# Builds the Pixiv Ajax API client from the configured PHPSESSID cookie value
# (Danbooru.config.pixiv_phpsessid).
def api_client
PixivAjaxClient.new(Danbooru.config.pixiv_phpsessid)
end
memoize :novel_id
def metadata
if novel_id.present?
return PixivApiClient.new.novel(novel_id)
end
PixivApiClient.new.work(illust_id)
# Looks up the illust record for illust_id through api_client. Other methods
# in this strategy read keys such as :userId, :userName, :title, :description,
# :tags and :urls from the result.
def api_illust
api_client.illust(illust_id)
end
# Looks up the page list for illust_id through api_client. original_urls
# reads each page's "urls" -> "original" entry from the result.
def api_pages
api_client.pages(illust_id)
end
# Looks up the ugoira metadata for illust_id through api_client. Callers in
# this strategy read :originalSrc, :frames and :mime_type from the result.
def api_ugoira
api_client.ugoira_meta(illust_id)
end
memoize :metadata
def moniker
# we can sometimes get the moniker from the url
@@ -315,44 +281,17 @@ module Sources
elsif url =~ %r{#{WEB}/stacc/(#{MONIKER})/?$}i
$1
else
metadata.moniker
api_illust[:userAccount]
end
rescue PixivApiClient::BadIDError
nil
end
memoize :moniker
def data
{ ugoira_frame_data: ugoira_frame_data }
{ ugoira_frame_data: api_ugoira[:frames] }
end
def ugoira_zip_url
if metadata.pages.is_a?(Hash) && metadata.pages["ugoira600x600"]
metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
end
end
memoize :ugoira_zip_url
def ugoira_frame_data
metadata.json.dig("metadata", "frames")
rescue PixivApiClient::BadIDError
nil
end
memoize :ugoira_frame_data
def ugoira_content_type
case metadata.json["image_urls"].to_s
when /\.jpg/
"image/jpeg"
when /\.png/
"image/png"
when /\.gif/
"image/gif"
else
raise Sources::Error, "content type not found for (#{url}, #{referer_url})"
end
api_ugoira[:mime_type]
end
memoize :ugoira_content_type
# Returns the current page number of the manga. This will not
# make any api calls and only looks at (url, referer_url).
@@ -373,7 +312,8 @@ module Sources
nil
end
memoize :manga_page
memoize :illust_id, :api_client, :api_illust, :api_pages, :api_ugoira
end
end
end