Merge branch 'master' into fix-pixiv-profile-url
This commit is contained in:
@@ -147,7 +147,7 @@ module Sources::Strategies
|
||||
urls = urls.reverse
|
||||
end
|
||||
|
||||
chosen_url = urls.find { |url| http_exists?(url, headers) }
|
||||
chosen_url = urls.find { |url| http_exists?(url) }
|
||||
chosen_url || url
|
||||
end
|
||||
end
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
module Sources
|
||||
module Strategies
|
||||
class Base
|
||||
class DownloadError < StandardError; end
|
||||
|
||||
attr_reader :url, :referer_url, :urls, :parsed_url, :parsed_referer, :parsed_urls
|
||||
|
||||
extend Memoist
|
||||
@@ -35,9 +37,9 @@ module Sources
|
||||
# <tt>referrer_url</tt> so the strategy can discover the HTML
|
||||
# page and other information.
|
||||
def initialize(url, referer_url = nil)
|
||||
@url = url
|
||||
@referer_url = referer_url
|
||||
@urls = [url, referer_url].select(&:present?)
|
||||
@url = url.to_s
|
||||
@referer_url = referer_url&.to_s
|
||||
@urls = [@url, @referer_url].select(&:present?)
|
||||
|
||||
@parsed_url = Addressable::URI.heuristic_parse(url) rescue nil
|
||||
@parsed_referer = Addressable::URI.heuristic_parse(referer_url) rescue nil
|
||||
@@ -139,15 +141,28 @@ module Sources
|
||||
# Subclasses should merge in any required headers needed to access resources
|
||||
# on the site.
|
||||
def headers
|
||||
Danbooru.config.http_headers
|
||||
{}
|
||||
end
|
||||
|
||||
# Returns the size of the image resource without actually downloading the file.
|
||||
def size
|
||||
Downloads::File.new(image_url).size
|
||||
http.head(image_url).content_length.to_i
|
||||
end
|
||||
memoize :size
|
||||
|
||||
# Download the file at the given url, or at the main image url by default.
|
||||
def download_file!(download_url = image_url)
|
||||
raise DownloadError, "Download failed: couldn't find download url for #{url}" if download_url.blank?
|
||||
response, file = http.download_media(download_url)
|
||||
raise DownloadError, "Download failed: #{download_url} returned error #{response.status}" if response.status != 200
|
||||
file
|
||||
end
|
||||
|
||||
def http
|
||||
Danbooru::Http.public_only.timeout(30).max_size(Danbooru.config.max_file_size)
|
||||
end
|
||||
memoize :http
|
||||
|
||||
# The url to use for artist finding purposes. This will be stored in the
|
||||
# artist entry. Normally this will be the profile url.
|
||||
def normalize_for_artist_finder
|
||||
@@ -274,9 +289,8 @@ module Sources
|
||||
to_h.to_json
|
||||
end
|
||||
|
||||
def http_exists?(url, headers)
|
||||
res = HTTParty.head(url, Danbooru.config.httparty_options.deep_merge(headers: headers))
|
||||
res.success?
|
||||
def http_exists?(url, headers = {})
|
||||
http.headers(headers).head(url).status.success?
|
||||
end
|
||||
|
||||
# Convert commentary to dtext by stripping html tags. Sites can override
|
||||
|
||||
@@ -64,11 +64,10 @@ module Sources
|
||||
def page
|
||||
return nil if page_url.blank?
|
||||
|
||||
doc = Cache.get("hentai-foundry:#{page_url}", 1.minute) do
|
||||
HTTParty.get("#{page_url}?enterAgree=1").body
|
||||
end
|
||||
response = Danbooru::Http.new.cache(1.minute).get("#{page_url}?enterAgree=1")
|
||||
return nil unless response.status == 200
|
||||
|
||||
Nokogiri::HTML(doc)
|
||||
response.parse
|
||||
end
|
||||
|
||||
def tags
|
||||
|
||||
@@ -73,8 +73,7 @@ module Sources
|
||||
end
|
||||
|
||||
def image_url
|
||||
return if image_urls.blank?
|
||||
return url if api_client.blank?
|
||||
return url if image_urls.blank? || api_client.blank?
|
||||
|
||||
img = case url
|
||||
when DIRECT || CDN_DIRECT then "https://seiga.nicovideo.jp/image/source/#{image_id_from_url(url)}"
|
||||
@@ -83,7 +82,7 @@ module Sources
|
||||
end
|
||||
|
||||
resp = api_client.get(img)
|
||||
if resp.headers["Location"] =~ %r{https?://.+/(\w+/\d+/\d+)\z}i
|
||||
if resp.uri.to_s =~ %r{https?://.+/(\w+/\d+/\d+)\z}i
|
||||
"https://lohas.nicoseiga.jp/priv/#{$1}"
|
||||
else
|
||||
img
|
||||
@@ -181,12 +180,12 @@ module Sources
|
||||
|
||||
def api_client
|
||||
if illust_id.present?
|
||||
NicoSeigaApiClient.new(work_id: illust_id, type: "illust")
|
||||
NicoSeigaApiClient.new(work_id: illust_id, type: "illust", http: http)
|
||||
elsif manga_id.present?
|
||||
NicoSeigaApiClient.new(work_id: manga_id, type: "manga")
|
||||
NicoSeigaApiClient.new(work_id: manga_id, type: "manga", http: http)
|
||||
elsif image_id.present?
|
||||
# We default to illust to attempt getting the api anyway
|
||||
NicoSeigaApiClient.new(work_id: image_id, type: "illust")
|
||||
NicoSeigaApiClient.new(work_id: image_id, type: "illust", http: http)
|
||||
end
|
||||
end
|
||||
memoize :api_client
|
||||
|
||||
@@ -178,54 +178,21 @@ module Sources
|
||||
def page
|
||||
return nil if page_url.blank?
|
||||
|
||||
doc = agent.get(page_url)
|
||||
http = Danbooru::Http.new
|
||||
form = { email: Danbooru.config.nijie_login, password: Danbooru.config.nijie_password }
|
||||
|
||||
if doc.search("div#header-login-container").any?
|
||||
# Session cache is invalid, clear it and log in normally.
|
||||
Cache.delete("nijie-session")
|
||||
doc = agent.get(page_url)
|
||||
end
|
||||
# XXX `retriable` must come after `cache` so that retries don't return cached error responses.
|
||||
response = http.cache(1.hour).use(retriable: { max_retries: 20 }).post("https://nijie.info/login_int.php", form: form)
|
||||
DanbooruLogger.info "Nijie login failed (#{url}, #{response.status})" if response.status != 200
|
||||
return nil unless response.status == 200
|
||||
|
||||
doc
|
||||
rescue Mechanize::ResponseCodeError => e
|
||||
return nil if e.response_code.to_i == 404
|
||||
raise
|
||||
response = http.cookies(R18: 1).cache(1.minute).get(page_url)
|
||||
return nil unless response.status == 200
|
||||
|
||||
response&.parse
|
||||
end
|
||||
|
||||
memoize :page
|
||||
|
||||
def agent
|
||||
mech = Mechanize.new
|
||||
|
||||
session = Cache.get("nijie-session")
|
||||
if session
|
||||
cookie = Mechanize::Cookie.new("NIJIEIJIEID", session)
|
||||
cookie.domain = ".nijie.info"
|
||||
cookie.path = "/"
|
||||
mech.cookie_jar.add(cookie)
|
||||
else
|
||||
mech.get("https://nijie.info/login.php") do |page|
|
||||
page.form_with(:action => "/login_int.php") do |form|
|
||||
form['email'] = Danbooru.config.nijie_login
|
||||
form['password'] = Danbooru.config.nijie_password
|
||||
end.click_button
|
||||
end
|
||||
session = mech.cookie_jar.cookies.select {|c| c.name == "NIJIEIJIEID"}.first
|
||||
Cache.put("nijie-session", session.value, 1.day) if session
|
||||
end
|
||||
|
||||
# This cookie needs to be set to allow viewing of adult works while anonymous
|
||||
cookie = Mechanize::Cookie.new("R18", "1")
|
||||
cookie.domain = ".nijie.info"
|
||||
cookie.path = "/"
|
||||
mech.cookie_jar.add(cookie)
|
||||
|
||||
mech
|
||||
rescue Mechanize::ResponseCodeError => e
|
||||
raise unless e.response_code.to_i == 429
|
||||
sleep(5)
|
||||
retry
|
||||
end
|
||||
memoize :agent
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -47,7 +47,7 @@ module Sources
|
||||
when %r{\Ahttps?://c(?:s|han|[1-4])\.sankakucomplex\.com/data(?:/sample)?/(?:[a-f0-9]{2}/){2}(?:sample-|preview)?([a-f0-9]{32})}i
|
||||
"https://chan.sankakucomplex.com/en/post/show?md5=#{$1}"
|
||||
|
||||
when %r{\Ahttps?://(?:www|s(?:tatic|[1-4]))\.zerochan\.net/.+(?:\.|\/)(\d+)(?:\.(?:jpe?g?))?\z}i
|
||||
when %r{\Ahttps?://(?:www|s(?:tatic|[1-4]))\.zerochan\.net/.+(?:\.|\/)(\d+)(?:\.(?:jpe?g?|png))?\z}i
|
||||
"https://www.zerochan.net/#{$1}#full"
|
||||
|
||||
when %r{\Ahttps?://static[1-6]?\.minitokyo\.net/(?:downloads|view)/(?:\d{2}/){2}(\d+)}i
|
||||
|
||||
@@ -64,9 +64,6 @@ module Sources
|
||||
ORIG_IMAGE = %r{#{PXIMG}/img-original/img/#{DATE}/(?<illust_id>\d+)_p(?<page>\d+)\.#{EXT}\z}i
|
||||
STACC_PAGE = %r{\A#{WEB}/stacc/#{MONIKER}/?\z}i
|
||||
NOVEL_PAGE = %r{(?:\Ahttps?://www\.pixiv\.net/novel/show\.php\?id=(\d+))}
|
||||
FANBOX_ACCOUNT = %r{(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+\z)}
|
||||
FANBOX_IMAGE = %r{(?:\Ahttps?://fanbox\.pixiv\.net/images/post/(\d+))}
|
||||
FANBOX_PAGE = %r{(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+/post/(\d+))}
|
||||
|
||||
def self.to_dtext(text)
|
||||
if text.nil?
|
||||
@@ -127,14 +124,6 @@ module Sources
|
||||
return "https://www.pixiv.net/novel/show.php?id=#{novel_id}&mode=cover"
|
||||
end
|
||||
|
||||
if fanbox_id.present?
|
||||
return "https://www.pixiv.net/fanbox/creator/#{metadata.user_id}/post/#{fanbox_id}"
|
||||
end
|
||||
|
||||
if fanbox_account_id.present?
|
||||
return "https://www.pixiv.net/fanbox/creator/#{fanbox_account_id}"
|
||||
end
|
||||
|
||||
if illust_id.present?
|
||||
return "https://www.pixiv.net/artworks/#{illust_id}"
|
||||
end
|
||||
@@ -192,17 +181,7 @@ module Sources
|
||||
end
|
||||
|
||||
def headers
|
||||
if fanbox_id.present?
|
||||
# need the session to download fanbox images
|
||||
return {
|
||||
"Referer" => "https://www.pixiv.net/fanbox",
|
||||
"Cookie" => HTTP::Cookie.cookie_value(agent.cookies)
|
||||
}
|
||||
end
|
||||
|
||||
{
|
||||
"Referer" => "https://www.pixiv.net"
|
||||
}
|
||||
{ "Referer" => "https://www.pixiv.net" }
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
@@ -242,10 +221,6 @@ module Sources
|
||||
end
|
||||
|
||||
def image_urls_sub
|
||||
if url =~ FANBOX_IMAGE
|
||||
return [url]
|
||||
end
|
||||
|
||||
# there's too much normalization bullshit we have to deal with
|
||||
# raw urls, so just fetch the canonical url from the api every
|
||||
# time.
|
||||
@@ -265,7 +240,7 @@ module Sources
|
||||
# even though it makes sense to reference page_url here, it will only look
|
||||
# at (url, referer_url).
|
||||
def illust_id
|
||||
return nil if novel_id.present? || fanbox_id.present?
|
||||
return nil if novel_id.present?
|
||||
|
||||
parsed_urls.each do |url|
|
||||
# http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
|
||||
@@ -328,46 +303,11 @@ module Sources
|
||||
end
|
||||
memoize :novel_id
|
||||
|
||||
def fanbox_id
|
||||
[url, referer_url].each do |x|
|
||||
if x =~ FANBOX_PAGE
|
||||
return $1
|
||||
end
|
||||
|
||||
if x =~ FANBOX_IMAGE
|
||||
return $1
|
||||
end
|
||||
end
|
||||
|
||||
nil
|
||||
end
|
||||
memoize :fanbox_id
|
||||
|
||||
def fanbox_account_id
|
||||
[url, referer_url].each do |x|
|
||||
if x =~ FANBOX_ACCOUNT
|
||||
return x
|
||||
end
|
||||
end
|
||||
|
||||
nil
|
||||
end
|
||||
memoize :fanbox_account_id
|
||||
|
||||
def agent
|
||||
PixivWebAgent.build
|
||||
end
|
||||
memoize :agent
|
||||
|
||||
def metadata
|
||||
if novel_id.present?
|
||||
return PixivApiClient.new.novel(novel_id)
|
||||
end
|
||||
|
||||
if fanbox_id.present?
|
||||
return PixivApiClient.new.fanbox(fanbox_id)
|
||||
end
|
||||
|
||||
PixivApiClient.new.work(illust_id)
|
||||
end
|
||||
memoize :metadata
|
||||
|
||||
@@ -23,7 +23,7 @@ module Sources::Strategies
|
||||
OLD_IMAGE = %r{\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_(?<size>\w+)\.#{EXT}\z}i
|
||||
|
||||
IMAGE = %r{\Ahttps?://#{DOMAIN}/}i
|
||||
VIDEO = %r{\Ahttps?://(?:vtt|ve\.media)\.tumblr\.com/}i
|
||||
VIDEO = %r{\Ahttps?://(?:vtt|ve|va\.media)\.tumblr\.com/}i
|
||||
POST = %r{\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)}i
|
||||
|
||||
def self.enabled?
|
||||
@@ -168,7 +168,7 @@ module Sources::Strategies
|
||||
end
|
||||
|
||||
candidates.find do |candidate|
|
||||
http_exists?(candidate, headers)
|
||||
http_exists?(candidate)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -200,7 +200,7 @@ module Sources::Strategies
|
||||
end
|
||||
|
||||
def api_response
|
||||
return {} unless self.class.enabled?
|
||||
return {} unless self.class.enabled? && status_id.present?
|
||||
api_client.status(status_id)
|
||||
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user