Files
danbooru/app/logical/sources/strategies/nijie.rb
evazion d6f7725a1e nijie: fix exception in login process.
Fix an exception when we can't find the 'url' field in the login form
because we're rate limited by Nijie and couldn't scrape the login page.
2022-02-12 17:26:25 -06:00

273 lines
9.3 KiB
Ruby

# frozen_string_literal: true
# Image URLs:
#
# * https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg (page: https://www.nijie.info/view.php?id=64240)
# * https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png (page: http://nijie.info/view.php?id=218856)
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png
# * https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png (page: http://nijie.info/view_popup.php?id=265428#diff_2)
# * https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png (page: http://nijie.info/view_popup.php?id=287736#diff_2)
#
# * https://pic.nijie.net/03/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
#
# Unhandled:
#
# * https://pic01.nijie.info/nijie_picture/20120211210359.jpg
# * https://pic01.nijie.info/nijie_picture/2012021022424020120210.jpg
# * https://pic01.nijie.info/nijie_picture/diff/main/2012061023480525712_0.jpg
# * https://pic05.nijie.info/dojin_main/dojin_sam/1_2768_20180429004232.png
# * https://pic04.nijie.info/horne_picture/diff/main/56095_20160403221810_0.jpg
# * https://pic04.nijie.info/omata/4829_20161128012012.png (page: http://nijie.info/view_popup.php?id=33224#diff_3)
#
# Preview URLs:
#
# * https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# * https://pic03.nijie.info/__rs_l120x120/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/__rs_l170x170/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/__rs_l650x650/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/__rs_cns350x350/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/small_light(dh=150,dw=150,q=100)/nijie_picture/236014_20170620101426_0.png
#
# Page URLs:
#
# * https://nijie.info/view.php?id=167755 (deleted post)
# * https://nijie.info/view.php?id=218856
# * https://nijie.info/view_popup.php?id=218856
# * https://nijie.info/view_popup.php?id=218856#diff_1
# * https://www.nijie.info/view.php?id=218856
# * https://sp.nijie.info/view.php?id=218856
#
# Profile URLs
#
# * https://nijie.info/members.php?id=236014
# * https://nijie.info/members_illust.php?id=236014
#
# Doujin
# http://nijie.info/view.php?id=384548
# http://pic.nijie.net/01/dojin_main/dojin_sam/20120213044700%E3%82%B3%E3%83%94%E3%83%BC%20%EF%BD%9E%200011%E3%81%AE%E3%82%B3%E3%83%94%E3%83%BC.jpg (NSFW)
# http://pic.nijie.net/01/__rs_l120x120/dojin_main/dojin_sam/20120213044700%E3%82%B3%E3%83%94%E3%83%BC%20%EF%BD%9E%200011%E3%81%AE%E3%82%B3%E3%83%94%E3%83%BC.jpg
module Sources
module Strategies
class Nijie < Base
BASE_URL = %r{\Ahttps?://(?:[^.]+\.)?nijie\.info}i
PAGE_URL = %r{#{BASE_URL}/view(?:_popup)?\.php\?id=(?<illust_id>\d+)}i
PROFILE_URL = %r{#{BASE_URL}/members(?:_illust)?\.php\?id=(?<artist_id>\d+)\z}i
# https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg
# https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png
# http://pic.nijie.net/03/nijie_picture/829001_20190620004513_0.mp4
# https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png
FILENAME1 = /(?<artist_id>\d+)_(?<timestamp>\d{14})(?:_\d+)?/i
# https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png
FILENAME2 = /(?<illust_id>\d+)_\d+_(?<artist_id>\d+)_(?<timestamp>\d{14})/i
# https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png
FILENAME3 = /(?<illust_id>\d+)_(?<artist_id>\d+)_(?<timestamp>\d{14})_\d+/i
IMAGE_BASE_URL = %r{\Ahttps?://(?:pic\d+\.nijie\.info|pic\.nijie\.net)}i
DIR = %r{(?:\d+/)?(?:__rs_\w+/)?nijie_picture(?:/diff/main)?}
IMAGE_URL = %r{#{IMAGE_BASE_URL}/#{DIR}/#{Regexp.union(FILENAME1, FILENAME2, FILENAME3)}\.\w+\z}i
DOJIN_DIR = %r{(?:\d+/)?(?:__rs_\w+/)?dojin_main(?:/dojin_sam)?}i
DOJIN_URL = %r{#{IMAGE_BASE_URL}/#{DOJIN_DIR}/.*\.\w+\z}i
def self.enabled?
Danbooru.config.nijie_login.present? && Danbooru.config.nijie_password.present?
end
def domains
["nijie.info", "nijie.net"]
end
def site_name
"Nijie"
end
def image_url
return to_full_image_url(url) if url =~ IMAGE_URL || url =~ DOJIN_URL
return url if url =~ IMAGE_BASE_URL
image_urls.first
end
def image_urls
if doujin?
images = page&.search("#dojin_left .left img").to_a.map { |img| img["src"] }
images += page&.search("#dojin_diff img.mozamoza").to_a.map { |img| img["data-original"] }
else
images = page&.search("div#gallery a > .mozamoza").to_a.map { |img| img["src"] }
end
# Can't use URI.join here because nijie urls may contain japanese characters
images = images.map { |img| "https:#{img}" }
images = [url] if url.match?(IMAGE_URL) && images.empty?
images.map(&method(:to_full_image_url)).uniq
end
def preview_url
return nil if image_url.blank?
to_preview_url(image_url)
end
def preview_urls
image_urls.map(&method(:to_preview_url))
end
def page_url
return nil if illust_id.blank?
"https://nijie.info/view.php?id=#{illust_id}"
end
def profile_url
return nil if artist_id.blank?
"https://nijie.info/members.php?id=#{artist_id}"
end
def artist_name
if doujin?
page&.at("#dojin_left .right a[href*='members.php?id=']")&.text
else
page&.at("a.name")&.text
end
end
def artist_commentary_title
if doujin?
page&.search("#dojin_text p.title")&.text
else
page&.search("h2.illust_title")&.text
end
end
def artist_commentary_desc
if doujin?
page&.search("#dojin_text p:not(.title)")&.to_html
else
page&.search('#illust_text > p')&.to_html
end
end
def tags
links = page&.search("div#view-tag a") || []
search_links = links.select do |node|
node["href"] =~ /search(?:_dojin)?\.php/
end
search_links.map do |node|
[node.inner_text, "https://nijie.info" + node.attr("href")]
end
end
def tag_name
"nijie" + artist_id.to_s
end
def self.to_dtext(text)
text = text.to_s.gsub(/\r\n|\r/, "<br>")
dtext = DText.from_html(text) do |element|
if element.name == "a" && element["href"]&.start_with?("/jump.php")
element["href"] = element.text
end
end
dtext.strip
end
def to_full_image_url(x)
x.gsub(%r{__rs_\w+/}i, "").gsub(/\Ahttp:/, "https:")
end
def to_preview_url(url)
url.gsub(/nijie_picture/, "__rs_l170x170/nijie_picture").gsub(/\Ahttp:/, "https:")
end
def illust_id
urls.map { |url| url[PAGE_URL, :illust_id] || url[IMAGE_URL, :illust_id] }.compact.first
end
def artist_id_from_url
urls.map { |url| url[IMAGE_URL, :artist_id] || url[PROFILE_URL, :artist_id] }.compact.first
end
def artist_id_from_page
page&.search("a.name")&.first&.attr("href")&.match(/members\.php\?id=(\d+)/) { $1.to_i }
end
def artist_id
artist_id_from_url || artist_id_from_page
end
def normalize_for_source
return if illust_id.blank?
"https://nijie.info/view.php?id=#{illust_id}"
end
def doujin?
page&.at("#dojin_left").present?
end
def page
return nil if page_url.blank? || client.blank?
response = client.cache(1.minute).get(page_url)
if response.status != 200 || response.parse.search("#login_illust").present?
clear_cached_session_cookie!
else
response.parse
end
end
memoize :page
def client
return nil if cached_session_cookie.nil?
http.cookies(R18: 1, **cached_session_cookie)
end
def http
super.timeout(60).use(retriable: { max_retries: 20 })
end
# { "NIJIEIJIEID" => "5ca3f816c0c1f3e647940b08b8ab7a45", "nijie_tok" => <long-base64-string> }
def cached_session_cookie
Cache.get("nijie-session-cookie", 60.minutes, skip_nil: true) do
session_cookie
end
end
def clear_cached_session_cookie!
flush_cache # clear memoized session cookie
Cache.delete("nijie-session-cookie")
end
def session_cookie
login_page = http.get("https://nijie.info/login.php").parse
form = {
email: Danbooru.config.nijie_login,
password: Danbooru.config.nijie_password,
url: login_page.at("input[name='url']")&.fetch("value"),
save: "on",
ticket: ""
}
response = http.post("https://nijie.info/login_int.php", form: form)
if response.status == 200
response.cookies.cookies.map { |cookie| [cookie.name, cookie.value] }.to_h
else
DanbooruLogger.info "Nijie login failed (#{url}, #{response.status})"
nil
end
end
memoize :client, :cached_session_cookie
end
end
end