Bug: if a Nijie login failed with a 429 Too Many Requests error, the error would get cached, so when we retried the request, we would just get our own cached response back every time. The 429 error would eventually be passed up to the Nijie strategy, which caused random methods to fail because they couldn't get the html page. Fix: add the `retriable` feature *after* the `cache` feature so that retries don't go through the cache. This is a hack. We want retries to go at the bottom of the stack, below caching, but we can't enforce this ordering.
199 lines
6.9 KiB
Ruby
199 lines
6.9 KiB
Ruby
# Image URLs:
|
|
#
|
|
# * https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg (page: https://www.nijie.info/view.php?id=64240)
|
|
# * https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
|
|
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png (page: http://nijie.info/view.php?id=218856)
|
|
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png
|
|
# * https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png (page: http://nijie.info/view_popup.php?id=265428#diff_2)
|
|
# * https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png (page: http://nijie.info/view_popup.php?id=287736#diff_2)
|
|
#
|
|
# * https://pic.nijie.net/03/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
|
|
#
|
|
# Unhandled:
|
|
#
|
|
# * https://pic01.nijie.info/nijie_picture/20120211210359.jpg
|
|
# * https://pic01.nijie.info/nijie_picture/2012021022424020120210.jpg
|
|
# * https://pic01.nijie.info/nijie_picture/diff/main/2012061023480525712_0.jpg
|
|
# * https://pic05.nijie.info/dojin_main/dojin_sam/1_2768_20180429004232.png
|
|
# * https://pic04.nijie.info/horne_picture/diff/main/56095_20160403221810_0.jpg
|
|
# * https://pic04.nijie.info/omata/4829_20161128012012.png (page: http://nijie.info/view_popup.php?id=33224#diff_3)
|
|
#
|
|
# Preview URLs:
|
|
#
|
|
# * https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
|
|
# * https://pic03.nijie.info/__rs_l120x120/nijie_picture/236014_20170620101426_0.png
|
|
# * https://pic03.nijie.info/__rs_l170x170/nijie_picture/236014_20170620101426_0.png
|
|
# * https://pic03.nijie.info/__rs_l650x650/nijie_picture/236014_20170620101426_0.png
|
|
# * https://pic03.nijie.info/__rs_cns350x350/nijie_picture/236014_20170620101426_0.png
|
|
# * https://pic03.nijie.info/small_light(dh=150,dw=150,q=100)/nijie_picture/236014_20170620101426_0.png
|
|
#
|
|
# Page URLs:
|
|
#
|
|
# * https://nijie.info/view.php?id=167755 (deleted post)
|
|
# * https://nijie.info/view.php?id=218856
|
|
# * https://nijie.info/view_popup.php?id=218856
|
|
# * https://nijie.info/view_popup.php?id=218856#diff_1
|
|
# * https://www.nijie.info/view.php?id=218856
|
|
# * https://sp.nijie.info/view.php?id=218856
|
|
#
|
|
# Profile URLs:
|
|
#
|
|
# * https://nijie.info/members.php?id=236014
|
|
# * https://nijie.info/members_illust.php?id=236014
|
|
|
|
module Sources
|
|
module Strategies
|
|
class Nijie < Base
|
|
# Scheme and host of a nijie.info page, with at most one subdomain label
# (www., sp., ...). The label may contain neither "." nor "/", so a path
# segment such as "https://example/x.nijie.info/..." is not mistaken for a
# nijie.info host.
BASE_URL = %r{\Ahttps?://(?:[^./]+\.)?nijie\.info}i

# Illustration pages (view.php / view_popup.php); captures `illust_id`.
PAGE_URL = %r{#{BASE_URL}/view(?:_popup)?\.php\?id=(?<illust_id>\d+)}i

# Artist profile pages (members.php / members_illust.php); captures `artist_id`.
PROFILE_URL = %r{#{BASE_URL}/members(?:_illust)?\.php\?id=(?<artist_id>\d+)\z}i

# https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg
# https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png
# http://pic.nijie.net/03/nijie_picture/829001_20190620004513_0.mp4
# https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png
FILENAME1 = /(?<artist_id>\d+)_(?<timestamp>\d{14})(?:_\d+)?/i

# https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png
FILENAME2 = /(?<illust_id>\d+)_\d+_(?<artist_id>\d+)_(?<timestamp>\d{14})/i

# https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png
FILENAME3 = /(?<illust_id>\d+)_(?<artist_id>\d+)_(?<timestamp>\d{14})_\d+/i

# Hosts that serve image files.
IMAGE_BASE_URL = %r{\Ahttps?://(?:pic\d+\.nijie\.info|pic\.nijie\.net)}i

# Directory part of an image URL: optional numeric shard (pic.nijie.net/03/),
# optional `__rs_*` preview segment, then nijie_picture with optional /diff/main.
DIR = %r{(?:\d+/)?(?:__rs_\w+/)?nijie_picture(?:/diff/main)?}

# A complete image URL. The three filename patterns reuse the same capture
# names (`illust_id`, `artist_id`, `timestamp`); a lookup by name returns the
# capture from whichever alternative matched.
IMAGE_URL = %r{#{IMAGE_BASE_URL}/#{DIR}/#{Regexp.union(FILENAME1, FILENAME2, FILENAME3)}\.\w+\z}i
|
|
|
|
# Returns the list of domain names associated with Nijie.
def domains
  %w[nijie.info nijie.net]
end
|
|
|
|
# Returns the display name of the site.
def site_name
  "Nijie"
end
|
|
|
|
# Returns the full-size image URL for this source.
#
# When `url` itself matches IMAGE_URL it is converted with
# `to_full_image_url`; otherwise the first entry of `image_urls` is used
# (nil when that list is empty).
def image_url
  if url.match?(IMAGE_URL)
    to_full_image_url(url)
  else
    image_urls.first
  end
end
|
|
|
|
# Returns the unique full-size image URLs for the post.
#
# The URLs are read from the `src` attribute of the gallery images on the
# page. Elements without a `src` are skipped, and "https:" is only prepended
# to protocol-relative values ("//host/..."), so an absolute `src` is kept
# as-is instead of becoming "https:https://...".
#
# If the page yields no images (or could not be fetched) and `url` is itself
# an image URL, `url` is used instead. Every result is passed through
# `to_full_image_url`.
def image_urls
  # `page` may be nil; nil.to_a is [], so a missing page simply yields no sources.
  sources = page&.search("div#gallery a > .mozamoza").to_a.map { |img| img["src"].to_s }

  images = sources.reject(&:empty?).map do |src|
    src.start_with?("//") ? "https:#{src}" : src
  end

  images = [url] if images.empty? && url.match?(IMAGE_URL)
  images.map(&method(:to_full_image_url)).uniq
end
|
|
|
|
# Returns the preview (thumbnail) URL derived from `image_url`, or nil when
# no image URL is available.
def preview_url
  full = image_url
  full.blank? ? nil : to_preview_url(full)
end
|
|
|
|
# Returns one preview URL per entry of `image_urls`, in the same order.
def preview_urls
  image_urls.map { |full| to_preview_url(full) }
end
|
|
|
|
# Returns the canonical view.php URL for the illustration, or nil when no
# illustration id could be determined.
def page_url
  id = illust_id
  "https://nijie.info/view.php?id=#{id}" unless id.blank?
end
|
|
|
|
# Returns the canonical members.php URL for the artist, or nil when no
# artist id could be determined.
def profile_url
  id = artist_id
  "https://nijie.info/members.php?id=#{id}" unless id.blank?
end
|
|
|
|
# Returns the text of the first `a.name` element on the page, or nil when
# the page is unavailable or has no such element.
def artist_name
  name_link = page&.search("a.name")&.first
  name_link&.text
end
|
|
|
|
# Returns the text of the page's `h2.illust_title` element(s), or nil when
# the page is unavailable.
def artist_commentary_title
  doc = page
  return nil if doc.nil?

  doc.search("h2.illust_title")&.text
end
|
|
|
|
# Returns the HTML of the `#illust_text > p` elements on the page, or nil
# when the page is unavailable.
def artist_commentary_desc
  doc = page
  return nil if doc.nil?

  doc.search('#illust_text > p')&.to_html
end
|
|
|
|
# Returns the post's tags as [name, url] pairs.
#
# Only links inside `div#view-tag` whose href contains "search.php" are
# kept; each href is prefixed with "https://nijie.info". Returns an empty
# list when the page is unavailable.
def tags
  anchors = page&.search("div#view-tag a") || []

  anchors
    .select { |anchor| anchor["href"] =~ /search\.php/ }
    .map { |anchor| [anchor.inner_text, "https://nijie.info#{anchor.attr("href")}"] }
end
|
|
|
|
# Returns "nijie" followed by the artist id (just "nijie" when the id is nil).
def tag_name
  "nijie#{artist_id}"
end
|
|
|
|
# Converts Nijie commentary HTML to DText.
#
# "\r\n" and bare "\r" are turned into <br> tags before conversion. For
# links whose href starts with "/jump.php", the href is replaced by the
# link's own text (presumably the real destination; confirm against live
# pages). The converted text is returned with surrounding whitespace removed.
def self.to_dtext(text)
  html = text.to_s.gsub(/\r\n|\r/, "<br>")

  converted = DText.from_html(html) do |node|
    next unless node.name == "a"
    next unless node["href"]&.start_with?("/jump.php")

    node["href"] = node.text
  end

  converted.strip
end
|
|
|
|
# Converts an image or preview URL into the full-size https image URL.
#
# Removes the resizing path segment used by preview URLs, in both known
# forms: `__rs_<spec>/` (e.g. `__rs_l120x120/`) and
# `small_light(<options>)/` (e.g. `small_light(dh=150,dw=150,q=100)/`),
# then upgrades a leading "http:" to "https:".
def to_full_image_url(x)
  x.gsub(%r{(?:__rs_\w+|small_light\([^)]*\))/}i, "").sub(/\Ahttp:/, "https:")
end
|
|
|
|
# Converts a full-size image URL into its 170x170 preview URL by inserting
# the `__rs_l170x170/` segment before "nijie_picture", and upgrades a
# leading "http:" to "https:".
def to_preview_url(url)
  resized = url.gsub("nijie_picture", "__rs_l170x170/nijie_picture")
  resized.sub(/\Ahttp:/, "https:")
end
|
|
|
|
# Returns the illustration id (a string of digits) taken from the first of
# `urls` that yields one, trying PAGE_URL before IMAGE_URL for each URL.
# Returns nil when none of the URLs contain an illustration id.
def illust_id
  candidates = urls.map do |candidate|
    candidate[PAGE_URL, :illust_id] || candidate[IMAGE_URL, :illust_id]
  end

  candidates.find { |id| !id.nil? }
end
|
|
|
|
# Returns the artist id (a string of digits) taken from the first of `urls`
# that yields one, trying IMAGE_URL before PROFILE_URL for each URL.
# Returns nil when none of the URLs contain an artist id.
def artist_id_from_url
  candidates = urls.map do |candidate|
    candidate[IMAGE_URL, :artist_id] || candidate[PROFILE_URL, :artist_id]
  end

  candidates.find { |id| !id.nil? }
end
|
|
|
|
# Returns the artist id as an Integer, parsed from the `members.php?id=N`
# href of the first `a.name` link on the page. Returns nil when the page,
# the link, or a matching href is missing.
def artist_id_from_page
  href = page&.search("a.name")&.first&.attr("href")
  digits = href && href[/members\.php\?id=(\d+)/, 1]
  digits&.to_i
end
|
|
|
|
# Returns the artist id, preferring the one found in the URLs and falling
# back to the one scraped from the page. The URL-derived id is a String and
# the page-derived id is an Integer.
def artist_id
  from_url = artist_id_from_url
  return from_url if from_url

  artist_id_from_page
end
|
|
|
|
# Returns the normalized source URL for the post: the canonical view.php
# page URL, or nil when no illustration id is known. This is exactly what
# `page_url` produces, so it is delegated to.
def normalize_for_source
  page_url
end
|
|
|
|
# Logs in to Nijie and fetches the illustration page.
#
# Returns the parsed response body for `page_url` (callers in this class
# query it with `.search`, so presumably an HTML document), or nil when
# there is no page URL, the login does not return 200 (this is logged), or
# the page request does not return 200.
def page
  target = page_url
  return nil if target.blank?

  client = Danbooru::Http.new
  credentials = { email: Danbooru.config.nijie_login, password: Danbooru.config.nijie_password }

  # XXX `retriable` must come after `cache` so that retries don't return cached error responses.
  login = client.cache(1.hour).use(:retriable).post("https://nijie.info/login_int.php", form: credentials)
  unless login.status == 200
    DanbooruLogger.info "Nijie login failed (#{url}, #{login.status})"
    return nil
  end

  # The R18 cookie is sent with the page request.
  illust_page = client.cookies(R18: 1).cache(1.minute).get(target)
  return nil unless illust_page.status == 200

  illust_page.parse
end
|
|
|
|
memoize :page
|
|
end
|
|
end
|
|
end
|