Nijie tests often fail under parallel testing. This is because every test needs to log in to Nijie first, but Nijie rate-limits the login endpoint, so eventually we hit the limit and tests start failing. This is made worse by a thundering herd problem: eight test processes try to log in to Nijie at the same time, but only one succeeds, so the rest sleep and try again. However, they all wake up and retry at the same time, hitting the rate limit again. The workaround is to set the retry limit ridiculously high, higher than we would ideally like in production. Another workaround would be to serialize the Nijie tests in the test suite. This can be done with lockfiles and flock(2). This helps, but we can still hit the rate limit even under serialized execution.
199 lines
7.0 KiB
Ruby
199 lines
7.0 KiB
Ruby
# Image URLs:
#
# * https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg (page: https://www.nijie.info/view.php?id=64240)
# * https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png (page: http://nijie.info/view.php?id=218856)
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png
# * https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png (page: http://nijie.info/view_popup.php?id=265428#diff_2)
# * https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png (page: http://nijie.info/view_popup.php?id=287736#diff_2)
#
# * https://pic.nijie.net/03/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
#
# Unhandled:
#
# * https://pic01.nijie.info/nijie_picture/20120211210359.jpg
# * https://pic01.nijie.info/nijie_picture/2012021022424020120210.jpg
# * https://pic01.nijie.info/nijie_picture/diff/main/2012061023480525712_0.jpg
# * https://pic05.nijie.info/dojin_main/dojin_sam/1_2768_20180429004232.png
# * https://pic04.nijie.info/horne_picture/diff/main/56095_20160403221810_0.jpg
# * https://pic04.nijie.info/omata/4829_20161128012012.png (page: http://nijie.info/view_popup.php?id=33224#diff_3)
#
# Preview URLs:
#
# * https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# * https://pic03.nijie.info/__rs_l120x120/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/__rs_l170x170/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/__rs_l650x650/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/__rs_cns350x350/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/small_light(dh=150,dw=150,q=100)/nijie_picture/236014_20170620101426_0.png
#
# Page URLs:
#
# * https://nijie.info/view.php?id=167755 (deleted post)
# * https://nijie.info/view.php?id=218856
# * https://nijie.info/view_popup.php?id=218856
# * https://nijie.info/view_popup.php?id=218856#diff_1
# * https://www.nijie.info/view.php?id=218856
# * https://sp.nijie.info/view.php?id=218856
#
# Profile URLs:
#
# * https://nijie.info/members.php?id=236014
# * https://nijie.info/members_illust.php?id=236014
|
|
module Sources
|
|
module Strategies
|
|
class Nijie < Base
|
|
# Any http(s) URL on nijie.info, with at most one subdomain label (www., sp., pic03., ...).
BASE_URL = %r{\Ahttps?://(?:[^.]+\.)?nijie\.info}i

# Illustration pages (view.php or view_popup.php). Captures `illust_id`.
PAGE_URL = %r{#{BASE_URL}/view(?:_popup)?\.php\?id=(?<illust_id>\d+)}i

# Artist pages (members.php or members_illust.php). Captures `artist_id`.
PROFILE_URL = %r{#{BASE_URL}/members(?:_illust)?\.php\?id=(?<artist_id>\d+)\z}i

# Filename layout 1: <artist_id>_<14-digit timestamp>, optionally followed by _<number>.
#
# https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg
# https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png
# http://pic.nijie.net/03/nijie_picture/829001_20190620004513_0.mp4
# https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png
FILENAME1 = /(?<artist_id>\d+)_(?<timestamp>\d{14})(?:_\d+)?/i

# Filename layout 2: <illust_id>_<number>_<artist_id>_<14-digit timestamp>.
#
# https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png
FILENAME2 = /(?<illust_id>\d+)_\d+_(?<artist_id>\d+)_(?<timestamp>\d{14})/i

# Filename layout 3: <illust_id>_<artist_id>_<14-digit timestamp>_<number>.
#
# https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png
FILENAME3 = /(?<illust_id>\d+)_(?<artist_id>\d+)_(?<timestamp>\d{14})_\d+/i

# Hosts that serve image files: picNN.nijie.info or pic.nijie.net.
IMAGE_BASE_URL = %r{\Ahttps?://(?:pic\d+\.nijie\.info|pic\.nijie\.net)}i

# Directory part of an image URL: optional numeric directory, optional `__rs_*`
# resize segment, then `nijie_picture`, optionally followed by `/diff/main`.
DIR = %r{(?:\d+/)?(?:__rs_\w+/)?nijie_picture(?:/diff/main)?}

# A complete image URL. The three filename layouts reuse the same capture names
# (`artist_id`, `illust_id`, `timestamp`); only the layout that matched fills them in.
IMAGE_URL = %r{#{IMAGE_BASE_URL}/#{DIR}/#{Regexp.union(FILENAME1, FILENAME2, FILENAME3)}\.\w+\z}i
|
|
|
|
# @return [Array<String>] the domain names associated with this strategy
def domains
  %w[nijie.info nijie.net]
end
|
|
|
|
# @return [String] the display name of the site
def site_name
  "Nijie"
end
|
|
|
|
# The full-size image for this source.
#
# When `url` is itself an image URL it is returned in full-size form;
# otherwise the first entry of `image_urls` is used.
#
# @return [String, nil] nil when `url` is not an image and `image_urls` is empty
def image_url
  return to_full_image_url(url) if url.match?(IMAGE_URL)

  image_urls.first
end
|
|
|
|
# All full-size image URLs for this source.
#
# Reads the `src` of every `div#gallery a > .mozamoza` node on the page. If
# the page yields nothing and `url` is itself an image URL, `url` is used.
#
# @return [Array<String>] de-duplicated full-size image URLs; empty if none found
def image_urls
  nodes = page&.search("div#gallery a > .mozamoza").to_a

  # Only protocol-relative sources ("//pic03...") get a scheme prepended.
  # Missing sources are dropped and other values are passed through untouched,
  # rather than being turned into "https:" or "https:https://...".
  images = nodes.map { |img| img["src"].to_s }.reject(&:empty?).map do |src|
    src.start_with?("//") ? "https:#{src}" : src
  end

  images = [url] if images.empty? && url.match?(IMAGE_URL)
  images.map { |image| to_full_image_url(image) }.uniq
end
|
|
|
|
# The preview (thumbnail) URL corresponding to `image_url`.
#
# @return [String, nil] nil when there is no image URL
def preview_url
  # Resolve image_url once; it is used both for the guard and for the result.
  full_url = image_url
  return nil if full_url.blank?

  to_preview_url(full_url)
end
|
|
|
|
# @return [Array<String>] the preview URL for each entry of `image_urls`, in the same order
def preview_urls
  image_urls.map { |image| to_preview_url(image) }
end
|
|
|
|
# The canonical illustration page URL for this source.
#
# @return [String, nil] nil when no illustration id can be determined
def page_url
  id = illust_id
  return nil if id.blank?

  "https://nijie.info/view.php?id=#{id}"
end
|
|
|
|
# The artist's profile page URL.
#
# @return [String, nil] nil when no artist id can be determined
def profile_url
  id = artist_id
  return nil if id.blank?

  "https://nijie.info/members.php?id=#{id}"
end
|
|
|
|
# The artist's name, taken from the text of the first `a.name` node on the page.
#
# @return [String, nil] nil when the page is unavailable or has no such node
def artist_name
  page&.search("a.name")&.first&.text
end
|
|
|
|
# The commentary title: the text of the page's `h2.illust_title` node(s).
#
# @return [String, nil] nil when the page is unavailable
def artist_commentary_title
  page&.search("h2.illust_title")&.text
end
|
|
|
|
# The commentary body: the HTML of the page's `#illust_text > p` node(s).
#
# @return [String, nil] nil when the page is unavailable
def artist_commentary_desc
  page&.search("#illust_text > p")&.to_html
end
|
|
|
|
# The tags listed on the page.
#
# Only links under `div#view-tag` whose href contains "search.php" are kept.
#
# @return [Array<Array(String, String)>] pairs of [tag text, absolute search URL];
#   empty when the page is unavailable
def tags
  links = page&.search("div#view-tag a") || []

  search_links = links.select { |node| node["href"].to_s.match?(/search\.php/) }
  search_links.map { |node| [node.inner_text, "https://nijie.info#{node["href"]}"] }
end
|
|
|
|
# A tag name for the artist: "nijie" followed by the artist id.
#
# @return [String] just "nijie" when no artist id is available
def tag_name
  "nijie#{artist_id}"
end
|
|
|
|
# Converts commentary HTML to DText.
#
# @param text [String, nil] commentary HTML; nil is treated as an empty string
# @return [String] the result of `DText.from_html`, stripped of surrounding whitespace
def self.to_dtext(text)
  # Turn CRLF / CR line endings into explicit <br> tags before conversion.
  html = text.to_s.gsub(/\r\n|\r/, "<br>")

  dtext = DText.from_html(html) do |element|
    # For links whose href starts with "/jump.php", use the link text as the href.
    if element.name == "a" && element["href"]&.start_with?("/jump.php")
      element["href"] = element.text
    end
  end

  dtext.strip
end
|
|
|
|
# Converts an image or preview URL to its full-size https form.
#
# Removes any resize path segment, either `__rs_<spec>/` or
# `small_light(<params>)/`, and upgrades an http scheme to https.
#
# @param x [String] an image URL
# @return [String] the full-size URL; already-full https URLs come back unchanged
def to_full_image_url(x)
  x.gsub(%r{(?:__rs_\w+|small_light\([^)]*\))/}i, "").sub(/\Ahttp:/, "https:")
end
|
|
|
|
# Converts an image URL to its 170x170 preview form over https.
#
# A resize segment already in front of `nijie_picture` (`__rs_<spec>/` or
# `small_light(<params>)/`) is replaced rather than stacked, so passing a
# preview URL does not produce a doubled segment.
#
# @param url [String] an image URL
# @return [String] the `__rs_l170x170` preview URL
def to_preview_url(url)
  resized = url.gsub(%r{(?:(?:__rs_\w+|small_light\([^)]*\))/)?nijie_picture}, "__rs_l170x170/nijie_picture")
  resized.sub(/\Ahttp:/, "https:")
end
|
|
|
|
# The illustration id, taken from the first entry of `urls` that carries one.
#
# Each candidate is tried as a page URL first, then as an image URL.
#
# @return [String, nil] nil when no candidate contains an illustration id
def illust_id
  urls.each do |candidate|
    id = candidate[PAGE_URL, :illust_id] || candidate[IMAGE_URL, :illust_id]
    return id if id
  end

  nil
end
|
|
|
|
# The artist id, taken from the first entry of `urls` that carries one.
#
# Each candidate is tried as an image URL first, then as a profile URL.
#
# @return [String, nil] nil when no candidate contains an artist id
def artist_id_from_url
  urls.each do |candidate|
    id = candidate[IMAGE_URL, :artist_id] || candidate[PROFILE_URL, :artist_id]
    return id if id
  end

  nil
end
|
|
|
|
# The artist id parsed from the href of the first `a.name` node on the page.
#
# @return [Integer, nil] nil when the page, the node, or its href is missing,
#   or when the href is not a `members.php?id=<digits>` link
def artist_id_from_page
  href = page&.search("a.name")&.first&.attr("href")
  href&.match(/members\.php\?id=(\d+)/) { |m| m[1].to_i }
end
|
|
|
|
# The artist id, preferring one found in the URLs over one parsed from the page.
#
# @return [String, Integer, nil] whatever `artist_id_from_url` returns if present,
#   otherwise the result of `artist_id_from_page`
def artist_id
  artist_id_from_url || artist_id_from_page
end
|
|
|
|
# The normalized source URL for this illustration.
#
# This is the same value as `page_url` (the canonical view.php URL), so it
# delegates instead of repeating the guard and the URL format.
#
# @return [String, nil] nil when no illustration id can be determined
def normalize_for_source
  page_url
end
|
|
|
|
# Logs in to Nijie and fetches the illustration page.
#
# Posts the configured credentials to the login endpoint, then requests
# `page_url` with the `R18` cookie set, and parses the response.
#
# @return [Object, nil] the result of `response.parse`; nil when there is no
#   page URL, the login does not return 200, or the page does not return 200
def page
  target_url = page_url
  return nil if target_url.blank?

  http = Danbooru::Http.new
  form = { email: Danbooru.config.nijie_login, password: Danbooru.config.nijie_password }

  # XXX `retriable` must come after `cache` so that retries don't return cached error responses.
  login = http.cache(1.hour).use(retriable: { max_retries: 20 }).post("https://nijie.info/login_int.php", form: form)
  if login.status != 200
    DanbooruLogger.info "Nijie login failed (#{url}, #{login.status})"
    return nil
  end

  response = http.cookies(R18: 1).cache(1.minute).get(target_url)
  return nil unless response.status == 200

  response.parse
end

memoize :page
|
|
end
|
|
end
|
|
end
|