nijie: don't crash on invalid urls or deleted works (#3919).
This commit is contained in:
@@ -1,49 +1,81 @@
|
|||||||
|
# Image URLs:
|
||||||
|
#
|
||||||
|
# * https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg (page: https://www.nijie.info/view.php?id=64240)
|
||||||
|
# * https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
|
||||||
|
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png (page: http://nijie.info/view.php?id=218856)
|
||||||
|
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png
|
||||||
|
# * https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png (page: http://nijie.info/view_popup.php?id=265428#diff_2)
|
||||||
|
#
|
||||||
|
# Unhandled:
|
||||||
|
#
|
||||||
|
# * https://pic01.nijie.info/nijie_picture/20120211210359.jpg
|
||||||
|
# * https://pic01.nijie.info/nijie_picture/2012021022424020120210.jpg
|
||||||
|
# * https://pic01.nijie.info/nijie_picture/diff/main/2012061023480525712_0.jpg
|
||||||
|
# * https://pic05.nijie.info/dojin_main/dojin_sam/1_2768_20180429004232.png
|
||||||
|
# * https://pic04.nijie.info/horne_picture/diff/main/56095_20160403221810_0.jpg
|
||||||
|
# * https://pic04.nijie.info/omata/4829_20161128012012.png (page: http://nijie.info/view_popup.php?id=33224#diff_3)
|
||||||
|
#
|
||||||
|
# Preview URLs:
|
||||||
|
#
|
||||||
|
# * https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
|
||||||
|
# * https://pic03.nijie.info/__rs_l120x120/nijie_picture/236014_20170620101426_0.png
|
||||||
|
# * https://pic03.nijie.info/__rs_l170x170/nijie_picture/236014_20170620101426_0.png
|
||||||
|
# * https://pic03.nijie.info/__rs_l650x650/nijie_picture/236014_20170620101426_0.png
|
||||||
|
# * https://pic03.nijie.info/__rs_cns350x350/nijie_picture/236014_20170620101426_0.png
|
||||||
|
# * https://pic03.nijie.info/small_light(dh=150,dw=150,q=100)/nijie_picture/236014_20170620101426_0.png
|
||||||
|
#
|
||||||
|
# Page URLs:
|
||||||
|
#
|
||||||
|
# * https://nijie.info/view.php?id=167755 (deleted post)
|
||||||
|
# * https://nijie.info/view.php?id=218856
|
||||||
|
# * https://nijie.info/view_popup.php?id=218856
|
||||||
|
# * https://nijie.info/view_popup.php?id=218856#diff_1
|
||||||
|
# * https://www.nijie.info/view.php?id=218856
|
||||||
|
# * https://sp.nijie.info/view.php?id=218856
|
||||||
|
#
|
||||||
|
# Profile URLs:
|
||||||
|
#
|
||||||
|
# * https://nijie.info/members.php?id=236014
|
||||||
|
# * https://nijie.info/members_illust.php?id=236014
|
||||||
|
|
||||||
module Sources
|
module Sources
|
||||||
module Strategies
|
module Strategies
|
||||||
class Nijie < Base
|
class Nijie < Base
|
||||||
PROFILE = %r!\Ahttps?://nijie.info/members\.php\?id=(?<member_id>\d+)\z!i
|
BASE_URL = %r!\Ahttps?://(?:[^.]+\.)?nijie\.info!i
|
||||||
PICTURE = %r{pic\d+\.nijie.info/nijie_picture/}
|
PAGE_URL = %r!#{BASE_URL}/view(?:_popup)?\.php\?id=(?<illust_id>\d+)!i
|
||||||
PAGE = %r{\Ahttps?://nijie\.info/view\.php.+id=\d+}
|
PROFILE_URL = %r!#{BASE_URL}/members(?:_illust)?\.php\?id=(?<artist_id>\d+)\z!i
|
||||||
DIFF = %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i
|
|
||||||
|
# https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg
|
||||||
|
# https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png
|
||||||
|
# https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png
|
||||||
|
# https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png
|
||||||
|
DIR = %r!(?:__rs_\w+/)?nijie_picture(?:/diff/main)?!
|
||||||
|
FILENAME = %r!(?:(?<illust_id>\d+)_(?<page>\d+_))?(?<artist_id>\d+)_(?<timestamp>\d{14})(?:_\d+)?!i
|
||||||
|
IMAGE_URL = %r!\Ahttps?://pic\d+\.nijie\.info/#{DIR}/#{FILENAME}\.\w+\z!i
|
||||||
|
|
||||||
def self.match?(*urls)
|
def self.match?(*urls)
|
||||||
urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?nijie\.info/) }
|
urls.compact.any? { |x| x.match?(BASE_URL) }
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
def site_name
|
||||||
"Nijie"
|
"Nijie"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def image_url
|
||||||
|
return normalize_thumbnails(url) if url =~ IMAGE_URL
|
||||||
|
image_urls.first
|
||||||
|
end
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
if url =~ PICTURE
|
images = page&.search("div#gallery a > img") || []
|
||||||
return [url]
|
images.map do |img|
|
||||||
end
|
|
||||||
|
|
||||||
# http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
|
|
||||||
# => http://pic03.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png
|
|
||||||
if url =~ DIFF
|
|
||||||
return [normalize_thumbnails(url)]
|
|
||||||
end
|
|
||||||
|
|
||||||
page.search("div#gallery a > img").map do |img|
|
|
||||||
# //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
|
|
||||||
# => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
|
|
||||||
normalize_thumbnails("https:" + img.attr("src"))
|
normalize_thumbnails("https:" + img.attr("src"))
|
||||||
end.uniq
|
end.uniq
|
||||||
end
|
end
|
||||||
|
|
||||||
def page_url
|
def page_url
|
||||||
[url, referer_url].each do |x|
|
return nil if illust_id.blank?
|
||||||
if x =~ PAGE
|
"https://nijie.info/view.php?id=#{illust_id}"
|
||||||
return x
|
|
||||||
end
|
|
||||||
|
|
||||||
if x =~ %r!https?://nijie\.info/view_popup\.php.+id=(\d+)!
|
|
||||||
return "https://nijie.info/view.php?id=#{$1}"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
return super
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def profile_url
|
def profile_url
|
||||||
@@ -59,35 +91,25 @@ module Sources
|
|||||||
end
|
end
|
||||||
|
|
||||||
def artist_name
|
def artist_name
|
||||||
links = page.search("a.name")
|
page&.search("a.name")&.first&.text
|
||||||
|
|
||||||
if links.any?
|
|
||||||
return links[0].text
|
|
||||||
end
|
|
||||||
|
|
||||||
return nil
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def artist_commentary_title
|
def artist_commentary_title
|
||||||
page.search("h2.illust_title").text
|
page&.search("h2.illust_title")&.text
|
||||||
end
|
end
|
||||||
|
|
||||||
def artist_commentary_desc
|
def artist_commentary_desc
|
||||||
page.search('meta[property="og:description"]').attr("content").value
|
page&.search('meta[property="og:description"]')&.attr("content")&.value
|
||||||
end
|
end
|
||||||
|
|
||||||
def tags
|
def tags
|
||||||
links = page.search("div#view-tag a").find_all do |node|
|
links = page&.search("div#view-tag a") || []
|
||||||
|
|
||||||
|
links.select do |node|
|
||||||
node["href"] =~ /search\.php/
|
node["href"] =~ /search\.php/
|
||||||
|
end.map do |node|
|
||||||
|
[node.inner_text, "https://nijie.info" + node.attr("href")]
|
||||||
end
|
end
|
||||||
|
|
||||||
if links.any?
|
|
||||||
return links.map do |node|
|
|
||||||
[node.inner_text, "https://nijie.info" + node.attr("href")]
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
return []
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def unique_id
|
def unique_id
|
||||||
@@ -98,7 +120,7 @@ module Sources
|
|||||||
public
|
public
|
||||||
|
|
||||||
def self.to_dtext(text)
|
def self.to_dtext(text)
|
||||||
text = text.gsub(/\r\n|\r/, "<br>")
|
text = text.to_s.gsub(/\r\n|\r/, "<br>")
|
||||||
DText.from_html(text).strip
|
DText.from_html(text).strip
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -106,7 +128,13 @@ module Sources
|
|||||||
x.gsub(%r!__rs_l120x120/!i, "")
|
x.gsub(%r!__rs_l120x120/!i, "")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def illust_id
|
||||||
|
urls.map { |url| url[PAGE_URL, :illust_id] || url[IMAGE_URL, :illust_id] }.compact.first
|
||||||
|
end
|
||||||
|
|
||||||
def page
|
def page
|
||||||
|
return nil if page_url.blank?
|
||||||
|
|
||||||
doc = agent.get(page_url)
|
doc = agent.get(page_url)
|
||||||
|
|
||||||
if doc.search("div#header-login-container").any?
|
if doc.search("div#header-login-container").any?
|
||||||
@@ -116,6 +144,9 @@ module Sources
|
|||||||
end
|
end
|
||||||
|
|
||||||
return doc
|
return doc
|
||||||
|
rescue Mechanize::ResponseCodeError => e
|
||||||
|
return nil if e.response_code.to_i == 404
|
||||||
|
raise
|
||||||
end
|
end
|
||||||
memoize :page
|
memoize :page
|
||||||
|
|
||||||
|
|||||||
@@ -168,6 +168,12 @@ class ArtistUrlTest < ActiveSupport::TestCase
|
|||||||
|
|
||||||
url = FactoryBot.create(:artist_url, url: "https://nijie.info/members.php?id=161703")
|
url = FactoryBot.create(:artist_url, url: "https://nijie.info/members.php?id=161703")
|
||||||
assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url)
|
assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url)
|
||||||
|
|
||||||
|
url = FactoryBot.create(:artist_url, url: "https://www.nijie.info/members_illust.php?id=161703")
|
||||||
|
assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url)
|
||||||
|
|
||||||
|
url = FactoryBot.create(:artist_url, url: "https://nijie.info/invalid.php")
|
||||||
|
assert_equal("http://nijie.info/invalid.php/", url.normalized_url)
|
||||||
end
|
end
|
||||||
|
|
||||||
context "#search method" do
|
context "#search method" do
|
||||||
|
|||||||
@@ -131,6 +131,62 @@ module Sources
|
|||||||
assert_equal(image_url, site.image_url)
|
assert_equal(image_url, site.image_url)
|
||||||
assert_equal(image_url, site.canonical_url)
|
assert_equal(image_url, site.canonical_url)
|
||||||
assert_equal("https://nijie.info/members.php?id=236014", site.profile_url)
|
assert_equal("https://nijie.info/members.php?id=236014", site.profile_url)
|
||||||
|
assert_nothing_raised { site.to_h }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "An image url that contains the illust id" do
|
||||||
|
should "fetch all the data" do
|
||||||
|
site = Sources::Strategies.find("https://pic03.nijie.info/nijie_picture/diff/main/218856_4_236014_20170620101333.png")
|
||||||
|
|
||||||
|
assert_equal("https://nijie.info/view.php?id=218856", site.page_url)
|
||||||
|
assert_equal("https://nijie.info/view.php?id=218856", site.canonical_url)
|
||||||
|
assert_equal("https://nijie.info/members.php?id=236014", site.profile_url)
|
||||||
|
assert_equal("名無しのチンポップ", site.artist_name)
|
||||||
|
assert_equal(site.url, site.image_url)
|
||||||
|
assert_equal(6, site.image_urls.size)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "An artist profile url" do
|
||||||
|
should "not fail" do
|
||||||
|
site = Sources::Strategies.find("https://nijie.info/members_illust.php?id=236014")
|
||||||
|
assert_equal("https://nijie.info/members.php?id=236014", site.profile_url)
|
||||||
|
assert_nothing_raised { site.to_h }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "An url that is invalid" do
|
||||||
|
should "not fail" do
|
||||||
|
site = Sources::Strategies.find("http://nijie.info/index.php")
|
||||||
|
assert_nothing_raised { site.to_h }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "A deleted work" do
|
||||||
|
context "for an image url" do
|
||||||
|
should "find the profile url" do
|
||||||
|
site = Sources::Strategies.find("http://pic01.nijie.info/nijie_picture/diff/main/196201_20150201033106_0.jpg")
|
||||||
|
|
||||||
|
assert_nothing_raised { site.to_h }
|
||||||
|
assert_equal("https://nijie.info/members.php?id=196201", site.profile_url)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "for a page url" do
|
||||||
|
should "not fail" do
|
||||||
|
site = Sources::Strategies.find("http://www.nijie.info/view_popup.php?id=212355")
|
||||||
|
|
||||||
|
assert_equal("https://nijie.info/view.php?id=212355", site.page_url)
|
||||||
|
assert_nil(site.profile_url)
|
||||||
|
assert_nil(site.artist_name)
|
||||||
|
assert_nil(site.artist_commentary_desc)
|
||||||
|
assert_nil(site.artist_commentary_title)
|
||||||
|
assert_nil(site.image_url)
|
||||||
|
assert_empty(site.image_urls)
|
||||||
|
assert_empty(site.tags)
|
||||||
|
assert_nothing_raised { site.to_h }
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user