sources: factor out Source::URL::Nijie.

Also fixes the uploader uploading all images when trying to upload only a
single image in a multi-image work. Caused by `image_urls` incorrectly
returning all images when the source strategy was given a url for a
single image.
This commit is contained in:
evazion
2022-02-27 02:20:35 -06:00
parent 926a8fa81f
commit 317ec886bc
4 changed files with 159 additions and 115 deletions

View File

@@ -25,6 +25,7 @@ module Source
Source::URL::Lofter,
Source::URL::Mastodon,
Source::URL::Moebooru,
Source::URL::Nijie,
Source::URL::Newgrounds,
Source::URL::Plurk,
Source::URL::Skeb,

View File

@@ -0,0 +1,134 @@
# frozen_string_literal: true
# Image URLs:
#
# * https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg (page: https://www.nijie.info/view.php?id=64240)
# * https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png (page: http://nijie.info/view.php?id=218856)
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png
# * https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png (page: http://nijie.info/view_popup.php?id=265428#diff_2)
# * https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png (page: http://nijie.info/view_popup.php?id=287736#diff_2)
# * https://pic.nijie.net/03/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
#
# * https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg
#
# Unhandled:
#
# * https://pic01.nijie.info/nijie_picture/20120211210359.jpg
# * https://pic01.nijie.info/nijie_picture/2012021022424020120210.jpg
# * https://pic01.nijie.info/nijie_picture/diff/main/2012061023480525712_0.jpg
# * https://pic05.nijie.info/dojin_main/dojin_sam/1_2768_20180429004232.png
# * https://pic04.nijie.info/horne_picture/diff/main/56095_20160403221810_0.jpg
# * https://pic04.nijie.info/omata/4829_20161128012012.png (page: http://nijie.info/view_popup.php?id=33224#diff_3)
#
# Preview URLs:
#
# * https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# * https://pic03.nijie.info/__rs_l120x120/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/__rs_l170x170/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/__rs_l650x650/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/__rs_cns350x350/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/small_light(dh=150,dw=150,q=100)/nijie_picture/236014_20170620101426_0.png
#
# Page URLs:
#
# * https://nijie.info/view.php?id=167755 (deleted post)
# * https://nijie.info/view.php?id=218856
# * https://nijie.info/view_popup.php?id=218856
# * https://nijie.info/view_popup.php?id=218856#diff_1
# * https://www.nijie.info/view.php?id=218856
# * https://sp.nijie.info/view.php?id=218856
#
# Profile URLs:
#
# * https://nijie.info/members.php?id=236014
# * https://nijie.info/members_illust.php?id=236014
#
# Doujin:
#
# * http://nijie.info/view.php?id=384548
# * http://pic.nijie.net/01/dojin_main/dojin_sam/20120213044700%E3%82%B3%E3%83%94%E3%83%BC%20%EF%BD%9E%200011%E3%81%AE%E3%82%B3%E3%83%94%E3%83%BC.jpg (NSFW)
# * http://pic.nijie.net/01/__rs_l120x120/dojin_main/dojin_sam/20120213044700%E3%82%B3%E3%83%94%E3%83%BC%20%EF%BD%9E%200011%E3%81%AE%E3%82%B3%E3%83%94%E3%83%BC.jpg
class Source::URL::Nijie < Source::URL
attr_reader :work_id, :user_id
def self.match?(url)
url.domain.in?(%w[nijie.net nijie.info])
end
def parse
case [domain, *path_segments]
# https://nijie.info/view.php?id=167755 (deleted post)
# https://nijie.info/view.php?id=218856
# https://nijie.info/view_popup.php?id=218856
# https://nijie.info/view_popup.php?id=218856#diff_1
# https://www.nijie.info/view.php?id=218856
# https://sp.nijie.info/view.php?id=218856
in "nijie.info", ("view.php" | "view_popup.php") if params[:id].present?
@work_id = params[:id]
# https://nijie.info/members.php?id=236014
# https://nijie.info/members_illust.php?id=236014
in "nijie.info", ("members.php" | "members_illust.php") if params[:id].present?
@user_id = params[:id]
# https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg
in _, "nijie_picture", /^\d{2}$/, "nijie", /^\d{2}$/, /^\d{2}$/, user_id, "illust", _ if image_url?
@user_id = user_id
# https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png (page: http://nijie.info/view.php?id=218856)
# https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png
# https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png (page: http://nijie.info/view_popup.php?id=265428#diff_2)
# https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png (page: http://nijie.info/view_popup.php?id=287736#diff_2)
# https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg (page: https://www.nijie.info/view.php?id=64240)
# https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
# https://pic.nijie.net/03/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
# https://pic.nijie.net/01/nijie_picture/diff/main/196201_20150201033106_0.jpg
in [*, "nijie_picture", *] if image_url?
parse_filename
# http://pic.nijie.net/01/dojin_main/dojin_sam/20120213044700コピー 0011のコピー.jpg (NSFW)
# http://pic.nijie.net/01/__rs_l120x120/dojin_main/dojin_sam/20120213044700コピー 0011のコピー.jpg
in _, /^\d+$/, *subdir, "dojin_main", "dojin_sam", file if image_url?
nil
else
end
end
def parse_filename
case filename.split("_")
# 28310_20131101215959.jpg
# 236014_20170620101426_0.png
# 829001_20190620004513_0.mp4
# 559053_20180604023346_1.png
in /^\d+$/ => user_id, /^\d{14}$/ => timestamp, *rest
@user_id = user_id
# 218856_0_236014_20170620101329.png
in /^\d+$/ => work_id, /^\d+$/, /^\d+$/ => user_id, /^\d{14}$/ => timestamp
@work_id, @user_id = work_id, user_id
# 287736_161475_20181112032855_1.png
in /^\d+$/ => work_id, /^\d+$/ => user_id, /^\d{14}$/ => timestamp, /^\d+$/
@work_id, @user_id = work_id, user_id
else
end
end
def image_url?
subdomain.to_s.starts_with?("pic")
end
def preview_image_url
to_s.gsub(/nijie_picture/, "__rs_l170x170/nijie_picture") if image_url?
end
def full_image_url
to_s.remove(%r{__rs_\w+/}i).gsub("http:", "https:") if image_url?
end
end

View File

@@ -1,118 +1,42 @@
# frozen_string_literal: true
# Image URLs:
#
# * https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg (page: https://www.nijie.info/view.php?id=64240)
# * https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png (page: http://nijie.info/view.php?id=218856)
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png
# * https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png (page: http://nijie.info/view_popup.php?id=265428#diff_2)
# * https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png (page: http://nijie.info/view_popup.php?id=287736#diff_2)
#
# * https://pic.nijie.net/03/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
#
# Unhandled:
#
# * https://pic01.nijie.info/nijie_picture/20120211210359.jpg
# * https://pic01.nijie.info/nijie_picture/2012021022424020120210.jpg
# * https://pic01.nijie.info/nijie_picture/diff/main/2012061023480525712_0.jpg
# * https://pic05.nijie.info/dojin_main/dojin_sam/1_2768_20180429004232.png
# * https://pic04.nijie.info/horne_picture/diff/main/56095_20160403221810_0.jpg
# * https://pic04.nijie.info/omata/4829_20161128012012.png (page: http://nijie.info/view_popup.php?id=33224#diff_3)
#
# Preview URLs:
#
# * https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# * https://pic03.nijie.info/__rs_l120x120/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/__rs_l170x170/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/__rs_l650x650/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/__rs_cns350x350/nijie_picture/236014_20170620101426_0.png
# * https://pic03.nijie.info/small_light(dh=150,dw=150,q=100)/nijie_picture/236014_20170620101426_0.png
#
# Page URLs:
#
# * https://nijie.info/view.php?id=167755 (deleted post)
# * https://nijie.info/view.php?id=218856
# * https://nijie.info/view_popup.php?id=218856
# * https://nijie.info/view_popup.php?id=218856#diff_1
# * https://www.nijie.info/view.php?id=218856
# * https://sp.nijie.info/view.php?id=218856
#
# Profile URLs
#
# * https://nijie.info/members.php?id=236014
# * https://nijie.info/members_illust.php?id=236014
#
# Doujin
# http://nijie.info/view.php?id=384548
# http://pic.nijie.net/01/dojin_main/dojin_sam/20120213044700%E3%82%B3%E3%83%94%E3%83%BC%20%EF%BD%9E%200011%E3%81%AE%E3%82%B3%E3%83%94%E3%83%BC.jpg (NSFW)
# http://pic.nijie.net/01/__rs_l120x120/dojin_main/dojin_sam/20120213044700%E3%82%B3%E3%83%94%E3%83%BC%20%EF%BD%9E%200011%E3%81%AE%E3%82%B3%E3%83%94%E3%83%BC.jpg
# @see Source::URL::Nijie
module Sources
module Strategies
class Nijie < Base
BASE_URL = %r{\Ahttps?://(?:[^.]+\.)?nijie\.info}i
PAGE_URL = %r{#{BASE_URL}/view(?:_popup)?\.php\?id=(?<illust_id>\d+)}i
PROFILE_URL = %r{#{BASE_URL}/members(?:_illust)?\.php\?id=(?<artist_id>\d+)\z}i
# https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg
# https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png
# http://pic.nijie.net/03/nijie_picture/829001_20190620004513_0.mp4
# https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png
FILENAME1 = /(?<artist_id>\d+)_(?<timestamp>\d{14})(?:_\d+)?/i
# https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png
FILENAME2 = /(?<illust_id>\d+)_\d+_(?<artist_id>\d+)_(?<timestamp>\d{14})/i
# https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png
FILENAME3 = /(?<illust_id>\d+)_(?<artist_id>\d+)_(?<timestamp>\d{14})_\d+/i
IMAGE_BASE_URL = %r{\Ahttps?://(?:pic\d+\.nijie\.info|pic\.nijie\.net)}i
DIR = %r{(?:\d+/)?(?:__rs_\w+/)?nijie_picture(?:/diff/main)?}
IMAGE_URL = %r{#{IMAGE_BASE_URL}/#{DIR}/#{Regexp.union(FILENAME1, FILENAME2, FILENAME3)}\.\w+\z}i
DOJIN_DIR = %r{(?:\d+/)?(?:__rs_\w+/)?dojin_main(?:/dojin_sam)?}i
DOJIN_URL = %r{#{IMAGE_BASE_URL}/#{DOJIN_DIR}/.*\.\w+\z}i
# Whether this strategy is usable: true only when both the Nijie login and the
# Nijie password are present in the Danbooru config.
def self.enabled?
  config = Danbooru.config
  return false unless config.nijie_login.present?

  config.nijie_password.present?
end
def domains
["nijie.info", "nijie.net"]
def match?
Source::URL::Nijie === parsed_url
end
def site_name
"Nijie"
end
def image_url
return to_full_image_url(url) if url =~ IMAGE_URL || url =~ DOJIN_URL
return url if url =~ IMAGE_BASE_URL
image_urls.first
parsed_url.site_name
end
def image_urls
if doujin?
images = page&.search("#dojin_left .left img").to_a.map { |img| img["src"] }
images += page&.search("#dojin_diff img.mozamoza").to_a.map { |img| img["data-original"] }
if parsed_url.image_url?
[parsed_url.full_image_url]
else
images = page&.search("div#gallery a > .mozamoza").to_a.map { |img| img["src"] }
image_urls_from_page
end
# Can't use URI.join here because nijie urls may contain japanese characters
images = images.map { |img| "https:#{img}" }
images = [url] if images.empty?
images.map(&method(:to_full_image_url)).uniq
end
def preview_url
return nil if image_url.blank?
to_preview_url(image_url)
# Collects the image URLs found on the work's HTML page.
#
# Doujin pages and regular illustration pages are queried with different CSS
# selectors. When there is no page, `page&.search(...)` is nil and `nil.to_a` is an
# empty array, so the result is simply empty.
#
# @return [Array<String>] the full-size URL of each image found on the page
def image_urls_from_page
  if doujin?
    images = page&.search("#dojin_left .left img").to_a.pluck("src")
    images += page&.search("#dojin_diff img.mozamoza").to_a.pluck("data-original")
  else
    images = page&.search("div#gallery a > .mozamoza").to_a.pluck("src")
  end

  # An element lacking the expected attribute yields nil; skip it rather than building
  # the bogus URL "https:". The `&.` also skips any URL the parser rejects
  # (NOTE(review): assumes Source::URL.parse returns nil in that case - confirm).
  images.compact.filter_map { |img| Source::URL.parse("https:#{img}")&.full_image_url }
end
def preview_urls
image_urls.map(&method(:to_preview_url))
image_urls.map { |url| Source::URL.parse(url).preview_image_url }
end
def page_url
@@ -177,20 +101,12 @@ module Sources
dtext.strip
end
def to_full_image_url(x)
x.gsub(%r{__rs_\w+/}i, "").gsub(/\Ahttp:/, "https:")
end
def to_preview_url(url)
url.gsub(/nijie_picture/, "__rs_l170x170/nijie_picture").gsub(/\Ahttp:/, "https:")
end
def illust_id
urls.map { |url| url[PAGE_URL, :illust_id] || url[IMAGE_URL, :illust_id] }.compact.first
parsed_url.work_id || parsed_referer&.work_id
end
def artist_id_from_url
urls.map { |url| url[IMAGE_URL, :artist_id] || url[PROFILE_URL, :artist_id] }.compact.first
parsed_url.user_id || parsed_referer&.user_id
end
def artist_id_from_page

View File

@@ -128,7 +128,7 @@ module Sources
should "get the preview urls" do
assert_equal("https://pic.nijie.net/03/__rs_l170x170/nijie_picture/728995_20170505014820_0.jpg", @site.preview_url)
assert_equal(["https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg"], @site.preview_urls)
assert_equal(["https://pic.nijie.net/03/__rs_l170x170/nijie_picture/728995_20170505014820_0.jpg"], @site.preview_urls)
end
should "get the canonical url" do
@@ -221,9 +221,8 @@ module Sources
assert_equal("https://nijie.info/view.php?id=218856", site.canonical_url)
assert_equal("https://nijie.info/members.php?id=236014", site.profile_url)
assert_equal("名無しのチンポップ", site.artist_name)
assert_equal(site.url, site.image_url)
assert_equal(6, site.image_urls.size)
assert_equal(6, site.preview_urls.size)
assert_equal([site.url], site.image_urls)
assert_equal(["https://pic.nijie.net/03/__rs_l170x170/nijie_picture/diff/main/218856_4_236014_20170620101333.png"], site.preview_urls)
end
end
@@ -235,8 +234,7 @@ module Sources
assert_equal("https://nijie.info/view.php?id=287736", site.canonical_url)
assert_equal("https://nijie.info/members.php?id=161475", site.profile_url)
assert_equal("みな本", site.artist_name)
assert_equal(site.url, site.image_url)
assert_equal(3, site.image_urls.size)
assert_equal([site.url], site.image_urls)
end
end
@@ -321,11 +319,6 @@ module Sources
page = "https://nijie.info/view.php?id=53023"
site = Sources::Strategies.find(image, page)
images = %w[
https://pic.nijie.net/06/nijie/12/34/334/illust/0_0_f16b3c9eac5b1c03_bbe7a0.jpg
https://pic.nijie.net/06/nijie/12/34/334/illust/53023_1_76769657f10e5d57_3e00d7.jpg
https://pic.nijie.net/06/nijie/12/34/334/illust/53023_2_9cb4d9bdf9cc75a6_872020.jpg
]
tags = [%w[中出し https://nijie.info/search_dojin.php?word=%E4%B8%AD%E5%87%BA%E3%81%97],
%w[フェラ https://nijie.info/search_dojin.php?word=%E3%83%95%E3%82%A7%E3%83%A9],
%w[TS https://nijie.info/search_dojin.php?word=TS],
@@ -333,7 +326,7 @@ module Sources
assert(true, site.doujin?)
assert_equal(image, site.image_url)
assert_equal(images, site.image_urls)
assert_equal([image], site.image_urls)
assert_equal("作品情報", site.artist_commentary_title)
assert_equal("<p>ある日目がさめると女の子になっていたいつき<br>\nそこへ幼馴染の小梅が現れて…<br>\n2010年コミックマーケット78で販売したコピー本のDL版で<br>\n本編18Pの短編マンガです <br>\n</p>", site.artist_commentary_desc)
assert_equal(tags, site.tags)