From 317ec886bc1ea96be34ab1a1d3cb4bd234d8c4f9 Mon Sep 17 00:00:00 2001 From: evazion Date: Sun, 27 Feb 2022 02:20:35 -0600 Subject: [PATCH] sources: factor out Source::URL::Nijie. Also fixes the uploader uploading all images when trying to upload only a single image in a multi-image work. Caused by `image_urls` incorrectly returning all images when the source strategy was given a url for a single image. --- app/logical/source/url.rb | 1 + app/logical/source/url/nijie.rb | 134 ++++++++++++++++++++++++ app/logical/sources/strategies/nijie.rb | 122 ++++----------------- test/unit/sources/nijie_test.rb | 17 +-- 4 files changed, 159 insertions(+), 115 deletions(-) create mode 100644 app/logical/source/url/nijie.rb diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb index da85c210d..89ea83dbd 100644 --- a/app/logical/source/url.rb +++ b/app/logical/source/url.rb @@ -25,6 +25,7 @@ module Source Source::URL::Lofter, Source::URL::Mastodon, Source::URL::Moebooru, + Source::URL::Nijie, Source::URL::Newgrounds, Source::URL::Plurk, Source::URL::Skeb, diff --git a/app/logical/source/url/nijie.rb b/app/logical/source/url/nijie.rb new file mode 100644 index 000000000..05ad6370d --- /dev/null +++ b/app/logical/source/url/nijie.rb @@ -0,0 +1,134 @@ +# frozen_string_literal: true + +# Image URLs: +# +# * https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg (page: https://www.nijie.info/view.php?id=64240) +# * https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856) +# * https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png (page: http://nijie.info/view.php?id=218856) +# * https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png +# * https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png (page: http://nijie.info/view_popup.php?id=265428#diff_2) +# * 
https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png (page: http://nijie.info/view_popup.php?id=287736#diff_2) +# * https://pic.nijie.net/03/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856) +# +# * https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg +# +# Unhandled: +# +# * https://pic01.nijie.info/nijie_picture/20120211210359.jpg +# * https://pic01.nijie.info/nijie_picture/2012021022424020120210.jpg +# * https://pic01.nijie.info/nijie_picture/diff/main/2012061023480525712_0.jpg +# * https://pic05.nijie.info/dojin_main/dojin_sam/1_2768_20180429004232.png +# * https://pic04.nijie.info/horne_picture/diff/main/56095_20160403221810_0.jpg +# * https://pic04.nijie.info/omata/4829_20161128012012.png (page: http://nijie.info/view_popup.php?id=33224#diff_3) +# +# Preview URLs: +# +# * https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png +# * https://pic03.nijie.info/__rs_l120x120/nijie_picture/236014_20170620101426_0.png +# * https://pic03.nijie.info/__rs_l170x170/nijie_picture/236014_20170620101426_0.png +# * https://pic03.nijie.info/__rs_l650x650/nijie_picture/236014_20170620101426_0.png +# * https://pic03.nijie.info/__rs_cns350x350/nijie_picture/236014_20170620101426_0.png +# * https://pic03.nijie.info/small_light(dh=150,dw=150,q=100)/nijie_picture/236014_20170620101426_0.png +# +# Page URLs: +# +# * https://nijie.info/view.php?id=167755 (deleted post) +# * https://nijie.info/view.php?id=218856 +# * https://nijie.info/view_popup.php?id=218856 +# * https://nijie.info/view_popup.php?id=218856#diff_1 +# * https://www.nijie.info/view.php?id=218856 +# * https://sp.nijie.info/view.php?id=218856 +# +# Profile URLs +# +# * https://nijie.info/members.php?id=236014 +# * https://nijie.info/members_illust.php?id=236014 +# +# Doujin +# +# * http://nijie.info/view.php?id=384548 +# * 
http://pic.nijie.net/01/dojin_main/dojin_sam/20120213044700%E3%82%B3%E3%83%94%E3%83%BC%20%EF%BD%9E%200011%E3%81%AE%E3%82%B3%E3%83%94%E3%83%BC.jpg (NSFW)
+# * http://pic.nijie.net/01/__rs_l120x120/dojin_main/dojin_sam/20120213044700%E3%82%B3%E3%83%94%E3%83%BC%20%EF%BD%9E%200011%E3%81%AE%E3%82%B3%E3%83%94%E3%83%BC.jpg
+
+class Source::URL::Nijie < Source::URL
+  attr_reader :work_id, :user_id
+
+  def self.match?(url)
+    url.domain.in?(%w[nijie.net nijie.info])
+  end
+
+  def parse
+    case [domain, *path_segments]
+
+    # https://nijie.info/view.php?id=167755 (deleted post)
+    # https://nijie.info/view.php?id=218856
+    # https://nijie.info/view_popup.php?id=218856
+    # https://nijie.info/view_popup.php?id=218856#diff_1
+    # https://www.nijie.info/view.php?id=218856
+    # https://sp.nijie.info/view.php?id=218856
+    in "nijie.info", ("view.php" | "view_popup.php") if params[:id].present?
+      @work_id = params[:id]
+
+    # https://nijie.info/members.php?id=236014
+    # https://nijie.info/members_illust.php?id=236014
+    in "nijie.info", ("members.php" | "members_illust.php") if params[:id].present?
+      @user_id = params[:id]
+
+    # https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg
+    in _, /^\d{2}$/, "nijie", /^\d{2}$/, /^\d{2}$/, user_id, "illust", _ if image_url?
+      @user_id = user_id # path shape: NN/nijie/NN/NN/<user_id>/illust/<file>; there is no "nijie_picture" segment here
+
+    # https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png (page: http://nijie.info/view.php?id=218856)
+    # https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png
+    # https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png (page: http://nijie.info/view_popup.php?id=265428#diff_2)
+    # https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png (page: http://nijie.info/view_popup.php?id=287736#diff_2)
+    # https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg (page: https://www.nijie.info/view.php?id=64240)
+    # https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
+    # https://pic.nijie.net/03/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
+    # https://pic.nijie.net/01/nijie_picture/diff/main/196201_20150201033106_0.jpg
+    in [*, "nijie_picture", *] if image_url?
+      parse_filename
+
+    # http://pic.nijie.net/01/dojin_main/dojin_sam/20120213044700コピー ~ 0011のコピー.jpg (NSFW)
+    # http://pic.nijie.net/01/__rs_l120x120/dojin_main/dojin_sam/20120213044700コピー ~ 0011のコピー.jpg
+    in _, /^\d+$/, *subdir, "dojin_main", "dojin_sam", file if image_url?
+      nil
+
+    else
+    end
+  end
+
+  def parse_filename
+    case filename.split("_")
+
+    # 28310_20131101215959.jpg
+    # 236014_20170620101426_0.png
+    # 829001_20190620004513_0.mp4
+    # 559053_20180604023346_1.png
+    in /^\d+$/ => user_id, /^\d{14}$/ => timestamp, *rest
+      @user_id = user_id
+
+    # 218856_0_236014_20170620101329.png
+    in /^\d+$/ => work_id, /^\d+$/, /^\d+$/ => user_id, /^\d{14}$/ => timestamp
+      @work_id, @user_id = work_id, user_id
+
+    # 287736_161475_20181112032855_1.png
+    in /^\d+$/ => work_id, /^\d+$/ => user_id, /^\d{14}$/ => timestamp, /^\d+$/
+      @work_id, @user_id = work_id, user_id
+
+    else
+    end
+  end
+
+  def image_url?
+    subdomain.to_s.starts_with?("pic")
+  end
+
+  def preview_image_url
+    full_image_url&.gsub("nijie_picture", "__rs_l170x170/nijie_picture") # start from the full URL so an existing __rs_*/ prefix is not doubled
+  end
+
+  def full_image_url
+    to_s.remove(%r{__rs_\w+/}i).sub(/\Ahttp:/, "https:") if image_url? # anchored: upgrade only the scheme, never "http:" inside the path or query
+  end
+end
diff --git a/app/logical/sources/strategies/nijie.rb b/app/logical/sources/strategies/nijie.rb
index b8f1fa478..15656d09d 100644
--- a/app/logical/sources/strategies/nijie.rb
+++ b/app/logical/sources/strategies/nijie.rb
@@ -1,118 +1,42 @@
 # frozen_string_literal: true
 
-# Image URLs:
-#
-# * https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg (page: https://www.nijie.info/view.php?id=64240)
-# * https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
-# * https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png (page: http://nijie.info/view.php?id=218856)
-# * https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png
-# * https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png (page: http://nijie.info/view_popup.php?id=265428#diff_2)
-# * https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png (page: http://nijie.info/view_popup.php?id=287736#diff_2)
-#
-# * https://pic.nijie.net/03/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856)
-#
-# Unhandled:
-#
-# * https://pic01.nijie.info/nijie_picture/20120211210359.jpg
-# * https://pic01.nijie.info/nijie_picture/2012021022424020120210.jpg
-# * https://pic01.nijie.info/nijie_picture/diff/main/2012061023480525712_0.jpg
-# * https://pic05.nijie.info/dojin_main/dojin_sam/1_2768_20180429004232.png
-# * https://pic04.nijie.info/horne_picture/diff/main/56095_20160403221810_0.jpg
-# * https://pic04.nijie.info/omata/4829_20161128012012.png (page: http://nijie.info/view_popup.php?id=33224#diff_3)
-#
-# Preview URLs:
-#
-# * 
https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png -# * https://pic03.nijie.info/__rs_l120x120/nijie_picture/236014_20170620101426_0.png -# * https://pic03.nijie.info/__rs_l170x170/nijie_picture/236014_20170620101426_0.png -# * https://pic03.nijie.info/__rs_l650x650/nijie_picture/236014_20170620101426_0.png -# * https://pic03.nijie.info/__rs_cns350x350/nijie_picture/236014_20170620101426_0.png -# * https://pic03.nijie.info/small_light(dh=150,dw=150,q=100)/nijie_picture/236014_20170620101426_0.png -# -# Page URLs: -# -# * https://nijie.info/view.php?id=167755 (deleted post) -# * https://nijie.info/view.php?id=218856 -# * https://nijie.info/view_popup.php?id=218856 -# * https://nijie.info/view_popup.php?id=218856#diff_1 -# * https://www.nijie.info/view.php?id=218856 -# * https://sp.nijie.info/view.php?id=218856 -# -# Profile URLs -# -# * https://nijie.info/members.php?id=236014 -# * https://nijie.info/members_illust.php?id=236014 -# -# Doujin -# http://nijie.info/view.php?id=384548 -# http://pic.nijie.net/01/dojin_main/dojin_sam/20120213044700%E3%82%B3%E3%83%94%E3%83%BC%20%EF%BD%9E%200011%E3%81%AE%E3%82%B3%E3%83%94%E3%83%BC.jpg (NSFW) -# http://pic.nijie.net/01/__rs_l120x120/dojin_main/dojin_sam/20120213044700%E3%82%B3%E3%83%94%E3%83%BC%20%EF%BD%9E%200011%E3%81%AE%E3%82%B3%E3%83%94%E3%83%BC.jpg - +# @see Source::URL::Nijie module Sources module Strategies class Nijie < Base - BASE_URL = %r{\Ahttps?://(?:[^.]+\.)?nijie\.info}i - PAGE_URL = %r{#{BASE_URL}/view(?:_popup)?\.php\?id=(?\d+)}i - PROFILE_URL = %r{#{BASE_URL}/members(?:_illust)?\.php\?id=(?\d+)\z}i - - # https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg - # https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png - # http://pic.nijie.net/03/nijie_picture/829001_20190620004513_0.mp4 - # https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png - FILENAME1 = /(?\d+)_(?\d{14})(?:_\d+)?/i - - # 
https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png - FILENAME2 = /(?\d+)_\d+_(?\d+)_(?\d{14})/i - - # https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png - FILENAME3 = /(?\d+)_(?\d+)_(?\d{14})_\d+/i - - IMAGE_BASE_URL = %r{\Ahttps?://(?:pic\d+\.nijie\.info|pic\.nijie\.net)}i - DIR = %r{(?:\d+/)?(?:__rs_\w+/)?nijie_picture(?:/diff/main)?} - IMAGE_URL = %r{#{IMAGE_BASE_URL}/#{DIR}/#{Regexp.union(FILENAME1, FILENAME2, FILENAME3)}\.\w+\z}i - - DOJIN_DIR = %r{(?:\d+/)?(?:__rs_\w+/)?dojin_main(?:/dojin_sam)?}i - DOJIN_URL = %r{#{IMAGE_BASE_URL}/#{DOJIN_DIR}/.*\.\w+\z}i - def self.enabled? Danbooru.config.nijie_login.present? && Danbooru.config.nijie_password.present? end - def domains - ["nijie.info", "nijie.net"] + def match? + Source::URL::Nijie === parsed_url end def site_name - "Nijie" - end - - def image_url - return to_full_image_url(url) if url =~ IMAGE_URL || url =~ DOJIN_URL - return url if url =~ IMAGE_BASE_URL - image_urls.first + parsed_url.site_name end def image_urls - if doujin? - images = page&.search("#dojin_left .left img").to_a.map { |img| img["src"] } - images += page&.search("#dojin_diff img.mozamoza").to_a.map { |img| img["data-original"] } + if parsed_url.image_url? + [parsed_url.full_image_url] else - images = page&.search("div#gallery a > .mozamoza").to_a.map { |img| img["src"] } + image_urls_from_page end - - # Can't use URI.join here because nijie urls may contain japanese characters - images = images.map { |img| "https:#{img}" } - images = [url] if images.empty? - images.map(&method(:to_full_image_url)).uniq end - def preview_url - return nil if image_url.blank? - to_preview_url(image_url) + def image_urls_from_page + if doujin? 
+ images = page&.search("#dojin_left .left img").to_a.pluck("src") + images += page&.search("#dojin_diff img.mozamoza").to_a.pluck("data-original") + else + images = page&.search("div#gallery a > .mozamoza").to_a.pluck("src") + end + + images.map { |img| Source::URL.parse("https:#{img}").full_image_url } end def preview_urls - image_urls.map(&method(:to_preview_url)) + image_urls.map { |url| Source::URL.parse(url).preview_image_url } end def page_url @@ -177,20 +101,12 @@ module Sources dtext.strip end - def to_full_image_url(x) - x.gsub(%r{__rs_\w+/}i, "").gsub(/\Ahttp:/, "https:") - end - - def to_preview_url(url) - url.gsub(/nijie_picture/, "__rs_l170x170/nijie_picture").gsub(/\Ahttp:/, "https:") - end - def illust_id - urls.map { |url| url[PAGE_URL, :illust_id] || url[IMAGE_URL, :illust_id] }.compact.first + parsed_url.work_id || parsed_referer&.work_id end def artist_id_from_url - urls.map { |url| url[IMAGE_URL, :artist_id] || url[PROFILE_URL, :artist_id] }.compact.first + parsed_url.user_id || parsed_referer&.user_id end def artist_id_from_page diff --git a/test/unit/sources/nijie_test.rb b/test/unit/sources/nijie_test.rb index 1597044e0..00d4563a6 100644 --- a/test/unit/sources/nijie_test.rb +++ b/test/unit/sources/nijie_test.rb @@ -128,7 +128,7 @@ module Sources should "get the preview urls" do assert_equal("https://pic.nijie.net/03/__rs_l170x170/nijie_picture/728995_20170505014820_0.jpg", @site.preview_url) - assert_equal(["https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg"], @site.preview_urls) + assert_equal(["https://pic.nijie.net/03/__rs_l170x170/nijie_picture/728995_20170505014820_0.jpg"], @site.preview_urls) end should "get the canonical url" do @@ -221,9 +221,8 @@ module Sources assert_equal("https://nijie.info/view.php?id=218856", site.canonical_url) assert_equal("https://nijie.info/members.php?id=236014", site.profile_url) assert_equal("名無しのチンポップ", site.artist_name) - assert_equal(site.url, site.image_url) - 
assert_equal(6, site.image_urls.size) - assert_equal(6, site.preview_urls.size) + assert_equal([site.url], site.image_urls) + assert_equal(["https://pic.nijie.net/03/__rs_l170x170/nijie_picture/diff/main/218856_4_236014_20170620101333.png"], site.preview_urls) end end @@ -235,8 +234,7 @@ module Sources assert_equal("https://nijie.info/view.php?id=287736", site.canonical_url) assert_equal("https://nijie.info/members.php?id=161475", site.profile_url) assert_equal("みな本", site.artist_name) - assert_equal(site.url, site.image_url) - assert_equal(3, site.image_urls.size) + assert_equal([site.url], site.image_urls) end end @@ -321,11 +319,6 @@ module Sources page = "https://nijie.info/view.php?id=53023" site = Sources::Strategies.find(image, page) - images = %w[ - https://pic.nijie.net/06/nijie/12/34/334/illust/0_0_f16b3c9eac5b1c03_bbe7a0.jpg - https://pic.nijie.net/06/nijie/12/34/334/illust/53023_1_76769657f10e5d57_3e00d7.jpg - https://pic.nijie.net/06/nijie/12/34/334/illust/53023_2_9cb4d9bdf9cc75a6_872020.jpg - ] tags = [%w[中出し https://nijie.info/search_dojin.php?word=%E4%B8%AD%E5%87%BA%E3%81%97], %w[フェラ https://nijie.info/search_dojin.php?word=%E3%83%95%E3%82%A7%E3%83%A9], %w[TS https://nijie.info/search_dojin.php?word=TS], @@ -333,7 +326,7 @@ module Sources assert(true, site.doujin?) assert_equal(image, site.image_url) - assert_equal(images, site.image_urls) + assert_equal([image], site.image_urls) assert_equal("作品情報", site.artist_commentary_title) assert_equal("

ある日目がさめると女の子になっていたいつき
\nそこへ幼馴染の小梅が現れて…
\n2010年コミックマーケット78で販売したコピー本のDL版で
\n本編18Pの短編マンガです
\n

", site.artist_commentary_desc) assert_equal(tags, site.tags)