From df0bb70486a4146baf7723cca3773802ddc627e0 Mon Sep 17 00:00:00 2001 From: evazion Date: Tue, 8 Mar 2022 18:09:22 -0600 Subject: [PATCH] sources: factor out Source::URL::PixivSketch. Add upload support for Pixiv Sketch. Fetch tags, commentary, and artist, and rewrite sample images to full images. Authentication isn't required. R18 images are hidden in the browser but visible in the API. --- app/logical/source/url.rb | 1 + app/logical/source/url/pixiv_sketch.rb | 47 ++++++ app/logical/sources/strategies.rb | 1 + app/logical/sources/strategies/pixiv.rb | 8 +- .../sources/strategies/pixiv_sketch.rb | 88 +++++++++++ test/functional/uploads_controller_test.rb | 2 + test/unit/sources/pixiv_sketch_test.rb | 140 ++++++++++++++++++ test/unit/sources/pixiv_test.rb | 6 - 8 files changed, 281 insertions(+), 12 deletions(-) create mode 100644 app/logical/source/url/pixiv_sketch.rb create mode 100644 app/logical/sources/strategies/pixiv_sketch.rb create mode 100644 test/unit/sources/pixiv_sketch_test.rb diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb index b6612906d..11cca3406 100644 --- a/app/logical/source/url.rb +++ b/app/logical/source/url.rb @@ -28,6 +28,7 @@ module Source Source::URL::Moebooru, Source::URL::Nijie, Source::URL::Newgrounds, + Source::URL::PixivSketch, Source::URL::Plurk, Source::URL::Skeb, Source::URL::TwitPic, diff --git a/app/logical/source/url/pixiv_sketch.rb b/app/logical/source/url/pixiv_sketch.rb new file mode 100644 index 000000000..526d31f06 --- /dev/null +++ b/app/logical/source/url/pixiv_sketch.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +module Source + class URL::PixivSketch < Source::URL + attr_reader :work_id, :username, :full_image_url + + def self.match?(url) + url.host.in?(%w[sketch.pixiv.net img-sketch.pixiv.net img-sketch.pximg.net]) + end + + def parse + case [host, *path_segments] + + # https://sketch.pixiv.net/items/5835314698645024323 + in "sketch.pixiv.net", "items", work_id + @work_id = work_id + + # https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg (page: https://sketch.pixiv.net/items/5835314698645024323) + # https://img-sketch.pximg.net/c!/w=540,f=webp:jpeg/uploads/medium/file/4463372/8906921629213362989.jpg + # https://img-sketch.pixiv.net/c/f_540/uploads/medium/file/9986983/8431631593768139653.jpg + in *, "uploads", "medium", "file", dir, file if image_url? + @full_image_url = "https://img-sketch.pixiv.net/uploads/medium/file/#{dir}/#{file}" + + # https://sketch.pixiv.net/@user_ejkv8372 + # https://sketch.pixiv.net/@user_ejkv8372/followings + in "sketch.pixiv.net", /^@/ => username, *rest + @username = username.delete_prefix("@") + + else + end + end + + def image_url? + url.host.in?(%w[img-sketch.pixiv.net img-sketch.pximg.net]) + end + + def page_url + # https://sketch.pixiv.net/items/5835314698645024323 + "https://sketch.pixiv.net/items/#{work_id}" if work_id.present? + end + + def api_url + # https://sketch.pixiv.net/api/items/5835314698645024323.json (won't work in the browser; use curl) + "https://sketch.pixiv.net/api/items/#{work_id}.json" if work_id.present? + end + end +end diff --git a/app/logical/sources/strategies.rb b/app/logical/sources/strategies.rb index 105fb6062..2f949c1fc 100644 --- a/app/logical/sources/strategies.rb +++ b/app/logical/sources/strategies.rb @@ -16,6 +16,7 @@ module Sources Strategies::HentaiFoundry, Strategies::Fanbox, Strategies::Mastodon, + Strategies::PixivSketch, Strategies::Weibo, Strategies::Newgrounds, Strategies::Skeb, diff --git a/app/logical/sources/strategies/pixiv.rb b/app/logical/sources/strategies/pixiv.rb index 7cc714ed2..88b77f0d5 100644 --- a/app/logical/sources/strategies/pixiv.rb +++ b/app/logical/sources/strategies/pixiv.rb @@ -88,16 +88,12 @@ module Sources def match? return false if parsed_url.nil? return false if url.include? "/fanbox/" + return false if Source::URL::PixivSketch === parsed_url parsed_url.domain.in?(domains) || parsed_url.host == "tc-pximg01.techorus-cdn.com" end def site_name - # XXX pixiv sketch should be in a separate strategy - if parsed_url.host.in?(%w[sketch.pixiv.net img-sketch.pixiv.net img-sketch.pximg.net]) - "Pixiv Sketch" - else - "Pixiv" - end + "Pixiv" end def image_urls diff --git a/app/logical/sources/strategies/pixiv_sketch.rb b/app/logical/sources/strategies/pixiv_sketch.rb new file mode 100644 index 000000000..9b8601be8 --- /dev/null +++ b/app/logical/sources/strategies/pixiv_sketch.rb @@ -0,0 +1,88 @@ +# frozen_string_literal: true + +# @see Source::URL::PixivSketch +module Sources + module Strategies + class PixivSketch < Base + extend Memoist + + def match? + Source::URL::PixivSketch === parsed_url + end + + def site_name + parsed_url.site_name + end + + def image_urls + if parsed_url.image_url? + [parsed_url.full_image_url] + else + image_urls_from_api + end + end + + def image_urls_from_api + api_response.dig("data", "media").to_a.pluck("photo").pluck("original").pluck("url2x") + end + + def profile_url + "https://sketch.pixiv.net/@#{artist_name}" if artist_name.present? + end + + def artist_name + api_response.dig("data", "user", "unique_name") + end + + def other_names + [artist_name, display_name].compact + end + + def profile_urls + [profile_url, pixiv_profile_url].compact + end + + def artist_commentary_desc + api_response.dig("data", "text") + end + + def tags + api_response.dig("data", "tags").to_a.map do |tag| + [tag, "https://sketch.pixiv.net/tags/#{tag}"] + end + end + + def display_name + api_response.dig("data", "user", "name") + end + + def pixiv_profile_url + "https://www.pixiv.net/users/#{pixiv_user_id}" if pixiv_user_id.present? + end + + def pixiv_user_id + api_response.dig("data", "user", "pixiv_user_id") + end + + # curl https://sketch.pixiv.net/api/items/5835314698645024323.json | jq + def api_response + return {} if api_url.blank? + + response = http.cache(1.minute).get(api_url) + return {} if response.status == 404 + + response.parse + end + + def page_url + parsed_url.page_url || parsed_referer&.page_url + end + + def api_url + parsed_url.api_url || parsed_referer&.api_url + end + + memoize :api_response + end + end +end diff --git a/test/functional/uploads_controller_test.rb b/test/functional/uploads_controller_test.rb index dcbf0152b..01900ef92 100644 --- a/test/functional/uploads_controller_test.rb +++ b/test/functional/uploads_controller_test.rb @@ -318,6 +318,8 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest should_upload_successfully("https://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364") should_upload_successfully("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg") + should_upload_successfully("https://sketch.pixiv.net/items/5835314698645024323") + should_upload_successfully("https://noizave.tumblr.com/post/162206271767") should_upload_successfully("https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png") diff --git a/test/unit/sources/pixiv_sketch_test.rb b/test/unit/sources/pixiv_sketch_test.rb new file mode 100644 index 000000000..8da37f8ff --- /dev/null +++ b/test/unit/sources/pixiv_sketch_test.rb @@ -0,0 +1,140 @@ +require 'test_helper' + +module Sources + class PixivSketchTest < ActiveSupport::TestCase + context "A Pixiv Sketch source" do + should "work for a post with a single image" do + source = Sources::Strategies.find("https://sketch.pixiv.net/items/5835314698645024323") + + assert_equal("Pixiv Sketch", source.site_name) + assert_equal(["https://img-sketch.pixiv.net/uploads/medium/file/9986983/8431631593768139653.jpg"], source.image_urls) + assert_equal("https://sketch.pixiv.net/items/5835314698645024323", source.page_url) + assert_equal("https://sketch.pixiv.net/items/5835314698645024323", source.canonical_url) + assert_equal("https://sketch.pixiv.net/@user_ejkv8372", source.profile_url) + assert_equal(["https://sketch.pixiv.net/@user_ejkv8372", "https://www.pixiv.net/users/44772126"], source.profile_urls) + assert_equal("user_ejkv8372", source.artist_name) + assert_equal(["user_ejkv8372", "サコ"], source.other_names) + assert_equal("🍻シャンクスずミホヌク誕生日おめでずう🍻過去絵 ", source.artist_commentary_desc) + assert_equal([], source.tags.map(&:first)) + assert_nothing_raised { source.to_h } + end + + should "work for an image url without a referer" do + # page: https://sketch.pixiv.net/items/8052785510155853613 + source = Sources::Strategies.find("https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg") + + assert_equal(["https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg"], source.image_urls) + assert_nil(source.page_url) + assert_equal(source.url, source.canonical_url) + assert_nil(source.profile_url) + assert_equal([], source.profile_urls) + assert_nil(source.artist_name) + assert_equal([], source.other_names) + assert_nil(source.artist_commentary_desc) + assert_equal([], source.tags.map(&:first)) + assert_nothing_raised { source.to_h } + end + + should "work for an image url with a referer" do + source = Sources::Strategies.find("https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg", "https://sketch.pixiv.net/items/8052785510155853613") + + assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.page_url) + assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.canonical_url) + assert_equal("https://sketch.pixiv.net/@op-one", source.profile_url) + assert_equal(["https://sketch.pixiv.net/@op-one", "https://www.pixiv.net/users/5903369"], source.profile_urls) + assert_equal("op-one", source.artist_name) + assert_equal(["op-one", "俺P号"], source.other_names) + assert_match(/\A3月3日は「うさぎの日」らしいので/, source.artist_commentary_desc) + assert_equal(%w[制䜜過皋 このすば この玠晎らしい䞖界に祝犏を セナ バニヌ 3月3日 å·šä¹³ 黒髪巚乳 タむツ], source.tags.map(&:first)) + assert_nothing_raised { source.to_h } + end + + should "work for a NSFW post" do + source = Sources::Strategies.find("https://sketch.pixiv.net/items/193462611994864256") + + assert_equal(["https://img-sketch.pixiv.net/uploads/medium/file/884876/4909517173982299587.jpg"], source.image_urls) + assert_equal("https://sketch.pixiv.net/items/193462611994864256", source.page_url) + assert_equal("https://sketch.pixiv.net/items/193462611994864256", source.canonical_url) + assert_equal("https://sketch.pixiv.net/@lithla", source.profile_url) + assert_equal(["https://sketch.pixiv.net/@lithla", "https://www.pixiv.net/users/4957"], source.profile_urls) + assert_equal("lithla", source.artist_name) + assert_equal(["lithla", "リリスラりダ"], source.other_names) + assert_equal("チビッコ露出プレむ ピヌス", source.artist_commentary_desc) + assert_equal([], source.tags.map(&:first)) + assert_nothing_raised { source.to_h } + end + + should "work for a post with a multiple images" do + source = Sources::Strategies.find("https://sketch.pixiv.net/items/8052785510155853613") + + assert_equal(%w[ + https://img-sketch.pixiv.net/uploads/medium/file/9988964/1564052114639195387.png + https://img-sketch.pixiv.net/uploads/medium/file/9988965/3187185972065199018.png + https://img-sketch.pixiv.net/uploads/medium/file/9988966/5281789458380074490.png + https://img-sketch.pixiv.net/uploads/medium/file/9988967/8187710652175488805.png + https://img-sketch.pixiv.net/uploads/medium/file/9988968/3497441770651131427.png + https://img-sketch.pixiv.net/uploads/medium/file/9988969/1770110164450415039.png + https://img-sketch.pixiv.net/uploads/medium/file/9988970/1340350233137289970.png + https://img-sketch.pixiv.net/uploads/medium/file/9988971/9105451079763734305.jpg + https://img-sketch.pixiv.net/uploads/medium/file/9988972/2641925439408057307.jpg + https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg + ], source.image_urls) + assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.page_url) + assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.canonical_url) + assert_equal("https://sketch.pixiv.net/@op-one", source.profile_url) + assert_equal("op-one", source.artist_name) + assert_equal(<<~EOS.normalize_whitespace, source.artist_commentary_desc) + 3月3日は「うさぎの日」らしいので + + + ▌制䜜過皋 + ◎制䜜過皋 + ①() + ②() + ③()+色 + ④1原(原) + â‘€1原(原)(線のみ) + ⑥色 + ⑊仕䞊げ⇒完成 + ⑚完成() + ⑧完成() + + 色たで぀ける時間ず心の䜙裕が無いのでモノクロでらくがき + それでも5時間ぐらいかかっおる(③④の間で30分ぐらい雑務) + + やっぱから原は時間かかる  + ・線画だけから立䜓が把握できない(頭の䞭で3D化できない) + ・描き続けおるず立䜓感がゲシュタルト厩壊する + ・目のピントが合わない + ので12回䌑憩しお目ず頭䌑たせないずいけないのがき぀い + 目ず頭のスタミナ䞍足は劂䜕ずもしがたい + + 線画のみから感芚的に立䜓把握できる「確かめ算」みたいな手法を緎りこむ必芁がある のはわかっおるけど + 「断面図」 + 「透明な板を蚭定しお奥行きパヌス確認」 + 「地面に正方圢を描いお瞊パヌス確認」 + 「関節郚や胎䜓䞭倮郚に栞(äžž)を描いお立䜓確認」 + 「線画」を淡く衚瀺し䞊から簡単な立䜓モデルを描いおみお「倧きさ比率の確認」 +  ぐらいかな思い぀くのは + + あず初期に足銖の関節玠䜓描いお立䜓把握しおる跡がある + いただに関節の軞を足銖のドコに蚭定すれば自然に芋えるか迷う + 倚分最倧に䌞ばしたり曲げたりしおるずきは関節浮いおたりするんだろうから簡単な軞蚭定だず違和感が出おくるんだずは思う + + #制䜜過皋 + #このすば + #この玠晎らしい䞖界に祝犏を + #セナ + #バニヌ + #3月3日 + #å·šä¹³ + #黒髪巚乳 + #タむツ + EOS + + assert_equal(%w[制䜜過皋 このすば この玠晎らしい䞖界に祝犏を セナ バニヌ 3月3日 å·šä¹³ 黒髪巚乳 タむツ], source.tags.map(&:first)) + assert_nothing_raised { source.to_h } + end + end + end +end diff --git a/test/unit/sources/pixiv_test.rb b/test/unit/sources/pixiv_test.rb index 017ad8060..dbe3cd8fb 100644 --- a/test/unit/sources/pixiv_test.rb +++ b/test/unit/sources/pixiv_test.rb @@ -344,12 +344,6 @@ module Sources assert_illust_id(46323924, "http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip") end - should "not misparse ids from sketch urls" do - assert_nil_illust_id("https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg") - assert_nil_illust_id("https://img-sketch.pximg.net/c!/w=540,f=webp:jpeg/uploads/medium/file/4463372/8906921629213362989.jpg") - assert_nil_illust_id("https://sketch.pixiv.net/items/1588346448904706151") - end - should "not misparse ids from novel urls" do assert_nil_illust_id("https://i.pximg.net/novel-cover-original/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42.jpg") assert_nil_illust_id("https://i.pximg.net/c/600x600/novel-cover-master/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42_master1200.jpg")