sources: factor out Source::URL::PixivSketch.

Add upload support for Pixiv Sketch. Fetch tags, commentary, and artist,
and rewrite sample images to full images.

Authentication isn't required. R18 images are hidden in the browser but
visible in the API.
This commit is contained in:
evazion
2022-03-08 18:09:22 -06:00
parent 37441d6b1a
commit df0bb70486
8 changed files with 281 additions and 12 deletions

View File

@@ -28,6 +28,7 @@ module Source
Source::URL::Moebooru,
Source::URL::Nijie,
Source::URL::Newgrounds,
Source::URL::PixivSketch,
Source::URL::Plurk,
Source::URL::Skeb,
Source::URL::TwitPic,

View File

@@ -0,0 +1,47 @@
# frozen_string_literal: true
module Source
class URL::PixivSketch < Source::URL
attr_reader :work_id, :username, :full_image_url
def self.match?(url)
url.host.in?(%w[sketch.pixiv.net img-sketch.pixiv.net img-sketch.pximg.net])
end
def parse
case [host, *path_segments]
# https://sketch.pixiv.net/items/5835314698645024323
in "sketch.pixiv.net", "items", work_id
@work_id = work_id
# https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg (page: https://sketch.pixiv.net/items/5835314698645024323)
# https://img-sketch.pximg.net/c!/w=540,f=webp:jpeg/uploads/medium/file/4463372/8906921629213362989.jpg
# https://img-sketch.pixiv.net/c/f_540/uploads/medium/file/9986983/8431631593768139653.jpg
in *, "uploads", "medium", "file", dir, file if image_url?
@full_image_url = "https://img-sketch.pixiv.net/uploads/medium/file/#{dir}/#{file}"
# https://sketch.pixiv.net/@user_ejkv8372
# https://sketch.pixiv.net/@user_ejkv8372/followings
in "sketch.pixiv.net", /^@/ => username, *rest
@username = username.delete_prefix("@")
else
end
end
def image_url?
url.host.in?(%w[img-sketch.pixiv.net img-sketch.pximg.net])
end
def page_url
# https://sketch.pixiv.net/items/5835314698645024323
"https://sketch.pixiv.net/items/#{work_id}" if work_id.present?
end
def api_url
# https://sketch.pixiv.net/api/items/5835314698645024323.json (won't work in the browser; use curl)
"https://sketch.pixiv.net/api/items/#{work_id}.json" if work_id.present?
end
end
end

View File

@@ -16,6 +16,7 @@ module Sources
Strategies::HentaiFoundry,
Strategies::Fanbox,
Strategies::Mastodon,
Strategies::PixivSketch,
Strategies::Weibo,
Strategies::Newgrounds,
Strategies::Skeb,

View File

@@ -88,16 +88,12 @@ module Sources
# Whether this strategy handles the URL. Unparseable URLs, URLs containing
# "/fanbox/", and URLs parsed as Pixiv Sketch are rejected up front; the rest
# match on a known domain or on the tc-pximg01 CDN host.
def match?
  rejected = parsed_url.nil? || url.include?("/fanbox/") || Source::URL::PixivSketch === parsed_url
  return false if rejected

  parsed_url.domain.in?(domains) || parsed_url.host == "tc-pximg01.techorus-cdn.com"
end
# Display name of the site handled by this strategy.
#
# No per-host check is needed: match? rejects anything parsed as
# Source::URL::PixivSketch, so Pixiv Sketch URLs never reach this strategy.
# The former host conditional computed a value that was then discarded.
#
# @return [String]
def site_name
  "Pixiv"
end
def image_urls

View File

@@ -0,0 +1,88 @@
# frozen_string_literal: true
# @see Source::URL::PixivSketch
module Sources
  module Strategies
    # Upload strategy for Pixiv Sketch. When the URL is itself an image, the
    # full-size image URL comes straight from the parsed URL; the image list
    # for item pages, plus artist, commentary and tags, is read from the
    # item's JSON API.
    class PixivSketch < Base
      extend Memoist

      # @return [Boolean] true when the URL was parsed as a Pixiv Sketch URL
      def match?
        parsed_url.is_a?(Source::URL::PixivSketch)
      end

      def site_name
        parsed_url.site_name
      end

      # @return [Array<String>] the full image URL for a direct image URL,
      #   otherwise every image listed by the API for the item
      def image_urls
        return [parsed_url.full_image_url] if parsed_url.image_url?

        image_urls_from_api
      end

      # Reads data.media[].photo.original.url2x from the API response.
      def image_urls_from_api
        media = api_response.dig("data", "media").to_a
        media.map { |entry| entry["photo"]["original"]["url2x"] }
      end

      # @return [String, nil] the Pixiv Sketch profile page, or nil when the artist is unknown
      def profile_url
        name = artist_name
        return if name.blank?

        "https://sketch.pixiv.net/@#{name}"
      end

      # @return [Array<String>] the Sketch profile and the linked main Pixiv profile, whichever are known
      def profile_urls
        [profile_url, pixiv_profile_url].compact
      end

      # @return [String, nil] data.user.unique_name from the API
      def artist_name
        api_response.dig("data", "user", "unique_name")
      end

      # @return [String, nil] data.user.name from the API
      def display_name
        api_response.dig("data", "user", "name")
      end

      def other_names
        [artist_name, display_name].compact
      end

      # @return [String, nil] data.text from the API
      def artist_commentary_desc
        api_response.dig("data", "text")
      end

      # @return [Array<Array(String, String)>] pairs of tag name and tag page URL
      def tags
        names = api_response.dig("data", "tags").to_a
        names.map { |name| [name, "https://sketch.pixiv.net/tags/#{name}"] }
      end

      # @return [String, nil] the main Pixiv profile for the linked account, if the API reports one
      def pixiv_profile_url
        user_id = pixiv_user_id
        return if user_id.blank?

        "https://www.pixiv.net/users/#{user_id}"
      end

      def pixiv_user_id
        api_response.dig("data", "user", "pixiv_user_id")
      end

      # Fetches the item's JSON. Returns an empty hash when no API URL can be
      # derived from the URL or referer, or when the API answers 404.
      #
      # curl https://sketch.pixiv.net/api/items/5835314698645024323.json | jq
      def api_response
        endpoint = api_url
        return {} if endpoint.blank?

        reply = http.cache(1.minute).get(endpoint)
        reply.status == 404 ? {} : reply.parse
      end
      memoize :api_response

      # Item page URL, taken from the URL itself or else from the referer.
      def page_url
        parsed_url.page_url || parsed_referer&.page_url
      end

      # Item API URL, taken from the URL itself or else from the referer.
      def api_url
        parsed_url.api_url || parsed_referer&.api_url
      end
    end
  end
end

View File

@@ -318,6 +318,8 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
should_upload_successfully("https://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364")
should_upload_successfully("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg")
should_upload_successfully("https://sketch.pixiv.net/items/5835314698645024323")
should_upload_successfully("https://noizave.tumblr.com/post/162206271767")
should_upload_successfully("https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png")

View File

@@ -0,0 +1,140 @@
require 'test_helper'
# Tests for the Pixiv Sketch source strategy. Each case resolves a strategy
# with Sources::Strategies.find and checks the extracted image URLs, page and
# profile URLs, artist names, commentary, and tags against fixed values.
# NOTE(review): the expected values look like data for real posts, so these
# tests presumably depend on live API responses — confirm.
module Sources
class PixivSketchTest < ActiveSupport::TestCase
context "A Pixiv Sketch source" do
# Item page URL: every field is expected to be populated.
should "work for a post with a single image" do
source = Sources::Strategies.find("https://sketch.pixiv.net/items/5835314698645024323")
assert_equal("Pixiv Sketch", source.site_name)
assert_equal(["https://img-sketch.pixiv.net/uploads/medium/file/9986983/8431631593768139653.jpg"], source.image_urls)
assert_equal("https://sketch.pixiv.net/items/5835314698645024323", source.page_url)
assert_equal("https://sketch.pixiv.net/items/5835314698645024323", source.canonical_url)
assert_equal("https://sketch.pixiv.net/@user_ejkv8372", source.profile_url)
assert_equal(["https://sketch.pixiv.net/@user_ejkv8372", "https://www.pixiv.net/users/44772126"], source.profile_urls)
assert_equal("user_ejkv8372", source.artist_name)
assert_equal(["user_ejkv8372", "サコ"], source.other_names)
assert_equal("🍻シャンクスとミホーク誕生日おめでとう🍻(過去絵) ", source.artist_commentary_desc)
assert_equal([], source.tags.map(&:first))
assert_nothing_raised { source.to_h }
end
# Bare image URL with no referer: only the image URL is expected; page,
# artist, commentary and tags are all expected to be nil or empty.
should "work for an image url without a referer" do
# page: https://sketch.pixiv.net/items/8052785510155853613
source = Sources::Strategies.find("https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg")
assert_equal(["https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg"], source.image_urls)
assert_nil(source.page_url)
assert_equal(source.url, source.canonical_url)
assert_nil(source.profile_url)
assert_equal([], source.profile_urls)
assert_nil(source.artist_name)
assert_equal([], source.other_names)
assert_nil(source.artist_commentary_desc)
assert_equal([], source.tags.map(&:first))
assert_nothing_raised { source.to_h }
end
# Image URL plus an item-page referer: the page URL and metadata are
# expected to be resolved through the referer.
should "work for an image url with a referer" do
source = Sources::Strategies.find("https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg", "https://sketch.pixiv.net/items/8052785510155853613")
assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.page_url)
assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.canonical_url)
assert_equal("https://sketch.pixiv.net/@op-one", source.profile_url)
assert_equal(["https://sketch.pixiv.net/@op-one", "https://www.pixiv.net/users/5903369"], source.profile_urls)
assert_equal("op-one", source.artist_name)
assert_equal(["op-one", "俺P"], source.other_names)
assert_match(/\A3月3日は「うさぎの日」らしいので/, source.artist_commentary_desc)
assert_equal(%w[制作過程 このすば この素晴らしい世界に祝福を セナ バニー 3月3日 巨乳 黒髪巨乳 タイツ], source.tags.map(&:first))
assert_nothing_raised { source.to_h }
end
# NSFW item: the image URL and metadata are expected to be extracted just
# like for any other item.
should "work for a NSFW post" do
source = Sources::Strategies.find("https://sketch.pixiv.net/items/193462611994864256")
assert_equal(["https://img-sketch.pixiv.net/uploads/medium/file/884876/4909517173982299587.jpg"], source.image_urls)
assert_equal("https://sketch.pixiv.net/items/193462611994864256", source.page_url)
assert_equal("https://sketch.pixiv.net/items/193462611994864256", source.canonical_url)
assert_equal("https://sketch.pixiv.net/@lithla", source.profile_url)
assert_equal(["https://sketch.pixiv.net/@lithla", "https://www.pixiv.net/users/4957"], source.profile_urls)
assert_equal("lithla", source.artist_name)
assert_equal(["lithla", "リリスラウダ"], source.other_names)
assert_equal("チビッコ露出プレイ ピース", source.artist_commentary_desc)
assert_equal([], source.tags.map(&:first))
assert_nothing_raised { source.to_h }
end
# Item with ten images: all image URLs are expected, in the listed order.
should "work for a post with a multiple images" do
source = Sources::Strategies.find("https://sketch.pixiv.net/items/8052785510155853613")
assert_equal(%w[
https://img-sketch.pixiv.net/uploads/medium/file/9988964/1564052114639195387.png
https://img-sketch.pixiv.net/uploads/medium/file/9988965/3187185972065199018.png
https://img-sketch.pixiv.net/uploads/medium/file/9988966/5281789458380074490.png
https://img-sketch.pixiv.net/uploads/medium/file/9988967/8187710652175488805.png
https://img-sketch.pixiv.net/uploads/medium/file/9988968/3497441770651131427.png
https://img-sketch.pixiv.net/uploads/medium/file/9988969/1770110164450415039.png
https://img-sketch.pixiv.net/uploads/medium/file/9988970/1340350233137289970.png
https://img-sketch.pixiv.net/uploads/medium/file/9988971/9105451079763734305.jpg
https://img-sketch.pixiv.net/uploads/medium/file/9988972/2641925439408057307.jpg
https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg
], source.image_urls)
assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.page_url)
assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.canonical_url)
assert_equal("https://sketch.pixiv.net/@op-one", source.profile_url)
assert_equal("op-one", source.artist_name)
# NOTE(review): the heredoc below looks truncated compared with the
# commentary matched in the referer test (which starts with "3月3日は");
# verify its contents against the original file before relying on it.
assert_equal(<<~EOS.normalize_whitespace, source.artist_commentary_desc)
33
()
()
()+
1()
1()()
()
()
5(30)
(3D化できない)
12
()
#制作過程
#このすば
#この素晴らしい世界に祝福を!
#セナ
#バニー
#3月3日
#巨乳
#黒髪巨乳
#タイツ
EOS
assert_equal(%w[制作過程 このすば この素晴らしい世界に祝福を セナ バニー 3月3日 巨乳 黒髪巨乳 タイツ], source.tags.map(&:first))
assert_nothing_raised { source.to_h }
end
end
end
end

View File

@@ -344,12 +344,6 @@ module Sources
assert_illust_id(46323924, "http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip")
end
# Pixiv Sketch image URLs (full-size and resized) and item page URLs contain
# long numeric path segments; none of them should be read as an illust ID.
# NOTE(review): assert_nil_illust_id is defined outside this view — presumably
# it asserts that the parsed illust ID is nil; confirm.
should "not misparse ids from sketch urls" do
assert_nil_illust_id("https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg")
assert_nil_illust_id("https://img-sketch.pximg.net/c!/w=540,f=webp:jpeg/uploads/medium/file/4463372/8906921629213362989.jpg")
assert_nil_illust_id("https://sketch.pixiv.net/items/1588346448904706151")
end
should "not misparse ids from novel urls" do
assert_nil_illust_id("https://i.pximg.net/novel-cover-original/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42.jpg")
assert_nil_illust_id("https://i.pximg.net/c/600x600/novel-cover-master/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42_master1200.jpg")