From 9a7a1e20ca9b3b1b099797b5d89b2b83d530bbe4 Mon Sep 17 00:00:00 2001
From: nonamethanks
Date: Sat, 8 Aug 2020 14:42:33 +0200
Subject: [PATCH] Add fanbox support

---
 app/logical/sources/strategies.rb        |   1 +
 app/logical/sources/strategies/fanbox.rb | 188 +++++++++++++++++++++++
 app/logical/sources/strategies/pixiv.rb  |  12 +-
 test/unit/sources/fanbox.rb              | 139 +++++++++++++++++
 test/unit/sources/pixiv_test.rb          |   7 -
 5 files changed, 329 insertions(+), 18 deletions(-)
 create mode 100644 app/logical/sources/strategies/fanbox.rb
 create mode 100644 test/unit/sources/fanbox.rb

diff --git a/app/logical/sources/strategies.rb b/app/logical/sources/strategies.rb
index 05a0ef2d5..fddc1fdd3 100644
--- a/app/logical/sources/strategies.rb
+++ b/app/logical/sources/strategies.rb
@@ -3,6 +3,7 @@ module Sources
     def self.all
       [
         Strategies::Pixiv,
+        Strategies::Fanbox,
         Strategies::NicoSeiga,
         Strategies::Twitter,
         Strategies::Stash, # must come before DeviantArt
diff --git a/app/logical/sources/strategies/fanbox.rb b/app/logical/sources/strategies/fanbox.rb
new file mode 100644
index 000000000..c6099ca0b
--- /dev/null
+++ b/app/logical/sources/strategies/fanbox.rb
@@ -0,0 +1,188 @@
+# Image URLs #############################################################
+#
+# * OLD DOMAIN
+# ** https://fanbox.pixiv.net/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png
+#
+# * NEW DOMAIN
+# ** https://downloads.fanbox.cc/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png (full res)
+# ** https://downloads.fanbox.cc/images/post/39714/c/1200x630/JvjJal8v1yLgc5DPyEI05YpT.jpeg (sample)
+# ** https://downloads.fanbox.cc/images/post/39714/w/1200/JvjJal8v1yLgc5DPyEI05YpT.jpeg (sample)
+#
+# * POST COVERS
+# * https://pixiv.pximg.net/c/1200x630_90_a2_g5/fanbox/public/images/post/186919/cover/VCI1Mcs2rbmWPg0mmiTisovn.jpeg
+#
+# * PROFILE IMAGES
+# * https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg
+# * https://pixiv.pximg.net/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg (dead URL type)
+# * https://pixiv.pximg.net/c/1620x580_90_a2_g5/fanbox/public/images/creator/1566167/cover/WPqKsvKVGRq4qUjKFAMi23Z5.jpeg
+# * https://pixiv.pximg.net/c/936x600_90_a2_g5/fanbox/public/images/plan/4635/cover/L6AZNneFuHW6r25CHHlkpHg4.jpeg
+#
+# Page URLs ##############################################################
+#
+# * OLD
+# ** https://www.pixiv.net/fanbox/creator/1566167/post/39714
+#
+# * NEW
+# ** https://omu001.fanbox.cc/posts/39714
+# ** https://www.fanbox.cc/@tsukiori/posts/1080657
+# ** https://brllbrll.fanbox.cc/posts/626093 (R-18)
+#
+#
+# Profile URLs ###########################################################
+#
+# * OLD
+# ** https://www.pixiv.net/fanbox/creator/1566167
+#
+# * NEW
+# ** https://omu001.fanbox.cc/
+#
+
+module Sources
+  module Strategies
+    class Fanbox < Base
+      PROFILE_OLD = %r{\Ahttps?://(?:www\.)?pixiv\.net/fanbox/creator/(?<artist_id>\d+)}i
+      PROFILE_NEW = %r{\Ahttps?://(?:(?!www|downloads)(?<artist_name>[\w-]+)\.fanbox\.cc|(?:www\.)?fanbox\.cc/@(?<artist_name>[\w-]+))}i
+
+      PAGE_OLD = %r{#{PROFILE_OLD}/post/(?<illust_id>\d+)}i
+      PAGE_NEW = %r{#{PROFILE_NEW}/posts/(?<illust_id>\d+)}i
+
+      IMAGE = %r{\Ahttps?://(?:fanbox\.pixiv\.net|downloads\.fanbox\.cc)/images/post/(?<illust_id>\d+)/(?:\w+/)*\w+\.\w+}i
+
+      OTHER_IMAGES = %r{\Ahttps?://pixiv\.pximg\.net/.*/fanbox/.*?/(?:(?:creator|user)/(?<artist_id>\d+)|post/(?<illust_id>\d+))?/(?:.*/)?\w+\.\w+}i
+
+      def domains
+        ["fanbox.cc", "pixiv.net", "pximg.net"]
+      end
+
+      def site_name
+        "Pixiv Fanbox"
+      end
+
+      def image_urls
+        if url =~ IMAGE || url =~ OTHER_IMAGES
+          [url]
+        elsif api_response.present?
+          # There's two ways pics are returned via api:
+          # Pics in proper array: https://yanmi0308.fanbox.cc/posts/1141325
+          # Embedded pics (imageMap): https://www.fanbox.cc/@tsukiori/posts/1080657
+          images = api_response.dig("body", "images").to_a + api_response.dig("body", "imageMap").to_a.map { |id| id[1] }
+          images.map { |img| img["originalUrl"] }
+        else
+          [url]
+        end
+      end
+
+      def page_url
+        if illust_id.present?
+          "https://#{artist_name}.fanbox.cc/posts/#{illust_id}"
+        elsif url =~ OTHER_IMAGES && artist_name.present?
+          # Cover images
+          "https://#{artist_name}.fanbox.cc"
+        end
+      end
+
+      def normalize_for_source
+        if illust_id.present?
+          if artist_name_from_url.present?
+            "https://#{artist_name_from_url}.fanbox.cc/posts/#{illust_id}"
+          elsif artist_id_from_url.present?
+            "https://www.pixiv.net/fanbox/creator/#{artist_id_from_url}/post/#{illust_id}"
+          end
+        elsif artist_id_from_url.present?
+          # Cover images
+          "https://www.pixiv.net/fanbox/creator/#{artist_id_from_url}"
+        end
+      end
+
+      def profile_url
+        return if artist_name.blank?
+
+        "https://#{artist_name}.fanbox.cc"
+      end
+
+      def artist_name
+        artist_name_from_url || api_response["creatorId"] || artist_api_response["creatorId"]
+      end
+
+      def display_name
+        if api_response.present?
+          api_response["user"]["name"]
+        elsif artist_api_response.present?
+          artist_api_response["user"]["name"]
+        end
+      end
+
+      def other_names
+        [artist_name, display_name].compact.uniq
+      end
+
+      def tags
+        api_response["tags"].to_a.map { |tag| [tag, "https://fanbox.cc/tags/#{tag}"] }
+      end
+
+      def artist_commentary_title
+        api_response["title"]
+      end
+
+      def artist_commentary_desc
+        return if api_response.blank?
+        body = api_response["body"]
+        if body["text"].present?
+          body["text"]
+        elsif body["blocks"].present?
+          # Reference: https://official.fanbox.cc/posts/182757
+          # Commentary can get pretty complex, but unfortunately it's served in json format so it's a pain to parse it.
+          # I've left out parsing external embeds because each supported site has its own id mapped to the domain
+          commentary = body["blocks"].map do |node|
+            if node["type"] == "image"
+              body["imageMap"][node["imageId"]]["originalUrl"]
+            else
+              node["text"] || "\n"
+            end
+          end
+          commentary.join("\n")
+        end
+      end
+
+      def illust_id
+        urls.map { |url| url[PAGE_NEW, :illust_id] || url[IMAGE, :illust_id] || url[PAGE_OLD, :illust_id] || url[OTHER_IMAGES, :illust_id] }.compact.first
+      end
+
+      def artist_id_from_url
+        urls.map { |url| url[PAGE_OLD, :artist_id] || url[OTHER_IMAGES, :artist_id] }.compact.first
+      end
+
+      def artist_name_from_url
+        urls.map { |url| url[PROFILE_NEW, :artist_name] }.compact.first
+      end
+
+      def api_response
+        return {} if illust_id.blank?
+        resp = client.get("https://api.fanbox.cc/post.info?postId=#{illust_id}")
+        json_response = JSON.parse(resp)["body"]
+        if json_response["restrictedFor"] == 2 && json_response["body"].blank?
+          # Pixiv Fanbox login is protected by Google Recaptcha, so it's not possible for us to extract anything from them (save for the title).
+          # Other projects like PixivUtils ask the user to periodically extract cookies from the browser, but this is not feasible for Danbooru.
+          raise Sources::Error, "Age-restricted posts from Pixiv Fanbox are not supported."
+        end
+
+        json_response
+      rescue JSON::ParserError
+        {}
+      end
+
+      def artist_api_response
+        # Needed to fetch artist from cover pages
+        return {} if artist_id_from_url.blank?
+        resp = client.get("https://api.fanbox.cc/creator.get?userId=#{artist_id_from_url}")
+        JSON.parse(resp)["body"]
+      rescue JSON::ParserError
+        {}
+      end
+
+      def client
+        Danbooru::Http.headers(Origin: "https://fanbox.cc").cache(1.minute)
+      end
+    end
+  end
+end
diff --git a/app/logical/sources/strategies/pixiv.rb b/app/logical/sources/strategies/pixiv.rb
index ce2d0f63a..25700d0fd 100644
--- a/app/logical/sources/strategies/pixiv.rb
+++ b/app/logical/sources/strategies/pixiv.rb
@@ -19,17 +19,6 @@
 # * https://www.pixiv.net/stacc/noizave
 # * http://www.pixiv.me/noizave
 #
-# Fanbox
-#
-# * https://fanbox.pixiv.net/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png
-# * https://pixiv.pximg.net/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg
-#
-# * https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg
-# * https://pixiv.pximg.net/c/1200x630_90_a2_g5/fanbox/public/images/post/186919/cover/VCI1Mcs2rbmWPg0mmiTisovn.jpeg
-#
-# * https://www.pixiv.net/fanbox/creator/1566167/post/39714
-# * https://www.pixiv.net/fanbox/creator/1566167
-#
 # Novels
 #
 # * https://i.pximg.net/novel-cover-original/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42.jpg
@@ -97,6 +86,7 @@ module Sources
 
       def match?
         return false if parsed_url.nil?
+        return false if url.include? "/fanbox/"
         parsed_url.domain.in?(domains) || parsed_url.host == "tc-pximg01.techorus-cdn.com"
       end
 
diff --git a/test/unit/sources/fanbox.rb b/test/unit/sources/fanbox.rb
new file mode 100644
index 000000000..cfd1d7519
--- /dev/null
+++ b/test/unit/sources/fanbox.rb
@@ -0,0 +1,139 @@
+require 'test_helper'
+
+module Sources
+  class FanboxTest < ActiveSupport::TestCase
+    context "A free Pixiv Fanbox post" do
+      setup do
+        @post1 = Sources::Strategies.find("https://yanmi0308.fanbox.cc/posts/1141325")
+        @post2 = Sources::Strategies.find("https://www.fanbox.cc/@tsukiori/posts/1080657")
+        @post3 = Sources::Strategies.find("https://downloads.fanbox.cc/images/post/1080657/SaakPC251KafLL6jIo1WPPmr.png")
+
+        assert_nothing_raised { @post1.to_h }
+        assert_nothing_raised { @post2.to_h }
+        assert_nothing_raised { @post3.to_h }
+      end
+
+      should "get the image urls" do
+        # "images" in api response
+        images1 = %w[
+          https://downloads.fanbox.cc/images/post/1141325/q7GaJ0A9J5Uz8kvEAUizHJoN.png
+          https://downloads.fanbox.cc/images/post/1141325/LMJz0sAig5h9D3rPZGCEGniZ.png
+          https://downloads.fanbox.cc/images/post/1141325/dRSz380Uf3N8s4pT2ADEXBco.png
+          https://downloads.fanbox.cc/images/post/1141325/h48L2mbm39qqNUB1abLAvzvg.png
+        ]
+        assert_equal(images1, @post1.image_urls)
+
+        # "imageMapi" in api response (embedded pics)
+        images2 = %w[
+          https://downloads.fanbox.cc/images/post/1080657/fMD4FYzodzcNrEamag7oSpUt.png
+          https://downloads.fanbox.cc/images/post/1080657/IHhfqr4jjos6XWLBOD7QP4BJ.png
+          https://downloads.fanbox.cc/images/post/1080657/mIUSuwQsGiStRrLQMZ6oKMAl.png
+          https://downloads.fanbox.cc/images/post/1080657/s0UHQTY6zqN3LYoeS4OoB184.png
+          https://downloads.fanbox.cc/images/post/1080657/SaakPC251KafLL6jIo1WPPmr.png
+          https://downloads.fanbox.cc/images/post/1080657/z6iw3dewfzAiZEOrG10a8ALa.png
+        ]
+        assert_equal(images2, @post2.image_urls)
+        assert_equal([@post3.url], @post3.image_urls)
+      end
+
+      should "get the commentary" do
+        # Normal commentary
+        assert_equal("栗山やんみ(デザイン)", @post1.artist_commentary_title)
+
+        body1 = "˗ˋˏ Special Thanks ˎˊ˗ (敬称略)\n\n🎨キャラクターデザイン\n特急みかん https://twitter.com/tokkyuumikan\n\n🤖3Dモデリング\n(仮) https://twitter.com/Admiral_TMP\n\n⚙プログラミング\n神無月ユズカ https://twitter.com/Kannaduki_Yzk\n\n🎧OP・EDミュージック\n卓球少年 https://twitter.com/takkyuu_s\n\n📻BGM\nC https://twitter.com/nica2c\n\n🖌ロゴデザイン\nてづかもり https://twitter.com/tezkamori\n\n🎨SDキャラクター\nAZU。 https://twitter.com/tokitou_aaa"
+        assert_equal(body1, @post1.artist_commentary_desc)
+
+        # With embedded pics
+        assert_equal("はじめまして #1", @post2.artist_commentary_title)
+        assert_equal("はじめまして #1", @post3.artist_commentary_title)
+
+        body2 = "\nhttps://downloads.fanbox.cc/images/post/1080657/z6iw3dewfzAiZEOrG10a8ALa.png\nいらっしゃいませ……\nあら?あらあら、もしかして……初めてのお客さま!?\n\nhttps://downloads.fanbox.cc/images/post/1080657/SaakPC251KafLL6jIo1WPPmr.png\n調ノ宮喫茶店へようこそっ!\n\nhttps://downloads.fanbox.cc/images/post/1080657/mIUSuwQsGiStRrLQMZ6oKMAl.png\nあ、すみません。ひとりで盛り上がってしまって。\nなにせこんな辺鄙(へんぴ)なところに来て下さるお客さまは少ないものですから。\n\n藍ちゃん、藍ちゃーん。\n初めてのお客様だよ。\n\nhttps://downloads.fanbox.cc/images/post/1080657/IHhfqr4jjos6XWLBOD7QP4BJ.png\nえ。なに?\n今日はちゃんと化粧してない? はずかしい?\n大丈夫だよいつもと変わんないから……あ!ちょっと!\n\nhttps://downloads.fanbox.cc/images/post/1080657/s0UHQTY6zqN3LYoeS4OoB184.png\n…………\nえっと……すみません。\nなんかちょっと照れてるみたいで。\n\nなにはともあれ、せっかく来られたんですからゆっくりしていってください。\n\nhttps://downloads.fanbox.cc/images/post/1080657/fMD4FYzodzcNrEamag7oSpUt.png\nあ、そっちの陽が差している窓際の席がオススメですよ。\n向かいの島がよく見渡せるんです。\n\nではご注文が決まりましたら伺いますので……\n藍ちゃん……じゃなくて、店主の焼くパンケーキはふわふわでバターの香りがして、\nナッツとシロップがたっぷり乗っててとってもおいしいですよ。\nぜひ食べてみてくださいね。\n"
+        assert_equal(body2, @post2.artist_commentary_desc)
+        assert_equal(body2, @post3.artist_commentary_desc)
+      end
+
+      should "get the right page url" do
+        assert_equal("https://yanmi0308.fanbox.cc/posts/1141325", @post1.page_url)
+        assert_equal("https://tsukiori.fanbox.cc/posts/1080657", @post2.page_url)
+        assert_equal("https://tsukiori.fanbox.cc/posts/1080657", @post3.page_url)
+      end
+
+      should "correctly download the right image" do
+        assert_downloaded(431_225, @post1.image_url)
+        assert_downloaded(76_012, @post2.image_url)
+        assert_downloaded(78_751, @post3.image_url)
+      end
+
+      should "get the tags" do
+        tags = [
+          ["栗山やんみ", "https://fanbox.cc/tags/栗山やんみ"], ["VTuber", "https://fanbox.cc/tags/VTuber"], ["三面図", "https://fanbox.cc/tags/三面図"],
+          ["イラスト", "https://fanbox.cc/tags/イラスト"], ["ロゴデザイン", "https://fanbox.cc/tags/ロゴデザイン"], ["モデリング", "https://fanbox.cc/tags/モデリング"]
+        ]
+        assert_equal(tags, @post1.tags)
+      end
+
+      should "find the correct artist" do
+        @artist1 = FactoryBot.create(:artist, name: "yanmi", url_string: @post1.url)
+        @artist2 = FactoryBot.create(:artist, name: "tsukiori", url_string: @post2.url)
+        assert_equal([@artist1], @post1.artists)
+        assert_equal([@artist2], @post2.artists)
+        assert_equal([@artist2], @post3.artists)
+      end
+
+      should "find the right artist names" do
+        assert_equal("yanmi0308", @post1.artist_name)
+        assert_equal("栗山やんみ", @post1.display_name)
+        assert_equal("tsukiori", @post2.artist_name)
+        assert_equal("調ノ宮喫茶店", @post2.display_name)
+        assert_equal(@post2.artist_name, @post3.artist_name)
+        assert_equal(@post2.display_name, @post3.display_name)
+      end
+    end
+
+    context "A link in the old format" do
+      should "still work" do
+        post = Sources::Strategies.find("https://www.pixiv.net/fanbox/creator/1566167/post/39714")
+        assert_nothing_raised { post.to_h }
+        assert_equal("https://omu001.fanbox.cc", post.profile_url)
+        assert_equal("https://omu001.fanbox.cc/posts/39714", post.page_url)
+        artist = FactoryBot.create(:artist, name: "omu", url_string: "https://omu001.fanbox.cc")
+        assert_equal([artist], post.artists)
+      end
+    end
+
+    context "A cover image" do
+      should "still work" do
+        post = Sources::Strategies.find("https://pixiv.pximg.net/c/1620x580_90_a2_g5/fanbox/public/images/creator/1566167/cover/WPqKsvKVGRq4qUjKFAMi23Z5.jpeg")
+        assert_nothing_raised { post.to_h }
+        assert_downloaded(276_301, post.image_url)
+        assert_equal("https://omu001.fanbox.cc", post.profile_url)
+        assert_equal(post.profile_url, post.canonical_url)
+        artist = FactoryBot.create(:artist, name: "omu", url_string: "https://omu001.fanbox.cc")
+        assert_equal([artist], post.artists)
+      end
+    end
+
+    context "A dead profile picture from the old domain" do
+      should "still find the artist" do
+        post = Sources::Strategies.find("https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg")
+        assert_equal("https://omu001.fanbox.cc", post.profile_url)
+        artist = FactoryBot.create(:artist, name: "omu", url_string: "https://omu001.fanbox.cc")
+        assert_equal([artist], post.artists)
+      end
+    end
+
+    context "normalizing for source" do
+      should "normalize cover images to the profile link" do
+        cover = "https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg"
+        assert_equal("https://www.pixiv.net/fanbox/creator/1566167", Sources::Strategies.normalize_source(cover))
+      end
+
+      should "avoid normalizing unnormalizable urls" do
+        bad_source1 = "https://pixiv.pximg.net/c/936x600_90_a2_g5/fanbox/public/images/plan/4635/cover/L6AZNneFuHW6r25CHHlkpHg4.jpeg"
+        bad_source2 = "https://downloads.fanbox.cc/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png"
+        assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
+        assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
+      end
+    end
+  end
+end
diff --git a/test/unit/sources/pixiv_test.rb b/test/unit/sources/pixiv_test.rb
index e58cd029c..d9a96a348 100644
--- a/test/unit/sources/pixiv_test.rb
+++ b/test/unit/sources/pixiv_test.rb
@@ -326,13 +326,6 @@ module Sources
           assert_illust_id(46323924, "http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip")
         end
 
-        should "not misparse ids from fanbox urls" do
-          assert_nil_illust_id("https://fanbox.pixiv.net/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png")
-          assert_nil_illust_id("https://pixiv.pximg.net/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg")
-          assert_nil_illust_id("https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg")
-          assert_nil_illust_id("https://pixiv.pximg.net/c/1200x630_90_a2_g5/fanbox/public/images/post/186919/cover/VCI1Mcs2rbmWPg0mmiTisovn.jpeg")
-        end
-
         should "not misparse ids from sketch urls" do
           assert_nil_illust_id("https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg")
           assert_nil_illust_id("https://img-sketch.pximg.net/c!/w=540,f=webp:jpeg/uploads/medium/file/4463372/8906921629213362989.jpg")