Merge pull request #5125 from nonamethanks/booth-support
Add Booth support
This commit is contained in:
@@ -27,6 +27,7 @@ module Source
|
||||
DOWNLOAD_TIMEOUT = 60
|
||||
|
||||
attr_reader :url, :referer_url, :parsed_url, :parsed_referer
|
||||
|
||||
delegate :site_name, to: :parsed_url
|
||||
|
||||
SUBCLASSES = [
|
||||
@@ -50,6 +51,7 @@ module Source
|
||||
Source::Extractor::Plurk,
|
||||
Source::Extractor::Tinami,
|
||||
Source::Extractor::Fantia,
|
||||
Source::Extractor::Booth,
|
||||
]
|
||||
|
||||
# Should return true if the extractor is configured correctly. Return false
|
||||
|
||||
83
app/logical/source/extractor/booth.rb
Normal file
83
app/logical/source/extractor/booth.rb
Normal file
@@ -0,0 +1,83 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Booth
|
||||
class Source::Extractor
  # Extractor for Booth. Given a Booth item page URL or a Booth-hosted image
  # URL, it resolves the image URLs and scrapes the shop (artist), commentary
  # and tags from the item page's HTML.
  class Booth < Source::Extractor
    # @return [Boolean] true if the URL was parsed as a Source::URL::Booth
    def match?
      Source::URL::Booth === parsed_url
    end

    # For a direct image URL, returns that URL (or its full-size counterpart
    # when it is a `_base_resized` sample). Otherwise returns the
    # `data-origin` attribute of every item image found on the page.
    #
    # @return [Array<String>] empty when the page could not be fetched
    def image_urls
      if parsed_url.image_url?
        if parsed_url.full_image_url?
          [parsed_url.to_s]
        else
          [find_right_extension(parsed_url)]
        end
      else
        page&.css(".market-item-detail-item-image")&.pluck("data-origin").to_a.compact
      end
    end

    # The shop URL, taken from the shop link in the page summary. Falls back
    # to whatever the URL (or the referer) says when the page is unavailable
    # *or* the link can't be found in it, so a markup change on the site
    # doesn't silently drop the profile.
    #
    # @return [String, nil]
    def profile_url
      shop_link = page&.at(".summary [data-product-list*='shop_index']")
      shop_link&.[]("href")&.chomp("/") || parsed_url.profile_url || parsed_referer&.profile_url
    end

    # @return [String, nil] the username parsed out of {#profile_url}
    def artist_name
      return nil unless profile_url.present?

      Source::URL.parse(profile_url)&.username
    end

    # @return [String, nil] the alt text of the shop avatar in the page summary
    def display_name
      page&.at(".summary .user-avatar")&.[]("alt")
    end

    # @return [Array<String>] the display name, if any
    def other_names
      [display_name].compact
    end

    # @return [String, nil] the text of the item title element
    def artist_commentary_title
      page&.at(".summary .u-tpg-title1")&.text
    end

    # @return [String, nil] the HTML of the first `.autolink` element on the page
    def artist_commentary_desc
      page&.at(".autolink")&.to_html
    end

    # @return the result of DText.from_html on the description HTML
    def dtext_artist_commentary_desc
      DText.from_html(artist_commentary_desc)
    end

    # @return [Array<Array(String, String)>] `[name, href]` pairs, one per tag
    #   link in the item details; empty when the page could not be fetched
    def tags
      page&.css(".item-info-detail [data-product-list*='tag_category_search']").to_a.map do |element|
        # Drop everything from the first " x " onwards in the link text.
        [element.text.gsub(/ x .*/, ""), element["href"]]
      end
    end

    # @return [String, nil] the item page URL derived from the URL, else from the referer
    def page_url
      parsed_url.page_url || parsed_referer&.page_url
    end

    # Fetches the item page, sending an `adult=t` cookie and caching the
    # response for one minute.
    #
    # NOTE(review): the guard checks `parsed_url.page_url` while the request
    # uses `page_url`, which may fall back to the referer. As written, the
    # referer's page is never fetched. Confirm whether that is intended.
    #
    # @return [Object, nil] the parsed response, or nil when there is no page
    #   URL or the response code isn't 200
    def page
      return nil if parsed_url.page_url.blank?

      resp = http.cache(1.minute).cookies(adult: "t").get(page_url)
      return nil if resp.code != 200

      resp.parse
    end

    memoize :page

    # Sample URLs don't reveal the extension of the full-size file, so try
    # png, jpg and jpeg in that order and keep the first URL for which
    # `http_exists?` is truthy.
    #
    # `full_image_url_for` returns nil when it can't build a URL; those
    # candidates are discarded instead of being handed to `http_exists?`.
    #
    # @param parsed_url [Source::URL::Booth] the sample image URL
    # @return [String] the full-size URL, or the original URL if none was found
    def find_right_extension(parsed_url)
      candidates = %w[png jpg jpeg].filter_map { |ext| parsed_url.full_image_url_for(ext) }

      candidates.find { |candidate| http_exists?(candidate) } || parsed_url.to_s
    end
  end
end
|
||||
@@ -1,15 +1,13 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Unhandled:
|
||||
#
|
||||
# https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg (profile icon)
|
||||
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png (profile cover image)
|
||||
# Unhandled
|
||||
# https://booth.pm/downloadables/1376468 (from https://booth.pm/en/items/2425521, requires pixiv login to download)
|
||||
|
||||
module Source
|
||||
class URL::Booth < Source::URL
|
||||
RESERVED_SUBDOMAINS = ["www", "s", "s2", "asset", "accounts", nil]
|
||||
|
||||
# Identifiers captured from the URL by the pattern match; any that the URL
# doesn't contain read as nil.
attr_reader :work_id, :user_id, :user_uuid, :username
|
||||
|
||||
def self.match?(url)
|
||||
url.domain == "booth.pm" || url.host == "booth.pximg.net"
|
||||
@@ -21,6 +19,7 @@ module Source
|
||||
# https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (full)
|
||||
# https://booth.pximg.net/c/300x300_a2_g5/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (thumb)
|
||||
# https://booth.pximg.net/c/72x72_a2_g5/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (thumb)
|
||||
# https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d.jpeg (full)
|
||||
#
|
||||
# https://s2.booth.pm/b242a7bd-0747-48c4-891d-9e8552edd5d7/i/3746752/52dbee27-7ad2-4048-9c1d-827eee36625c_base_resized.jpg (sample)
|
||||
# https://booth.pximg.net/b242a7bd-0747-48c4-891d-9e8552edd5d7/i/3746752/52dbee27-7ad2-4048-9c1d-827eee36625c.jpg (full)
|
||||
@@ -32,6 +31,20 @@ module Source
|
||||
@work_id = work_id
|
||||
@file = file
|
||||
|
||||
# profile icons
|
||||
# https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg (sample)
|
||||
# https://booth.pximg.net/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314.png (full)
|
||||
in _, _, *, "users", user_id, "icon_image", file
|
||||
@user_id = user_id
|
||||
@file = file
|
||||
|
||||
# profile cover images
|
||||
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638_base_resized.jpg (sample)
|
||||
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png (full)
|
||||
in _, _, *, /\h{8}-\h{4}-\h{4}-\h{4}-\h{12}/i => user_uuid, file
|
||||
@user_uuid = user_uuid
|
||||
@file = file
|
||||
|
||||
# https://booth.pm/en/items/2864768
|
||||
# https://booth.pm/ja/items/2864768
|
||||
in _, "booth.pm", _, "items", work_id
|
||||
@@ -53,7 +66,25 @@ module Source
|
||||
end
|
||||
|
||||
# Whether this URL is served from one of Booth's image hosts.
#
# @return [Boolean] true when the host is booth.pximg.net or s2.booth.pm
def image_url?
  url.host.in?(["booth.pximg.net", "s2.booth.pm"])
end
|
||||
|
||||
# Whether this is an image URL that already points at the full-size file,
# i.e. its filename carries no `_base_resized` marker.
#
# An image-host URL whose path didn't yield a filename is not considered
# full-size; this mirrors the nil guard in `full_image_url_for` and avoids
# calling `exclude?` on nil.
#
# @return [Boolean]
def full_image_url?
  image_url? && @file.present? && @file.exclude?("_base_resized")
end
|
||||
|
||||
# Builds the full-size URL for this image using the given file extension.
#
# The `_base_resized.<ext>` suffix of the filename is replaced by
# `.<extension>`; a filename without that suffix is used unchanged.
#
# @param extension [String] extension to try, without the leading dot
# @return [String, nil] the URL, or nil when no filename was parsed or the
#   URL carries neither a user UUID nor a user id
def full_image_url_for(extension)
  return unless @file.present?

  full_file = @file.gsub(/_base_resized\.\w+$/, ".#{extension}")

  if user_uuid && work_id
    # item image
    "https://#{host}/#{user_uuid}/i/#{work_id}/#{full_file}"
  elsif user_uuid
    # profile cover image
    "https://#{host}/#{user_uuid}/#{full_file}"
  elsif user_id
    # profile icon
    "https://#{host}/users/#{user_id}/icon_image/#{full_file}"
  end
end
|
||||
|
||||
def page_url
|
||||
|
||||
@@ -95,8 +95,8 @@ class ArtistURL < ApplicationRecord
|
||||
def priority
|
||||
sites = %w[
|
||||
Pixiv Twitter
|
||||
ArtStation Baraag BCY Deviant\ Art Hentai\ Foundry Fantia Foundation Lofter Nico\ Seiga Nijie Pawoo Fanbox Pixiv\ Sketch Plurk Tinami Tumblr Weibo
|
||||
Ask.fm Booth Facebook FC2 Gumroad Instagram Ko-fi Livedoor Mihuashi Mixi.jp Patreon Piapro.jp Picarto Privatter Sakura.ne.jp Stickam Skeb Twitch Youtube
|
||||
ArtStation Baraag BCY Booth Deviant\ Art Hentai\ Foundry Fantia Foundation Lofter Nico\ Seiga Nijie Pawoo Fanbox Pixiv\ Sketch Plurk Tinami Tumblr Weibo
|
||||
Ask.fm Facebook FC2 Gumroad Instagram Ko-fi Livedoor Mihuashi Mixi.jp Patreon Piapro.jp Picarto Privatter Sakura.ne.jp Stickam Skeb Twitch Youtube
|
||||
Amazon Circle.ms DLSite Doujinshi.org Erogamescape Mangaupdates Melonbooks Toranoana Wikipedia
|
||||
]
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ class ActiveSupport::TestCase
|
||||
include DownloadTestHelper
|
||||
include IqdbTestHelper
|
||||
include UploadTestHelper
|
||||
extend SourceTestHelper
|
||||
extend StripeTestHelper
|
||||
extend NormalizeAttributeHelper
|
||||
|
||||
|
||||
93
test/test_helpers/source_test_helper.rb
Normal file
93
test/test_helpers/source_test_helper.rb
Normal file
@@ -0,0 +1,93 @@
|
||||
# Test macros for source extractor ("strategy") tests. Meant to be `extend`ed
# into a test case class so the macros can be called from the class body.
module SourceTestHelper
  # A helper method to automate all the checks needed to make sure that a strategy does not break.
  #
  # * If download_size is nil, it tests that the file is downloaded correctly, otherwise it also checks the filesize.
  # * If deleted is true, it skips the downloading check, but it still tries everything else and makes sure nothing breaks.
  # * Any passed kwargs parameter is tested against the strategy.
  #
  # @param url [String] the URL to build the strategy for
  # @param referer [String, nil] optional referer URL passed along to the strategy
  # @param download_size [Integer, nil] expected size of the first downloaded image
  # @param deleted [Boolean] whether the source is gone (skips the download test)
  # @param methods_to_test [Hash{Symbol => Object}] strategy method name => expected value
  #   (a Regexp is matched, nil is asserted as nil, anything else is compared for equality)
  def strategy_should_work(url, referer: nil, download_size: nil, deleted: false, **methods_to_test)
    context "a strategy for #{url}#{", referer: #{referer}" if referer.present?}".chomp do
      strategy = Source::Extractor.find(url, referer)

      should "not raise anything" do
        assert_nothing_raised { strategy.to_h }
      end

      should "make sure that image_urls is an array of valid elements" do
        assert_instance_of(Array, strategy.image_urls)
        assert_not(strategy.image_urls.include?(nil))
      end

      should_download_successfully(strategy, download_size) unless deleted

      # {profile_url: nil}[:profile_url].present? -> false
      # Doing it this way instead we can check profile_url even if it's passed as a nil.
      if methods_to_test.include? :profile_url
        profile_url = methods_to_test.delete(:profile_url)
        should_handle_artists_correctly(strategy, profile_url)
      end

      tags = methods_to_test.delete(:tags)
      should_validate_tags(strategy, tags)

      # check any method that is passed as kwargs, in order to hardcode as few things as possible
      methods_to_test.each do |method_name, expected_value|
        should "make sure that '#{method_name}' matches" do
          if expected_value.instance_of? Regexp
            assert_match(expected_value, strategy.try(method_name))
          elsif expected_value.nil?
            assert_nil(strategy.try(method_name))
          else
            assert_equal(expected_value, strategy.try(method_name))
          end
        end
      end
    end
  end

  # Defines a test that downloads the strategy's first image URL.
  #
  # @param strategy [Source::Extractor] the strategy under test
  # @param download_size [Integer, nil] when given, the downloaded file must be
  #   exactly this size; otherwise the file only needs to report a size
  def should_download_successfully(strategy, download_size = nil)
    should "download successfully" do
      file = strategy.download_file!(strategy.image_urls.first)

      if download_size.present?
        assert_equal(download_size, file.size)
      else
        assert_not_nil(file.size)
      end
    end
  end

  # Defines a test for the artist-related methods of the strategy.
  #
  # With a profile URL, the strategy must report it and must find an artist
  # created with that URL. Without one, the strategy must report no profile
  # URL, no artist name and no other names.
  #
  # @param strategy [Source::Extractor] the strategy under test
  # @param profile_url [String, nil] the expected profile URL
  def should_handle_artists_correctly(strategy, profile_url)
    if profile_url.present?
      should "correctly match a strategy to an artist with the same profile url" do
        assert_equal(profile_url, strategy.profile_url)
        artist = FactoryBot.create(:artist, name: strategy.artist_name, url_string: profile_url)
        assert_equal([artist], strategy.artists)
      end
    else
      should "not incorrectly extract a profile url or artist data when there's none to be found" do
        assert_nil(strategy.profile_url)
        assert_nil(strategy.artist_name)
        assert_equal([], strategy.other_names)
      end
    end
  end

  # Defines a test for the shape of `strategy.tags` and, when `tags` is given,
  # a test for their contents.
  #
  # @param strategy [Source::Extractor] the strategy under test
  # @param tags [Array<Array>, Array<String>, nil] expected tags: either full
  #   entries as returned by the strategy, or just the tag names (compared
  #   case-insensitively against the first element of each entry)
  def should_validate_tags(strategy, tags = nil)
    should "make sure that tags return an array of arrays" do
      assert_instance_of(Array, strategy.tags)
      if strategy.tags.present?
        assert_instance_of(Array, strategy.tags.first)
      end
    end

    return unless tags.present?

    should "make sure that tags match" do
      if tags.first.instance_of? Array
        assert_equal(tags.sort, strategy.tags.sort)
      elsif tags.first.instance_of? String
        assert_equal(tags.map(&:downcase).sort, strategy.tags.map(&:first).map(&:downcase).sort)
      end
    end
  end
end
|
||||
60
test/unit/sources/booth_test.rb
Normal file
60
test/unit/sources/booth_test.rb
Normal file
@@ -0,0 +1,60 @@
|
||||
require "test_helper"
|
||||
|
||||
module Sources
  # Runs the `strategy_should_work` checks against Booth item pages, item
  # images, profile images and a deleted item.
  class BoothTest < ActiveSupport::TestCase
    pillow_cover_images = [
      "https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/ae0fdbcf-e4c5-4840-8d5c-43e18bddc93e.jpg",
      "https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/d12bce50-a0c7-43f8-a4fb-5ee0ea6855a3.jpg",
      "https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/f5332da3-4097-4d33-bbf6-a9b64c7671b3.jpg",
    ]

    # Expectations shared by the item page and by an image belonging to it.
    pillow_cover_item = {
      profile_url: "https://amedamacon.booth.pm",
      page_url: "https://booth.pm/en/items/3713604",
      artist_name: "amedamacon",
      other_names: ["あめうさぎBOOTH"],
      tags: [["抱き枕カバー", "https://booth.pm/en/browse/Pillow%20Cover?tags%5B%5D=%E6%8A%B1%E3%81%8D%E6%9E%95%E3%82%AB%E3%83%90%E3%83%BC"]],
      artist_commentary_title: "フユちゃん抱き枕カバー",
      dtext_artist_commentary_desc: /発送:6月上旬頃(BOOTH倉庫より発送)/,
    }

    # item page
    strategy_should_work(
      "https://booth.pm/en/items/3713604",
      image_urls: pillow_cover_images,
      **pillow_cover_item
    )

    # sample image of the same item
    strategy_should_work(
      "https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/d12bce50-a0c7-43f8-a4fb-5ee0ea6855a3_base_resized.jpg",
      image_urls: [pillow_cover_images[1]],
      **pillow_cover_item
    )

    # item page on a shop subdomain
    strategy_should_work(
      "https://re-face.booth.pm/items/2423989",
      image_urls: ["https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2423989/a692d4f3-4371-4a86-a337-83fee82d46a4.png"],
      profile_url: "https://re-face.booth.pm",
      page_url: "https://booth.pm/en/items/2423989",
      artist_name: "re-face",
      other_names: ["Re:fAce/りふぇいす。"],
      tags: ["original"],
      artist_commentary_title: "RwithV vol.1 -アイドルはじめます!-",
      dtext_artist_commentary_desc: /注文が殺到した際は、発送が遅れてしまう場合もございますので予めご了承ください。/
    )

    # profile cover image (sample)
    strategy_should_work(
      "https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638_base_resized.jpg",
      image_urls: ["https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png"],
      profile_url: nil
    )

    # profile icon (sample)
    strategy_should_work(
      "https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg",
      image_urls: ["https://booth.pximg.net/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314.png"],
      profile_url: nil
    )

    # deleted item: nothing to download, but nothing may raise either
    strategy_should_work("https://booth.pm/en/items/2003079", deleted: true)
  end
end
|
||||
Reference in New Issue
Block a user