Merge pull request #5125 from nonamethanks/booth-support
Add Booth support
This commit is contained in:
@@ -27,6 +27,7 @@ module Source
|
|||||||
DOWNLOAD_TIMEOUT = 60
|
DOWNLOAD_TIMEOUT = 60
|
||||||
|
|
||||||
attr_reader :url, :referer_url, :parsed_url, :parsed_referer
|
attr_reader :url, :referer_url, :parsed_url, :parsed_referer
|
||||||
|
|
||||||
delegate :site_name, to: :parsed_url
|
delegate :site_name, to: :parsed_url
|
||||||
|
|
||||||
SUBCLASSES = [
|
SUBCLASSES = [
|
||||||
@@ -50,6 +51,7 @@ module Source
|
|||||||
Source::Extractor::Plurk,
|
Source::Extractor::Plurk,
|
||||||
Source::Extractor::Tinami,
|
Source::Extractor::Tinami,
|
||||||
Source::Extractor::Fantia,
|
Source::Extractor::Fantia,
|
||||||
|
Source::Extractor::Booth,
|
||||||
]
|
]
|
||||||
|
|
||||||
# Should return true if the extractor is configured correctly. Return false
|
# Should return true if the extractor is configured correctly. Return false
|
||||||
|
|||||||
83
app/logical/source/extractor/booth.rb
Normal file
83
app/logical/source/extractor/booth.rb
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
# @see Source::URL::Booth
|
||||||
|
class Source::Extractor
  # Extractor for Booth URLs: item pages on booth.pm and direct image URLs.
  # Metadata is scraped from the item page HTML when one can be fetched.
  class Booth < Source::Extractor
    # @return [Boolean] true when the parsed URL is a Source::URL::Booth
    def match?
      parsed_url.is_a?(Source::URL::Booth)
    end

    # For a direct image URL, returns that image in full size (probing for the
    # right file extension when a resized sample was given). For anything else,
    # collects the `data-origin` attribute of every item image on the page.
    #
    # @return [Array<String>]
    def image_urls
      unless parsed_url.image_url?
        image_nodes = page&.css(".market-item-detail-item-image")
        return image_nodes&.pluck("data-origin").to_a.compact
      end

      return [parsed_url.to_s] if parsed_url.full_image_url?

      [find_right_extension(parsed_url)]
    end

    # The shop URL, read from the item page's shop link when the page is
    # available; otherwise derived from the URL or, failing that, the referer.
    #
    # @return [String, nil]
    def profile_url
      return parsed_url.profile_url || parsed_referer&.profile_url unless page.present?

      shop_link = page.at(".summary [data-product-list*='shop_index']")
      shop_link&.[]("href")&.chomp("/")
    end

    # @return [String, nil] the username parsed out of the profile URL
    def artist_name
      shop_url = profile_url
      return nil if shop_url.blank?

      Source::URL.parse(shop_url)&.username
    end

    # @return [String, nil] the `alt` text of the shop avatar on the item page
    def display_name
      avatar = page&.at(".summary .user-avatar")
      avatar&.[]("alt")
    end

    # @return [Array<String>] the display name, or an empty array without one
    def other_names
      shop_name = display_name
      shop_name.nil? ? [] : [shop_name]
    end

    # @return [String, nil] the item title shown in the page summary
    def artist_commentary_title
      title_node = page&.at(".summary .u-tpg-title1")
      title_node&.text
    end

    # @return [String, nil] HTML of the first `.autolink` element on the page
    def artist_commentary_desc
      description_node = page&.at(".autolink")
      description_node&.to_html
    end

    # @return the commentary description converted from HTML by DText
    def dtext_artist_commentary_desc
      description_html = artist_commentary_desc
      DText.from_html(description_html)
    end

    # @return [Array<Array(String, String)>] [tag name, tag search URL] pairs
    def tags
      tag_links = page&.css(".item-info-detail [data-product-list*='tag_category_search']").to_a

      # The link text is cut at the first " x "; presumably that strips a
      # trailing count/suffix Booth appends to the tag name -- TODO confirm.
      tag_links.map { |link| [link.text.gsub(/ x .*/, ""), link["href"]] }
    end

    # @return [String, nil] the item page URL from the URL, else from the referer
    def page_url
      own_page_url = parsed_url.page_url
      own_page_url || parsed_referer&.page_url
    end

    # Fetches and parses the item page. Only attempted when the URL itself
    # (not just the referer) maps to an item page.
    #
    # @return the parsed response body, or nil when there is no page URL or
    #   the response status is not 200
    def page
      return nil if parsed_url.page_url.blank?

      # NOTE(review): the `adult` cookie presumably unlocks age-restricted items -- confirm.
      response = http.cache(1.minute).cookies(adult: "t").get(page_url)
      response.code == 200 ? response.parse : nil
    end
    memoize :page

    # Tries each known extension for the full-size version of a resized image
    # and returns the first candidate that exists, falling back to the URL as given.
    #
    # @param parsed_url [Source::URL::Booth] a resized image URL
    # @return [String]
    def find_right_extension(parsed_url)
      full_size_candidates = %w[png jpg jpeg].map { |extension| parsed_url.full_image_url_for(extension) }
      existing_url = full_size_candidates.find { |candidate_url| http_exists?(candidate_url) }

      existing_url || parsed_url.to_s
    end
  end
end
|
||||||
@@ -1,15 +1,13 @@
|
|||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
# Unhandled:
|
# Unhandled
|
||||||
#
|
# https://booth.pm/downloadables/1376468 (from https://booth.pm/en/items/2425521, requires pixiv login to download)
|
||||||
# https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg (profile icon)
|
|
||||||
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png (profile cover image)
|
|
||||||
|
|
||||||
module Source
|
module Source
|
||||||
class URL::Booth < Source::URL
|
class URL::Booth < Source::URL
|
||||||
RESERVED_SUBDOMAINS = ["www", "s", "s2", "asset", "accounts", nil]
|
RESERVED_SUBDOMAINS = ["www", "s", "s2", "asset", "accounts", nil]
|
||||||
|
|
||||||
attr_reader :work_id, :user_id, :username
|
attr_reader :work_id, :user_id, :user_uuid, :username
|
||||||
|
|
||||||
def self.match?(url)
|
def self.match?(url)
|
||||||
url.domain == "booth.pm" || url.host == "booth.pximg.net"
|
url.domain == "booth.pm" || url.host == "booth.pximg.net"
|
||||||
@@ -21,6 +19,7 @@ module Source
|
|||||||
# https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (full)
|
# https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (full)
|
||||||
# https://booth.pximg.net/c/300x300_a2_g5/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (thumb)
|
# https://booth.pximg.net/c/300x300_a2_g5/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (thumb)
|
||||||
# https://booth.pximg.net/c/72x72_a2_g5/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (thumb)
|
# https://booth.pximg.net/c/72x72_a2_g5/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (thumb)
|
||||||
|
# https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d.jpeg (full)
|
||||||
#
|
#
|
||||||
# https://s2.booth.pm/b242a7bd-0747-48c4-891d-9e8552edd5d7/i/3746752/52dbee27-7ad2-4048-9c1d-827eee36625c_base_resized.jpg (sample)
|
# https://s2.booth.pm/b242a7bd-0747-48c4-891d-9e8552edd5d7/i/3746752/52dbee27-7ad2-4048-9c1d-827eee36625c_base_resized.jpg (sample)
|
||||||
# https://booth.pximg.net/b242a7bd-0747-48c4-891d-9e8552edd5d7/i/3746752/52dbee27-7ad2-4048-9c1d-827eee36625c.jpg (full)
|
# https://booth.pximg.net/b242a7bd-0747-48c4-891d-9e8552edd5d7/i/3746752/52dbee27-7ad2-4048-9c1d-827eee36625c.jpg (full)
|
||||||
@@ -32,6 +31,20 @@ module Source
|
|||||||
@work_id = work_id
|
@work_id = work_id
|
||||||
@file = file
|
@file = file
|
||||||
|
|
||||||
|
# profile icons
|
||||||
|
# https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg (sample)
|
||||||
|
# https://booth.pximg.net/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314.png (full)
|
||||||
|
in _, _, *, "users", user_id, "icon_image", file
|
||||||
|
@user_id = user_id
|
||||||
|
@file = file
|
||||||
|
|
||||||
|
# profile cover images
|
||||||
|
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638_base_resized.jpg (sample)
|
||||||
|
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png (full)
|
||||||
|
in _, _, *, /\h{8}-\h{4}-\h{4}-\h{4}-\h{12}/i => user_uuid, file
|
||||||
|
@user_uuid = user_uuid
|
||||||
|
@file = file
|
||||||
|
|
||||||
# https://booth.pm/en/items/2864768
|
# https://booth.pm/en/items/2864768
|
||||||
# https://booth.pm/ja/items/2864768
|
# https://booth.pm/ja/items/2864768
|
||||||
in _, "booth.pm", _, "items", work_id
|
in _, "booth.pm", _, "items", work_id
|
||||||
@@ -53,7 +66,25 @@ module Source
|
|||||||
end
|
end
|
||||||
|
|
||||||
def image_url?
|
def image_url?
|
||||||
url.host == "booth.pximg.net"
|
url.host.in?(["booth.pximg.net", "s2.booth.pm"])
|
||||||
|
end
|
||||||
|
|
||||||
|
# True when this is an image URL whose file name does not carry the
# "_base_resized" marker.
def full_image_url?
  return false unless image_url?

  @file.exclude?("_base_resized")
end
|
||||||
|
|
||||||
|
def full_image_url_for(extension)
|
||||||
|
return unless @file.present?
|
||||||
|
full_file = @file.gsub(/_base_resized\.\w+$/, ".#{extension}")
|
||||||
|
if user_uuid
|
||||||
|
if work_id
|
||||||
|
"https://#{host}/#{user_uuid}/i/#{work_id}/#{full_file}"
|
||||||
|
else
|
||||||
|
"https://#{host}/#{user_uuid}/#{full_file}"
|
||||||
|
end
|
||||||
|
elsif user_id
|
||||||
|
"https://#{host}/users/#{user_id}/icon_image/#{full_file}"
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def page_url
|
def page_url
|
||||||
|
|||||||
@@ -95,8 +95,8 @@ class ArtistURL < ApplicationRecord
|
|||||||
def priority
|
def priority
|
||||||
sites = %w[
|
sites = %w[
|
||||||
Pixiv Twitter
|
Pixiv Twitter
|
||||||
ArtStation Baraag BCY Deviant\ Art Hentai\ Foundry Fantia Foundation Lofter Nico\ Seiga Nijie Pawoo Fanbox Pixiv\ Sketch Plurk Tinami Tumblr Weibo
|
ArtStation Baraag BCY Booth Deviant\ Art Hentai\ Foundry Fantia Foundation Lofter Nico\ Seiga Nijie Pawoo Fanbox Pixiv\ Sketch Plurk Tinami Tumblr Weibo
|
||||||
Ask.fm Booth Facebook FC2 Gumroad Instagram Ko-fi Livedoor Mihuashi Mixi.jp Patreon Piapro.jp Picarto Privatter Sakura.ne.jp Stickam Skeb Twitch Youtube
|
Ask.fm Facebook FC2 Gumroad Instagram Ko-fi Livedoor Mihuashi Mixi.jp Patreon Piapro.jp Picarto Privatter Sakura.ne.jp Stickam Skeb Twitch Youtube
|
||||||
Amazon Circle.ms DLSite Doujinshi.org Erogamescape Mangaupdates Melonbooks Toranoana Wikipedia
|
Amazon Circle.ms DLSite Doujinshi.org Erogamescape Mangaupdates Melonbooks Toranoana Wikipedia
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ class ActiveSupport::TestCase
|
|||||||
include DownloadTestHelper
|
include DownloadTestHelper
|
||||||
include IqdbTestHelper
|
include IqdbTestHelper
|
||||||
include UploadTestHelper
|
include UploadTestHelper
|
||||||
|
extend SourceTestHelper
|
||||||
extend StripeTestHelper
|
extend StripeTestHelper
|
||||||
extend NormalizeAttributeHelper
|
extend NormalizeAttributeHelper
|
||||||
|
|
||||||
|
|||||||
93
test/test_helpers/source_test_helper.rb
Normal file
93
test/test_helpers/source_test_helper.rb
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# Class-level test macros for exercising a source extractor ("strategy")
# against a URL. The macros rely on `context` / `should` being available on
# the test class (presumably from shoulda-context -- confirm).
module SourceTestHelper
  # A helper method to automate all the checks needed to make sure that a strategy does not break.
  #
  # * If download_size is nil, it tests that the file is downloaded correctly, otherwise it also checks the filesize.
  # * If deleted is true, it skips the downloading check, but it still tries everything else and makes sure nothing breaks.
  # * Any passed kwargs parameter is tested against the strategy.
  #
  # @param url [String] the URL to build the strategy from
  # @param referer [String, nil] optional referer passed to Source::Extractor.find
  # @param download_size [Integer, nil] expected size of the first downloaded image
  # @param deleted [Boolean] skip the download check when true
  # @param methods_to_test [Hash] method name => expected value (a Regexp is
  #   matched, nil asserts nil, anything else asserts equality). The
  #   :profile_url and :tags keys get dedicated checks.
  def strategy_should_work(url, referer: nil, download_size: nil, deleted: false, **methods_to_test)
    context "a strategy for #{url}#{", referer: #{referer}" if referer.present?}".chomp do
      strategy = Source::Extractor.find(url, referer)

      should "not raise anything" do
        assert_nothing_raised { strategy.to_h }
      end

      should "make sure that image_urls is an array of valid elements" do
        assert((strategy.image_urls.instance_of? Array))
        assert_not(strategy.image_urls.include?(nil))
      end

      should_download_successfully(strategy, download_size) unless deleted

      # {profile_url: nil}[:profile_url].present? -> false
      # Doing it this way instead we can check profile_url even if it's passed as a nil.
      if methods_to_test.include? :profile_url
        profile_url = methods_to_test.delete(:profile_url)
        should_handle_artists_correctly(strategy, profile_url)
      end

      tags = methods_to_test.delete(:tags)
      should_validate_tags(strategy, tags)

      # check any method that is passed as kwargs, in order to hardcode as few things as possible
      methods_to_test.each do |method_name, expected_value|
        should "make sure that '#{method_name}' matches" do
          if expected_value.instance_of? Regexp
            assert_match(expected_value, strategy.try(method_name))
          elsif expected_value.nil?
            assert_nil(strategy.try(method_name))
          else
            assert_equal(expected_value, strategy.try(method_name))
          end
        end
      end
    end
  end

  # Defines a test that downloads the strategy's first image.
  #
  # @param strategy the extractor under test
  # @param download_size [Integer, nil] when given, the downloaded file must
  #   be exactly this size; otherwise the file only needs to report a size
  def should_download_successfully(strategy, download_size = nil)
    should "download successfully" do
      file = strategy.download_file!(strategy.image_urls.first)
      if download_size.present?
        # Compare against the caller-supplied size (the previous code referenced
        # an undefined `expected_filesize` and raised NameError here).
        assert_equal(download_size, file.size)
      else
        assert_not_nil(file.size)
      end
    end
  end

  # Defines the artist-related test for a strategy.
  #
  # With a profile URL, the strategy must report that URL and must resolve to
  # an artist created with it. Without one, the strategy must report no
  # profile URL, no artist name and no other names.
  #
  # @param strategy the extractor under test
  # @param profile_url [String, nil] the expected profile URL
  def should_handle_artists_correctly(strategy, profile_url)
    if profile_url.present?
      should "correctly match a strategy to an artist with the same profile url" do
        assert_equal(profile_url, strategy.profile_url)
        artist = FactoryBot.create(:artist, name: strategy.artist_name, url_string: profile_url)
        assert_equal([artist], strategy.artists)
      end
    else
      should "not incorrectly extract a profile url or artist data when there's none to be found" do
        assert_nil(strategy.profile_url)
        assert_nil(strategy.artist_name)
        assert_equal([], strategy.other_names)
      end
    end
  end

  # Defines tests for the shape of `strategy.tags` and, when expected tags are
  # given, for their content.
  #
  # @param strategy the extractor under test
  # @param tags [Array<Array>, Array<String>, nil] expected tags, either as full
  #   [name, url] pairs (compared exactly, order-insensitive) or as bare names
  #   (compared case-insensitively against the first element of each pair)
  def should_validate_tags(strategy, tags = nil)
    should "make sure that tags return an array of arrays" do
      assert((strategy.tags.instance_of? Array))
      if strategy.tags.present?
        assert((strategy.tags.first.instance_of? Array))
      end
    end

    return unless tags.present?

    should "make sure that tags match" do
      if tags&.first.instance_of? Array
        assert_equal(tags.sort, strategy.tags.sort)
      elsif tags&.first.instance_of? String
        assert_equal(tags.map(&:downcase).sort, strategy.tags.map(&:first).map(&:downcase).sort)
      end
    end
  end
end
|
||||||
60
test/unit/sources/booth_test.rb
Normal file
60
test/unit/sources/booth_test.rb
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
require "test_helper"
|
||||||
|
|
||||||
|
module Sources
  # Checks the Booth extractor against an item page, a sample image of that
  # item, a shop-subdomain item URL, a profile cover image, a profile icon,
  # and a deleted item.
  class BoothTest < ActiveSupport::TestCase
    # Full-size image URLs expected for item 3713604.
    standard_url_images = [
      "https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/ae0fdbcf-e4c5-4840-8d5c-43e18bddc93e.jpg",
      "https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/d12bce50-a0c7-43f8-a4fb-5ee0ea6855a3.jpg",
      "https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/f5332da3-4097-4d33-bbf6-a9b64c7671b3.jpg",
    ]

    # Metadata expected both for the item page and for a sample image of the same item.
    item_3713604_metadata = {
      profile_url: "https://amedamacon.booth.pm",
      page_url: "https://booth.pm/en/items/3713604",
      artist_name: "amedamacon",
      other_names: ["あめうさぎBOOTH"],
      tags: [["抱き枕カバー", "https://booth.pm/en/browse/Pillow%20Cover?tags%5B%5D=%E6%8A%B1%E3%81%8D%E6%9E%95%E3%82%AB%E3%83%90%E3%83%BC"]],
      artist_commentary_title: "フユちゃん抱き枕カバー",
      dtext_artist_commentary_desc: /発送:6月上旬頃(BOOTH倉庫より発送)/,
    }

    # Item page: every image of the item is returned.
    strategy_should_work(
      "https://booth.pm/en/items/3713604",
      image_urls: standard_url_images,
      **item_3713604_metadata
    )

    # Resized sample of the item's second image: resolves to that image in full size.
    strategy_should_work(
      "https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/d12bce50-a0c7-43f8-a4fb-5ee0ea6855a3_base_resized.jpg",
      image_urls: [standard_url_images[1]],
      **item_3713604_metadata
    )

    # Item URL on a shop subdomain.
    strategy_should_work(
      "https://re-face.booth.pm/items/2423989",
      image_urls: ["https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2423989/a692d4f3-4371-4a86-a337-83fee82d46a4.png"],
      profile_url: "https://re-face.booth.pm",
      page_url: "https://booth.pm/en/items/2423989",
      artist_name: "re-face",
      other_names: ["Re:fAce/りふぇいす。"],
      tags: ["original"],
      artist_commentary_title: "RwithV vol.1 -アイドルはじめます!-",
      dtext_artist_commentary_desc: /注文が殺到した際は、発送が遅れてしまう場合もございますので予めご了承ください。/
    )

    # Profile cover image: full-size URL is found, but no profile can be derived.
    strategy_should_work(
      "https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638_base_resized.jpg",
      image_urls: ["https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png"],
      profile_url: nil
    )

    # Profile icon: full-size URL is found, but no profile can be derived.
    strategy_should_work(
      "https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg",
      image_urls: ["https://booth.pximg.net/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314.png"],
      profile_url: nil
    )

    # Deleted item: nothing should raise; the download check is skipped.
    strategy_should_work("https://booth.pm/en/items/2003079", deleted: true)
  end
end
|
||||||
Reference in New Issue
Block a user