Merge pull request #5125 from nonamethanks/booth-support

Add Booth support
This commit is contained in:
evazion
2022-04-17 23:00:14 -05:00
committed by GitHub
7 changed files with 278 additions and 8 deletions

View File

@@ -27,6 +27,7 @@ module Source
DOWNLOAD_TIMEOUT = 60
attr_reader :url, :referer_url, :parsed_url, :parsed_referer
delegate :site_name, to: :parsed_url
SUBCLASSES = [
@@ -50,6 +51,7 @@ module Source
Source::Extractor::Plurk,
Source::Extractor::Tinami,
Source::Extractor::Fantia,
Source::Extractor::Booth,
]
# Should return true if the extractor is configured correctly. Return false

View File

@@ -0,0 +1,83 @@
# frozen_string_literal: true
# @see Source::URL::Booth
class Source::Extractor
  # Extractor for Booth item pages and Booth-hosted image URLs.
  #
  # Metadata (profile, names, commentary, tags) is scraped from the item page
  # HTML; direct image URLs are resolved to their full-size variant.
  class Booth < Source::Extractor
    # @return [Boolean] true when the parsed URL is a Source::URL::Booth.
    def match?
      Source::URL::Booth === parsed_url
    end

    # The image URLs for this source.
    #
    # * For a full-size image URL, the URL itself.
    # * For any other image URL, the full-size URL found by probing extensions.
    # * Otherwise, the `data-origin` attribute of every
    #   `.market-item-detail-item-image` element on the item page.
    #
    # @return [Array<String>] possibly empty, never containing nil for page scrapes
    def image_urls
      if parsed_url.image_url?
        if parsed_url.full_image_url?
          [parsed_url.to_s]
        else
          [find_right_extension(parsed_url)]
        end
      else
        page&.css(".market-item-detail-item-image")&.pluck("data-origin").to_a.compact
      end
    end

    # The artist's profile URL.
    #
    # When the item page was loaded, this is the `href` (trailing slash
    # removed) of the element matching
    # `.summary [data-product-list*='shop_index']`, or nil if nothing matches.
    # Without a page, it falls back to the profile URL derived from the URL or
    # from the referer.
    #
    # @return [String, nil]
    def profile_url
      if page.present?
        page.at(".summary [data-product-list*='shop_index']")&.[]("href")&.chomp("/")
      else
        parsed_url.profile_url || parsed_referer&.profile_url
      end
    end

    # @return [String, nil] the username parsed out of the profile URL, or nil
    #   when there is no profile URL.
    def artist_name
      return nil if profile_url.blank?

      Source::URL.parse(profile_url)&.username
    end

    # @return [String, nil] the `alt` text of the `.summary .user-avatar`
    #   element on the item page.
    def display_name
      page&.at(".summary .user-avatar")&.[]("alt")
    end

    # @return [Array<String>] the display name, or an empty array if unknown.
    def other_names
      [display_name].compact
    end

    # @return [String, nil] the text of the `.summary .u-tpg-title1` element.
    def artist_commentary_title
      page&.at(".summary .u-tpg-title1")&.text
    end

    # @return [String, nil] the HTML of the first `.autolink` element.
    def artist_commentary_desc
      page&.at(".autolink")&.to_html
    end

    # @return the commentary description converted from HTML by DText.from_html.
    def dtext_artist_commentary_desc
      DText.from_html(artist_commentary_desc)
    end

    # Tags listed on the item page.
    #
    # Everything from the first " x " onward is removed from each tag's text
    # (presumably a count suffix -- TODO confirm against the page markup).
    #
    # @return [Array<Array(String, String)>] `[name, href]` pairs; empty when
    #   there is no page.
    def tags
      page&.css(".item-info-detail [data-product-list*='tag_category_search']").to_a.map do |element|
        [element.text.gsub(/ x .*/, ""), element["href"]]
      end
    end

    # @return [String, nil] the item page URL derived from the URL, falling
    #   back to the one derived from the referer.
    def page_url
      parsed_url.page_url || parsed_referer&.page_url
    end

    # Fetches and parses the item page.
    #
    # The response is cached for one minute and the request carries an
    # `adult=t` cookie (presumably to get past the adult-content gate --
    # TODO confirm).
    #
    # @return the parsed response body, or nil when there is no page URL or
    #   the response code is not 200.
    def page
      # Guard on the very URL that is fetched below. Checking only
      # parsed_url.page_url would skip the page whenever the page URL is
      # available solely through the referer.
      target_url = page_url
      return nil if target_url.blank?

      resp = http.cache(1.minute).cookies(adult: "t").get(target_url)
      return nil if resp.code != 200

      resp.parse
    end
    memoize :page

    # Finds the full-size URL for a resized image URL by trying the png, jpg
    # and jpeg extensions in that order and keeping the first one for which
    # http_exists? is true.
    #
    # @param parsed_url the parsed Booth image URL
    # @return [String] the first existing candidate, or the given URL as a
    #   string when none exists.
    def find_right_extension(parsed_url)
      extensions = %w[png jpg jpeg]
      candidates = extensions.map { |ext| parsed_url.full_image_url_for(ext) }
      chosen_url = candidates.find { |candidate| http_exists?(candidate) }
      chosen_url || parsed_url.to_s
    end
  end
end

View File

@@ -1,15 +1,13 @@
# frozen_string_literal: true
# Unhandled:
#
# https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg (profile icon)
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png (profile cover image)
# Unhandled
# https://booth.pm/downloadables/1376468 (from https://booth.pm/en/items/2425521, requires pixiv login to download)
module Source
class URL::Booth < Source::URL
RESERVED_SUBDOMAINS = ["www", "s", "s2", "asset", "accounts", nil]
attr_reader :work_id, :user_id, :username
attr_reader :work_id, :user_id, :user_uuid, :username
def self.match?(url)
url.domain == "booth.pm" || url.host == "booth.pximg.net"
@@ -21,6 +19,7 @@ module Source
# https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (full)
# https://booth.pximg.net/c/300x300_a2_g5/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (thumb)
# https://booth.pximg.net/c/72x72_a2_g5/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (thumb)
# https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d.jpeg (full)
#
# https://s2.booth.pm/b242a7bd-0747-48c4-891d-9e8552edd5d7/i/3746752/52dbee27-7ad2-4048-9c1d-827eee36625c_base_resized.jpg (sample)
# https://booth.pximg.net/b242a7bd-0747-48c4-891d-9e8552edd5d7/i/3746752/52dbee27-7ad2-4048-9c1d-827eee36625c.jpg (full)
@@ -32,6 +31,20 @@ module Source
@work_id = work_id
@file = file
# profile icons
# https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg (sample)
# https://booth.pximg.net/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314.png (full)
in _, _, *, "users", user_id, "icon_image", file
@user_id = user_id
@file = file
# profile cover images
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638_base_resized.jpg (sample)
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png (full)
in _, _, *, /\h{8}-\h{4}-\h{4}-\h{4}-\h{12}/i => user_uuid, file
@user_uuid = user_uuid
@file = file
# https://booth.pm/en/items/2864768
# https://booth.pm/ja/items/2864768
in _, "booth.pm", _, "items", work_id
@@ -53,7 +66,25 @@ module Source
end
# True when the URL's host is one of the listed Booth image hosts.
# NOTE(review): only the last expression is returned. The bare `==` comparison
# before it is evaluated and discarded -- it looks like the pre-change line of
# this diff hunk rather than live code; confirm before relying on it.
def image_url?
url.host == "booth.pximg.net"
url.host.in?(["booth.pximg.net", "s2.booth.pm"])
end
# True when this is an image URL whose file name does not carry the
# "_base_resized" marker, i.e. it is not a resized sample.
def full_image_url?
  image_url? && !@file.include?("_base_resized")
end
# Builds the full-size image URL for this URL's file, using the given extension
# in place of a trailing "_base_resized.<ext>" suffix.
#
# The path layout depends on which identifiers are known:
# * user_uuid and work_id -> "<uuid>/i/<work_id>/<file>"
# * user_uuid only        -> "<uuid>/<file>"
# * user_id only          -> "users/<user_id>/icon_image/<file>"
#
# @param extension [String] file extension without the leading dot
# @return [String, nil] nil when there is no file or no known identifier
def full_image_url_for(extension)
  return unless @file.present?

  filename = @file.gsub(/_base_resized\.\w+$/, ".#{extension}")

  path =
    if user_uuid && work_id
      "#{user_uuid}/i/#{work_id}/#{filename}"
    elsif user_uuid
      "#{user_uuid}/#{filename}"
    elsif user_id
      "users/#{user_id}/icon_image/#{filename}"
    end

  path && "https://#{host}/#{path}"
end
def page_url

View File

@@ -95,8 +95,8 @@ class ArtistURL < ApplicationRecord
def priority
sites = %w[
Pixiv Twitter
ArtStation Baraag BCY Deviant\ Art Hentai\ Foundry Fantia Foundation Lofter Nico\ Seiga Nijie Pawoo Fanbox Pixiv\ Sketch Plurk Tinami Tumblr Weibo
Ask.fm Booth Facebook FC2 Gumroad Instagram Ko-fi Livedoor Mihuashi Mixi.jp Patreon Piapro.jp Picarto Privatter Sakura.ne.jp Stickam Skeb Twitch Youtube
ArtStation Baraag BCY Booth Deviant\ Art Hentai\ Foundry Fantia Foundation Lofter Nico\ Seiga Nijie Pawoo Fanbox Pixiv\ Sketch Plurk Tinami Tumblr Weibo
Ask.fm Facebook FC2 Gumroad Instagram Ko-fi Livedoor Mihuashi Mixi.jp Patreon Piapro.jp Picarto Privatter Sakura.ne.jp Stickam Skeb Twitch Youtube
Amazon Circle.ms DLSite Doujinshi.org Erogamescape Mangaupdates Melonbooks Toranoana Wikipedia
]

View File

@@ -26,6 +26,7 @@ class ActiveSupport::TestCase
include DownloadTestHelper
include IqdbTestHelper
include UploadTestHelper
extend SourceTestHelper
extend StripeTestHelper
extend NormalizeAttributeHelper

View File

@@ -0,0 +1,93 @@
module SourceTestHelper
  # A helper method to automate all the checks needed to make sure that a strategy does not break.
  #
  # * If download_size is nil, it only tests that the file is downloaded correctly; otherwise it also checks the filesize.
  # * If deleted is true, it skips the downloading check, but it still tries everything else and makes sure nothing breaks.
  # * Any other keyword argument is treated as a method name and its expected value, and is tested against the strategy.
  #
  # @param url [String] the URL to build the strategy for
  # @param referer [String, nil] optional referer passed to Source::Extractor.find
  # @param download_size [Integer, nil] expected size of the first downloaded image
  # @param deleted [Boolean] whether the source is gone and downloading should be skipped
  # @param methods_to_test [Hash] method name => expected value (a Regexp is matched, nil asserts nil)
  def strategy_should_work(url, referer: nil, download_size: nil, deleted: false, **methods_to_test)
    context "a strategy for #{url}#{", referer: #{referer}" if referer.present?}".chomp do
      strategy = Source::Extractor.find(url, referer)

      should "not raise anything" do
        assert_nothing_raised { strategy.to_h }
      end

      should "make sure that image_urls is an array of valid elements" do
        assert_instance_of(Array, strategy.image_urls)
        assert_not(strategy.image_urls.include?(nil))
      end

      should_download_successfully(strategy, download_size) unless deleted

      # {profile_url: nil}[:profile_url].present? -> false
      # Checking for the key instead lets us test profile_url even when it is passed as nil.
      if methods_to_test.key?(:profile_url)
        profile_url = methods_to_test.delete(:profile_url)
        should_handle_artists_correctly(strategy, profile_url)
      end

      tags = methods_to_test.delete(:tags)
      should_validate_tags(strategy, tags)

      # Check any method that is passed as kwargs, in order to hardcode as few things as possible.
      methods_to_test.each do |method_name, expected_value|
        should "make sure that '#{method_name}' matches" do
          if expected_value.instance_of? Regexp
            assert_match(expected_value, strategy.try(method_name))
          elsif expected_value.nil?
            assert_nil(strategy.try(method_name))
          else
            assert_equal(expected_value, strategy.try(method_name))
          end
        end
      end
    end
  end

  # Defines a test that downloads the strategy's first image URL.
  #
  # @param strategy the extractor under test
  # @param download_size [Integer, nil] when present, the exact size the
  #   downloaded file must have; otherwise the size only has to be non-nil
  def should_download_successfully(strategy, download_size = nil)
    should "download successfully" do
      file = strategy.download_file!(strategy.image_urls.first)
      if download_size.present?
        # Compare against the caller-supplied size (the parameter of this method).
        assert_equal(download_size, file.size)
      else
        assert_not_nil(file.size)
      end
    end
  end

  # Defines a test for the artist-related methods.
  #
  # With a profile URL, the strategy must report that URL and find an artist
  # created with the same URL. Without one, the strategy must report no
  # profile URL, no artist name, and no other names.
  #
  # @param strategy the extractor under test
  # @param profile_url [String, nil] the expected profile URL
  def should_handle_artists_correctly(strategy, profile_url)
    if profile_url.present?
      should "correctly match a strategy to an artist with the same profile url" do
        assert_equal(profile_url, strategy.profile_url)
        artist = FactoryBot.create(:artist, name: strategy.artist_name, url_string: profile_url)
        assert_equal([artist], strategy.artists)
      end
    else
      should "not incorrectly extract a profile url or artist data when there's none to be found" do
        assert_nil(strategy.profile_url)
        assert_nil(strategy.artist_name)
        assert_equal([], strategy.other_names)
      end
    end
  end

  # Defines tests for the strategy's tags.
  #
  # The shape check (an array whose first element, if any, is an array) always
  # runs. When expected tags are given they are compared order-insensitively:
  # an array of arrays is compared as is, an array of strings is compared
  # case-insensitively against the first element of each extracted tag.
  #
  # @param strategy the extractor under test
  # @param tags [Array<Array>, Array<String>, nil] the expected tags
  def should_validate_tags(strategy, tags = nil)
    should "make sure that tags return an array of arrays" do
      assert_instance_of(Array, strategy.tags)
      if strategy.tags.present?
        assert_instance_of(Array, strategy.tags.first)
      end
    end

    return unless tags.present?

    should "make sure that tags match" do
      if tags.first.instance_of? Array
        assert_equal(tags.sort, strategy.tags.sort)
      elsif tags.first.instance_of? String
        assert_equal(tags.map(&:downcase).sort, strategy.tags.map(&:first).map(&:downcase).sort)
      end
    end
  end
end

View File

@@ -0,0 +1,60 @@
require "test_helper"
module Sources
  # Runs strategy_should_work against Booth sources: an item page, a resized
  # item image, a shop-subdomain item page, a profile cover image, a profile
  # icon, and a deleted item.
  class BoothTest < ActiveSupport::TestCase
    item_images = %w[
      https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/ae0fdbcf-e4c5-4840-8d5c-43e18bddc93e.jpg
      https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/d12bce50-a0c7-43f8-a4fb-5ee0ea6855a3.jpg
      https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/f5332da3-4097-4d33-bbf6-a9b64c7671b3.jpg
    ]

    # Expectations shared by the item page and by an image belonging to that item.
    item_expectations = {
      profile_url: "https://amedamacon.booth.pm",
      page_url: "https://booth.pm/en/items/3713604",
      artist_name: "amedamacon",
      other_names: ["あめうさぎBOOTH"],
      tags: [["抱き枕カバー", "https://booth.pm/en/browse/Pillow%20Cover?tags%5B%5D=%E6%8A%B1%E3%81%8D%E6%9E%95%E3%82%AB%E3%83%90%E3%83%BC"]],
      artist_commentary_title: "フユちゃん抱き枕カバー",
      dtext_artist_commentary_desc: /発送6月上旬頃BOOTH倉庫より発送/,
    }

    # Item page: every image of the item is expected.
    strategy_should_work(
      "https://booth.pm/en/items/3713604",
      image_urls: item_images,
      **item_expectations
    )

    # Resized item image: only the matching full-size image is expected.
    strategy_should_work(
      "https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/d12bce50-a0c7-43f8-a4fb-5ee0ea6855a3_base_resized.jpg",
      image_urls: [item_images[1]],
      **item_expectations
    )

    # Item page on a shop subdomain, with tags given as plain names.
    strategy_should_work(
      "https://re-face.booth.pm/items/2423989",
      image_urls: ["https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2423989/a692d4f3-4371-4a86-a337-83fee82d46a4.png"],
      profile_url: "https://re-face.booth.pm",
      page_url: "https://booth.pm/en/items/2423989",
      artist_name: "re-face",
      other_names: ["Re:fAce/りふぇいす。"],
      tags: ["original"],
      artist_commentary_title: "RwithV vol.1 -アイドルはじめます!-",
      dtext_artist_commentary_desc: /注文が殺到した際は、発送が遅れてしまう場合もございますので予めご了承ください。/
    )

    # Profile cover image: no profile data is expected.
    strategy_should_work(
      "https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638_base_resized.jpg",
      image_urls: ["https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png"],
      profile_url: nil
    )

    # Profile icon: no profile data is expected.
    strategy_should_work(
      "https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg",
      image_urls: ["https://booth.pximg.net/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314.png"],
      profile_url: nil
    )

    # Deleted item: the download check is skipped.
    strategy_should_work("https://booth.pm/en/items/2003079", deleted: true)
  end
end