Add Booth support

This commit is contained in:
nonamethanks
2022-04-15 22:11:16 +02:00
parent 70148366d9
commit 9612578fcb
5 changed files with 184 additions and 8 deletions

View File

@@ -27,6 +27,7 @@ module Source
DOWNLOAD_TIMEOUT = 60
attr_reader :url, :referer_url, :parsed_url, :parsed_referer
delegate :site_name, to: :parsed_url
SUBCLASSES = [
@@ -50,6 +51,7 @@ module Source
Source::Extractor::Plurk,
Source::Extractor::Tinami,
Source::Extractor::Fantia,
Source::Extractor::Booth,
]
# Should return true if the extractor is configured correctly. Return false

View File

@@ -0,0 +1,83 @@
# frozen_string_literal: true
# @see Source::URL::Booth
class Source::Extractor
  # Extractor for Booth. Resolves full-size image URLs and scrapes the artist,
  # commentary, and tags from the item page when one can be determined.
  class Booth < Source::Extractor
    # @return [Boolean] true if the URL being extracted was parsed as a Booth URL
    def match?
      Source::URL::Booth === parsed_url
    end

    # For a direct image URL, returns that image (upgraded to its full-size
    # version when the URL is a sample). Otherwise returns every image listed
    # on the item page.
    #
    # @return [Array<String>] the image URLs; empty if the page is unavailable
    def image_urls
      if parsed_url.image_url?
        if parsed_url.full_image_url?
          [parsed_url.to_s]
        else
          [find_right_extension(parsed_url)]
        end
      else
        page&.css(".market-item-detail-item-image")&.pluck("data-origin").to_a.compact
      end
    end

    # Prefers the shop link found on the item page. Falls back to whatever can
    # be derived from the URL or the referer when the page is unavailable or
    # the link can't be found on it.
    #
    # @return [String, nil] the artist's shop URL, without a trailing slash
    def profile_url
      shop_link = page&.at(".summary [data-product-list*='shop_index']")
      shop_link&.[]("href")&.chomp("/").presence || parsed_url.profile_url || parsed_referer&.profile_url
    end

    # @return [String, nil] the username parsed out of the profile URL
    def artist_name
      return nil if profile_url.blank?

      Source::URL.parse(profile_url)&.username
    end

    # @return [String, nil] the alt text of the avatar in the page summary
    def display_name
      page&.at(".summary .user-avatar")&.[]("alt")
    end

    # @return [Array<String>] the display name, if any
    def other_names
      [display_name].compact
    end

    # @return [String, nil] the text of the title element in the page summary
    def artist_commentary_title
      page&.at(".summary .u-tpg-title1")&.text
    end

    # @return [String, nil] the HTML of the first `.autolink` element on the page
    def artist_commentary_desc
      page&.at(".autolink")&.to_html
    end

    # @return the commentary description converted from HTML by DText.from_html
    def dtext_artist_commentary_desc
      DText.from_html(artist_commentary_desc)
    end

    # @return [Array<Array(String, String)>] [name, url] pairs, one per tag link on the page
    def tags
      page&.css(".item-info-detail [data-product-list*='tag_category_search']").to_a.map do |element|
        # Everything from the first " x " onwards is dropped from the link text.
        [element.text.gsub(/ x .*/, ""), element["href"]]
      end
    end

    # @return [String, nil] the item page URL, taken from the URL or else from the referer
    def page_url
      parsed_url.page_url || parsed_referer&.page_url
    end

    # Fetches and parses the item page.
    #
    # The guard checks the same URL that is fetched (`page_url`), so a page
    # URL that is only known through the referer is used too.
    #
    # @return the parsed response body, or nil if there is no page URL or the
    #   response status is not 200
    def page
      return nil if page_url.blank?

      # NOTE(review): the `adult` cookie presumably skips the adult content
      # confirmation screen - confirm.
      response = http.cache(1.minute).cookies(adult: "t").get(page_url)
      return nil if response.code != 200

      response.parse
    end
    memoize :page

    # Samples don't reveal the extension of the full-size image, so each known
    # extension is probed in turn.
    #
    # @param parsed_url [Source::URL::Booth] the sample image URL
    # @return [String] the first candidate that exists, or the original URL if none do
    def find_right_extension(parsed_url)
      # `full_image_url_for` returns nil when it can't build a URL; skip those
      # instead of probing nil.
      candidates = %w[png jpg jpeg].filter_map { |ext| parsed_url.full_image_url_for(ext) }
      candidates.find { |candidate| http_exists?(candidate) } || parsed_url.to_s
    end
  end
end

View File

@@ -1,15 +1,13 @@
# frozen_string_literal: true
# Unhandled:
#
# https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg (profile icon)
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png (profile cover image)
# Unhandled
# https://booth.pm/downloadables/1376468 (from https://booth.pm/en/items/2425521, requires pixiv login to download)
module Source
class URL::Booth < Source::URL
RESERVED_SUBDOMAINS = ["www", "s", "s2", "asset", "accounts", nil]
attr_reader :work_id, :user_id, :username
attr_reader :work_id, :user_id, :user_uuid, :username
def self.match?(url)
url.domain == "booth.pm" || url.host == "booth.pximg.net"
@@ -21,6 +19,7 @@ module Source
# https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (full)
# https://booth.pximg.net/c/300x300_a2_g5/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (thumb)
# https://booth.pximg.net/c/72x72_a2_g5/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (thumb)
# https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d.jpeg (full)
#
# https://s2.booth.pm/b242a7bd-0747-48c4-891d-9e8552edd5d7/i/3746752/52dbee27-7ad2-4048-9c1d-827eee36625c_base_resized.jpg (sample)
# https://booth.pximg.net/b242a7bd-0747-48c4-891d-9e8552edd5d7/i/3746752/52dbee27-7ad2-4048-9c1d-827eee36625c.jpg (full)
@@ -32,6 +31,20 @@ module Source
@work_id = work_id
@file = file
# profile icons
# https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg (sample)
# https://booth.pximg.net/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314.png (full)
in _, _, *, "users", user_id, "icon_image", file
@user_id = user_id
@file = file
# profile cover images
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638_base_resized.jpg (sample)
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png (full)
in _, _, *, /\h{8}-\h{4}-\h{4}-\h{4}-\h{12}/i => user_uuid, file
@user_uuid = user_uuid
@file = file
# https://booth.pm/en/items/2864768
# https://booth.pm/ja/items/2864768
in _, "booth.pm", _, "items", work_id
@@ -53,7 +66,25 @@ module Source
end
# @return [Boolean] true if the URL is on one of the hosts that serve Booth images
def image_url?
  # Only this check decides the result; the older single-host comparison that
  # preceded it was evaluated and then discarded.
  url.host.in?(["booth.pximg.net", "s2.booth.pm"])
end
# True if this is an image URL whose filename lacks the `_base_resized` marker.
#
# An image-host URL that matched none of the known patterns has no filename;
# it is reported as not full-size instead of raising NoMethodError.
#
# @return [Boolean]
def full_image_url?
  image_url? && @file.present? && @file.exclude?("_base_resized")
end
# Builds the URL of the full-size version of this image, assuming it has the
# given file extension.
#
# @param extension [String] the extension to try, without the leading dot
# @return [String, nil] the candidate URL, or nil if there is no filename or
#   neither a user UUID nor a user id is known
def full_image_url_for(extension)
  return if @file.blank?

  # Replace the `_base_resized.<ext>` suffix with the requested extension.
  filename = @file.gsub(/_base_resized\.\w+$/, ".#{extension}")

  return "https://#{host}/#{user_uuid}/i/#{work_id}/#{filename}" if user_uuid && work_id
  return "https://#{host}/#{user_uuid}/#{filename}" if user_uuid

  "https://#{host}/users/#{user_id}/icon_image/#{filename}" if user_id
end
def page_url

View File

@@ -95,8 +95,8 @@ class ArtistURL < ApplicationRecord
def priority
sites = %w[
Pixiv Twitter
ArtStation Baraag BCY Deviant\ Art Hentai\ Foundry Fantia Foundation Lofter Nico\ Seiga Nijie Pawoo Fanbox Pixiv\ Sketch Plurk Tinami Tumblr Weibo
Ask.fm Booth Facebook FC2 Gumroad Instagram Ko-fi Livedoor Mihuashi Mixi.jp Patreon Piapro.jp Picarto Privatter Sakura.ne.jp Stickam Skeb Twitch Youtube
ArtStation Baraag BCY Booth Deviant\ Art Hentai\ Foundry Fantia Foundation Lofter Nico\ Seiga Nijie Pawoo Fanbox Pixiv\ Sketch Plurk Tinami Tumblr Weibo
Ask.fm Facebook FC2 Gumroad Instagram Ko-fi Livedoor Mihuashi Mixi.jp Patreon Piapro.jp Picarto Privatter Sakura.ne.jp Stickam Skeb Twitch Youtube
Amazon Circle.ms DLSite Doujinshi.org Erogamescape Mangaupdates Melonbooks Toranoana Wikipedia
]

View File

@@ -0,0 +1,60 @@
require "test_helper"
module Sources
  # Runs the shared extractor assertions against Booth item pages, direct
  # image URLs, and an item flagged as deleted.
  class BoothTest < ActiveSupport::TestCase
    item_3713604_images = %w[
      https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/ae0fdbcf-e4c5-4840-8d5c-43e18bddc93e.jpg
      https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/d12bce50-a0c7-43f8-a4fb-5ee0ea6855a3.jpg
      https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/f5332da3-4097-4d33-bbf6-a9b64c7671b3.jpg
    ]

    # Everything expected of item 3713604 apart from the images, which depend
    # on the URL being tested. Returns a fresh hash on every call.
    item_3713604_expectations = lambda do |image_urls|
      {
        image_urls: image_urls,
        profile_url: "https://amedamacon.booth.pm",
        page_url: "https://booth.pm/en/items/3713604",
        artist_name: "amedamacon",
        other_names: ["あめうさぎBOOTH"],
        tags: [["抱き枕カバー", "https://booth.pm/en/browse/Pillow%20Cover?tags%5B%5D=%E6%8A%B1%E3%81%8D%E6%9E%95%E3%82%AB%E3%83%90%E3%83%BC"]],
        artist_commentary_title: "フユちゃん抱き枕カバー",
        dtext_artist_commentary_desc: /発送6月上旬頃BOOTH倉庫より発送/,
      }
    end

    # Item page: every image of the item.
    strategy_should_work(
      "https://booth.pm/en/items/3713604",
      **item_3713604_expectations.call(item_3713604_images)
    )

    # Sample image of the same item: only that image, at full size.
    strategy_should_work(
      "https://booth.pximg.net/a212cd73-75ab-482d-8fce-1ce2965e4d4f/i/3713604/d12bce50-a0c7-43f8-a4fb-5ee0ea6855a3_base_resized.jpg",
      **item_3713604_expectations.call([item_3713604_images.second])
    )

    # Item page on a shop subdomain.
    strategy_should_work(
      "https://re-face.booth.pm/items/2423989",
      image_urls: ["https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2423989/a692d4f3-4371-4a86-a337-83fee82d46a4.png"],
      profile_url: "https://re-face.booth.pm",
      page_url: "https://booth.pm/en/items/2423989",
      artist_name: "re-face",
      other_names: ["Re:fAce/りふぇいす。"],
      tags: ["original"],
      artist_commentary_title: "RwithV vol.1 -アイドルはじめます!-",
      dtext_artist_commentary_desc: /注文が殺到した際は、発送が遅れてしまう場合もございますので予めご了承ください。/
    )

    # Sample images with no item id in the URL: the full-size URL is expected,
    # but no profile.
    strategy_should_work(
      "https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638_base_resized.jpg",
      image_urls: ["https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png"],
      profile_url: nil
    )

    strategy_should_work(
      "https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg",
      image_urls: ["https://booth.pximg.net/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314.png"],
      profile_url: nil
    )

    strategy_should_work("https://booth.pm/en/items/2003079", deleted: true)
  end
end