Merge pull request #5125 from nonamethanks/booth-support

Add Booth support
This commit is contained in:
evazion
2022-04-17 23:00:14 -05:00
committed by GitHub
7 changed files with 278 additions and 8 deletions

View File

@@ -27,6 +27,7 @@ module Source
DOWNLOAD_TIMEOUT = 60
attr_reader :url, :referer_url, :parsed_url, :parsed_referer
delegate :site_name, to: :parsed_url
SUBCLASSES = [
@@ -50,6 +51,7 @@ module Source
Source::Extractor::Plurk,
Source::Extractor::Tinami,
Source::Extractor::Fantia,
Source::Extractor::Booth,
]
# Should return true if the extractor is configured correctly. Return false

View File

@@ -0,0 +1,83 @@
# frozen_string_literal: true
# @see Source::URL::Booth
class Source::Extractor
class Booth < Source::Extractor
# Whether this extractor applies to the URL being processed.
#
# @return [Boolean] true when `parsed_url` is a Source::URL::Booth
def match?
  parsed_url.is_a?(Source::URL::Booth)
end
# Lists the image URLs for this source.
#
# For a page URL, collects the `data-origin` attribute of every
# `.market-item-detail-item-image` element on the page (nil values dropped;
# empty when there is no page). For an image URL, returns just that URL,
# probing for the right file extension when it is not already full size.
#
# @return [Array<String>]
def image_urls
  unless parsed_url.image_url?
    image_nodes = page&.css(".market-item-detail-item-image")
    return image_nodes&.pluck("data-origin").to_a.compact
  end

  return [parsed_url.to_s] if parsed_url.full_image_url?

  [find_right_extension(parsed_url)]
end
# URL of the shop (profile) that owns the item.
#
# With a page available, reads the `href` of the shop-index link inside
# `.summary` and strips one trailing slash; the result is nil if the link or
# its href is missing. Without a page, falls back to the profile URL derived
# from the parsed URL, then from the referer.
#
# @return [String, nil]
def profile_url
  return parsed_url.profile_url || parsed_referer&.profile_url unless page.present?

  shop_link = page.at(".summary [data-product-list*='shop_index']")
  href = shop_link["href"] unless shop_link.nil?
  href&.chomp("/")
end
# Username taken from the profile URL.
#
# @return [String, nil] nil when there is no profile URL or it cannot be parsed
def artist_name
  shop_url = profile_url
  Source::URL.parse(shop_url)&.username if shop_url.present?
end
# The `alt` attribute of the `.summary .user-avatar` element on the page.
#
# @return [String, nil] nil when the page or the avatar element is missing
def display_name
  avatar = page&.at(".summary .user-avatar")
  avatar["alt"] unless avatar.nil?
end
# The display name wrapped in an array, or an empty array when it is nil.
#
# @return [Array<String>]
def other_names
  shown_name = display_name
  shown_name.nil? ? [] : [shown_name]
end
# Text of the `.summary .u-tpg-title1` element on the page.
#
# @return [String, nil] nil when the page or the element is missing
def artist_commentary_title
  heading = page&.at(".summary .u-tpg-title1")
  heading.nil? ? nil : heading.text
end
# HTML of the first `.autolink` element on the page.
#
# @return [String, nil] nil when the page or the element is missing
def artist_commentary_desc
  description = page&.at(".autolink")
  description.nil? ? nil : description.to_html
end
# The commentary description passed through `DText.from_html`.
def dtext_artist_commentary_desc
  description_html = artist_commentary_desc
  DText.from_html(description_html)
end
# Tags listed on the item page, as [name, href] pairs.
#
# The name is the link text with " x " and everything after it on that line
# removed (presumably a usage-count suffix; TODO confirm). Empty when there
# is no page.
#
# @return [Array<Array(String, String)>]
def tags
  tag_links = page&.css(".item-info-detail [data-product-list*='tag_category_search']").to_a

  tag_links.map do |link|
    tag_name = link.text.gsub(/ x .*/, "")
    [tag_name, link["href"]]
  end
end
# The item page URL, taken from the parsed URL when it has one and otherwise
# from the referer.
#
# @return [String, nil]
def page_url
  own_page = parsed_url.page_url
  return own_page if own_page

  parsed_referer&.page_url
end
# Fetches and parses the item page.
#
# The request goes to `page_url`, which falls back to the referer's page URL,
# so the guard checks that same value. Checking only `parsed_url.page_url`
# would skip the fetch even when the referer supplies a usable page.
#
# The response is cached for one minute and the request carries an `adult=t`
# cookie (presumably to get past the age gate; TODO confirm).
#
# @return [Object, nil] the parsed response, or nil when there is no page URL
#   or the response code is not 200
def page
  return nil if page_url.blank?

  resp = http.cache(1.minute).cookies(adult: "t").get(page_url)
  return nil if resp.code != 200

  resp.parse
end
memoize :page
# Finds the full-size variant of a sample image by trying each known file
# extension in turn and returning the first candidate that exists.
#
# `full_image_url_for` returns nil when it cannot build a URL, so those
# candidates are dropped rather than handed to `http_exists?`.
#
# @param parsed_url [Source::URL::Booth] the sample image URL
# @return [String] the first existing candidate, or the original URL as a
#   string when none exists
def find_right_extension(parsed_url)
  candidates = %w[png jpg jpeg].filter_map { |ext| parsed_url.full_image_url_for(ext) }
  candidates.find { |candidate| http_exists?(candidate) } || parsed_url.to_s
end
end
end

View File

@@ -1,15 +1,13 @@
# frozen_string_literal: true
# Unhandled:
#
# https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg (profile icon)
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png (profile cover image)
# Unhandled
# https://booth.pm/downloadables/1376468 (from https://booth.pm/en/items/2425521, requires pixiv login to download)
module Source
class URL::Booth < Source::URL
RESERVED_SUBDOMAINS = ["www", "s", "s2", "asset", "accounts", nil]
attr_reader :work_id, :user_id, :username
attr_reader :work_id, :user_id, :user_uuid, :username
def self.match?(url)
url.domain == "booth.pm" || url.host == "booth.pximg.net"
@@ -21,6 +19,7 @@ module Source
# https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (full)
# https://booth.pximg.net/c/300x300_a2_g5/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (thumb)
# https://booth.pximg.net/c/72x72_a2_g5/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d_base_resized.jpg (thumb)
# https://booth.pximg.net/8bb9e4e3-d171-4027-88df-84480480f79d/i/2864768/00cdfef0-e8d5-454b-8554-4885a7e4827d.jpeg (full)
#
# https://s2.booth.pm/b242a7bd-0747-48c4-891d-9e8552edd5d7/i/3746752/52dbee27-7ad2-4048-9c1d-827eee36625c_base_resized.jpg (sample)
# https://booth.pximg.net/b242a7bd-0747-48c4-891d-9e8552edd5d7/i/3746752/52dbee27-7ad2-4048-9c1d-827eee36625c.jpg (full)
@@ -32,6 +31,20 @@ module Source
@work_id = work_id
@file = file
# profile icons
# https://booth.pximg.net/c/128x128/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314_base_resized.jpg (sample)
# https://booth.pximg.net/users/3193929/icon_image/5be9eff4-1d9e-4a79-b097-33c1cd4ad314.png (full)
in _, _, *, "users", user_id, "icon_image", file
@user_id = user_id
@file = file
# profile cover images
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638_base_resized.jpg (sample)
# https://s2.booth.pm/8bb9e4e3-d171-4027-88df-84480480f79d/3d70de06-8e7c-444e-b8eb-a8a95bf20638.png (full)
in _, _, *, /\h{8}-\h{4}-\h{4}-\h{4}-\h{12}/i => user_uuid, file
@user_uuid = user_uuid
@file = file
# https://booth.pm/en/items/2864768
# https://booth.pm/ja/items/2864768
in _, "booth.pm", _, "items", work_id
@@ -53,7 +66,25 @@ module Source
end
# Whether the URL is served from one of the Booth image hosts.
#
# The earlier single-host comparison is removed: its value was discarded, and
# the host list below already covers it.
#
# @return [Boolean]
def image_url?
  url.host.in?(["booth.pximg.net", "s2.booth.pm"])
end
# Whether this is an image URL whose file name lacks the `_base_resized`
# marker.
#
# `@file` is only set when the URL matched a known image pattern, so an
# unrecognised image-host URL answers false instead of raising on nil.
#
# @return [Boolean]
def full_image_url?
  image_url? && @file.present? && @file.exclude?("_base_resized")
end
# Builds the full-size image URL for the given file extension.
#
# A `_base_resized.<ext>` suffix on the file name is replaced with
# `.<extension>`. The path depends on which identifiers are set: item images
# (user UUID and work id), profile cover images (user UUID only) or profile
# icons (user id only).
#
# @param extension [String] extension to try, without the leading dot
# @return [String, nil] nil when the file name is unknown or no identifier is set
def full_image_url_for(extension)
  return unless @file.present?

  full_name = @file.gsub(/_base_resized\.\w+$/, ".#{extension}")

  directory =
    if user_uuid && work_id
      "#{user_uuid}/i/#{work_id}"
    elsif user_uuid
      user_uuid.to_s
    elsif user_id
      "users/#{user_id}/icon_image"
    end

  "https://#{host}/#{directory}/#{full_name}" if directory
end
def page_url