Add safebooru.org support.

Refactor the Gelbooru source extractor to support Safebooru.org as well.
This commit is contained in:
evazion
2022-11-09 15:42:59 -06:00
parent 35bfcbc3bd
commit 5b4856273f
5 changed files with 153 additions and 37 deletions

View File

@@ -1,11 +1,11 @@
# frozen_string_literal: true
# Source extractor for Gelbooru. The commentary and artist information are
# pulled from the Gelbooru post's source, while the translated tags include
# both the Gelbooru tags and the source's tags.
# Source extractor for Gelbooru and Safebooru.org. The commentary and artist information are pulled from the
# booru post's source, while the translated tags include both the booru tags and the source's tags.
#
# @see Source::URL::Gelbooru
# @see https://gelbooru.com/index.php?page=wiki&s=view&id=18780 (howto:api)
# @see https://safebooru.org/index.php?page=help&topic=dapi
module Source
class Extractor
class Gelbooru < Source::Extractor
@@ -16,23 +16,27 @@ module Source
end
# Returns the list of image URLs for this source.
#
# NOTE(review): this span looks like a rendered diff in which the pre-change and post-change lines are
# interleaved without +/- markers. The bare array expression directly after `def` is presumably the removed
# implementation; as written it is evaluated and discarded. Confirm against the repository.
def image_urls
[api_response[:file_url]].compact
# Prefer the full image URL carried by the parsed URL itself; otherwise fall back to the `file_url` field of
# the API response (`compact` drops a nil value, yielding an empty list).
if parsed_url.full_image_url.present?
[parsed_url.full_image_url]
else
[api_response[:file_url]].compact
end
end
# Builds the URL of the post's page from the post id; evaluates to nil when no post id is present.
#
# NOTE(review): two variants appear back to back, one hard-coding gelbooru.com and one interpolating `domain`.
# Presumably the first is the removed diff line; as written, only the last expression is the return value.
def page_url
"https://gelbooru.com/index.php?page=post&s=view&id=#{post_id}" if post_id.present?
"https://#{domain}/index.php?page=post&s=view&id=#{post_id}" if post_id.present?
end
# Returns the booru's own tags concatenated with `source_tags`.
#
# NOTE(review): `gelbooru_tags + source_tags` and `site_tags + source_tags` look like the old and new sides of a
# rename shown together; as written the second expression is the return value. `source_tags` is defined
# outside this view.
def tags
gelbooru_tags + source_tags
site_tags + source_tags
end
# Returns the post's tags as [tag, tag_search_url] pairs, or [] when the API response is blank.
#
# NOTE(review): two `def` headers (`gelbooru_tags`, `site_tags`) share one body here, which looks like the old
# and new method names of a rename rendered together; the span is not balanced Ruby as written.
def gelbooru_tags
def site_tags
return [] if api_response.blank?
# Split the `tags` field on whitespace and append a `rating:<rating>` pseudo-tag.
# Assumes `api_response[:tags]` is a String -- TODO confirm against the API response format.
tags = api_response[:tags].split + ["rating:#{api_response[:rating]}"]
tags.map do |tag|
# Each tag is CGI-escaped into a tag search URL. The gelbooru.com variant is presumably the removed line;
# as written, the `domain` variant is the block's value.
[tag, "https://gelbooru.com/index.php?page=post&s=list&tags=#{CGI.escape(tag)}"]
[tag, "https://#{domain}/index.php?page=post&s=list&tags=#{CGI.escape(tag)}"]
end
end
@@ -44,13 +48,18 @@ module Source
sub_extractor&.other_names.to_a
end
# The domain of the parsed URL, as reported by `parsed_url`.
def domain = parsed_url.domain
# Resolves the post id, trying the parsed URL first, then the parsed referer (if any), then a fallback.
#
# NOTE(review): the two lines differ only in the last fallback (`post_id_from_md5` vs. the `id` field of the
# API response) and look like the old and new sides of a diff; as written the second line is the return value.
def post_id
parsed_url.post_id || parsed_referer&.post_id || post_id_from_md5
parsed_url.post_id || parsed_referer&.post_id || api_response[:id]
end
# Returns the API URL for the post, taken from the parsed URL or else from the parsed referer (if any).
#
# NOTE(review): the hard-coded gelbooru.com string below is presumably the removed diff line; as written it is
# evaluated and discarded, and the final `||` expression is the return value.
def api_url
# https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=779812&json=1
"https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" if post_id.present?
# https://gelbooru.com//index.php?page=dapi&s=post&q=index&tags=md5:338078144fe77c9e5f35dbb585e749ec
# https://gelbooru.com//index.php?page=dapi&s=post&q=index&tags=id:7903922
parsed_url.api_url || parsed_referer&.api_url
end
memoize def api_response
@@ -59,20 +68,11 @@ module Source
response = http.cache(1.minute).get(api_url)
return {} unless response.status == 200
response.parse["post"]&.first.to_h.with_indifferent_access
end
memoize def post_id_from_md5
return nil unless parsed_url.page_url.present?
response = http.cache(1.minute).head(parsed_url.page_url)
return nil unless response.status == 200
Source::URL.parse(response.uri).post_id
response.parse.dig("posts", "post").to_h.with_indifferent_access
end
# Returns an extractor for the URL in the post's `source` field, or nil when that field is not an http(s) URL.
#
# NOTE(review): the two guards look like the old (`nil?`) and new (`match?`) sides of a diff. As written both
# run; the second also covers a nil source, because `nil.to_s` is "" and does not match the pattern.
def sub_extractor
return nil if api_response[:source].nil?
return nil if !api_response[:source].to_s.match?(%r{\Ahttps?://}i)
# Cached in @sub_extractor; `||=` looks the extractor up again on later calls if the result was nil.
@sub_extractor ||= Source::Extractor.find(api_response[:source], default: nil)
end
end

View File

@@ -1,10 +1,11 @@
# frozen_string_literal: true
# This covers both Gelbooru and Safebooru.
class Source::URL::Gelbooru < Source::URL
attr_reader :post_id, :md5, :full_image_url
attr_reader :post_id, :md5, :image_type, :full_image_url
# Returns true when the URL's domain is one this class handles.
#
# NOTE(review): the gelbooru.com-only list is presumably the removed diff line; as written it is evaluated and
# discarded, and the safebooru.org/gelbooru.com list decides the result.
def self.match?(url)
url.domain.in?(%w[gelbooru.com])
url.domain.in?(%w[safebooru.org gelbooru.com])
end
def parse
@@ -12,31 +13,50 @@ class Source::URL::Gelbooru < Source::URL
# https://gelbooru.com/index.php?page=post&s=view&id=7798045
# https://www.gelbooru.com/index.php?page=post&s=view&id=7798045
in "gelbooru.com", "index.php" if params[:page] == "post" && params[:s] == "view" && params[:id].present?
# https://safebooru.org/index.php?page=post&s=view&id=4196948
in _, "index.php" if params[:page] == "post" && params[:s] == "view" && params[:id].present?
@post_id = params[:id].to_i
# https://gelbooru.com/index.php?page=post&s=list&md5=99d9977d6c3aa185083a2da22bd8acfb
in "gelbooru.com", "index.php" if params[:page] == "post" && params[:s] == "list" && params[:md5].present?
# https://safebooru.org/index.php?page=post&s=list&md5=99d9977d6c3aa185083a2da22bd8acfb
in _, "index.php" if params[:page] == "post" && params[:s] == "list" && params[:md5].present?
@md5 = params[:md5]
# https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=7798045&json=1
in "gelbooru.com", "index.php" if params[:page] == "dapi" && params[:q] == "index" && params[:id].present?
# https://safebooru.org/index.php?page=dapi&s=post&q=index&id=4196948&json=1
in _, "index.php" if params[:page] == "dapi" && params[:q] == "index" && params[:id].present?
@post_id = params[:id].to_i
# https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
# https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
# https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4
# https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm
# https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
# https://gelbooru.com/thumbnails/08/06/thumbnail_08066c138e7e138a47489a0934c29156.jpg
in "gelbooru.com", ("images" | "samples" | "thumbnails"), h1, h2, /\A(?:\w+_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)\z/i
in _, ("images" | "samples" | "thumbnails") => image_type, /\A\h{2}\z/ => h1, /\A\h{2}\z/ => h2, /\A(?:sample_|thumbnail_)?(\h{32})\.\w+\z/i
@md5 = $1
@full_image_url = "https://#{host}/images/#{h1}/#{h2}/#{md5}.#{file_ext}"
@image_type = image_type
@full_image_url = url.to_s if image_type == "images"
# http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
# http://simg2.gelbooru.com//samples/619/sample_fe84fb3f86020e120f4b4712fcbd3abf.jpeg?755046
in "gelbooru.com", ("images" | "samples"), /\A\d+\z/ => dir, /\A(?:\w+_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)/i
# http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
# https://safebooru.org//images/4016/64779fbfc87020ed5fd94854fe973bc0.jpeg
# https://safebooru.org//samples/4016/sample_64779fbfc87020ed5fd94854fe973bc0.jpg?4196692
# https://safebooru.org/thumbnails/4016/thumbnail_64779fbfc87020ed5fd94854fe973bc0.jpg?4196692
in _, ("images" | "samples" | "thumbnails") => image_type, /\A\d+\z/ => directory, /\A(?:sample_|thumbnail_)?(\h{32})\.\w+\z/
@md5 = $1
@full_image_url = url
@post_id = query if query&.match?(/\A\d+\z/)
@image_type = image_type
@full_image_url = url.to_s if image_type == "images"
# Safebooru uses an unknown 40-character hexadecimal hash for most image URLs.
# https://safebooru.org//images/4016/d2f50befcdc304cbd9030f2d0832029f5fe8cccc.png
# https://safebooru.org//samples/4016/sample_ffc6c5705d31422ddbaa7478deb560c985d2ee71.jpg?4196970
# https://safebooru.org/thumbnails/4016/thumbnail_8d0664867c59acb3103bccd9a9a5562a193eadcd.jpg?4196980
in "safebooru.org", ("images" | "samples" | "thumbnails") => image_type, /\A\d+\z/ => directory, /\A(?:sample_|thumbnail_)?(\h{40})\.\w+\z/
@hash = $1
@post_id = query if query&.match?(/\A\d+\z/)
@image_type = image_type
@full_image_url = url.to_s if image_type == "images"
else
nil
@@ -44,18 +64,27 @@ class Source::URL::Gelbooru < Source::URL
end
# Returns true when the URL was recognized as an image URL.
#
# NOTE(review): `full_image_url.present?` and `image_type.present?` look like the old and new sides of a diff;
# as written the second is the return value.
def image_url?
full_image_url.present?
image_type.present?
end
# Builds the post page URL from the post id, or the md5 search URL when only an md5 is known; evaluates to
# nil when neither is present.
#
# NOTE(review): each branch holds a hard-coded gelbooru.com string followed by a `domain`-interpolated one.
# Presumably the first of each pair is the removed diff line; as written the second is the branch's value.
def page_url
if post_id.present?
"https://gelbooru.com/index.php?page=post&s=view&id=#{post_id}"
"https://#{domain}/index.php?page=post&s=view&id=#{post_id}"
elsif md5.present?
"https://gelbooru.com/index.php?page=post&s=list&md5=#{md5}"
"https://#{domain}/index.php?page=post&s=list&md5=#{md5}"
end
end
# Builds the API URL for the post, searching by `id:<post_id>` when a post id is known and by `md5:<md5>`
# otherwise; evaluates to nil when neither is present.
#
# NOTE(review): the first statement (hard-coded gelbooru.com with `json=1`) is presumably the removed diff
# line; as written it is evaluated and discarded, and the `if`/`elsif` below supplies the return value.
def api_url
"https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" if post_id.present?
# https://gelbooru.com//index.php?page=dapi&s=post&q=index&tags=id:7903922
# https://safebooru.org/index.php?page=dapi&s=post&q=index&tags=id:4197087
if post_id.present?
# "https://#{domain}/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1"
"https://#{domain}/index.php?page=dapi&s=post&q=index&tags=id:#{post_id}"
# https://gelbooru.com//index.php?page=dapi&s=post&q=index&tags=md5:338078144fe77c9e5f35dbb585e749ec
# https://safebooru.org/index.php?page=dapi&s=post&q=index&tags=md5:8c1fe66ff46d03725caa30135ad70e7e
elsif md5.present?
"https://#{domain}/index.php?page=dapi&s=post&q=index&tags=md5:#{md5}"
end
end
end