sources: add Gelbooru support.

Add support for uploading posts from Gelbooru. Note that the translated
tags will include both the Gelbooru tags and the tags from the Gelbooru
post's source. The commentary and artist information will also be taken
from the Gelbooru post's source. The source of the Danbooru post, however,
will be left as the Gelbooru post itself, not the Gelbooru post's source.
This commit is contained in:
evazion
2022-10-10 23:50:04 -05:00
parent 7beb1a5c0b
commit f05268df7f
7 changed files with 194 additions and 9 deletions

View File

@@ -39,6 +39,7 @@ module Source
Source::Extractor::Moebooru,
Source::Extractor::Nijie,
Source::Extractor::ArtStation,
Source::Extractor::Gelbooru,
Source::Extractor::HentaiFoundry,
Source::Extractor::Fanbox,
Source::Extractor::Mastodon,

View File

@@ -0,0 +1,76 @@
# frozen_string_literal: true
# Source extractor for Gelbooru. The commentary and artist information are
# pulled from the Gelbooru post's source, while the translated tags include
# both the Gelbooru tags and the source's tags.
#
# @see Source::URL::Gelbooru
# @see https://gelbooru.com/index.php?page=wiki&s=view&id=18780 (howto:api)
module Source
  class Extractor
    # Extracts upload data for a Gelbooru post.
    #
    # The image URL and the Gelbooru tags come from the Gelbooru post API.
    # Artist and commentary data are delegated to the extractor for the URL in
    # the post's `source` field, when there is one.
    class Gelbooru < Source::Extractor
      # Every delegated method returns nil when there is no sub extractor
      # (no API response, blank source, or a source no extractor was found for).
      delegate :artist_name, :profile_url, :profile_urls, :other_names, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_extractor, allow_nil: true

      # @return [Boolean] true if the URL was parsed as a Gelbooru URL
      def match?
        Source::URL::Gelbooru === parsed_url
      end

      # @return [Array<String>] the post's `file_url`, or an empty array when
      #   the API response is unavailable or has no file URL
      def image_urls
        [api_response&.dig(:file_url)].compact
      end

      # @return [String, nil] the canonical post page URL, if the post id is known
      def page_url
        "https://gelbooru.com/index.php?page=post&s=view&id=#{post_id}" if post_id.present?
      end

      # @return [Array<Array(String, String)>] the Gelbooru tags followed by
      #   the tags reported by the source's extractor
      def tags
        gelbooru_tags + source_tags
      end

      # @return [Array<Array(String, String)>] [tag, tag search URL] pairs for
      #   the post's tags plus a `rating:<rating>` pseudo-tag
      def gelbooru_tags
        return [] if api_response.nil?

        # `to_s` so a response without a `tags` field yields no tags instead of raising.
        names = api_response[:tags].to_s.split + ["rating:#{api_response[:rating]}"]
        names.map do |tag|
          [tag, "https://gelbooru.com/index.php?page=post&s=list&tags=#{CGI.escape(tag)}"]
        end
      end

      # @return [Array] the tags from the source's extractor, or an empty array
      def source_tags
        sub_extractor&.tags.to_a
      end

      # The post id, taken from the URL, else from the referer, else looked up
      # from the image's MD5.
      #
      # @return [Integer, nil]
      def post_id
        # `try` rather than `&.`: the referer may have been parsed as a URL
        # class that doesn't define `post_id`.
        parsed_url.post_id || parsed_referer.try(:post_id) || post_id_from_md5
      end

      # @return [String, nil] the JSON API URL for the post, if the post id is known
      def api_url
        # https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=779812&json=1
        "https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" if post_id.present?
      end

      # Fetches the post from the Gelbooru API.
      #
      # @return [ActiveSupport::HashWithIndifferentAccess, nil] the first post
      #   in the response, or nil if the post id is unknown, the request didn't
      #   return 200, or the response contains no post
      memoize def api_response
        return nil unless api_url.present?

        response = http.cache(1.minute).get(api_url)
        return nil unless response.status == 200

        # The `post` key may be missing when no post matches the id, so don't
        # assume it is there.
        response.parse["post"]&.first&.with_indifferent_access
      end

      # Resolves the post id for an image URL by requesting the MD5 search
      # page and reading the post id from the URL the request ends up at.
      #
      # @return [Integer, nil]
      memoize def post_id_from_md5
        return nil unless parsed_url.image_url? && parsed_url.page_url.present?

        response = http.cache(1.minute).head(parsed_url.page_url)
        return nil unless response.status == 200

        # `try` covers a final URL that can't be parsed or has no `post_id`.
        Source::URL.parse(response.uri).try(:post_id)
      end

      # @return [Source::Extractor, nil] the extractor for the Gelbooru post's
      #   source, or nil if the API response or the source is missing
      def sub_extractor
        source = api_response&.dig(:source)
        return nil if source.blank?

        @sub_extractor ||= Source::Extractor.find(source, default: nil)
      end
    end
  end
end

View File

@@ -40,6 +40,7 @@ module Source
Source::URL::Fantia,
Source::URL::Fc2,
Source::URL::Foundation,
Source::URL::Gelbooru,
Source::URL::HentaiFoundry,
Source::URL::Instagram,
Source::URL::Lofter,

View File

@@ -0,0 +1,56 @@
# frozen_string_literal: true
# Parses Gelbooru post page, MD5 search, post API, and image URLs.
class Source::URL::Gelbooru < Source::URL
  # post_id:        the Gelbooru post id, for post page and post API URLs
  # md5:            the image's MD5, for MD5 search and image URLs
  # full_image_url: the URL under /images/ derived from an image, sample, or thumbnail URL
  attr_reader :post_id, :md5, :full_image_url

  # @param url [Source::URL] the URL to test
  # @return [Boolean] true if the URL's domain is gelbooru.com
  def self.match?(url)
    url.domain.in?(%w[gelbooru.com])
  end

  # Sets `post_id`, `md5`, and `full_image_url` according to the URL's shape.
  # Unrecognized Gelbooru URLs leave all of them nil.
  def parse
    case [domain, *path_segments]

    # https://gelbooru.com/index.php?page=post&s=view&id=7798045
    # https://www.gelbooru.com/index.php?page=post&s=view&id=7798045
    in "gelbooru.com", "index.php" if params[:page] == "post" && params[:s] == "view" && params[:id].present?
      @post_id = params[:id].to_i

    # https://gelbooru.com/index.php?page=post&s=list&md5=99d9977d6c3aa185083a2da22bd8acfb
    in "gelbooru.com", "index.php" if params[:page] == "post" && params[:s] == "list" && params[:md5].present?
      @md5 = params[:md5]

    # https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=7798045&json=1
    #
    # `s=post` is required so that an `id` from another kind of API request
    # isn't mistaken for a post id.
    in "gelbooru.com", "index.php" if params[:page] == "dapi" && params[:s] == "post" && params[:q] == "index" && params[:id].present?
      @post_id = params[:id].to_i

    # https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
    # http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
    # https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
    # https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4
    # https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm
    # https://gelbooru.com/thumbnails/08/06/thumbnail_08066c138e7e138a47489a0934c29156.jpg
    #
    # `*subdirs` rather than exactly two directories, so that the
    # single-directory form (`/images/2003/...`) is recognized too.
    in "gelbooru.com", ("images" | "samples" | "thumbnails"), *subdirs, /\A(?:\w+_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)\z/i
      @md5 = $1

      # NOTE(review): this reuses the extension of the given URL; for sample
      # and thumbnail URLs the full image may have a different extension -- confirm.
      image_path = ["images", *subdirs, "#{md5}.#{file_ext}"].join("/")
      @full_image_url = "https://#{host}/#{image_path}"

    else
      nil
    end
  end

  # @return [Boolean] true if the URL was recognized as an image, sample, or thumbnail URL
  def image_url?
    full_image_url.present?
  end

  # @return [String, nil] the post page if the post id is known, else the MD5
  #   search page if the MD5 is known, else nil
  def page_url
    if post_id.present?
      "https://gelbooru.com/index.php?page=post&s=view&id=#{post_id}"
    elsif md5.present?
      "https://gelbooru.com/index.php?page=post&s=list&md5=#{md5}"
    end
  end

  # @return [String, nil] the JSON API URL for the post, if the post id is known
  def api_url
    "https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" if post_id.present?
  end
end

View File

@@ -131,15 +131,6 @@ class Source::URL::Null < Source::URL
@work_id = $1
@page_url = "https://www.facebook.com/photo.php?fbid=#{work_id}"
# https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
# http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
# https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
# https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4
# https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm
in _, "gelbooru.com", ("images" | "samples"), *subdirs, /^(?:sample_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)$/
@md5 = $1
@page_url = "https://gelbooru.com/index.php?page=post&s=list&tags=md5:#{@md5}"
# https://a.hitomi.la/galleries/907838/1.png
# https://0a.hitomi.la/galleries/1169701/23.png
# https://aa.hitomi.la/galleries/990722/003_01_002.jpg

View File

@@ -313,6 +313,8 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
should_upload_successfully("https://konachan.com/post/show/270916")
should_upload_successfully("https://konachan.com/image/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916.png")
should_upload_successfully("https://gelbooru.com/index.php?page=post&s=view&id=7798121")
should_upload_successfully("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663")
should_upload_successfully("http://seiga.nicovideo.jp/seiga/im4937663")
should_upload_successfully("https://seiga.nicovideo.jp/image/source/9146749")

View File

@@ -0,0 +1,58 @@
require "test_helper"
module Sources
  class GelbooruTest < ActiveSupport::TestCase
    # Every context below expects the same extracted data, so the expectations
    # are built in one place. A lambda is used so that each call gets its own
    # freshly built hash, just as if the literals were written inline.
    expected_data = lambda do
      {
        image_urls: ["https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg"],
        artist_name: "灰色灰烬bot",
        profile_url: "https://www.pixiv.net/users/3330425",
        tags: %w[1girl back_bow bangs black_pantyhose blue_bow blue_hair blue_ribbon boots bow cape chibi chinese_commentary closed_eyes full_body hair_between_eyes hair_ribbon hat hatsune_miku indai_(3330425) on_ground pantyhose pom_pom_(clothes) rabbit rabbit_yukine rating:general ribbon simple_background sitting solo twintails vocaloid white_background white_cape white_headwear witch_hat yuki_miku yuki_miku_(2014) 初音ミク 雪ミク],
        artist_commentary_title: "2010~2021雪ミク",
        artist_commentary_desc: "动作参考@速写班长",
        download_size: 480_621,
      }
    end

    context "A Gelbooru direct image url without a referer" do
      strategy_should_work(
        "https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg",
        **expected_data.call
      )
    end

    context "A Gelbooru direct image url with a referer" do
      strategy_should_work(
        "https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg",
        referer: "https://gelbooru.com/index.php?page=post&s=view&id=7798121",
        **expected_data.call
      )
    end

    context "A Gelbooru sample image url" do
      strategy_should_work(
        "https://img3.gelbooru.com/samples/04/f2/sample_04f2767c64593c3030ce74ecc2528704.jpg",
        **expected_data.call
      )
    end

    context "A Gelbooru page url" do
      strategy_should_work(
        "https://gelbooru.com/index.php?page=post&s=view&id=7798121",
        **expected_data.call
      )
    end
  end
end