Add safebooru.org support.

Refactor the Gelbooru source extractor to support Safebooru.org as well.
This commit is contained in:
evazion
2022-11-09 15:42:59 -06:00
parent 35bfcbc3bd
commit 5b4856273f
5 changed files with 153 additions and 37 deletions

View File

@@ -3,6 +3,7 @@
module Danbooru
class Http
class XmlAdapter < HTTP::MimeType::Adapter
HTTP::MimeType.register_adapter "text/xml", self
HTTP::MimeType.register_adapter "application/xml", self
HTTP::MimeType.register_alias "application/xml", :xml

View File

@@ -1,11 +1,11 @@
# frozen_string_literal: true
# Source extractor for Gelbooru. The commentary and artist information are
# pulled from the Gelbooru post's source, while the translated tags include
# both the Gelbooru tags and the source's tags.
# Source extractor for Gelbooru and Safebooru.org. The commentary and artist information are pulled from the
# booru post's source, while the translated tags include both the booru tags and the source's tags.
#
# @see Source::URL::Gelbooru
# @see https://gelbooru.com/index.php?page=wiki&s=view&id=18780 (howto:api)
# @see https://safebooru.org/index.php?page=help&topic=dapi
module Source
class Extractor
class Gelbooru < Source::Extractor
@@ -16,23 +16,27 @@ module Source
end
def image_urls
[api_response[:file_url]].compact
if parsed_url.full_image_url.present?
[parsed_url.full_image_url]
else
[api_response[:file_url]].compact
end
end
def page_url
"https://gelbooru.com/index.php?page=post&s=view&id=#{post_id}" if post_id.present?
"https://#{domain}/index.php?page=post&s=view&id=#{post_id}" if post_id.present?
end
def tags
gelbooru_tags + source_tags
site_tags + source_tags
end
def gelbooru_tags
def site_tags
return [] if api_response.blank?
tags = api_response[:tags].split + ["rating:#{api_response[:rating]}"]
tags.map do |tag|
[tag, "https://gelbooru.com/index.php?page=post&s=list&tags=#{CGI.escape(tag)}"]
[tag, "https://#{domain}/index.php?page=post&s=list&tags=#{CGI.escape(tag)}"]
end
end
@@ -44,13 +48,18 @@ module Source
sub_extractor&.other_names.to_a
end
# The domain of the URL being extracted, as reported by the parsed URL. Used to build
# page and tag-search links for whichever booru the URL belongs to.
def domain
parsed_url.domain
end
def post_id
parsed_url.post_id || parsed_referer&.post_id || post_id_from_md5
parsed_url.post_id || parsed_referer&.post_id || api_response[:id]
end
def api_url
# https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=779812&json=1
"https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" if post_id.present?
# https://gelbooru.com/index.php?page=dapi&s=post&q=index&tags=md5:338078144fe77c9e5f35dbb585e749ec
# https://gelbooru.com/index.php?page=dapi&s=post&q=index&tags=id:7903922
parsed_url.api_url || parsed_referer&.api_url
end
memoize def api_response
@@ -59,20 +68,11 @@ module Source
response = http.cache(1.minute).get(api_url)
return {} unless response.status == 200
response.parse["post"]&.first.to_h.with_indifferent_access
end
memoize def post_id_from_md5
return nil unless parsed_url.page_url.present?
response = http.cache(1.minute).head(parsed_url.page_url)
return nil unless response.status == 200
Source::URL.parse(response.uri).post_id
response.parse.dig("posts", "post").to_h.with_indifferent_access
end
# The extractor for the URL stored in the API response's `source` field, if there is one.
# Returns nil when the response has no source, or when the source is not an http(s) URL.
def sub_extractor
# No source recorded in the API response.
return nil if api_response[:source].nil?
# The source field may hold something other than a URL; only http:// and https:// values are followed.
return nil if !api_response[:source].to_s.match?(%r{\Ahttps?://}i)
# Memoized in @sub_extractor. NOTE(review): `default: nil` presumably makes `find` return nil
# when no extractor matches the source - confirm against Source::Extractor.find.
@sub_extractor ||= Source::Extractor.find(api_response[:source], default: nil)
end
end

View File

@@ -1,10 +1,11 @@
# frozen_string_literal: true
# This covers both Gelbooru and Safebooru.
class Source::URL::Gelbooru < Source::URL
attr_reader :post_id, :md5, :full_image_url
attr_reader :post_id, :md5, :image_type, :full_image_url
def self.match?(url)
url.domain.in?(%w[gelbooru.com])
url.domain.in?(%w[safebooru.org gelbooru.com])
end
def parse
@@ -12,31 +13,50 @@ class Source::URL::Gelbooru < Source::URL
# https://gelbooru.com/index.php?page=post&s=view&id=7798045
# https://www.gelbooru.com/index.php?page=post&s=view&id=7798045
in "gelbooru.com", "index.php" if params[:page] == "post" && params[:s] == "view" && params[:id].present?
# https://safebooru.org/index.php?page=post&s=view&id=4196948
in _, "index.php" if params[:page] == "post" && params[:s] == "view" && params[:id].present?
@post_id = params[:id].to_i
# https://gelbooru.com/index.php?page=post&s=list&md5=99d9977d6c3aa185083a2da22bd8acfb
in "gelbooru.com", "index.php" if params[:page] == "post" && params[:s] == "list" && params[:md5].present?
# https://safebooru.org/index.php?page=post&s=list&md5=99d9977d6c3aa185083a2da22bd8acfb
in _, "index.php" if params[:page] == "post" && params[:s] == "list" && params[:md5].present?
@md5 = params[:md5]
# https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=7798045&json=1
in "gelbooru.com", "index.php" if params[:page] == "dapi" && params[:q] == "index" && params[:id].present?
# https://safebooru.org/index.php?page=dapi&s=post&q=index&id=4196948&json=1
in _, "index.php" if params[:page] == "dapi" && params[:q] == "index" && params[:id].present?
@post_id = params[:id].to_i
# https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
# https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
# https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4
# https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm
# https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
# https://gelbooru.com/thumbnails/08/06/thumbnail_08066c138e7e138a47489a0934c29156.jpg
in "gelbooru.com", ("images" | "samples" | "thumbnails"), h1, h2, /\A(?:\w+_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)\z/i
in _, ("images" | "samples" | "thumbnails") => image_type, /\A\h{2}\z/ => h1, /\A\h{2}\z/ => h2, /\A(?:sample_|thumbnail_)?(\h{32})\.\w+\z/i
@md5 = $1
@full_image_url = "https://#{host}/images/#{h1}/#{h2}/#{md5}.#{file_ext}"
@image_type = image_type
@full_image_url = url.to_s if image_type == "images"
# http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
# http://simg2.gelbooru.com//samples/619/sample_fe84fb3f86020e120f4b4712fcbd3abf.jpeg?755046
in "gelbooru.com", ("images" | "samples"), /\A\d+\z/ => dir, /\A(?:\w+_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)/i
# http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
# https://safebooru.org//images/4016/64779fbfc87020ed5fd94854fe973bc0.jpeg
# https://safebooru.org//samples/4016/sample_64779fbfc87020ed5fd94854fe973bc0.jpg?4196692
# https://safebooru.org/thumbnails/4016/thumbnail_64779fbfc87020ed5fd94854fe973bc0.jpg?4196692
in _, ("images" | "samples" | "thumbnails") => image_type, /\A\d+\z/ => directory, /\A(?:sample_|thumbnail_)?(\h{32})\.\w+\z/
@md5 = $1
@full_image_url = url
@post_id = query if query&.match?(/\A\d+\z/)
@image_type = image_type
@full_image_url = url.to_s if image_type == "images"
# Safebooru uses an unknown 40-character hexadecimal hash (not an MD5) for most image URLs.
# https://safebooru.org//images/4016/d2f50befcdc304cbd9030f2d0832029f5fe8cccc.png
# https://safebooru.org//samples/4016/sample_ffc6c5705d31422ddbaa7478deb560c985d2ee71.jpg?4196970
# https://safebooru.org/thumbnails/4016/thumbnail_8d0664867c59acb3103bccd9a9a5562a193eadcd.jpg?4196980
in "safebooru.org", ("images" | "samples" | "thumbnails") => image_type, /\A\d+\z/ => directory, /\A(?:sample_|thumbnail_)?(\h{40})\.\w+\z/
@hash = $1
@post_id = query if query&.match?(/\A\d+\z/)
@image_type = image_type
@full_image_url = url.to_s if image_type == "images"
else
nil
@@ -44,18 +64,27 @@ class Source::URL::Gelbooru < Source::URL
end
def image_url?
full_image_url.present?
image_type.present?
end
def page_url
if post_id.present?
"https://gelbooru.com/index.php?page=post&s=view&id=#{post_id}"
"https://#{domain}/index.php?page=post&s=view&id=#{post_id}"
elsif md5.present?
"https://gelbooru.com/index.php?page=post&s=list&md5=#{md5}"
"https://#{domain}/index.php?page=post&s=list&md5=#{md5}"
end
end
def api_url
"https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" if post_id.present?
# https://gelbooru.com/index.php?page=dapi&s=post&q=index&tags=id:7903922
# https://safebooru.org/index.php?page=dapi&s=post&q=index&tags=id:4197087
if post_id.present?
# "https://#{domain}/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1"
"https://#{domain}/index.php?page=dapi&s=post&q=index&tags=id:#{post_id}"
# https://gelbooru.com/index.php?page=dapi&s=post&q=index&tags=md5:338078144fe77c9e5f35dbb585e749ec
# https://safebooru.org/index.php?page=dapi&s=post&q=index&tags=md5:8c1fe66ff46d03725caa30135ad70e7e
elsif md5.present?
"https://#{domain}/index.php?page=dapi&s=post&q=index&tags=md5:#{md5}"
end
end
end

View File

@@ -441,6 +441,7 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
should_upload_successfully("https://konachan.com/image/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916.png")
should_upload_successfully("https://gelbooru.com/index.php?page=post&s=view&id=7798121")
should_upload_successfully("https://safebooru.org/index.php?page=post&s=view&id=4189916")
should_upload_successfully("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663")
should_upload_successfully("http://seiga.nicovideo.jp/seiga/im4937663")

View File

@@ -101,5 +101,90 @@ module Sources
assert_equal("https://gelbooru.com/index.php?page=post&s=list&md5=edd1d2b3881cf70c3acf540780507531", Source::URL.page_url(source2))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&md5=0b3ae5e225072b8e391c827cb470d29c", Source::URL.page_url(source3))
end
context "Safebooru:" do
# source: https://i.pximg.net/img-original/img/2021/10/24/09/53/44/93646177_p0.jpg
context "A https://safebooru.org/images/$dir/$hash.jpg?$post_id URL without a referer" do
strategy_should_work(
"https://safebooru.org//images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg?4189916",
image_urls: ["https://safebooru.org//images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg?4189916"],
artist_name: "チー之介",
profile_url: "https://www.pixiv.net/users/57673194",
tags: %w[1girl ^^^ animal_ears black_bow black_bowtie black_gloves black_hair black_hairband black_skirt black_wings blue_flower blue_rose bow bowtie brooch center_frills changing_room cheesecake_(artist) collared_shirt commentary dated demon_wings fangs flower flying_sweatdrops frilled_hairband frilled_sleeves frills frown gloves hair_flower hair_ornament hair_over_one_eye hairband halloween halloween_costume high-waist_skirt highres horse_ears horse_girl indoors jack-o'-lantern_ornament jewelry lace-trimmed_gloves lace_trim long_hair looking_at_viewer make_up_in_halloween!_(umamusume) official_alternate_costume open_mouth orange_bow puffy_short_sleeves puffy_sleeves rice_shower_(make_up_vampire!)_(umamusume) rice_shower_(umamusume) rose shirt short_sleeves skirt skirt_bow solo spider_web_print standing star_ornament twitter_username umamusume violet_eyes white_shirt wings rating:q ウマ娘 ライスシャワー ハロウィンイラスト 二次創作 ウマ娘プリティーダービー ライスシャワー(ウマ娘) 更衣室 Make_up_Vampire! ドラキュライス 困り顔],
artist_commentary_title: "ハロウィンライス",
artist_commentary_desc: "更衣室でハロウィン衣装に着替えたあと「がおーっ!」のポーズを鏡の前で密かに練習してたら、見つかっちゃってあわてるライスシャワーを描きました。",
download_size: 771_175,
)
end
# source: https://i.pximg.net/img-original/img/2021/10/24/09/53/44/93646177_p0.jpg
# The filename here is a 40-character hash rather than a 32-character MD5, and the URL has no
# ?$post_id query string. Presumably that leaves no post ID or MD5 to look the post up by, which
# is why only the image URL itself is expected and the artist, tags, and commentary are empty.
context "A https://safebooru.org/images/$dir/$hash.jpg URL without a referer" do
strategy_should_work(
"https://safebooru.org//images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg",
image_urls: ["https://safebooru.org//images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg"],
artist_name: nil,
profile_url: nil,
tags: [],
artist_commentary_title: nil,
artist_commentary_desc: nil,
)
end
# source: https://i.pximg.net/img-original/img/2021/10/24/09/53/44/93646177_p0.jpg
context "A https://safebooru.org/images/$dir/$hash.jpg URL with a referer" do
strategy_should_work(
"https://safebooru.org//images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg",
referer: "https://safebooru.org/index.php?page=post&s=view&id=4189916",
image_urls: ["https://safebooru.org//images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg"],
artist_name: "チー之介",
profile_url: "https://www.pixiv.net/users/57673194",
tags: %w[1girl ^^^ animal_ears black_bow black_bowtie black_gloves black_hair black_hairband black_skirt black_wings blue_flower blue_rose bow bowtie brooch center_frills changing_room cheesecake_(artist) collared_shirt commentary dated demon_wings fangs flower flying_sweatdrops frilled_hairband frilled_sleeves frills frown gloves hair_flower hair_ornament hair_over_one_eye hairband halloween halloween_costume high-waist_skirt highres horse_ears horse_girl indoors jack-o'-lantern_ornament jewelry lace-trimmed_gloves lace_trim long_hair looking_at_viewer make_up_in_halloween!_(umamusume) official_alternate_costume open_mouth orange_bow puffy_short_sleeves puffy_sleeves rice_shower_(make_up_vampire!)_(umamusume) rice_shower_(umamusume) rose shirt short_sleeves skirt skirt_bow solo spider_web_print standing star_ornament twitter_username umamusume violet_eyes white_shirt wings rating:q ウマ娘 ライスシャワー ハロウィンイラスト 二次創作 ウマ娘プリティーダービー ライスシャワー(ウマ娘) 更衣室 Make_up_Vampire! ドラキュライス 困り顔],
artist_commentary_title: "ハロウィンライス",
artist_commentary_desc: "更衣室でハロウィン衣装に着替えたあと「がおーっ!」のポーズを鏡の前で密かに練習してたら、見つかっちゃってあわてるライスシャワーを描きました。",
download_size: 771_175,
)
end
# source: https://i.pximg.net/img-original/img/2021/10/24/09/53/44/93646177_p0.jpg
context "A https://safebooru.org/index.php?page=post&s=view&id=$post_id URL" do
strategy_should_work(
"https://safebooru.org/index.php?page=post&s=view&id=4189916",
image_urls: ["https://safebooru.org/images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg"],
artist_name: "チー之介",
profile_url: "https://www.pixiv.net/users/57673194",
tags: %w[1girl ^^^ animal_ears black_bow black_bowtie black_gloves black_hair black_hairband black_skirt black_wings blue_flower blue_rose bow bowtie brooch center_frills changing_room cheesecake_(artist) collared_shirt commentary dated demon_wings fangs flower flying_sweatdrops frilled_hairband frilled_sleeves frills frown gloves hair_flower hair_ornament hair_over_one_eye hairband halloween halloween_costume high-waist_skirt highres horse_ears horse_girl indoors jack-o'-lantern_ornament jewelry lace-trimmed_gloves lace_trim long_hair looking_at_viewer make_up_in_halloween!_(umamusume) official_alternate_costume open_mouth orange_bow puffy_short_sleeves puffy_sleeves rice_shower_(make_up_vampire!)_(umamusume) rice_shower_(umamusume) rose shirt short_sleeves skirt skirt_bow solo spider_web_print standing star_ornament twitter_username umamusume violet_eyes white_shirt wings rating:q ウマ娘 ライスシャワー ハロウィンイラスト 二次創作 ウマ娘プリティーダービー ライスシャワー(ウマ娘) 更衣室 Make_up_Vampire! ドラキュライス 困り顔],
artist_commentary_title: "ハロウィンライス",
artist_commentary_desc: "更衣室でハロウィン衣装に着替えたあと「がおーっ!」のポーズを鏡の前で密かに練習してたら、見つかっちゃってあわてるライスシャワーを描きました。",
download_size: 771_175,
)
end
# source: https://i.pximg.net/img-original/img/2021/10/24/09/53/44/93646177_p0.jpg
context "A https://safebooru.org/index.php?page=post&s=list&md5=$md5 URL" do
strategy_should_work(
"https://safebooru.org/index.php?page=post&s=list&md5=8ca0f76e014175f11085d64932d980a5",
image_urls: ["https://safebooru.org/images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg"],
artist_name: "チー之介",
profile_url: "https://www.pixiv.net/users/57673194",
tags: %w[1girl ^^^ animal_ears black_bow black_bowtie black_gloves black_hair black_hairband black_skirt black_wings blue_flower blue_rose bow bowtie brooch center_frills changing_room cheesecake_(artist) collared_shirt commentary dated demon_wings fangs flower flying_sweatdrops frilled_hairband frilled_sleeves frills frown gloves hair_flower hair_ornament hair_over_one_eye hairband halloween halloween_costume high-waist_skirt highres horse_ears horse_girl indoors jack-o'-lantern_ornament jewelry lace-trimmed_gloves lace_trim long_hair looking_at_viewer make_up_in_halloween!_(umamusume) official_alternate_costume open_mouth orange_bow puffy_short_sleeves puffy_sleeves rice_shower_(make_up_vampire!)_(umamusume) rice_shower_(umamusume) rose shirt short_sleeves skirt skirt_bow solo spider_web_print standing star_ornament twitter_username umamusume violet_eyes white_shirt wings rating:q ウマ娘 ライスシャワー ハロウィンイラスト 二次創作 ウマ娘プリティーダービー ライスシャワー(ウマ娘) 更衣室 Make_up_Vampire! ドラキュライス 困り顔],
artist_commentary_title: "ハロウィンライス",
artist_commentary_desc: "更衣室でハロウィン衣装に着替えたあと「がおーっ!」のポーズを鏡の前で密かに練習してたら、見つかっちゃってあわてるライスシャワーを描きました。",
download_size: 771_175,
)
end
# source: https://i.pximg.net/img-original/img/2021/10/24/09/53/44/93646177_p0.jpg
context "A https://safebooru.org/images/$dir/$md5.jpg URL without a referer" do
strategy_should_work(
"https://safebooru.org//images/4016/64779fbfc87020ed5fd94854fe973bc0.jpeg",
image_urls: ["https://safebooru.org//images/4016/64779fbfc87020ed5fd94854fe973bc0.jpeg"],
artist_name: nil,
profile_url: nil,
tags: %w[brown_eyes d4dj dress long_hair pink_hair sword yano_hiiro yorha_no._2_type_b rating:s],
artist_commentary_title: nil,
artist_commentary_desc: nil,
)
end
end
end
end