From f05268df7ff0d32bd95ab22d032eba8430b91fa9 Mon Sep 17 00:00:00 2001
From: evazion
Date: Mon, 10 Oct 2022 23:50:04 -0500
Subject: [PATCH] sources: add Gelbooru support.

Add support for uploading posts from Gelbooru.

Note that the translated tags will include both the Gelbooru tags and
the tags from the Gelbooru post's source. The commentary and artist
information will also be taken from the Gelbooru post's source. The
source of the Danbooru post however will be left as the Gelbooru post
itself, not as the Gelbooru post's source.
---
Review revision notes (this section is not part of the commit message):

* Source::Extractor::Gelbooru: #image_urls, #sub_extractor and #api_response
  no longer raise when the API lookup yields no post record.
* Source::URL::Gelbooru: image URLs with a single subdirectory
  (/images/2003/<md5>.png) are recognized again, as they were by
  Source::URL::Null; dapi URLs are only treated as posts when s=post.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Context-line whitespace is best-effort and the blob
  hashes on the `index` lines are carried over from the previous revision;
  apply with `git apply --ignore-whitespace` if a hunk is rejected.

 app/logical/source/extractor.rb            |  1 +
 app/logical/source/extractor/gelbooru.rb   | 94 ++++++++++++++++++++++
 app/logical/source/url.rb                  |  1 +
 app/logical/source/url/gelbooru.rb         | 63 +++++++++++++++
 app/logical/source/url/null.rb             |  9 ---
 test/functional/uploads_controller_test.rb |  2 +
 test/unit/sources/gelbooru_test.rb         | 77 ++++++++++++++++++
 7 files changed, 238 insertions(+), 9 deletions(-)
 create mode 100644 app/logical/source/extractor/gelbooru.rb
 create mode 100644 app/logical/source/url/gelbooru.rb
 create mode 100644 test/unit/sources/gelbooru_test.rb

diff --git a/app/logical/source/extractor.rb b/app/logical/source/extractor.rb
index e79daa769..f720e6a01 100644
--- a/app/logical/source/extractor.rb
+++ b/app/logical/source/extractor.rb
@@ -39,6 +39,7 @@ module Source
       Source::Extractor::Moebooru,
       Source::Extractor::Nijie,
       Source::Extractor::ArtStation,
+      Source::Extractor::Gelbooru,
       Source::Extractor::HentaiFoundry,
       Source::Extractor::Fanbox,
       Source::Extractor::Mastodon,
diff --git a/app/logical/source/extractor/gelbooru.rb b/app/logical/source/extractor/gelbooru.rb
new file mode 100644
index 000000000..b3d1717ac
--- /dev/null
+++ b/app/logical/source/extractor/gelbooru.rb
@@ -0,0 +1,94 @@
+# frozen_string_literal: true
+
+# Source extractor for Gelbooru. The commentary and artist information are
+# pulled from the Gelbooru post's source, while the translated tags include
+# both the Gelbooru tags and the source's tags.
+#
+# When the Gelbooru API record is unavailable, `image_urls` and `tags` return
+# empty lists and the delegated artist/commentary methods return nil.
+#
+# @see Source::URL::Gelbooru
+# @see https://gelbooru.com/index.php?page=wiki&s=view&id=18780 (howto:api)
+module Source
+  class Extractor
+    class Gelbooru < Source::Extractor
+      delegate :artist_name, :profile_url, :profile_urls, :other_names, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_extractor, allow_nil: true
+
+      def match?
+        Source::URL::Gelbooru === parsed_url
+      end
+
+      # @return [Array<String>] the `file_url` from the API record, if there is one
+      def image_urls
+        [api_response&.dig(:file_url)].compact
+      end
+
+      def page_url
+        "https://gelbooru.com/index.php?page=post&s=view&id=#{post_id}" if post_id.present?
+      end
+
+      # @return [Array] the Gelbooru tags followed by the tags of the post's source
+      def tags
+        gelbooru_tags + source_tags
+      end
+
+      # @return [Array<Array(String, String)>] [tag, tag search URL] pairs for
+      #   the post's tags plus a `rating:` pseudo-tag; empty without an API record
+      def gelbooru_tags
+        return [] if api_response.nil?
+
+        tags = api_response[:tags].split + ["rating:#{api_response[:rating]}"]
+        tags.map do |tag|
+          [tag, "https://gelbooru.com/index.php?page=post&s=list&tags=#{CGI.escape(tag)}"]
+        end
+      end
+
+      def source_tags
+        sub_extractor&.tags.to_a
+      end
+
+      # The post id from the URL, else from the referer, else looked up by MD5.
+      def post_id
+        parsed_url.post_id || parsed_referer&.post_id || post_id_from_md5
+      end
+
+      def api_url
+        # https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=779812&json=1
+        "https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" if post_id.present?
+      end
+
+      # @return [Hash, nil] the first post in the API response, or nil if there
+      #   is no post id, the request fails, or the response contains no post
+      memoize def api_response
+        return nil unless api_url.present?
+
+        response = http.cache(1.minute).get(api_url)
+        return nil unless response.status == 200
+
+        # Don't assume the "post" list is present and non-empty.
+        response.parse["post"]&.first&.with_indifferent_access
+      end
+
+      # Looks up the post id for an image URL by requesting the URL's MD5 search
+      # page and parsing the URL of the response (presumably a redirect to the
+      # post page -- confirm).
+      memoize def post_id_from_md5
+        return nil unless parsed_url.image_url? && parsed_url.page_url.present?
+
+        response = http.cache(1.minute).head(parsed_url.page_url)
+        return nil unless response.status == 200
+
+        Source::URL.parse(response.uri).post_id
+      end
+
+      # @return [Source::Extractor, nil] the extractor for the Gelbooru post's
+      #   source, or nil if the API record or its source is missing
+      def sub_extractor
+        source_url = api_response&.dig(:source)
+        return nil if source_url.blank?
+
+        @sub_extractor ||= Source::Extractor.find(source_url, default: nil)
+      end
+    end
+  end
+end
diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb
index 4cac2fd7e..e9c48ca00 100644
--- a/app/logical/source/url.rb
+++ b/app/logical/source/url.rb
@@ -40,6 +40,7 @@ module Source
       Source::URL::Fantia,
       Source::URL::Fc2,
       Source::URL::Foundation,
+      Source::URL::Gelbooru,
       Source::URL::HentaiFoundry,
       Source::URL::Instagram,
       Source::URL::Lofter,
diff --git a/app/logical/source/url/gelbooru.rb b/app/logical/source/url/gelbooru.rb
new file mode 100644
index 000000000..8c9e7f6a8
--- /dev/null
+++ b/app/logical/source/url/gelbooru.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+# Parses Gelbooru post page, API, and image URLs.
+#
+# @see Source::Extractor::Gelbooru
+class Source::URL::Gelbooru < Source::URL
+  attr_reader :post_id, :md5, :full_image_url
+
+  def self.match?(url)
+    url.domain.in?(%w[gelbooru.com])
+  end
+
+  def parse
+    case [domain, *path_segments]
+
+    # https://gelbooru.com/index.php?page=post&s=view&id=7798045
+    # https://www.gelbooru.com/index.php?page=post&s=view&id=7798045
+    in "gelbooru.com", "index.php" if params[:page] == "post" && params[:s] == "view" && params[:id].present?
+      @post_id = params[:id].to_i
+
+    # https://gelbooru.com/index.php?page=post&s=list&md5=99d9977d6c3aa185083a2da22bd8acfb
+    in "gelbooru.com", "index.php" if params[:page] == "post" && params[:s] == "list" && params[:md5].present?
+      @md5 = params[:md5]
+
+    # Requires s=post: in other API endpoints (e.g. s=tag) `id` isn't a post id.
+    # https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=7798045&json=1
+    in "gelbooru.com", "index.php" if params[:page] == "dapi" && params[:s] == "post" && params[:q] == "index" && params[:id].present?
+      @post_id = params[:id].to_i
+
+    # The number of subdirectories varies (see the second example), so match them with a splat.
+    # https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
+    # http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
+    # https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
+    # https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4
+    # https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm
+    # https://gelbooru.com/thumbnails/08/06/thumbnail_08066c138e7e138a47489a0934c29156.jpg
+    in "gelbooru.com", ("images" | "samples" | "thumbnails"), *subdirs, /\A(?:\w+_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)\z/i
+      @md5 = $1
+      image_path = ["images", *subdirs, "#{md5}.#{file_ext}"].join("/")
+      @full_image_url = "https://#{host}/#{image_path}"
+
+    else
+      nil
+    end
+  end
+
+  def image_url?
+    full_image_url.present?
+  end
+
+  # The post page if the post id is known, else the MD5 search page.
+  def page_url
+    if post_id.present?
+      "https://gelbooru.com/index.php?page=post&s=view&id=#{post_id}"
+    elsif md5.present?
+      "https://gelbooru.com/index.php?page=post&s=list&md5=#{md5}"
+    end
+  end
+
+  def api_url
+    "https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" if post_id.present?
+  end
+end
diff --git a/app/logical/source/url/null.rb b/app/logical/source/url/null.rb
index 92ddd4ec4..9ded3f6d0 100644
--- a/app/logical/source/url/null.rb
+++ b/app/logical/source/url/null.rb
@@ -131,15 +131,6 @@ class Source::URL::Null < Source::URL
       @work_id = $1
       @page_url = "https://www.facebook.com/photo.php?fbid=#{work_id}"
 
-    # https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
-    # http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
-    # https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
-    # https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4
-    # https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm
-    in _, "gelbooru.com", ("images" | "samples"), *subdirs, /^(?:sample_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)$/
-      @md5 = $1
-      @page_url = "https://gelbooru.com/index.php?page=post&s=list&tags=md5:#{@md5}"
-
     # https://a.hitomi.la/galleries/907838/1.png
     # https://0a.hitomi.la/galleries/1169701/23.png
     # https://aa.hitomi.la/galleries/990722/003_01_002.jpg
diff --git a/test/functional/uploads_controller_test.rb b/test/functional/uploads_controller_test.rb
index c75aa2926..11ad1860b 100644
--- a/test/functional/uploads_controller_test.rb
+++ b/test/functional/uploads_controller_test.rb
@@ -313,6 +313,8 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
       should_upload_successfully("https://konachan.com/post/show/270916")
       should_upload_successfully("https://konachan.com/image/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916.png")
 
+      should_upload_successfully("https://gelbooru.com/index.php?page=post&s=view&id=7798121")
+
       should_upload_successfully("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663")
       should_upload_successfully("http://seiga.nicovideo.jp/seiga/im4937663")
       should_upload_successfully("https://seiga.nicovideo.jp/image/source/9146749")
diff --git a/test/unit/sources/gelbooru_test.rb b/test/unit/sources/gelbooru_test.rb
new file mode 100644
index 000000000..128514dad
--- /dev/null
+++ b/test/unit/sources/gelbooru_test.rb
@@ -0,0 +1,77 @@
+require "test_helper"
+
+module Sources
+  class GelbooruTest < ActiveSupport::TestCase
+    context "A Gelbooru direct image url without a referer" do
+      strategy_should_work(
+        "https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg",
+        image_urls: ["https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg"],
+        artist_name: "灰色灰烬bot",
+        profile_url: "https://www.pixiv.net/users/3330425",
+        tags: %w[1girl back_bow bangs black_pantyhose blue_bow blue_hair blue_ribbon boots bow cape chibi chinese_commentary closed_eyes full_body hair_between_eyes hair_ribbon hat hatsune_miku indai_(3330425) on_ground pantyhose pom_pom_(clothes) rabbit rabbit_yukine rating:general ribbon simple_background sitting solo twintails vocaloid white_background white_cape white_headwear witch_hat yuki_miku yuki_miku_(2014) 初音ミク 雪ミク],
+        artist_commentary_title: "2010~2021雪ミク",
+        artist_commentary_desc: "动作参考@速写班长",
+        download_size: 480_621,
+      )
+    end
+
+    context "A Gelbooru direct image url with a referer" do
+      strategy_should_work(
+        "https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg",
+        referer: "https://gelbooru.com/index.php?page=post&s=view&id=7798121",
+        image_urls: ["https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg"],
+        artist_name: "灰色灰烬bot",
+        profile_url: "https://www.pixiv.net/users/3330425",
+        tags: %w[1girl back_bow bangs black_pantyhose blue_bow blue_hair blue_ribbon boots bow cape chibi chinese_commentary closed_eyes full_body hair_between_eyes hair_ribbon hat hatsune_miku indai_(3330425) on_ground pantyhose pom_pom_(clothes) rabbit rabbit_yukine rating:general ribbon simple_background sitting solo twintails vocaloid white_background white_cape white_headwear witch_hat yuki_miku yuki_miku_(2014) 初音ミク 雪ミク],
+        artist_commentary_title: "2010~2021雪ミク",
+        artist_commentary_desc: "动作参考@速写班长",
+        download_size: 480_621,
+      )
+    end
+
+    context "A Gelbooru sample image url" do
+      strategy_should_work(
+        "https://img3.gelbooru.com/samples/04/f2/sample_04f2767c64593c3030ce74ecc2528704.jpg",
+        image_urls: ["https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg"],
+        artist_name: "灰色灰烬bot",
+        profile_url: "https://www.pixiv.net/users/3330425",
+        tags: %w[1girl back_bow bangs black_pantyhose blue_bow blue_hair blue_ribbon boots bow cape chibi chinese_commentary closed_eyes full_body hair_between_eyes hair_ribbon hat hatsune_miku indai_(3330425) on_ground pantyhose pom_pom_(clothes) rabbit rabbit_yukine rating:general ribbon simple_background sitting solo twintails vocaloid white_background white_cape white_headwear witch_hat yuki_miku yuki_miku_(2014) 初音ミク 雪ミク],
+        artist_commentary_title: "2010~2021雪ミク",
+        artist_commentary_desc: "动作参考@速写班长",
+        download_size: 480_621,
+      )
+    end
+
+    context "A Gelbooru page url" do
+      strategy_should_work(
+        "https://gelbooru.com/index.php?page=post&s=view&id=7798121",
+        image_urls: ["https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg"],
+        artist_name: "灰色灰烬bot",
+        profile_url: "https://www.pixiv.net/users/3330425",
+        tags: %w[1girl back_bow bangs black_pantyhose blue_bow blue_hair blue_ribbon boots bow cape chibi chinese_commentary closed_eyes full_body hair_between_eyes hair_ribbon hat hatsune_miku indai_(3330425) on_ground pantyhose pom_pom_(clothes) rabbit rabbit_yukine rating:general ribbon simple_background sitting solo twintails vocaloid white_background white_cape white_headwear witch_hat yuki_miku yuki_miku_(2014) 初音ミク 雪ミク],
+        artist_commentary_title: "2010~2021雪ミク",
+        artist_commentary_desc: "动作参考@速写班长",
+        download_size: 480_621,
+      )
+    end
+
+    context "A Gelbooru image url with a single subdirectory" do
+      should "be parsed as an image url" do
+        url = Source::URL.parse("http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png")
+
+        assert_equal("edd1d2b3881cf70c3acf540780507531", url.md5)
+        assert_equal("https://simg.gelbooru.com/images/2003/edd1d2b3881cf70c3acf540780507531.png", url.full_image_url)
+        assert_equal("https://gelbooru.com/index.php?page=post&s=list&md5=edd1d2b3881cf70c3acf540780507531", url.page_url)
+        assert(url.image_url?)
+      end
+    end
+
+    context "A Gelbooru API url for a non-post endpoint" do
+      should "not have a post id" do
+        url = Source::URL.parse("https://gelbooru.com/index.php?page=dapi&s=tag&q=index&id=1&json=1")
+
+        assert_nil(url.post_id)
+      end
+    end
+  end
+end