diff --git a/app/logical/source/extractor.rb b/app/logical/source/extractor.rb
index e79daa769..f720e6a01 100644
--- a/app/logical/source/extractor.rb
+++ b/app/logical/source/extractor.rb
@@ -39,6 +39,7 @@ module Source
       Source::Extractor::Moebooru,
       Source::Extractor::Nijie,
       Source::Extractor::ArtStation,
+      Source::Extractor::Gelbooru,
       Source::Extractor::HentaiFoundry,
       Source::Extractor::Fanbox,
       Source::Extractor::Mastodon,
diff --git a/app/logical/source/extractor/gelbooru.rb b/app/logical/source/extractor/gelbooru.rb
new file mode 100644
index 000000000..b3d1717ac
--- /dev/null
+++ b/app/logical/source/extractor/gelbooru.rb
@@ -0,0 +1,93 @@
+# frozen_string_literal: true
+
+# Source extractor for Gelbooru. The commentary and artist information are
+# pulled from the Gelbooru post's source, while the translated tags include
+# both the Gelbooru tags and the source's tags.
+#
+# @see Source::URL::Gelbooru
+# @see https://gelbooru.com/index.php?page=wiki&s=view&id=18780 (howto:api)
+module Source
+  class Extractor
+    class Gelbooru < Source::Extractor
+      delegate :artist_name, :profile_url, :profile_urls, :other_names, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_extractor, allow_nil: true
+
+      def match?
+        Source::URL::Gelbooru === parsed_url
+      end
+
+      # The image URL reported by the API, or an empty list when no API
+      # response is available.
+      def image_urls
+        [api_response&.dig(:file_url)].compact
+      end
+
+      def page_url
+        "https://gelbooru.com/index.php?page=post&s=view&id=#{post_id}" if post_id.present?
+      end
+
+      # The Gelbooru tags followed by the tags of the post's source.
+      def tags
+        gelbooru_tags + source_tags
+      end
+
+      # [tag, tag search URL] pairs for the post's tags plus its rating.
+      def gelbooru_tags
+        return [] if api_response.nil?
+
+        tags = api_response[:tags].to_s.split + ["rating:#{api_response[:rating]}"]
+        tags.map do |tag|
+          [tag, "https://gelbooru.com/index.php?page=post&s=list&tags=#{CGI.escape(tag)}"]
+        end
+      end
+
+      def source_tags
+        sub_extractor&.tags.to_a
+      end
+
+      # Taken from the URL, else the referer, else looked up by md5. `try` is
+      # used because the referer may parse to a URL type without a post id.
+      def post_id
+        parsed_url.post_id || parsed_referer.try(:post_id) || post_id_from_md5
+      end
+
+      def api_url
+        # https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=779812&json=1
+        "https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" if post_id.present?
+      end
+
+      # The first entry of the API's "post" list, or nil when there is no post
+      # id, the request fails, or the response contains no post.
+      memoize def api_response
+        return nil unless api_url.present?
+
+        response = http.cache(1.minute).get(api_url)
+        return nil unless response.status == 200
+
+        # Safe navigation so that a response without a "post" list yields nil
+        # instead of raising.
+        response.parse["post"]&.first&.with_indifferent_access
+      end
+
+      # Looks up the post id for an image URL by requesting the md5 search page
+      # and parsing the URL of the response.
+      memoize def post_id_from_md5
+        return nil unless parsed_url.image_url? && parsed_url.page_url.present?
+
+        response = http.cache(1.minute).head(parsed_url.page_url)
+        return nil unless response.status == 200
+
+        # The response URL may not parse to something with a post id.
+        Source::URL.parse(response.uri).try(:post_id)
+      end
+
+      # The extractor for the post's source, or nil when the post has no
+      # source or could not be fetched.
+      def sub_extractor
+        source = api_response&.dig(:source)
+        return nil if source.blank?
+
+        @sub_extractor ||= Source::Extractor.find(source, default: nil)
+      end
+    end
+  end
+end
diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb
index 4cac2fd7e..e9c48ca00 100644
--- a/app/logical/source/url.rb
+++ b/app/logical/source/url.rb
@@ -40,6 +40,7 @@ module Source
       Source::URL::Fantia,
       Source::URL::Fc2,
       Source::URL::Foundation,
+      Source::URL::Gelbooru,
       Source::URL::HentaiFoundry,
       Source::URL::Instagram,
       Source::URL::Lofter,
diff --git a/app/logical/source/url/gelbooru.rb b/app/logical/source/url/gelbooru.rb
new file mode 100644
index 000000000..8c9e7f6a8
--- /dev/null
+++ b/app/logical/source/url/gelbooru.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+# Parses Gelbooru post page, md5 search, API, and image URLs.
+#
+# @see Source::Extractor::Gelbooru
+class Source::URL::Gelbooru < Source::URL
+  attr_reader :post_id, :md5, :full_image_url
+
+  def self.match?(url)
+    url.domain.in?(%w[gelbooru.com])
+  end
+
+  def parse
+    case [domain, *path_segments]
+
+    # https://gelbooru.com/index.php?page=post&s=view&id=7798045
+    # https://www.gelbooru.com/index.php?page=post&s=view&id=7798045
+    in "gelbooru.com", "index.php" if params[:page] == "post" && params[:s] == "view" && params[:id].present?
+      @post_id = params[:id].to_i
+
+    # https://gelbooru.com/index.php?page=post&s=list&md5=99d9977d6c3aa185083a2da22bd8acfb
+    in "gelbooru.com", "index.php" if params[:page] == "post" && params[:s] == "list" && params[:md5].present?
+      @md5 = params[:md5]
+
+    # https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=7798045&json=1
+    in "gelbooru.com", "index.php" if params[:page] == "dapi" && params[:s] == "post" && params[:q] == "index" && params[:id].present?
+      @post_id = params[:id].to_i
+
+    # https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
+    # http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
+    # https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
+    # https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4
+    # https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm
+    # https://gelbooru.com/thumbnails/08/06/thumbnail_08066c138e7e138a47489a0934c29156.jpg
+    in "gelbooru.com", ("images" | "samples" | "thumbnails"), *subdirs, /\A(?:\w+_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)\z/i
+      @md5 = $1
+      # Some image URLs have a single subdirectory (e.g. /images/2003/), so
+      # accept any number of them instead of exactly two.
+      image_path = ["images", *subdirs, "#{md5}.#{file_ext}"].join("/")
+      @full_image_url = "https://#{host}/#{image_path}"
+
+    else
+      nil
+    end
+  end
+
+  def image_url?
+    full_image_url.present?
+  end
+
+  # The post page when the id is known, else the md5 search page.
+  def page_url
+    if post_id.present?
+      "https://gelbooru.com/index.php?page=post&s=view&id=#{post_id}"
+    elsif md5.present?
+      "https://gelbooru.com/index.php?page=post&s=list&md5=#{md5}"
+    end
+  end
+
+  def api_url
+    "https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" if post_id.present?
+  end
+end
diff --git a/app/logical/source/url/null.rb b/app/logical/source/url/null.rb
index 92ddd4ec4..9ded3f6d0 100644
--- a/app/logical/source/url/null.rb
+++ b/app/logical/source/url/null.rb
@@ -131,15 +131,6 @@ class Source::URL::Null < Source::URL
       @work_id = $1
       @page_url = "https://www.facebook.com/photo.php?fbid=#{work_id}"
 
-    # https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
-    # http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
-    # https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
-    # https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4
-    # https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm
-    in _, "gelbooru.com", ("images" | "samples"), *subdirs, /^(?:sample_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)$/
-      @md5 = $1
-      @page_url = "https://gelbooru.com/index.php?page=post&s=list&tags=md5:#{@md5}"
-
     # https://a.hitomi.la/galleries/907838/1.png
     # https://0a.hitomi.la/galleries/1169701/23.png
     # https://aa.hitomi.la/galleries/990722/003_01_002.jpg
diff --git a/test/functional/uploads_controller_test.rb b/test/functional/uploads_controller_test.rb
index c75aa2926..11ad1860b 100644
--- a/test/functional/uploads_controller_test.rb
+++ b/test/functional/uploads_controller_test.rb
@@ -313,6 +313,8 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
         should_upload_successfully("https://konachan.com/post/show/270916")
         should_upload_successfully("https://konachan.com/image/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916.png")
 
+        should_upload_successfully("https://gelbooru.com/index.php?page=post&s=view&id=7798121")
+
         should_upload_successfully("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663")
         should_upload_successfully("http://seiga.nicovideo.jp/seiga/im4937663")
         should_upload_successfully("https://seiga.nicovideo.jp/image/source/9146749")
diff --git a/test/unit/sources/gelbooru_test.rb b/test/unit/sources/gelbooru_test.rb
new file mode 100644
index 000000000..128514dad
--- /dev/null
+++ b/test/unit/sources/gelbooru_test.rb
@@ -0,0 +1,58 @@
+require "test_helper"
+
+module Sources
+  class GelbooruTest < ActiveSupport::TestCase
+    context "A Gelbooru direct image url without a referer" do
+      strategy_should_work(
+        "https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg",
+        image_urls: ["https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg"],
+        artist_name: "灰色灰烬bot",
+        profile_url: "https://www.pixiv.net/users/3330425",
+        tags: %w[1girl back_bow bangs black_pantyhose blue_bow blue_hair blue_ribbon boots bow cape chibi chinese_commentary closed_eyes full_body hair_between_eyes hair_ribbon hat hatsune_miku indai_(3330425) on_ground pantyhose pom_pom_(clothes) rabbit rabbit_yukine rating:general ribbon simple_background sitting solo twintails vocaloid white_background white_cape white_headwear witch_hat yuki_miku yuki_miku_(2014) 初音ミク 雪ミク],
+        artist_commentary_title: "2010~2021雪ミク",
+        artist_commentary_desc: "动作参考@速写班长",
+        download_size: 480_621,
+      )
+    end
+
+    context "A Gelbooru direct image url with a referer" do
+      strategy_should_work(
+        "https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg",
+        referer: "https://gelbooru.com/index.php?page=post&s=view&id=7798121",
+        image_urls: ["https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg"],
+        artist_name: "灰色灰烬bot",
+        profile_url: "https://www.pixiv.net/users/3330425",
+        tags: %w[1girl back_bow bangs black_pantyhose blue_bow blue_hair blue_ribbon boots bow cape chibi chinese_commentary closed_eyes full_body hair_between_eyes hair_ribbon hat hatsune_miku indai_(3330425) on_ground pantyhose pom_pom_(clothes) rabbit rabbit_yukine rating:general ribbon simple_background sitting solo twintails vocaloid white_background white_cape white_headwear witch_hat yuki_miku yuki_miku_(2014) 初音ミク 雪ミク],
+        artist_commentary_title: "2010~2021雪ミク",
+        artist_commentary_desc: "动作参考@速写班长",
+        download_size: 480_621,
+      )
+    end
+
+    context "A Gelbooru sample image url" do
+      strategy_should_work(
+        "https://img3.gelbooru.com/samples/04/f2/sample_04f2767c64593c3030ce74ecc2528704.jpg",
+        image_urls: ["https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg"],
+        artist_name: "灰色灰烬bot",
+        profile_url: "https://www.pixiv.net/users/3330425",
+        tags: %w[1girl back_bow bangs black_pantyhose blue_bow blue_hair blue_ribbon boots bow cape chibi chinese_commentary closed_eyes full_body hair_between_eyes hair_ribbon hat hatsune_miku indai_(3330425) on_ground pantyhose pom_pom_(clothes) rabbit rabbit_yukine rating:general ribbon simple_background sitting solo twintails vocaloid white_background white_cape white_headwear witch_hat yuki_miku yuki_miku_(2014) 初音ミク 雪ミク],
+        artist_commentary_title: "2010~2021雪ミク",
+        artist_commentary_desc: "动作参考@速写班长",
+        download_size: 480_621,
+      )
+    end
+
+    context "A Gelbooru page url" do
+      strategy_should_work(
+        "https://gelbooru.com/index.php?page=post&s=view&id=7798121",
+        image_urls: ["https://img3.gelbooru.com/images/04/f2/04f2767c64593c3030ce74ecc2528704.jpg"],
+        artist_name: "灰色灰烬bot",
+        profile_url: "https://www.pixiv.net/users/3330425",
+        tags: %w[1girl back_bow bangs black_pantyhose blue_bow blue_hair blue_ribbon boots bow cape chibi chinese_commentary closed_eyes full_body hair_between_eyes hair_ribbon hat hatsune_miku indai_(3330425) on_ground pantyhose pom_pom_(clothes) rabbit rabbit_yukine rating:general ribbon simple_background sitting solo twintails vocaloid white_background white_cape white_headwear witch_hat yuki_miku yuki_miku_(2014) 初音ミク 雪ミク],
+        artist_commentary_title: "2010~2021雪ミク",
+        artist_commentary_desc: "动作参考@速写班长",
+        download_size: 480_621,
+      )
+    end
+  end
+end