diff --git a/app/logical/danbooru/http/xml_adapter.rb b/app/logical/danbooru/http/xml_adapter.rb index 002841612..bff48f301 100644 --- a/app/logical/danbooru/http/xml_adapter.rb +++ b/app/logical/danbooru/http/xml_adapter.rb @@ -3,6 +3,7 @@ module Danbooru class Http class XmlAdapter < HTTP::MimeType::Adapter + HTTP::MimeType.register_adapter "text/xml", self HTTP::MimeType.register_adapter "application/xml", self HTTP::MimeType.register_alias "application/xml", :xml diff --git a/app/logical/source/extractor/gelbooru.rb b/app/logical/source/extractor/gelbooru.rb index e8dd371a2..b3925bcf3 100644 --- a/app/logical/source/extractor/gelbooru.rb +++ b/app/logical/source/extractor/gelbooru.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true -# Source extractor for Gelbooru. The commentary and artist information are -# pulled from the Gelbooru post's source, while the translated tags include -# both the Gelbooru tags and the source's tags. +# Source extractor for Gelbooru and Safebooru.org. The commentary and artist information are pulled from the +# booru post's source, while the translated tags include both the booru tags and the source's tags. # # @see Source::URL::Gelbooru # @see https://gelbooru.com/index.php?page=wiki&s=view&id=18780 (howto:api) +# @see https://safebooru.org/index.php?page=help&topic=dapi module Source class Extractor class Gelbooru < Source::Extractor @@ -16,23 +16,27 @@ module Source end def image_urls - [api_response[:file_url]].compact + if parsed_url.full_image_url.present? + [parsed_url.full_image_url] + else + [api_response[:file_url]].compact + end end def page_url - "https://gelbooru.com/index.php?page=post&s=view&id=#{post_id}" if post_id.present? + "https://#{domain}/index.php?page=post&s=view&id=#{post_id}" if post_id.present? end def tags - gelbooru_tags + source_tags + site_tags + source_tags end - def gelbooru_tags + def site_tags return [] if api_response.blank? 
tags = api_response[:tags].split + ["rating:#{api_response[:rating]}"] tags.map do |tag| - [tag, "https://gelbooru.com/index.php?page=post&s=list&tags=#{CGI.escape(tag)}"] + [tag, "https://#{domain}/index.php?page=post&s=list&tags=#{CGI.escape(tag)}"] end end @@ -44,13 +48,18 @@ module Source sub_extractor&.other_names.to_a end + def domain + parsed_url.domain + end + def post_id - parsed_url.post_id || parsed_referer&.post_id || post_id_from_md5 + parsed_url.post_id || parsed_referer&.post_id || api_response[:id] end def api_url - # https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=779812&json=1 - "https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" if post_id.present? + # https://gelbooru.com//index.php?page=dapi&s=post&q=index&tags=md5:338078144fe77c9e5f35dbb585e749ec + # https://gelbooru.com//index.php?page=dapi&s=post&q=index&tags=id:7903922 + parsed_url.api_url || parsed_referer&.api_url end memoize def api_response @@ -59,20 +68,11 @@ module Source response = http.cache(1.minute).get(api_url) return {} unless response.status == 200 - response.parse["post"]&.first.to_h.with_indifferent_access - end - - memoize def post_id_from_md5 - return nil unless parsed_url.page_url.present? - - response = http.cache(1.minute).head(parsed_url.page_url) - return nil unless response.status == 200 - - Source::URL.parse(response.uri).post_id + response.parse.dig("posts", "post").to_h.with_indifferent_access end def sub_extractor - return nil if api_response[:source].nil? + return nil if !api_response[:source].to_s.match?(%r{\Ahttps?://}i) @sub_extractor ||= Source::Extractor.find(api_response[:source], default: nil) end end diff --git a/app/logical/source/url/gelbooru.rb b/app/logical/source/url/gelbooru.rb index acbc882a1..98dac8227 100644 --- a/app/logical/source/url/gelbooru.rb +++ b/app/logical/source/url/gelbooru.rb @@ -1,10 +1,11 @@ # frozen_string_literal: true +# This covers both Gelbooru and Safebooru. 
class Source::URL::Gelbooru < Source::URL - attr_reader :post_id, :md5, :full_image_url + attr_reader :post_id, :md5, :image_type, :full_image_url def self.match?(url) - url.domain.in?(%w[gelbooru.com]) + url.domain.in?(%w[safebooru.org gelbooru.com]) end def parse @@ -12,31 +13,50 @@ class Source::URL::Gelbooru < Source::URL # https://gelbooru.com/index.php?page=post&s=view&id=7798045 # https://www.gelbooru.com/index.php?page=post&s=view&id=7798045 - in "gelbooru.com", "index.php" if params[:page] == "post" && params[:s] == "view" && params[:id].present? + # https://safebooru.org/index.php?page=post&s=view&id=4196948 + in _, "index.php" if params[:page] == "post" && params[:s] == "view" && params[:id].present? @post_id = params[:id].to_i # https://gelbooru.com/index.php?page=post&s=list&md5=99d9977d6c3aa185083a2da22bd8acfb - in "gelbooru.com", "index.php" if params[:page] == "post" && params[:s] == "list" && params[:md5].present? + # https://safebooru.org/index.php?page=post&s=list&md5=99d9977d6c3aa185083a2da22bd8acfb + in _, "index.php" if params[:page] == "post" && params[:s] == "list" && params[:md5].present? @md5 = params[:md5] # https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=7798045&json=1 - in "gelbooru.com", "index.php" if params[:page] == "dapi" && params[:q] == "index" && params[:id].present? + # https://safebooru.org/index.php?page=dapi&s=post&q=index&id=4196948&json=1 + in _, "index.php" if params[:page] == "dapi" && params[:q] == "index" && params[:id].present? 
@post_id = params[:id].to_i # https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg - # https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg # https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4 # https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm + # https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg # https://gelbooru.com/thumbnails/08/06/thumbnail_08066c138e7e138a47489a0934c29156.jpg - in "gelbooru.com", ("images" | "samples" | "thumbnails"), h1, h2, /\A(?:\w+_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)\z/i + in _, ("images" | "samples" | "thumbnails") => image_type, /\A\h{2}\z/ => h1, /\A\h{2}\z/ => h2, /\A(?:sample_|thumbnail_)?(\h{32})\.\w+\z/i @md5 = $1 - @full_image_url = "https://#{host}/images/#{h1}/#{h2}/#{md5}.#{file_ext}" + @image_type = image_type + @full_image_url = url.to_s if image_type == "images" - # http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png # http://simg2.gelbooru.com//samples/619/sample_fe84fb3f86020e120f4b4712fcbd3abf.jpeg?755046 - in "gelbooru.com", ("images" | "samples"), /\A\d+\z/ => dir, /\A(?:\w+_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)/i + # http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png + # https://safebooru.org//images/4016/64779fbfc87020ed5fd94854fe973bc0.jpeg + # https://safebooru.org//samples/4016/sample_64779fbfc87020ed5fd94854fe973bc0.jpg?4196692 + # https://safebooru.org/thumbnails/4016/thumbnail_64779fbfc87020ed5fd94854fe973bc0.jpg?4196692 + in _, ("images" | "samples" | "thumbnails") => image_type, /\A\d+\z/ => directory, /\A(?:sample_|thumbnail_)?(\h{32})\.\w+\z/ @md5 = $1 - @full_image_url = url + @post_id = query if query&.match?(/\A\d+\z/) + @image_type = image_type + @full_image_url = url.to_s if image_type == "images" + + # Safebooru uses an unknown 40-hex-digit (160-bit) hash, rather than a 32-hex-digit MD5, for most image URLs. 
+ # https://safebooru.org//images/4016/d2f50befcdc304cbd9030f2d0832029f5fe8cccc.png + # https://safebooru.org//samples/4016/sample_ffc6c5705d31422ddbaa7478deb560c985d2ee71.jpg?4196970 + # https://safebooru.org/thumbnails/4016/thumbnail_8d0664867c59acb3103bccd9a9a5562a193eadcd.jpg?4196980 + in "safebooru.org", ("images" | "samples" | "thumbnails") => image_type, /\A\d+\z/ => directory, /\A(?:sample_|thumbnail_)?(\h{40})\.\w+\z/ + @hash = $1 + @post_id = query if query&.match?(/\A\d+\z/) + @image_type = image_type + @full_image_url = url.to_s if image_type == "images" else nil @@ -44,18 +64,27 @@ class Source::URL::Gelbooru < Source::URL end def image_url? - full_image_url.present? + image_type.present? end def page_url if post_id.present? - "https://gelbooru.com/index.php?page=post&s=view&id=#{post_id}" + "https://#{domain}/index.php?page=post&s=view&id=#{post_id}" elsif md5.present? - "https://gelbooru.com/index.php?page=post&s=list&md5=#{md5}" + "https://#{domain}/index.php?page=post&s=list&md5=#{md5}" end end def api_url - "https://gelbooru.com/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" if post_id.present? + # https://gelbooru.com//index.php?page=dapi&s=post&q=index&tags=id:7903922 + # https://safebooru.org/index.php?page=dapi&s=post&q=index&tags=id:4197087 + if post_id.present? + # "https://#{domain}/index.php?page=dapi&s=post&q=index&id=#{post_id}&json=1" + "https://#{domain}/index.php?page=dapi&s=post&q=index&tags=id:#{post_id}" + # https://gelbooru.com//index.php?page=dapi&s=post&q=index&tags=md5:338078144fe77c9e5f35dbb585e749ec + # https://safebooru.org/index.php?page=dapi&s=post&q=index&tags=md5:8c1fe66ff46d03725caa30135ad70e7e + elsif md5.present? 
+ "https://#{domain}/index.php?page=dapi&s=post&q=index&tags=md5:#{md5}" + end end end diff --git a/test/functional/uploads_controller_test.rb b/test/functional/uploads_controller_test.rb index c000b65d1..4d8b5baba 100644 --- a/test/functional/uploads_controller_test.rb +++ b/test/functional/uploads_controller_test.rb @@ -441,6 +441,7 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest should_upload_successfully("https://konachan.com/image/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916.png") should_upload_successfully("https://gelbooru.com/index.php?page=post&s=view&id=7798121") + should_upload_successfully("https://safebooru.org/index.php?page=post&s=view&id=4189916") should_upload_successfully("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663") should_upload_successfully("http://seiga.nicovideo.jp/seiga/im4937663") diff --git a/test/unit/sources/gelbooru_test.rb b/test/unit/sources/gelbooru_test.rb index d8d043cd8..afc49fe5d 100644 --- a/test/unit/sources/gelbooru_test.rb +++ b/test/unit/sources/gelbooru_test.rb @@ -101,5 +101,90 @@ module Sources assert_equal("https://gelbooru.com/index.php?page=post&s=list&md5=edd1d2b3881cf70c3acf540780507531", Source::URL.page_url(source2)) assert_equal("https://gelbooru.com/index.php?page=post&s=list&md5=0b3ae5e225072b8e391c827cb470d29c", Source::URL.page_url(source3)) end + + context "Safebooru:" do + # source: https://i.pximg.net/img-original/img/2021/10/24/09/53/44/93646177_p0.jpg + context "A https://safebooru.org/images/$dir/$hash.jpg?$post_id URL without a referer" do + strategy_should_work( + "https://safebooru.org//images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg?4189916", + image_urls: ["https://safebooru.org//images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg?4189916"], + artist_name: "チー之介", + profile_url: "https://www.pixiv.net/users/57673194", + tags: %w[1girl ^^^ animal_ears black_bow black_bowtie black_gloves black_hair black_hairband 
black_skirt black_wings blue_flower blue_rose bow bowtie brooch center_frills changing_room cheesecake_(artist) collared_shirt commentary dated demon_wings fangs flower flying_sweatdrops frilled_hairband frilled_sleeves frills frown gloves hair_flower hair_ornament hair_over_one_eye hairband halloween halloween_costume high-waist_skirt highres horse_ears horse_girl indoors jack-o'-lantern_ornament jewelry lace-trimmed_gloves lace_trim long_hair looking_at_viewer make_up_in_halloween!_(umamusume) official_alternate_costume open_mouth orange_bow puffy_short_sleeves puffy_sleeves rice_shower_(make_up_vampire!)_(umamusume) rice_shower_(umamusume) rose shirt short_sleeves skirt skirt_bow solo spider_web_print standing star_ornament twitter_username umamusume violet_eyes white_shirt wings rating:q ウマ娘 ライスシャワー ハロウィンイラスト 二次創作 ウマ娘プリティーダービー ライスシャワー(ウマ娘) 更衣室 Make_up_Vampire! ドラキュライス 困り顔], + artist_commentary_title: "ハロウィンライス", + artist_commentary_desc: "更衣室でハロウィン衣装に着替えたあと「がおーっ!」のポーズを鏡の前で密かに練習してたら、見つかっちゃってあわてるライスシャワーを描きました。", + download_size: 771_175, + ) + end + + # source: https://i.pximg.net/img-original/img/2021/10/24/09/53/44/93646177_p0.jpg + context "A https://safebooru.org/images/$dir/$hash.jpg URL without a referer" do + strategy_should_work( + "https://safebooru.org//images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg", + image_urls: ["https://safebooru.org//images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg"], + artist_name: nil, + profile_url: nil, + tags: [], + artist_commentary_title: nil, + artist_commentary_desc: nil, + ) + end + + # source: https://i.pximg.net/img-original/img/2021/10/24/09/53/44/93646177_p0.jpg + context "A https://safebooru.org/images/$dir/$hash.jpg URL with a referer" do + strategy_should_work( + "https://safebooru.org//images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg", + referer: "https://safebooru.org/index.php?page=post&s=view&id=4189916", + image_urls: 
["https://safebooru.org//images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg"], + artist_name: "チー之介", + profile_url: "https://www.pixiv.net/users/57673194", + tags: %w[1girl ^^^ animal_ears black_bow black_bowtie black_gloves black_hair black_hairband black_skirt black_wings blue_flower blue_rose bow bowtie brooch center_frills changing_room cheesecake_(artist) collared_shirt commentary dated demon_wings fangs flower flying_sweatdrops frilled_hairband frilled_sleeves frills frown gloves hair_flower hair_ornament hair_over_one_eye hairband halloween halloween_costume high-waist_skirt highres horse_ears horse_girl indoors jack-o'-lantern_ornament jewelry lace-trimmed_gloves lace_trim long_hair looking_at_viewer make_up_in_halloween!_(umamusume) official_alternate_costume open_mouth orange_bow puffy_short_sleeves puffy_sleeves rice_shower_(make_up_vampire!)_(umamusume) rice_shower_(umamusume) rose shirt short_sleeves skirt skirt_bow solo spider_web_print standing star_ornament twitter_username umamusume violet_eyes white_shirt wings rating:q ウマ娘 ライスシャワー ハロウィンイラスト 二次創作 ウマ娘プリティーダービー ライスシャワー(ウマ娘) 更衣室 Make_up_Vampire! 
ドラキュライス 困り顔], + artist_commentary_title: "ハロウィンライス", + artist_commentary_desc: "更衣室でハロウィン衣装に着替えたあと「がおーっ!」のポーズを鏡の前で密かに練習してたら、見つかっちゃってあわてるライスシャワーを描きました。", + download_size: 771_175, + ) + end + + # source: https://i.pximg.net/img-original/img/2021/10/24/09/53/44/93646177_p0.jpg + context "A https://safebooru.org/index.php?page=post&s=view&id=$post_id URL" do + strategy_should_work( + "https://safebooru.org/index.php?page=post&s=view&id=4189916", + image_urls: ["https://safebooru.org/images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg"], + artist_name: "チー之介", + profile_url: "https://www.pixiv.net/users/57673194", + tags: %w[1girl ^^^ animal_ears black_bow black_bowtie black_gloves black_hair black_hairband black_skirt black_wings blue_flower blue_rose bow bowtie brooch center_frills changing_room cheesecake_(artist) collared_shirt commentary dated demon_wings fangs flower flying_sweatdrops frilled_hairband frilled_sleeves frills frown gloves hair_flower hair_ornament hair_over_one_eye hairband halloween halloween_costume high-waist_skirt highres horse_ears horse_girl indoors jack-o'-lantern_ornament jewelry lace-trimmed_gloves lace_trim long_hair looking_at_viewer make_up_in_halloween!_(umamusume) official_alternate_costume open_mouth orange_bow puffy_short_sleeves puffy_sleeves rice_shower_(make_up_vampire!)_(umamusume) rice_shower_(umamusume) rose shirt short_sleeves skirt skirt_bow solo spider_web_print standing star_ornament twitter_username umamusume violet_eyes white_shirt wings rating:q ウマ娘 ライスシャワー ハロウィンイラスト 二次創作 ウマ娘プリティーダービー ライスシャワー(ウマ娘) 更衣室 Make_up_Vampire! 
ドラキュライス 困り顔], + artist_commentary_title: "ハロウィンライス", + artist_commentary_desc: "更衣室でハロウィン衣装に着替えたあと「がおーっ!」のポーズを鏡の前で密かに練習してたら、見つかっちゃってあわてるライスシャワーを描きました。", + download_size: 771_175, + ) + end + + # source: https://i.pximg.net/img-original/img/2021/10/24/09/53/44/93646177_p0.jpg + context "A https://safebooru.org/index.php?page=post&s=list&md5=$md5 URL" do + strategy_should_work( + "https://safebooru.org/index.php?page=post&s=list&md5=8ca0f76e014175f11085d64932d980a5", + image_urls: ["https://safebooru.org/images/4010/febe33d5f6d46e21c073289bb9884d4e0630761c.jpg"], + artist_name: "チー之介", + profile_url: "https://www.pixiv.net/users/57673194", + tags: %w[1girl ^^^ animal_ears black_bow black_bowtie black_gloves black_hair black_hairband black_skirt black_wings blue_flower blue_rose bow bowtie brooch center_frills changing_room cheesecake_(artist) collared_shirt commentary dated demon_wings fangs flower flying_sweatdrops frilled_hairband frilled_sleeves frills frown gloves hair_flower hair_ornament hair_over_one_eye hairband halloween halloween_costume high-waist_skirt highres horse_ears horse_girl indoors jack-o'-lantern_ornament jewelry lace-trimmed_gloves lace_trim long_hair looking_at_viewer make_up_in_halloween!_(umamusume) official_alternate_costume open_mouth orange_bow puffy_short_sleeves puffy_sleeves rice_shower_(make_up_vampire!)_(umamusume) rice_shower_(umamusume) rose shirt short_sleeves skirt skirt_bow solo spider_web_print standing star_ornament twitter_username umamusume violet_eyes white_shirt wings rating:q ウマ娘 ライスシャワー ハロウィンイラスト 二次創作 ウマ娘プリティーダービー ライスシャワー(ウマ娘) 更衣室 Make_up_Vampire! 
ドラキュライス 困り顔], + artist_commentary_title: "ハロウィンライス", + artist_commentary_desc: "更衣室でハロウィン衣装に着替えたあと「がおーっ!」のポーズを鏡の前で密かに練習してたら、見つかっちゃってあわてるライスシャワーを描きました。", + download_size: 771_175, + ) + end + + # source: https://i.pximg.net/img-original/img/2021/10/24/09/53/44/93646177_p0.jpg + context "A https://safebooru.org/images/$dir/$md5.jpg URL without a referer" do + strategy_should_work( + "https://safebooru.org//images/4016/64779fbfc87020ed5fd94854fe973bc0.jpeg", + image_urls: ["https://safebooru.org//images/4016/64779fbfc87020ed5fd94854fe973bc0.jpeg"], + artist_name: nil, + profile_url: nil, + tags: %w[brown_eyes d4dj dress long_hair pink_hair sword yano_hiiro yorha_no._2_type_b rating:s], + artist_commentary_title: nil, + artist_commentary_desc: nil, + ) + end + end end end