diff --git a/app/logical/sources/strategies/moebooru.rb b/app/logical/sources/strategies/moebooru.rb index 8218eee3b..55c2773ab 100644 --- a/app/logical/sources/strategies/moebooru.rb +++ b/app/logical/sources/strategies/moebooru.rb @@ -1,7 +1,40 @@ +# Original images: +# +# * https://yande.re/image/b4b1d11facd1700544554e4805d47bb6/.png +# * https://files.yande.re/image/2a5d1d688f565cb08a69ecf4e35017ab/yande.re%20349790%20breast_hold%20kurashima_tomoyasu%20mahouka_koukou_no_rettousei%20naked%20nipples.jpg +# * https://ayase.yande.re/image/2d0d229fd8465a325ee7686fcc7f75d2/yande.re%20192481%20animal_ears%20bunny_ears%20garter_belt%20headphones%20mitha%20stockings%20thighhighs.jpg +# * https://yuno.yande.re/image/1764b95ae99e1562854791c232e3444b/yande.re%20281544%20cameltoe%20erect_nipples%20fundoshi%20horns%20loli%20miyama-zero%20sarashi%20sling_bikini%20swimsuits.jpg +# * https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg +# +# Jpeg sample images (full size is .png): +# +# * https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg +# * https://yande.re/jpeg/0c9ec0ffcaa40470093cb44c3fd40056/yande.re%2064649%20animal_ears%20cameltoe%20fixme%20nekomimi%20nipples%20ryohka%20school_swimsuit%20see_through%20shiraishi_nagomi%20suzuya%20swimsuits%20tail%20thighhighs.jpg +# * https://konachan.com/jpeg/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20banishment%20bicycle%20grass%20group%20male%20night%20original%20rooftop%20scenic%20signed%20stars%20tree.jpg +# +# Sample images (full size is .png or .jpg): +# +# * https://yande.re/sample/ceb6a12e87945413a95b90fada406f91/.jpg +# * https://files.yande.re/sample/0d79447ce2c89138146f64ba93633568/yande.re%20290757%20sample%20seifuku%20thighhighs%20tsukudani_norio.jpg +# * 
https://konachan.com/sample/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20sample.jpg +# +# Preview images: +# +# * https://assets.yande.re/data/preview/7e/cf/7ecfdead705d7b956b26b1d37b98d089.jpg +# * https://konachan.com/data/preview/5d/63/5d633771614e4bf5c17df19a0f0f333f.jpg +# +# Post pages: +# +# * https://yande.re/post/show/3 +# * https://konachan.com/post/show/270803/banishment-bicycle-grass-group-male-night-original + module Sources module Strategies class Moebooru < Base BASE_URL = %r!\Ahttps?://(?:[^.]+\.)?(?<domain>yande\.re|konachan\.com)!i + POST_URL = %r!#{BASE_URL}/post/show/(?<id>\d+)!i + URL_SLUG = %r!/(?:yande\.re%20|Konachan\.com%20-%20)(?<id>\d+).*!i + IMAGE_URL = %r!#{BASE_URL}/(?<type>image|jpeg|sample)/(?<md5>\h{32})#{URL_SLUG}?\.(?<ext>jpg|jpeg|png|gif)\z!i def self.match?(*urls) urls.compact.any? { |x| x.match?(BASE_URL) } @@ -12,11 +45,14 @@ module Sources end def image_url - if url =~ %r{\A(?<base>#{BASE_URL})/jpeg/(?<md5>\h+(?:/.*)?)\.jpg\Z} - return $~[:base] + "/image/" + $~[:md5] + ".png" + # try to include the post_id so that it's saved for posterity in the canonical_url. + if post_md5.present? && file_ext.present? && post_id.present? + "https://#{file_host}/image/#{post_md5}/#{filename_prefix}#{post_id}.#{file_ext}" + elsif post_md5.present? && file_ext.present? + "https://#{file_host}/image/#{post_md5}.#{file_ext}" + else + url end - - return url end def image_urls @@ -24,7 +60,8 @@ module Sources end def page_url - return url + return nil if post_id.blank? + "https://#{site_name}/post/show/#{post_id}" end def canonical_url @@ -32,11 +69,84 @@ module Sources end def profile_url - return url + nil end def artist_name - return "" + nil + end + + # Moebooru returns an empty array when doing an md5: search for a + # deleted post. Because of this, api_response may be empty in some cases. + def api_response + if post_id_from_url.present? + params = { tags: "id:#{post_id_from_url}" } + elsif post_md5_from_url.present? 
+ params = { tags: "md5:#{post_md5_from_url}" } + else + return {} + end + + body, code = HttpartyCache.get("/post.json", base_uri: "https://#{site_name}", params: params) + post = JSON.parse(body, symbolize_names: true).first + post || {} + end + memoize :api_response + + concerning :HelperMethods do + def file_host + case site_name + when "yande.re" then "files.yande.re" + when "konachan.com" then "konachan.com" + end + end + + def filename_prefix + case site_name + when "yande.re" then "yande.re%20" + when "konachan.com" then "Konachan.com%20-%20" + end + end + + def file_ext + if url[IMAGE_URL, :type] == "jpeg" + "png" + + elsif url[IMAGE_URL, :type] == "image" + url[IMAGE_URL, :ext] + + # file_ext is not present in konachan's api (only on yande.re) + elsif api_response[:file_ext].present? + api_response[:file_ext] + + # file_url is not present in yande.re's api on deleted posts + elsif api_response[:file_url].present? + api_response[:file_url][/\.(jpg|jpeg|png|gif)\z/i, 1] + + # the api_response wasn't available because it's a deleted post. + elsif post_md5.present? 
+ %w[jpg png gif].find { |ext| http_exists?("https://#{site_name}/image/#{post_md5}.#{ext}", headers) } + + else + nil + end + end + + def post_id_from_url + urls.map { |url| url[POST_URL, :id] || url[IMAGE_URL, :id] }.compact.first + end + + def post_md5_from_url + urls.map { |url| url[IMAGE_URL, :md5] }.compact.first + end + + def post_id + post_id_from_url || api_response[:id] + end + + def post_md5 + post_md5_from_url || api_response[:md5] + end end end end diff --git a/test/unit/sources/moebooru_test.rb b/test/unit/sources/moebooru_test.rb index 0b5b68e86..e0ed0b333 100644 --- a/test/unit/sources/moebooru_test.rb +++ b/test/unit/sources/moebooru_test.rb @@ -2,23 +2,83 @@ require "test_helper" module Sources class MoebooruTest < ActiveSupport::TestCase + def assert_source_data_equals(url, referer = nil, site_name: nil, image_url: nil, page_url: nil, size: nil) + site = Sources::Strategies.find(url) + + assert_equal(site_name, site.site_name) + assert_equal(image_url, site.image_url) + assert_equal([image_url], site.image_urls) + assert_equal(image_url, site.canonical_url) + assert_equal(page_url, site.page_url) if page_url.present? 
+ assert_equal(size, site.size) + assert_nothing_raised { site.to_h } + end + context "Yande.re:" do context "A 'https://yande.re/jpeg/:hash/:file.jpg' jpeg sample url" do should "download the original file" do @source = "https://yande.re/jpeg/2c6876ac2317fce617e3c5f1a642123b/yande.re%20292092%20hatsune_miku%20tid%20vocaloid.jpg" - @rewrite = "https://yande.re/image/2c6876ac2317fce617e3c5f1a642123b/yande.re%20292092%20hatsune_miku%20tid%20vocaloid.png" + @rewrite = "https://files.yande.re/image/2c6876ac2317fce617e3c5f1a642123b/yande.re%20292092.png" assert_rewritten(@rewrite, @source) assert_downloaded(1_050_117, @source) end end - context "A 'https://files.yande.re/sample/:hash/:file.jpg' sample url" do + context "Fetching data for an active yande.re .jpg post" do should "work" do - @site = Sources::Strategies.find("https://files.yande.re/sample/7ecfdead705d7b956b26b1d37b98d089/yande.re%20482880%20sample%20bayashiko%20journey_to_the_west%20sun_wukong.jpg") + @samp = "https://files.yande.re/sample/7ecfdead705d7b956b26b1d37b98d089/yande.re%20482880%20sample%20bayashiko%20journey_to_the_west%20sun_wukong.jpg" + @full = "https://files.yande.re/image/7ecfdead705d7b956b26b1d37b98d089/yande.re%20482880.jpg" + @page = "https://yande.re/post/show/482880" + @size = 362_554 - assert_equal("yande.re", @site.site_name) - assert_equal(@site.image_url, @site.canonical_url) - assert_nothing_raised { @site.to_h } + assert_source_data_equals(@samp, site_name: "yande.re", image_url: @full, page_url: @page, size: @size) + assert_source_data_equals(@full, site_name: "yande.re", image_url: @full, page_url: @page, size: @size) + assert_source_data_equals(@page, site_name: "yande.re", image_url: @full, page_url: @page, size: @size) + end + end + + context "Fetching data for a deleted yande.re .png post with the post id" do + should "work" do + @samp = 
"https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018%20detexted%20misaki_kurehito%20saenai_heroine_no_sodatekata%20sawamura_spencer_eriri%20thighhighs.jpg" + @jpeg = "https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018%20detexted%20misaki_kurehito%20saenai_heroine_no_sodatekata%20sawamura_spencer_eriri%20thighhighs.jpg" + @full = "https://files.yande.re/image/fb27a7ea6c48b2ef76fe915e378b9098/yande.re%20398018.png" + @page = "https://yande.re/post/show/398018" + @size = 9_118_998 + + assert_source_data_equals(@samp, site_name: "yande.re", image_url: @full, page_url: @page, size: @size) + assert_source_data_equals(@jpeg, site_name: "yande.re", image_url: @full, page_url: @page, size: @size) + assert_source_data_equals(@full, site_name: "yande.re", image_url: @full, page_url: @page, size: @size) + assert_source_data_equals(@page, site_name: "yande.re", image_url: @full, page_url: @page, size: @size) + end + end + + context "Fetching data for a deleted yande.re .png post without the post id" do + should "work" do + @samp = "https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098.jpg" + @jpeg = "https://files.yande.re/jpeg/fb27a7ea6c48b2ef76fe915e378b9098.jpg" + @full = "https://files.yande.re/image/fb27a7ea6c48b2ef76fe915e378b9098.png" + @size = 9_118_998 + + assert_source_data_equals(@samp, site_name: "yande.re", image_url: @full, size: @size) + assert_source_data_equals(@jpeg, site_name: "yande.re", image_url: @full, size: @size) + assert_source_data_equals(@full, site_name: "yande.re", image_url: @full, size: @size) + end + end + end + + context "Konachan.com:" do + context "Fetching data for an active konachan.com .png post" do + should "work" do + @samp = "https://konachan.com/sample/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916%20sample.jpg" + @jpeg = 
"https://konachan.com/jpeg/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916%20anthropomorphism%20bed%20blonde_hair%20bow%20brown_eyes%20doll%20girls_frontline%20hara_shoutarou%20hoodie%20long_hair%20pantyhose%20scar%20skirt%20twintails.jpg" + @full = "https://konachan.com/image/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916.png" + @page = "https://konachan.com/post/show/270916" + @size = 8_167_593 + + assert_source_data_equals(@samp, site_name: "konachan.com", image_url: @full, page_url: @page, size: @size) + assert_source_data_equals(@jpeg, site_name: "konachan.com", image_url: @full, page_url: @page, size: @size) + assert_source_data_equals(@full, site_name: "konachan.com", image_url: @full, page_url: @page, size: @size) + assert_source_data_equals(@page, site_name: "konachan.com", image_url: @full, page_url: @page, size: @size) end end end