diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb index da38b5cd1..cb5f6e5ba 100644 --- a/app/logical/source/url.rb +++ b/app/logical/source/url.rb @@ -22,6 +22,7 @@ module Source Source::URL::HentaiFoundry, Source::URL::Lofter, Source::URL::Mastodon, + Source::URL::Moebooru, Source::URL::Newgrounds, Source::URL::Plurk, Source::URL::Skeb,
diff --git a/app/logical/source/url/moebooru.rb b/app/logical/source/url/moebooru.rb
new file mode 100644
index 000000000..5b75a07ba
--- /dev/null
+++ b/app/logical/source/url/moebooru.rb
@@ -0,0 +1,181 @@
+# frozen_string_literal: true
+
+# Original images:
+#
+# * https://yande.re/image/b4b1d11facd1700544554e4805d47bb6/.png
+# * https://files.yande.re/image/e4c2ba38de88ff1640aaebff84c84e81/469784.jpg
+# * https://files.yande.re/image/2a5d1d688f565cb08a69ecf4e35017ab/yande.re%20349790%20breast_hold%20kurashima_tomoyasu%20mahouka_koukou_no_rettousei%20naked%20nipples.jpg
+# * https://ayase.yande.re/image/2d0d229fd8465a325ee7686fcc7f75d2/yande.re%20192481%20animal_ears%20bunny_ears%20garter_belt%20headphones%20mitha%20stockings%20thighhighs.jpg
+# * https://yuno.yande.re/image/1764b95ae99e1562854791c232e3444b/yande.re%20281544%20cameltoe%20erect_nipples%20fundoshi%20horns%20loli%20miyama-zero%20sarashi%20sling_bikini%20swimsuits.jpg
+# * https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg
+#
+# Jpeg sample images (full size is .png):
+#
+# * https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg
+# * https://yande.re/jpeg/0c9ec0ffcaa40470093cb44c3fd40056/yande.re%2064649%20animal_ears%20cameltoe%20fixme%20nekomimi%20nipples%20ryohka%20school_swimsuit%20see_through%20shiraishi_nagomi%20suzuya%20swimsuits%20tail%20thighhighs.jpg
+# * https://konachan.com/jpeg/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20banishment%20bicycle%20grass%20group%20male%20night%20original%20rooftop%20scenic%20signed%20stars%20tree.jpg
+#
+# Sample images (full size is .png or .jpg):
+#
+# * https://yande.re/sample/ceb6a12e87945413a95b90fada406f91/.jpg
+# * https://files.yande.re/sample/0d79447ce2c89138146f64ba93633568/yande.re%20290757%20sample%20seifuku%20thighhighs%20tsukudani_norio.jpg
+# * https://konachan.com/sample/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20sample.jpg
+#
+# Preview images:
+#
+# * https://assets.yande.re/data/preview/7e/cf/7ecfdead705d7b956b26b1d37b98d089.jpg
+# * https://konachan.com/data/preview/5d/63/5d633771614e4bf5c17df19a0f0f333f.jpg
+#
+# Post pages:
+#
+# * https://yande.re/post/show/3
+# * https://konachan.com/post/show/270803/banishment-bicycle-grass-group-male-night-original
+
+# Parser for yande.re and konachan.com URLs. Depending on the URL shape, #parse
+# sets the post ID (work_id), the file's MD5 (md5), the image variant
+# (sample_type: "sample", "jpeg" or "image") and the extension of the full-size
+# file (original_file_ext). Attributes the URL doesn't provide stay nil.
+class Source::URL::Moebooru < Source::URL
+  attr_reader :work_id, :md5, :sample_type, :original_file_ext
+
+  # @param url [#domain] the URL to test
+  # @return [Boolean] true if the URL's domain is yande.re or konachan.com
+  def self.match?(url)
+    url.domain.in?(%w[yande.re konachan.com])
+  end
+
+  # Matches the domain and path segments against each known URL shape and sets
+  # the attributes that shape provides. Unrecognized URLs set nothing.
+  def parse
+    case [domain, *path_segments]
+
+    # https://yande.re/post/show/3
+    # https://konachan.com/post/show/270803/banishment-bicycle-grass-group-male-night-original
+    in _, "post", "show", work_id, *rest
+      @work_id = work_id
+
+    # https://assets.yande.re/data/preview/7e/cf/7ecfdead705d7b956b26b1d37b98d089.jpg
+    # https://konachan.com/data/preview/5d/63/5d633771614e4bf5c17df19a0f0f333f.jpg
+    in _, "data", "preview", *subdirs, /\A(\h{32})\.jpg\z/
+      @md5 = $1
+
+    # https://yande.re/sample/ceb6a12e87945413a95b90fada406f91/.jpg
+    # https://files.yande.re/sample/0d79447ce2c89138146f64ba93633568/yande.re%20290757%20sample%20seifuku%20thighhighs%20tsukudani_norio.jpg
+    # https://konachan.com/sample/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20sample.jpg
+    #
+    # https://yande.re/jpeg/0c9ec0ffcaa40470093cb44c3fd40056/yande.re%2064649%20animal_ears%20cameltoe%20fixme%20nekomimi%20nipples%20ryohka%20school_swimsuit%20see_through%20shiraishi_nagomi%20suzuya%20swimsuits%20tail%20thighhighs.jpg
+    # https://konachan.com/jpeg/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20banishment%20bicycle%20grass%20group%20male%20night%20original%20rooftop%20scenic%20signed%20stars%20tree.jpg
+    #
+    # https://yande.re/image/b4b1d11facd1700544554e4805d47bb6/.png
+    # https://files.yande.re/image/2a5d1d688f565cb08a69ecf4e35017ab/yande.re%20349790%20breast_hold%20kurashima_tomoyasu%20mahouka_koukou_no_rettousei%20naked%20nipples.jpg
+    # https://ayase.yande.re/image/2d0d229fd8465a325ee7686fcc7f75d2/yande.re%20192481%20animal_ears%20bunny_ears%20garter_belt%20headphones%20mitha%20stockings%20thighhighs.jpg
+    # https://yuno.yande.re/image/1764b95ae99e1562854791c232e3444b/yande.re%20281544%20cameltoe%20erect_nipples%20fundoshi%20horns%20loli%20miyama-zero%20sarashi%20sling_bikini%20swimsuits.jpg
+    # https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg
+    #
+    # https://files.yande.re/image/e4c2ba38de88ff1640aaebff84c84e81/469784.jpg
+    in _, ("sample" | "jpeg" | "image") => sample_type, /\A\h{32}\z/ => md5, filename
+      @sample_type = sample_type
+      @md5 = md5
+      @work_id = parse_filename(filename)
+      @original_file_ext = parse_original_file_ext(sample_type, filename)
+
+    # https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg
+    # https://files.yande.re/image/22577d2344fe694cf47f80563031b3cd.png
+    # https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098.jpg
+    in _, ("sample" | "jpeg" | "image") => sample_type, /\A(\h{32})\.\w+\z/ => filename
+      @sample_type = sample_type
+      @md5 = $1
+      @original_file_ext = parse_original_file_ext(sample_type, filename)
+
+    else
+    end
+  end
+
+  # Extracts the post ID from the filename of an image URL.
+  #
+  # @param filename [String] the last path segment; may be percent-encoded
+  # @return [String, nil] the post ID, or nil if the filename doesn't contain one
+  def parse_filename(filename)
+    basename = File.basename(filename, ".*")
+
+    case CGI.unescape(basename).split
+    # yande.re 290757 sample seifuku thighhighs tsukudani_norio
+    # yande.re 290757
+    in "yande.re", /\A\d+\z/ => work_id, *rest
+      work_id
+
+    # Konachan.com - 270803 sample
+    in "Konachan.com", "-", /\A\d+\z/ => work_id, *rest
+      work_id
+
+    # 469784
+    in [/\A\d+\z/ => work_id]
+      work_id
+
+    else
+    end
+  end
+
+  # Determines the extension of the full-size file from the image variant.
+  #
+  # @param sample_type [String] "sample", "jpeg" or "image"
+  # @param filename [String] the last path segment of the URL
+  # @return [String, nil] the extension without the dot, or nil for "sample"
+  #   URLs, whose full-size file is either .png or .jpg
+  def parse_original_file_ext(sample_type, filename)
+    case sample_type
+    when "image" then File.extname(filename).delete_prefix(".")
+    when "jpeg" then "png" # the full size of a /jpeg/ sample is always .png
+    end
+  end
+
+  # @return [String, nil] "Yande.re" or "Konachan"; nil for any other domain
+  def site_name
+    case domain
+    when "yande.re" then "Yande.re"
+    when "konachan.com" then "Konachan"
+    end
+  end
+
+  # Builds the URL of a post's preview image.
+  #
+  # @param site_name [String] "Yande.re" or "Konachan"
+  # @param md5 [String] the MD5 of the post's file
+  # @return [String, nil] the preview URL, or nil if the site isn't recognized
+  def self.preview_image_url(site_name, md5)
+    case site_name
+    when "Yande.re"
+      "https://files.yande.re/data/preview/#{md5[0..1]}/#{md5[2..3]}/#{md5}.jpg"
+    when "Konachan"
+      "https://konachan.com/data/preview/#{md5[0..1]}/#{md5[2..3]}/#{md5}.jpg"
+    end
+  end
+
+  # Builds the URL of a post's full-size image.
+  #
+  # @param site_name [String] "Yande.re" or "Konachan"
+  # @param md5 [String] the MD5 of the post's file
+  # @param file_ext [String] the file extension, without the dot
+  # @param post_id [#to_s, nil] the post ID, if known
+  # @return [String, nil] the image URL, or nil if the site isn't recognized
+  def self.full_image_url(site_name, md5, file_ext, post_id = nil)
+    case site_name
+    when "Yande.re"
+      file_host = "files.yande.re"
+      filename_prefix = "yande.re%20"
+    when "Konachan"
+      file_host = "konachan.com"
+      filename_prefix = "Konachan.com%20-%20"
+    else
+      # like preview_image_url, return nil rather than a URL with an empty host.
+      return nil
+    end
+
+    # try to include the post_id so that it's saved for posterity in the canonical_url.
+    if post_id.present?
+      "https://#{file_host}/image/#{md5}/#{filename_prefix}#{post_id}.#{file_ext}"
+    else
+      "https://#{file_host}/image/#{md5}.#{file_ext}"
+    end
+  end
+end
diff --git a/app/logical/sources/strategies/moebooru.rb b/app/logical/sources/strategies/moebooru.rb index 7838e9ecf..eb6c50ae2 100644 --- a/app/logical/sources/strategies/moebooru.rb +++ b/app/logical/sources/strategies/moebooru.rb @@ -1,77 +1,29 @@ # frozen_string_literal: true -# Original images: -# -# * https://yande.re/image/b4b1d11facd1700544554e4805d47bb6/.png -# * https://files.yande.re/image/e4c2ba38de88ff1640aaebff84c84e81/469784.jpg -# * https://files.yande.re/image/2a5d1d688f565cb08a69ecf4e35017ab/yande.re%20349790%20breast_hold%20kurashima_tomoyasu%20mahouka_koukou_no_rettousei%20naked%20nipples.jpg -# * https://ayase.yande.re/image/2d0d229fd8465a325ee7686fcc7f75d2/yande.re%20192481%20animal_ears%20bunny_ears%20garter_belt%20headphones%20mitha%20stockings%20thighhighs.jpg -# * https://yuno.yande.re/image/1764b95ae99e1562854791c232e3444b/yande.re%20281544%20cameltoe%20erect_nipples%20fundoshi%20horns%20loli%20miyama-zero%20sarashi%20sling_bikini%20swimsuits.jpg -# * https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg -# -# Jpeg sample images (full size is .png): -# -# * https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg -# * https://yande.re/jpeg/0c9ec0ffcaa40470093cb44c3fd40056/yande.re%2064649%20animal_ears%20cameltoe%20fixme%20nekomimi%20nipples%20ryohka%20school_swimsuit%20see_through%20shiraishi_nagomi%20suzuya%20swimsuits%20tail%20thighhighs.jpg -# * https://konachan.com/jpeg/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20banishment%20bicycle%20grass%20group%20male%20night%20original%20rooftop%20scenic%20signed%20stars%20tree.jpg -# -# Sample images (full size is .png or .jpg): -# -# * 
https://yande.re/sample/ceb6a12e87945413a95b90fada406f91/.jpg -# * https://files.yande.re/sample/0d79447ce2c89138146f64ba93633568/yande.re%20290757%20sample%20seifuku%20thighhighs%20tsukudani_norio.jpg -# * https://konachan.com/sample/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20sample.jpg -# -# Preview images: -# -# * https://assets.yande.re/data/preview/7e/cf/7ecfdead705d7b956b26b1d37b98d089.jpg -# * https://konachan.com/data/preview/5d/63/5d633771614e4bf5c17df19a0f0f333f.jpg -# -# Post pages: -# -# * https://yande.re/post/show/3 -# * https://konachan.com/post/show/270803/banishment-bicycle-grass-group-male-night-original - +# @see Source::URL::Moebooru module Sources module Strategies class Moebooru < Base - BASE_URL = %r{\Ahttps?://(?:[^.]+\.)?(?yande\.re|konachan\.com)}i - POST_URL = %r{#{BASE_URL}/post/show/(?\d+)}i - URL_SLUG = %r{/(?:yande\.re%20|Konachan\.com%20-%20)?(?\d+)?.*}i - IMAGE_URL = %r{#{BASE_URL}/(?image|jpeg|sample)/(?\h{32})#{URL_SLUG}?\.(?jpg|jpeg|png|gif)\z}i - delegate :artist_name, :profile_url, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_strategy, allow_nil: true + delegate :site_name, :domain, to: :parsed_url - def domains - ["yande.re", "konachan.com"] - end - - def site_name - urls.map { |url| url[BASE_URL, :domain] }.compact.first - end - - def image_url - # try to include the post_id so that it's saved for posterity in the canonical_url. - if post_md5.present? && file_ext.present? && post_id.present? - "https://#{file_host}/image/#{post_md5}/#{filename_prefix}#{post_id}.#{file_ext}" - elsif post_md5.present? && file_ext.present? - "https://#{file_host}/image/#{post_md5}.#{file_ext}" - else - url - end + def match? + Source::URL::Moebooru === parsed_url end def image_urls - [image_url] + return [] if post_md5.blank? || file_ext.blank? 
+ [Source::URL::Moebooru.full_image_url(site_name, post_md5, file_ext, post_id)] end def preview_urls return image_urls if post_md5.blank? - ["https://#{file_host}/data/preview/#{post_md5[0..1]}/#{post_md5[2..3]}/#{post_md5}.jpg"] + [Source::URL::Moebooru.preview_image_url(site_name, post_md5)] end def page_url return nil if post_id.blank? - "https://#{site_name}/post/show/#{post_id}" + "https://#{domain}/post/show/#{post_id}" end def canonical_url @@ -83,15 +35,15 @@ module Sources md5 = post_md5_from_url if id.present? - "https://#{site_name}/post/show/#{id}" + "https://#{domain}/post/show/#{id}" elsif md5.present? - "https://#{site_name}/post?tags=md5:#{md5}" + "https://#{domain}/post?tags=md5:#{md5}" end end def tags api_response[:tags].to_s.split.map do |tag| - [tag, "https://#{site_name}/post?tags=#{CGI.escape(tag)}"] + [tag, "https://#{domain}/post?tags=#{CGI.escape(tag)}"] end end @@ -111,7 +63,7 @@ module Sources return {} end - response = http.cache(1.minute).get("https://#{site_name}/post.json", params: params) + response = http.cache(1.minute).get("https://#{domain}/post.json", params: params) post = response.parse.first&.with_indifferent_access post || {} end @@ -122,26 +74,9 @@ module Sources @sub_strategy ||= Sources::Strategies.find(api_response[:source], default: nil) end - def file_host - case site_name - when "yande.re" then "files.yande.re" - when "konachan.com" then "konachan.com" - end - end - - def filename_prefix - case site_name - when "yande.re" then "yande.re%20" - when "konachan.com" then "Konachan.com%20-%20" - end - end - def file_ext - if url[IMAGE_URL, :type] == "jpeg" - "png" - - elsif url[IMAGE_URL, :type] == "image" - url[IMAGE_URL, :ext] + if parsed_url.original_file_ext.present? + parsed_url.original_file_ext # file_ext is not present in konachan's api (only on yande.re) elsif api_response[:file_ext].present? @@ -153,7 +88,7 @@ module Sources # the api_response wasn't available because it's a deleted post. 
elsif post_md5.present? - %w[jpg png gif].find { |ext| http_exists?("https://#{site_name}/image/#{post_md5}.#{ext}") } + %w[jpg png gif].find { |ext| http_exists?("https://#{domain}/image/#{post_md5}.#{ext}") } else nil @@ -161,11 +96,11 @@ module Sources end def post_id_from_url - urls.map { |url| url[POST_URL, :id] || url[IMAGE_URL, :id] }.compact.first + parsed_url.work_id || parsed_referer&.work_id end def post_md5_from_url - urls.map { |url| url[IMAGE_URL, :md5] }.compact.first + parsed_url.md5 || parsed_referer&.md5 end def post_id diff --git a/test/unit/sources/moebooru_test.rb b/test/unit/sources/moebooru_test.rb index cffb05f52..51cb58fe8 100644 --- a/test/unit/sources/moebooru_test.rb +++ b/test/unit/sources/moebooru_test.rb @@ -27,9 +27,9 @@ module Sources end end - context "A 'https://assets.yande.re/preview/:hh/:hh/:file.jpg' preview url" do + context "A 'https://files.yande.re/preview/:hh/:hh/:file.jpg' preview url" do should "return a non-empty list of preview_urls" do - url = "https://assets.yande.re/data/preview/7c/d1/7cd124fc28203233cce3bade26651d43.jpg" + url = "https://files.yande.re/data/preview/7c/d1/7cd124fc28203233cce3bade26651d43.jpg" site = Sources::Strategies.find(url) assert_equal([url], site.preview_urls) @@ -45,7 +45,7 @@ module Sources @tags = ["bayashiko", "journey_to_the_west", "sun_wukong"] @size = 362_554 @profile_url = "https://twitter.com/apononori" - @data = { site_name: "yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url } + @data = { site_name: "Yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url } assert_source_data_equals(@samp, **@data) assert_source_data_equals(@full, **@data) @@ -62,7 +62,7 @@ module Sources @page = "https://yande.re/post/show/398018" @tags = ["misaki_kurehito", "saenai_heroine_no_sodatekata", "sawamura_spencer_eriri", "detexted", "thighhighs"] @size = 9_118_998 - @data = { 
site_name: "yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil } + @data = { site_name: "Yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil } assert_source_data_equals(@samp, **@data) assert_source_data_equals(@jpeg, **@data) @@ -79,7 +79,7 @@ module Sources @full = "https://files.yande.re/image/fb27a7ea6c48b2ef76fe915e378b9098.png" @tags = [] @size = 9_118_998 - @data = { site_name: "yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil } + @data = { site_name: "Yande.re", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: nil } assert_source_data_equals(@samp, **@data) assert_source_data_equals(@jpeg, **@data) @@ -104,7 +104,7 @@ module Sources ] @profile_url = "https://www.pixiv.net/users/22528152" - @data = { site_name: "konachan.com", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url } + @data = { site_name: "Konachan", preview_url: @prev, image_url: @full, page_url: @page, size: @size, tags: @tags, profile_url: @profile_url } assert_source_data_equals(@samp, **@data) assert_source_data_equals(@jpeg, **@data) assert_source_data_equals(@full, **@data)