diff --git a/app/logical/source/extractor.rb b/app/logical/source/extractor.rb
index 5bc6e9ef9..b77a2ba61 100644
--- a/app/logical/source/extractor.rb
+++ b/app/logical/source/extractor.rb
@@ -57,6 +57,7 @@ module Source
       Source::Extractor::Furaffinity,
       Source::Extractor::Reddit,
       Source::Extractor::Bilibili,
+      Source::Extractor::Rule34DotUs,
     ]
 
     # Should return true if the extractor is configured correctly. Return false
diff --git a/app/logical/source/extractor/rule34_dot_us.rb b/app/logical/source/extractor/rule34_dot_us.rb
new file mode 100644
index 000000000..d87597921
--- /dev/null
+++ b/app/logical/source/extractor/rule34_dot_us.rb
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+# https://rule34.us is running a modified fork of Gelbooru 0.1, so its structure is similar but not identical to that of
+# other Gelbooru-based sites.
+#
+# @see Source::Extractor::Gelbooru
+# @see Source::URL::Rule34DotUs
+# @see https://rule34.us
+module Source
+  class Extractor
+    class Rule34DotUs < Source::Extractor
+      # @return [Boolean] true if the URL was parsed as a Source::URL::Rule34DotUs
+      def match?
+        Source::URL::Rule34DotUs === parsed_url
+      end
+
+      # Returns the URL itself when it is already a full-size image URL; otherwise scrapes the image link from the
+      # post page.
+      #
+      # @return [Array<String>] at most one URL; empty if no image link was found
+      def image_urls
+        if parsed_url.full_image_url.present?
+          [parsed_url.full_image_url]
+        else
+          image_url = page&.css(".tag-list-left > a[href*='/images/']")&.attr("href")&.value
+          [image_url].compact
+        end
+      end
+
+      # @return [String, nil] the post page URL, or nil if no post ID could be determined
+      def page_url
+        "https://rule34.us/index.php?r=posts/view&id=#{post_id}" if post_id.present?
+      end
+
+      # Reads the tags from the page's `<meta name="keywords">` element, which lists them separated by ", ".
+      #
+      # @return [Array<Array(String, String)>] pairs of tag name (spaces replaced by underscores) and tag search URL
+      def tags
+        page&.css("meta[name='keywords']")&.attr("content")&.value.to_s.split(/, /).map do |tag|
+          [tag.tr(" ", "_"), "https://rule34.us/index.php?r=posts/index&q=#{CGI.escape(tag)}"]
+        end
+      end
+
+      # @return [Integer, String, nil] the post ID from the URL, else from the referer, else from the page title
+      def post_id
+        parsed_url.post_id || parsed_referer&.post_id || post_id_from_page
+      end
+
+      # @return [String, nil] the digits at the very end of the page title, if any
+      def post_id_from_page
+        # title = "Rule34 - If it exists, there is porn of it / sora / 6204967"
+        page&.title.to_s[/([0-9]+)\z/, 1]
+      end
+
+      # The page to scrape. Despite the name, this is the `page_url` of the URL (or else of the referer), not a
+      # separate API endpoint.
+      #
+      # @return [String, nil]
+      def api_url
+        parsed_url.page_url || parsed_referer&.page_url
+      end
+
+      # Fetches and parses the page at {#api_url}.
+      #
+      # @return [Object, nil] the parsed response, or nil if there is no URL to fetch or the status is not 200
+      memoize def page
+        return nil if api_url.blank?
+
+        response = http.cache(1.minute).get(api_url)
+        return nil unless response.status == 200
+
+        response.parse
+      end
+    end
+  end
+end
diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb
index 9da644091..9cc22d98b 100644
--- a/app/logical/source/url.rb
+++ b/app/logical/source/url.rb
@@ -60,6 +60,7 @@ module Source
       Source::URL::Anifty,
       Source::URL::Furaffinity,
       Source::URL::Bilibili,
+      Source::URL::Rule34DotUs,
     ]
 
     # Parse a URL into a subclass of Source::URL, or raise an exception if the URL is not a valid HTTP or HTTPS URL.
diff --git a/app/logical/source/url/rule34_dot_us.rb b/app/logical/source/url/rule34_dot_us.rb
new file mode 100644
index 000000000..3cd5391d8
--- /dev/null
+++ b/app/logical/source/url/rule34_dot_us.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+# https://rule34.us is running a modified fork of Gelbooru 0.1, so its URL structure is similar but not identical to
+# that of other Gelbooru-based sites.
+#
+# @see Source::URL::Gelbooru
+class Source::URL::Rule34DotUs < Source::URL
+  attr_reader :post_id, :md5, :image_type, :full_image_url
+
+  # @return [Boolean] true if the URL's domain is rule34.us
+  def self.match?(url)
+    url.domain.in?(%w[rule34.us])
+  end
+
+  # Sets the post ID, MD5, image type and full image URL, depending on which of the URL shapes below is matched.
+  def parse
+    case [domain, *path_segments]
+
+    # https://rule34.us/index.php?r=posts/view&id=6204967
+    in _, "index.php" if params[:r] == "posts/view" && params[:id].present?
+      @post_id = params[:id].to_i
+
+    # https://rule34.us/hotlink.php?hash=236690fd962fa394edf9894450261dac
+    in _, "hotlink.php" if params[:hash]&.match?(/\A\h{32}\z/)
+      @md5 = params[:hash]
+
+    # https://img2.rule34.us/thumbnails/23/66/thumbnail_236690fd962fa394edf9894450261dac.jpg
+    # https://img2.rule34.us/images/23/66/236690fd962fa394edf9894450261dac.png
+    # https://video.rule34.us/images/d8/1d/d81d79f0292cdb096a8653efa001342d.webm
+    # no samples
+    in _, ("images" | "thumbnails") => image_type, /\A\h{2}\z/, /\A\h{2}\z/, /\A(?:thumbnail_)?(\h{32})\.\w+\z/i
+      @md5 = $1 # the capture from the filename pattern, which is the last regex matched
+      @image_type = image_type
+      @full_image_url = url.to_s if image_type == "images"
+
+    else
+      nil
+    end
+  end
+
+  # @return [Boolean] true if the URL was recognized as an image or thumbnail URL
+  def image_url?
+    image_type.present?
+  end
+
+  # @return [String, nil] the post page URL if the post ID is known, else the hotlink.php URL for the MD5, else nil
+  def page_url
+    if post_id.present?
+      "https://rule34.us/index.php?r=posts/view&id=#{post_id}"
+    elsif md5.present?
+      "https://rule34.us/hotlink.php?hash=#{md5}"
+    end
+  end
+end
diff --git a/test/functional/uploads_controller_test.rb b/test/functional/uploads_controller_test.rb
index 3fbb6f221..0b7af84be 100644
--- a/test/functional/uploads_controller_test.rb
+++ b/test/functional/uploads_controller_test.rb
@@ -444,6 +444,7 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
         should_upload_successfully("https://safebooru.org/index.php?page=post&s=view&id=4189916")
         should_upload_successfully("https://tbib.org/index.php?page=post&s=view&id=11480218")
         should_upload_successfully("https://rule34.xxx/index.php?page=post&s=view&id=6961597")
+        should_upload_successfully("https://rule34.us/index.php?r=posts/view&id=6204967")
 
         should_upload_successfully("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663")
         should_upload_successfully("http://seiga.nicovideo.jp/seiga/im4937663")
diff --git a/test/unit/sources/rule34_dot_us_test.rb b/test/unit/sources/rule34_dot_us_test.rb
new file mode 100644
index 000000000..2020cc0a7
--- /dev/null
+++ b/test/unit/sources/rule34_dot_us_test.rb
@@ -0,0 +1,37 @@
+require "test_helper"
+
+module Sources
+  class Rule34DotUsTest < ActiveSupport::TestCase
+    context "Rule34.us:" do
+      context "A https://rule34.us/index.php?r=posts/view&id=$post_id URL" do
+        strategy_should_work(
+          "https://rule34.us/index.php?r=posts/view&id=6204967",
+          page_url: "https://rule34.us/index.php?r=posts/view&id=6204967",
+          image_urls: ["https://img2.rule34.us/images/23/66/236690fd962fa394edf9894450261dac.png"],
+          tags: %w[sora kingdom_hearts rule_63 ai_generated brown_hair female genderswap_(mtf) nai_diffusion stable_diffusion],
+          download_size: 503_358,
+        )
+      end
+
+      context "A https://rule34.us/hotlink.php?hash=$md5 URL" do
+        strategy_should_work(
+          "https://rule34.us/hotlink.php?hash=236690fd962fa394edf9894450261dac",
+          page_url: "https://rule34.us/index.php?r=posts/view&id=6204967",
+          image_urls: ["https://img2.rule34.us/images/23/66/236690fd962fa394edf9894450261dac.png"],
+          tags: %w[sora kingdom_hearts rule_63 ai_generated brown_hair female genderswap_(mtf) nai_diffusion stable_diffusion],
+          download_size: 503_358,
+        )
+      end
+
+      context "A https://img2.rule34.us/images/xx/xx/$md5.png URL" do
+        strategy_should_work(
+          "https://img2.rule34.us/images/23/66/236690fd962fa394edf9894450261dac.png",
+          page_url: "https://rule34.us/index.php?r=posts/view&id=6204967",
+          image_urls: ["https://img2.rule34.us/images/23/66/236690fd962fa394edf9894450261dac.png"],
+          tags: %w[sora kingdom_hearts rule_63 ai_generated brown_hair female genderswap_(mtf) nai_diffusion stable_diffusion],
+          download_size: 503_358,
+        )
+      end
+    end
+  end
+end