diff --git a/app/logical/source/extractor.rb b/app/logical/source/extractor.rb
index 3113451c4..335132d88 100644
--- a/app/logical/source/extractor.rb
+++ b/app/logical/source/extractor.rb
@@ -55,6 +55,7 @@ module Source
       Source::Extractor::Booth,
       Source::Extractor::Anifty,
       Source::Extractor::Furaffinity,
+      Source::Extractor::Reddit,
     ]
 
     # Should return true if the extractor is configured correctly. Return false
diff --git a/app/logical/source/extractor/reddit.rb b/app/logical/source/extractor/reddit.rb
new file mode 100644
index 000000000..83eae867c
--- /dev/null
+++ b/app/logical/source/extractor/reddit.rb
@@ -0,0 +1,82 @@
+# frozen_string_literal: true
+
+# Extracts image URLs and post metadata for Reddit posts, galleries, and direct
+# image links.
+#
+# @see Source::URL::Reddit
+module Source
+  class Extractor
+    class Reddit < Source::Extractor
+      def match?
+        Source::URL::Reddit === parsed_url
+      end
+
+      # Returns the full-size image URLs for the source. A direct image URL is
+      # normalized by itself; otherwise the images are read from the post data,
+      # falling back to the URL as given when no post data is available.
+      def image_urls
+        if parsed_url&.full_image_url.present?
+          [parsed_url.full_image_url]
+        elsif data.present?
+          images = [data.dig("media", "resolutions", 0, "url"), *ordered_gallery_images]
+          images.compact.uniq.filter_map { |url| Source::URL.parse(url)&.full_image_url }
+        else
+          [parsed_url.original_url]
+        end
+      end
+
+      # Returns the gallery image URLs in the order given by the gallery's item
+      # list. Items without matching metadata are skipped rather than raising.
+      def ordered_gallery_images
+        gallery_images = data.dig("media", "mediaMetadata")
+        return [] if gallery_images.blank?
+
+        gallery_items = data.dig("media", "gallery", "items") || []
+        gallery_items.filter_map { |item| gallery_images.dig(item["mediaId"], "s", "u") }
+      end
+
+      def profile_url
+        return nil if artist_name.blank?
+        "https://reddit.com/user/#{artist_name}"
+      end
+
+      def page_url
+        data["permalink"] || parsed_url.page_url || parsed_referer&.page_url
+      end
+
+      def artist_commentary_title
+        data["title"]
+      end
+
+      def work_id
+        parsed_url.work_id || parsed_referer&.work_id
+      end
+
+      def artist_name
+        data["author"] || parsed_url.username || parsed_referer&.username
+      end
+
+      # Fetches the post's JSON from the script#data element of its gallery page.
+      # Always returns a Hash; it is empty when there is no work id, the request
+      # fails, or the page has no usable post data, so callers can index it safely.
+      def data
+        return {} if work_id.blank?
+
+        response = http.cache(1.minute).get("https://reddit.com/gallery/#{work_id}")
+        return {} if response.status != 200
+
+        json_string = response.parse&.at("script#data").to_s[/\s({.*})/, 1]
+        return {} if json_string.blank? # JSON.parse(nil) raises TypeError, not ParserError
+
+        posts = JSON.parse(json_string).with_indifferent_access.dig("posts", "models")
+        return {} if posts.blank?
+
+        posts.values.min_by { |post| post["created"].to_i } # to avoid reposts
+      rescue JSON::ParserError
+        {}
+      end
+
+      memoize :data
+    end
+  end
+end
diff --git a/app/logical/source/url/reddit.rb b/app/logical/source/url/reddit.rb
index d96e5ece9..615007f99 100644
--- a/app/logical/source/url/reddit.rb
+++ b/app/logical/source/url/reddit.rb
@@ -3,7 +3,7 @@
 module Source
   class URL
     class Reddit < Source::URL
-      attr_reader :subreddit, :work_id, :title, :username
+      attr_reader :subreddit, :work_id, :title, :username, :file
 
       def self.match?(url)
         url.domain.in?(["reddit.com", "redd.it"])
@@ -14,11 +14,14 @@ module Source
 
         # https://i.redd.it/p5utgk06ryq81.png
         # https://preview.redd.it/qoyhz3o8yde71.jpg?width=1440&format=pjpg&auto=webp&s=5cbe3b0b097d6e7263761c461dae19a43038db22
+        in ("i" | "preview"), "redd.it", file
+          @file = file
+
         # https://external-preview.redd.it/92G2gkb545UNlA-PywJqM_F-4TT0xngvmf_gb9sFDqk.jpg?auto=webp&s=0f1e3d0603dbaabe1ead7352202d0de1653d76f6
         # https://g.redditmedia.com/f-OWw5C5aVumPS4HXVFhTspgzgQB4S77mO-6ad0rzpg.gif?fm=mp4&mp4-fragmented=false&s=ed3d767bf3b0360a50ddd7f503d46225
         # https://i.redditmedia.com/9cYFBDQ3QsqWnF9v7EhW5uOcQNHz1Ak9_E1zVNeSLek.png?s=6fee1bb56e7d926847dc3ece01a1ffd4
         in *rest if image_url?
-        # pass
+          # pass
 
         # https://www.reddit.com/user/xSlimes
         # https://www.reddit.com/u/Valshier
@@ -73,6 +76,14 @@ module Source
       def profile_url
         "https://www.reddit.com/user/#{username}" if username.present?
       end
+
+      # Returns the i.redd.it URL for i.redd.it and preview.redd.it images, the
+      # URL unchanged for other image URLs, and nil for non-image URLs.
+      def full_image_url
+        return unless image_url?
+        return "https://i.redd.it/#{file}" if file.present?
+        original_url
+      end
     end
   end
 end
diff --git a/test/unit/sources/reddit_test.rb b/test/unit/sources/reddit_test.rb
index fded82174..1aba6e195 100644
--- a/test/unit/sources/reddit_test.rb
+++ b/test/unit/sources/reddit_test.rb
@@ -2,6 +2,75 @@ require 'test_helper'
 
 module Sources
   class RedditTest < ActiveSupport::TestCase
+    context "A reddit post" do
+      strategy_should_work(
+        "https://www.reddit.com/gallery/ttyccp",
+        image_urls: [
+          "https://i.redd.it/p5utgk06ryq81.png",
+          "https://i.redd.it/qtdv0k06ryq81.png",
+          "https://i.redd.it/0m8f6k06ryq81.png",
+          "https://i.redd.it/oc5y8k06ryq81.png",
+        ],
+        artist_name: "Darksin31",
+        profile_url: "https://reddit.com/user/Darksin31",
+        page_url: "https://www.reddit.com/r/arknights/comments/ttyccp/maria_nearl_versus_the_leftarmed_knight_dankestsin/",
+        artist_commentary_title: "Maria Nearl Versus the Left-Armed Knight (@dankestsin)"
+      )
+    end
+
+    context "A crosspost" do
+      strategy_should_work(
+        "https://www.reddit.com/gallery/yc0b8g",
+        image_urls: ["https://i.redd.it/eao0je8wzlv91.jpg"],
+        page_url: "https://www.reddit.com/r/furrymemes/comments/ybr04z/_/",
+        profile_url: "https://reddit.com/user/lightmare69",
+        artist_name: "lightmare69",
+        artist_commentary_title: "\u{1FAF5}😐"
+      )
+    end
+
+    context "An age-restricted post" do
+      strategy_should_work(
+        "https://www.reddit.com/r/Genshin_Impact/comments/u9zilq/cookie_shinobu",
+        image_urls: ["https://i.redd.it/bxh5xkp088v81.jpg"],
+        profile_url: "https://reddit.com/user/onethingidkwhy",
+        artist_name: "onethingidkwhy",
+        artist_commentary_title: "cookie shinobu"
+      )
+    end
+
+    context "A reddit image" do
+      strategy_should_work(
+        "https://i.redd.it/oc5y8k06ryq81.png",
+        image_urls: ["https://i.redd.it/oc5y8k06ryq81.png"],
+        download_size: 940_616,
+        page_url: nil
+      )
+    end
+
+    context "A reddit image sample" do
+      strategy_should_work(
+        "https://preview.redd.it/qtdv0k06ryq81.png?width=960&crop=smart&auto=webp&s=3b1505f76f3c8b7ce47da5ab2dd17c511d3c2a44",
+        image_urls: ["https://i.redd.it/qtdv0k06ryq81.png"],
+        download_size: 699_898,
+        page_url: nil
+      )
+    end
+
+    context "A redditmedia url" do
+      strategy_should_work(
+        "https://i.redditmedia.com/9cYFBDQ3QsqWnF9v7EhW5uOcQNHz1Ak9_E1zVNeSLek.png?s=6fee1bb56e7d926847dc3ece01a1ffd4",
+        image_urls: ["https://i.redditmedia.com/9cYFBDQ3QsqWnF9v7EhW5uOcQNHz1Ak9_E1zVNeSLek.png?s=6fee1bb56e7d926847dc3ece01a1ffd4"]
+      )
+    end
+
+    context "An external preview url" do
+      strategy_should_work(
+        "https://external-preview.redd.it/92G2gkb545UNlA-PywJqM_F-4TT0xngvmf_gb9sFDqk.jpg?auto=webp&s=0f1e3d0603dbaabe1ead7352202d0de1653d76f6",
+        image_urls: ["https://external-preview.redd.it/92G2gkb545UNlA-PywJqM_F-4TT0xngvmf_gb9sFDqk.jpg?auto=webp&s=0f1e3d0603dbaabe1ead7352202d0de1653d76f6"]
+      )
+    end
+
     context "Reddit:" do
       should "Parse Reddit URLs correctly" do
         assert(Source::URL.image_url?("https://i.redd.it/p5utgk06ryq81.png"))