From f062f2d14584a23461df73a375457dae44fdadd1 Mon Sep 17 00:00:00 2001 From: evazion Date: Fri, 25 Feb 2022 22:00:42 -0600 Subject: [PATCH] sources: factor out Source::URL::Newgrounds. Also fix it so that the image URL is set as the source for Newgrounds posts, not the page URL. It's possible to generate the page URL from the image URL (except for images after the first in multi-image posts). * Page: https://www.newgrounds.com/art/view/natthelich/weaver * Image: https://art.ngfiles.com/images/1520000/1520217_natthelich_weaver.jpg?f1606365031 --- app/logical/source/url.rb | 1 + app/logical/source/url/newgrounds.rb | 92 ++++++++++++++++++++ app/logical/sources/strategies/newgrounds.rb | 41 ++++----- test/unit/sources/newgrounds_test.rb | 16 +++- 4 files changed, 123 insertions(+), 27 deletions(-) create mode 100644 app/logical/source/url/newgrounds.rb diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb index 5600480c9..92bfd10bd 100644 --- a/app/logical/source/url.rb +++ b/app/logical/source/url.rb @@ -20,6 +20,7 @@ module Source SUBCLASSES = [ Source::URL::Twitter, Source::URL::HentaiFoundry, + Source::URL::Newgrounds, Source::URL::Plurk, Source::URL::Skeb, Source::URL::TwitPic, diff --git a/app/logical/source/url/newgrounds.rb b/app/logical/source/url/newgrounds.rb new file mode 100644 index 000000000..b819ba971 --- /dev/null +++ b/app/logical/source/url/newgrounds.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +# Image Urls +# +# * https://art.ngfiles.com/images/1543000/1543982_natthelich_pandora-2.jpg?f1607971817 +# * https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181 +# +# * https://www.newgrounds.com/art/view/natthelich/weaver (page) +# * https://art.ngfiles.com/images/1520000/1520217_natthelich_weaver.jpg?f1606365031 +# * https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg +# +# Thumbnail URLs +# +# * https://art.ngfiles.com/thumbnails/1543000/1543982_full.png?f1607971901 
+# * https://art.ngfiles.com/thumbnails/1254000/1254985.png?f1588263349 +# +# Page URLs +# +# * https://www.newgrounds.com/art/view/puddbytes/costanza-at-bat +# * https://www.newgrounds.com/art/view/natthelich/weaver (multiple) +# +# Video URLs +# +# * https://www.newgrounds.com/portal/view/825916 (page) +# * https://picon.ngfiles.com/825000/flash_825916_card.png?f1639666239 (poster) +# * https://uploads.ungrounded.net/alternate/1801000/1801343_alternate_165104.1080p.mp4?1639666238 +# * https://uploads.ungrounded.net/alternate/1801000/1801343_alternate_165104.720p.mp4?1639666238 +# * https://uploads.ungrounded.net/alternate/1801000/1801343_alternate_165104.360p.mp4?1639666238 +# +# Flash URLs +# +# * https://www.newgrounds.com/portal/view/225625 (page) +# * https://uploads.ungrounded.net/225000/225625_colormedressup.swf?1111143751 (file) +# +# Other URLs +# +# * https://www.newgrounds.com/reviews/portal/1543982/4/ +# * https://www.newgrounds.com/reviews/portal/1543982/4/score/1 +# * https://www.newgrounds.com/content/share/1543982/4/ +# * https://www.newgrounds.com/favorites/content/who/1543982/4 +# +# Profile URLs +# +# * https://natthelich.newgrounds.com +# * https://natthelich.newgrounds.com/art +# +class Source::URL::Newgrounds < Source::URL + attr_reader :username, :work_id, :work_title + + def self.match?(url) + url.domain.in?(["newgrounds.com", "ngfiles.com", "ungrounded.net"]) + end + + def parse + case [host, *path_segments] + + # https://www.newgrounds.com/art/view/puddbytes/costanza-at-bat + # https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic + in "www.newgrounds.com", "art", "view", username, work_title + @username = username + @work_title = work_title + + # https://art.ngfiles.com/images/1254000/1254722_natthelich_pandora.jpg + # https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181 + in "art.ngfiles.com", "images", _, /^(\d+)_([^_]+)_(.*)\.\w+$/ => filename + 
+ @work_id = $1 + @username = $2 + @work_title = $3 + @filename = filename + + # https://art.ngfiles.com/thumbnails/1254000/1254985.png?f1588263349 + in "art.ngfiles.com", "thumbnails", _, /^(\d+)\.\w+$/ => filename + @work_id = $1 + @filename = filename + + # https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg + in "art.ngfiles.com", "comments", _, /^iu/ => filename + @filename = filename + + # https://natthelich.newgrounds.com + # https://natthelich.newgrounds.com/art/ + in /^([a-z0-9-]+)\.newgrounds\.com$/, *rest if host != "www.newgrounds.com" + @username = $1 + + else + end + end + + def image_url? + url.host == "art.ngfiles.com" + end +end diff --git a/app/logical/sources/strategies/newgrounds.rb b/app/logical/sources/strategies/newgrounds.rb index d777d30fb..422c1ac40 100644 --- a/app/logical/sources/strategies/newgrounds.rb +++ b/app/logical/sources/strategies/newgrounds.rb @@ -1,37 +1,19 @@ # frozen_string_literal: true -# Image Urls -# * https://art.ngfiles.com/images/1254000/1254722_natthelich_pandora.jpg -# * https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181 -# * https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg -# -# Page URLs -# * https://www.newgrounds.com/art/view/puddbytes/costanza-at-bat -# * https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic (multiple) -# -# Profile URLs -# * https://natthelich.newgrounds.com/ - +# @see Source::URL::Newgrounds module Sources module Strategies class Newgrounds < Base - IMAGE_URL = %r{\Ahttps?://art\.ngfiles\.com/images/\d+/\d+_(?<user_name>[0-9a-z-]+)_(?<illust_title>[0-9a-z-]+)\.\w+}i - COMMENT_URL = %r{\Ahttps?://art\.ngfiles\.com/comments/\d+/\w+\.\w+}i - - PAGE_URL = %r{\Ahttps?://(?:www\.)?newgrounds\.com/art/view/(?<user_name>[0-9a-z-]+)/(?<illust_title>[0-9a-z-]+)(?:\?.*)?}i - - PROFILE_URL = %r{\Ahttps?://(?<artist_name>(?!www)[0-9a-z-]+)\.newgrounds\.com(?:/.*)?}i - - def domains - ["newgrounds.com", "ngfiles.com"] + def match? 
+ parsed_url&.site_name == "Newgrounds" end def site_name - "NewGrounds" + "Newgrounds" end def image_urls - if url =~ COMMENT_URL || url =~ IMAGE_URL + if parsed_url.image_url? [url] else urls = [] @@ -97,16 +79,25 @@ module Sources DText.from_html(artist_commentary_desc) end + # The image url should be the post source, if we can generate the page url from the image url. + def canonical_url + if page_url.present? + url + else + page_url + end + end + def normalize_for_source page_url end def user_name - urls.map { |u| url[PROFILE_URL, :artist_name] || u[IMAGE_URL, :user_name] || u[PAGE_URL, :user_name] }.compact.first + parsed_url.username || parsed_referer&.username end def illust_title - urls.map { |u| u[IMAGE_URL, :illust_title] || u[PAGE_URL, :illust_title] }.compact.first + parsed_url.work_title || parsed_referer&.work_title end end end diff --git a/test/unit/sources/newgrounds_test.rb b/test/unit/sources/newgrounds_test.rb index 59d379c06..9f6679021 100644 --- a/test/unit/sources/newgrounds_test.rb +++ b/test/unit/sources/newgrounds_test.rb @@ -1,7 +1,7 @@ require 'test_helper' module Sources - class NewGroundsTest < ActiveSupport::TestCase + class NewgroundsTest < ActiveSupport::TestCase context "The source for a newgrounds picture" do setup do @url = "https://www.newgrounds.com/art/view/hcnone/sephiroth" @@ -32,7 +32,7 @@ module Sources should "get the canonical url" do assert_equal(@url, @image_1.canonical_url) - assert_equal(@url, @image_2.canonical_url) + assert_equal(@image_url, @image_2.canonical_url) end should "download an image" do @@ -62,6 +62,18 @@ module Sources end end + context "A multi-image Newgrounds post" do + should "get all the images" do + source = Sources::Strategies.find("https://www.newgrounds.com/art/view/natthelich/weaver") + image_urls = [ + "https://art.ngfiles.com/images/1520000/1520217_natthelich_weaver.jpg?f1606365031", + "https://art.ngfiles.com/comments/199000/iu_199826_7115981.jpg", + ] + + assert_equal(image_urls, 
source.image_urls) + end + end + context "A deleted or not existing picture" do setup do @fake_1 = Sources::Strategies.find("https://www.newgrounds.com/art/view/ThisUser/DoesNotExist")