sources: factor out Source::URL::Newgrounds.

Also fix it so that the image URL is set as the source for Newgrounds
posts, not the page URL. It's possible to generate the page URL from the
image URL (except for images after the first in multi-image posts).

* Page: https://www.newgrounds.com/art/view/natthelich/weaver
* Image: https://art.ngfiles.com/images/1520000/1520217_natthelich_weaver.jpg?f1606365031
This commit is contained in:
evazion
2022-02-25 22:00:42 -06:00
parent 64472a7b7e
commit f062f2d145
4 changed files with 123 additions and 27 deletions

View File

@@ -20,6 +20,7 @@ module Source
SUBCLASSES = [
Source::URL::Twitter,
Source::URL::HentaiFoundry,
Source::URL::Newgrounds,
Source::URL::Plurk,
Source::URL::Skeb,
Source::URL::TwitPic,

View File

@@ -0,0 +1,92 @@
# frozen_string_literal: true
# Image URLs
#
# * https://art.ngfiles.com/images/1543000/1543982_natthelich_pandora-2.jpg?f1607971817
# * https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181
#
# * https://www.newgrounds.com/art/view/natthelich/weaver (page)
# * https://art.ngfiles.com/images/1520000/1520217_natthelich_weaver.jpg?f1606365031
# * https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg
#
# Thumbnail URLs
#
# * https://art.ngfiles.com/thumbnails/1543000/1543982_full.png?f1607971901
# * https://art.ngfiles.com/thumbnails/1254000/1254985.png?f1588263349
#
# Page URLs
#
# * https://www.newgrounds.com/art/view/puddbytes/costanza-at-bat
# * https://www.newgrounds.com/art/view/natthelich/weaver (multiple)
#
# Video URLs
#
# * https://www.newgrounds.com/portal/view/825916 (page)
# * https://picon.ngfiles.com/825000/flash_825916_card.png?f1639666239 (poster)
# * https://uploads.ungrounded.net/alternate/1801000/1801343_alternate_165104.1080p.mp4?1639666238
# * https://uploads.ungrounded.net/alternate/1801000/1801343_alternate_165104.720p.mp4?1639666238
# * https://uploads.ungrounded.net/alternate/1801000/1801343_alternate_165104.360p.mp4?1639666238
#
# Flash URLs
#
# * https://www.newgrounds.com/portal/view/225625 (page)
# * https://uploads.ungrounded.net/225000/225625_colormedressup.swf?1111143751 (file)
#
# Other URLs
#
# * https://www.newgrounds.com/reviews/portal/1543982/4/
# * https://www.newgrounds.com/reviews/portal/1543982/4/score/1
# * https://www.newgrounds.com/content/share/1543982/4/
# * https://www.newgrounds.com/favorites/content/who/1543982/4
#
# Profile URLs
#
# * https://natthelich.newgrounds.com
# * https://natthelich.newgrounds.com/art
#
class Source::URL::Newgrounds < Source::URL
attr_reader :username, :work_id, :work_title
def self.match?(url)
url.domain.in?(["newgrounds.com", "ngfiles.com", "ungrounded.net"])
end
def parse
case [host, *path_segments]
# https://www.newgrounds.com/art/view/puddbytes/costanza-at-bat
# https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic
in "www.newgrounds.com", "art", "view", username, work_title
@username = username
@work_title = work_title
# https://art.ngfiles.com/images/1254000/1254722_natthelich_pandora.jpg
# https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181
in "art.ngfiles.com", "images", _, /^(\d+)_([^_]+)_(.*)\.\w+$/ => filename
@work_id = $1
@username = $2
@work_title = $3
@filename = filename
# https://art.ngfiles.com/thumbnails/1254000/1254985.png?f1588263349
in "art.ngfiles.com", "thumbnails", _, /^(\d+)\.\w+$/ => filename
@work_id = $1
@filename = filename
# https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg
in "art.ngfiles.com", "comments", _, /^iu/ => filename
@filename = filename
# https://natthelich.newgrounds.com
# https://natthelich.newgrounds.com/art/
in /^([a-z0-9-]+)\.newgrounds\.com$/, *rest if host != "www.newgrounds.com"
@username = $1
else
end
end
def image_url?
url.host == "art.ngfiles.com"
end
end

View File

@@ -1,37 +1,19 @@
# frozen_string_literal: true
# Image Urls
# * https://art.ngfiles.com/images/1254000/1254722_natthelich_pandora.jpg
# * https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181
# * https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg
#
# Page URLs
# * https://www.newgrounds.com/art/view/puddbytes/costanza-at-bat
# * https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic (multiple)
#
# Profile URLs
# * https://natthelich.newgrounds.com/
# @see Source::URL::Newgrounds
module Sources
module Strategies
class Newgrounds < Base
IMAGE_URL = %r{\Ahttps?://art\.ngfiles\.com/images/\d+/\d+_(?<user_name>[0-9a-z-]+)_(?<illust_title>[0-9a-z-]+)\.\w+}i
COMMENT_URL = %r{\Ahttps?://art\.ngfiles\.com/comments/\d+/\w+\.\w+}i
PAGE_URL = %r{\Ahttps?://(?:www\.)?newgrounds\.com/art/view/(?<user_name>[0-9a-z-]+)/(?<illust_title>[0-9a-z-]+)(?:\?.*)?}i
PROFILE_URL = %r{\Ahttps?://(?<artist_name>(?!www)[0-9a-z-]+)\.newgrounds\.com(?:/.*)?}i
def domains
["newgrounds.com", "ngfiles.com"]
def match?
parsed_url&.site_name == "Newgrounds"
end
def site_name
"NewGrounds"
"Newgrounds"
end
def image_urls
if url =~ COMMENT_URL || url =~ IMAGE_URL
if parsed_url.image_url?
[url]
else
urls = []
@@ -97,16 +79,25 @@ module Sources
DText.from_html(artist_commentary_desc)
end
# The image url should be the post source, if we can generate the page url from the image url.
def canonical_url
  # Keep the URL we were given whenever a page URL can be produced for it;
  # otherwise hand back whatever (blank) value page_url returned.
  # NOTE(review): the fallback branch returns a blank page_url rather than
  # url — confirm that is the intended result when no page URL exists.
  page_url.present? ? url : page_url
end
# The normalized source is simply the value of page_url.
def normalize_for_source = page_url
def user_name
urls.map { |u| url[PROFILE_URL, :artist_name] || u[IMAGE_URL, :user_name] || u[PAGE_URL, :user_name] }.compact.first
parsed_url.username || parsed_referer&.username
end
def illust_title
urls.map { |u| u[IMAGE_URL, :illust_title] || u[PAGE_URL, :illust_title] }.compact.first
parsed_url.work_title || parsed_referer&.work_title
end
end
end