sources: factor out Source::URL::Newgrounds.
Also fix it so that the image URL is set as the source for Newgrounds posts, not the page URL. It's possible to generate the page URL from the image URL (except for images after the first in multi-image posts). * Page: https://www.newgrounds.com/art/view/natthelich/weaver * Image: https://art.ngfiles.com/images/1520000/1520217_natthelich_weaver.jpg?f1606365031
This commit is contained in:
@@ -20,6 +20,7 @@ module Source
|
||||
SUBCLASSES = [
|
||||
Source::URL::Twitter,
|
||||
Source::URL::HentaiFoundry,
|
||||
Source::URL::Newgrounds,
|
||||
Source::URL::Plurk,
|
||||
Source::URL::Skeb,
|
||||
Source::URL::TwitPic,
|
||||
|
||||
92
app/logical/source/url/newgrounds.rb
Normal file
92
app/logical/source/url/newgrounds.rb
Normal file
@@ -0,0 +1,92 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Image URLs
|
||||
#
|
||||
# * https://art.ngfiles.com/images/1543000/1543982_natthelich_pandora-2.jpg?f1607971817
|
||||
# * https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181
|
||||
#
|
||||
# * https://www.newgrounds.com/art/view/natthelich/weaver (page)
|
||||
# * https://art.ngfiles.com/images/1520000/1520217_natthelich_weaver.jpg?f1606365031
|
||||
# * https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg
|
||||
#
|
||||
# Thumbnail URLs
|
||||
#
|
||||
# * https://art.ngfiles.com/thumbnails/1543000/1543982_full.png?f1607971901
|
||||
# * https://art.ngfiles.com/thumbnails/1254000/1254985.png?f1588263349
|
||||
#
|
||||
# Page URLs
|
||||
#
|
||||
# * https://www.newgrounds.com/art/view/puddbytes/costanza-at-bat
|
||||
# * https://www.newgrounds.com/art/view/natthelich/weaver (multiple)
|
||||
#
|
||||
# Video URLs
|
||||
#
|
||||
# * https://www.newgrounds.com/portal/view/825916 (page)
|
||||
# * https://picon.ngfiles.com/825000/flash_825916_card.png?f1639666239 (poster)
|
||||
# * https://uploads.ungrounded.net/alternate/1801000/1801343_alternate_165104.1080p.mp4?1639666238
|
||||
# * https://uploads.ungrounded.net/alternate/1801000/1801343_alternate_165104.720p.mp4?1639666238
|
||||
# * https://uploads.ungrounded.net/alternate/1801000/1801343_alternate_165104.360p.mp4?1639666238
|
||||
#
|
||||
# Flash URLs
|
||||
#
|
||||
# * https://www.newgrounds.com/portal/view/225625 (page)
|
||||
# * https://uploads.ungrounded.net/225000/225625_colormedressup.swf?1111143751 (file)
|
||||
#
|
||||
# Other URLs
|
||||
#
|
||||
# * https://www.newgrounds.com/reviews/portal/1543982/4/
|
||||
# * https://www.newgrounds.com/reviews/portal/1543982/4/score/1
|
||||
# * https://www.newgrounds.com/content/share/1543982/4/
|
||||
# * https://www.newgrounds.com/favorites/content/who/1543982/4
|
||||
#
|
||||
# Profile URLs
|
||||
#
|
||||
# * https://natthelich.newgrounds.com
|
||||
# * https://natthelich.newgrounds.com/art
|
||||
#
|
||||
class Source::URL::Newgrounds < Source::URL
|
||||
attr_reader :username, :work_id, :work_title
|
||||
|
||||
def self.match?(url)
|
||||
url.domain.in?(["newgrounds.com", "ngfiles.com", "ungrounded.net"])
|
||||
end
|
||||
|
||||
def parse
|
||||
case [host, *path_segments]
|
||||
|
||||
# https://www.newgrounds.com/art/view/puddbytes/costanza-at-bat
|
||||
# https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic
|
||||
in "www.newgrounds.com", "art", "view", username, work_title
|
||||
@username = username
|
||||
@work_title = work_title
|
||||
|
||||
# https://art.ngfiles.com/images/1254000/1254722_natthelich_pandora.jpg
|
||||
# https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181
|
||||
in "art.ngfiles.com", "images", _, /^(\d+)_([^_]+)_(.*)\.\w+$/ => filename
|
||||
@work_id = $1
|
||||
@username = $2
|
||||
@work_title = $3
|
||||
@filename = filename
|
||||
|
||||
# https://art.ngfiles.com/thumbnails/1254000/1254985.png?f1588263349
|
||||
in "art.ngfiles.com", "thumbnails", _, /^(\d+)\.\w+$/ => filename
|
||||
@work_id = $1
|
||||
@filename = filename
|
||||
|
||||
# https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg
|
||||
in "art.ngfiles.com", "comments", _, /^iu/ => filename
|
||||
@filename = filename
|
||||
|
||||
# https://natthelich.newgrounds.com
|
||||
# https://natthelich.newgrounds.com/art/
|
||||
in /^([a-z0-9-]+)\.newgrounds\.com$/, *rest if host != "www.newgrounds.com"
|
||||
@username = $1
|
||||
|
||||
else
|
||||
end
|
||||
end
|
||||
|
||||
def image_url?
|
||||
url.host == "art.ngfiles.com"
|
||||
end
|
||||
end
|
||||
@@ -1,37 +1,19 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Image Urls
|
||||
# * https://art.ngfiles.com/images/1254000/1254722_natthelich_pandora.jpg
|
||||
# * https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181
|
||||
# * https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg
|
||||
#
|
||||
# Page URLs
|
||||
# * https://www.newgrounds.com/art/view/puddbytes/costanza-at-bat
|
||||
# * https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic (multiple)
|
||||
#
|
||||
# Profile URLs
|
||||
# * https://natthelich.newgrounds.com/
|
||||
|
||||
# @see Source::URL::Newgrounds
|
||||
module Sources
|
||||
module Strategies
|
||||
class Newgrounds < Base
|
||||
IMAGE_URL = %r{\Ahttps?://art\.ngfiles\.com/images/\d+/\d+_(?<user_name>[0-9a-z-]+)_(?<illust_title>[0-9a-z-]+)\.\w+}i
|
||||
COMMENT_URL = %r{\Ahttps?://art\.ngfiles\.com/comments/\d+/\w+\.\w+}i
|
||||
|
||||
PAGE_URL = %r{\Ahttps?://(?:www\.)?newgrounds\.com/art/view/(?<user_name>[0-9a-z-]+)/(?<illust_title>[0-9a-z-]+)(?:\?.*)?}i
|
||||
|
||||
PROFILE_URL = %r{\Ahttps?://(?<artist_name>(?!www)[0-9a-z-]+)\.newgrounds\.com(?:/.*)?}i
|
||||
|
||||
def domains
|
||||
["newgrounds.com", "ngfiles.com"]
|
||||
def match?
|
||||
parsed_url&.site_name == "Newgrounds"
|
||||
end
|
||||
|
||||
# Name of the site handled by this strategy.
#
# @return [String] always "Newgrounds"
def site_name
  "Newgrounds"
end
|
||||
|
||||
def image_urls
|
||||
if url =~ COMMENT_URL || url =~ IMAGE_URL
|
||||
if parsed_url.image_url?
|
||||
[url]
|
||||
else
|
||||
urls = []
|
||||
@@ -97,16 +79,25 @@ module Sources
|
||||
DText.from_html(artist_commentary_desc)
|
||||
end
|
||||
|
||||
# The image url should be the post source, if we can generate the page url from the image url.
#
# Returns `url` unchanged whenever a page url is available; otherwise returns
# whatever `page_url` evaluates to.
#
# NOTE(review): in the fallback case `page_url` is already known to be blank, so
# this returns a blank value — confirm that is intended rather than falling back to `url`.
def canonical_url
  return url if page_url.present?

  page_url
end
|
||||
|
||||
# The normalized source is simply the page url.
#
# @return whatever `page_url` returns
def normalize_for_source
  page_url
end
|
||||
|
||||
# Newgrounds username for this source.
#
# Taken from the parsed URL; when that URL carries no username, falls back to
# the parsed referer (which may be absent, hence the safe navigation).
#
# @return [String, nil] the username, or nil when neither URL provides one
def user_name
  parsed_url.username || parsed_referer&.username
end
|
||||
|
||||
# Title slug of the work for this source.
#
# Taken from the parsed URL; when that URL carries no work title, falls back to
# the parsed referer (which may be absent, hence the safe navigation).
#
# @return [String, nil] the work title, or nil when neither URL provides one
def illust_title
  parsed_url.work_title || parsed_referer&.work_title
end
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user