sources: factor out Source::URL::ArtStation.

This commit is contained in:
evazion
2022-02-26 21:01:16 -06:00
parent 9169f00e80
commit fcf517834d
4 changed files with 123 additions and 66 deletions

View File

@@ -10,7 +10,7 @@ module Danbooru
# @return [Addressable::URI] The parsed and normalized URL.
attr_reader :url
delegate :domain, :host, :site, :path, to: :url
delegate :domain, :host, :site, :path, :query, to: :url
# Parse a string into a URL, or raise an exception if the string is not a valid HTTP or HTTPS URL.
#
@@ -62,5 +62,18 @@ module Danbooru
def params
  # `to_h` runs before the indifferent-access conversion, so the conversion is always applied to a Hash.
  query_hash = url.query_values.to_h

  # Wrapped so callers may look parameters up by either string or symbol key.
  query_hash.with_indifferent_access
end
# The part of the host that precedes the registrable domain, or nil when the host has none.
#
# Example: for "http://senpenbankashiki.hp.infoseek.co.jp" the subdomain is "senpenbankashiki.hp",
# the domain is "infoseek.co.jp", the SLD is "infoseek", and the TLD is "co.jp".
#
# @return [String, nil]
def subdomain
  public_suffix_domain = parsed_domain
  public_suffix_domain.trd
end
# The URL's host parsed by PublicSuffix. The result is cached in @parsed_domain, so the host is
# parsed at most once per instance.
#
# @return [PublicSuffix::Domain]
def parsed_domain
  return @parsed_domain if @parsed_domain

  @parsed_domain = PublicSuffix.parse(host)
end
end
end

View File

@@ -19,6 +19,8 @@ module Source
class URL < Danbooru::URL
SUBCLASSES = [
Source::URL::Twitter,
Source::URL::ArtStation,
Source::URL::Foundation,
Source::URL::HentaiFoundry,
Source::URL::Lofter,
Source::URL::Mastodon,
@@ -27,7 +29,6 @@ module Source
Source::URL::Plurk,
Source::URL::Skeb,
Source::URL::TwitPic,
Source::URL::Foundation,
]
# Parse a URL into a subclass of Source::URL, or raise an exception if the URL is not a valid HTTP or HTTPS URL.

View File

@@ -0,0 +1,93 @@
# frozen_string_literal: true
# Page URLs:
#
# * https://www.artstation.com/artwork/04XA4
# * https://www.artstation.com/artwork/cody-from-sf (old; redirects to https://www.artstation.com/artwork/3JJA)
# * https://sa-dui.artstation.com/projects/DVERn
# * https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041
#
# Profile URLs:
#
# * https://www.artstation.com/artist/sa-dui
# * https://www.artstation.com/sa-dui
# * https://sa-dui.artstation.com/
# * https://hosi_na.artstation.com
#
# Image URLs:
#
# * https://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236
# * https://cdnb.artstation.com/p/assets/images/images/014/410/217/smaller_square/bart-osz-bartosz1812041.jpg?1543866276
# * https://cdna.artstation.com/p/assets/images/images/007/253/680/4k/ina-wong-demon-girl-done-ttd-comp.jpg?1504793833
# * https://cdna.artstation.com/p/assets/covers/images/007/262/828/small/monica-kyrie-1.jpg?1504865060
#
# API URLs:
#
# * https://www.artstation.com/projects/04XA4.json
# Parses ArtStation URLs: artwork/project pages, artist profiles, and CDN image URLs.
#
# After parsing, the artist username and the work ID are available through readers when the URL
# contains them, and `full_image_url` can rebuild an image URL at another size.
class Source::URL::ArtStation < Source::URL
  # Subdomains used by the site itself; a subdomain in this list is never treated as a username.
  RESERVED_SUBDOMAINS = %w[www cdn cdna cdnb].freeze

  # Subdomains of the image CDN. Not referenced inside this class.
  IMAGE_SUBDOMAINS = %w[cdn cdna cdnb].freeze

  # username: the artist's username, or nil if the URL doesn't contain one.
  # work_id: the artwork/project ID, or nil if the URL isn't an artwork or project URL.
  attr_reader :username, :work_id

  # @param url [#domain] the URL to test
  # @return [Boolean] true if the URL's domain is artstation.com
  def self.match?(url)
    url.domain == "artstation.com"
  end

  # @return [String] the display name of the site
  def site_name
    "ArtStation"
  end

  # Destructure the host and path segments and set the instance variables for whichever kind of
  # URL was recognized. Unrecognized URLs leave every attribute nil.
  def parse
    case [host, *path_segments]

    # https://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236
    # https://cdnb.artstation.com/p/assets/images/images/014/410/217/smaller_square/bart-osz-bartosz1812041.jpg?1543866276
    # https://cdna.artstation.com/p/assets/images/images/007/253/680/4k/ina-wong-demon-girl-done-ttd-comp.jpg?1504793833
    # https://cdna.artstation.com/p/assets/covers/images/007/262/828/small/monica-kyrie-1.jpg?1504865060
    in _, "p", "assets", ("images" | "covers") => asset_type, "images", *subdirs, _size, filename
      @asset_type = asset_type
      @asset_subdir = subdirs.join("/")
      @filename = filename
      # \A and \z anchor the whole string; ^ and $ would only anchor a single line of it.
      @timestamp = query if query&.match?(/\A\d+\z/)

    # https://www.artstation.com/artwork/04XA4
    # https://www.artstation.com/artwork/cody-from-sf (old; redirects to https://www.artstation.com/artwork/3JJA)
    # https://sa-dui.artstation.com/projects/DVERn
    # https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041
    # https://www.artstation.com/projects/04XA4.json
    in _, ("artwork" | "projects"), work_id
      # API URLs end in ".json"; strip it so the ID is the same one a page URL would give.
      @work_id = work_id.delete_suffix(".json")
      @username = subdomain unless subdomain.in?(RESERVED_SUBDOMAINS)

    # https://www.artstation.com/artist/sa-dui
    in "www.artstation.com", "artist", username
      @username = username

    # https://www.artstation.com/sa-dui
    in "www.artstation.com", username
      @username = username

    # https://sa-dui.artstation.com
    # https://hosi_na.artstation.com
    in _ unless subdomain.in?(RESERVED_SUBDOMAINS)
      @username = subdomain

    else
      # Not a recognized ArtStation URL; the empty branch prevents NoMatchingPatternError.
    end
  end

  # @return [Boolean] true if the URL was recognized as an image (asset) URL
  def image_url?
    @filename.present?
  end

  # Build the URL of this image at the given size on the cdn.artstation.com host. The numeric
  # timestamp query string is carried over when the original URL had one.
  #
  # @param size [String] the size directory to use in the path (e.g. "original", "4k", "large")
  # @return [String, nil] the image URL, or nil if this is not an image URL
  def full_image_url(size = "original")
    return nil unless image_url?

    image_url = "https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@filename}"
    @timestamp.present? ? "#{image_url}?#{@timestamp}" : image_url
  end
end

View File

@@ -1,47 +1,14 @@
# frozen_string_literal: true
# Page URLs:
#
# * https://www.artstation.com/artwork/04XA4
# * https://www.artstation.com/artwork/cody-from-sf
# * https://sa-dui.artstation.com/projects/DVERn
# * https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041
#
# Profile URLs:
#
# * https://www.artstation.com/artist/sa-dui
# * https://www.artstation.com/sa-dui
# * https://sa-dui.artstation.com/
# * https://hosi_na.artstation.com
#
# Image URLs
#
# * https://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236
# * https://cdnb.artstation.com/p/assets/images/images/014/410/217/smaller_square/bart-osz-bartosz1812041.jpg?1543866276
# * https://cdna.artstation.com/p/assets/images/images/007/253/680/4k/ina-wong-demon-girl-done-ttd-comp.jpg?1504793833
#
# * https://cdna.artstation.com/p/assets/covers/images/007/262/828/small/monica-kyrie-1.jpg?1504865060
# @see Source::URL::ArtStation
module Sources::Strategies
class ArtStation < Base
PROJECT1 = %r{\Ahttps?://www\.artstation\.com/artwork/(?<project_id>[a-z0-9-]+)/?\z}i
PROJECT2 = %r{\Ahttps?://(?<artist_name>[\w-]+)\.artstation\.com/projects/(?<project_id>[a-z0-9-]+)(?:/|\?[\w=-]+)?\z}i
PROJECT = Regexp.union(PROJECT1, PROJECT2)
ARTIST1 = %r{\Ahttps?://(?<artist_name>[\w-]+)(?<!www)\.artstation\.com/?\z}i
ARTIST2 = %r{\Ahttps?://www\.artstation\.com/artist/(?<artist_name>[\w-]+)/?\z}i
ARTIST3 = %r{\Ahttps?://www\.artstation\.com/(?<artist_name>[\w-]+)/?\z}i
ARTIST = Regexp.union(ARTIST1, ARTIST2, ARTIST3)
ASSET = %r{\Ahttps?://cdn\w*\.artstation\.com/p/assets/(?<type>images|covers)/images/(?<id>\d+/\d+/\d+)/(?<size>[^/]+)/(?<filename>.+)\z}i
attr_reader :json
def domains
["artstation.com"]
def match?
Source::URL::ArtStation === parsed_url
end
def site_name
"ArtStation"
parsed_url.site_name
end
def image_urls
@@ -100,25 +67,19 @@ module Sources::Strategies
end
def image_urls_sub
if url.match?(ASSET)
return [url]
if parsed_url.image_url?
[url]
else
api_response[:assets].to_a.select { |asset| asset[:asset_type] == "image" }.pluck(:image_url)
end
api_response[:assets]
.to_a
.select { |asset| asset[:asset_type] == "image" }
.map { |asset| asset[:image_url] }
end
# these are de facto private methods but are public for testing
# purposes
def artist_name_from_url
urls.map { |url| url[PROJECT, :artist_name] || url[ARTIST, :artist_name] }.compact.first
parsed_url.username || parsed_referer&.username
end
def project_id
urls.map { |url| url[PROJECT, :project_id] }.compact.first
parsed_url.work_id || parsed_referer&.work_id
end
def api_response
@@ -131,23 +92,12 @@ module Sources::Strategies
end
memoize :api_response
def image_url_sizes(type, id, filename)
[
"https://cdn.artstation.com/p/assets/#{type}/images/#{id}/original/#{filename}",
"https://cdn.artstation.com/p/assets/#{type}/images/#{id}/4k/#{filename}",
"https://cdn.artstation.com/p/assets/#{type}/images/#{id}/large/#{filename}",
"https://cdn.artstation.com/p/assets/#{type}/images/#{id}/medium/#{filename}",
"https://cdn.artstation.com/p/assets/#{type}/images/#{id}/small/#{filename}",
]
end
def asset_url(url, size)
return url unless url =~ ASSET
parsed_url = Source::URL.parse(url)
urls = image_url_sizes($~[:type], $~[:id], $~[:filename])
if size == :smallest
urls = urls.reverse
end
image_sizes = %w[original 4k large medium small]
urls = image_sizes.map { |size| parsed_url.full_image_url(size) }
urls = urls.reverse if size == :smallest
chosen_url = urls.find { |url| http_exists?(url) }
chosen_url || url