diff --git a/app/logical/danbooru/url.rb b/app/logical/danbooru/url.rb index 28d255db1..7777890fe 100644 --- a/app/logical/danbooru/url.rb +++ b/app/logical/danbooru/url.rb @@ -10,7 +10,7 @@ module Danbooru # @return [Addressable:URI] The parsed and normalized URL. attr_reader :url - delegate :domain, :host, :site, :path, to: :url + delegate :domain, :host, :site, :path, :query, to: :url # Parse a string into a URL, or raise an exception if the string is not a valid HTTPS or HTTPS URL. # @@ -62,5 +62,18 @@ module Danbooru def params url.query_values.to_h.with_indifferent_access end + + # Return the subdomain of the URL, or nil if absent. For example, for "http://senpenbankashiki.hp.infoseek.co.jp", the + # subdomain is "senpenbankashiki.hp", the domain is "infoseek.co.jp", the SLD is "infoseek", and the TLD is "co.jp". + # + # @return [String, nil] + def subdomain + parsed_domain.trd + end + + # @return [PublicSuffix::Domain] + def parsed_domain + @parsed_domain ||= PublicSuffix.parse(host) + end end end diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb index cb5f6e5ba..da85c210d 100644 --- a/app/logical/source/url.rb +++ b/app/logical/source/url.rb @@ -19,6 +19,8 @@ module Source class URL < Danbooru::URL SUBCLASSES = [ Source::URL::Twitter, + Source::URL::ArtStation, + Source::URL::Foundation, Source::URL::HentaiFoundry, Source::URL::Lofter, Source::URL::Mastodon, @@ -27,7 +29,6 @@ module Source Source::URL::Plurk, Source::URL::Skeb, Source::URL::TwitPic, - Source::URL::Foundation, ] # Parse a URL into a subclass of Source::URL, or raise an exception if the URL is not a valid HTTP or HTTPS URL. 
# frozen_string_literal: true

# Parses ArtStation URLs into a work ID, a username, and/or the components of an image asset URL.
#
# Page URLs:
#
# * https://www.artstation.com/artwork/04XA4
# * https://www.artstation.com/artwork/cody-from-sf (old; redirects to https://www.artstation.com/artwork/3JJA)
# * https://sa-dui.artstation.com/projects/DVERn
# * https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041
#
# Profile URLs:
#
# * https://www.artstation.com/artist/sa-dui
# * https://www.artstation.com/sa-dui
# * https://sa-dui.artstation.com/
# * https://hosi_na.artstation.com
#
# Image URLs
#
# * https://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236
# * https://cdnb.artstation.com/p/assets/images/images/014/410/217/smaller_square/bart-osz-bartosz1812041.jpg?1543866276
# * https://cdna.artstation.com/p/assets/images/images/007/253/680/4k/ina-wong-demon-girl-done-ttd-comp.jpg?1504793833
# * https://cdna.artstation.com/p/assets/covers/images/007/262/828/small/monica-kyrie-1.jpg?1504865060
#
# API URLs
#
# * https://www.artstation.com/projects/04XA4.json

class Source::URL::ArtStation < Source::URL
  # Subdomains that are never treated as a username when parsing.
  RESERVED_SUBDOMAINS = %w[www cdn cdna cdnb].freeze

  # CDN subdomains that appear in image URLs. Not referenced inside this class.
  IMAGE_SUBDOMAINS = %w[cdn cdna cdnb].freeze

  # @return [String, nil] the artist's username, when the URL contains one
  # @return [String, nil] the artwork/project identifier, when the URL contains one
  attr_reader :username, :work_id

  # @param url [#domain] a parsed URL
  # @return [Boolean] true if the URL's registrable domain is artstation.com
  def self.match?(url)
    url.domain == "artstation.com"
  end

  # @return [String] the human-readable name of the site
  def site_name
    "ArtStation"
  end

  # Destructure the host and path segments and populate the instance variables that the
  # rest of the class reads. Patterns are tried in order, so the more specific image and
  # work patterns come before the catch-all profile patterns.
  def parse
    case [host, *path_segments]

    # https://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236
    # https://cdnb.artstation.com/p/assets/images/images/014/410/217/smaller_square/bart-osz-bartosz1812041.jpg?1543866276
    # https://cdna.artstation.com/p/assets/images/images/007/253/680/4k/ina-wong-demon-girl-done-ttd-comp.jpg?1504793833
    # https://cdna.artstation.com/p/assets/covers/images/007/262/828/small/monica-kyrie-1.jpg?1504865060
    in _, "p", "assets", ("images" | "covers") => asset_type, "images", *subdirs, _size, filename
      @asset_type = asset_type
      @asset_subdir = subdirs.join("/")
      @filename = filename
      # Only keep the query string when it is purely numeric. \A and \z anchor the whole
      # string, whereas ^ and $ would accept a match on any single line of it.
      @timestamp = query if query&.match?(/\A\d+\z/)

    # https://www.artstation.com/artwork/04XA4
    # https://www.artstation.com/artwork/cody-from-sf (old; redirects to https://www.artstation.com/artwork/3JJA)
    # https://sa-dui.artstation.com/projects/DVERn
    # https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041
    #
    # NOTE(review): for API URLs such as /projects/04XA4.json the captured segment would
    # include the ".json" suffix unless path_segments strips it -- confirm.
    in _, ("artwork" | "projects"), work_id
      @work_id = work_id
      @username = subdomain unless subdomain.in?(RESERVED_SUBDOMAINS)

    # https://www.artstation.com/artist/sa-dui
    in "www.artstation.com", "artist", username
      @username = username

    # https://www.artstation.com/sa-dui
    in "www.artstation.com", username
      @username = username

    # https://sa-dui.artstation.com
    # https://hosi_na.artstation.com
    in _ unless subdomain.in?(RESERVED_SUBDOMAINS)
      @username = subdomain

    # The empty else is required: without it, case/in raises NoMatchingPatternError for
    # URLs that match none of the patterns. Such URLs simply leave every attribute unset.
    else
    end
  end

  # @return [Boolean] true if the URL was recognized as an image asset URL
  def image_url?
    @filename.present?
  end

  # Build the CDN URL of this image at the given size, keeping the numeric timestamp
  # query string if the original URL had one.
  #
  # @param size [String] the size directory to use in the path (e.g. "original", "4k", "large")
  # @return [String, nil] the image URL, or nil if this is not an image URL
  def full_image_url(size = "original")
    return unless image_url?

    base_url = "https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@filename}"
    @timestamp.present? ? "#{base_url}?#{@timestamp}" : base_url
  end
end
%r{\Ahttps?://cdn\w*\.artstation\.com/p/assets/(?images|covers)/images/(?\d+/\d+/\d+)/(?[^/]+)/(?.+)\z}i - - attr_reader :json - - def domains - ["artstation.com"] + def match? + Source::URL::ArtStation === parsed_url end def site_name - "ArtStation" + parsed_url.site_name end def image_urls @@ -100,25 +67,19 @@ module Sources::Strategies end def image_urls_sub - if url.match?(ASSET) - return [url] + if parsed_url.image_url? + [url] + else + api_response[:assets].to_a.select { |asset| asset[:asset_type] == "image" }.pluck(:image_url) end - - api_response[:assets] - .to_a - .select { |asset| asset[:asset_type] == "image" } - .map { |asset| asset[:image_url] } end - # these are de facto private methods but are public for testing - # purposes - def artist_name_from_url - urls.map { |url| url[PROJECT, :artist_name] || url[ARTIST, :artist_name] }.compact.first + parsed_url.username || parsed_referer&.username end def project_id - urls.map { |url| url[PROJECT, :project_id] }.compact.first + parsed_url.work_id || parsed_referer&.work_id end def api_response @@ -131,23 +92,12 @@ module Sources::Strategies end memoize :api_response - def image_url_sizes(type, id, filename) - [ - "https://cdn.artstation.com/p/assets/#{type}/images/#{id}/original/#{filename}", - "https://cdn.artstation.com/p/assets/#{type}/images/#{id}/4k/#{filename}", - "https://cdn.artstation.com/p/assets/#{type}/images/#{id}/large/#{filename}", - "https://cdn.artstation.com/p/assets/#{type}/images/#{id}/medium/#{filename}", - "https://cdn.artstation.com/p/assets/#{type}/images/#{id}/small/#{filename}", - ] - end - def asset_url(url, size) - return url unless url =~ ASSET + parsed_url = Source::URL.parse(url) - urls = image_url_sizes($~[:type], $~[:id], $~[:filename]) - if size == :smallest - urls = urls.reverse - end + image_sizes = %w[original 4k large medium small] + urls = image_sizes.map { |size| parsed_url.full_image_url(size) } + urls = urls.reverse if size == :smallest chosen_url = urls.find { |url| 
http_exists?(url) } chosen_url || url