sources: factor out Source::URL::Foundation.

Add support for a couple more URL types:

* https://foundation.app/@asuka111art/dinner-with-cats-82426
* https://f8n-production-collection-assets.imgix.net/0x3B3ee1931Dc30C1957379FAc9aba94D1C48a5405/128711/QmcBfbeCMSxqYB3L1owPAxFencFx3jLzCPFx6xUBxgSCkH/nft.png

Also include these URLs in the list of profile URLs:

* https://foundation.app/0x7E2ef75C0C09b2fc6BCd1C68B6D409720CcD58d2 (for https://foundation.app/@mochiiimo)

These URLs should be stable even if the user changes their name.
This commit is contained in:
evazion
2022-02-23 23:16:11 -06:00
parent 043c08eb05
commit ffe52f5ead
4 changed files with 203 additions and 44 deletions

View File

@@ -20,6 +20,7 @@ module Source
SUBCLASSES = [
Source::URL::Twitter,
Source::URL::TwitPic,
Source::URL::Foundation,
]
# Parse a URL into a subclass of Source::URL, or raise an exception if the URL is not a valid HTTP or HTTPS URL.

View File

@@ -0,0 +1,137 @@
# frozen_string_literal: true
#
# Page URLs
#
# * https://foundation.app/@mochiiimo/~/97376
# * https://foundation.app/@mochiiimo/foundation/97376
# * https://foundation.app/@KILLERGF/kgfgen/4
# * https://foundation.app/@huwari/~/88982 (video)
# * https://foundation.app/@asuka111art/dinner-with-cats-82426 (redirects to https://foundation.app/@asuka111art/foundation/82426)
#
# Even if the username is wrong, the ID is still fetched correctly. Example:
#
# * https://foundation.app/@foundation/~/97376
#
# Full image URLs
#
# # Page: https://foundation.app/@mochiiimo/~/97376
# * https://f8n-ipfs-production.imgix.net/QmX4MotNAAj9Rcyew43KdgGDxU1QtXemMHoUTNacMLLSjQ/nft.png
# * https://ipfs.io/ipfs/QmX4MotNAAj9Rcyew43KdgGDxU1QtXemMHoUTNacMLLSjQ/nft.png
#
# # Page: https://foundation.app/@mochiiimo/~/128711
# * https://f8n-production-collection-assets.imgix.net/0x3B3ee1931Dc30C1957379FAc9aba94D1C48a5405/128711/QmcBfbeCMSxqYB3L1owPAxFencFx3jLzCPFx6xUBxgSCkH/nft.png
# * https://f8n-ipfs-production.imgix.net/QmcBfbeCMSxqYB3L1owPAxFencFx3jLzCPFx6xUBxgSCkH/nft.png
# * https://ipfs.io/ipfs/QmcBfbeCMSxqYB3L1owPAxFencFx3jLzCPFx6xUBxgSCkH/nft.png
#
# # Page: https://foundation.app/@KILLERGF/kgfgen/4
# * https://f8n-production-collection-assets.imgix.net/0xFb0a8e1bB97fD7231Cd73c489dA4732Ae87995F0/4/nft.png
# * https://ipfs.io/ipfs/QmbdxcWQ9bg6JUMfK4ubpW2rGDFA8qfTidoCaf6GKMqvr7/nft.png
#
# Video URLs
#
# # Page: https://foundation.app/@huwari/foundation/88982
# * https://assets.foundation.app/7i/gs/QmU8bbsjaVQpEKMDWbSZdDD6GsPmRYBhQtYRn8bEGv7igs/nft_q4.mp4
# * https://f8n-ipfs-production.imgix.net/QmU8bbsjaVQpEKMDWbSZdDD6GsPmRYBhQtYRn8bEGv7igs/nft.mp4
# * https://ipfs.io/ipfs/QmU8bbsjaVQpEKMDWbSZdDD6GsPmRYBhQtYRn8bEGv7igs/nft.mp4
#
# Sample image URLs
#
# * https://f8n-ipfs-production.imgix.net/QmX4MotNAAj9Rcyew43KdgGDxU1QtXemMHoUTNacMLLSjQ/nft.png?q=80&auto=format%2Ccompress&cs=srgb&max-w=1680&max-h=1680
# * https://f8n-production-collection-assets.imgix.net/0x3B3ee1931Dc30C1957379FAc9aba94D1C48a5405/128711/QmcBfbeCMSxqYB3L1owPAxFencFx3jLzCPFx6xUBxgSCkH/nft.png?q=80&auto=format%2Ccompress&cs=srgb&h=640
#
# Profile URLs
#
# Profile urls seem to accept any character in them, even no character at all:
#
# * https://foundation.app/@mochiiimo
# * https://foundation.app/@ <- This seems to be a novelty account.
#
# Public key profile URLs:
#
# * https://foundation.app/0x7E2ef75C0C09b2fc6BCd1C68B6D409720CcD58d2 (@mochiiimo)
#
# The @ is optional:
#
# * https://foundation.app/mochiiimo
#
# Parses Foundation (foundation.app) profile, artwork page, and media URLs.
#
# `parse` fills in whichever attributes the URL shape provides; attributes that are not present in
# the URL are left nil. `host` and `path_segments` are not defined here and are presumably provided
# by the Source::URL superclass.
#
# NOTE(review): the `hash` reader (the IPFS content hash) shadows Ruby's built-in Object#hash, which
# Hash, Set and Array#uniq expect to return an Integer. It is kept because callers may rely on the
# name; consider renaming it (e.g. `ipfs_hash`) in a follow-up.
#
# NOTE(review): `parse` also stores @user_id, @collection and @filename, but no public reader
# exposes them; only `page_url` uses @collection internally.
class Source::URL::Foundation < Source::URL
  attr_reader :username, :token_id, :work_id, :hash, :file_ext

  # @param url [#host] the URL being classified
  # @return [Boolean] true if the URL's host is one of the known Foundation hosts
  def self.match?(url)
    url.host.in?(%w[foundation.app assets.foundation.app f8n-ipfs-production.imgix.net f8n-production-collection-assets.imgix.net])
  end

  # Destructures the host and path segments and records the parts of the URL that were recognized.
  # Patterns are anchored with \A and \z so a segment must match in full (^ and $ only anchor lines).
  def parse
    case [host, *path_segments]

    # https://foundation.app/@mochiiimo
    # https://foundation.app/@KILLERGF
    in "foundation.app", /\A@/ => username
      @username = username.delete_prefix("@")

    # https://foundation.app/0x7E2ef75C0C09b2fc6BCd1C68B6D409720CcD58d2
    # A public key is "0x" followed by exactly 40 hex digits.
    in "foundation.app", /\A0x\h{40}\z/ => user_id
      @user_id = user_id

    # https://foundation.app/@mochiiimo/~/97376
    # https://foundation.app/@mochiiimo/foundation/97376
    # https://foundation.app/@KILLERGF/kgfgen/4
    in "foundation.app", /\A@/ => username, collection, /\A\d+\z/ => work_id
      @username = username.delete_prefix("@")
      @collection = collection
      @work_id = work_id

    # https://foundation.app/@asuka111art/dinner-with-cats-82426
    in "foundation.app", /\A@/ => username, /\A.+-\d+\z/ => slug
      @username = username.delete_prefix("@")
      # The work ID is the number after the last dash of the slug.
      @work_id = slug.split("-").last

    # https://f8n-ipfs-production.imgix.net/QmX4MotNAAj9Rcyew43KdgGDxU1QtXemMHoUTNacMLLSjQ/nft.png
    # https://f8n-ipfs-production.imgix.net/QmX4MotNAAj9Rcyew43KdgGDxU1QtXemMHoUTNacMLLSjQ/nft.png?q=80&auto=format%2Ccompress&cs=srgb&max-w=1680&max-h=1680
    in "f8n-ipfs-production.imgix.net", hash, filename
      @hash = hash
      @filename, @file_ext = split_filename(filename)

    # https://f8n-production-collection-assets.imgix.net/0x3B3ee1931Dc30C1957379FAc9aba94D1C48a5405/128711/QmcBfbeCMSxqYB3L1owPAxFencFx3jLzCPFx6xUBxgSCkH/nft.png
    in "f8n-production-collection-assets.imgix.net", token_id, work_id, hash, filename
      @token_id = token_id
      @work_id = work_id
      @hash = hash
      @filename, @file_ext = split_filename(filename)

    # https://f8n-production-collection-assets.imgix.net/0xFb0a8e1bB97fD7231Cd73c489dA4732Ae87995F0/4/nft.png
    in "f8n-production-collection-assets.imgix.net", token_id, work_id, filename
      @token_id = token_id
      @work_id = work_id
      @filename, @file_ext = split_filename(filename)

    # https://assets.foundation.app/7i/gs/QmU8bbsjaVQpEKMDWbSZdDD6GsPmRYBhQtYRn8bEGv7igs/nft_q4.mp4
    # Any number of leading directories may precede the hash; they are ignored.
    in "assets.foundation.app", *, hash, filename
      @hash = hash
      @filename, @file_ext = split_filename(filename)

    else
      # Unrecognized URL shape: leave every attribute nil rather than raising NoMatchingPatternError.
      nil
    end
  end

  # @return [String, nil] the artwork page URL, or nil when the URL carried no work ID.
  #   Falls back to "foundation" for a missing username or collection.
  def page_url
    return nil unless work_id.present?

    username = @username || "foundation"
    collection = @collection || "foundation"
    "https://foundation.app/@#{username}/#{collection}/#{work_id}"
  end

  # @return [String, nil] the full-size media URL. Prefers the hash-based imgix URL; for collection
  #   asset URLs without a hash, rebuilds the URL from the token ID and work ID. Nil otherwise.
  def full_image_url
    if hash.present? && file_ext.present?
      "https://f8n-ipfs-production.imgix.net/#{hash}/nft.#{file_ext}"
    elsif host == "f8n-production-collection-assets.imgix.net" && token_id.present? && work_id.present? && file_ext.present?
      "https://f8n-production-collection-assets.imgix.net/#{token_id}/#{work_id}/nft.#{file_ext}"
    end
  end

  # @return [String, nil] the ipfs:// URL for the file, or nil when the hash or extension is unknown
  def ipfs_url
    return nil unless hash.present? && file_ext.present?

    "ipfs://#{hash}/nft.#{file_ext}"
  end

  private

  # Splits a filename at its last dot, e.g. "nft.png" => ["nft", "png"].
  #
  # @param filename [String]
  # @return [Array(String, String), Array(String, nil)] the basename and the extension; the
  #   extension is nil when the name has no dot or nothing after its last dot
  def split_filename(filename)
    basename, dot, ext = filename.rpartition(".")
    return [filename.delete_suffix("."), nil] if dot.empty? || ext.empty?

    [basename, ext]
  end
end

View File

@@ -1,43 +1,13 @@
# frozen_string_literal: true
# Image URLs
# * https://f8n-ipfs-production.imgix.net/QmX4MotNAAj9Rcyew43KdgGDxU1QtXemMHoUTNacMLLSjQ/nft.png
#
# Page URLs
#
# * https://foundation.app/@mochiiimo/~/97376
# * https://foundation.app/@mochiiimo/foundation/97376
# * https://foundation.app/@KILLERGF/kgfgen/4
# * https://foundation.app/@huwari/~/88982 (video)
#
# Even if the username is wrong, the ID is still fetched correctly. Example:
# * https://foundation.app/@asdasdasd/~/97376
#
# Profile URLs
#
# Profile urls seem to accept any character in them, even no character at all:
# * https://foundation.app/@mochiiimo
# * https://foundation.app/@ <- This seems to be a novelty account.
# Probably not worth supporting it given its
# uniqueness and chance for headaches
# @see Source::URL::Foundation
module Sources
module Strategies
class Foundation < Base
BASE_URL = %r{\Ahttps?://(www\.)?foundation\.app}i
PROFILE_URL = %r{#{BASE_URL}/@(?<artist_name>[^/]+)/?}i
PAGE_URL = %r{#{PROFILE_URL}/[^/]+/(?<illust_id>\d+)}i
IMAGE_HOST = /f8n-ipfs-production\.imgix\.net/
IMAGE_URL = %r{\Ahttps?://#{IMAGE_HOST}/\w+/nft.\w+}i
def domains
["foundation.app"]
end
extend Memoist
def match?
return false if parsed_url.nil?
parsed_url.domain.in?(domains) || parsed_url.host =~ IMAGE_HOST
parsed_url&.site_name == "Foundation"
end
def site_name
@@ -45,14 +15,13 @@ module Sources
end
def image_urls
return [url.gsub(/\?.*/, "")] if url =~ IMAGE_URL
image = page&.at(".fullscreen img, .fullscreen video")&.[](:src)&.gsub(/\?.*/, "")
if image =~ %r{assets\.foundation\.app/(?:\w+/)+(\w+)/nft_\w+\.(\w+)}i
image = "https://f8n-ipfs-production.imgix.net/#{$1}/nft.#{$2}"
if parsed_url.full_image_url.present?
[parsed_url.full_image_url]
elsif image_url = page&.at(".fullscreen img, .fullscreen video")&.attr(:src)
[Source::URL.parse(image_url).full_image_url].compact
else
[]
end
[image].compact
end
def preview_urls
@@ -62,7 +31,7 @@ module Sources
end
def page_url
urls.select { |url| url[PAGE_URL]}.compact.first
parsed_url.page_url || parsed_referer&.page_url
end
def page
@@ -75,15 +44,15 @@ module Sources
end
def tags
tags = page&.search("a[href^='/tags/']").to_a
tags = api_response.dig("props", "pageProps", "artwork", "tags").to_a
tags.map do |tag|
[tag.text, URI.join(page_url, tag.attr("href")).to_s]
[tag, "https://foundation.app/tags/#{tag}"]
end
end
def artist_name
urls.map { |u| u[PROFILE_URL, :artist_name] }.compact.first
parsed_url.username || parsed_referer&.username || api_response.dig("props", "pageProps", "artwork", "creator", "username")
end
def profile_url
@@ -91,6 +60,19 @@ module Sources
"https://foundation.app/@#{artist_name}"
end
# Every known profile URL for the creator: the @username URL and the public-key URL,
# omitting whichever of the two is nil.
def profile_urls
  candidates = [profile_url, creator_public_key_url]
  candidates.compact
end
# The creator's profile URL addressed by public key, or nil when no public key is known.
def creator_public_key_url
  public_key = creator_public_key
  "https://foundation.app/#{public_key}" unless public_key.nil?
end
# Looks up the creator's public key in the page's embedded data; nil when any level is missing.
def creator_public_key
  key_path = %w[props pageProps artwork creator publicKey]
  api_response.dig(*key_path)
end
def artist_commentary_title
return nil if page.blank?
page.at("meta[property='og:title']")["content"].gsub(/ \| Foundation$/, "")
@@ -109,6 +91,17 @@ module Sources
# The normalized form of a Foundation source is simply its page URL (see #page_url);
# it is nil whenever #page_url is nil.
def normalize_for_source
page_url
end
# Parses the JSON held in the page's "#__NEXT_DATA__" element.
#
# Returns an empty hash when there is no page, no such element, or the element's text is blank;
# otherwise the parsed JSON with indifferent (string or symbol) key access. The result is memoized.
def api_response
  next_data = page&.at("#__NEXT_DATA__")&.text
  return {} if next_data.blank?

  JSON.parse(next_data).with_indifferent_access
end
memoize :api_response
end
end
end

View File

@@ -24,7 +24,10 @@ module Sources
should "get profile url" do
assert_equal("https://foundation.app/@dadachyo", @image1.profile_url)
assert_equal(["https://foundation.app/@dadachyo", "https://foundation.app/0xb4D9073800c7935351ACDC1e46F0CF670853eA99"], @image1.profile_urls)
assert_equal("https://foundation.app/@huwari", @image3.profile_url)
assert_equal(["https://foundation.app/@huwari", "https://foundation.app/0xaa2f2eDE4D502F59b3706d2E2dA873C8A00A3d4d"], @image3.profile_urls)
end
should "get the image url" do
@@ -50,6 +53,31 @@ module Sources
end
end
# Slug-style page URL ("<title>-<work id>"): finding a strategy from the page URL alone should
# still yield the artist name, both profile URLs, the full image URL, and the tags.
# NOTE(review): the expected values look like live site data — presumably this test needs network
# access or recorded responses; confirm.
context "for a foundation.app/@username/foo-bar-1234 URL" do
should "work" do
page_url = "https://foundation.app/@asuka111art/dinner-with-cats-82426"
image_url = "https://f8n-ipfs-production.imgix.net/Qma7Lz2LfFb4swoqzr1V43oRGh9xikgigM11g3EukdU61R/nft.png"
source = Sources::Strategies.find(page_url)
assert_equal("asuka111art", source.artist_name)
assert_equal(["https://foundation.app/@asuka111art", "https://foundation.app/0x9A94f94626352566e0A9105F1e3DA0439E3e3783"], source.profile_urls)
assert_equal([image_url], source.image_urls)
assert_equal(%w[2d anime illustration digital fantasy], source.tags.map(&:first))
end
end
# Collection-assets sample image URL (with imgix query parameters): the strategy should report the
# artist, both profile URLs, and the tags, and should rewrite the image URL to the query-free
# f8n-ipfs-production.imgix.net form.
# NOTE(review): the expected values look like live site data — presumably this test needs network
# access or recorded responses; confirm.
context "for a f8n-production-collection-assets.imgix.net URL" do
should "work" do
image_url = "https://f8n-production-collection-assets.imgix.net/0x3B3ee1931Dc30C1957379FAc9aba94D1C48a5405/128711/QmcBfbeCMSxqYB3L1owPAxFencFx3jLzCPFx6xUBxgSCkH/nft.png?q=80&auto=format%2Ccompress&cs=srgb&h=640"
source = Sources::Strategies.find(image_url)
assert_equal("mochiiimo", source.artist_name)
assert_equal(["https://foundation.app/@mochiiimo", "https://foundation.app/0x7E2ef75C0C09b2fc6BCd1C68B6D409720CcD58d2"], source.profile_urls)
assert_equal(["https://f8n-ipfs-production.imgix.net/QmcBfbeCMSxqYB3L1owPAxFencFx3jLzCPFx6xUBxgSCkH/nft.png"], source.image_urls)
assert_equal(%w[anime landscape girl cat 2d illustration matcrewnft], source.tags.map(&:first))
end
end
context "for a collection" do
should "get the image urls" do
assert_equal(