Danbooru::URL: add #basename, #filename, and #file_ext utility methods.

Add `#basename`, `#filename`, and `#file_ext` utility methods to
Danbooru::URL and change a few places to use them. Simplifies parsing
filenames in source URLs in various places.
This commit is contained in:
evazion
2022-02-27 00:56:23 -06:00
parent fcf517834d
commit 926a8fa81f
13 changed files with 93 additions and 65 deletions

View File

@@ -56,6 +56,7 @@ gem "ffi"
gem "rbtrace"
gem "good_job"
gem "crass"
gem "public_suffix"
group :development do
gem 'rubocop', require: false

View File

@@ -587,6 +587,7 @@ DEPENDENCIES
pg
pry-byebug
pry-rails
public_suffix
puma
puma-metrics
puma_worker_killer

View File

@@ -1,5 +1,29 @@
# frozen_string_literal: true
# A utility class representing a HTTP URL. A wrapper around Addressable::URI that adds
# extra utility methods. Anything dealing with URLs inside Danbooru should use this class
# instead of using `Addressable::URI` or the Ruby `URI` class directly.
#
# Source::URL is a subclass that adds further methods for parsing URLs from source sites,
# such as Twitter, Pixiv, etc.
#
# @example
# url = Danbooru::URL.parse("https://cdn.donmai.us/original/d3/4e/d34e4cf0a437a5d65f8e82b7bcd02606.jpg")
# url.path # => "/original/d3/4e/d34e4cf0a437a5d65f8e82b7bcd02606.jpg"
# url.path_segments # => ["original", "d3", "4e", "d34e4cf0a437a5d65f8e82b7bcd02606.jpg"]
# url.basename # => "d34e4cf0a437a5d65f8e82b7bcd02606.jpg"
# url.filename # => "d34e4cf0a437a5d65f8e82b7bcd02606"
# url.file_ext # => "jpg"
# url.host # => "cdn.donmai.us"
# url.domain # => "donmai.us"
# url.subdomain # => "cdn"
# url.site # => "https://cdn.donmai.us"
#
# url = Danbooru::URL.parse("https://danbooru.donmai.us/posts?tags=touhou")
# url.params # => { tags: "touhou" }
# url.query # => "tags=touhou"
#
# @see Source::URL
module Danbooru
class URL
class Error < StandardError; end
@@ -12,7 +36,7 @@ module Danbooru
delegate :domain, :host, :site, :path, :query, to: :url
# Parse a string into a URL, or raise an exception if the string is not a valid HTTPS or HTTPS URL.
# Parse a string into a URL, or raise an exception if the string is not a valid HTTP or HTTPS URL.
#
# @param url [String, Danbooru::URL]
def initialize(url)
@@ -25,7 +49,7 @@ module Danbooru
raise Error, e
end
# Parse a string into a URL, or raise an exception if the string is not a valid HTTPS or HTTPS URL.
# Parse a string into a URL, or raise an exception if the string is not a valid HTTP or HTTPS URL.
#
# @param url [String, Danbooru::URL]
# @return [Danbooru::URL]
@@ -63,7 +87,22 @@ module Danbooru
url.query_values.to_h.with_indifferent_access
end
# Return the subdomain of the URL, or nil if absent. For example, for "http://senpenbankashiki.hp.infoseek.co.jp", the
# The last segment of the URL's path, e.g. "d34e4cf0a437a5d65f8e82b7bcd02606.jpg" for
# "https://cdn.donmai.us/original/d3/4e/d34e4cf0a437a5d65f8e82b7bcd02606.jpg".
#
# @return [String, nil] The name of the file with the file extension, or nil if not present.
def basename
path_segments.last
end
#
# The basename with its final ".ext" suffix removed, i.e. everything before the last dot.
#
# @return [String, nil] The file name without the extension, or nil if the URL has no
#   basename or the basename contains no dot.
def filename
  name = basename
  name[/^(.*)\./, 1] if name
end
# The trailing run of alphanumeric characters after the last dot of the basename.
#
# @return [String, nil] The file extension without the leading dot, or nil if the URL has no
#   basename or the basename does not end in a dot followed by alphanumeric characters.
def file_ext
  name = basename
  return nil if name.nil?

  name[/\.([[:alnum:]]+)$/, 1]
end
# The subdomain of the URL, or nil if absent. For example, for "http://senpenbankashiki.hp.infoseek.co.jp", the
# subdomain is "senpenbankashiki.hp", the domain is "infoseek.co.jp", the SLD is "infoseek", and the TLD is "co.jp".
#
# @return [String, nil]

View File

@@ -46,10 +46,10 @@ class Source::URL::ArtStation < Source::URL
# https://cdnb.artstation.com/p/assets/images/images/014/410/217/smaller_square/bart-osz-bartosz1812041.jpg?1543866276
# https://cdna.artstation.com/p/assets/images/images/007/253/680/4k/ina-wong-demon-girl-done-ttd-comp.jpg?1504793833
# https://cdna.artstation.com/p/assets/covers/images/007/262/828/small/monica-kyrie-1.jpg?1504865060
in _, "p", "assets", ("images" | "covers") => asset_type, "images", *subdirs, size, filename
in _, "p", "assets", ("images" | "covers") => asset_type, "images", *subdirs, size, file
@asset_type = asset_type
@asset_subdir = subdirs.join("/")
@filename = filename
@file = file
@timestamp = query if query&.match?(/^\d+$/)
# https://www.artstation.com/artwork/04XA4
@@ -78,16 +78,16 @@ class Source::URL::ArtStation < Source::URL
end
def image_url?
@filename.present?
@file.present?
end
def full_image_url(size = "original")
return nil unless image_url?
if @timestamp.present?
"https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@filename}?#{@timestamp}"
"https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}?#{@timestamp}"
else
"https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@filename}"
"https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}"
end
end
end

View File

@@ -55,7 +55,7 @@
# * https://foundation.app/mochiiimo
#
class Source::URL::Foundation < Source::URL
attr_reader :username, :token_id, :work_id, :hash, :file_ext
attr_reader :username, :token_id, :work_id, :hash
def self.match?(url)
url.host.in?(%w[foundation.app assets.foundation.app f8n-ipfs-production.imgix.net f8n-production-collection-assets.imgix.net])
@@ -88,27 +88,23 @@ class Source::URL::Foundation < Source::URL
# https://f8n-ipfs-production.imgix.net/QmX4MotNAAj9Rcyew43KdgGDxU1QtXemMHoUTNacMLLSjQ/nft.png
# https://f8n-ipfs-production.imgix.net/QmX4MotNAAj9Rcyew43KdgGDxU1QtXemMHoUTNacMLLSjQ/nft.png?q=80&auto=format%2Ccompress&cs=srgb&max-w=1680&max-h=1680
in "f8n-ipfs-production.imgix.net", hash, filename
in "f8n-ipfs-production.imgix.net", hash, file
@hash = hash
@filename, @file_ext = filename.split(".")
# https://f8n-production-collection-assets.imgix.net/0x3B3ee1931Dc30C1957379FAc9aba94D1C48a5405/128711/QmcBfbeCMSxqYB3L1owPAxFencFx3jLzCPFx6xUBxgSCkH/nft.png
in "f8n-production-collection-assets.imgix.net", token_id, work_id, hash, filename
in "f8n-production-collection-assets.imgix.net", token_id, work_id, hash, file
@token_id = token_id
@work_id = work_id
@hash = hash
@filename, @file_ext = filename.split(".")
# https://f8n-production-collection-assets.imgix.net/0xFb0a8e1bB97fD7231Cd73c489dA4732Ae87995F0/4/nft.png
in "f8n-production-collection-assets.imgix.net", token_id, work_id, filename
in "f8n-production-collection-assets.imgix.net", token_id, work_id, file
@token_id = token_id
@work_id = work_id
@filename, @file_ext = filename.split(".")
# https://assets.foundation.app/7i/gs/QmU8bbsjaVQpEKMDWbSZdDD6GsPmRYBhQtYRn8bEGv7igs/nft_q4.mp4
in "assets.foundation.app", *subdirs, hash, filename
in "assets.foundation.app", *subdirs, hash, file
@hash = hash
@filename, @file_ext = filename.split(".")
else
end

View File

@@ -48,14 +48,14 @@ class Source::URL::HentaiFoundry < Source::URL
@work_id = work_id
# http://pictures.hentai-foundry.com//s/soranamae/363663.jpg
in "pictures.hentai-foundry.com", _, username, /^\d+\.\w+$/ => filename
in "pictures.hentai-foundry.com", _, username, /^(\d+)\.\w+$/
@username = username
@work_id, @file_ext = filename.split(".")
@work_id = $1
# http://www.hentai-foundry.com/piccies/d/dmitrys/1183.jpg
in "www.hentai-foundry.com", "piccies", _, username, /^\d+\.\w+$/ => filename
in "www.hentai-foundry.com", "piccies", _, username, /^(\d+)\.\w+$/
@username = username
@work_id, @file_ext = filename.split(".")
@work_id = $1
# https://www.hentai-foundry.com/pictures/user/Afrobull/795025
# https://www.hentai-foundry.com/pictures/user/Afrobull/795025/kuroeda

View File

@@ -40,8 +40,8 @@ class Source::URL::Lofter < Source::URL
# https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJSzFCWFlnUWgzb01DcUdpT1lreG5yQjJVMkhGS09HNGR3PT0.png?imageView&thumbnail=1680x0&quality=96&stripmeta=0
# https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJSzFCWFlnUWgzb01DcUdpT1lreG5yQjJVMkhGS09HNGR3PT0.png
# http://imglf0.nosdn.127.net/img/cHl3bXNZdDRaaHBnNWJuN1Y4OXBqR01CeVBZSVNmU2FWZWtHc1h4ZTZiUGxlRzMwZnFDM1JnPT0.jpg (404)
in /127\.net$/, "img", filename
@filename = filename
in /127\.net$/, "img", _
nil
# https://gengar563.lofter.com/post/1e82da8c_1c98dae1b
in /^([a-z0-9-]+)\.lofter\.com$/, "post", work_id unless host.in?(RESERVED_SUBDOMAINS)

View File

@@ -56,17 +56,17 @@ class Source::URL::Mastodon < Source::URL
# Page: https://pawoo.net/@evazion/19451018
# https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
# https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
in "img.pawoo.net", "media_attachments", "files", *subdirs, file_size, filename
in "img.pawoo.net", "media_attachments", "files", *subdirs, file_size, file
@file_size = file_size
@full_image_url = "#{site}/media_attachments/files/#{subdirs.join("/")}/original/#{filename}"
@full_image_url = "#{site}/media_attachments/files/#{subdirs.join("/")}/original/#{file}"
# Page: https://baraag.net/@danbooru/107866090743238456
# https://baraag.net/system/media_attachments/files/107/866/084/749/942/932/original/a9e0f553e332f303.mp4
# https://baraag.net/system/media_attachments/files/107/866/084/754/127/256/original/3895a14ce3736f13.mp4
# https://baraag.net/system/media_attachments/files/107/866/084/754/651/925/original/8f3df857681a1639.png
in "baraag.net", "system", "media_attachments", "files", *subdirs, file_size, filename
in "baraag.net", "system", "media_attachments", "files", *subdirs, file_size, file
@file_size = file_size
@full_image_url = "#{site}/system/media_attachments/files/#{subdirs.join("/")}/original/#{filename}"
@full_image_url = "#{site}/system/media_attachments/files/#{subdirs.join("/")}/original/#{file}"
# https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
in "pawoo.net", "media", media_hash

View File

@@ -32,7 +32,7 @@
# * https://konachan.com/post/show/270803/banishment-bicycle-grass-group-male-night-original
class Source::URL::Moebooru < Source::URL
attr_reader :work_id, :md5, :sample_type, :original_file_ext
attr_reader :work_id, :md5, :original_file_ext
def self.match?(url)
url.domain.in?(%w[yande.re konachan.com])
@@ -65,38 +65,33 @@ class Source::URL::Moebooru < Source::URL
# https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg
#
# https://files.yande.re/image/e4c2ba38de88ff1640aaebff84c84e81/469784.jpg
in _, ("sample" | "jpeg" | "image") => sample_type, /^\h{32}$/ => md5, filename
in _, ("sample" | "jpeg" | "image") => sample_type, /^\h{32}$/ => md5, file
@md5 = md5
@work_id = parse_filename(filename)
case sample_type
when "image"
@original_file_ext = File.extname(filename).delete_prefix(".")
when "jpeg"
@original_file_ext = "png"
end
@work_id = work_id_from_filename
@original_file_ext = file_ext_for(sample_type)
# https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg
# https://files.yande.re/image/22577d2344fe694cf47f80563031b3cd.png
# https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098.jpg
in _, ("sample" | "jpeg" | "image") => sample_type, /^(\h{32})\.\w+$/ => filename
in _, ("sample" | "jpeg" | "image") => sample_type, /^(\h{32})\.\w+$/
@md5 = $1
case sample_type
when "image"
@original_file_ext = File.extname(filename).delete_prefix(".")
when "jpeg"
@original_file_ext = "png"
end
@original_file_ext = file_ext_for(sample_type)
else
end
end
def parse_filename(filename)
basename = File.basename(filename, ".*")
# Choose the value stored as the original file extension, based on the image variant
# named in the URL path.
#
# @param sample_type [String] the variant path segment ("sample", "jpeg" or "image")
# @return [String, nil] this URL's own file extension for "image", "png" for "jpeg"
#   (presumably because /jpeg/ variants are conversions of PNG originals; confirm), and
#   nil for any other variant.
def file_ext_for(sample_type)
  return file_ext if sample_type == "image"

  "png" if sample_type == "jpeg"
end
case CGI.unescape(basename).split
def work_id_from_filename
case CGI.unescape(filename).split
# yande.re 290757 sample seifuku thighhighs tsukudani_norio
# yande.re 290757
in "yande.re", /^\d+$/ => work_id, *rest

View File

@@ -62,20 +62,18 @@ class Source::URL::Newgrounds < Source::URL
# https://art.ngfiles.com/images/1254000/1254722_natthelich_pandora.jpg
# https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181
in "art.ngfiles.com", "images", _, /^(\d+)_([^_]+)_(.*)\.\w+$/ => filename
in "art.ngfiles.com", "images", _, /^(\d+)_([^_]+)_(.*)\.\w+$/
@work_id = $1
@username = $2
@work_title = $3
@filename = filename
# https://art.ngfiles.com/thumbnails/1254000/1254985.png?f1588263349
in "art.ngfiles.com", "thumbnails", _, /^(\d+)\.\w+$/ => filename
in "art.ngfiles.com", "thumbnails", _, /^(\d+)\.\w+$/
@work_id = $1
@filename = filename
# https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg
in "art.ngfiles.com", "comments", _, /^iu/ => filename
@filename = filename
in "art.ngfiles.com", "comments", _, /^iu/
nil
# https://natthelich.newgrounds.com
# https://natthelich.newgrounds.com/art/

View File

@@ -40,7 +40,7 @@ class Source::URL::Plurk < Source::URL
# https://images.plurk.com/5wj6WD0r6y4rLN0DL3sqag.jpg
# https://images.plurk.com/mx_5wj6WD0r6y4rLN0DL3sqag.jpg
in "plurk.com", /^(mx_)?(\w{22})\.(\w+)$/
@filename, @file_ext = $2, $3
@image_id = $2
# https://www.plurk.com/p/om6zv4
in "plurk.com", "p", work_id

View File

@@ -40,9 +40,8 @@ class Source::URL::TwitPic < Source::URL
@base36_id = base36_id
# https://twitpic.com/show/large/carwkf.jpg
in "twitpic.com", "show", size, filename
@filename, @file_ext = filename.split(".")
@base36_id = @filename
in "twitpic.com", "show", size, _
@base36_id = filename
# https://o.twimg.com/1/proxy.jpg?t=FQQVBBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2NhcndrZi5qcGcUBBYAEgA&s=y8haxddqxJYpWql9uVnP3aoFFS7rA10vOGPdTO5HXvk
# https://o.twimg.com/2/proxy.jpg?t=HBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2R0bnVydS5qcGcUsAkU0ggAFgASAA&s=dnN4DHCdnojC-iCJWdvZ-UZinrlWqAP7k7lmll2fTxs
@@ -61,9 +60,8 @@ class Source::URL::TwitPic < Source::URL
# http://d3j5vwomefv46c.cloudfront.net/photos/large/820960031.jpg?1384107199
# https://dn3pm25xmtlyu.cloudfront.net/photos/large/839006715.jpg?Expires=1646850828&Signature=d60CmLlmNqZJvOTteTOan13QWZ8gY3C4rUWCkh-IUoRr012vYtUYtip74GslGwCG0dxV5mpUpVFkaVZf16PiY7CsTdpAlA8Pmu2tN98D2dmC5FuW9KhhygDv6eFC8faoaGEyj~ArLuwz-8lC6Y05TVf0FgweeWwsRxFOfD5JHgCeIB0iZqzUx1t~eb6UMAWvbaKpfgvcp2oaDuCdZlMNi9T5OUBFoTh2DfnGy8t5COys1nOYYfZ9l69TDvVb2PKBaV8lsKK9xMwjoJNaWa1HL5S4MgODS5hiNDvycoBpu9KUvQ7q~rhC8cV6ZNctB5H9u~MmvBPoTKfy4w37cSc5uw__&Key-Pair-Id=APKAJROXZ7FN26MABHYA
in /cloudfront\.net/, "photos", size, filename
@filename, @file_ext = filename.split(".")
@base36_id = @filename.to_i.to_s(36)
in /cloudfront\.net/, "photos", size, _
@base36_id = filename.to_i.to_s(36)
else
end

View File

@@ -87,10 +87,10 @@ class Source::URL::Twitter < Source::URL
# https://pbs.twimg.com/tweet_video_thumb/ETkN_L3X0AMy1aT.jpg
# https://pbs.twimg.com/ext_tw_video_thumb/1243725361986375680/pu/img/JDA7g7lcw7wK-PIv.jpg
# https://pbs.twimg.com/amplify_video_thumb/1215590775364259840/img/lolCkEEioFZTb5dl.jpg
in "twimg.com", ("media" | "tweet_video_thumb" | "ext_tw_video_thumb" | "amplify_video_thumb") => media_type, *subdirs, filename
in "twimg.com", ("media" | "tweet_video_thumb" | "ext_tw_video_thumb" | "amplify_video_thumb") => media_type, *subdirs, file
# EBGbJe_U8AA4Ekb.jpg:small
@filename, @file_size = filename.split(":")
@filename, @file_ext = @filename.split(".")
@file, @file_size = file.split(":")
@file, @file_ext = @file.split(".")
# EBGbJe_U8AA4Ekb?format=jpg&name=900x900
@file_size = params[:name] if params[:name].present?
@@ -98,7 +98,7 @@ class Source::URL::Twitter < Source::URL
# /media/EBGbJe_U8AA4Ekb.jpg
# /ext_tw_video_thumb/1243725361986375680/pu/img/JDA7g7lcw7wK-PIv.jpg
@file_path = File.join(media_type, subdirs.join("/"), "#{@filename}.#{@file_ext}")
@file_path = File.join(media_type, subdirs.join("/"), "#{@file}.#{@file_ext}")
else
end
end