From 926a8fa81fc766263aada1ec92b2a19364013b42 Mon Sep 17 00:00:00 2001 From: evazion Date: Sun, 27 Feb 2022 00:56:23 -0600 Subject: [PATCH] Danbooru::URL: add `#basename`, `#filename`, and `#file_ext` utility methods. Add `#basename`, `#filename`, and `#file_ext` utility methods to Danbooru::URL and change a few places to use them. Simplifies parsing filenames in source URLs in various places. --- Gemfile | 1 + Gemfile.lock | 1 + app/logical/danbooru/url.rb | 45 ++++++++++++++++++++++-- app/logical/source/url/art_station.rb | 10 +++--- app/logical/source/url/foundation.rb | 14 +++----- app/logical/source/url/hentai_foundry.rb | 8 ++--- app/logical/source/url/lofter.rb | 4 +-- app/logical/source/url/mastodon.rb | 8 ++--- app/logical/source/url/moebooru.rb | 37 +++++++++---------- app/logical/source/url/newgrounds.rb | 10 +++--- app/logical/source/url/plurk.rb | 2 +- app/logical/source/url/twit_pic.rb | 10 +++--- app/logical/source/url/twitter.rb | 8 ++--- 13 files changed, 93 insertions(+), 65 deletions(-) diff --git a/Gemfile b/Gemfile index 07fd358e1..d5950755e 100644 --- a/Gemfile +++ b/Gemfile @@ -56,6 +56,7 @@ gem "ffi" gem "rbtrace" gem "good_job" gem "crass" +gem "public_suffix" group :development do gem 'rubocop', require: false diff --git a/Gemfile.lock b/Gemfile.lock index 5362fe340..335fad594 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -587,6 +587,7 @@ DEPENDENCIES pg pry-byebug pry-rails + public_suffix puma puma-metrics puma_worker_killer diff --git a/app/logical/danbooru/url.rb b/app/logical/danbooru/url.rb index 7777890fe..5a0641b09 100644 --- a/app/logical/danbooru/url.rb +++ b/app/logical/danbooru/url.rb @@ -1,5 +1,29 @@ # frozen_string_literal: true +# A utility class representing a HTTP URL. A wrapper around Addressable::URI that adds +# extra utility methods. 
Anything dealing with URLs inside Danbooru should use this class +# instead of using `Addressable::URI` or the Ruby `URI` class directly. +# +# Source::URL is a subclass that adds further methods for parsing URLs from source sites, +# such as Twitter, Pixiv, etc. +# +# @example +# url = Danbooru::URL.parse("https://cdn.donmai.us/original/d3/4e/d34e4cf0a437a5d65f8e82b7bcd02606.jpg") +# url.path # => "/original/d3/4e/d34e4cf0a437a5d65f8e82b7bcd02606.jpg" +# url.path_segments # => ["original", "d3", "4e", "d34e4cf0a437a5d65f8e82b7bcd02606.jpg"] +# url.basename # => "d34e4cf0a437a5d65f8e82b7bcd02606.jpg" +# url.filename # => "d34e4cf0a437a5d65f8e82b7bcd02606" +# url.file_ext # => "jpg" +# url.host # => "cdn.donmai.us" +# url.domain # => "donmai.us" +# url.subdomain # => "cdn" +# url.site # => "https://cdn.donmai.us" +# +# url = Danbooru::URL.parse("https://danbooru.donmai.us/posts?tags=touhou") +# url.params # => { tags: "touhou" } +# url.query # => "tags=touhou" +# +# @see Source::URL module Danbooru class URL class Error < StandardError; end @@ -12,7 +36,7 @@ module Danbooru delegate :domain, :host, :site, :path, :query, to: :url - # Parse a string into a URL, or raise an exception if the string is not a valid HTTPS or HTTPS URL. + # Parse a string into a URL, or raise an exception if the string is not a valid HTTP or HTTPS URL. # # @param url [String, Danbooru::URL] def initialize(url) @@ -25,7 +49,7 @@ module Danbooru raise Error, e end - # Parse a string into a URL, or raise an exception if the string is not a valid HTTPS or HTTPS URL. + # Parse a string into a URL, or raise an exception if the string is not a valid HTTP or HTTPS URL. # # @param url [String, Danbooru::URL] # @return [Danbooru::URL] @@ -63,7 +87,22 @@ module Danbooru url.query_values.to_h.with_indifferent_access end - # Return the subdomain of the URL, or nil if absent.
For example, for "http://senpenbankashiki.hp.infoseek.co.jp", the + # @return [String, nil] The name of the file with the file extension, or nil if not present. + def basename + path_segments.last + end + # + # @return [String, nil] The name of the file without the file extension, or nil if not present. + def filename + basename&.slice(/^(.*)\./, 1) + end + + # @return [String, nil] The file extension (without the dot), or nil if not present. + def file_ext + basename&.slice(/\.([[:alnum:]]+)$/, 1) + end + + # The subdomain of the URL, or nil if absent. For example, for "http://senpenbankashiki.hp.infoseek.co.jp", the # subdomain is "senpenbankashiki.hp", the domain is "infoseek.co.jp", the SLD is "infoseek", and the TLD is "co.jp". # # @return [String, nil] diff --git a/app/logical/source/url/art_station.rb b/app/logical/source/url/art_station.rb index 3a7481d0d..f3b0b5c9b 100644 --- a/app/logical/source/url/art_station.rb +++ b/app/logical/source/url/art_station.rb @@ -46,10 +46,10 @@ class Source::URL::ArtStation < Source::URL # https://cdnb.artstation.com/p/assets/images/images/014/410/217/smaller_square/bart-osz-bartosz1812041.jpg?1543866276 # https://cdna.artstation.com/p/assets/images/images/007/253/680/4k/ina-wong-demon-girl-done-ttd-comp.jpg?1504793833 # https://cdna.artstation.com/p/assets/covers/images/007/262/828/small/monica-kyrie-1.jpg?1504865060 - in _, "p", "assets", ("images" | "covers") => asset_type, "images", *subdirs, size, filename + in _, "p", "assets", ("images" | "covers") => asset_type, "images", *subdirs, size, file @asset_type = asset_type @asset_subdir = subdirs.join("/") - @filename = filename + @file = file @timestamp = query if query&.match?(/^\d+$/) # https://www.artstation.com/artwork/04XA4 @@ -78,16 +78,16 @@ class Source::URL::ArtStation < Source::URL end def image_url? - @filename.present? + @file.present? end def full_image_url(size = "original") return nil unless image_url? if @timestamp.present? 
- "https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@filename}?#{@timestamp}" + "https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}?#{@timestamp}" else - "https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@filename}" + "https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}" end end end diff --git a/app/logical/source/url/foundation.rb b/app/logical/source/url/foundation.rb index ad061f3cd..791817847 100644 --- a/app/logical/source/url/foundation.rb +++ b/app/logical/source/url/foundation.rb @@ -55,7 +55,7 @@ # * https://foundation.app/mochiiimo # class Source::URL::Foundation < Source::URL - attr_reader :username, :token_id, :work_id, :hash, :file_ext + attr_reader :username, :token_id, :work_id, :hash def self.match?(url) url.host.in?(%w[foundation.app assets.foundation.app f8n-ipfs-production.imgix.net f8n-production-collection-assets.imgix.net]) @@ -88,27 +88,23 @@ class Source::URL::Foundation < Source::URL # https://f8n-ipfs-production.imgix.net/QmX4MotNAAj9Rcyew43KdgGDxU1QtXemMHoUTNacMLLSjQ/nft.png # https://f8n-ipfs-production.imgix.net/QmX4MotNAAj9Rcyew43KdgGDxU1QtXemMHoUTNacMLLSjQ/nft.png?q=80&auto=format%2Ccompress&cs=srgb&max-w=1680&max-h=1680 - in "f8n-ipfs-production.imgix.net", hash, filename + in "f8n-ipfs-production.imgix.net", hash, file @hash = hash - @filename, @file_ext = filename.split(".") # https://f8n-production-collection-assets.imgix.net/0x3B3ee1931Dc30C1957379FAc9aba94D1C48a5405/128711/QmcBfbeCMSxqYB3L1owPAxFencFx3jLzCPFx6xUBxgSCkH/nft.png - in "f8n-production-collection-assets.imgix.net", token_id, work_id, hash, filename + in "f8n-production-collection-assets.imgix.net", token_id, work_id, hash, file @token_id = token_id @work_id = work_id @hash = hash - @filename, @file_ext = filename.split(".") # 
https://f8n-production-collection-assets.imgix.net/0xFb0a8e1bB97fD7231Cd73c489dA4732Ae87995F0/4/nft.png - in "f8n-production-collection-assets.imgix.net", token_id, work_id, filename + in "f8n-production-collection-assets.imgix.net", token_id, work_id, file @token_id = token_id @work_id = work_id - @filename, @file_ext = filename.split(".") # https://assets.foundation.app/7i/gs/QmU8bbsjaVQpEKMDWbSZdDD6GsPmRYBhQtYRn8bEGv7igs/nft_q4.mp4 - in "assets.foundation.app", *subdirs, hash, filename + in "assets.foundation.app", *subdirs, hash, file @hash = hash - @filename, @file_ext = filename.split(".") else end diff --git a/app/logical/source/url/hentai_foundry.rb b/app/logical/source/url/hentai_foundry.rb index a10a2946c..2a6a384f9 100644 --- a/app/logical/source/url/hentai_foundry.rb +++ b/app/logical/source/url/hentai_foundry.rb @@ -48,14 +48,14 @@ class Source::URL::HentaiFoundry < Source::URL @work_id = work_id # http://pictures.hentai-foundry.com//s/soranamae/363663.jpg - in "pictures.hentai-foundry.com", _, username, /^\d+\.\w+$/ => filename + in "pictures.hentai-foundry.com", _, username, /^(\d+)\.\w+$/ @username = username - @work_id, @file_ext = filename.split(".") + @work_id = $1 # http://www.hentai-foundry.com/piccies/d/dmitrys/1183.jpg - in "www.hentai-foundry.com", "piccies", _, username, /^\d+\.\w+$/ => filename + in "www.hentai-foundry.com", "piccies", _, username, /^(\d+)\.\w+$/ @username = username - @work_id, @file_ext = filename.split(".") + @work_id = $1 # https://www.hentai-foundry.com/pictures/user/Afrobull/795025 # https://www.hentai-foundry.com/pictures/user/Afrobull/795025/kuroeda diff --git a/app/logical/source/url/lofter.rb b/app/logical/source/url/lofter.rb index 46a494917..ad44b0bc5 100644 --- a/app/logical/source/url/lofter.rb +++ b/app/logical/source/url/lofter.rb @@ -40,8 +40,8 @@ class Source::URL::Lofter < Source::URL # 
https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJSzFCWFlnUWgzb01DcUdpT1lreG5yQjJVMkhGS09HNGR3PT0.png?imageView&thumbnail=1680x0&quality=96&stripmeta=0 # https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJSzFCWFlnUWgzb01DcUdpT1lreG5yQjJVMkhGS09HNGR3PT0.png # http://imglf0.nosdn.127.net/img/cHl3bXNZdDRaaHBnNWJuN1Y4OXBqR01CeVBZSVNmU2FWZWtHc1h4ZTZiUGxlRzMwZnFDM1JnPT0.jpg (404) - in /127\.net$/, "img", filename - @filename = filename + in /127\.net$/, "img", _ + nil # https://gengar563.lofter.com/post/1e82da8c_1c98dae1b in /^([a-z0-9-]+)\.lofter\.com$/, "post", work_id unless host.in?(RESERVED_SUBDOMAINS) diff --git a/app/logical/source/url/mastodon.rb b/app/logical/source/url/mastodon.rb index a8ccf64d6..a979068c2 100644 --- a/app/logical/source/url/mastodon.rb +++ b/app/logical/source/url/mastodon.rb @@ -56,17 +56,17 @@ class Source::URL::Mastodon < Source::URL # Page: https://pawoo.net/@evazion/19451018 # https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png # https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png - in "img.pawoo.net", "media_attachments", "files", *subdirs, file_size, filename + in "img.pawoo.net", "media_attachments", "files", *subdirs, file_size, file @file_size = file_size - @full_image_url = "#{site}/media_attachments/files/#{subdirs.join("/")}/original/#{filename}" + @full_image_url = "#{site}/media_attachments/files/#{subdirs.join("/")}/original/#{file}" # Page: https://baraag.net/@danbooru/107866090743238456 # https://baraag.net/system/media_attachments/files/107/866/084/749/942/932/original/a9e0f553e332f303.mp4 # https://baraag.net/system/media_attachments/files/107/866/084/754/127/256/original/3895a14ce3736f13.mp4 # https://baraag.net/system/media_attachments/files/107/866/084/754/651/925/original/8f3df857681a1639.png - in "baraag.net", "system", "media_attachments", "files", *subdirs, file_size, filename + in "baraag.net", "system", "media_attachments", "files", 
*subdirs, file_size, file @file_size = file_size - @full_image_url = "#{site}/system/media_attachments/files/#{subdirs.join("/")}/original/#{filename}" + @full_image_url = "#{site}/system/media_attachments/files/#{subdirs.join("/")}/original/#{file}" # https://pawoo.net/media/lU2uV7C1MMQSb1czwvg in "pawoo.net", "media", media_hash diff --git a/app/logical/source/url/moebooru.rb b/app/logical/source/url/moebooru.rb index 5b75a07ba..67ca55f3a 100644 --- a/app/logical/source/url/moebooru.rb +++ b/app/logical/source/url/moebooru.rb @@ -32,7 +32,7 @@ # * https://konachan.com/post/show/270803/banishment-bicycle-grass-group-male-night-original class Source::URL::Moebooru < Source::URL - attr_reader :work_id, :md5, :sample_type, :original_file_ext + attr_reader :work_id, :md5, :original_file_ext def self.match?(url) url.domain.in?(%w[yande.re konachan.com]) @@ -65,38 +65,33 @@ class Source::URL::Moebooru < Source::URL # https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg # # https://files.yande.re/image/e4c2ba38de88ff1640aaebff84c84e81/469784.jpg - in _, ("sample" | "jpeg" | "image") => sample_type, /^\h{32}$/ => md5, filename + in _, ("sample" | "jpeg" | "image") => sample_type, /^\h{32}$/ => md5, file @md5 = md5 - @work_id = parse_filename(filename) - - case sample_type - when "image" - @original_file_ext = File.extname(filename).delete_prefix(".") - when "jpeg" - @original_file_ext = "png" - end + @work_id = work_id_from_filename + @original_file_ext = file_ext_for(sample_type) # https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg # https://files.yande.re/image/22577d2344fe694cf47f80563031b3cd.png # https://files.yande.re/sample/fb27a7ea6c48b2ef76fe915e378b9098.jpg - in _, ("sample" | "jpeg" | "image") => sample_type, /^(\h{32})\.\w+$/ => filename + in _, ("sample" | "jpeg" | 
"image") => sample_type, /^(\h{32})\.\w+$/ @md5 = $1 - - case sample_type - when "image" - @original_file_ext = File.extname(filename).delete_prefix(".") - when "jpeg" - @original_file_ext = "png" - end + @original_file_ext = file_ext_for(sample_type) else end end - def parse_filename(filename) - basename = File.basename(filename, ".*") + def file_ext_for(sample_type) + case sample_type + when "image" + file_ext + when "jpeg" + "png" + end + end - case CGI.unescape(basename).split + def work_id_from_filename + case CGI.unescape(filename).split # yande.re 290757 sample seifuku thighhighs tsukudani_norio # yande.re 290757 in "yande.re", /^\d+$/ => work_id, *rest diff --git a/app/logical/source/url/newgrounds.rb b/app/logical/source/url/newgrounds.rb index b819ba971..3f71bf858 100644 --- a/app/logical/source/url/newgrounds.rb +++ b/app/logical/source/url/newgrounds.rb @@ -62,20 +62,18 @@ class Source::URL::Newgrounds < Source::URL # https://art.ngfiles.com/images/1254000/1254722_natthelich_pandora.jpg # https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181 - in "art.ngfiles.com", "images", _, /^(\d+)_([^_]+)_(.*)\.\w+$/ => filename + in "art.ngfiles.com", "images", _, /^(\d+)_([^_]+)_(.*)\.\w+$/ @work_id = $1 @username = $2 @work_title = $3 - @filename = filename # https://art.ngfiles.com/thumbnails/1254000/1254985.png?f1588263349 - in "art.ngfiles.com", "thumbnails", _, /^(\d+)\.\w+$/ => filename + in "art.ngfiles.com", "thumbnails", _, /^(\d+)\.\w+$/ @work_id = $1 - @filename = filename # https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg - in "art.ngfiles.com", "comments", _, /^iu/ => filename - @filename = filename + in "art.ngfiles.com", "comments", _, /^iu/ + nil # https://natthelich.newgrounds.com # https://natthelich.newgrounds.com/art/ diff --git a/app/logical/source/url/plurk.rb b/app/logical/source/url/plurk.rb index d32a14552..c24ea6b8c 100644 --- a/app/logical/source/url/plurk.rb +++ 
b/app/logical/source/url/plurk.rb @@ -40,7 +40,7 @@ class Source::URL::Plurk < Source::URL # https://images.plurk.com/5wj6WD0r6y4rLN0DL3sqag.jpg # https://images.plurk.com/mx_5wj6WD0r6y4rLN0DL3sqag.jpg in "plurk.com", /^(mx_)?(\w{22})\.(\w+)$/ - @filename, @file_ext = $2, $3 + @image_id = $2 # https://www.plurk.com/p/om6zv4 in "plurk.com", "p", work_id diff --git a/app/logical/source/url/twit_pic.rb b/app/logical/source/url/twit_pic.rb index 8e8259d8f..b0ffff395 100644 --- a/app/logical/source/url/twit_pic.rb +++ b/app/logical/source/url/twit_pic.rb @@ -40,9 +40,8 @@ class Source::URL::TwitPic < Source::URL @base36_id = base36_id # https://twitpic.com/show/large/carwkf.jpg - in "twitpic.com", "show", size, filename - @filename, @file_ext = filename.split(".") - @base36_id = @filename + in "twitpic.com", "show", size, _ + @base36_id = filename # https://o.twimg.com/1/proxy.jpg?t=FQQVBBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2NhcndrZi5qcGcUBBYAEgA&s=y8haxddqxJYpWql9uVnP3aoFFS7rA10vOGPdTO5HXvk # https://o.twimg.com/2/proxy.jpg?t=HBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2R0bnVydS5qcGcUsAkU0ggAFgASAA&s=dnN4DHCdnojC-iCJWdvZ-UZinrlWqAP7k7lmll2fTxs @@ -61,9 +60,8 @@ class Source::URL::TwitPic < Source::URL # http://d3j5vwomefv46c.cloudfront.net/photos/large/820960031.jpg?1384107199 # https://dn3pm25xmtlyu.cloudfront.net/photos/large/839006715.jpg?Expires=1646850828&Signature=d60CmLlmNqZJvOTteTOan13QWZ8gY3C4rUWCkh-IUoRr012vYtUYtip74GslGwCG0dxV5mpUpVFkaVZf16PiY7CsTdpAlA8Pmu2tN98D2dmC5FuW9KhhygDv6eFC8faoaGEyj~ArLuwz-8lC6Y05TVf0FgweeWwsRxFOfD5JHgCeIB0iZqzUx1t~eb6UMAWvbaKpfgvcp2oaDuCdZlMNi9T5OUBFoTh2DfnGy8t5COys1nOYYfZ9l69TDvVb2PKBaV8lsKK9xMwjoJNaWa1HL5S4MgODS5hiNDvycoBpu9KUvQ7q~rhC8cV6ZNctB5H9u~MmvBPoTKfy4w37cSc5uw__&Key-Pair-Id=APKAJROXZ7FN26MABHYA - in /cloudfront\.net/, "photos", size, filename - @filename, @file_ext = filename.split(".") - @base36_id = @filename.to_i.to_s(36) + in /cloudfront\.net/, "photos", size, _ + @base36_id = filename.to_i.to_s(36) else end 
diff --git a/app/logical/source/url/twitter.rb b/app/logical/source/url/twitter.rb index 7fc683091..270eb74a5 100644 --- a/app/logical/source/url/twitter.rb +++ b/app/logical/source/url/twitter.rb @@ -87,10 +87,10 @@ class Source::URL::Twitter < Source::URL # https://pbs.twimg.com/tweet_video_thumb/ETkN_L3X0AMy1aT.jpg # https://pbs.twimg.com/ext_tw_video_thumb/1243725361986375680/pu/img/JDA7g7lcw7wK-PIv.jpg # https://pbs.twimg.com/amplify_video_thumb/1215590775364259840/img/lolCkEEioFZTb5dl.jpg - in "twimg.com", ("media" | "tweet_video_thumb" | "ext_tw_video_thumb" | "amplify_video_thumb") => media_type, *subdirs, filename + in "twimg.com", ("media" | "tweet_video_thumb" | "ext_tw_video_thumb" | "amplify_video_thumb") => media_type, *subdirs, file # EBGbJe_U8AA4Ekb.jpg:small - @filename, @file_size = filename.split(":") - @filename, @file_ext = @filename.split(".") + @file, @file_size = file.split(":") + @file, @file_ext = @file.split(".") # EBGbJe_U8AA4Ekb?format=jpg&name=900x900 @file_size = params[:name] if params[:name].present? @@ -98,7 +98,7 @@ class Source::URL::Twitter < Source::URL # /media/EBGbJe_U8AA4Ekb.jpg # /ext_tw_video_thumb/1243725361986375680/pu/img/JDA7g7lcw7wK-PIv.jpg - @file_path = File.join(media_type, subdirs.join("/"), "#{@filename}.#{@file_ext}") + @file_path = File.join(media_type, subdirs.join("/"), "#{@file}.#{@file_ext}") else end end