Add Danbooru::URL class.

Introduce a Danbooru::URL class for dealing with URLs. This is a wrapper
around Addressable::URI that adds some additional helper methods. Most
significantly, the `parse` method only allows valid http/https URLs, and
it returns nil instead of raising an exception when the URL is invalid.
This commit is contained in:
evazion
2022-02-21 18:37:18 -06:00
parent 60a26af6e3
commit 7d49ab6130
4 changed files with 53 additions and 11 deletions

View File

@@ -0,0 +1,48 @@
# frozen_string_literal: true
module Danbooru
  # A thin wrapper around Addressable::URI that only accepts HTTP and HTTPS URLs.
  # Methods not defined here are forwarded to the wrapped URI (see `delegate_missing_to`).
  class URL
    # Raised by the constructor when the string can't be turned into an HTTP or HTTPS URL.
    class Error < StandardError; end

    # @return [String] the string passed to the constructor, unmodified
    attr_reader :original_url

    # @return [Addressable::URI] the parsed URL
    attr_reader :url

    delegate_missing_to :url

    # Parse a string into an URL, or raise an exception if the string is not a valid HTTP or HTTPS URL.
    #
    # @param string [String]
    # @raise [Danbooru::URL::Error] if the string is nil, can't be parsed, or isn't an http/https URL
    def initialize(string)
      @original_url = string

      # `heuristic_parse` returns nil for nil input; report that as our own error
      # instead of letting a NoMethodError escape from the `display_uri` call below.
      parsed = Addressable::URI.heuristic_parse(string)
      raise Error, "URL can't be nil" if parsed.nil?

      @url = parsed.display_uri
      # Treat a bare "/" path the same as no path at all.
      @url.path = nil if @url.path == "/"

      raise Error, "#{string} is not an http:// URL" unless @url.normalized_scheme.in?(%w[http https])
    rescue Addressable::URI::InvalidURIError => e
      raise Error, e
    end

    # Parse a string into an URL, or return nil if the string is not a valid HTTP or HTTPS URL.
    #
    # @param string [String]
    # @return [Danbooru::URL, nil]
    def self.parse(string)
      new(string)
    rescue StandardError
      # Deliberately broad: any failure to build the URL means "not a usable URL".
      nil
    end

    # @return [String] the URL in normalized form
    def to_s
      url.to_str
    end

    # @return [Array<String>] the URL's path split into segments
    def path_segments
      path.split("/").compact_blank
    end

    # @return [Hash] the URL's query parameters; empty when the URL has no query string
    def params
      # `query_values` is nil when there is no query string; `to_h` turns that into {}.
      url.query_values.to_h.with_indifferent_access
    end
  end
end

View File

@@ -46,8 +46,8 @@ module Sources
@referer_url = referer_url&.to_s
@urls = [@url, @referer_url].select(&:present?)
@parsed_url = Addressable::URI.heuristic_parse(url) rescue nil
@parsed_referer = Addressable::URI.heuristic_parse(referer_url) rescue nil
@parsed_url = Danbooru::URL.parse(url)
@parsed_referer = Danbooru::URL.parse(referer_url)
@parsed_urls = [parsed_url, parsed_referer].select(&:present?)
end
@@ -65,7 +65,7 @@ module Sources
end
def site_name
host = Addressable::URI.heuristic_parse(url)&.host
host = parsed_url&.host
# XXX should go in dedicated strategies.
case host
@@ -134,8 +134,6 @@ module Sources
else
host
end
rescue Addressable::URI::InvalidURIError
nil
end
# Whatever <tt>url</tt> is, this method should return the direct links

View File

@@ -314,10 +314,7 @@ class Post < ApplicationRecord
# Returns the domain parsed from `normalized_source`, or "" when one can't be determined.
def source_domain
# Only sources starting with http:// or https:// (case-insensitive) are considered.
return "" unless source =~ %r{\Ahttps?://}i
# NOTE(review): this hunk is rendered without +/- markers. The Addressable::URI lines
# and the rescue clause below look like the pre-change body, and the Danbooru::URL line
# like its replacement -- confirm against the actual file.
url = Addressable::URI.parse(normalized_source)
url.domain
rescue StandardError
""
# `&.domain` yields nil when parsing returns nil; the trailing `to_s` turns that into "".
Danbooru::URL.parse(normalized_source)&.domain.to_s
end
end

View File

@@ -70,8 +70,7 @@ class Upload < ApplicationRecord
class_methods do
# percent-encode unicode characters in the URL
def normalize_source(url)
# NOTE(review): this hunk is rendered without +/- markers. The nil guard and the
# Addressable::URI call look like the pre-change body, and the Danbooru::URL line like
# its replacement -- confirm against the actual file.
return nil if url.nil?
Addressable::URI.normalized_encode(url)
# Falls back to the input unchanged when parsing yields nil or a blank string.
Danbooru::URL.parse(url)&.to_s.presence || url
end
end
end