diff --git a/app/logical/danbooru/url.rb b/app/logical/danbooru/url.rb
new file mode 100644
index 000000000..f17614077
--- /dev/null
+++ b/app/logical/danbooru/url.rb
@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+
+module Danbooru
+  # Wraps an Addressable::URI that is known to be an HTTP or HTTPS URL. Methods not defined here are
+  # delegated to the wrapped URI.
+  class URL
+    # Raised when a string can't be turned into an HTTP or HTTPS URL.
+    class Error < StandardError; end
+
+    # The value passed to the constructor, and the Addressable::URI parsed from it.
+    attr_reader :original_url, :url
+    delegate_missing_to :url
+
+    # Parse a string into a URL, or raise an exception if the string is not a valid HTTP or HTTPS URL.
+    #
+    # @param string [String]
+    # @raise [Danbooru::URL::Error] if the string can't be parsed or its scheme is not http or https
+    # @return [Danbooru::URL]
+    def initialize(string)
+      @original_url = string
+      # heuristic_parse returns nil when given nil; `&.` lets that case reach the Error below.
+      @url = Addressable::URI.heuristic_parse(string)&.display_uri
+      raise Error, "#{string} is not an http:// URL" unless @url&.normalized_scheme.in?(["http", "https"])
+
+      @url.path = nil if @url.path == "/"
+    rescue Addressable::URI::InvalidURIError => e
+      raise Error, e.message
+    end
+
+    # Parse a string into a URL, or return nil if the string is not a valid HTTP or HTTPS URL.
+    #
+    # @param string [String]
+    # @return [Danbooru::URL, nil]
+    def self.parse(string)
+      new(string)
+    rescue StandardError
+      # Any failure while constructing the URL is reported to the caller as nil.
+      nil
+    end
+
+    # @return [String] the URL in normalized form
+    def to_s
+      url.to_str
+    end
+
+    # @return [Array<String>] the non-blank segments of the URL's path
+    def path_segments
+      path.split("/").compact_blank
+    end
+
+    # @return [Hash] the URL's query parameters; empty when the URL has no query string
+    def params
+      # query_values is nil when there is no query string, so coerce it to a Hash first.
+      url.query_values.to_h.with_indifferent_access
+    end
+  end
+end
diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index 9d68389fc..d28da7e1b 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -46,8 +46,8 @@ module Sources @referer_url = referer_url&.to_s @urls = [@url, @referer_url].select(&:present?) 
- @parsed_url = Addressable::URI.heuristic_parse(url) rescue nil - @parsed_referer = Addressable::URI.heuristic_parse(referer_url) rescue nil + @parsed_url = Danbooru::URL.parse(url) + @parsed_referer = Danbooru::URL.parse(referer_url) @parsed_urls = [parsed_url, parsed_referer].select(&:present?) end @@ -65,7 +65,7 @@ module Sources end def site_name - host = Addressable::URI.heuristic_parse(url)&.host + host = parsed_url&.host # XXX should go in dedicated strategies. case host @@ -134,8 +134,6 @@ module Sources else host end - rescue Addressable::URI::InvalidURIError - nil end # Whatever url is, this method should return the direct links diff --git a/app/models/post.rb b/app/models/post.rb index 6ba8f34ca..a041f3c26 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -314,10 +314,7 @@ class Post < ApplicationRecord def source_domain return "" unless source =~ %r{\Ahttps?://}i - url = Addressable::URI.parse(normalized_source) - url.domain - rescue StandardError - "" + Danbooru::URL.parse(normalized_source)&.domain.to_s end end diff --git a/app/models/upload.rb b/app/models/upload.rb index 21581cb5c..1a3e21b32 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -70,8 +70,7 @@ class Upload < ApplicationRecord class_methods do # percent-encode unicode characters in the URL def normalize_source(url) - return nil if url.nil? - Addressable::URI.normalized_encode(url) + Danbooru::URL.parse(url)&.to_s.presence || url end end end