danbooru::http: factor out Cloudflare Polish bypassing.

* Factor out the Cloudflare Polish bypass code to a standalone feature.

* Add an `http_downloader` method to the base source strategy. This is an
  HTTP client that should be used for downloading images or making
  requests to image URLs. This client ensures that referrer spoofing and
  Cloudflare Polish bypassing are performed.

This fixes a bug with the upload page reporting the polished filesize
instead of the original filesize when uploading ArtStation images.
This commit is contained in:
evazion
2020-06-24 21:33:10 -05:00
parent d3bb5c67ee
commit 5af50b7fcd
5 changed files with 47 additions and 21 deletions

View File

@@ -146,7 +146,7 @@ module Sources
# Returns the size of the image resource without actually downloading the file.
def remote_size
response = http.head(image_url)
response = http_downloader.head(image_url)
return nil unless response.status == 200 && response.content_length.present?
response.content_length.to_i
@@ -156,16 +156,23 @@ module Sources
# Download the file at the given url, or at the main image url by default.
#
# Returns the `file` half of the `[response, file]` pair produced by
# `download_media`. Raises DownloadError when `download_url` is blank, or
# when the response status is anything other than 200.
def download_file!(download_url = image_url)
raise DownloadError, "Download failed: couldn't find download url for #{url}" if download_url.blank?
# NOTE(review): the next two lines are the same assignment made through two
# different clients (`http` vs. `http_downloader`). This looks like the
# removed/added pair of a diff hunk whose -/+ markers were lost, not two live
# statements -- confirm before reading this as real control flow.
response, file = http.download_media(download_url)
response, file = http_downloader.download_media(download_url)
raise DownloadError, "Download failed: #{download_url} returned error #{response.status}" if response.status != 200
file
end
# An HTTP client for API requests. The result is memoized (see `memoize :http`
# after the method body).
def http
# NOTE(review): two alternative bodies follow. They look like the removed and
# added lines of a diff hunk whose -/+ markers were lost: the first configures
# headers, timeout and max size here; the second keeps only `public_only`
# (the same timeout/max_size calls appear in `http_downloader` in this diff).
# Read literally, only the last expression would be the return value -- confirm.
Danbooru::Http.headers(headers).public_only.timeout(30).max_size(Danbooru.config.max_file_size)
Danbooru::Http.new.public_only
end
memoize :http
# An HTTP client for downloading files.
#
# Starts from the `http` client and layers on, in this order: a timeout of 30
# (presumably seconds -- TODO confirm the unit), a maximum response size of
# `Danbooru.config.max_file_size`, and the `:spoof_referrer` and
# `:unpolish_cloudflare` features. The result is memoized (see
# `memoize :http_downloader` after the method body).
def http_downloader
http.timeout(30).max_size(Danbooru.config.max_file_size).use(:spoof_referrer).use(:unpolish_cloudflare)
end
memoize :http_downloader
# The url to use for artist finding purposes. This will be stored in the
# artist entry. Normally this will be the profile url.
def normalize_for_artist_finder
@@ -292,8 +299,8 @@ module Sources
to_h.to_json
end
# Returns whether a HEAD request to `url` comes back with a successful status.
#
# NOTE(review): two signatures and two bodies share the single `end` below.
# This looks like a diff hunk whose -/+ markers were lost: the first def/body
# pair (with the `headers` parameter, going through `http`) appears to be the
# removed version, and the second (no `headers`, going through
# `http_downloader`) the added one -- confirm before reading this as valid Ruby.
def http_exists?(url, headers = {})
http.headers(headers).head(url).status.success?
def http_exists?(url)
http_downloader.head(url).status.success?
end
# Convert commentary to dtext by stripping html tags. Sites can override

View File

@@ -155,7 +155,7 @@ module Sources
# the api_response wasn't available because it's a deleted post.
elsif post_md5.present?
%w[jpg png gif].find { |ext| http_exists?("https://#{site_name}/image/#{post_md5}.#{ext}", headers) }
%w[jpg png gif].find { |ext| http_exists?("https://#{site_name}/image/#{post_md5}.#{ext}") }
else
nil