From 5af50b7fcd544aba612f074e2b32bd297fc0986a Mon Sep 17 00:00:00 2001 From: evazion Date: Wed, 24 Jun 2020 21:33:10 -0500 Subject: [PATCH] danbooru::http: factor out Cloudflare Polish bypassing. * Factor out the Cloudflare Polish bypass code to a standalone feature. * Add `http_downloader` method to the base source strategy. This is an HTTP client that should be used for downloading images or making requests to images. This client ensures that referrer spoofing and Cloudflare bypassing are performed. This fixes a bug with the upload page reporting the polished filesize instead of the original filesize when uploading ArtStation images. --- app/logical/danbooru/http.rb | 17 +++------------- .../danbooru/http/unpolish_cloudflare.rb | 20 +++++++++++++++++++ app/logical/sources/strategies/base.rb | 17 +++++++++++----- app/logical/sources/strategies/moebooru.rb | 2 +- test/unit/danbooru_http_test.rb | 12 ++++++++++- 5 files changed, 47 insertions(+), 21 deletions(-) create mode 100644 app/logical/danbooru/http/unpolish_cloudflare.rb diff --git a/app/logical/danbooru/http.rb b/app/logical/danbooru/http.rb index d5c223b57..43530522e 100644 --- a/app/logical/danbooru/http.rb +++ b/app/logical/danbooru/http.rb @@ -5,6 +5,7 @@ require "danbooru/http/redirector" require "danbooru/http/retriable" require "danbooru/http/session" require "danbooru/http/spoof_referrer" +require "danbooru/http/unpolish_cloudflare" module Danbooru class Http @@ -26,7 +27,6 @@ module Danbooru .timeout(DEFAULT_TIMEOUT) .headers("Accept-Encoding" => "gzip") .headers("User-Agent": "#{Danbooru.config.canonical_app_name}/#{Rails.application.config.x.git_hash}") - .use(:spoof_referrer) .use(:auto_inflate) .use(redirector: { max_redirects: MAX_REDIRECTS }) .use(:session) @@ -98,20 +98,9 @@ module Danbooru end concerning :DownloadMethods do - def download_media(url, no_polish: true, **options) + def download_media(url, file: Tempfile.new("danbooru-download-", binmode: true)) response = get(url) - # prevent 
Cloudflare Polish from modifying images. - if no_polish && response.headers["CF-Polished"].present? - url.query_values = url.query_values.to_h.merge(danbooru_no_polish: SecureRandom.uuid) - return download_media(url, no_polish: false) - end - - file = download_response(response, **options) - [response, MediaFile.open(file)] - end - - def download_response(response, file: Tempfile.new("danbooru-download-", binmode: true)) raise DownloadError, "Downloading #{response.uri} failed with code #{response.status}" if response.status != 200 raise FileTooLargeError, response if @max_size && response.content_length.to_i > @max_size @@ -123,7 +112,7 @@ module Danbooru end file.rewind - file + [response, MediaFile.open(file)] end end diff --git a/app/logical/danbooru/http/unpolish_cloudflare.rb b/app/logical/danbooru/http/unpolish_cloudflare.rb new file mode 100644 index 000000000..5ad62dcba --- /dev/null +++ b/app/logical/danbooru/http/unpolish_cloudflare.rb @@ -0,0 +1,20 @@ +# Bypass Cloudflare Polish (https://support.cloudflare.com/hc/en-us/articles/360000607372-Using-Cloudflare-Polish-to-compress-images) + +module Danbooru + class Http + class UnpolishCloudflare < HTTP::Feature + HTTP::Options.register_feature :unpolish_cloudflare, self + + def perform(request, &block) + response = yield request + + if response.headers["CF-Polished"].present? + request.uri.query_values = request.uri.query_values.to_h.merge(danbooru_no_polish: SecureRandom.uuid) + response = yield request + end + + response + end + end + end +end diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index 718e3f73e..15f476a15 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -146,7 +146,7 @@ module Sources # Returns the size of the image resource without actually downloading the file. 
def remote_size - response = http.head(image_url) + response = http_downloader.head(image_url) return nil unless response.status == 200 && response.content_length.present? response.content_length.to_i @@ -156,16 +156,23 @@ module Sources # Download the file at the given url, or at the main image url by default. def download_file!(download_url = image_url) raise DownloadError, "Download failed: couldn't find download url for #{url}" if download_url.blank? - response, file = http.download_media(download_url) + response, file = http_downloader.download_media(download_url) raise DownloadError, "Download failed: #{download_url} returned error #{response.status}" if response.status != 200 file end + # A http client for API requests. def http - Danbooru::Http.headers(headers).public_only.timeout(30).max_size(Danbooru.config.max_file_size) + Danbooru::Http.new.public_only end memoize :http + # A http client for downloading files. + def http_downloader + http.timeout(30).max_size(Danbooru.config.max_file_size).use(:spoof_referrer).use(:unpolish_cloudflare) + end + memoize :http_downloader + # The url to use for artist finding purposes. This will be stored in the # artist entry. Normally this will be the profile url. def normalize_for_artist_finder @@ -292,8 +299,8 @@ module Sources to_h.to_json end - def http_exists?(url, headers = {}) - http.headers(headers).head(url).status.success? + def http_exists?(url) + http_downloader.head(url).status.success? end # Convert commentary to dtext by stripping html tags. Sites can override diff --git a/app/logical/sources/strategies/moebooru.rb b/app/logical/sources/strategies/moebooru.rb index 9c4cc1f8d..4a47b8d54 100644 --- a/app/logical/sources/strategies/moebooru.rb +++ b/app/logical/sources/strategies/moebooru.rb @@ -155,7 +155,7 @@ module Sources # the api_response wasn't available because it's a deleted post. elsif post_md5.present? 
- %w[jpg png gif].find { |ext| http_exists?("https://#{site_name}/image/#{post_md5}.#{ext}", headers) } + %w[jpg png gif].find { |ext| http_exists?("https://#{site_name}/image/#{post_md5}.#{ext}") } else nil diff --git a/test/unit/danbooru_http_test.rb b/test/unit/danbooru_http_test.rb index 7f3e0a2ad..08ad71f64 100644 --- a/test/unit/danbooru_http_test.rb +++ b/test/unit/danbooru_http_test.rb @@ -129,13 +129,23 @@ class DanbooruHttpTest < ActiveSupport::TestCase context "spoof referrer feature" do should "spoof the referer" do - response = Danbooru::Http.get("https://httpbin.org/anything") + response = Danbooru::Http.use(:spoof_referrer).get("https://httpbin.org/anything") assert_equal(200, response.status) assert_equal("https://httpbin.org", response.parse.dig("headers", "Referer")) end end + context "unpolish cloudflare feature" do + should "return the original image for polished images" do + url = "https://cdnb.artstation.com/p/assets/images/images/025/273/307/4k/atey-ghailan-a-sage-keyart-s-ch-04-outlined-1.jpg?1585246642" + response = Danbooru::Http.use(:unpolish_cloudflare).get(url) + + assert_equal(200, response.status) + assert_equal(720_743, response.content_length) + end + end + context "#download method" do should "download files" do response, file = Danbooru::Http.download_media("https://httpbin.org/bytes/1000")