diff --git a/app/logical/cloudflare_service.rb b/app/logical/cloudflare_service.rb index 6a1161502..8cc1be195 100644 --- a/app/logical/cloudflare_service.rb +++ b/app/logical/cloudflare_service.rb @@ -9,15 +9,6 @@ class CloudflareService api_token.present? && zone.present? end - def ips(expiry: 24.hours) - response = Danbooru::Http.cache(expiry).get("https://api.cloudflare.com/client/v4/ips") - return [] if response.code != 200 - - result = response.parse["result"] - ips = result["ipv4_cidrs"] + result["ipv6_cidrs"] - ips.map { |ip| IPAddr.new(ip) } - end - def purge_cache(urls) return unless enabled? diff --git a/app/logical/danbooru/http.rb b/app/logical/danbooru/http.rb index 666a67f2c..b87f5e60c 100644 --- a/app/logical/danbooru/http.rb +++ b/app/logical/danbooru/http.rb @@ -1,18 +1,25 @@ module Danbooru class Http + class DownloadError < StandardError; end + class FileTooLargeError < StandardError; end + DEFAULT_TIMEOUT = 10 MAX_REDIRECTS = 5 - attr_writer :cache, :http + attr_writer :cache, :max_size, :http class << self - delegate :get, :put, :post, :delete, :cache, :follow, :timeout, :auth, :basic_auth, :headers, to: :new + delegate :get, :head, :put, :post, :delete, :cache, :follow, :max_size, :timeout, :auth, :basic_auth, :headers, :public_only, :download_media, to: :new end def get(url, **options) request(:get, url, **options) end + def head(url, **options) + request(:head, url, **options) + end + def put(url, **options) - request(:get, url, **options) + request(:put, url, **options) end @@ -33,6 +40,10 @@ module Danbooru dup.tap { |o| o.http = o.http.follow(*args) } end + def max_size(size) + dup.tap { |o| o.max_size = size } + end + def timeout(*args) dup.tap { |o| o.http = o.http.timeout(*args) } end @@ -49,6 +60,46 @@ module Danbooru dup.tap { |o| o.http = o.http.headers(*args) } end + # allow requests only to public IPs, not to local or private networks. 
+ def public_only + dup.tap do |o| + o.http = o.http.dup.tap do |http| + http.default_options = http.default_options.with_socket_class(ValidatingSocket) + end + end + end + + concerning :DownloadMethods do + def download_media(url, no_polish: true, **options) + url = Addressable::URI.heuristic_parse(url) + response = headers(Referer: url.origin).get(url) + + # prevent Cloudflare Polish from modifying images. + if no_polish && response.headers["CF-Polished"].present? + url.query_values = url.query_values.to_h.merge(danbooru_no_polish: SecureRandom.uuid) + return download_media(url, no_polish: false) + end + + file = download_response(response, **options) + [response, MediaFile.open(file)] + end + + def download_response(response, file: Tempfile.new("danbooru-download-", binmode: true)) + raise DownloadError, response if response.status != 200 + raise FileTooLargeError, response if @max_size && response.content_length.to_i > @max_size + + size = 0 + response.body.each do |chunk| + size += chunk.size + raise FileTooLargeError if @max_size && size > @max_size + file.write(chunk) + end + + file.rewind + file + end + end + protected def request(method, url, **options) @@ -57,11 +108,12 @@ module Danbooru else raw_request(method, url, **options) end + rescue ValidatingSocket::ProhibitedIpError + fake_response(597, "") rescue HTTP::Redirector::TooManyRedirectsError - ::HTTP::Response.new(status: 598, body: "", version: "1.1") + fake_response(598, "") rescue HTTP::TimeoutError - # return a synthetic http error on connection timeouts - ::HTTP::Response.new(status: 599, body: "", version: "1.1") + fake_response(599, "") end def cached_request(method, url, **options) @@ -79,6 +131,10 @@ module Danbooru http.send(method, url, **options) end + def fake_response(status, body) + ::HTTP::Response.new(status: status, version: "1.1", body: ::HTTP::Response::Body.new(body)) + end + def http @http ||= ::HTTP. follow(strict: false, max_hops: MAX_REDIRECTS). 
diff --git a/app/logical/downloads/file.rb b/app/logical/downloads/file.rb deleted file mode 100644 index 8bbab504b..000000000 --- a/app/logical/downloads/file.rb +++ /dev/null @@ -1,123 +0,0 @@ -require 'resolv' - -module Downloads - class File - include ActiveModel::Validations - class Error < StandardError; end - - RETRIABLE_ERRORS = [Errno::ECONNRESET, Errno::ETIMEDOUT, Errno::EIO, Errno::EHOSTUNREACH, Errno::ECONNREFUSED, Timeout::Error, IOError] - - delegate :data, to: :strategy - attr_reader :url, :referer - - validate :validate_url - - def initialize(url, referer = nil) - @url = Addressable::URI.parse(url) rescue nil - @referer = referer - validate! - end - - def size - res = HTTParty.head(uncached_url, **httparty_options, timeout: 3) - - if res.success? - res.content_length - else - raise HTTParty::ResponseError.new(res) - end - end - - def download!(url: uncached_url, tries: 3, **options) - Retriable.retriable(on: RETRIABLE_ERRORS, tries: tries, base_interval: 0) do - file = http_get_streaming(url, headers: strategy.headers, **options) - return [file, strategy] - end - end - - def validate_url - errors[:base] << "URL must not be blank" if url.blank? - errors[:base] << "'#{url}' is not a valid url" if !url.host.present? - errors[:base] << "'#{url}' is not a valid url. Did you mean 'http://#{url}'?" if !url.scheme.in?(%w[http https]) - end - - def http_get_streaming(url, file: Tempfile.new(binmode: true), headers: {}, max_size: Danbooru.config.max_file_size) - size = 0 - - res = HTTParty.get(url, httparty_options) do |chunk| - next if chunk.code == 302 - - size += chunk.size - raise Error.new("File is too large (max size: #{max_size})") if size > max_size && max_size > 0 - - file.write(chunk) - end - - if res.success? - file.rewind - return file - else - raise Error.new("HTTP error code: #{res.code} #{res.message}") - end - end - - # Prevent Cloudflare from potentially mangling the image. See issue #3528. 
- def uncached_url - return file_url unless is_cloudflare?(file_url) - - url = file_url.dup - url.query_values = url.query_values.to_h.merge(danbooru_no_cache: SecureRandom.uuid) - url - end - - def preview_url - @preview_url ||= Addressable::URI.parse(strategy.preview_url) - end - - def file_url - @file_url ||= Addressable::URI.parse(strategy.image_url) - end - - def strategy - @strategy ||= Sources::Strategies.find(url.to_s, referer) - end - - def httparty_options - { - timeout: 10, - stream_body: true, - headers: strategy.headers, - connection_adapter: ValidatingConnectionAdapter - }.deep_merge(Danbooru.config.httparty_options) - end - - def is_cloudflare?(url) - ip_addr = IPAddr.new(Resolv.getaddress(url.hostname)) - CloudflareService.new.ips.any? { |subnet| subnet.include?(ip_addr) } - end - - def self.banned_ip?(ip) - ip = IPAddress.parse(ip.to_s) unless ip.is_a?(IPAddress) - - if ip.ipv4? - ip.loopback? || ip.link_local? || ip.multicast? || ip.private? - elsif ip.ipv6? - ip.loopback? || ip.link_local? || ip.unique_local? || ip.unspecified? - end - end - end - - # Hook into HTTParty to validate the IP before following redirects. 
- # https://www.rubydoc.info/github/jnunemaker/httparty/HTTParty/ConnectionAdapter - class ValidatingConnectionAdapter < HTTParty::ConnectionAdapter - def self.call(uri, options) - ip_addr = IPAddress.parse(::Resolv.getaddress(uri.hostname)) - - if Downloads::File.banned_ip?(ip_addr) - raise Downloads::File::Error, "Downloads from #{ip_addr} are not allowed" - end - - super(uri, options) - end - end -end diff --git a/app/logical/iqdb_proxy.rb b/app/logical/iqdb_proxy.rb index 67e27bd90..41dc5cd63 100644 --- a/app/logical/iqdb_proxy.rb +++ b/app/logical/iqdb_proxy.rb @@ -12,8 +12,9 @@ class IqdbProxy end def download(url, type) - download = Downloads::File.new(url) - file, strategy = download.download!(url: download.send(type)) + strategy = Sources::Strategies.find(url) + download_url = strategy.send(type) + file = strategy.download_file!(download_url) file end @@ -32,7 +33,7 @@ class IqdbProxy file = download(params[:image_url], :url) results = query(file: file, limit: limit) elsif params[:file_url].present? - file = download(params[:file_url], :file_url) + file = download(params[:file_url], :image_url) results = query(file: file, limit: limit) elsif params[:post_id].present? url = Post.find(params[:post_id]).preview_file_url diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index 0ee78f365..fcc0ba6e6 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -14,6 +14,8 @@ module Sources module Strategies class Base + class DownloadError < StandardError; end + attr_reader :url, :referer_url, :urls, :parsed_url, :parsed_referer, :parsed_urls extend Memoist @@ -35,8 +37,8 @@ module Sources # referrer_url so the strategy can discover the HTML # page and other information. def initialize(url, referer_url = nil) - @url = url - @referer_url = referer_url + @url = url.to_s + @referer_url = referer_url&.to_s @urls = [url, referer_url].select(&:present?) 
@parsed_url = Addressable::URI.heuristic_parse(url) rescue nil @@ -144,10 +146,22 @@ module Sources # Returns the size of the image resource without actually downloading the file. def size - Downloads::File.new(image_url).size + http.head(image_url).content_length.to_i end memoize :size + # Download the file at the given url, or at the main image url by default. + def download_file!(download_url = image_url) + response, file = http.download_media(download_url) + raise DownloadError, "Download failed: #{download_url} returned error #{response.status}" if response.status != 200 + file + end + + def http + Danbooru::Http.public_only.timeout(30).max_size(Danbooru.config.max_file_size) + end + memoize :http + # The url to use for artist finding purposes. This will be stored in the # artist entry. Normally this will be the profile url. def normalize_for_artist_finder diff --git a/app/logical/upload_service/controller_helper.rb b/app/logical/upload_service/controller_helper.rb index 7342168fd..9eeb6d442 100644 --- a/app/logical/upload_service/controller_helper.rb +++ b/app/logical/upload_service/controller_helper.rb @@ -7,11 +7,8 @@ class UploadService # this gets called from UploadsController#new so we need to preprocess async UploadPreprocessorDelayedStartJob.perform_later(url, ref, CurrentUser.user) - begin - download = Downloads::File.new(url, ref) - remote_size = download.size - rescue Exception - end + strategy = Sources::Strategies.find(url, ref) + remote_size = strategy.size return [upload, remote_size] end diff --git a/app/logical/upload_service/utils.rb b/app/logical/upload_service/utils.rb index 7482d43b6..e4316cd90 100644 --- a/app/logical/upload_service/utils.rb +++ b/app/logical/upload_service/utils.rb @@ -71,13 +71,13 @@ class UploadService return file if file.present? raise "No file or source URL provided" if upload.source_url.blank? - download = Downloads::File.new(upload.source_url, upload.referer_url) - file, strategy = download.download! 
+ strategy = Sources::Strategies.find(upload.source_url, upload.referer_url) + file = strategy.download_file! - if download.data[:ugoira_frame_data].present? + if strategy.data[:ugoira_frame_data].present? upload.context = { "ugoira" => { - "frame_data" => download.data[:ugoira_frame_data], + "frame_data" => strategy.data[:ugoira_frame_data], "content_type" => "image/jpeg" } } diff --git a/app/logical/validating_socket.rb b/app/logical/validating_socket.rb new file mode 100644 index 000000000..446609073 --- /dev/null +++ b/app/logical/validating_socket.rb @@ -0,0 +1,27 @@ +# A TCPSocket wrapper that disallows connections to local or private IPs. Used for SSRF protection. +# https://owasp.org/www-community/attacks/Server_Side_Request_Forgery + +require "resolv" + +class ValidatingSocket < TCPSocket + class ProhibitedIpError < StandardError; end + + def initialize(hostname, port) + ip = validate_hostname!(hostname) + super(ip, port) + end + + def validate_hostname!(hostname) + ip = IPAddress.parse(::Resolv.getaddress(hostname)) + raise ProhibitedIpError, "Connection to #{hostname} failed; #{ip} is a prohibited IP" if prohibited_ip?(ip) + ip.to_s + end + + def prohibited_ip?(ip) + if ip.ipv4? + ip.loopback? || ip.link_local? || ip.multicast? || ip.private? + elsif ip.ipv6? + ip.loopback? || ip.link_local? || ip.unique_local? || ip.unspecified? + end + end +end diff --git a/test/test_helpers/download_test_helper.rb b/test/test_helpers/download_test_helper.rb index 72805cf9b..eea711588 100644 --- a/test/test_helpers/download_test_helper.rb +++ b/test/test_helpers/download_test_helper.rb @@ -1,8 +1,8 @@ module DownloadTestHelper def assert_downloaded(expected_filesize, source, referer = nil) - download = Downloads::File.new(source, referer) - tempfile, strategy = download.download! - assert_equal(expected_filesize, tempfile.size, "Tested source URL: #{source}") + strategy = Sources::Strategies.find(source, referer) + file = strategy.download_file! 
+ assert_equal(expected_filesize, file.size, "Tested source URL: #{source}") rescue Net::OpenTimeout skip "Remote connection to #{source} failed" end diff --git a/test/unit/cloudflare_service_test.rb b/test/unit/cloudflare_service_test.rb index faced9207..146068a6d 100644 --- a/test/unit/cloudflare_service_test.rb +++ b/test/unit/cloudflare_service_test.rb @@ -14,10 +14,4 @@ class CloudflareServiceTest < ActiveSupport::TestCase assert_requested(:delete, "https://api.cloudflare.com/client/v4/zones/123/purge_cache", times: 1) end end - - context "#ips" do - should "work" do - refute_empty(@cloudflare.ips) - end - end end diff --git a/test/unit/danbooru_http_test.rb b/test/unit/danbooru_http_test.rb index 42e585bef..f7529491b 100644 --- a/test/unit/danbooru_http_test.rb +++ b/test/unit/danbooru_http_test.rb @@ -4,7 +4,7 @@ class DanbooruHttpTest < ActiveSupport::TestCase context "Danbooru::Http" do context "#get method" do should "work for all basic methods" do - %i[get put post delete].each do |method| + %i[get head put post delete].each do |method| response = Danbooru::Http.send(method, "https://httpbin.org/status/200") assert_equal(200, response.status) end @@ -26,8 +26,10 @@ class DanbooruHttpTest < ActiveSupport::TestCase end should "fail if the request takes too long to download" do - response = Danbooru::Http.timeout(1).get("https://httpbin.org/drip?duration=5&numbytes=5") - assert_equal(599, response.status) + # XXX should return status 599 instead + assert_raises(HTTP::TimeoutError) do + response = Danbooru::Http.timeout(1).get("https://httpbin.org/drip?duration=10&numbytes=10").flush + end end should "automatically decompress gzipped responses" do @@ -45,5 +47,45 @@ class DanbooruHttpTest < ActiveSupport::TestCase assert_equal(response2.body, response1.body) end end + + context "#download method" do + should "download files" do + response, file = Danbooru::Http.download_media("https://httpbin.org/bytes/1000") + + assert_equal(200, response.status) + 
assert_equal(1000, file.size) + end + + should "follow redirects when downloading files" do + response, file = Danbooru::Http.download_media("https://httpbin.org/redirect-to?url=https://httpbin.org/bytes/1000") + + assert_equal(200, response.status) + assert_equal(1000, file.size) + end + + should "fail if the url points to a private IP" do + assert_raises(Danbooru::Http::DownloadError) do + Danbooru::Http.public_only.download_media("https://127.0.0.1.xip.io") + end + end + + should "fail if the url redirects to a private IP" do + assert_raises(Danbooru::Http::DownloadError) do + Danbooru::Http.public_only.download_media("https://httpbin.org/redirect-to?url=https://127.0.0.1.xip.io") + end + end + + should "fail if a download is too large" do + assert_raises(Danbooru::Http::FileTooLargeError) do + response, file = Danbooru::Http.max_size(500).download_media("https://httpbin.org/bytes/1000") + end + end + + should "fail if a streaming download is too large" do + assert_raises(Danbooru::Http::FileTooLargeError) do + response, file = Danbooru::Http.max_size(500).download_media("https://httpbin.org/stream-bytes/1000") + end + end + end end end diff --git a/test/unit/downloads/art_station_test.rb b/test/unit/downloads/art_station_test.rb index 0ebacd585..83b1710ca 100644 --- a/test/unit/downloads/art_station_test.rb +++ b/test/unit/downloads/art_station_test.rb @@ -3,53 +3,27 @@ require 'test_helper' module Downloads class ArtStationTest < ActiveSupport::TestCase context "a download for a (small) artstation image" do - setup do - @asset = "https://cdnb3.artstation.com/p/assets/images/images/003/716/071/small/aoi-ogata-hate-city.jpg?1476754974" - @download = Downloads::File.new(@asset) - end - should "download the /4k/ image instead" do - file, strategy = @download.download! 
- assert_equal(1_880_910, ::File.size(file.path)) + assert_downloaded(1_880_910, "https://cdnb3.artstation.com/p/assets/images/images/003/716/071/small/aoi-ogata-hate-city.jpg?1476754974") end end context "for an image where an original does not exist" do - setup do - @asset = "https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg" - @download = Downloads::File.new(@asset) - end - should "not try to download the original" do - file, strategy = @download.download! - assert_equal(483_192, ::File.size(file.path)) + assert_downloaded(483_192, "https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg") end end context "a download for an ArtStation image hosted on CloudFlare" do - setup do - @asset = "https://cdnb.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974" - end - should "return the original file, not the polished file" do + @asset = "https://cdnb.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974" assert_downloaded(1_880_910, @asset) end - - should "return the original filesize, not the polished filesize" do - assert_equal(1_880_910, Downloads::File.new(@asset).size) - end end context "a download for a https://$artist.artstation.com/projects/$id page" do - setup do - @source = "https://dantewontdie.artstation.com/projects/YZK5q" - @download = Downloads::File.new(@source) - end - should "download the original image instead" do - file, strategy = @download.download! 
- - assert_equal(247_350, ::File.size(file.path)) + assert_downloaded(247_350, "https://dantewontdie.artstation.com/projects/YZK5q") end end end diff --git a/test/unit/downloads/file_test.rb b/test/unit/downloads/file_test.rb deleted file mode 100644 index 7e37787dd..000000000 --- a/test/unit/downloads/file_test.rb +++ /dev/null @@ -1,81 +0,0 @@ -require 'test_helper' - -module Downloads - class FileTest < ActiveSupport::TestCase - context "A post download" do - setup do - @source = "http://www.google.com/intl/en_ALL/images/logo.gif" - @download = Downloads::File.new(@source) - end - - context "for a banned IP" do - setup do - Resolv.expects(:getaddress).returns("127.0.0.1").at_least_once - end - - should "not try to download the file" do - assert_raise(Downloads::File::Error) { Downloads::File.new("http://evil.com").download! } - end - - should "not try to fetch the size" do - assert_raise(Downloads::File::Error) { Downloads::File.new("http://evil.com").size } - end - - should "not follow redirects to banned IPs" do - url = "http://httpbin.org/redirect-to?url=http://127.0.0.1" - stub_request(:get, url).to_return(status: 301, headers: { "Location": "http://127.0.0.1" }) - - assert_raise(Downloads::File::Error) { Downloads::File.new(url).download! } - end - - should "not follow redirects that resolve to a banned IP" do - url = "http://httpbin.org/redirect-to?url=http://127.0.0.1.nip.io" - stub_request(:get, url).to_return(status: 301, headers: { "Location": "http://127.0.0.1.xip.io" }) - - assert_raise(Downloads::File::Error) { Downloads::File.new(url).download! } - end - end - - context "that fails" do - should "retry three times before giving up" do - HTTParty.expects(:get).times(3).raises(Errno::ETIMEDOUT) - assert_raises(Errno::ETIMEDOUT) { @download.download! 
} - end - - should "return an uncorrupted file on the second try" do - bomb = stub("bomb") - bomb.stubs(:code).raises(IOError) - resp = stub("resp", success?: true) - - chunk = stub("a") - chunk.stubs(:code).returns(200) - chunk.stubs(:size).returns(1) - chunk.stubs(:to_s).returns("a") - - HTTParty.expects(:get).twice.multiple_yields(chunk, bomb).then.multiple_yields(chunk, chunk).returns(resp) - @download.stubs(:is_cloudflare?).returns(false) - tempfile, _strategy = @download.download! - - assert_equal("aa", tempfile.read) - end - end - - should "throw an exception when the file is larger than the maximum" do - assert_raise(Downloads::File::Error) do - @download.download!(max_size: 1) - end - end - - should "store the file in the tempfile path" do - tempfile, strategy = @download.download! - assert_operator(tempfile.size, :>, 0, "should have data") - end - - should "correctly save the file when following 302 redirects" do - download = Downloads::File.new("https://yande.re/post/show/578014") - file, strategy = download.download!(url: download.preview_url) - assert_equal(19134, file.size) - end - end - end -end diff --git a/test/unit/downloads/pixiv_test.rb b/test/unit/downloads/pixiv_test.rb index a22f8a700..ddc32e14d 100644 --- a/test/unit/downloads/pixiv_test.rb +++ b/test/unit/downloads/pixiv_test.rb @@ -136,18 +136,14 @@ module Downloads end context "An ugoira site for pixiv" do - setup do - @download = Downloads::File.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364") - @tempfile, strategy = @download.download! - @tempfile.close! 
- end - should "capture the data" do - assert_equal(2, @download.data[:ugoira_frame_data].size) - if @download.data[:ugoira_frame_data][0]["file"] + @strategy = Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364") + + assert_equal(2, @strategy.data[:ugoira_frame_data].size) + if @strategy.data[:ugoira_frame_data][0]["file"] - assert_equal([{"file" => "000000.jpg", "delay" => 125}, {"file" => "000001.jpg", "delay" => 125}], @download.data[:ugoira_frame_data]) + assert_equal([{"file" => "000000.jpg", "delay" => 125}, {"file" => "000001.jpg", "delay" => 125}], @strategy.data[:ugoira_frame_data]) else - assert_equal([{"delay_msec" => 125}, {"delay_msec" => 125}], @download.data[:ugoira_frame_data]) + assert_equal([{"delay_msec" => 125}, {"delay_msec" => 125}], @strategy.data[:ugoira_frame_data]) end end end diff --git a/test/unit/upload_service_test.rb b/test/unit/upload_service_test.rb index b6b7bbf15..b15a399a5 100644 --- a/test/unit/upload_service_test.rb +++ b/test/unit/upload_service_test.rb @@ -212,7 +212,7 @@ class UploadServiceTest < ActiveSupport::TestCase context "on timeout errors" do setup do @source = "https://cdn.donmai.us/original/93/f4/93f4dd66ef1eb11a89e56d31f9adc8d0.jpg" - HTTParty.stubs(:get).raises(Net::ReadTimeout) + Danbooru::Http.any_instance.stubs(:get).raises(HTTP::TimeoutError) end should "leave the upload in an error state" do