Files
danbooru/app/logical/downloads/file.rb
evazion c76463f34d uploads: use storage manager to distribute files.
Refactors the upload process to pass around temp files, rather than
passing around file paths and directly writing output to the local
filesystem. This way we can pass the storage manager the preview /
sample / original temp files, so it can deal with storage itself.

* Change Downloads::File#download! to return a temp file.

* Change DanbooruImageResizer and PixivUgoiraConverter to accept/return
  temp files instead of file paths.

* Change Upload#generate_resizes to return temp files for previews and samples.

* Change Upload#generate_resizes to generate ugoira .webm samples
  synchronously instead of asynchronously.
2018-03-20 19:49:06 -05:00

154 lines
4.6 KiB
Ruby

module Downloads
  # Downloads a remote file into a temp file.
  #
  # The source URL is first passed through every rewrite strategy (which may
  # change the URL, the request headers and the captured data), then streamed
  # to disk with a size limit, and finally the recorded source is normalized.
  class File
    # Raised for every download failure this class detects itself.
    #
    # NOTE(review): this inherits from Exception rather than StandardError, so
    # a bare `rescue` does not catch it. Left unchanged because callers may
    # depend on that; consider switching to StandardError after auditing them.
    class Error < Exception ; end

    attr_reader :data, :options
    attr_accessor :source, :original_source, :downloaded_source

    # @param source [String] the URL to download
    # @param options [Hash] recognized keys in this class: :get_thumbnail,
    #   :referer_url
    def initialize(source, options = {})
      # source can potentially get rewritten in the course
      # of downloading a file, so check it again
      @source = source
      @original_source = source

      # the URL actually downloaded after rewriting the original source.
      @downloaded_source = nil

      # we sometimes need to capture data from the source page
      @data = {}

      @options = options

      @data[:get_thumbnail] = options[:get_thumbnail]
    end

    # Issues a HEAD request against the rewritten URL.
    #
    # @return the `content_length` of the HEAD response
    def size
      url, headers, _ = before_download(@source, @data)
      # Named request_options so it does not shadow the `options` reader.
      request_options = { timeout: 3, headers: headers }.deep_merge(Danbooru.config.httparty_options)
      HTTParty.head(url, request_options).content_length
    end

    # Downloads the file and updates `data`, `downloaded_source` and `source`.
    #
    # @return [Tempfile] the downloaded file, rewound to the beginning
    # @raise [Error] on a non-HTTP(S) URL, banned host, oversized file or
    #   unsuccessful HTTP response
    def download!
      url, headers, @data = before_download(@source, @data)

      output_file = Tempfile.new(binmode: true)
      http_get_streaming(uncached_url(url, headers), output_file, headers)

      @downloaded_source = url
      @source = after_download(url)

      finished = true
      output_file
    ensure
      # On any failure the caller never receives the temp file, so close and
      # unlink it here instead of waiting for garbage collection.
      output_file.close! if output_file && !finished
    end

    # Runs every rewrite strategy in order, feeding each one the output of the
    # previous one.
    #
    # @param url [String] the URL to start from
    # @param datums [Hash] data captured so far
    # @return [Array] `[url, headers, datums]` after all rewrites
    def before_download(url, datums)
      headers = Danbooru.config.http_headers

      RewriteStrategies::Base.strategies.each do |strategy|
        url, headers, datums = strategy.new(url).rewrite(url, headers, datums)
      end

      [url, headers, datums]
    end

    # Normalizes the source to record after a successful download.
    #
    # @param src [String] the URL that was downloaded
    # @return the source to store
    def after_download(src)
      src = fix_twitter_sources(src)

      if options[:referer_url].present?
        src = set_source_to_referer(src, options[:referer_url])
      end

      src
    end

    # Rejects URLs whose host resolves to an address the config bans.
    #
    # NOTE(review): the hostname is resolved here independently of the request
    # made later, and only for the initial URL; confirm whether redirects or a
    # changing DNS answer need to be covered as well.
    #
    # @param url [URI] parsed URL
    # @raise [Error] if the resolved address is banned
    def validate_local_hosts(url)
      ip_addr = IPAddr.new(Resolv.getaddress(url.hostname))
      if Danbooru.config.banned_ip_for_download?(ip_addr)
        raise Error, "Banned server for download"
      end
    end

    # Streams `src` into `file`, retrying up to 3 attempts on connection errors.
    #
    # @param src the URL to fetch (anything `URI.parse` accepts)
    # @param file an IO-like object supporting write/pos/seek/truncate/rewind
    # @param headers [Hash] request headers
    # @param max_size [Integer] byte limit; values <= 0 disable the limit
    # @return `file`, rewound to the beginning
    # @raise [Error] on a non-HTTP(S) URL, banned host, oversized body or
    #   unsuccessful HTTP response
    def http_get_streaming(src, file, headers = {}, max_size: Danbooru.config.max_file_size)
      url = URI.parse(src)

      unless url.is_a?(URI::HTTP) || url.is_a?(URI::HTTPS)
        raise Error, "URL must be HTTP or HTTPS"
      end

      validate_local_hosts(url)

      request_options = { stream_body: true, timeout: 10, headers: headers }.deep_merge(Danbooru.config.httparty_options)
      start_pos = file.pos
      tries = 0

      begin
        size = 0

        res = HTTParty.get(url, request_options) do |chunk|
          size += chunk.size
          raise Error, "File is too large (max size: #{max_size})" if size > max_size && max_size > 0

          file.write(chunk)
        end

        raise Error, "HTTP error code: #{res.code} #{res.message}" unless res.success?

        file.rewind
        file
      rescue Errno::ECONNRESET, Errno::ETIMEDOUT, Errno::EIO, Errno::EHOSTUNREACH, Errno::ECONNREFUSED, IOError
        tries += 1
        raise if tries >= 3

        # Drop whatever the failed attempt already wrote; otherwise the retry
        # would append a second copy after the partial data.
        file.seek(start_pos)
        file.truncate(start_pos)
        retry
      end
    end

    # Prefers the original page URL over the raw media URL for Twitter and
    # Pawoo downloads.
    #
    # The patterns are anchored with \A so that only a source that *starts*
    # with the expected URL matches (^ would also match after a newline).
    #
    # @param src [String] the URL that was downloaded
    # @return [String] `original_source` when it is the matching page URL,
    #   otherwise `src`
    def fix_twitter_sources(src)
      if src =~ %r!\Ahttps?://(?:video|pbs)\.twimg\.com/! && original_source =~ %r!\Ahttps?://twitter\.com/!
        original_source
      elsif src =~ %r!\Ahttps?://img\.pawoo\.net/! && original_source =~ %r!\Ahttps?://pawoo\.net/!
        original_source
      else
        src
      end
    end

    # Replaces the source with the referer-derived URL for the sites listed
    # below; any other source is returned unchanged.
    #
    # @param src [String] the downloaded URL
    # @param referer [String] the referer URL supplied in the options
    # @return the `referer_url` reported by `Sources::Site`, or `src`
    def set_source_to_referer(src, referer)
      if Sources::Strategies::Nijie.url_match?(src) ||
         Sources::Strategies::Twitter.url_match?(src) || Sources::Strategies::Twitter.url_match?(referer) ||
         Sources::Strategies::Pawoo.url_match?(src) ||
         Sources::Strategies::Tumblr.url_match?(src) || Sources::Strategies::Tumblr.url_match?(referer) ||
         Sources::Strategies::ArtStation.url_match?(src) || Sources::Strategies::ArtStation.url_match?(referer)
        Sources::Site.new(src, :referer_url => referer).referer_url
      else
        src
      end
    end

    private

    # Prevent Cloudflare from potentially mangling the image. See issue #3528.
    #
    # Adds a random `danbooru_no_cache` query parameter when the host is
    # detected as being behind Cloudflare.
    #
    # @return [Addressable::URI]
    def uncached_url(url, headers = {})
      url = Addressable::URI.parse(url)

      if is_cloudflare?(url, headers)
        url.query_values = (url.query_values || {}).merge(danbooru_no_cache: SecureRandom.uuid)
      end

      url
    end

    # Checks, via a HEAD request, whether the response carries a CF-Ray header.
    # The lookup goes through `Cache.get` keyed by the URL origin with a
    # 4 hour argument (presumably the expiry; confirm against Cache).
    #
    # @raise [Error] if the HEAD request is unsuccessful
    def is_cloudflare?(url, headers = {})
      Cache.get("is_cloudflare:#{url.origin}", 4.hours) do
        res = HTTParty.head(url, { headers: headers }.deep_merge(Danbooru.config.httparty_options))
        raise Error, "HTTP error code: #{res.code} #{res.message}" unless res.success?

        res.key?("CF-Ray")
      end
    end
  end
end