Allow uploading .zip, .rar, and .7z files from disk. The archive will be extracted and the images inside will be uploaded. This only works for archive files uploaded from disk, not from a source URL. Post source URLs will look something like this: "file://foo.zip/1.jpg", "file://foo.zip/2.jpg", etc. Sometimes artists uses Shift JIS or other encodings instead of UTF-8 for filenames. In these cases we just assume the filename is UTF-8 and replace invalid characters with '?', so filenames might be wrong in some cases. There are various protections to prevent uploading malicious archive files: * Archives with more than 100 files aren't allowed. * Archives that decompress to more than 100MB aren't allowed. * Archives with filenames containing '..' components aren't allowed (e.g. '../../../../../etc/passwd'). * Archives with filenames containing absolute paths aren't allowed (e.g. '/etc/passwd'). * Archives containing symlinks aren't allowed (e.g. 'foo -> /etc/passwd'). * Archive types other than .zip, .rar, and .7z aren't allowed (e.g. .tar.gz, .cpio). * File permissions, owners, and other metadata are ignored. Partial fix for #5340: Add support for extracting archive attachments from certain sources
258 lines
9.5 KiB
Ruby
258 lines
9.5 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Danbooru::Archive is a utility class representing a .zip, .rar, or .7z archive file. This is a wrapper around
|
|
# libarchive that adds some utility methods for extracting an archive safely.
|
|
#
|
|
# @example
|
|
# Danbooru::Archive.extract!("foo.zip") do |dir, filenames|
|
|
# puts dir, filenames
|
|
# end
|
|
#
|
|
# @see https://github.com/chef/ffi-libarchive
|
|
# @see https://www.rubydoc.info/gems/ffi-libarchive/0.4.2
|
|
# @see https://github.com/libarchive/libarchive/wiki/ManualPages
|
|
|
|
module Archive
|
|
module C
|
|
# XXX Monkey patch ffi-libarchive to add some functions we need.
|
|
# https://www.freebsd.org/cgi/man.cgi?query=archive_util&sektion=3&format=html
|
|
attach_function_maybe :archive_format_name, [:pointer], :string
|
|
attach_function_maybe :archive_filter_name, [:pointer, :int], :string
|
|
attach_function_maybe :archive_filter_count, [:pointer], :int
|
|
end
|
|
end
|
|
|
|
module Danbooru
|
|
class Archive
|
|
class Error < StandardError; end
|
|
|
|
# Default flags when extracting files.
|
|
# @see https://www.freebsd.org/cgi/man.cgi?query=archive_write_disk&sektion=3&format=html
|
|
DEFAULT_FLAGS =
|
|
::Archive::EXTRACT_NO_OVERWRITE |
|
|
#::Archive::EXTRACT_SECURE_NOABSOLUTEPATHS |
|
|
::Archive::EXTRACT_SECURE_SYMLINKS |
|
|
::Archive::EXTRACT_SECURE_NODOTDOT
|
|
|
|
attr_reader :file
|
|
|
|
# Open an archive, or raise an error if the archive can't be opened. If given a block, pass the archive to the block
|
|
# and close the archive after the block finishes.
|
|
#
|
|
# @param filelike [String, File] The filename of the archive, or an open archive file.
|
|
# @yieldparam [Danbooru::Archive] The archive.
|
|
# @return [Danbooru::Archive] The archive.
|
|
def self.open!(filelike, &block)
|
|
file = filelike.is_a?(File) ? filelike : Kernel.open(filelike, binmode: true)
|
|
archive = new(file)
|
|
|
|
if block_given?
|
|
begin
|
|
yield archive
|
|
ensure
|
|
archive.close
|
|
end
|
|
else
|
|
archive
|
|
end
|
|
rescue => error
|
|
archive&.close
|
|
raise Error, error
|
|
end
|
|
|
|
# Open an archive, or return nil if the archive can't be opened. See `#open!` for details.
|
|
def self.open(filelike, &block)
|
|
open!(filelike, &block)
|
|
rescue Error
|
|
nil
|
|
end
|
|
|
|
# Extract the archive to the given directory. If a block is given, extract the archive to a temp directory and
|
|
# delete the directory afterwards. The block is given the name of the directory and the list of files.
|
|
#
|
|
# @param filelike [String, File] The filename of the archive, or an open archive file.
|
|
# @param directory [String] The directory to extract the files to. By default, this is a temp directory the caller must clean up.
|
|
# @yieldparam [String, Array<String>] The path to the temp directory, and the list of extracted files in the directory.
|
|
# @return [(String, Array<String>)] The path to the directory, and the list of extracted files in the directory.
|
|
def self.extract!(filelike, directory = nil, flags: DEFAULT_FLAGS, &block)
|
|
open!(filelike) do |archive|
|
|
archive.extract!(directory, flags: flags, &block)
|
|
end
|
|
end
|
|
|
|
# @param file [File] The archive file.
|
|
def initialize(file)
|
|
@file = file
|
|
end
|
|
|
|
def close
|
|
# no-op
|
|
end
|
|
|
|
# Iterate across each entry (file) in the archive.
|
|
#
|
|
# @return [Enumerator, Danbooru:Archive] If given a block, call the block on each entry and return the archive
|
|
# itself. If not given a block, return an Enumerator.
|
|
def each_entry(&block)
|
|
return enum_for(:each_entry) unless block_given?
|
|
|
|
# XXX We have to re-open the archive on every call because libarchive is designed for streaming and doesn't
|
|
# support iterating across the archive multiple times.
|
|
archive = ::Archive::Reader.open_filename(file.path)
|
|
while (entry = archive.next_header(clone_entry: true))
|
|
yield Entry.new(archive, entry)
|
|
end
|
|
|
|
self
|
|
ensure
|
|
archive&.close
|
|
end
|
|
|
|
# XXX You can't call `extract!` on these entries because libarchive doesn't let you extract an entry after you iterate past it.
|
|
#
|
|
# @return [Array<Danbooru::Archive::Entry>] The list of entries in the archive.
|
|
def entries
|
|
@entries ||= each_entry.to_a
|
|
end
|
|
|
|
# Extract the files in the archive to a directory.
|
|
#
|
|
# If a block is given, extract the archive to a temp directory and delete the directory after the block finishes.
|
|
# Otherwise, extract to a temp directory and return the directory. The caller should delete the directory afterwards.
|
|
#
|
|
# @param directory [String] The directory to extract the files to. By default, this is a temp directory the caller must clean up.
|
|
# @yieldparam [String, Array<String>] The name of the temp directory, and the list of files in the directory.
|
|
# @return [(String, Array<String>)] The path to the directory, and the list of extracted files.
|
|
def extract!(directory = nil, flags: DEFAULT_FLAGS, &block)
|
|
raise ArgumentError, "can't pass directory and block at the same time" if block_given? && directory.present?
|
|
|
|
if block_given?
|
|
Dir.mktmpdir(["danbooru-archive-", "-" + File.basename(file.path)]) do |dir|
|
|
filenames = extract_to!(dir, flags: flags)
|
|
yield dir, filenames
|
|
end
|
|
else
|
|
dir = directory.presence || Dir.mktmpdir(["danbooru-archive-", "-" + File.basename(file.path)])
|
|
filenames = extract_to!(dir, flags: flags)
|
|
[dir, filenames]
|
|
end
|
|
end
|
|
|
|
# Extract the archive to a directory. See `extract!` for details.
|
|
def extract_to!(directory, flags: DEFAULT_FLAGS)
|
|
each_entry.map do |entry|
|
|
raise Danbooru::Archive::Error, "Can't extract archive containing absolute path (path: '#{entry.pathname_utf8}')" if entry.pathname_utf8.starts_with?("/")
|
|
raise Danbooru::Archive::Error, "'#{entry.pathname_utf8}' is not a regular file" if !entry.file?
|
|
|
|
path = "#{directory}/#{entry.pathname_utf8}"
|
|
entry.extract!(path, flags: flags)
|
|
end
|
|
end
|
|
|
|
# @return [Integer] The number of files in the archive.
|
|
def file_count
|
|
@file_count ||= entries.count
|
|
end
|
|
|
|
# @return [Integer] The total decompressed size of all files in the archive.
|
|
def uncompressed_size
|
|
@uncompressed_size ||= entries.sum(&:size)
|
|
end
|
|
|
|
# @return [Boolean] True if any entry in the archive satisfies the condition; otherwise false.
|
|
def exists?(&block)
|
|
each_entry.with_index { |entry, index| return true if yield entry, index + 1 }
|
|
false
|
|
end
|
|
|
|
# @return [Symbol] The archive format as detected by us (:zip, :rar, :7z, etc).
|
|
def file_ext
|
|
@file_ext ||= FileTypeDetector.new(file).file_ext
|
|
end
|
|
|
|
# @return [String] The archive format as returned by libarchive ("RAR", "ZIP", etc).
|
|
def format
|
|
@format ||= each_entry.lazy.map(&:format).first
|
|
end
|
|
|
|
# Print the archive contents in `ls -l` format.
|
|
def ls(io = STDOUT)
|
|
io.puts(entries.map(&:ls).join("\n"))
|
|
end
|
|
end
|
|
|
|
# An entry represents a single file in an archive.
|
|
class Entry
|
|
attr_reader :archive, :entry
|
|
delegate :directory?, :file?, :close, :pathname, :pathname=, :size, :strmode, :uid, :gid, :mtime, to: :entry
|
|
|
|
# @param entry [::Archive] The archive the entry belongs to.
|
|
# @param entry [::Archive::Entry] The archive entry.
|
|
def initialize(archive, entry)
|
|
@archive = archive
|
|
@entry = entry
|
|
end
|
|
|
|
# Copy the entry. Called by `dup`.
|
|
def initialize_copy(entry)
|
|
@archive = entry.archive
|
|
@entry = ::Archive::Entry.new(entry.ffi_ptr, clone: true)
|
|
end
|
|
|
|
# Extract the file to the given destination. By default, don't overwrite files, don't allow symlinks or paths
|
|
# containing '..', and don't extract file ownership, permission, or timestamp information.
|
|
#
|
|
# @param destination [String] The path to extract the file to.
|
|
# @param flags [Integer] The extraction flags.
|
|
# @return [String] The path to the extracted file.
|
|
def extract!(destination, flags: Danbooru::Archive::DEFAULT_FLAGS)
|
|
entry = dup
|
|
entry.pathname = destination
|
|
|
|
result = ::Archive::C.archive_read_extract(entry.archive_ffi_ptr, entry.ffi_ptr, flags)
|
|
raise Danbooru::Archive::Error, "Error extracting '#{entry.pathname_utf8}': #{archive.error_string}" if result != ::Archive::C::OK
|
|
|
|
entry.pathname_utf8
|
|
end
|
|
|
|
# @see https://security.snyk.io/research/zip-slip-vulnerability
|
|
# @return [Boolean] True if the pathname contains any ".." components, e.g. "../../../../../etc/passwd"
|
|
def directory_traversal?
|
|
entry.pathname.split("/").compact_blank.grep("..").any?
|
|
end
|
|
|
|
# @return [String, nil] The pathname encoded as UTF-8 instead of ASCII-8BIT. May be wrong if the original pathname wasn't UTF-8.
|
|
def pathname_utf8
|
|
pathname&.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
|
|
end
|
|
|
|
# @return [String] The archive entry format ("RAR", "ZIP", etc).
|
|
def format
|
|
::Archive::C::archive_format_name(archive_ffi_ptr)
|
|
end
|
|
|
|
# @return [Array<String>] The list of filters for the entry.
|
|
def filters
|
|
count = ::Archive::C::archive_filter_count(archive_ffi_ptr)
|
|
|
|
count.times.map do |n|
|
|
::Archive::C::archive_filter_name(archive_ffi_ptr, n)
|
|
end
|
|
end
|
|
|
|
# @return [String] The entry in `ls -l` format.
|
|
def ls
|
|
"#{strmode} #{uid} #{gid} #{"%9d" % size} #{mtime.to_fs(:db)} #{pathname_utf8}"
|
|
end
|
|
|
|
def archive_ffi_ptr
|
|
archive.send(:archive)
|
|
end
|
|
|
|
# @return [FFI::Pointer] The pointer to the libarchive entry object.
|
|
def ffi_ptr
|
|
entry.entry
|
|
end
|
|
end
|
|
end
|