uploads: allow uploading .zip, .rar., and .7z files from disk.

Allow uploading .zip, .rar, and .7z files from disk. The archive will be extracted and the images
inside will be uploaded.

This only works for archive files uploaded from disk, not from a source URL.

Post source URLs will look something like this: "file://foo.zip/1.jpg", "file://foo.zip/2.jpg", etc.
Sometimes artists uses Shift JIS or other encodings instead of UTF-8 for filenames. In these cases
we just assume the filename is UTF-8 and replace invalid characters with '?', so filenames might be
wrong in some cases.

There are various protections to prevent uploading malicious archive files:

* Archives with more than 100 files aren't allowed.
* Archives that decompress to more than 100MB aren't allowed.
* Archives with filenames containing '..' components aren't allowed (e.g. '../../../../../etc/passwd').
* Archives with filenames containing absolute paths aren't allowed (e.g. '/etc/passwd').
* Archives containing symlinks aren't allowed (e.g. 'foo -> /etc/passwd').
* Archive types other than .zip, .rar, and .7z aren't allowed (e.g. .tar.gz, .cpio).
* File permissions, owners, and other metadata are ignored.

Partial fix for #5340: Add support for extracting archive attachments from certain sources
This commit is contained in:
evazion
2022-11-16 01:53:50 -06:00
parent d791924aad
commit 2deae38a4e
14 changed files with 232 additions and 31 deletions

View File

@@ -107,10 +107,15 @@ module Danbooru
ensure
archive&.close
end
alias_method :entries, :each_entry
# Extract the files in the archive to a directory. Subdirectories inside the archive are ignored; all files are
# extracted to a single top-level directory.
# XXX You can't call `extract!` on these entries because libarchive doesn't let you extract an entry after you iterate past it.
#
# @return [Array<Danbooru::Archive::Entry>] The list of entries in the archive.
def entries
@entries ||= each_entry.to_a
end
# Extract the files in the archive to a directory.
#
# If a block is given, extract the archive to a temp directory and delete the directory after the block finishes.
# Otherwise, extract to a temp directory and return the directory. The caller should delete the directory afterwards.
@@ -135,15 +140,20 @@ module Danbooru
# Extract the archive to a directory. See `extract!` for details.
def extract_to!(directory, flags: DEFAULT_FLAGS)
entries.map do |entry|
each_entry.map do |entry|
raise Danbooru::Archive::Error, "Can't extract archive containing absolute path (path: '#{entry.pathname_utf8}')" if entry.pathname_utf8.starts_with?("/")
raise Danbooru::Archive::Error, "'#{entry.pathname_utf8}' is not a regular file" if !entry.file?
path = "#{directory}/#{entry.pathname_utf8.tr("/", "_")}"
path = "#{directory}/#{entry.pathname_utf8}"
entry.extract!(path, flags: flags)
end
end
# @return [Integer] The number of files in the archive.
def file_count
@file_count ||= entries.count
end
# @return [Integer] The total decompressed size of all files in the archive.
def uncompressed_size
@uncompressed_size ||= entries.sum(&:size)
@@ -151,7 +161,7 @@ module Danbooru
# @return [Boolean] True if any entry in the archive satisfies the condition; otherwise false.
def exists?(&block)
entries.with_index { |entry, index| return true if yield entry, index + 1 }
each_entry.with_index { |entry, index| return true if yield entry, index + 1 }
false
end
@@ -162,7 +172,7 @@ module Danbooru
# @return [String] The archive format as returned by libarchive ("RAR", "ZIP", etc).
def format
@format ||= entries.lazy.map(&:format).first
@format ||= each_entry.lazy.map(&:format).first
end
# Print the archive contents in `ls -l` format.
@@ -205,9 +215,15 @@ module Danbooru
entry.pathname_utf8
end
# @return [String] The pathname encoded as UTF-8 instead of ASCII-8BIT. May be wrong if the original pathname wasn't UTF-8.
# @see https://security.snyk.io/research/zip-slip-vulnerability
# @return [Boolean] True if the pathname contains any ".." components, e.g. "../../../../../etc/passwd"
def directory_traversal?
entry.pathname.split("/").compact_blank.grep("..").any?
end
# @return [String, nil] The pathname encoded as UTF-8 instead of ASCII-8BIT. May be wrong if the original pathname wasn't UTF-8.
def pathname_utf8
pathname.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
pathname&.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
end
# @return [String] The archive entry format ("RAR", "ZIP", etc).

View File

@@ -4,6 +4,10 @@ class Upload < ApplicationRecord
extend Memoist
class Error < StandardError; end
# The list of allowed archive file types.
ARCHIVE_FILE_TYPES = %i[zip rar 7z]
# The maximum number of files allowed per upload.
MAX_FILES_PER_UPLOAD = 100
# The maximum number of 'pending' or 'processing' media assets a single user can have at once.
@@ -18,11 +22,11 @@ class Upload < ApplicationRecord
normalize :source, :normalize_source
validates :files, length: { maximum: MAX_FILES_PER_UPLOAD, message: "can't have more than #{MAX_FILES_PER_UPLOAD} files per upload" }
validates :source, format: { with: %r{\Ahttps?://}i, message: "is not a valid URL" }, if: -> { source.present? }
validates :referer_url, format: { with: %r{\Ahttps?://}i, message: "is not a valid URL" }, if: -> { referer_url.present? }
validate :validate_file_and_source, on: :create
validate :uploader_is_not_limited, on: :create
validate :validate_archive_files, on: :create
validate :validate_uploader_is_not_limited, on: :create
after_create :async_process_upload!
@@ -69,13 +73,40 @@ class Upload < ApplicationRecord
end
end
def uploader_is_not_limited
def validate_uploader_is_not_limited
queued_asset_count = uploader.upload_media_assets.unfinished.count
if queued_asset_count > MAX_QUEUED_ASSETS
errors.add(:base, "You have too many images queued for upload (queued: #{queued_asset_count}; limit: #{MAX_QUEUED_ASSETS}). Try again later.")
end
end
def validate_archive_files
return unless files.present?
archive_files.each do |archive, filename|
if !archive.file_ext.in?(ARCHIVE_FILE_TYPES)
errors.add(:base, "'#{filename}' is not a supported file type")
elsif archive.exists? { |_, count| count > MAX_FILES_PER_UPLOAD }
# XXX Potential zip bomb containing thousands of files; don't process it any further.
errors.add(:base, "'#{filename}' contains too many files (max #{MAX_FILES_PER_UPLOAD} files per upload)")
next
elsif archive.uncompressed_size > MediaAsset::MAX_FILE_SIZE
errors.add(:base, "'#{filename}' is too large (uncompressed size: #{archive.uncompressed_size.to_fs(:human_size)}; max size: #{MediaAsset::MAX_FILE_SIZE.to_fs(:human_size)})")
elsif entry = archive.entries.find { |entry| entry.pathname.starts_with?("/") }
errors.add(:base, "'#{entry.pathname_utf8}' in '#{filename}' can't start with '/'")
elsif entry = archive.entries.find { |entry| entry.directory_traversal? }
errors.add(:base, "'#{entry.pathname_utf8}' in '#{filename}' can't contain '..' components")
elsif entry = archive.entries.find { |entry| !entry.file? && !entry.directory? }
errors.add(:base, "'#{entry.pathname_utf8}' in '#{filename}' isn't a regular file")
end
end
total_files = archive_files.map(&:first).sum(&:file_count) + (files.size - archive_files.size)
if total_files > MAX_FILES_PER_UPLOAD
errors.add(:base, "Can't upload more than #{MAX_FILES_PER_UPLOAD} files at a time (total: #{total_files})")
end
end
end
concerning :SourceMethods do
@@ -132,30 +163,75 @@ class Upload < ApplicationRecord
update!(status: "processing")
if files.present?
upload_media_assets = files.map do |_index, file|
UploadMediaAsset.new(upload: self, file: file.tempfile, source_url: "file://#{file.original_filename}")
end
process_file_upload!
elsif source.present?
page_url = source_extractor.page_url
image_urls = source_extractor.image_urls
if image_urls.empty?
raise Error, "#{source} doesn't contain any images"
end
upload_media_assets = image_urls.map do |image_url|
UploadMediaAsset.new(upload: self, source_url: image_url, page_url: page_url, media_asset: nil)
end
process_source_upload!
else
raise Error, "No file or source given" # Should never happen
end
rescue Exception => e
update!(status: "error", error: e.message)
end
def process_source_upload!
page_url = source_extractor.page_url
image_urls = source_extractor.image_urls
if image_urls.empty?
raise Error, "#{source} doesn't contain any images"
end
upload_media_assets = image_urls.map do |image_url|
UploadMediaAsset.new(upload: self, source_url: image_url, page_url: page_url, media_asset: nil)
end
transaction do
update!(media_asset_count: upload_media_assets.size)
upload_media_assets.each(&:save!)
end
rescue Exception => e
update!(status: "error", error: e.message)
end
def process_file_upload!
tmpdirs = []
upload_media_assets = uploaded_files.flat_map do |file, original_filename|
if file.is_a?(Danbooru::Archive)
tmpdir, filenames = file.extract!
tmpdirs << tmpdir
filenames.map do |filename|
name = "file://#{original_filename}/#{Pathname.new(filename).relative_path_from(tmpdir)}" # "file://foo.zip/foo/1.jpg"
UploadMediaAsset.new(upload: self, file: filename, source_url: name)
end
else
UploadMediaAsset.new(upload: self, file: file, source_url: "file://#{original_filename}")
end
end
transaction do
update!(media_asset_count: upload_media_assets.size)
upload_media_assets.each(&:save!)
end
ensure
tmpdirs.each { |tmpdir| FileUtils.rm_rf(tmpdir) }
end
# The list of files uploaded from disk, with their filenames.
def uploaded_files
files.map do |_index, file|
if FileTypeDetector.new(file.tempfile).file_ext.in?(ARCHIVE_FILE_TYPES)
[Danbooru::Archive.open!(file.tempfile), file.original_filename]
else
[MediaFile.open(file.tempfile), file.original_filename]
end
end
end
# The list of archive files uploaded from disk, with their filenames.
def archive_files
uploaded_files.select do |file, original_filename|
file.is_a?(Danbooru::Archive)
end
end
def source_extractor
@@ -167,5 +243,5 @@ class Upload < ApplicationRecord
[:uploader, :upload_media_assets, :media_assets, :posts]
end
memoize :source_extractor
memoize :source_extractor, :archive_files, :uploaded_files
end

BIN
test/files/archive/42.zip Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -134,7 +134,7 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
should "fail if given both a file and source" do
assert_no_difference("Upload.count") do
file = File.open("test/files/test.jpg")
file = Rack::Test::UploadedFile.new("test/files/test.jpg")
source = "https://files.catbox.moe/om3tcw.webm"
post_auth uploads_path(format: :json), @user, params: { upload: { files: { "0" => file }, source: source }}
end
@@ -296,6 +296,48 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
end
end
context "for an unsupported archive type" do
should "fail for a .tar file" do
create_upload!("test/files/archive/ugoira.tar", user: @user)
assert_match("File is not an image or video", Upload.last.error)
end
should "fail for a .tar.gz file" do
create_upload!("test/files/archive/ugoira.tar.gz", user: @user)
assert_match("File is not an image or video", Upload.last.error)
end
should "fail for an archive containing more than 100 files" do
create_upload!("test/files/archive/bomb-10k-files.7z", user: @user)
assert_response 422
assert_match("'bomb-10k-files.7z' contains too many files (max 100 files per upload)", response.parsed_body.dig("errors", "base", 0))
end
should "fail for a decompression bomb" do
create_upload!("test/files/archive/bomb-1-1G.rar", user: @user)
assert_response 422
assert_match("'bomb-1-1G.rar' is too large (uncompressed size: 1,000 MB; max size: 100 MB)", response.parsed_body.dig("errors", "base", 0))
end
should "fail for an archive containing absolute paths" do
create_upload!("test/files/archive/absolute-path.7z", user: @user)
assert_response 422
assert_match("'/tmp/foo/foo.txt' in 'absolute-path.7z' can't start with '/'", response.parsed_body.dig("errors", "base", 0))
end
should "fail for an archive containing '..' paths" do
create_upload!("test/files/archive/zip-slip.zip", user: @user)
assert_response 422
assert_match(/'.*' in 'zip-slip\.zip' can't contain '\.\.' components/, response.parsed_body.dig("errors", "base", 0))
end
should "fail for an archive containing symlinks" do
create_upload!("test/files/archive/symlink.zip", user: @user)
assert_response 422
assert_match("'passwd' in 'symlink.zip' isn't a regular file", response.parsed_body.dig("errors", "base", 0))
end
end
context "when re-uploading a media asset stuck in the 'processing' state" do
should "mark the asset as failed" do
asset = create(:media_asset, file: File.open("test/files/test.jpg"), status: "processing")
@@ -395,6 +437,36 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
should_upload_successfully("test/files/webp/lossy_alpha1.webp")
end
context "uploading a .zip file from your computer" do
should "work" do
upload = assert_successful_upload("test/files/archive/ugoira.zip", user: @user)
assert_equal(5, upload.media_asset_count)
assert_equal(5, upload.upload_media_assets.size)
assert_equal("file://ugoira.zip/000000.jpg", upload.upload_media_assets[0].source_url)
end
end
context "uploading a .rar file from your computer" do
should "work" do
upload = assert_successful_upload("test/files/archive/ugoira.rar", user: @user)
assert_equal(5, upload.media_asset_count)
assert_equal(5, upload.upload_media_assets.size)
assert_equal("file://ugoira.rar/000000.jpg", upload.upload_media_assets[0].source_url)
end
end
context "uploading a .7z file from your computer" do
should "work" do
upload = assert_successful_upload("test/files/archive/ugoira.7z", user: @user)
assert_equal(5, upload.media_asset_count)
assert_equal(5, upload.upload_media_assets.size)
assert_equal("file://ugoira.7z/000000.jpg", upload.upload_media_assets[0].source_url)
end
end
context "uploading multiple files from your computer" do
should "work" do
files = {

View File

@@ -117,7 +117,8 @@ class DanbooruArchiveTest < ActiveSupport::TestCase
assert_equal("ZIP 2.0 (uncompressed)", Danbooru::Archive.open("test/files/archive/ugoira.zip").format)
assert_equal("RAR5", Danbooru::Archive.open("test/files/archive/ugoira.rar").format)
assert_equal("7-Zip", Danbooru::Archive.open("test/files/archive/ugoira.7z").format)
assert_equal("7-Zip", Danbooru::Archive.open("test/files/archive/ugoira.tar.7z").format)
assert_equal("GNU tar format", Danbooru::Archive.open("test/files/archive/ugoira.tar").format)
assert_equal("GNU tar format", Danbooru::Archive.open("test/files/archive/ugoira.tar.gz").format)
end
end
@@ -126,7 +127,8 @@ class DanbooruArchiveTest < ActiveSupport::TestCase
assert_equal(:zip, Danbooru::Archive.open("test/files/archive/ugoira.zip").file_ext)
assert_equal(:rar, Danbooru::Archive.open("test/files/archive/ugoira.rar").file_ext)
assert_equal(:"7z", Danbooru::Archive.open("test/files/archive/ugoira.7z").file_ext)
assert_equal(:"7z", Danbooru::Archive.open("test/files/archive/ugoira.tar.7z").file_ext)
assert_equal(:bin, Danbooru::Archive.open("test/files/archive/ugoira.tar").file_ext)
assert_equal(:bin, Danbooru::Archive.open("test/files/archive/ugoira.tar.gz").file_ext)
end
end
@@ -139,5 +141,40 @@ class DanbooruArchiveTest < ActiveSupport::TestCase
assert_match(/^-rw-rw-r-- *0 0 *1639 2014-10-05 23:31:06 000000\.jpg$/, output.tap(&:rewind).read)
end
end
should "detect directory traversal attacks" do
archive = Danbooru::Archive.open!("test/files/archive/zip-slip.zip")
assert_equal(true, archive.entries.any? { |e| e.directory_traversal? })
assert_raises(Danbooru::Archive::Error) { archive.extract! }
end
should "detect symlinks" do
archive = Danbooru::Archive.open!("test/files/archive/symlink.zip")
assert_equal(true, archive.entries.any? { |e| !e.file? && !e.directory? })
assert_raises(Danbooru::Archive::Error) { archive.extract! }
end
should "detect absolute paths" do
archive = Danbooru::Archive.open!("test/files/archive/absolute-path.7z")
assert_equal(true, archive.entries.any? { |e| e.pathname.starts_with?("/") })
assert_raises(Danbooru::Archive::Error) { archive.extract! }
end
should "detect archives with large numbers of files" do
archive = Danbooru::Archive.open!("test/files/archive/bomb-10k-files.7z")
assert_equal(true, archive.exists? { |_, count| count > 100 })
assert_equal(10_000, archive.file_count)
end
should "detect decompression bombs" do
archive = Danbooru::Archive.open!("test/files/archive/bomb-1-1G.rar")
assert_equal(1, archive.file_count)
assert_equal(1_048_576_000, archive.uncompressed_size)
end
end
end