uploads: allow uploading .zip, .rar., and .7z files from disk.
Allow uploading .zip, .rar, and .7z files from disk. The archive will be extracted and the images inside will be uploaded. This only works for archive files uploaded from disk, not from a source URL. Post source URLs will look something like this: "file://foo.zip/1.jpg", "file://foo.zip/2.jpg", etc. Sometimes artists uses Shift JIS or other encodings instead of UTF-8 for filenames. In these cases we just assume the filename is UTF-8 and replace invalid characters with '?', so filenames might be wrong in some cases. There are various protections to prevent uploading malicious archive files: * Archives with more than 100 files aren't allowed. * Archives that decompress to more than 100MB aren't allowed. * Archives with filenames containing '..' components aren't allowed (e.g. '../../../../../etc/passwd'). * Archives with filenames containing absolute paths aren't allowed (e.g. '/etc/passwd'). * Archives containing symlinks aren't allowed (e.g. 'foo -> /etc/passwd'). * Archive types other than .zip, .rar, and .7z aren't allowed (e.g. .tar.gz, .cpio). * File permissions, owners, and other metadata are ignored. Partial fix for #5340: Add support for extracting archive attachments from certain sources
This commit is contained in:
@@ -107,10 +107,15 @@ module Danbooru
|
||||
ensure
|
||||
archive&.close
|
||||
end
|
||||
alias_method :entries, :each_entry
|
||||
|
||||
# Extract the files in the archive to a directory. Subdirectories inside the archive are ignored; all files are
|
||||
# extracted to a single top-level directory.
|
||||
# XXX You can't call `extract!` on these entries because libarchive doesn't let you extract an entry after you iterate past it.
|
||||
#
|
||||
# @return [Array<Danbooru::Archive::Entry>] The list of entries in the archive.
|
||||
def entries
|
||||
@entries ||= each_entry.to_a
|
||||
end
|
||||
|
||||
# Extract the files in the archive to a directory.
|
||||
#
|
||||
# If a block is given, extract the archive to a temp directory and delete the directory after the block finishes.
|
||||
# Otherwise, extract to a temp directory and return the directory. The caller should delete the directory afterwards.
|
||||
@@ -135,15 +140,20 @@ module Danbooru
|
||||
|
||||
# Extract the archive to a directory. See `extract!` for details.
|
||||
def extract_to!(directory, flags: DEFAULT_FLAGS)
|
||||
entries.map do |entry|
|
||||
each_entry.map do |entry|
|
||||
raise Danbooru::Archive::Error, "Can't extract archive containing absolute path (path: '#{entry.pathname_utf8}')" if entry.pathname_utf8.starts_with?("/")
|
||||
raise Danbooru::Archive::Error, "'#{entry.pathname_utf8}' is not a regular file" if !entry.file?
|
||||
|
||||
path = "#{directory}/#{entry.pathname_utf8.tr("/", "_")}"
|
||||
path = "#{directory}/#{entry.pathname_utf8}"
|
||||
entry.extract!(path, flags: flags)
|
||||
end
|
||||
end
|
||||
|
||||
# @return [Integer] The number of files in the archive.
|
||||
def file_count
|
||||
@file_count ||= entries.count
|
||||
end
|
||||
|
||||
# @return [Integer] The total decompressed size of all files in the archive.
|
||||
def uncompressed_size
|
||||
@uncompressed_size ||= entries.sum(&:size)
|
||||
@@ -151,7 +161,7 @@ module Danbooru
|
||||
|
||||
# @return [Boolean] True if any entry in the archive satisfies the condition; otherwise false.
|
||||
def exists?(&block)
|
||||
entries.with_index { |entry, index| return true if yield entry, index + 1 }
|
||||
each_entry.with_index { |entry, index| return true if yield entry, index + 1 }
|
||||
false
|
||||
end
|
||||
|
||||
@@ -162,7 +172,7 @@ module Danbooru
|
||||
|
||||
# @return [String] The archive format as returned by libarchive ("RAR", "ZIP", etc).
|
||||
def format
|
||||
@format ||= entries.lazy.map(&:format).first
|
||||
@format ||= each_entry.lazy.map(&:format).first
|
||||
end
|
||||
|
||||
# Print the archive contents in `ls -l` format.
|
||||
@@ -205,9 +215,15 @@ module Danbooru
|
||||
entry.pathname_utf8
|
||||
end
|
||||
|
||||
# @return [String] The pathname encoded as UTF-8 instead of ASCII-8BIT. May be wrong if the original pathname wasn't UTF-8.
|
||||
# @see https://security.snyk.io/research/zip-slip-vulnerability
|
||||
# @return [Boolean] True if the pathname contains any ".." components, e.g. "../../../../../etc/passwd"
|
||||
def directory_traversal?
|
||||
entry.pathname.split("/").compact_blank.grep("..").any?
|
||||
end
|
||||
|
||||
# @return [String, nil] The pathname encoded as UTF-8 instead of ASCII-8BIT. May be wrong if the original pathname wasn't UTF-8.
|
||||
def pathname_utf8
|
||||
pathname.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
|
||||
pathname&.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
|
||||
end
|
||||
|
||||
# @return [String] The archive entry format ("RAR", "ZIP", etc).
|
||||
|
||||
@@ -4,6 +4,10 @@ class Upload < ApplicationRecord
|
||||
extend Memoist
|
||||
class Error < StandardError; end
|
||||
|
||||
# The list of allowed archive file types.
|
||||
ARCHIVE_FILE_TYPES = %i[zip rar 7z]
|
||||
|
||||
# The maximum number of files allowed per upload.
|
||||
MAX_FILES_PER_UPLOAD = 100
|
||||
|
||||
# The maximum number of 'pending' or 'processing' media assets a single user can have at once.
|
||||
@@ -18,11 +22,11 @@ class Upload < ApplicationRecord
|
||||
|
||||
normalize :source, :normalize_source
|
||||
|
||||
validates :files, length: { maximum: MAX_FILES_PER_UPLOAD, message: "can't have more than #{MAX_FILES_PER_UPLOAD} files per upload" }
|
||||
validates :source, format: { with: %r{\Ahttps?://}i, message: "is not a valid URL" }, if: -> { source.present? }
|
||||
validates :referer_url, format: { with: %r{\Ahttps?://}i, message: "is not a valid URL" }, if: -> { referer_url.present? }
|
||||
validate :validate_file_and_source, on: :create
|
||||
validate :uploader_is_not_limited, on: :create
|
||||
validate :validate_archive_files, on: :create
|
||||
validate :validate_uploader_is_not_limited, on: :create
|
||||
|
||||
after_create :async_process_upload!
|
||||
|
||||
@@ -69,13 +73,40 @@ class Upload < ApplicationRecord
|
||||
end
|
||||
end
|
||||
|
||||
def uploader_is_not_limited
|
||||
def validate_uploader_is_not_limited
|
||||
queued_asset_count = uploader.upload_media_assets.unfinished.count
|
||||
|
||||
if queued_asset_count > MAX_QUEUED_ASSETS
|
||||
errors.add(:base, "You have too many images queued for upload (queued: #{queued_asset_count}; limit: #{MAX_QUEUED_ASSETS}). Try again later.")
|
||||
end
|
||||
end
|
||||
|
||||
def validate_archive_files
|
||||
return unless files.present?
|
||||
|
||||
archive_files.each do |archive, filename|
|
||||
if !archive.file_ext.in?(ARCHIVE_FILE_TYPES)
|
||||
errors.add(:base, "'#{filename}' is not a supported file type")
|
||||
elsif archive.exists? { |_, count| count > MAX_FILES_PER_UPLOAD }
|
||||
# XXX Potential zip bomb containing thousands of files; don't process it any further.
|
||||
errors.add(:base, "'#{filename}' contains too many files (max #{MAX_FILES_PER_UPLOAD} files per upload)")
|
||||
next
|
||||
elsif archive.uncompressed_size > MediaAsset::MAX_FILE_SIZE
|
||||
errors.add(:base, "'#{filename}' is too large (uncompressed size: #{archive.uncompressed_size.to_fs(:human_size)}; max size: #{MediaAsset::MAX_FILE_SIZE.to_fs(:human_size)})")
|
||||
elsif entry = archive.entries.find { |entry| entry.pathname.starts_with?("/") }
|
||||
errors.add(:base, "'#{entry.pathname_utf8}' in '#{filename}' can't start with '/'")
|
||||
elsif entry = archive.entries.find { |entry| entry.directory_traversal? }
|
||||
errors.add(:base, "'#{entry.pathname_utf8}' in '#{filename}' can't contain '..' components")
|
||||
elsif entry = archive.entries.find { |entry| !entry.file? && !entry.directory? }
|
||||
errors.add(:base, "'#{entry.pathname_utf8}' in '#{filename}' isn't a regular file")
|
||||
end
|
||||
end
|
||||
|
||||
total_files = archive_files.map(&:first).sum(&:file_count) + (files.size - archive_files.size)
|
||||
if total_files > MAX_FILES_PER_UPLOAD
|
||||
errors.add(:base, "Can't upload more than #{MAX_FILES_PER_UPLOAD} files at a time (total: #{total_files})")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
concerning :SourceMethods do
|
||||
@@ -132,30 +163,75 @@ class Upload < ApplicationRecord
|
||||
update!(status: "processing")
|
||||
|
||||
if files.present?
|
||||
upload_media_assets = files.map do |_index, file|
|
||||
UploadMediaAsset.new(upload: self, file: file.tempfile, source_url: "file://#{file.original_filename}")
|
||||
end
|
||||
process_file_upload!
|
||||
elsif source.present?
|
||||
page_url = source_extractor.page_url
|
||||
image_urls = source_extractor.image_urls
|
||||
|
||||
if image_urls.empty?
|
||||
raise Error, "#{source} doesn't contain any images"
|
||||
end
|
||||
|
||||
upload_media_assets = image_urls.map do |image_url|
|
||||
UploadMediaAsset.new(upload: self, source_url: image_url, page_url: page_url, media_asset: nil)
|
||||
end
|
||||
process_source_upload!
|
||||
else
|
||||
raise Error, "No file or source given" # Should never happen
|
||||
end
|
||||
rescue Exception => e
|
||||
update!(status: "error", error: e.message)
|
||||
end
|
||||
|
||||
def process_source_upload!
|
||||
page_url = source_extractor.page_url
|
||||
image_urls = source_extractor.image_urls
|
||||
|
||||
if image_urls.empty?
|
||||
raise Error, "#{source} doesn't contain any images"
|
||||
end
|
||||
|
||||
upload_media_assets = image_urls.map do |image_url|
|
||||
UploadMediaAsset.new(upload: self, source_url: image_url, page_url: page_url, media_asset: nil)
|
||||
end
|
||||
|
||||
transaction do
|
||||
update!(media_asset_count: upload_media_assets.size)
|
||||
upload_media_assets.each(&:save!)
|
||||
end
|
||||
rescue Exception => e
|
||||
update!(status: "error", error: e.message)
|
||||
end
|
||||
|
||||
def process_file_upload!
|
||||
tmpdirs = []
|
||||
|
||||
upload_media_assets = uploaded_files.flat_map do |file, original_filename|
|
||||
if file.is_a?(Danbooru::Archive)
|
||||
tmpdir, filenames = file.extract!
|
||||
tmpdirs << tmpdir
|
||||
|
||||
filenames.map do |filename|
|
||||
name = "file://#{original_filename}/#{Pathname.new(filename).relative_path_from(tmpdir)}" # "file://foo.zip/foo/1.jpg"
|
||||
UploadMediaAsset.new(upload: self, file: filename, source_url: name)
|
||||
end
|
||||
else
|
||||
UploadMediaAsset.new(upload: self, file: file, source_url: "file://#{original_filename}")
|
||||
end
|
||||
end
|
||||
|
||||
transaction do
|
||||
update!(media_asset_count: upload_media_assets.size)
|
||||
upload_media_assets.each(&:save!)
|
||||
end
|
||||
ensure
|
||||
tmpdirs.each { |tmpdir| FileUtils.rm_rf(tmpdir) }
|
||||
end
|
||||
|
||||
# The list of files uploaded from disk, with their filenames.
|
||||
def uploaded_files
|
||||
files.map do |_index, file|
|
||||
if FileTypeDetector.new(file.tempfile).file_ext.in?(ARCHIVE_FILE_TYPES)
|
||||
[Danbooru::Archive.open!(file.tempfile), file.original_filename]
|
||||
else
|
||||
[MediaFile.open(file.tempfile), file.original_filename]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# The list of archive files uploaded from disk, with their filenames.
|
||||
def archive_files
|
||||
uploaded_files.select do |file, original_filename|
|
||||
file.is_a?(Danbooru::Archive)
|
||||
end
|
||||
end
|
||||
|
||||
def source_extractor
|
||||
@@ -167,5 +243,5 @@ class Upload < ApplicationRecord
|
||||
[:uploader, :upload_media_assets, :media_assets, :posts]
|
||||
end
|
||||
|
||||
memoize :source_extractor
|
||||
memoize :source_extractor, :archive_files, :uploaded_files
|
||||
end
|
||||
|
||||
BIN
test/files/archive/42.zip
Normal file
BIN
test/files/archive/42.zip
Normal file
Binary file not shown.
BIN
test/files/archive/absolute-path.7z
Normal file
BIN
test/files/archive/absolute-path.7z
Normal file
Binary file not shown.
BIN
test/files/archive/bomb-1-1G.rar
Normal file
BIN
test/files/archive/bomb-1-1G.rar
Normal file
Binary file not shown.
BIN
test/files/archive/bomb-100-10M.rar
Normal file
BIN
test/files/archive/bomb-100-10M.rar
Normal file
Binary file not shown.
BIN
test/files/archive/bomb-10k-files.7z
Normal file
BIN
test/files/archive/bomb-10k-files.7z
Normal file
Binary file not shown.
BIN
test/files/archive/symlink.zip
Normal file
BIN
test/files/archive/symlink.zip
Normal file
Binary file not shown.
BIN
test/files/archive/ugoira.tar
Normal file
BIN
test/files/archive/ugoira.tar
Normal file
Binary file not shown.
Binary file not shown.
BIN
test/files/archive/ugoira.tar.gz
Normal file
BIN
test/files/archive/ugoira.tar.gz
Normal file
Binary file not shown.
BIN
test/files/archive/zip-slip.zip
Normal file
BIN
test/files/archive/zip-slip.zip
Normal file
Binary file not shown.
@@ -134,7 +134,7 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
|
||||
|
||||
should "fail if given both a file and source" do
|
||||
assert_no_difference("Upload.count") do
|
||||
file = File.open("test/files/test.jpg")
|
||||
file = Rack::Test::UploadedFile.new("test/files/test.jpg")
|
||||
source = "https://files.catbox.moe/om3tcw.webm"
|
||||
post_auth uploads_path(format: :json), @user, params: { upload: { files: { "0" => file }, source: source }}
|
||||
end
|
||||
@@ -296,6 +296,48 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
|
||||
end
|
||||
end
|
||||
|
||||
context "for an unsupported archive type" do
|
||||
should "fail for a .tar file" do
|
||||
create_upload!("test/files/archive/ugoira.tar", user: @user)
|
||||
assert_match("File is not an image or video", Upload.last.error)
|
||||
end
|
||||
|
||||
should "fail for a .tar.gz file" do
|
||||
create_upload!("test/files/archive/ugoira.tar.gz", user: @user)
|
||||
assert_match("File is not an image or video", Upload.last.error)
|
||||
end
|
||||
|
||||
should "fail for an archive containing more than 100 files" do
|
||||
create_upload!("test/files/archive/bomb-10k-files.7z", user: @user)
|
||||
assert_response 422
|
||||
assert_match("'bomb-10k-files.7z' contains too many files (max 100 files per upload)", response.parsed_body.dig("errors", "base", 0))
|
||||
end
|
||||
|
||||
should "fail for a decompression bomb" do
|
||||
create_upload!("test/files/archive/bomb-1-1G.rar", user: @user)
|
||||
assert_response 422
|
||||
assert_match("'bomb-1-1G.rar' is too large (uncompressed size: 1,000 MB; max size: 100 MB)", response.parsed_body.dig("errors", "base", 0))
|
||||
end
|
||||
|
||||
should "fail for an archive containing absolute paths" do
|
||||
create_upload!("test/files/archive/absolute-path.7z", user: @user)
|
||||
assert_response 422
|
||||
assert_match("'/tmp/foo/foo.txt' in 'absolute-path.7z' can't start with '/'", response.parsed_body.dig("errors", "base", 0))
|
||||
end
|
||||
|
||||
should "fail for an archive containing '..' paths" do
|
||||
create_upload!("test/files/archive/zip-slip.zip", user: @user)
|
||||
assert_response 422
|
||||
assert_match(/'.*' in 'zip-slip\.zip' can't contain '\.\.' components/, response.parsed_body.dig("errors", "base", 0))
|
||||
end
|
||||
|
||||
should "fail for an archive containing symlinks" do
|
||||
create_upload!("test/files/archive/symlink.zip", user: @user)
|
||||
assert_response 422
|
||||
assert_match("'passwd' in 'symlink.zip' isn't a regular file", response.parsed_body.dig("errors", "base", 0))
|
||||
end
|
||||
end
|
||||
|
||||
context "when re-uploading a media asset stuck in the 'processing' state" do
|
||||
should "mark the asset as failed" do
|
||||
asset = create(:media_asset, file: File.open("test/files/test.jpg"), status: "processing")
|
||||
@@ -395,6 +437,36 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
|
||||
should_upload_successfully("test/files/webp/lossy_alpha1.webp")
|
||||
end
|
||||
|
||||
context "uploading a .zip file from your computer" do
|
||||
should "work" do
|
||||
upload = assert_successful_upload("test/files/archive/ugoira.zip", user: @user)
|
||||
|
||||
assert_equal(5, upload.media_asset_count)
|
||||
assert_equal(5, upload.upload_media_assets.size)
|
||||
assert_equal("file://ugoira.zip/000000.jpg", upload.upload_media_assets[0].source_url)
|
||||
end
|
||||
end
|
||||
|
||||
context "uploading a .rar file from your computer" do
|
||||
should "work" do
|
||||
upload = assert_successful_upload("test/files/archive/ugoira.rar", user: @user)
|
||||
|
||||
assert_equal(5, upload.media_asset_count)
|
||||
assert_equal(5, upload.upload_media_assets.size)
|
||||
assert_equal("file://ugoira.rar/000000.jpg", upload.upload_media_assets[0].source_url)
|
||||
end
|
||||
end
|
||||
|
||||
context "uploading a .7z file from your computer" do
|
||||
should "work" do
|
||||
upload = assert_successful_upload("test/files/archive/ugoira.7z", user: @user)
|
||||
|
||||
assert_equal(5, upload.media_asset_count)
|
||||
assert_equal(5, upload.upload_media_assets.size)
|
||||
assert_equal("file://ugoira.7z/000000.jpg", upload.upload_media_assets[0].source_url)
|
||||
end
|
||||
end
|
||||
|
||||
context "uploading multiple files from your computer" do
|
||||
should "work" do
|
||||
files = {
|
||||
|
||||
@@ -117,7 +117,8 @@ class DanbooruArchiveTest < ActiveSupport::TestCase
|
||||
assert_equal("ZIP 2.0 (uncompressed)", Danbooru::Archive.open("test/files/archive/ugoira.zip").format)
|
||||
assert_equal("RAR5", Danbooru::Archive.open("test/files/archive/ugoira.rar").format)
|
||||
assert_equal("7-Zip", Danbooru::Archive.open("test/files/archive/ugoira.7z").format)
|
||||
assert_equal("7-Zip", Danbooru::Archive.open("test/files/archive/ugoira.tar.7z").format)
|
||||
assert_equal("GNU tar format", Danbooru::Archive.open("test/files/archive/ugoira.tar").format)
|
||||
assert_equal("GNU tar format", Danbooru::Archive.open("test/files/archive/ugoira.tar.gz").format)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -126,7 +127,8 @@ class DanbooruArchiveTest < ActiveSupport::TestCase
|
||||
assert_equal(:zip, Danbooru::Archive.open("test/files/archive/ugoira.zip").file_ext)
|
||||
assert_equal(:rar, Danbooru::Archive.open("test/files/archive/ugoira.rar").file_ext)
|
||||
assert_equal(:"7z", Danbooru::Archive.open("test/files/archive/ugoira.7z").file_ext)
|
||||
assert_equal(:"7z", Danbooru::Archive.open("test/files/archive/ugoira.tar.7z").file_ext)
|
||||
assert_equal(:bin, Danbooru::Archive.open("test/files/archive/ugoira.tar").file_ext)
|
||||
assert_equal(:bin, Danbooru::Archive.open("test/files/archive/ugoira.tar.gz").file_ext)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -139,5 +141,40 @@ class DanbooruArchiveTest < ActiveSupport::TestCase
|
||||
assert_match(/^-rw-rw-r-- *0 0 *1639 2014-10-05 23:31:06 000000\.jpg$/, output.tap(&:rewind).read)
|
||||
end
|
||||
end
|
||||
|
||||
should "detect directory traversal attacks" do
|
||||
archive = Danbooru::Archive.open!("test/files/archive/zip-slip.zip")
|
||||
|
||||
assert_equal(true, archive.entries.any? { |e| e.directory_traversal? })
|
||||
assert_raises(Danbooru::Archive::Error) { archive.extract! }
|
||||
end
|
||||
|
||||
should "detect symlinks" do
|
||||
archive = Danbooru::Archive.open!("test/files/archive/symlink.zip")
|
||||
|
||||
assert_equal(true, archive.entries.any? { |e| !e.file? && !e.directory? })
|
||||
assert_raises(Danbooru::Archive::Error) { archive.extract! }
|
||||
end
|
||||
|
||||
should "detect absolute paths" do
|
||||
archive = Danbooru::Archive.open!("test/files/archive/absolute-path.7z")
|
||||
|
||||
assert_equal(true, archive.entries.any? { |e| e.pathname.starts_with?("/") })
|
||||
assert_raises(Danbooru::Archive::Error) { archive.extract! }
|
||||
end
|
||||
|
||||
should "detect archives with large numbers of files" do
|
||||
archive = Danbooru::Archive.open!("test/files/archive/bomb-10k-files.7z")
|
||||
|
||||
assert_equal(true, archive.exists? { |_, count| count > 100 })
|
||||
assert_equal(10_000, archive.file_count)
|
||||
end
|
||||
|
||||
should "detect decompression bombs" do
|
||||
archive = Danbooru::Archive.open!("test/files/archive/bomb-1-1G.rar")
|
||||
|
||||
assert_equal(1, archive.file_count)
|
||||
assert_equal(1_048_576_000, archive.uncompressed_size)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user