diff --git a/app/logical/danbooru/archive.rb b/app/logical/danbooru/archive.rb index 2f33f9ecb..ec039b7f9 100644 --- a/app/logical/danbooru/archive.rb +++ b/app/logical/danbooru/archive.rb @@ -107,10 +107,15 @@ module Danbooru ensure archive&.close end - alias_method :entries, :each_entry - # Extract the files in the archive to a directory. Subdirectories inside the archive are ignored; all files are - # extracted to a single top-level directory. + # XXX You can't call `extract!` on these entries because libarchive doesn't let you extract an entry after you iterate past it. + # + # @return [Array] The list of entries in the archive. + def entries + @entries ||= each_entry.to_a + end + + # Extract the files in the archive to a directory. # # If a block is given, extract the archive to a temp directory and delete the directory after the block finishes. # Otherwise, extract to a temp directory and return the directory. The caller should delete the directory afterwards. @@ -135,15 +140,20 @@ module Danbooru # Extract the archive to a directory. See `extract!` for details. def extract_to!(directory, flags: DEFAULT_FLAGS) - entries.map do |entry| + each_entry.map do |entry| raise Danbooru::Archive::Error, "Can't extract archive containing absolute path (path: '#{entry.pathname_utf8}')" if entry.pathname_utf8.starts_with?("/") raise Danbooru::Archive::Error, "'#{entry.pathname_utf8}' is not a regular file" if !entry.file? - path = "#{directory}/#{entry.pathname_utf8.tr("/", "_")}" + path = "#{directory}/#{entry.pathname_utf8}" entry.extract!(path, flags: flags) end end + # @return [Integer] The number of files in the archive. + def file_count + @file_count ||= entries.count + end + # @return [Integer] The total decompressed size of all files in the archive. def uncompressed_size @uncompressed_size ||= entries.sum(&:size) @@ -151,7 +161,7 @@ module Danbooru # @return [Boolean] True if any entry in the archive satisfies the condition; otherwise false. def exists?(&block) - entries.with_index { |entry, index| return true if yield entry, index + 1 } + each_entry.with_index { |entry, index| return true if yield entry, index + 1 } false end @@ -162,7 +172,7 @@ module Danbooru # @return [String] The archive format as returned by libarchive ("RAR", "ZIP", etc). def format - @format ||= entries.lazy.map(&:format).first + @format ||= each_entry.lazy.map(&:format).first end # Print the archive contents in `ls -l` format. @@ -205,9 +215,15 @@ module Danbooru entry.pathname_utf8 end - # @return [String] The pathname encoded as UTF-8 instead of ASCII-8BIT. May be wrong if the original pathname wasn't UTF-8. + # @see https://security.snyk.io/research/zip-slip-vulnerability + # @return [Boolean] True if the pathname contains any ".." components, e.g. "../../../../../etc/passwd" + def directory_traversal? + entry.pathname.split("/").compact_blank.grep("..").any? + end + + # @return [String, nil] The pathname encoded as UTF-8 instead of ASCII-8BIT. May be wrong if the original pathname wasn't UTF-8. def pathname_utf8 - pathname.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?") + pathname&.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?") end # @return [String] The archive entry format ("RAR", "ZIP", etc). diff --git a/app/models/upload.rb b/app/models/upload.rb index f59fa1224..876f22a7b 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -4,6 +4,10 @@ class Upload < ApplicationRecord extend Memoist class Error < StandardError; end + # The list of allowed archive file types. + ARCHIVE_FILE_TYPES = %i[zip rar 7z] + + # The maximum number of files allowed per upload. MAX_FILES_PER_UPLOAD = 100 # The maximum number of 'pending' or 'processing' media assets a single user can have at once. @@ -18,11 +22,11 @@ class Upload < ApplicationRecord normalize :source, :normalize_source - validates :files, length: { maximum: MAX_FILES_PER_UPLOAD, message: "can't have more than #{MAX_FILES_PER_UPLOAD} files per upload" } validates :source, format: { with: %r{\Ahttps?://}i, message: "is not a valid URL" }, if: -> { source.present? } validates :referer_url, format: { with: %r{\Ahttps?://}i, message: "is not a valid URL" }, if: -> { referer_url.present? } validate :validate_file_and_source, on: :create - validate :uploader_is_not_limited, on: :create + validate :validate_archive_files, on: :create + validate :validate_uploader_is_not_limited, on: :create after_create :async_process_upload! @@ -69,13 +73,40 @@ class Upload < ApplicationRecord end end - def uploader_is_not_limited + def validate_uploader_is_not_limited queued_asset_count = uploader.upload_media_assets.unfinished.count if queued_asset_count > MAX_QUEUED_ASSETS errors.add(:base, "You have too many images queued for upload (queued: #{queued_asset_count}; limit: #{MAX_QUEUED_ASSETS}). Try again later.") end end + + def validate_archive_files + return unless files.present? + + archive_files.each do |archive, filename| + if !archive.file_ext.in?(ARCHIVE_FILE_TYPES) + errors.add(:base, "'#{filename}' is not a supported file type") + elsif archive.exists? { |_, count| count > MAX_FILES_PER_UPLOAD } + # XXX Potential zip bomb containing thousands of files; don't process it any further. + errors.add(:base, "'#{filename}' contains too many files (max #{MAX_FILES_PER_UPLOAD} files per upload)") + next + elsif archive.uncompressed_size > MediaAsset::MAX_FILE_SIZE + errors.add(:base, "'#{filename}' is too large (uncompressed size: #{archive.uncompressed_size.to_fs(:human_size)}; max size: #{MediaAsset::MAX_FILE_SIZE.to_fs(:human_size)})") + elsif entry = archive.entries.find { |entry| entry.pathname.starts_with?("/") } + errors.add(:base, "'#{entry.pathname_utf8}' in '#{filename}' can't start with '/'") + elsif entry = archive.entries.find { |entry| entry.directory_traversal? } + errors.add(:base, "'#{entry.pathname_utf8}' in '#{filename}' can't contain '..' components") + elsif entry = archive.entries.find { |entry| !entry.file? && !entry.directory? } + errors.add(:base, "'#{entry.pathname_utf8}' in '#{filename}' isn't a regular file") + end + end + + total_files = archive_files.map(&:first).sum(&:file_count) + (files.size - archive_files.size) + if total_files > MAX_FILES_PER_UPLOAD + errors.add(:base, "Can't upload more than #{MAX_FILES_PER_UPLOAD} files at a time (total: #{total_files})") + end + end end concerning :SourceMethods do @@ -132,30 +163,75 @@ class Upload < ApplicationRecord update!(status: "processing") if files.present? - upload_media_assets = files.map do |_index, file| - UploadMediaAsset.new(upload: self, file: file.tempfile, source_url: "file://#{file.original_filename}") - end + process_file_upload! elsif source.present? - page_url = source_extractor.page_url - image_urls = source_extractor.image_urls - - if image_urls.empty? - raise Error, "#{source} doesn't contain any images" - end - - upload_media_assets = image_urls.map do |image_url| - UploadMediaAsset.new(upload: self, source_url: image_url, page_url: page_url, media_asset: nil) - end + process_source_upload! else raise Error, "No file or source given" # Should never happen end + rescue Exception => e + update!(status: "error", error: e.message) + end + + def process_source_upload! + page_url = source_extractor.page_url + image_urls = source_extractor.image_urls + + if image_urls.empty? + raise Error, "#{source} doesn't contain any images" + end + + upload_media_assets = image_urls.map do |image_url| + UploadMediaAsset.new(upload: self, source_url: image_url, page_url: page_url, media_asset: nil) + end transaction do update!(media_asset_count: upload_media_assets.size) upload_media_assets.each(&:save!) end - rescue Exception => e - update!(status: "error", error: e.message) + end + + def process_file_upload! + tmpdirs = [] + + upload_media_assets = uploaded_files.flat_map do |file, original_filename| + if file.is_a?(Danbooru::Archive) + tmpdir, filenames = file.extract! + tmpdirs << tmpdir + + filenames.map do |filename| + name = "file://#{original_filename}/#{Pathname.new(filename).relative_path_from(tmpdir)}" # "file://foo.zip/foo/1.jpg" + UploadMediaAsset.new(upload: self, file: filename, source_url: name) + end + else + UploadMediaAsset.new(upload: self, file: file, source_url: "file://#{original_filename}") + end + end + + transaction do + update!(media_asset_count: upload_media_assets.size) + upload_media_assets.each(&:save!) + end + ensure + tmpdirs.each { |tmpdir| FileUtils.rm_rf(tmpdir) } + end + + # The list of files uploaded from disk, with their filenames. + def uploaded_files + files.map do |_index, file| + if FileTypeDetector.new(file.tempfile).file_ext.in?(ARCHIVE_FILE_TYPES) + [Danbooru::Archive.open!(file.tempfile), file.original_filename] + else + [MediaFile.open(file.tempfile), file.original_filename] + end + end + end + + # The list of archive files uploaded from disk, with their filenames. + def archive_files + uploaded_files.select do |file, original_filename| + file.is_a?(Danbooru::Archive) + end end def source_extractor @@ -167,5 +243,5 @@ class Upload < ApplicationRecord [:uploader, :upload_media_assets, :media_assets, :posts] end - memoize :source_extractor + memoize :source_extractor, :archive_files, :uploaded_files end diff --git a/test/files/archive/42.zip b/test/files/archive/42.zip new file mode 100644 index 000000000..e7681537c Binary files /dev/null and b/test/files/archive/42.zip differ diff --git a/test/files/archive/absolute-path.7z b/test/files/archive/absolute-path.7z new file mode 100644 index 000000000..d4c92ac71 Binary files /dev/null and b/test/files/archive/absolute-path.7z differ diff --git a/test/files/archive/bomb-1-1G.rar b/test/files/archive/bomb-1-1G.rar new file mode 100644 index 000000000..587beec16 Binary files /dev/null and b/test/files/archive/bomb-1-1G.rar differ diff --git a/test/files/archive/bomb-100-10M.rar b/test/files/archive/bomb-100-10M.rar new file mode 100644 index 000000000..ffcdac450 Binary files /dev/null and b/test/files/archive/bomb-100-10M.rar differ diff --git a/test/files/archive/bomb-10k-files.7z b/test/files/archive/bomb-10k-files.7z new file mode 100644 index 000000000..58bc0b017 Binary files /dev/null and b/test/files/archive/bomb-10k-files.7z differ diff --git a/test/files/archive/symlink.zip b/test/files/archive/symlink.zip new file mode 100644 index 000000000..0cea07dd8 Binary files /dev/null and b/test/files/archive/symlink.zip differ diff --git a/test/files/archive/ugoira.tar b/test/files/archive/ugoira.tar new file mode 100644 index 000000000..a49f64cb5 Binary files /dev/null and b/test/files/archive/ugoira.tar differ diff --git a/test/files/archive/ugoira.tar.7z b/test/files/archive/ugoira.tar.7z deleted file mode 100644 index e6c7033b6..000000000 Binary files a/test/files/archive/ugoira.tar.7z and /dev/null differ diff --git a/test/files/archive/ugoira.tar.gz b/test/files/archive/ugoira.tar.gz new file mode 100644 index 000000000..3fa92ba6a Binary files /dev/null and b/test/files/archive/ugoira.tar.gz differ diff --git a/test/files/archive/zip-slip.zip b/test/files/archive/zip-slip.zip new file mode 100644 index 000000000..38b3f499d Binary files /dev/null and b/test/files/archive/zip-slip.zip differ diff --git a/test/functional/uploads_controller_test.rb b/test/functional/uploads_controller_test.rb index 0b7af84be..003d312da 100644 --- a/test/functional/uploads_controller_test.rb +++ b/test/functional/uploads_controller_test.rb @@ -134,7 +134,7 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest should "fail if given both a file and source" do assert_no_difference("Upload.count") do - file = File.open("test/files/test.jpg") + file = Rack::Test::UploadedFile.new("test/files/test.jpg") source = "https://files.catbox.moe/om3tcw.webm" post_auth uploads_path(format: :json), @user, params: { upload: { files: { "0" => file }, source: source }} end @@ -296,6 +296,48 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest end end + context "for an unsupported archive type" do + should "fail for a .tar file" do + create_upload!("test/files/archive/ugoira.tar", user: @user) + assert_match("File is not an image or video", Upload.last.error) + end + + should "fail for a .tar.gz file" do + create_upload!("test/files/archive/ugoira.tar.gz", user: @user) + assert_match("File is not an image or video", Upload.last.error) + end + + should "fail for an archive containing more than 100 files" do + create_upload!("test/files/archive/bomb-10k-files.7z", user: @user) + assert_response 422 + assert_match("'bomb-10k-files.7z' contains too many files (max 100 files per upload)", response.parsed_body.dig("errors", "base", 0)) + end + + should "fail for a decompression bomb" do + create_upload!("test/files/archive/bomb-1-1G.rar", user: @user) + assert_response 422 + assert_match("'bomb-1-1G.rar' is too large (uncompressed size: 1,000 MB; max size: 100 MB)", response.parsed_body.dig("errors", "base", 0)) + end + + should "fail for an archive containing absolute paths" do + create_upload!("test/files/archive/absolute-path.7z", user: @user) + assert_response 422 + assert_match("'/tmp/foo/foo.txt' in 'absolute-path.7z' can't start with '/'", response.parsed_body.dig("errors", "base", 0)) + end + + should "fail for an archive containing '..' paths" do + create_upload!("test/files/archive/zip-slip.zip", user: @user) + assert_response 422 + assert_match(/'.*' in 'zip-slip\.zip' can't contain '\.\.' components/, response.parsed_body.dig("errors", "base", 0)) + end + + should "fail for an archive containing symlinks" do + create_upload!("test/files/archive/symlink.zip", user: @user) + assert_response 422 + assert_match("'passwd' in 'symlink.zip' isn't a regular file", response.parsed_body.dig("errors", "base", 0)) + end + end + context "when re-uploading a media asset stuck in the 'processing' state" do should "mark the asset as failed" do asset = create(:media_asset, file: File.open("test/files/test.jpg"), status: "processing") @@ -395,6 +437,36 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest should_upload_successfully("test/files/webp/lossy_alpha1.webp") end + context "uploading a .zip file from your computer" do + should "work" do + upload = assert_successful_upload("test/files/archive/ugoira.zip", user: @user) + + assert_equal(5, upload.media_asset_count) + assert_equal(5, upload.upload_media_assets.size) + assert_equal("file://ugoira.zip/000000.jpg", upload.upload_media_assets[0].source_url) + end + end + + context "uploading a .rar file from your computer" do + should "work" do + upload = assert_successful_upload("test/files/archive/ugoira.rar", user: @user) + + assert_equal(5, upload.media_asset_count) + assert_equal(5, upload.upload_media_assets.size) + assert_equal("file://ugoira.rar/000000.jpg", upload.upload_media_assets[0].source_url) + end + end + + context "uploading a .7z file from your computer" do + should "work" do + upload = assert_successful_upload("test/files/archive/ugoira.7z", user: @user) + + assert_equal(5, upload.media_asset_count) + assert_equal(5, upload.upload_media_assets.size) + assert_equal("file://ugoira.7z/000000.jpg", upload.upload_media_assets[0].source_url) + end + end + context "uploading multiple files from your computer" do should "work" do files = { diff --git a/test/unit/danbooru_archive_test.rb b/test/unit/danbooru_archive_test.rb index a86fd1c8a..318a6be9a 100644 --- a/test/unit/danbooru_archive_test.rb +++ b/test/unit/danbooru_archive_test.rb @@ -117,7 +117,8 @@ class DanbooruArchiveTest < ActiveSupport::TestCase assert_equal("ZIP 2.0 (uncompressed)", Danbooru::Archive.open("test/files/archive/ugoira.zip").format) assert_equal("RAR5", Danbooru::Archive.open("test/files/archive/ugoira.rar").format) assert_equal("7-Zip", Danbooru::Archive.open("test/files/archive/ugoira.7z").format) - assert_equal("7-Zip", Danbooru::Archive.open("test/files/archive/ugoira.tar.7z").format) + assert_equal("GNU tar format", Danbooru::Archive.open("test/files/archive/ugoira.tar").format) + assert_equal("GNU tar format", Danbooru::Archive.open("test/files/archive/ugoira.tar.gz").format) end end @@ -126,7 +127,8 @@ class DanbooruArchiveTest < ActiveSupport::TestCase assert_equal(:zip, Danbooru::Archive.open("test/files/archive/ugoira.zip").file_ext) assert_equal(:rar, Danbooru::Archive.open("test/files/archive/ugoira.rar").file_ext) assert_equal(:"7z", Danbooru::Archive.open("test/files/archive/ugoira.7z").file_ext) - assert_equal(:"7z", Danbooru::Archive.open("test/files/archive/ugoira.tar.7z").file_ext) + assert_equal(:bin, Danbooru::Archive.open("test/files/archive/ugoira.tar").file_ext) + assert_equal(:bin, Danbooru::Archive.open("test/files/archive/ugoira.tar.gz").file_ext) end end @@ -139,5 +141,40 @@ class DanbooruArchiveTest < ActiveSupport::TestCase assert_match(/^-rw-rw-r-- *0 0 *1639 2014-10-05 23:31:06 000000\.jpg$/, output.tap(&:rewind).read) end end + + should "detect directory traversal attacks" do + archive = Danbooru::Archive.open!("test/files/archive/zip-slip.zip") + + assert_equal(true, archive.entries.any? { |e| e.directory_traversal? }) + assert_raises(Danbooru::Archive::Error) { archive.extract! } + end + + should "detect symlinks" do + archive = Danbooru::Archive.open!("test/files/archive/symlink.zip") + + assert_equal(true, archive.entries.any? { |e| !e.file? && !e.directory? }) + assert_raises(Danbooru::Archive::Error) { archive.extract! } + end + + should "detect absolute paths" do + archive = Danbooru::Archive.open!("test/files/archive/absolute-path.7z") + + assert_equal(true, archive.entries.any? { |e| e.pathname.starts_with?("/") }) + assert_raises(Danbooru::Archive::Error) { archive.extract! } + end + + should "detect archives with large numbers of files" do + archive = Danbooru::Archive.open!("test/files/archive/bomb-10k-files.7z") + + assert_equal(true, archive.exists? { |_, count| count > 100 }) + assert_equal(10_000, archive.file_count) + end + + should "detect decompression bombs" do + archive = Danbooru::Archive.open!("test/files/archive/bomb-1-1G.rar") + + assert_equal(1, archive.file_count) + assert_equal(1_048_576_000, archive.uncompressed_size) + end end end