Add Danbooru::Archive library for handling .zip and .rar files.

Introduce a new Danbooru::Archive library. This is a wrapper around libarchive that lets us extract
.zip, .rar, .7z, and other archive formats. Replace the rubyzip library in MediaFile::Ugoira with
the new Danbooru::Archive library.

This is a step towards fixing #5340: Add support for extracting archive attachments from certain sources.

This adds a new dependency on libarchive. Downstream users should `apt-get install libarchive13` if
they're not using Docker.

https://github.com/chef/ffi-libarchive
https://github.com/libarchive/libarchive
https://www.rubydoc.info/gems/ffi-libarchive/0.4.2
https://github.com/libarchive/libarchive/wiki/Examples#a-complete-extractor
This commit is contained in:
evazion
2022-11-14 13:36:09 -06:00
parent f942768ce8
commit 0c1e9a1618
6 changed files with 361 additions and 27 deletions

View File

@@ -0,0 +1,236 @@
# frozen_string_literal: true
# Danbooru::Archive is a utility class representing a .zip, .rar, or .7z archive file. This is a wrapper around
# libarchive that adds some utility methods for extracting an archive safely.
#
# @example
# Danbooru::Archive.extract!("foo.zip") do |dir, filenames|
# puts dir, filenames
# end
#
# @see https://github.com/chef/ffi-libarchive
# @see https://www.rubydoc.info/gems/ffi-libarchive/0.4.2
# @see https://github.com/libarchive/libarchive/wiki/ManualPages
module Archive
module C
# XXX Monkey patch ffi-libarchive to add some functions we need.
# https://www.freebsd.org/cgi/man.cgi?query=archive_util&sektion=3&format=html
attach_function_maybe :archive_format_name, [:pointer], :string
attach_function_maybe :archive_filter_name, [:pointer, :int], :string
attach_function_maybe :archive_filter_count, [:pointer], :int
end
end
module Danbooru
class Archive
class Error < StandardError; end
# Default flags when extracting files.
# @see https://www.freebsd.org/cgi/man.cgi?query=archive_write_disk&sektion=3&format=html
DEFAULT_FLAGS =
::Archive::EXTRACT_NO_OVERWRITE |
#::Archive::EXTRACT_SECURE_NOABSOLUTEPATHS |
::Archive::EXTRACT_SECURE_SYMLINKS |
::Archive::EXTRACT_SECURE_NODOTDOT
attr_reader :file
# Open an archive, or raise an error if the archive can't be opened. If given a block, pass the archive to the block
# and close the archive after the block finishes.
#
# @param filelike [String, File] The filename of the archive, or an open archive file.
# @yieldparam [Danbooru::Archive] The archive.
# @return [Danbooru::Archive] The archive.
def self.open!(filelike, &block)
file = filelike.is_a?(File) ? filelike : Kernel.open(filelike, binmode: true)
archive = new(file)
if block_given?
begin
yield archive
ensure
archive.close
end
else
archive
end
rescue => error
archive&.close
raise Error, error
end
# Open an archive, or return nil if the archive can't be opened. See `#open!` for details.
def self.open(filelike, &block)
open!(filelike, &block)
rescue Error
nil
end
# Extract the archive to the given directory. If a block is given, extract the archive to a temp directory and
# delete the directory afterwards. The block is given the name of the directory and the list of files.
#
# @param filelike [String, File] The filename of the archive, or an open archive file.
# @param directory [String] The directory to extract the files to. By default, this is a temp directory the caller must clean up.
# @yieldparam [String, Array<String>] The path to the temp directory, and the list of extracted files in the directory.
# @return [(String, Array<String>)] The path to the directory, and the list of extracted files in the directory.
def self.extract!(filelike, directory = nil, flags: DEFAULT_FLAGS, &block)
open!(filelike) do |archive|
archive.extract!(directory, flags: flags, &block)
end
end
# @param file [File] The archive file.
def initialize(file)
@file = file
end
def close
# no-op
end
# Iterate across each entry (file) in the archive.
#
# @return [Enumerator, Danbooru:Archive] If given a block, call the block on each entry and return the archive
# itself. If not given a block, return an Enumerator.
def each_entry(&block)
return enum_for(:each_entry) unless block_given?
# XXX We have to re-open the archive on every call because libarchive is designed for streaming and doesn't
# support iterating across the archive multiple times.
archive = ::Archive::Reader.open_filename(file.path)
while (entry = archive.next_header(clone_entry: true))
yield Entry.new(archive, entry)
end
self
ensure
archive&.close
end
alias_method :entries, :each_entry
# Extract the files in the archive to a directory. Subdirectories inside the archive are ignored; all files are
# extracted to a single top-level directory.
#
# If a block is given, extract the archive to a temp directory and delete the directory after the block finishes.
# Otherwise, extract to a temp directory and return the directory. The caller should delete the directory afterwards.
#
# @param directory [String] The directory to extract the files to. By default, this is a temp directory the caller must clean up.
# @yieldparam [String, Array<String>] The name of the temp directory, and the list of files in the directory.
# @return [(String, Array<String>)] The path to the directory, and the list of extracted files.
def extract!(directory = nil, flags: DEFAULT_FLAGS, &block)
raise ArgumentError, "can't pass directory and block at the same time" if block_given? && directory.present?
if block_given?
Dir.mktmpdir(["danbooru-archive-", "-" + File.basename(file.path)]) do |dir|
filenames = extract_to!(dir, flags: flags)
yield dir, filenames
end
else
dir = directory.presence || Dir.mktmpdir(["danbooru-archive-", "-" + File.basename(file.path)])
filenames = extract_to!(dir, flags: flags)
[dir, filenames]
end
end
# Extract the archive to a directory. See `extract!` for details.
def extract_to!(directory, flags: DEFAULT_FLAGS)
entries.map do |entry|
raise Danbooru::Archive::Error, "Can't extract archive containing absolute path (path: '#{entry.pathname_utf8}')" if entry.pathname_utf8.starts_with?("/")
raise Danbooru::Archive::Error, "'#{entry.pathname_utf8}' is not a regular file" if !entry.file?
path = "#{directory}/#{entry.pathname_utf8.tr("/", "_")}"
entry.extract!(path, flags: flags)
end
end
# @return [Integer] The total decompressed size of all files in the archive.
def uncompressed_size
@uncompressed_size ||= entries.sum(&:size)
end
# @return [Boolean] True if any entry in the archive satisfies the condition; otherwise false.
def exists?(&block)
entries.with_index { |entry, index| return true if yield entry, index + 1 }
false
end
# @return [String] The archive format ("RAR", "ZIP", etc).
def format
@format ||= entries.lazy.map(&:format).first
end
# Print the archive contents in `ls -l` format.
def ls(io = STDOUT)
io.puts(entries.map(&:ls).join("\n"))
end
end
# An entry represents a single file in an archive.
class Entry
attr_reader :archive, :entry
delegate :directory?, :file?, :close, :pathname, :pathname=, :size, :strmode, :uid, :gid, :mtime, to: :entry
# @param entry [::Archive] The archive the entry belongs to.
# @param entry [::Archive::Entry] The archive entry.
def initialize(archive, entry)
@archive = archive
@entry = entry
end
# Copy the entry. Called by `dup`.
def initialize_copy(entry)
@archive = entry.archive
@entry = ::Archive::Entry.new(entry.ffi_ptr, clone: true)
end
# Extract the file to the given destination. By default, don't overwrite files, don't allow symlinks or paths
# containing '..', and don't extract file ownership, permission, or timestamp information.
#
# @param destination [String] The path to extract the file to.
# @param flags [Integer] The extraction flags.
# @return [String] The path to the extracted file.
def extract!(destination, flags: Danbooru::Archive::DEFAULT_FLAGS)
entry = dup
entry.pathname = destination
result = ::Archive::C.archive_read_extract(entry.archive_ffi_ptr, entry.ffi_ptr, flags)
raise Danbooru::Archive::Error, "Error extracting '#{entry.pathname_utf8}': #{archive.error_string}" if result != ::Archive::C::OK
entry.pathname_utf8
end
# @return [String] The pathname encoded as UTF-8 instead of ASCII-8BIT. May be wrong if the original pathname wasn't UTF-8.
def pathname_utf8
pathname.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
end
# @return [String] The archive entry format ("RAR", "ZIP", etc).
def format
::Archive::C::archive_format_name(archive_ffi_ptr)
end
# @return [Array<String>] The list of filters for the entry.
def filters
count = ::Archive::C::archive_filter_count(archive_ffi_ptr)
count.times.map do |n|
::Archive::C::archive_filter_name(archive_ffi_ptr, n)
end
end
# @return [String] The entry in `ls -l` format.
def ls
"#{strmode} #{uid} #{gid} #{"%9d" % size} #{mtime.to_fs(:db)} #{pathname_utf8}"
end
def archive_ffi_ptr
archive.send(:archive)
end
# @return [FFI::Pointer] The pointer to the libarchive entry object.
def ffi_ptr
entry.entry
end
end
end

View File

@@ -18,7 +18,6 @@ class MediaFile::Ugoira < MediaFile
def close
file.close
zipfile.close
preview_frame.close
end
@@ -52,24 +51,15 @@ class MediaFile::Ugoira < MediaFile
raise NotImplementedError, "can't convert ugoira to webm: ffmpeg or mkvmerge not installed" unless self.class.videos_enabled?
raise RuntimeError, "can't convert ugoira to webm: no ugoira frame data was provided" unless frame_delays.present?
Dir.mktmpdir("ugoira-#{md5}") do |tmpdir|
Danbooru::Archive.extract!(file) do |tmpdir, filenames|
output_file = Tempfile.new(["ugoira-conversion", ".webm"], binmode: true)
FileUtils.mkdir_p("#{tmpdir}/images")
zipfile.each do |entry|
path = File.join(tmpdir, "images", entry.name)
entry.extract(path)
end
# Duplicate last frame to avoid it being displayed only for a very short amount of time.
last_file_name = zipfile.entries.last.name
last_file_name =~ /\A(\d{6})(\.\w{,4})\Z/
new_last_index = $1.to_i + 1
file_ext = $2
new_last_filename = ("%06d" % new_last_index) + file_ext
path_from = File.join(tmpdir, "images", last_file_name)
path_to = File.join(tmpdir, "images", new_last_filename)
last_file_name = File.basename(filenames.last)
last_index, file_ext = last_file_name.split(".")
new_last_filename = "#{"%06d" % (last_index.to_i + 1)}.#{file_ext}"
path_from = File.join(tmpdir, last_file_name)
path_to = File.join(tmpdir, new_last_filename)
FileUtils.cp(path_from, path_to)
delay_sum = 0
@@ -84,11 +74,10 @@ class MediaFile::Ugoira < MediaFile
f.write("#{delay_sum}\n")
end
ext = zipfile.first.name.match(/\.(\w{,4})$/)[1]
ffmpeg_out, status = Open3.capture2e("ffmpeg -i #{tmpdir}/images/%06d.#{ext} -codec:v libvpx-vp9 -crf 12 -b:v 0 -an -threads 8 -tile-columns 2 -tile-rows 1 -row-mt 1 -pass 1 -passlogfile #{tmpdir}/ffmpeg2pass -f null /dev/null")
ffmpeg_out, status = Open3.capture2e("ffmpeg -i #{tmpdir}/%06d.#{file_ext} -codec:v libvpx-vp9 -crf 12 -b:v 0 -an -threads 8 -tile-columns 2 -tile-rows 1 -row-mt 1 -pass 1 -passlogfile #{tmpdir}/ffmpeg2pass -f null /dev/null")
raise Error, "ffmpeg failed: #{ffmpeg_out}" unless status.success?
ffmpeg_out, status = Open3.capture2e("ffmpeg -i #{tmpdir}/images/%06d.#{ext} -codec:v libvpx-vp9 -crf 12 -b:v 0 -an -threads 8 -tile-columns 2 -tile-rows 1 -row-mt 1 -pass 2 -passlogfile #{tmpdir}/ffmpeg2pass #{tmpdir}/tmp.webm")
ffmpeg_out, status = Open3.capture2e("ffmpeg -i #{tmpdir}/%06d.#{file_ext} -codec:v libvpx-vp9 -crf 12 -b:v 0 -an -threads 8 -tile-columns 2 -tile-rows 1 -row-mt 1 -pass 2 -passlogfile #{tmpdir}/ffmpeg2pass #{tmpdir}/tmp.webm")
raise Error, "ffmpeg failed: #{ffmpeg_out}" unless status.success?
mkvmerge_out, status = Open3.capture2e("mkvmerge -o #{output_file.path} --webm --timecodes 0:#{tmpdir}/timecodes.tc #{tmpdir}/tmp.webm")
@@ -100,13 +89,9 @@ class MediaFile::Ugoira < MediaFile
private
def zipfile
Zip::File.new(file.path)
end
def preview_frame
FFmpeg.new(convert).smart_video_preview
end
memoize :zipfile, :preview_frame, :dimensions, :convert, :metadata
memoize :preview_frame, :dimensions, :convert, :metadata
end