Add Danbooru::Archive library for handling .zip and .rar files.
Introduce a new Danbooru::Archive library. This is a wrapper around libarchive that lets us extract .zip, .rar, .7z, and other archive formats. Replace the rubyzip library in MediaFile::Ugoira with the new Danbooru::Archive library. This is a step towards fixing #5340: Add support for extracting archive attachments from certain sources. This adds a new dependency on libarchive. Downstream users should `apt-get install libarchive13` if they're not using Docker. https://github.com/chef/ffi-libarchive https://github.com/libarchive/libarchive https://www.rubydoc.info/gems/ffi-libarchive/0.4.2 https://github.com/libarchive/libarchive/wiki/Examples#a-complete-extractor
This commit is contained in:
2
Gemfile
2
Gemfile
@@ -9,7 +9,6 @@ gem "sanitize"
|
||||
gem 'ruby-vips'
|
||||
gem 'diff-lcs', :require => "diff/lcs/array"
|
||||
gem 'bcrypt', :require => "bcrypt"
|
||||
gem 'rubyzip', :require => "zip"
|
||||
gem 'stripe'
|
||||
gem 'aws-sdk-sqs', '~> 1'
|
||||
gem 'responders'
|
||||
@@ -59,6 +58,7 @@ gem "ffaker"
|
||||
gem "composite_primary_keys"
|
||||
gem "resolv"
|
||||
gem "rover-df"
|
||||
gem "ffi-libarchive"
|
||||
|
||||
group :development do
|
||||
gem 'rubocop', require: false
|
||||
|
||||
@@ -187,6 +187,8 @@ GEM
|
||||
ffi-compiler (1.0.1)
|
||||
ffi (>= 1.0.0)
|
||||
rake
|
||||
ffi-libarchive (1.1.3)
|
||||
ffi (~> 1.0)
|
||||
flamegraph (0.9.5)
|
||||
fugit (1.7.1)
|
||||
et-orbi (~> 1, >= 1.2.7)
|
||||
@@ -550,6 +552,7 @@ DEPENDENCIES
|
||||
factory_bot
|
||||
ffaker
|
||||
ffi
|
||||
ffi-libarchive
|
||||
flamegraph
|
||||
good_job
|
||||
google-cloud-bigquery
|
||||
@@ -589,7 +592,6 @@ DEPENDENCIES
|
||||
rubocop
|
||||
rubocop-rails
|
||||
ruby-vips
|
||||
rubyzip
|
||||
sanitize
|
||||
scenic
|
||||
selenium-webdriver
|
||||
|
||||
236
app/logical/danbooru/archive.rb
Normal file
236
app/logical/danbooru/archive.rb
Normal file
@@ -0,0 +1,236 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Danbooru::Archive is a utility class representing a .zip, .rar, or .7z archive file. This is a wrapper around
|
||||
# libarchive that adds some utility methods for extracting an archive safely.
|
||||
#
|
||||
# @example
|
||||
# Danbooru::Archive.extract!("foo.zip") do |dir, filenames|
|
||||
# puts dir, filenames
|
||||
# end
|
||||
#
|
||||
# @see https://github.com/chef/ffi-libarchive
|
||||
# @see https://www.rubydoc.info/gems/ffi-libarchive/0.4.2
|
||||
# @see https://github.com/libarchive/libarchive/wiki/ManualPages
|
||||
|
||||
module Archive
  module C
    # XXX Monkey patch ffi-libarchive to add some functions we need.
    # https://www.freebsd.org/cgi/man.cgi?query=archive_util&sektion=3&format=html
    #
    # Each row is: libarchive function name => [argument types, return type].
    {
      archive_format_name: [[:pointer], :string],
      archive_filter_name: [[:pointer, :int], :string],
      archive_filter_count: [[:pointer], :int],
    }.each do |function_name, (argument_types, return_type)|
      attach_function_maybe function_name, argument_types, return_type
    end
  end
end
|
||||
|
||||
module Danbooru
|
||||
# Wraps an archive file on disk and provides helpers for listing its entries and extracting them into a single flat
# directory.
class Archive
  class Error < StandardError; end

  # Default flags when extracting files.
  #
  # NOTE(review): EXTRACT_SECURE_NOABSOLUTEPATHS is left disabled. `#extract_to!` rejects absolute entry paths itself
  # and rewrites each entry's pathname to "#{directory}/..." before extraction; presumably that rewritten path is
  # usually absolute and would be refused by the flag -- confirm before enabling it.
  #
  # @see https://www.freebsd.org/cgi/man.cgi?query=archive_write_disk&sektion=3&format=html
  DEFAULT_FLAGS =
    ::Archive::EXTRACT_NO_OVERWRITE |
    #::Archive::EXTRACT_SECURE_NOABSOLUTEPATHS |
    ::Archive::EXTRACT_SECURE_SYMLINKS |
    ::Archive::EXTRACT_SECURE_NODOTDOT

  attr_reader :file

  # Open an archive, or raise an error if the archive can't be opened. If given a block, pass the archive to the block
  # and close the archive after the block finishes.
  #
  # @param filelike [String, File] The filename of the archive, or an open archive file.
  # @yieldparam [Danbooru::Archive] The archive.
  # @return [Danbooru::Archive, Object] The archive, or the block's return value if a block was given.
  # @raise [Danbooru::Archive::Error] If the file can't be opened, or if the block raises a StandardError.
  def self.open!(filelike, &block)
    # Use File.open rather than Kernel.open: Kernel.open treats a string starting with "|" as a command to run in a
    # subprocess, which must never happen with a caller-supplied filename.
    file = filelike.is_a?(File) ? filelike : File.open(filelike, binmode: true)
    archive = new(file)

    if block_given?
      begin
        yield archive
      ensure
        archive.close
      end
    else
      archive
    end
  rescue => error
    # Any StandardError (including one raised by the block) is re-raised as a Danbooru::Archive::Error.
    archive&.close
    raise Error, error
  end

  # Open an archive, or return nil if the archive can't be opened. See `.open!` for details.
  #
  # @param filelike [String, File] The filename of the archive, or an open archive file.
  # @return [Danbooru::Archive, Object, nil] Same as `.open!`, or nil if `.open!` raised an Error.
  def self.open(filelike, &block)
    open!(filelike, &block)
  rescue Error
    nil
  end

  # Extract the archive to the given directory. If a block is given, extract the archive to a temp directory and
  # delete the directory afterwards. The block is given the name of the directory and the list of files.
  #
  # @param filelike [String, File] The filename of the archive, or an open archive file.
  # @param directory [String] The directory to extract the files to. By default, this is a temp directory the caller must clean up.
  # @param flags [Integer] The libarchive extraction flags.
  # @yieldparam [String, Array<String>] The path to the temp directory, and the list of extracted files in the directory.
  # @return [(String, Array<String>)] The path to the directory, and the list of extracted files in the directory.
  def self.extract!(filelike, directory = nil, flags: DEFAULT_FLAGS, &block)
    open!(filelike) do |archive|
      archive.extract!(directory, flags: flags, &block)
    end
  end

  # @param file [File] The archive file.
  def initialize(file)
    @file = file
  end

  # Currently a no-op: the underlying file object is not closed here. libarchive readers are opened and closed
  # inside `#each_entry` instead.
  def close
    # no-op
  end

  # Iterate across each entry (file) in the archive.
  #
  # @return [Enumerator, Danbooru::Archive] If given a block, call the block on each entry and return the archive
  #   itself. If not given a block, return an Enumerator.
  def each_entry(&block)
    return enum_for(:each_entry) unless block_given?

    # XXX We have to re-open the archive on every call because libarchive is designed for streaming and doesn't
    # support iterating across the archive multiple times.
    archive = ::Archive::Reader.open_filename(file.path)
    while (entry = archive.next_header(clone_entry: true))
      yield Entry.new(archive, entry)
    end

    self
  ensure
    # Runs on normal completion, on error, and when the caller stops iterating early.
    archive&.close
  end
  alias_method :entries, :each_entry

  # Extract the files in the archive to a directory. Subdirectories inside the archive are ignored; all files are
  # extracted to a single top-level directory.
  #
  # If a block is given, extract the archive to a temp directory and delete the directory after the block finishes.
  # Otherwise, extract to the given directory, or to a new temp directory if none was given, and return it. A temp
  # directory created here is the caller's to delete, unless extraction fails, in which case it is removed before
  # the error is re-raised (the caller would otherwise never learn its path).
  #
  # @param directory [String] The directory to extract the files to. By default, this is a temp directory the caller must clean up.
  # @param flags [Integer] The libarchive extraction flags.
  # @yieldparam [String, Array<String>] The name of the temp directory, and the list of files in the directory.
  # @return [(String, Array<String>)] The path to the directory, and the list of extracted files.
  # @raise [ArgumentError] If both a directory and a block are given.
  # @raise [Danbooru::Archive::Error] If an entry can't be extracted.
  def extract!(directory = nil, flags: DEFAULT_FLAGS, &block)
    raise ArgumentError, "can't pass directory and block at the same time" if block_given? && directory.present?

    if block_given?
      Dir.mktmpdir(tmpdir_name) do |dir|
        filenames = extract_to!(dir, flags: flags)
        yield dir, filenames
      end
    elsif directory.present?
      [directory, extract_to!(directory, flags: flags)]
    else
      dir = Dir.mktmpdir(tmpdir_name)

      begin
        [dir, extract_to!(dir, flags: flags)]
      rescue StandardError
        # FileUtils is loaded by the same stdlib "tmpdir" library that provides Dir.mktmpdir.
        FileUtils.rm_rf(dir)
        raise
      end
    end
  end

  # Extract the archive to a directory. See `extract!` for details.
  #
  # Every entry is written directly into `directory`; any "/" in the entry's path is replaced with "_".
  #
  # @param directory [String] The directory to extract the files to.
  # @param flags [Integer] The libarchive extraction flags.
  # @return [Array<String>] The paths of the extracted files.
  # @raise [Danbooru::Archive::Error] If an entry has an absolute path, isn't a regular file, or fails to extract.
  def extract_to!(directory, flags: DEFAULT_FLAGS)
    entries.map do |entry|
      name = entry.pathname_utf8

      raise Danbooru::Archive::Error, "Can't extract archive containing absolute path (path: '#{name}')" if name.starts_with?("/")
      raise Danbooru::Archive::Error, "'#{name}' is not a regular file" unless entry.file?

      entry.extract!("#{directory}/#{name.tr("/", "_")}", flags: flags)
    end
  end

  # @return [Integer] The total decompressed size of all files in the archive.
  def uncompressed_size
    @uncompressed_size ||= entries.sum(&:size)
  end

  # @yieldparam entry [Danbooru::Entry] The current entry.
  # @yieldparam index [Integer] The 1-based position of the entry in the archive.
  # @return [Boolean] True if any entry in the archive satisfies the condition; otherwise false.
  def exists?(&block)
    entries.with_index { |entry, index| return true if yield entry, index + 1 }
    false
  end

  # @return [String] The archive format ("RAR", "ZIP", etc).
  def format
    # Lazy so that only the first entry's header has to be read.
    @format ||= entries.lazy.map(&:format).first
  end

  # Print the archive contents in `ls -l` format.
  #
  # @param io [IO] The stream to print to.
  def ls(io = STDOUT)
    io.puts(entries.map(&:ls).join("\n"))
  end

  private

  # @return [Array<String>] The [prefix, suffix] pair used to name temp directories for this archive.
  def tmpdir_name
    ["danbooru-archive-", "-" + File.basename(file.path)]
  end
end
|
||||
|
||||
# An entry represents a single file in an archive.
|
||||
class Entry
  attr_reader :archive, :entry

  delegate :directory?, :file?, :close, :pathname, :pathname=, :size, :strmode, :uid, :gid, :mtime, to: :entry

  # @param archive [::Archive::Reader] The open libarchive reader the entry was read from.
  # @param entry [::Archive::Entry] The underlying libarchive entry.
  def initialize(archive, entry)
    @archive = archive
    @entry = entry
  end

  # Copy the entry. Called by `dup`. The copy shares the reader but gets its own clone of the libarchive entry, so
  # changing the copy's pathname doesn't affect the original.
  def initialize_copy(source)
    @archive = source.archive
    @entry = ::Archive::Entry.new(source.ffi_ptr, clone: true)
  end

  # Extract the file to the given destination. With the default flags, existing files aren't overwritten and
  # symlinks and paths containing '..' are refused.
  #
  # @param destination [String] The path to extract the file to.
  # @param flags [Integer] The extraction flags.
  # @return [String] The path to the extracted file.
  # @raise [Danbooru::Archive::Error] If libarchive reports anything other than OK.
  def extract!(destination, flags: Danbooru::Archive::DEFAULT_FLAGS)
    # Work on a copy so this entry keeps the pathname it has inside the archive.
    copy = dup
    copy.pathname = destination

    status = ::Archive::C.archive_read_extract(copy.archive_ffi_ptr, copy.ffi_ptr, flags)
    unless status == ::Archive::C::OK
      raise Danbooru::Archive::Error, "Error extracting '#{copy.pathname_utf8}': #{archive.error_string}"
    end

    copy.pathname_utf8
  end

  # @return [String] The pathname encoded as UTF-8 instead of ASCII-8BIT. May be wrong if the original pathname wasn't UTF-8.
  def pathname_utf8
    pathname.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
  end

  # @return [String] The archive entry format ("RAR", "ZIP", etc).
  def format
    ::Archive::C.archive_format_name(archive_ffi_ptr)
  end

  # @return [Array<String>] The list of filters for the entry.
  def filters
    filter_total = ::Archive::C.archive_filter_count(archive_ffi_ptr)
    filter_total.times.map { |position| ::Archive::C.archive_filter_name(archive_ffi_ptr, position) }
  end

  # @return [String] The entry in `ls -l` format.
  def ls
    # NB: `"%9d" % size` rather than `format(...)`, because this class defines its own `#format`.
    [strmode, uid, gid, "%9d" % size, mtime.to_fs(:db), pathname_utf8].join(" ")
  end

  # @return [Object] The value of the reader's non-public `archive` method (hence `send`); this is what gets passed
  #   to the libarchive C functions as the archive handle.
  def archive_ffi_ptr
    archive.send(:archive)
  end

  # @return [FFI::Pointer] The pointer to the libarchive entry object.
  def ffi_ptr
    entry.entry
  end
end
|
||||
end
|
||||
@@ -18,7 +18,6 @@ class MediaFile::Ugoira < MediaFile
|
||||
|
||||
def close
|
||||
file.close
|
||||
zipfile.close
|
||||
preview_frame.close
|
||||
end
|
||||
|
||||
@@ -52,24 +51,15 @@ class MediaFile::Ugoira < MediaFile
|
||||
raise NotImplementedError, "can't convert ugoira to webm: ffmpeg or mkvmerge not installed" unless self.class.videos_enabled?
|
||||
raise RuntimeError, "can't convert ugoira to webm: no ugoira frame data was provided" unless frame_delays.present?
|
||||
|
||||
Dir.mktmpdir("ugoira-#{md5}") do |tmpdir|
|
||||
Danbooru::Archive.extract!(file) do |tmpdir, filenames|
|
||||
output_file = Tempfile.new(["ugoira-conversion", ".webm"], binmode: true)
|
||||
|
||||
FileUtils.mkdir_p("#{tmpdir}/images")
|
||||
|
||||
zipfile.each do |entry|
|
||||
path = File.join(tmpdir, "images", entry.name)
|
||||
entry.extract(path)
|
||||
end
|
||||
|
||||
# Duplicate last frame to avoid it being displayed only for a very short amount of time.
|
||||
last_file_name = zipfile.entries.last.name
|
||||
last_file_name =~ /\A(\d{6})(\.\w{,4})\Z/
|
||||
new_last_index = $1.to_i + 1
|
||||
file_ext = $2
|
||||
new_last_filename = ("%06d" % new_last_index) + file_ext
|
||||
path_from = File.join(tmpdir, "images", last_file_name)
|
||||
path_to = File.join(tmpdir, "images", new_last_filename)
|
||||
last_file_name = File.basename(filenames.last)
|
||||
last_index, file_ext = last_file_name.split(".")
|
||||
new_last_filename = "#{"%06d" % (last_index.to_i + 1)}.#{file_ext}"
|
||||
path_from = File.join(tmpdir, last_file_name)
|
||||
path_to = File.join(tmpdir, new_last_filename)
|
||||
FileUtils.cp(path_from, path_to)
|
||||
|
||||
delay_sum = 0
|
||||
@@ -84,11 +74,10 @@ class MediaFile::Ugoira < MediaFile
|
||||
f.write("#{delay_sum}\n")
|
||||
end
|
||||
|
||||
ext = zipfile.first.name.match(/\.(\w{,4})$/)[1]
|
||||
ffmpeg_out, status = Open3.capture2e("ffmpeg -i #{tmpdir}/images/%06d.#{ext} -codec:v libvpx-vp9 -crf 12 -b:v 0 -an -threads 8 -tile-columns 2 -tile-rows 1 -row-mt 1 -pass 1 -passlogfile #{tmpdir}/ffmpeg2pass -f null /dev/null")
|
||||
ffmpeg_out, status = Open3.capture2e("ffmpeg -i #{tmpdir}/%06d.#{file_ext} -codec:v libvpx-vp9 -crf 12 -b:v 0 -an -threads 8 -tile-columns 2 -tile-rows 1 -row-mt 1 -pass 1 -passlogfile #{tmpdir}/ffmpeg2pass -f null /dev/null")
|
||||
raise Error, "ffmpeg failed: #{ffmpeg_out}" unless status.success?
|
||||
|
||||
ffmpeg_out, status = Open3.capture2e("ffmpeg -i #{tmpdir}/images/%06d.#{ext} -codec:v libvpx-vp9 -crf 12 -b:v 0 -an -threads 8 -tile-columns 2 -tile-rows 1 -row-mt 1 -pass 2 -passlogfile #{tmpdir}/ffmpeg2pass #{tmpdir}/tmp.webm")
|
||||
ffmpeg_out, status = Open3.capture2e("ffmpeg -i #{tmpdir}/%06d.#{file_ext} -codec:v libvpx-vp9 -crf 12 -b:v 0 -an -threads 8 -tile-columns 2 -tile-rows 1 -row-mt 1 -pass 2 -passlogfile #{tmpdir}/ffmpeg2pass #{tmpdir}/tmp.webm")
|
||||
raise Error, "ffmpeg failed: #{ffmpeg_out}" unless status.success?
|
||||
|
||||
mkvmerge_out, status = Open3.capture2e("mkvmerge -o #{output_file.path} --webm --timecodes 0:#{tmpdir}/timecodes.tc #{tmpdir}/tmp.webm")
|
||||
@@ -100,13 +89,9 @@ class MediaFile::Ugoira < MediaFile
|
||||
|
||||
private
|
||||
|
||||
def zipfile
|
||||
Zip::File.new(file.path)
|
||||
end
|
||||
|
||||
def preview_frame
|
||||
FFmpeg.new(convert).smart_video_preview
|
||||
end
|
||||
|
||||
memoize :zipfile, :preview_frame, :dimensions, :convert, :metadata
|
||||
memoize :preview_frame, :dimensions, :convert, :metadata
|
||||
end
|
||||
|
||||
@@ -26,7 +26,7 @@ DANBOORU_RUNTIME_DEPS="
|
||||
ca-certificates mkvtoolnix rclone libpq5 openssl libgmpxx4ldbl
|
||||
zlib1g libfftw3-3 libwebp7 libwebpmux3 libwebpdemux2 liborc-0.4.0 liblcms2-2
|
||||
libpng16-16 libexpat1 libglib2.0 libgif7 libexif12 libheif1 libvpx7 libdav1d6
|
||||
libseccomp2 libseccomp-dev libjemalloc2
|
||||
libseccomp2 libseccomp-dev libjemalloc2 libarchive13
|
||||
"
|
||||
COMMON_RUNTIME_DEPS="
|
||||
$DANBOORU_RUNTIME_DEPS $EXIFTOOL_RUNTIME_DEPS tini busybox less ncdu
|
||||
|
||||
111
test/unit/danbooru_archive_test.rb
Normal file
111
test/unit/danbooru_archive_test.rb
Normal file
@@ -0,0 +1,111 @@
|
||||
require 'test_helper'
|
||||
|
||||
# Unit tests for Danbooru::Archive, run against the test/files/ugoira.zip fixture (which the assertions below expect
# to contain 5 entries).
class DanbooruArchiveTest < ActiveSupport::TestCase
  context "Danbooru::Archive" do
    context ".open! method" do
      should "work without a block" do
        archive = Danbooru::Archive.open!("test/files/ugoira.zip")
        assert_equal(5, archive.entries.count)
      end

      should "work with a block" do
        Danbooru::Archive.open!("test/files/ugoira.zip") do |archive|
          assert_equal(5, archive.entries.count)
        end
      end

      should "raise an error if the block raises an error" do
        assert_raises(Danbooru::Archive::Error) { Danbooru::Archive.open!("test/files/ugoira.zip") { raise "failed" } }
      end

      should "raise an error if the file doesn't exist" do
        assert_raises(Danbooru::Archive::Error) { Danbooru::Archive.open!("test/files/does_not_exist.zip") }
      end
    end

    context ".open method" do
      should "work without a block" do
        archive = Danbooru::Archive.open("test/files/ugoira.zip")
        assert_equal(5, archive.entries.count)
      end

      should "work with a block" do
        Danbooru::Archive.open("test/files/ugoira.zip") do |archive|
          assert_equal(5, archive.entries.count)
        end
      end

      should "return nil if the block raises an error" do
        assert_nil(Danbooru::Archive.open("test/files/ugoira.zip") { raise "failed" })
      end

      should "return nil if the file doesn't exist" do
        assert_nil(Danbooru::Archive.open("test/files/does_not_exist.zip"))
      end
    end

    context ".extract! method" do
      should "extract to temp directory if not given a block or directory" do
        dir, filenames = Danbooru::Archive.extract!("test/files/ugoira.zip")

        assert_equal(true, File.directory?(dir))
        assert_equal(5, filenames.size)
        filenames.each { |filename| assert_equal(true, File.exist?(filename)) }
      ensure
        # `dir` is nil if extract! itself raised; FileUtils.rm_rf(nil) would raise a TypeError and hide that failure.
        FileUtils.rm_rf(dir) if dir
      end

      should "extract to a temp directory and delete it afterwards if given a block" do
        tmpdir = nil

        Danbooru::Archive.extract!("test/files/ugoira.zip") do |dir, filenames|
          tmpdir = dir
          assert_equal(true, File.directory?(dir))
          assert_equal(5, filenames.size)
          filenames.each { |filename| assert_equal(true, File.exist?(filename)) }
        end

        # The block must have run, and the directory it saw must be gone afterwards.
        assert_equal(true, tmpdir.present?)
        assert_equal(false, File.exist?(tmpdir))
      end

      should "extract to given directory if given a directory" do
        Dir.mktmpdir do |tmpdir|
          dir, filenames = Danbooru::Archive.extract!("test/files/ugoira.zip", tmpdir)

          assert_equal(dir, tmpdir)
          assert_equal(5, filenames.size)
          filenames.each { |filename| assert_equal(true, File.exist?(filename)) }
        end
      end
    end

    context "#uncompressed_size method" do
      should "work" do
        archive = Danbooru::Archive.open!("test/files/ugoira.zip")
        assert_equal(6161, archive.uncompressed_size)
      end
    end

    context "#exists? method" do
      should "work" do
        archive = Danbooru::Archive.open!("test/files/ugoira.zip")

        # The second block argument is the 1-based entry position, so this is true only on the 5th entry.
        assert_equal(true, archive.exists? { |_entry, count| count > 4 })
      end
    end

    context "#format method" do
      should "work" do
        archive = Danbooru::Archive.open!("test/files/ugoira.zip")
        assert_equal("ZIP 2.0 (uncompressed)", archive.format)
      end
    end

    context "#ls method" do
      should "work" do
        archive = Danbooru::Archive.open!("test/files/ugoira.zip")
        output = StringIO.new

        archive.ls(output)
        assert_match(/^-rw-rw-r-- *0 0 *1639 2014-10-05 23:31:06 000000\.jpg$/, output.string)
      end
    end
  end
end
|
||||
Reference in New Issue
Block a user