Fix temp files generated during the upload process not being cleaned up quickly enough. This included downloaded files, generated preview images, and Ugoira video conversions. Previously, we relied on `Tempfile` to clean up files automatically, but this only happened when the Tempfile object was garbage collected, which could take a long time. In the meantime, we could have hundreds of megabytes of temp files hanging around. The fix is to explicitly close temp files when we're done with them. However, the standard `Tempfile` class doesn't immediately delete the file when it's closed, so we also have to introduce a `Danbooru::Tempfile` wrapper that deletes the temp file as soon as it's closed.
319 lines
11 KiB
Ruby
319 lines
11 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "shellwords"
|
|
|
|
# A wrapper for the ffmpeg command.
|
|
class FFmpeg
|
|
extend Memoist
|
|
|
|
# Raised by #shell! when the command it runs exits with an unsuccessful status.
class Error < StandardError; end
|
|
|
|
# The file being operated on, as set by #initialize (String paths are opened with MediaFile.open).
attr_reader :file
|
|
|
|
# Wrap a media file so it can be inspected and converted with FFmpeg.
#
# @param file [MediaFile, String] A webm, mp4, gif, or apng file. A String is
#   treated as a path and opened with MediaFile.open; anything else is used as-is.
def initialize(file)
  @file =
    case file
    when String then MediaFile.open(file)
    else file
    end
end
|
|
|
|
# Generate a .png preview image for a video or animation. Generates
# thumbnails intelligently by avoiding blank frames.
#
# The preview is written to a temp file. If ffmpeg fails, or the result can't
# be opened, the temp file is closed before the error is re-raised so that it
# doesn't hang around until the Tempfile object is garbage collected.
#
# @raise [Error] if the ffmpeg command fails
# @return [MediaFile] the preview image
def smart_video_preview
  vp = Danbooru::Tempfile.new(["danbooru-video-preview-#{file.md5}-", ".png"], binmode: true)

  begin
    # https://ffmpeg.org/ffmpeg.html#Main-options
    # https://ffmpeg.org/ffmpeg-filters.html#thumbnail
    output = shell!("ffmpeg -i #{file.path.shellescape} -vf thumbnail=300 -frames:v 1 -y #{vp.path.shellescape}")
    Rails.logger.debug(output)

    MediaFile.open(vp)
  rescue StandardError
    # Nobody will ever use this temp file, so release it now instead of waiting for GC.
    # NOTE(review): assumes Danbooru::Tempfile#close also deletes the file — confirm.
    vp.close
    raise
  end
end
|
|
|
|
# Read the file's metadata (format, streams, and packets) with ffprobe.
#
# @see https://ffmpeg.org/ffprobe.html
# @see https://gist.github.com/nrk/2286511
#
# @return [Hash] The parsed ffprobe JSON output, with indifferent access. If the
#   ffprobe command fails, the hash contains only an `:error` key holding the
#   failure message.
def metadata
  command = "ffprobe -v quiet -print_format json -show_format -show_streams -show_packets #{file.path.shellescape}"
  JSON.parse(shell!(command)).with_indifferent_access
rescue Error => e
  { error: e.message.strip }.with_indifferent_access
end
|
|
|
|
# @return [Object, nil] The `width` field of the first video stream, or nil if there isn't one.
def width
  stream = video_stream
  stream[:width]
end
|
|
|
|
# @return [Object, nil] The `height` field of the first video stream, or nil if there isn't one.
def height
  stream = video_stream
  stream[:height]
end
|
|
|
|
# The duration is taken from the container metadata when present; otherwise it
# is derived from the `time` value (HH:MM:SS.ss) reported by #playback_info.
#
# @see https://trac.ffmpeg.org/wiki/FFprobeTips#Duration
# @return [Float, nil] The duration of the video or animation in seconds, or nil if unknown.
def duration
  container_duration = metadata.dig(:format, :duration)
  return container_duration.to_f if container_duration.present?
  return nil unless playback_info.has_key?(:time)

  hours, minutes, seconds = playback_info[:time].split(/:/)
  hours.to_f * 60 * 60 + minutes.to_f * 60 + seconds.to_f
end
|
|
|
|
# Prefers the `nb_frames` field of the first video stream, and falls back to
# the `frame` value reported by #playback_info.
#
# @return [Integer, nil] The number of frames in the video or animation, or nil if unknown.
def frame_count
  return video_stream[:nb_frames].to_i if video_stream.has_key?(:nb_frames)
  return playback_info[:frame].to_i if playback_info.has_key?(:frame)

  nil
end
|
|
|
|
# Computed as the frame count divided by the duration.
#
# @return [Float, nil] The average frame rate of the video or animation, or nil if unknown.
def frame_rate
  frames = frame_count
  return nil if frames.nil?

  seconds = duration
  return nil if seconds.nil? || seconds == 0

  frames / seconds
end
|
|
|
|
# @return [Object, nil] The `major_brand` tag from the container's format metadata, or nil if absent.
def major_brand
  metadata.dig(*%i[format tags major_brand])
end
|
|
|
|
# @return [Object, nil] The `pix_fmt` field of the first video stream, or nil if there isn't one.
def pix_fmt
  stream = video_stream
  stream[:pix_fmt]
end
|
|
|
|
# @return [Object, nil] The `codec_name` field of the first video stream, or nil if there isn't one.
def video_codec
  stream = video_stream
  stream[:codec_name]
end
|
|
|
|
# Uses the `bit_rate` field of the first video stream when present. Otherwise
# (e.g. for .webm, which doesn't have the bit rate in the metadata) it is
# calculated from the video stream size and the duration.
#
# @return [Integer, nil] The bit rate of the video stream, in bits per second, or nil if it can't be calculated.
def video_bit_rate
  if video_stream.has_key?(:bit_rate)
    video_stream[:bit_rate].to_i
  # #duration may be nil when it is unknown, so coerce it before comparing.
  elsif video_size > 0 && duration.to_f > 0
    ((8.0 * video_size) / duration).to_i
  else
    nil
  end
end
|
|
|
|
# @return [Hash] The first video stream in the metadata, or an empty hash if there are no video streams.
def video_stream
  primary = video_streams.first
  return primary if primary

  {}
end
|
|
|
|
# @return [Array<Hash>] The entries of the metadata's `streams` list whose `codec_type` is "video".
def video_streams
  all_streams = metadata[:streams].to_a
  all_streams.filter { |candidate| candidate[:codec_type] == "video" }
end
|
|
|
|
# @return [Object, nil] The `codec_name` field of the first audio stream, or nil if there isn't one.
def audio_codec
  stream = audio_stream
  stream[:codec_name]
end
|
|
|
|
# Uses the `bit_rate` field of the first audio stream when present. Otherwise
# (e.g. for .webm, which doesn't have the bit rate in the metadata) it is
# calculated from the audio stream size and the duration.
#
# @return [Integer, nil] The bit rate of the audio stream, in bits per second, or nil if it can't be calculated.
def audio_bit_rate
  if audio_stream.has_key?(:bit_rate)
    audio_stream[:bit_rate].to_i
  # #duration may be nil when it is unknown, so coerce it before comparing.
  elsif audio_size > 0 && duration.to_f > 0
    ((8.0 * audio_size) / duration).to_i
  else
    nil
  end
end
|
|
|
|
# @return [Hash] The first audio stream in the metadata, or an empty hash if there are no audio streams.
def audio_stream
  primary = audio_streams.first
  return primary if primary

  {}
end
|
|
|
|
# @return [Array<Hash>] The entries of the metadata's `streams` list whose `codec_type` is "audio".
def audio_streams
  all_streams = metadata[:streams].to_a
  all_streams.filter { |candidate| candidate[:codec_type] == "audio" }
end
|
|
|
|
# @return [Boolean] True if the file has an audio track. The audio track may be silent.
def has_audio?
  !audio_streams.blank?
end
|
|
|
|
# Sums the `silence_duration` of every silent section reported by #playback_info.
#
# @return [Float, nil] The total duration in seconds of all silent sections of the audio track.
#   Nil if the file doesn't have an audio track, or if #playback_info has no silence
#   data (it returns only an `:error` key when the ffmpeg command fails).
def silence_duration
  return nil unless has_audio?

  # `:silence` is absent when playback failed; return nil instead of calling #pluck on nil.
  playback_info[:silence]&.pluck("silence_duration")&.sum
end
|
|
|
|
# @return [Float, nil] The portion of the video that is silent, as a fraction from 0.0 (0%) to
#   1.0 (100%), or nil if the file doesn't have an audio track or its duration is zero or
#   unknown. If the value is 1.0, then the audio track is totally silent.
def silence_percentage
  return nil unless has_audio?

  total = duration
  return nil if total.to_f == 0.0

  silent_fraction = silence_duration.to_f / total
  silent_fraction.clamp(0.0, 1.0)
end
|
|
|
|
# The average loudness of the audio track relative to max volume, expressed as a fraction:
# 0.0 is silent (0%) and 1.0 is max volume (100%). Converted from #average_loudness_lufs.
#
# The average loudness value ignores silent or quiet sections of the audio. 7% is the standard
# average loudness for TV programs. 15% to 30% is typical for music streaming services.
#
# @return [Float, nil] The average loudness as a fraction of max volume, or nil if the file
#   doesn't have an audio track.
# @see https://en.wikipedia.org/wiki/EBU_R_128
def average_loudness
  lufs = average_loudness_lufs
  return nil unless lufs.present?

  10.pow(lufs / 20.0)
end
|
|
|
|
# The average loudness of the audio track, in LUFS units. -70.0 is silent and 0.0 is max volume.
# Read from the `I` value of the ebur128 summary in #playback_info.
#
# The average loudness value ignores silent or quiet sections of the audio. -23.0 LUFS is the
# standard average loudness for TV programs. -10.0 to -16.0 is typical for music streaming services.
#
# @return [Float, nil] The average loudness in LUFS, or nil if the file doesn't have an audio track.
# @see https://en.wikipedia.org/wiki/EBU_R_128
def average_loudness_lufs
  return nil unless has_audio?

  playback_info.dig(:ebur128, :I)
end
|
|
|
|
# The loudness range of the audio track, in LU (loudness units, where 1 LU = 1 dB). The loudness
# range is roughly the difference between the quietest sound and the loudest sound (i.e., the
# dynamic range). A typical loudness range for music is around 5 to 10 LU.
#
# This is based on measuring loudness in 3-second intervals, ignoring silence, so it's not very
# meaningful for very short videos or videos that are mostly silent.
#
# The value is read from the `LRA` entry of the ebur128 summary in #playback_info.
#
# @return [Float, nil] The loudness range in LU, or nil if the file doesn't have an audio track.
# @see https://en.wikipedia.org/wiki/EBU_R_128
# @see https://tech.ebu.ch/docs/tech/tech3342.pdf (EBU Tech 3343 - Loudness Range: A Measure to Supplement EBU R 128 Loudness Normalization)
def loudness_range
  return nil unless has_audio?

  playback_info.dig(:ebur128, :LRA)
end
|
|
|
|
# The peak loudness of the audio track, as a fraction of max volume. 1.0 is 100% volume, 0.5 is
# 50% volume, 0.0 is 0% volume, etc. Converted from #peak_loudness_dbfs.
#
# This is the true peak loudness, which means it measures the true loudness even if the audio is clipped.
# If the peak loudness is above 1.0, it means the audio is clipped.
#
# @return [Float, nil] The peak loudness as a fraction of max volume, or nil if the file doesn't
#   have an audio track.
# @see https://en.wikipedia.org/wiki/EBU_R_128
def peak_loudness
  dbfs = peak_loudness_dbfs
  return nil unless dbfs.present?

  10.pow(dbfs / 20.0)
end
|
|
|
|
# The peak loudness of the audio track, in dBFS (decibels referenced to full scale). 0.0 is 100%
# volume, -6.0 is 50% volume, -20.0 is 10% volume, -40.0 is 1% volume, etc. Read from the `Peak`
# value of the ebur128 summary in #playback_info.
#
# @return [Float, nil] The peak loudness in dBFS, or nil if the file doesn't have an audio track.
# @see https://en.wikipedia.org/wiki/DBFS
def peak_loudness_dbfs
  return nil unless has_audio?

  playback_info.dig(:ebur128, :Peak)
end
|
|
|
|
# @return [Array] The `packets` list from the metadata, or an empty array if it's missing.
def packets
  raw_packets = metadata[:packets]
  raw_packets.to_a
end
|
|
|
|
# @return [Array<Hash>] The packets whose `codec_type` is "video".
def video_packets
  packets.filter { |packet| packet[:codec_type] == "video" }
end
|
|
|
|
# @return [Array<Hash>] The packets whose `codec_type` is "audio".
def audio_packets
  packets.filter { |packet| packet[:codec_type] == "audio" }
end
|
|
|
|
# Adds up the `size` field of every video packet.
#
# @return [Integer] The size of the compressed video stream in bytes.
def video_size
  video_packets.sum { |packet| packet["size"].to_i }
end
|
|
|
|
# Adds up the `size` field of every audio packet.
#
# @return [Integer] The size of the compressed audio stream in bytes.
def audio_size
  audio_packets.sum { |packet| packet["size"].to_i }
end
|
|
|
|
# @return [Boolean] True if the video is unplayable, i.e. if #error reports a message.
def is_corrupt?
  !error.blank?
end
|
|
|
|
# Checks the ffprobe metadata first, then the full-decode playback info.
#
# @return [String, nil] The error message if the video is unplayable, or nil if no error.
def error
  probe_error = metadata[:error]
  return probe_error if probe_error

  playback_info[:error]
end
|
|
|
|
# Decode the full video and return a hash containing the frame count, fps, runtime, and the sizes of the
# decompressed video and audio streams, as parsed from ffmpeg's log output.
#
# The hash also has a `:silence` key (a list of the silent sections found by the silencedetect
# filter) and an `:ebur128` key (the loudness summary printed by the ebur128 filter; empty if
# ffmpeg printed no summary). Sections that ffmpeg didn't print are simply left out.
#
# @return [Hash] The playback info, with indifferent access. If the ffmpeg command fails, the
#   hash contains only an `:error` key holding the failure message.
def playback_info
  # https://ffmpeg.org/ffmpeg-filters.html#silencedetect
  # https://ffmpeg.org/ffmpeg-filters.html#ebur128-1
  # XXX `-c copy` is faster, but it doesn't decompress the stream so it can't detect corrupt videos.
  output = shell!("ffmpeg -hide_banner -i #{file.path.shellescape} -af silencedetect=noise=0.0001:duration=0.25s,ebur128=metadata=1:dualmono=true:peak=true -f null /dev/null")
  lines = output.split(/\r\n|\r|\n/)

  # time_line = "frame=   10 fps=0.0 q=-0.0 Lsize=N/A time=00:00:00.48 bitrate=N/A speed= 179x"
  # time_info = { "frame"=>"10", "fps"=>"0.0", "q"=>"-0.0", "Lsize"=>"N/A", "time"=>"00:00:00.48", "bitrate"=>"N/A", "speed"=>"188x" }
  #
  # The output may not contain a `frame=` line at all, so fall back to an empty string
  # rather than calling #strip on nil.
  time_line = lines.grep(/\Aframe=/).last.to_s.strip
  time_info = time_line.scan(/\S+=\s*\S+/).map { |pair| pair.split(/=\s*/) }.to_h

  # size_line = "video:36kB audio:16kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown"
  # size_info = { "video" => 36000, "audio" => 16000, "subtitle" => 0, "other streams" => 0, "global headers" => 0, "muxing overhead" => 0 }
  #
  # Same nil guard as above, for output without a `video:` line.
  size_line = lines.grep(/\Avideo:/).last.to_s.strip
  size_info = size_line.scan(/[a-z ]+: *[a-z0-9]+/i).map do |pair|
    key, value = pair.split(/: */)
    [key.strip, value.to_i * 1000] # [" audio", "16kB"] => ["audio", 16000]
  end.to_h

  # [silencedetect @ 0x561855af1040] silence_end: 12.052 | silence_duration: 12.0533
  # => { "silence_end" => 12.052, "silence_duration" => 12.0533 }
  silence_info = lines.grep(/silence_duration/).map do |line|
    line.scan(/[a-z_]+: *[0-9.]+/i).map do |pair|
      key, value = pair.split(/: */)
      [key, value.to_f]
    end.to_h
  end

  # [Parsed_ebur128_1 @ 0x5586b53889c0] Summary:
  #
  #   Integrated loudness:
  #     I:         -20.1 LUFS
  #     Threshold: -30.7 LUFS
  #
  #   Loudness range:
  #     LRA:         5.8 LU
  #     Threshold: -40.6 LUFS
  #     LRA low:   -24.0 LUFS
  #     LRA high:  -18.2 LUFS
  #
  #   True peak:
  #     Peak:       -2.2 dBFS
  ebur128_index = lines.rindex { |line| /Parsed_ebur128.*Summary:/ === line }

  if ebur128_index
    # The summary header plus the 13 lines that follow it, as laid out above.
    ebur128_lines = lines[ebur128_index..ebur128_index + 13].join("\n")
    ebur128_info = ebur128_lines.scan(/^ *[a-z ]+: *-?(?:inf|[0-9.]+) (?:LUFS|LU|dBFS)$/i).map do |pair|
      key, value = pair.split(/: */)
      value = -1000.0 if value == "-inf dBFS" # "Peak: -inf dBFS" for silent audio tracks.
      [key.strip.tr(" ", "_"), value.to_f] # ["    LRA low", "-24.0 LUFS"] => ["LRA_low", -24.0]
    end.to_h
  end

  # `ebur128_info` is nil when no summary was found; nil.to_h turns that into {}.
  { **time_info, **size_info, silence: silence_info, ebur128: ebur128_info.to_h }.with_indifferent_access
rescue Error => e
  { error: e.message.strip }.with_indifferent_access
end
|
|
|
|
# Run a shell command and capture its combined stdout and stderr.
#
# @param command [String] the full command line to run
# @raise [Error] if the command exits with an unsuccessful status; the message
#   names the program (the first word of the command) and includes its output
# @return [String] the command's combined stdout and stderr output
def shell!(command)
  program = command.shellsplit.first
  output, status = Open3.capture2e(command)
  return output if status.success?

  raise Error, "#{program} failed: #{output}"
end
|
|
|
|
# Memoize (via Memoist, extended above) the methods that run ffprobe/ffmpeg or are derived from their output.
memoize :metadata, :playback_info, :frame_count, :duration, :error, :video_size, :audio_size
|
|
end
|