media assets: save audio volume levels in media metadata.
For videos with sound, save information about audio volume levels in the media asset's metadata. These values are stored: * FFmpeg:AudioPeakLoudness The peak loudness of the audio track, from 0.0 (silent) to 1.0 (max volume) * FFmpeg:AudioAverageLoudness The average loudness of the audio track, from 0.0 (silent) to 1.0 (max volume). * FFmpeg:AudioLoudnessRange The difference between the quietest and loudest sounds in the audio track (in decibels). * FFmpeg:AudioSilencePercentage The percentage of the video that is silent (1.0 is completely silent, 0.5 is 50% silence, 0.0 is no silence). These values are calculated based on the EBU R 128 standard, using the ffmpeg command below: ffmpeg -i file.mp4 -af silencedetect=duration=0.05:noise=0.0001,ebur128=metadata=1:peak=true:dualmono=true -f null /dev/null See the links below for details: * https://en.wikipedia.org/wiki/EBU_R_128 * https://www.ffmpeg.org/ffmpeg-filters.html#ebur128-1 * https://tech.ebu.ch/loudness * https://tech.ebu.ch/docs/tech/tech3341.pdf
This commit is contained in:
@@ -140,10 +140,81 @@ class FFmpeg
|
|||||||
metadata[:streams].to_a.select { |stream| stream[:codec_type] == "audio" }
|
metadata[:streams].to_a.select { |stream| stream[:codec_type] == "audio" }
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# @return [Boolean] True if the file has an audio track. The audio track may be silent.
|
||||||
def has_audio?
|
def has_audio?
|
||||||
audio_streams.present?
|
audio_streams.present?
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# @return [Float, nil] The total duration in seconds of all silent sections of the audio track.
|
||||||
|
# Nil if the file doesn't have an audio track.
|
||||||
|
def silence_duration
  return nil unless has_audio?

  # When ffmpeg fails, playback_info only contains an :error key, so the
  # :silence list may be missing. Return nil instead of calling a method on nil.
  silent_sections = playback_info[:silence]
  return nil if silent_sections.nil?

  # Each entry is one detected silent section; add up their lengths in seconds.
  silent_sections.sum { |section| section["silence_duration"] }
end
|
||||||
|
|
||||||
|
# @return [Float, nil] The fraction of the video that is silent, from 0.0 (0%) to 1.0 (100%), or nil if the file doesn't
|
||||||
|
# have an audio track or its duration is zero. If the value is 1.0 (100%), then the audio track is totally silent.
|
||||||
|
def silence_percentage
  # No audio track, or no usable duration to divide by.
  return nil unless has_audio?
  return nil if duration.to_f == 0.0

  # Ratio of silent time to total time, limited to 0.0..1.0 and rounded to 4 places.
  silent_fraction = silence_duration.to_f / duration
  silent_fraction.clamp(0.0, 1.0).round(4)
end
|
||||||
|
|
||||||
|
# The average loudness of the audio track, as a percentage of max volume. 0% is silent and 100% is max volume.
|
||||||
|
#
|
||||||
|
# The average loudness value ignores silent or quiet sections of the audio. 7% is the standard
|
||||||
|
# average loudness for TV programs. 15% to 30% is typical for music streaming services.
|
||||||
|
#
|
||||||
|
# @return [Float, nil] The average loudness as a fraction of max volume (0.0 to 1.0), or nil if the file doesn't have an audio track.
|
||||||
|
# @see https://en.wikipedia.org/wiki/EBU_R_128
|
||||||
|
def average_loudness
  # Read the LUFS value once instead of twice; every read goes through
  # average_loudness_lufs again.
  lufs = average_loudness_lufs
  return nil if lufs.nil?

  # Convert decibels to a linear amplitude ratio: ratio = 10^(dB / 20).
  10.pow(lufs / 20.0).round(4)
end
|
||||||
|
|
||||||
|
# The average loudness of the audio track, in LUFS units. -70.0 is silent and 0.0 is max volume.
|
||||||
|
#
|
||||||
|
# The average loudness value ignores silent or quiet sections of the audio. -23.0 LUFS is the
|
||||||
|
# standard average loudness for TV programs. -10.0 to -16.0 is typical for music streaming services.
|
||||||
|
#
|
||||||
|
# @return [Float, nil] The average loudness in LUFS, or nil if the file doesn't have an audio track.
|
||||||
|
# @see https://en.wikipedia.org/wiki/EBU_R_128
|
||||||
|
def average_loudness_lufs
  return unless has_audio?

  # "I" is the integrated loudness entry of the parsed ebur128 summary.
  playback_info.dig(:ebur128, :I)
end
|
||||||
|
|
||||||
|
# The loudness range of the audio track, in LU (loudness units, where 1 LU = 1 dB). The loudness
|
||||||
|
# range is roughly the difference between the quietest sound and the loudest sound (i.e., the
|
||||||
|
# dynamic range). A typical loudness range for music is around 5 to 10 LU.
|
||||||
|
#
|
||||||
|
# This is based on measuring loudness in 3-second intervals, ignoring silence, so it's not very
|
||||||
|
# meaningful for very short videos or videos that are mostly silent.
|
||||||
|
#
|
||||||
|
# @return [Float, nil] The loudness range in LU, or nil if the file doesn't have an audio track.
|
||||||
|
# @see https://en.wikipedia.org/wiki/EBU_R_128
|
||||||
|
# @see https://tech.ebu.ch/docs/tech/tech3342.pdf (EBU Tech 3342 - Loudness Range: A Measure to Supplement EBU R 128 Loudness Normalization)
|
||||||
|
def loudness_range
  return unless has_audio?

  # "LRA" is the loudness range entry of the parsed ebur128 summary.
  playback_info.dig(:ebur128, :LRA)
end
|
||||||
|
|
||||||
|
# The peak loudness of the audio track, as a percentage of max volume. 1.0 is 100% volume, 0.5 is
|
||||||
|
# 50% volume, 0.0 is 0% volume, etc.
|
||||||
|
#
|
||||||
|
# This is the true peak loudness, which means it measures the true loudness even if the audio is clipped.
|
||||||
|
# If the peak loudness is above 1.0, it means the audio is clipped.
|
||||||
|
#
|
||||||
|
# @return [Float, nil] The peak loudness as a fraction of max volume (1.0 is 100%), or nil if the file doesn't have an audio track.
|
||||||
|
# @see https://en.wikipedia.org/wiki/EBU_R_128
|
||||||
|
def peak_loudness
  # Read the dBFS value once instead of twice; every read goes through
  # peak_loudness_dbfs again.
  dbfs = peak_loudness_dbfs
  return nil if dbfs.nil?

  # Convert decibels to a linear amplitude ratio: ratio = 10^(dB / 20).
  10.pow(dbfs / 20.0).round(4)
end
|
||||||
|
|
||||||
|
# The peak loudness of the audio track, in dBFS (decibels referenced to full scale). 0.0 is 100%
|
||||||
|
# volume, -6.0 is 50% volume, -20.0 is 10% volume, -40.0 is 1% volume, etc.
|
||||||
|
#
|
||||||
|
# @return [Float, nil] The peak loudness in dBFS, or nil if the file doesn't have an audio track.
|
||||||
|
# @see https://en.wikipedia.org/wiki/DBFS
|
||||||
|
def peak_loudness_dbfs
  return unless has_audio?

  # "Peak" is the true peak entry of the parsed ebur128 summary.
  playback_info.dig(:ebur128, :Peak)
end
|
||||||
|
|
||||||
def packets
|
def packets
|
||||||
metadata[:packets].to_a
|
metadata[:packets].to_a
|
||||||
end
|
end
|
||||||
@@ -178,8 +249,10 @@ class FFmpeg
|
|||||||
|
|
||||||
# Decode the full video and return a hash containing the frame count, fps, runtime, and the sizes of the decompressed video and audio streams.
|
# Decode the full video and return a hash containing the frame count, fps, runtime, and the sizes of the decompressed video and audio streams.
|
||||||
def playback_info
|
def playback_info
|
||||||
|
# https://ffmpeg.org/ffmpeg-filters.html#silencedetect
|
||||||
|
# https://ffmpeg.org/ffmpeg-filters.html#ebur128-1
|
||||||
# XXX `-c copy` is faster, but it doesn't decompress the stream so it can't detect corrupt videos.
|
# XXX `-c copy` is faster, but it doesn't decompress the stream so it can't detect corrupt videos.
|
||||||
output = shell!("ffmpeg -hide_banner -i #{file.path.shellescape} -f null /dev/null")
|
output = shell!("ffmpeg -hide_banner -i #{file.path.shellescape} -af silencedetect=noise=0.0001:duration=0.25s,ebur128=metadata=1:dualmono=true:peak=true -f null /dev/null")
|
||||||
lines = output.split(/\r\n|\r|\n/)
|
lines = output.split(/\r\n|\r|\n/)
|
||||||
|
|
||||||
# time_line = "frame= 10 fps=0.0 q=-0.0 Lsize=N/A time=00:00:00.48 bitrate=N/A speed= 179x"
|
# time_line = "frame= 10 fps=0.0 q=-0.0 Lsize=N/A time=00:00:00.48 bitrate=N/A speed= 179x"
|
||||||
@@ -195,7 +268,41 @@ class FFmpeg
|
|||||||
[key.strip, value.to_i * 1000] # [" audio", "16kB"] => ["audio", 16000]
|
[key.strip, value.to_i * 1000] # [" audio", "16kB"] => ["audio", 16000]
|
||||||
end.to_h
|
end.to_h
|
||||||
|
|
||||||
{ **time_info, **size_info }.with_indifferent_access
|
# [silencedetect @ 0x561855af1040] silence_start: -0.00133333e=N/A speed= 25x
|
||||||
|
# [silencedetect @ 0x561855af1040] silence_end: 12.052 | silence_duration: 12.0533
|
||||||
|
silence_info = lines.grep(/silence_duration/).map do |line|
|
||||||
|
line.scan(/[a-z_]+: *[0-9.]+/i).map do |pair|
|
||||||
|
key, value = pair.split(/: */)
|
||||||
|
[key, value.to_f]
|
||||||
|
end.to_h
|
||||||
|
end
|
||||||
|
|
||||||
|
# [Parsed_ebur128_1 @ 0x5586b53889c0] Summary:
|
||||||
|
#
|
||||||
|
# Integrated loudness:
|
||||||
|
# I: -20.1 LUFS
|
||||||
|
# Threshold: -30.7 LUFS
|
||||||
|
#
|
||||||
|
# Loudness range:
|
||||||
|
# LRA: 5.8 LU
|
||||||
|
# Threshold: -40.6 LUFS
|
||||||
|
# LRA low: -24.0 LUFS
|
||||||
|
# LRA high: -18.2 LUFS
|
||||||
|
#
|
||||||
|
# True peak:
|
||||||
|
# Peak: -2.2 dBFS
|
||||||
|
ebur128_index = lines.rindex { |line| /Parsed_ebur128.*Summary:/ === line }
|
||||||
|
|
||||||
|
if ebur128_index
|
||||||
|
ebur128_lines = lines[ebur128_index..ebur128_index + 13].join("\n")
|
||||||
|
ebur128_info = ebur128_lines.scan(/^ *[a-z ]+: *-?(?:inf|[0-9.]+) (?:LUFS|LU|dBFS)$/i).map do |pair|
|
||||||
|
key, value = pair.split(/: */)
|
||||||
|
value = -1000.0 if value == "-inf dBFS" # "Peak: -inf dBFS" for silent audio tracks.
|
||||||
|
[key.strip.tr(" ", "_"), value.to_f] # ["LRA low", "-34.3 LUFS"] => ["LRA_low", -34.3]
|
||||||
|
end.to_h
|
||||||
|
end
|
||||||
|
|
||||||
|
{ **time_info, **size_info, silence: silence_info, ebur128: ebur128_info.to_h }.with_indifferent_access
|
||||||
rescue Error => e
|
rescue Error => e
|
||||||
{ error: e.message.strip }.with_indifferent_access
|
{ error: e.message.strip }.with_indifferent_access
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -5,7 +5,10 @@
|
|||||||
#
|
#
|
||||||
# @see https://github.com/streamio/streamio-ffmpeg
|
# @see https://github.com/streamio/streamio-ffmpeg
|
||||||
class MediaFile::Video < MediaFile
|
class MediaFile::Video < MediaFile
|
||||||
delegate :duration, :frame_count, :frame_rate, :has_audio?, :is_corrupt?, :major_brand, :pix_fmt, :video_codec, :video_bit_rate, :video_stream, :video_streams, :audio_codec, :audio_bit_rate, :audio_stream, :audio_streams, :error, to: :video
|
delegate :duration, :frame_count, :frame_rate, :has_audio?, :is_corrupt?, :major_brand, :pix_fmt,
|
||||||
|
:video_codec, :video_bit_rate, :video_stream, :video_streams, :audio_codec, :audio_bit_rate,
|
||||||
|
:audio_stream, :audio_streams, :silence_duration, :silence_percentage, :average_loudness,
|
||||||
|
:peak_loudness, :loudness_range, :error, to: :video
|
||||||
|
|
||||||
def dimensions
|
def dimensions
|
||||||
[video.width, video.height]
|
[video.width, video.height]
|
||||||
@@ -28,6 +31,10 @@ class MediaFile::Video < MediaFile
|
|||||||
"FFmpeg:AudioProfile" => audio_stream[:profile],
|
"FFmpeg:AudioProfile" => audio_stream[:profile],
|
||||||
"FFmpeg:AudioLayout" => audio_stream[:channel_layout],
|
"FFmpeg:AudioLayout" => audio_stream[:channel_layout],
|
||||||
"FFmpeg:AudioBitRate" => audio_bit_rate,
|
"FFmpeg:AudioBitRate" => audio_bit_rate,
|
||||||
|
"FFmpeg:AudioPeakLoudness" => peak_loudness,
|
||||||
|
"FFmpeg:AudioAverageLoudness" => average_loudness,
|
||||||
|
"FFmpeg:AudioLoudnessRange" => loudness_range,
|
||||||
|
"FFmpeg:AudioSilencePercentage" => silence_percentage,
|
||||||
}.compact_blank)
|
}.compact_blank)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
BIN
test/files/mp4/test-silent-audio.mp4
Normal file
BIN
test/files/mp4/test-silent-audio.mp4
Normal file
Binary file not shown.
BIN
test/files/webm/test-silent-audio.webm
Normal file
BIN
test/files/webm/test-silent-audio.webm
Normal file
Binary file not shown.
@@ -216,6 +216,33 @@ class MediaFileTest < ActiveSupport::TestCase
|
|||||||
assert_equal("LC", file.metadata["FFmpeg:AudioProfile"])
|
assert_equal("LC", file.metadata["FFmpeg:AudioProfile"])
|
||||||
assert_equal("stereo", file.metadata["FFmpeg:AudioLayout"])
|
assert_equal("stereo", file.metadata["FFmpeg:AudioLayout"])
|
||||||
assert_equal(128002, file.metadata["FFmpeg:AudioBitRate"])
|
assert_equal(128002, file.metadata["FFmpeg:AudioBitRate"])
|
||||||
|
assert_equal(0.1318, file.metadata["FFmpeg:AudioPeakLoudness"])
|
||||||
|
assert_equal(0.0193, file.metadata["FFmpeg:AudioAverageLoudness"])
|
||||||
|
assert_equal(0, file.metadata["FFmpeg:AudioLoudnessRange"])
|
||||||
|
assert_equal(0.7562, file.metadata["FFmpeg:AudioSilencePercentage"])
|
||||||
|
end
|
||||||
|
|
||||||
|
should "determine the metadata for a video with silent audio" do
|
||||||
|
file = MediaFile.open("test/files/mp4/test-silent-audio.mp4")
|
||||||
|
|
||||||
|
assert_equal(false, file.is_corrupt?)
|
||||||
|
assert_equal(5.736, file.duration)
|
||||||
|
assert_equal(1.74, file.frame_rate.round(2))
|
||||||
|
assert_equal(10, file.frame_count)
|
||||||
|
assert_equal(10, file.metadata["FFmpeg:FrameCount"])
|
||||||
|
assert_equal("isom", file.metadata["FFmpeg:MajorBrand"])
|
||||||
|
assert_equal("yuv420p", file.metadata["FFmpeg:PixFmt"])
|
||||||
|
assert_equal("h264", file.metadata["FFmpeg:VideoCodec"])
|
||||||
|
assert_equal("Constrained Baseline", file.metadata["FFmpeg:VideoProfile"])
|
||||||
|
assert_equal(25003, file.metadata["FFmpeg:VideoBitRate"])
|
||||||
|
assert_equal("aac", file.metadata["FFmpeg:AudioCodec"])
|
||||||
|
assert_equal("LC", file.metadata["FFmpeg:AudioProfile"])
|
||||||
|
assert_equal("stereo", file.metadata["FFmpeg:AudioLayout"])
|
||||||
|
assert_equal(2100, file.metadata["FFmpeg:AudioBitRate"])
|
||||||
|
assert_equal(0, file.metadata["FFmpeg:AudioPeakLoudness"])
|
||||||
|
assert_equal(0.0003, file.metadata["FFmpeg:AudioAverageLoudness"])
|
||||||
|
assert_equal(0, file.metadata["FFmpeg:AudioLoudnessRange"])
|
||||||
|
assert_equal(0.9999, file.metadata["FFmpeg:AudioSilencePercentage"])
|
||||||
end
|
end
|
||||||
|
|
||||||
should "determine the metadata for a video without audio" do
|
should "determine the metadata for a video without audio" do
|
||||||
@@ -283,6 +310,30 @@ class MediaFileTest < ActiveSupport::TestCase
|
|||||||
assert_equal("opus", file.metadata["FFmpeg:AudioCodec"])
|
assert_equal("opus", file.metadata["FFmpeg:AudioCodec"])
|
||||||
assert_equal("stereo", file.metadata["FFmpeg:AudioLayout"])
|
assert_equal("stereo", file.metadata["FFmpeg:AudioLayout"])
|
||||||
assert_equal(50661, file.metadata["FFmpeg:AudioBitRate"])
|
assert_equal(50661, file.metadata["FFmpeg:AudioBitRate"])
|
||||||
|
assert_equal(0.1274, file.metadata["FFmpeg:AudioPeakLoudness"])
|
||||||
|
assert_equal(0.0186, file.metadata["FFmpeg:AudioAverageLoudness"])
|
||||||
|
assert_equal(0, file.metadata["FFmpeg:AudioLoudnessRange"])
|
||||||
|
assert_equal(0.7506, file.metadata["FFmpeg:AudioSilencePercentage"])
|
||||||
|
end
|
||||||
|
|
||||||
|
should "determine the metadata for a video with silent audio" do
|
||||||
|
file = MediaFile.open("test/files/webm/test-silent-audio.webm")
|
||||||
|
|
||||||
|
assert_equal(0.501, file.duration)
|
||||||
|
assert_equal(10/0.501, file.frame_rate) # 19.96
|
||||||
|
assert_equal(10, file.frame_count)
|
||||||
|
assert_equal(10, file.metadata["FFmpeg:FrameCount"])
|
||||||
|
assert_equal("yuv420p", file.metadata["FFmpeg:PixFmt"])
|
||||||
|
assert_equal("vp8", file.metadata["FFmpeg:VideoCodec"])
|
||||||
|
assert_equal("0", file.metadata["FFmpeg:VideoProfile"])
|
||||||
|
assert_equal(188407, file.metadata["FFmpeg:VideoBitRate"])
|
||||||
|
assert_equal("opus", file.metadata["FFmpeg:AudioCodec"])
|
||||||
|
assert_equal("stereo", file.metadata["FFmpeg:AudioLayout"])
|
||||||
|
assert_equal(1197, file.metadata["FFmpeg:AudioBitRate"])
|
||||||
|
assert_equal(0, file.metadata["FFmpeg:AudioPeakLoudness"])
|
||||||
|
assert_equal(0.0003, file.metadata["FFmpeg:AudioAverageLoudness"])
|
||||||
|
assert_equal(0, file.metadata["FFmpeg:AudioLoudnessRange"])
|
||||||
|
assert_equal(0.985, file.metadata["FFmpeg:AudioSilencePercentage"])
|
||||||
end
|
end
|
||||||
|
|
||||||
should "determine the metadata for a video without audio" do
|
should "determine the metadata for a video without audio" do
|
||||||
@@ -519,7 +570,7 @@ class MediaFileTest < ActiveSupport::TestCase
|
|||||||
|
|
||||||
assert_equal(true, @file.is_corrupt?)
|
assert_equal(true, @file.is_corrupt?)
|
||||||
assert_equal("libvips error", @file.error)
|
assert_equal("libvips error", @file.error)
|
||||||
assert_equal(nil, @file.frame_count)
|
assert_nil(@file.frame_count)
|
||||||
assert_equal([575, 800], @file.dimensions)
|
assert_equal([575, 800], @file.dimensions)
|
||||||
assert_equal("File format error", @metadata["ExifTool:Error"])
|
assert_equal("File format error", @metadata["ExifTool:Error"])
|
||||||
assert_equal("89a", @metadata["GIF:GIFVersion"])
|
assert_equal("89a", @metadata["GIF:GIFVersion"])
|
||||||
|
|||||||
Reference in New Issue
Block a user