media assets: track corrupted files in media metadata.

If a media asset is corrupt, include the error message from libvips or
ffmpeg in the "Vips:Error" or "FFmpeg:Error" fields in the media
metadata table.

Corrupt files can't be uploaded nowadays, but they could be in the past,
so we have some old corrupted files that we can't generate thumbnails
for. This lets us mark these files in the metadata so they're findable
with the tag search `exif:Vips:Error`.

Known bug: Vips has a single global error buffer that is shared between
threads and that isn't cleared between operations. So we can't reliably
get the actual error message because it may pick up errors from other
threads, or from previous operations in the same thread.
This commit is contained in:
evazion
2022-11-02 14:55:07 -05:00
parent 19c091d81c
commit 3172031caa
7 changed files with 68 additions and 33 deletions

View File

@@ -41,8 +41,8 @@ class FFmpeg
output = shell!("ffprobe -v quiet -print_format json -show_format -show_streams #{file.path.shellescape}") output = shell!("ffprobe -v quiet -print_format json -show_format -show_streams #{file.path.shellescape}")
json = JSON.parse(output) json = JSON.parse(output)
json.with_indifferent_access json.with_indifferent_access
rescue Error rescue Error => e
{} { error: e.message.strip }.with_indifferent_access
end end
def width def width
@@ -107,6 +107,16 @@ class FFmpeg
audio_streams.present? audio_streams.present?
end end
# @return [Boolean] True if the video is unplayable.
def is_corrupt?
error.present?
end
# @return [String, nil] The error message if the video is unplayable, or nil if no error.
def error
metadata[:error] || playback_info[:error]
end
# Decode the full video and return a hash containing the frame count, fps, and runtime. # Decode the full video and return a hash containing the frame count, fps, and runtime.
def playback_info def playback_info
output = shell!("ffmpeg -i #{file.path.shellescape} -f null /dev/null") output = shell!("ffmpeg -i #{file.path.shellescape} -f null /dev/null")
@@ -117,7 +127,7 @@ class FFmpeg
info = status_line.scan(/\S+=\s*\S+/).map { |pair| pair.split(/=\s*/) }.to_h info = status_line.scan(/\S+=\s*\S+/).map { |pair| pair.split(/=\s*/) }.to_h
info.with_indifferent_access info.with_indifferent_access
rescue Error => e rescue Error => e
{} { error: e.message.strip }.with_indifferent_access
end end
def shell!(command) def shell!(command)
@@ -127,5 +137,5 @@ class FFmpeg
output output
end end
memoize :metadata, :playback_info, :frame_count, :duration memoize :metadata, :playback_info, :frame_count, :duration, :error
end end

View File

@@ -10,7 +10,7 @@ class MediaFile
extend Memoist extend Memoist
include ActiveModel::Serializers::JSON include ActiveModel::Serializers::JSON
attr_accessor :file, :strict attr_accessor :file
# delegate all File methods to `file`. # delegate all File methods to `file`.
delegate *(File.instance_methods - MediaFile.instance_methods), to: :file delegate *(File.instance_methods - MediaFile.instance_methods), to: :file
@@ -122,11 +122,8 @@ class MediaFile
# Initialize a MediaFile from a regular File. # Initialize a MediaFile from a regular File.
# #
# @param file [File] The image file. # @param file [File] The image file.
# @param strict [Boolean] If true, raise errors if the file is corrupt. If false, def initialize(file, **options)
# try to process corrupt files without raising any errors.
def initialize(file, strict: true, **options)
@file = file @file = file
@strict = strict
end end
# @return [Array<(Integer, Integer)>] the width and height of the file # @return [Array<(Integer, Integer)>] the width and height of the file
@@ -164,7 +161,15 @@ class MediaFile
file.size file.size
end end
# @return [ExifTool::Metadata] The metadata for the file. Subclasses may override this to add
# extra non-ExifTool metadata, such as error messages, Ugoira frame delays, or ffprobe metadata.
# This metadata may be slower to calculate than the raw `exif_metadata`.
def metadata def metadata
exif_metadata
end
# @return [ExifTool::Metadata] The metadata for the file, as returned by ExifTool.
def exif_metadata
ExifTool.new(file).metadata ExifTool.new(file).metadata
end end
@@ -207,9 +212,14 @@ class MediaFile
file_ext == :swf file_ext == :swf
end end
# @return [Boolean] true if the file is corrupted in some way # @return [Boolean] True if the file is too corrupted to read or generate thumbnails without error.
def is_corrupt? def is_corrupt?
false error.present?
end
# @return [String, nil] The error message when reading the file, or nil if there are no errors.
def error
nil
end end
# @return [Boolean] true if the file is animated. Note that GIFs and PNGs may be animated. # @return [Boolean] true if the file is animated. Note that GIFs and PNGs may be animated.
@@ -278,7 +288,7 @@ class MediaFile
height: height, height: height,
file_size: file_size, file_size: file_size,
file_ext: file_ext, file_ext: file_ext,
mime_type: mime_type, mime_type: mime_type.to_s,
md5: md5, md5: md5,
is_corrupt?: is_corrupt?, is_corrupt?: is_corrupt?,
is_supported?: is_supported?, is_supported?: is_supported?,
@@ -302,5 +312,5 @@ class MediaFile
end end
end end
memoize :file_ext, :file_size, :md5, :metadata, :mime_type memoize :file_ext, :file_size, :md5, :mime_type, :exif_metadata
end end

View File

@@ -28,10 +28,22 @@ class MediaFile::Image < MediaFile
end end
def is_corrupt? def is_corrupt?
error.present?
end
def error
image = Vips::Image.new_from_file(file.path, fail: true)
image.stats image.stats
false nil
rescue Vips::Error rescue Vips::Error => e
true # XXX Vips has a single global error buffer that is shared between threads and that isn't cleared between operations.
# We can't reliably use `e.message` here because it may pick up errors from other threads, or from previous
# operations in the same thread.
"libvips error"
end
def metadata
super.merge({ "Vips:Error" => error }.compact_blank)
end end
def duration def duration
@@ -44,7 +56,7 @@ class MediaFile::Image < MediaFile
when :gif, :webp when :gif, :webp
n_pages n_pages
when :png when :png
metadata.fetch("PNG:AnimationFrames", 1) exif_metadata.fetch("PNG:AnimationFrames", 1)
when :avif when :avif
video.frame_count video.frame_count
else else
@@ -156,12 +168,12 @@ class MediaFile::Image < MediaFile
# @return [Vips::Image] the Vips image object for the file # @return [Vips::Image] the Vips image object for the file
def image def image
Vips::Image.new_from_file(file.path, fail: strict).autorot Vips::Image.new_from_file(file.path, fail: false).autorot
end end
def video def video
FFmpeg.new(file) FFmpeg.new(file)
end end
memoize :image, :video, :dimensions, :is_corrupt?, :is_animated_gif?, :is_animated_png? memoize :image, :video, :preview_frame, :dimensions, :error, :metadata, :is_corrupt?, :is_animated_gif?, :is_animated_png?
end end

View File

@@ -108,5 +108,5 @@ class MediaFile::Ugoira < MediaFile
FFmpeg.new(convert).smart_video_preview FFmpeg.new(convert).smart_video_preview
end end
memoize :zipfile, :preview_frame, :dimensions, :convert memoize :zipfile, :preview_frame, :dimensions, :convert, :metadata
end end

View File

@@ -5,7 +5,7 @@
# #
# @see https://github.com/streamio/streamio-ffmpeg # @see https://github.com/streamio/streamio-ffmpeg
class MediaFile::Video < MediaFile class MediaFile::Video < MediaFile
delegate :duration, :frame_count, :frame_rate, :has_audio?, :pix_fmt, :video_codec, :video_stream, :video_streams, :audio_streams, to: :video delegate :duration, :frame_count, :frame_rate, :has_audio?, :is_corrupt?, :pix_fmt, :video_codec, :video_stream, :video_streams, :audio_streams, :error, to: :video
def dimensions def dimensions
[video.width, video.height] [video.width, video.height]
@@ -15,10 +15,14 @@ class MediaFile::Video < MediaFile
preview_frame.preview!(max_width, max_height, **options) preview_frame.preview!(max_width, max_height, **options)
end end
def metadata
super.merge({ "FFmpeg:Error" => error }.compact_blank)
end
def is_supported? def is_supported?
return false if video_streams.size != 1 return false if video_streams.size != 1
return false if audio_streams.size > 1 return false if audio_streams.size > 1
return false if is_webm? && metadata["Matroska:DocType"] != "webm" return false if is_webm? && exif_metadata["Matroska:DocType"] != "webm"
return false if is_mp4? && !video_codec.in?(["h264", "vp9"]) return false if is_mp4? && !video_codec.in?(["h264", "vp9"])
# Only allow pixel formats supported by most browsers. Don't allow 10-bit video or 4:4:4 subsampling (neither are supported by Firefox). # Only allow pixel formats supported by most browsers. Don't allow 10-bit video or 4:4:4 subsampling (neither are supported by Firefox).
@@ -35,11 +39,6 @@ class MediaFile::Video < MediaFile
true true
end end
# True if decoding the video fails.
def is_corrupt?
video.playback_info.blank?
end
private private
def video def video
@@ -50,5 +49,5 @@ class MediaFile::Video < MediaFile
video.smart_video_preview video.smart_video_preview
end end
memoize :video, :preview_frame, :dimensions, :duration, :has_audio? memoize :video, :preview_frame, :dimensions, :metadata, :duration, :has_audio?
end end

View File

@@ -89,7 +89,7 @@ class MediaAsset < ApplicationRecord
def open_file!(&block) def open_file!(&block)
file = storage_service.open(file_path) file = storage_service.open(file_path)
frame_delays = media_asset.frame_delays if media_asset.is_ugoira? frame_delays = media_asset.frame_delays if media_asset.is_ugoira?
MediaFile.open(file, frame_delays: frame_delays, strict: false, &block) MediaFile.open(file, frame_delays: frame_delays, &block)
end end
def convert_file(media_file) def convert_file(media_file)

View File

@@ -467,10 +467,11 @@ class MediaFileTest < ActiveSupport::TestCase
@metadata = @file.metadata @metadata = @file.metadata
assert_equal(true, @file.is_corrupt?) assert_equal(true, @file.is_corrupt?)
assert_equal("libvips error", @file.error)
assert_equal([475, 600], @file.dimensions) assert_equal([475, 600], @file.dimensions)
assert_equal("File format error", @metadata["ExifTool:Error"]) assert_equal("File format error", @metadata["ExifTool:Error"])
assert_equal("89a", @metadata["GIF:GIFVersion"]) assert_equal("89a", @metadata["GIF:GIFVersion"])
assert_equal(9, @metadata.count) assert_equal(10, @metadata.count)
end end
should "not raise an exception when reading the frame count" do should "not raise an exception when reading the frame count" do
@@ -478,11 +479,12 @@ class MediaFileTest < ActiveSupport::TestCase
@metadata = @file.metadata @metadata = @file.metadata
assert_equal(true, @file.is_corrupt?) assert_equal(true, @file.is_corrupt?)
assert_equal("libvips error", @file.error)
assert_equal(nil, @file.frame_count) assert_equal(nil, @file.frame_count)
assert_equal([575, 800], @file.dimensions) assert_equal([575, 800], @file.dimensions)
assert_equal("File format error", @metadata["ExifTool:Error"]) assert_equal("File format error", @metadata["ExifTool:Error"])
assert_equal("89a", @metadata["GIF:GIFVersion"]) assert_equal("89a", @metadata["GIF:GIFVersion"])
assert_equal(9, @metadata.count) assert_equal(10, @metadata.count)
assert_nothing_raised { @file.attributes } assert_nothing_raised { @file.attributes }
end end
end end
@@ -493,8 +495,9 @@ class MediaFileTest < ActiveSupport::TestCase
@metadata = @file.metadata @metadata = @file.metadata
assert_equal(true, @file.is_corrupt?) assert_equal(true, @file.is_corrupt?)
assert_equal("libvips error", @file.error)
assert_equal("Grayscale", @metadata["PNG:ColorType"]) assert_equal("Grayscale", @metadata["PNG:ColorType"])
assert_equal(9, @metadata.count) assert_equal(10, @metadata.count)
end end
end end
@@ -504,8 +507,9 @@ class MediaFileTest < ActiveSupport::TestCase
@metadata = @file.metadata @metadata = @file.metadata
assert_equal(true, @file.is_corrupt?) assert_equal(true, @file.is_corrupt?)
assert_equal("libvips error", @file.error)
assert_equal(1, @metadata["File:ColorComponents"]) assert_equal(1, @metadata["File:ColorComponents"])
assert_equal(10, @metadata.count) assert_equal(11, @metadata.count)
end end
end end