From 3172031caae1c4398f6d3fd112c5de717ffaef07 Mon Sep 17 00:00:00 2001 From: evazion Date: Wed, 2 Nov 2022 14:55:07 -0500 Subject: [PATCH] media assets: track corrupted files in media metadata. If a media asset is corrupt, include the error message from libvips or ffmpeg in the "Vips:Error" or "FFmpeg:Error" fields in the media metadata table. Corrupt files can't be uploaded nowadays, but they could be in the past, so we have some old corrupted files that we can't generate thumbnails for. This lets us mark these files in the metadata so they're findable with the tag search `exif:Vips:Error`. Known bug: Vips has a single global error buffer that is shared between threads and that isn't cleared between operations. So we can't reliably get the actual error message because it may pick up errors from other threads, or from previous operations in the same thread. --- app/logical/ffmpeg.rb | 18 ++++++++++++++---- app/logical/media_file.rb | 28 +++++++++++++++++++--------- app/logical/media_file/image.rb | 24 ++++++++++++++++++------ app/logical/media_file/ugoira.rb | 2 +- app/logical/media_file/video.rb | 15 +++++++-------- app/models/media_asset.rb | 2 +- test/unit/media_file_test.rb | 12 ++++++++---- 7 files changed, 68 insertions(+), 33 deletions(-) diff --git a/app/logical/ffmpeg.rb b/app/logical/ffmpeg.rb index fc4612bb2..3424d3f3b 100644 --- a/app/logical/ffmpeg.rb +++ b/app/logical/ffmpeg.rb @@ -41,8 +41,8 @@ class FFmpeg output = shell!("ffprobe -v quiet -print_format json -show_format -show_streams #{file.path.shellescape}") json = JSON.parse(output) json.with_indifferent_access - rescue Error - {} + rescue Error => e + { error: e.message.strip }.with_indifferent_access end def width @@ -107,6 +107,16 @@ class FFmpeg audio_streams.present? end + # @return [Boolean] True if the video is unplayable. + def is_corrupt? + error.present? + end + + # @return [String, nil] The error message if the video is unplayable, or nil if no error. + def error + metadata[:error] || playback_info[:error] + end + # Decode the full video and return a hash containing the frame count, fps, and runtime. def playback_info output = shell!("ffmpeg -i #{file.path.shellescape} -f null /dev/null") @@ -117,7 +127,7 @@ class FFmpeg info = status_line.scan(/\S+=\s*\S+/).map { |pair| pair.split(/=\s*/) }.to_h info.with_indifferent_access rescue Error => e - {} + { error: e.message.strip }.with_indifferent_access end def shell!(command) @@ -127,5 +137,5 @@ class FFmpeg output end - memoize :metadata, :playback_info, :frame_count, :duration + memoize :metadata, :playback_info, :frame_count, :duration, :error end diff --git a/app/logical/media_file.rb b/app/logical/media_file.rb index 1d41e97be..3a59349e5 100644 --- a/app/logical/media_file.rb +++ b/app/logical/media_file.rb @@ -10,7 +10,7 @@ class MediaFile extend Memoist include ActiveModel::Serializers::JSON - attr_accessor :file, :strict + attr_accessor :file # delegate all File methods to `file`. delegate *(File.instance_methods - MediaFile.instance_methods), to: :file @@ -122,11 +122,8 @@ class MediaFile # Initialize a MediaFile from a regular File. # # @param file [File] The image file. - # @param strict [Boolean] If true, raise errors if the file is corrupt. If false, - # try to process corrupt files without raising any errors. - def initialize(file, strict: true, **options) + def initialize(file, **options) @file = file - @strict = strict end # @return [Array<(Integer, Integer)>] the width and height of the file @@ -164,7 +161,15 @@ class MediaFile file.size end + # @return [ExifTool::Metadata] The metadata for the file. Subclasses may override this to add + # extra non-ExifTool metadata, such as error messages, Ugoira frame delays, or ffprobe metadata. + # This metadata may be slower to calculate than the raw `exif_metadata`. def metadata + exif_metadata + end + + # @return [ExifTool::Metadata] The metadata for the file, as returned by ExifTool. + def exif_metadata ExifTool.new(file).metadata end @@ -207,9 +212,14 @@ class MediaFile file_ext == :swf end - # @return [Boolean] true if the file is corrupted in some way + # @return [Boolean] True if the file is too corrupted to read or generate thumbnails without error. def is_corrupt? - false + error.present? + end + + # @return [String, nil] The error message when reading the file, or nil if there are no errors. + def error + nil end # @return [Boolean] true if the file is animated. Note that GIFs and PNGs may be animated. @@ -278,7 +288,7 @@ class MediaFile height: height, file_size: file_size, file_ext: file_ext, - mime_type: mime_type, + mime_type: mime_type.to_s, md5: md5, is_corrupt?: is_corrupt?, is_supported?: is_supported?, @@ -302,5 +312,5 @@ class MediaFile end end - memoize :file_ext, :file_size, :md5, :metadata, :mime_type + memoize :file_ext, :file_size, :md5, :mime_type, :exif_metadata end diff --git a/app/logical/media_file/image.rb b/app/logical/media_file/image.rb index 0eee1bea5..9566c68d2 100644 --- a/app/logical/media_file/image.rb +++ b/app/logical/media_file/image.rb @@ -28,10 +28,22 @@ class MediaFile::Image < MediaFile end def is_corrupt? + error.present? + end + + def error + image = Vips::Image.new_from_file(file.path, fail: true) image.stats - false - rescue Vips::Error - true + nil + rescue Vips::Error => e + # XXX Vips has a single global error buffer that is shared between threads and that isn't cleared between operations. + # We can't reliably use `e.message` here because it may pick up errors from other threads, or from previous + # operations in the same thread. + "libvips error" + end + + def metadata + super.merge({ "Vips:Error" => error }.compact_blank) end def duration @@ -44,7 +56,7 @@ class MediaFile::Image < MediaFile when :gif, :webp n_pages when :png - metadata.fetch("PNG:AnimationFrames", 1) + exif_metadata.fetch("PNG:AnimationFrames", 1) when :avif video.frame_count else @@ -156,12 +168,12 @@ class MediaFile::Image < MediaFile # @return [Vips::Image] the Vips image object for the file def image - Vips::Image.new_from_file(file.path, fail: strict).autorot + Vips::Image.new_from_file(file.path, fail: false).autorot end def video FFmpeg.new(file) end - memoize :image, :video, :dimensions, :is_corrupt?, :is_animated_gif?, :is_animated_png? + memoize :image, :video, :preview_frame, :dimensions, :error, :metadata, :is_corrupt?, :is_animated_gif?, :is_animated_png? end diff --git a/app/logical/media_file/ugoira.rb b/app/logical/media_file/ugoira.rb index 3c7f1e925..0ef45fa93 100644 --- a/app/logical/media_file/ugoira.rb +++ b/app/logical/media_file/ugoira.rb @@ -108,5 +108,5 @@ class MediaFile::Ugoira < MediaFile FFmpeg.new(convert).smart_video_preview end - memoize :zipfile, :preview_frame, :dimensions, :convert + memoize :zipfile, :preview_frame, :dimensions, :convert, :metadata end diff --git a/app/logical/media_file/video.rb b/app/logical/media_file/video.rb index 0986ed615..07d39ebaa 100644 --- a/app/logical/media_file/video.rb +++ b/app/logical/media_file/video.rb @@ -5,7 +5,7 @@ # # @see https://github.com/streamio/streamio-ffmpeg class MediaFile::Video < MediaFile - delegate :duration, :frame_count, :frame_rate, :has_audio?, :pix_fmt, :video_codec, :video_stream, :video_streams, :audio_streams, to: :video + delegate :duration, :frame_count, :frame_rate, :has_audio?, :is_corrupt?, :pix_fmt, :video_codec, :video_stream, :video_streams, :audio_streams, :error, to: :video def dimensions [video.width, video.height] @@ -15,10 +15,14 @@ class MediaFile::Video < MediaFile preview_frame.preview!(max_width, max_height, **options) end + def metadata + super.merge({ "FFmpeg:Error" => error }.compact_blank) + end + def is_supported? return false if video_streams.size != 1 return false if audio_streams.size > 1 - return false if is_webm? && metadata["Matroska:DocType"] != "webm" + return false if is_webm? && exif_metadata["Matroska:DocType"] != "webm" return false if is_mp4? && !video_codec.in?(["h264", "vp9"]) # Only allow pixel formats supported by most browsers. Don't allow 10-bit video or 4:4:4 subsampling (neither are supported by Firefox). @@ -35,11 +39,6 @@ class MediaFile::Video < MediaFile true end - # True if decoding the video fails. - def is_corrupt? - video.playback_info.blank? - end - private def video @@ -50,5 +49,5 @@ class MediaFile::Video < MediaFile video.smart_video_preview end - memoize :video, :preview_frame, :dimensions, :duration, :has_audio? + memoize :video, :preview_frame, :dimensions, :metadata, :duration, :has_audio? end diff --git a/app/models/media_asset.rb b/app/models/media_asset.rb index d99b5fb4e..14a38085a 100644 --- a/app/models/media_asset.rb +++ b/app/models/media_asset.rb @@ -89,7 +89,7 @@ class MediaAsset < ApplicationRecord def open_file!(&block) file = storage_service.open(file_path) frame_delays = media_asset.frame_delays if media_asset.is_ugoira? - MediaFile.open(file, frame_delays: frame_delays, strict: false, &block) + MediaFile.open(file, frame_delays: frame_delays, &block) end def convert_file(media_file) diff --git a/test/unit/media_file_test.rb b/test/unit/media_file_test.rb index e84bfcd87..b54636340 100644 --- a/test/unit/media_file_test.rb +++ b/test/unit/media_file_test.rb @@ -467,10 +467,11 @@ class MediaFileTest < ActiveSupport::TestCase @metadata = @file.metadata assert_equal(true, @file.is_corrupt?) + assert_equal("libvips error", @file.error) assert_equal([475, 600], @file.dimensions) assert_equal("File format error", @metadata["ExifTool:Error"]) assert_equal("89a", @metadata["GIF:GIFVersion"]) - assert_equal(9, @metadata.count) + assert_equal(10, @metadata.count) end should "not raise an exception when reading the frame count" do @@ -478,11 +479,12 @@ class MediaFileTest < ActiveSupport::TestCase @metadata = @file.metadata assert_equal(true, @file.is_corrupt?) + assert_equal("libvips error", @file.error) assert_equal(nil, @file.frame_count) assert_equal([575, 800], @file.dimensions) assert_equal("File format error", @metadata["ExifTool:Error"]) assert_equal("89a", @metadata["GIF:GIFVersion"]) - assert_equal(9, @metadata.count) + assert_equal(10, @metadata.count) assert_nothing_raised { @file.attributes } end end @@ -493,8 +495,9 @@ class MediaFileTest < ActiveSupport::TestCase @metadata = @file.metadata assert_equal(true, @file.is_corrupt?) + assert_equal("libvips error", @file.error) assert_equal("Grayscale", @metadata["PNG:ColorType"]) - assert_equal(9, @metadata.count) + assert_equal(10, @metadata.count) end end @@ -504,8 +507,9 @@ class MediaFileTest < ActiveSupport::TestCase @metadata = @file.metadata assert_equal(true, @file.is_corrupt?) + assert_equal("libvips error", @file.error) assert_equal(1, @metadata["File:ColorComponents"]) - assert_equal(10, @metadata.count) + assert_equal(11, @metadata.count) end end