From 822f72387e9109dedd540656c6faf3e5cb9d3b3d Mon Sep 17 00:00:00 2001 From: evazion Date: Wed, 15 Sep 2021 20:26:35 -0500 Subject: [PATCH] metadata: record metadata for corrupt files. Bug: if ExifTool exited with status 1 because it thought the file was corrupt, then we didn't record any of the metadata, even though it was able to read most of it. It turns out there are thousands of posts with minorly corrupt metadata that ExifTool is still able to read, but will complain about. Fix: ignore the exit code of ExifTool and always save whatever metadata ExifTool is able to return. It will return an `ExifTool:Error` tag in the event of errors. Note that there are some (many?) files that are considered corrupt by ExifTool but not by Vips, and vice versa. Probably because ExifTool only parses the metadata while Vips only parses the image data. --- app/logical/exif_tool.rb | 10 +--------- test/unit/media_file_test.rb | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/app/logical/exif_tool.rb b/app/logical/exif_tool.rb index 9b2db8a2b..f48368c76 100644 --- a/app/logical/exif_tool.rb +++ b/app/logical/exif_tool.rb @@ -4,8 +4,6 @@ require "shellwords" class ExifTool extend Memoist - class Error < StandardError; end - # @see https://exiftool.org/exiftool_pod.html#OPTIONS DEFAULT_OPTIONS = %q( -G1 -duplicates -unknown -struct --binary @@ -26,17 +24,11 @@ class ExifTool # @param options [String] the options to pass to exiftool # @return [Hash] the file's metadata def metadata(options: DEFAULT_OPTIONS) - output = shell!("exiftool #{options} -json #{file.path.shellescape}") + output = %x(exiftool #{options} -json #{file.path.shellescape}) json = JSON.parse(output).first json = json.except("SourceFile") json.with_indifferent_access end - def shell!(command) - output, status = Open3.capture2e(command) - raise Error, "#{command}` failed: #{output}" if !status.success? - output - end - memoize :metadata end diff --git a/test/unit/media_file_test.rb b/test/unit/media_file_test.rb index a9ed28065..3d6fd4b26 100644 --- a/test/unit/media_file_test.rb +++ b/test/unit/media_file_test.rb @@ -190,6 +190,40 @@ class MediaFileTest < ActiveSupport::TestCase end end + context "a corrupt GIF" do + should "still read the metadata" do + @file = MediaFile.open("test/files/test-corrupt.gif") + @metadata = @file.metadata + + assert_equal(true, @file.is_corrupt?) + assert_equal("File format error", @metadata["ExifTool:Error"]) + assert_equal("89a", @metadata["GIF:GIFVersion"]) + assert_equal(6, @metadata.count) + end + end + + context "a corrupt PNG" do + should "still read the metadata" do + @file = MediaFile.open("test/files/test-corrupt.png") + @metadata = @file.metadata + + assert_equal(true, @file.is_corrupt?) + assert_equal("Grayscale", @metadata["PNG:ColorType"]) + assert_equal(6, @metadata.count) + end + end + + context "a corrupt JPEG" do + should "still read the metadata" do + @file = MediaFile.open("test/files/test-corrupt.jpg") + @metadata = @file.metadata + + assert_equal(true, @file.is_corrupt?) + assert_equal(1, @metadata["File:ColorComponents"]) + assert_equal(7, @metadata.count) + end + end + context "a greyscale image without an embedded color profile" do should "successfully generate a thumbnail" do @image = MediaFile.open("test/files/test-grey-no-profile.jpg")