metadata: record metadata for corrupt files.
Bug: if ExifTool exited with status 1 because it thought the file was corrupt, then we didn't record any of the metadata, even though it was able to read most of it. It turns out there are thousands of posts with minorly corrupt metadata that ExifTool is still able to read, but will complain about. Fix: ignore the exit code of ExifTool and always save whatever metadata ExifTool is able to return. It will return an `ExifTool:Error` tag in the event of errors. Note that there are some (many?) files that are considered corrupt by ExifTool but not by Vips, and vice versa. Probably because ExifTool only parses the metadata while Vips only parses the image data.
This commit is contained in:
@@ -4,8 +4,6 @@ require "shellwords"
|
||||
class ExifTool
|
||||
extend Memoist
|
||||
|
||||
class Error < StandardError; end
|
||||
|
||||
# @see https://exiftool.org/exiftool_pod.html#OPTIONS
|
||||
DEFAULT_OPTIONS = %q(
|
||||
-G1 -duplicates -unknown -struct --binary
|
||||
@@ -26,17 +24,11 @@ class ExifTool
|
||||
# @param options [String] the options to pass to exiftool
|
||||
# @return [Hash] the file's metadata
|
||||
def metadata(options: DEFAULT_OPTIONS)
|
||||
output = shell!("exiftool #{options} -json #{file.path.shellescape}")
|
||||
output = %x(exiftool #{options} -json #{file.path.shellescape})
|
||||
json = JSON.parse(output).first
|
||||
json = json.except("SourceFile")
|
||||
json.with_indifferent_access
|
||||
end
|
||||
|
||||
def shell!(command)
|
||||
output, status = Open3.capture2e(command)
|
||||
raise Error, "#{command}` failed: #{output}" if !status.success?
|
||||
output
|
||||
end
|
||||
|
||||
memoize :metadata
|
||||
end
|
||||
|
||||
@@ -190,6 +190,40 @@ class MediaFileTest < ActiveSupport::TestCase
|
||||
end
|
||||
end
|
||||
|
||||
context "a corrupt GIF" do
|
||||
should "still read the metadata" do
|
||||
@file = MediaFile.open("test/files/test-corrupt.gif")
|
||||
@metadata = @file.metadata
|
||||
|
||||
assert_equal(true, @file.is_corrupt?)
|
||||
assert_equal("File format error", @metadata["ExifTool:Error"])
|
||||
assert_equal("89a", @metadata["GIF:GIFVersion"])
|
||||
assert_equal(6, @metadata.count)
|
||||
end
|
||||
end
|
||||
|
||||
context "a corrupt PNG" do
|
||||
should "still read the metadata" do
|
||||
@file = MediaFile.open("test/files/test-corrupt.png")
|
||||
@metadata = @file.metadata
|
||||
|
||||
assert_equal(true, @file.is_corrupt?)
|
||||
assert_equal("Grayscale", @metadata["PNG:ColorType"])
|
||||
assert_equal(6, @metadata.count)
|
||||
end
|
||||
end
|
||||
|
||||
context "a corrupt JPEG" do
|
||||
should "still read the metadata" do
|
||||
@file = MediaFile.open("test/files/test-corrupt.jpg")
|
||||
@metadata = @file.metadata
|
||||
|
||||
assert_equal(true, @file.is_corrupt?)
|
||||
assert_equal(1, @metadata["File:ColorComponents"])
|
||||
assert_equal(7, @metadata.count)
|
||||
end
|
||||
end
|
||||
|
||||
context "a greyscale image without an embedded color profile" do
|
||||
should "successfully generate a thumbnail" do
|
||||
@image = MediaFile.open("test/files/test-grey-no-profile.jpg")
|
||||
|
||||
Reference in New Issue
Block a user