media file: factor out file type detection code.

Factor out the file type detection code from MediaFile into a FileTypeDetector class so we can use
it to detect archive files (.zip, .rar, .7z) too.
This commit is contained in:
evazion
2022-11-14 15:46:08 -06:00
parent 0c1e9a1618
commit 5f92f452fe
3 changed files with 78 additions and 49 deletions

View File

@@ -155,7 +155,12 @@ module Danbooru
false
end
# @return [String] The archive format ("RAR", "ZIP", etc).
# @return [Symbol] The archive format as detected by us (:zip, :rar, :7z, etc).
def file_ext
  # Reuse the cached value once one has been computed.
  return @file_ext if @file_ext

  @file_ext = FileTypeDetector.new(file).file_ext
end
# @return [String] The archive format as returned by libarchive ("RAR", "ZIP", etc).
def format
  # Reuse the cached value once one has been computed.
  return @format if @format

  # Lazy enumeration: only the first entry's format is actually read.
  @format = entries.lazy.map { |entry| entry.format }.first
end

View File

@@ -0,0 +1,71 @@
# frozen_string_literal: true
# Detect a file's type based on its file signature.
#
# @see https://en.wikipedia.org/wiki/List_of_file_signatures
class FileTypeDetector
  extend Memoist

  attr_reader :file

  # @param file [File] An open file whose leading bytes will be inspected.
  def initialize(file)
    @file = file
  end

  # Detect the file type by matching the first 16 bytes of the file against known signatures.
  #
  # The patterns that skip over a length field with `....` use the `m` flag: those bytes are arbitrary binary data,
  # and without `m` a length byte equal to 0x0A ("\n") would not be matched by `.`, so a valid file would be
  # misdetected as `:bin`.
  #
  # @return [Symbol] The file's extension (e.g. :jpg, :png, etc). Returns `:bin` if the file type is unknown, or if
  #   reading the header raises EOFError.
  memoize def file_ext
    header = file.pread(16, 0)

    case header
    when /\A\xff\xd8/n
      :jpg

    when /\AGIF87a/, /\AGIF89a/
      :gif

    when /\A\x89PNG\r\n\x1a\n/n
      :png

    when /\ACWS/, /\AFWS/, /\AZWS/
      :swf

    # This detects the Matroska (.mkv) header. WebM files have a DocType of "webm", which is checked later in `MediaFile::Video#is_supported?`.
    #
    # https://www.rfc-editor.org/rfc/rfc8794.html#section-8.1
    # https://www.webmproject.org/docs/container/
    when /\A\x1a\x45\xdf\xa3/n
      :webm

    # The four bytes after "RIFF" are the chunk size, so any byte value (including 0x0A) may appear there.
    #
    # https://developers.google.com/speed/webp/docs/riff_container
    when /\ARIFF....WEBP/m
      :webp

    # https://www.ftyps.com
    # https://cconcolato.github.io/mp4ra/filetype.html
    # https://github.com/mozilla/gecko-dev/blob/master/toolkit/components/mediasniffer/nsMediaSniffer.cpp#L78
    # https://mimesniff.spec.whatwg.org/#signature-for-mp4
    #
    # isom (common) - MP4 Base Media v1 [ISO 14496-12:2003]
    # mp42 (common) - MP4 v2 [ISO 14496-14]
    # iso4 (rare) - MP4 Base Media v4
    # iso5 (rare) - MP4 Base Media v5 (used by Twitter)
    # 3gp5 (rare) - 3GPP Media (.3GP) Release 5 (XXX technically this should be .3gp, not .mp4. Supported by Chrome but not Firefox)
    # avc1 (rare) - MP4 Base w/ AVC ext [ISO 14496-12:2005]
    # M4V (rare) - Apple iTunes Video (https://en.wikipedia.org/wiki/M4V)
    when /\A....ftyp(?:mp4|avc|iso|3gp5|M4V)/m
      :mp4

    # https://aomediacodec.github.io/av1-avif/#brands-overview
    when /\A....ftyp(?:avif|avis)/m
      :avif

    when /\APK\x03\x04/
      :zip

    else
      :bin
    end
  rescue EOFError
    :bin
  end

  # @return [String] The file's MIME type, or "application/octet-stream" if unknown.
  def mime_type
    Mime::Type.lookup_by_extension(file_ext).to_s.presence || "application/octet-stream"
  end
end

View File

@@ -64,54 +64,7 @@ class MediaFile
# @param [File] an open file
# @return [Symbol] the file's type
def self.file_ext(file)
  # Signature sniffing lives in FileTypeDetector so it can be shared with other callers;
  # this method only delegates to it.
  FileTypeDetector.new(file).file_ext
end
# @return [Boolean] true if we can generate video previews.