diff --git a/app/logical/danbooru/archive.rb b/app/logical/danbooru/archive.rb
index 8c67bbaa2..2f33f9ecb 100644
--- a/app/logical/danbooru/archive.rb
+++ b/app/logical/danbooru/archive.rb
@@ -155,7 +155,12 @@ module Danbooru
       false
     end
 
-    # @return [String] The archive format ("RAR", "ZIP", etc).
+    # @return [Symbol] The archive's file type as detected from its signature by FileTypeDetector (e.g. :zip, or :bin if unrecognized).
+    def file_ext
+      @file_ext ||= FileTypeDetector.new(file).file_ext
+    end
+
+    # @return [String] The archive format as returned by libarchive ("RAR", "ZIP", etc).
     def format
       @format ||= entries.lazy.map(&:format).first
     end
diff --git a/app/logical/file_type_detector.rb b/app/logical/file_type_detector.rb
new file mode 100644
index 000000000..797ace84d
--- /dev/null
+++ b/app/logical/file_type_detector.rb
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+# Detect a file's type based on its file signature.
+#
+# @see https://en.wikipedia.org/wiki/List_of_file_signatures
+class FileTypeDetector
+  extend Memoist
+  attr_reader :file
+
+  # @param file [File] The file to detect.
+  def initialize(file)
+    @file = file
+  end
+
+  # @return [Symbol] The file's extension (e.g. :jpg, :png, etc). Returns `:bin` if the file type is unknown.
+  memoize def file_ext
+    header = file.pread(16, 0)
+
+    case header
+    when /\A\xff\xd8/n
+      :jpg
+    when /\AGIF87a/, /\AGIF89a/
+      :gif
+    when /\A\x89PNG\r\n\x1a\n/n
+      :png
+    when /\ACWS/, /\AFWS/, /\AZWS/
+      :swf
+
+    # This detects the Matroska (.mkv) header. WebM files have a DocType of "webm", which is checked later in `MediaFile::Video#is_supported?`.
+    #
+    # https://www.rfc-editor.org/rfc/rfc8794.html#section-8.1
+    # https://www.webmproject.org/docs/container/
+    when /\A\x1a\x45\xdf\xa3/n
+      :webm
+
+    # The four bytes after "RIFF" are the file size, i.e. arbitrary binary data. The /m flag is needed so that `.`
+    # also matches a 0x0a (newline) byte there; without it such files would be detected as :bin.
+    #
+    # https://developers.google.com/speed/webp/docs/riff_container
+    when /\ARIFF....WEBP/m
+      :webp
+
+    # https://www.ftyps.com
+    # https://cconcolato.github.io/mp4ra/filetype.html
+    # https://github.com/mozilla/gecko-dev/blob/master/toolkit/components/mediasniffer/nsMediaSniffer.cpp#L78
+    # https://mimesniff.spec.whatwg.org/#signature-for-mp4
+    #
+    # isom (common) - MP4 Base Media v1 [ISO 14496-12:2003]
+    # mp42 (common) - MP4 v2 [ISO 14496-14]
+    # iso4 (rare) - MP4 Base Media v4
+    # iso5 (rare) - MP4 Base Media v5 (used by Twitter)
+    # 3gp5 (rare) - 3GPP Media (.3GP) Release 5 (XXX technically this should be .3gp, not .mp4. Supported by Chrome but not Firefox)
+    # avc1 (rare) - MP4 Base w/ AVC ext [ISO 14496-12:2005]
+    # M4V (rare) - Apple iTunes Video (https://en.wikipedia.org/wiki/M4V)
+    #
+    # The first four bytes are the binary box size, so /m is used to let `.` match a 0x0a (newline) byte too.
+    when /\A....ftyp(?:mp4|avc|iso|3gp5|M4V)/m
+      :mp4
+
+    # https://aomediacodec.github.io/av1-avif/#brands-overview
+    when /\A....ftyp(?:avif|avis)/m
+      :avif
+    when /\APK\x03\x04/
+      :zip
+    else
+      :bin
+    end
+  # `pread` raises EOFError when there is nothing to read at the given offset (e.g. an empty file).
+  rescue EOFError
+    :bin
+  end
+
+  # @return [String] The file's MIME type, or "application/octet-stream" if unknown.
+  def mime_type
+    Mime::Type.lookup_by_extension(file_ext).to_s.presence || "application/octet-stream"
+  end
+end
diff --git a/app/logical/media_file.rb b/app/logical/media_file.rb
index 3a59349e5..a00dcec55 100644
--- a/app/logical/media_file.rb
+++ b/app/logical/media_file.rb
@@ -64,54 +64,7 @@ class MediaFile
   # @param [File] an open file
   # @return [Symbol] the file's type
   def self.file_ext(file)
-    header = file.pread(16, 0)
-
-    case header
-    when /\A\xff\xd8/n
-      :jpg
-    when /\AGIF87a/, /\AGIF89a/
-      :gif
-    when /\A\x89PNG\r\n\x1a\n/n
-      :png
-    when /\ACWS/, /\AFWS/, /\AZWS/
-      :swf
-
-    # This detects the Matroska (.mkv) header. WebM files have a DocType of "webm", which is checked later in `MediaFile::Video#is_supported?`.
-    #
-    # https://www.rfc-editor.org/rfc/rfc8794.html#section-8.1
-    # https://www.webmproject.org/docs/container/
-    when /\A\x1a\x45\xdf\xa3/n
-      :webm
-
-    # https://developers.google.com/speed/webp/docs/riff_container
-    when /\ARIFF....WEBP/
-      :webp
-
-    # https://www.ftyps.com
-    # https://cconcolato.github.io/mp4ra/filetype.html
-    # https://github.com/mozilla/gecko-dev/blob/master/toolkit/components/mediasniffer/nsMediaSniffer.cpp#L78
-    # https://mimesniff.spec.whatwg.org/#signature-for-mp4
-    #
-    # isom (common) - MP4 Base Media v1 [IS0 14496-12:2003]
-    # mp42 (common) - MP4 v2 [ISO 14496-14]
-    # iso4 (rare) - MP4 Base Media v4
-    # iso5 (rare) - MP4 Base Media v5 (used by Twitter)
-    # 3gp5 (rare) - 3GPP Media (.3GP) Release 5 (XXX technically this should be .3gp, not .mp4. Supported by Chrome but not Firefox)
-    # avc1 (rare) - MP4 Base w/ AVC ext [ISO 14496-12:2005]
-    # M4V (rare) - Apple iTunes Video (https://en.wikipedia.org/wiki/M4V)
-    when /\A....ftyp(?:mp4|avc|iso|3gp5|M4V)/
-      :mp4
-
-    # https://aomediacodec.github.io/av1-avif/#brands-overview
-    when /\A....ftyp(?:avif|avis)/
-      :avif
-    when /\APK\x03\x04/
-      :zip
-    else
-      :bin
-    end
-  rescue EOFError
-    :bin
+    FileTypeDetector.new(file).file_ext
   end
 
   # @return [Boolean] true if we can generate video previews.