From 3d660953d43fc564a87603f63b6e9d2d4a7bc6a6 Mon Sep 17 00:00:00 2001 From: evazion Date: Tue, 7 Sep 2021 18:22:34 -0500 Subject: [PATCH] Add MediaMetadata model. Add a model for storing image and video metadata for uploaded files. Metadata is extracted using ExifTool. You will need to install ExifTool after this commit. ExifTool 12.22 is the minimum required version because we use the `--binary` option, which was added in this release. The MediaMetadata model is separate from the MediaAsset model because some files contain tons of metadata, and most of it is non-essential. The MediaAsset model represents an uploaded file and contains essential metadata, like the file's size and type, while the MediaMetadata model represents all the other non-essential metadata associated with a file. Metadata is stored as a JSON column in the database. ExifTool returns all the file's metadata, not just the EXIF metadata. EXIF is one of several types of image metadata, which is why we call it MediaMetadata instead of EXIFMetadata. 
--- INSTALL.debian | 1 + app/controllers/media_metadata_controller.rb | 8 +++ app/logical/exif_tool.rb | 42 ++++++++++++ app/logical/media_file.rb | 6 +- app/logical/upload_service.rb | 7 -- app/logical/upload_service/utils.rb | 2 + app/models/media_asset.rb | 13 ++++ app/models/media_metadata.rb | 22 +++++++ app/models/upload.rb | 2 + app/policies/media_metadata_policy.rb | 5 ++ config/docker/Dockerfile.danbooru | 1 + config/docker/build-base-image.sh | 15 ++++- config/routes.rb | 1 + .../20210908015203_create_media_metadata.rb | 17 +++++ db/structure.sql | 64 ++++++++++++++++++- test/factories/media_asset.rb | 1 + test/factories/media_metadata.rb | 6 ++ .../media_metadata_controller_test.rb | 14 ++++ test/test_helpers/upload_test_helper.rb | 2 + test/unit/upload_service_test.rb | 27 ++++---- 20 files changed, 235 insertions(+), 21 deletions(-) create mode 100644 app/controllers/media_metadata_controller.rb create mode 100644 app/logical/exif_tool.rb create mode 100644 app/models/media_metadata.rb create mode 100644 app/policies/media_metadata_policy.rb create mode 100644 db/migrate/20210908015203_create_media_metadata.rb create mode 100644 test/factories/media_metadata.rb create mode 100644 test/functional/media_metadata_controller_test.rb diff --git a/INSTALL.debian b/INSTALL.debian index c99201595..c4278aad7 100644 --- a/INSTALL.debian +++ b/INSTALL.debian @@ -49,6 +49,7 @@ apt-get -y install $LIBSSL_DEV_PKG build-essential automake libxml2-dev libxslt- apt-get -y install libpq-dev postgresql-client apt-get -y install liblcms2-dev $LIBJPEG_TURBO_DEV_PKG libexpat1-dev libgif-dev libpng-dev libexif-dev apt-get -y install gcc g++ +apt-get -y install exiftool curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list diff --git a/app/controllers/media_metadata_controller.rb b/app/controllers/media_metadata_controller.rb new file mode 100644 index 
000000000..f7e1e4311 --- /dev/null +++ b/app/controllers/media_metadata_controller.rb @@ -0,0 +1,8 @@ +class MediaMetadataController < ApplicationController + respond_to :json, :xml + + def index + @media_metadata = authorize MediaMetadata.visible(CurrentUser.user).paginated_search(params, count_pages: true) + respond_with(@media_metadata) + end +end diff --git a/app/logical/exif_tool.rb b/app/logical/exif_tool.rb new file mode 100644 index 000000000..9b2db8a2b --- /dev/null +++ b/app/logical/exif_tool.rb @@ -0,0 +1,42 @@ +require "shellwords" + +# A wrapper for the exiftool command. +class ExifTool + extend Memoist + + class Error < StandardError; end + + # @see https://exiftool.org/exiftool_pod.html#OPTIONS + DEFAULT_OPTIONS = %q( + -G1 -duplicates -unknown -struct --binary + -x 'System:*' -x ExifToolVersion -x FileType -x FileTypeExtension + -x MIMEType -x ImageWidth -x ImageHeight -x ImageSize -x MegaPixels + ).squish + + attr_reader :file + + # Open a file with ExifTool. + # @param file [File, String] an image or video file + def initialize(file) + @file = file.is_a?(String) ? File.open(file) : file + end + + # Get the file's metadata. + # @see https://exiftool.org/TagNames/index.html + # @param options [String] the options to pass to exiftool + # @return [Hash] the file's metadata + def metadata(options: DEFAULT_OPTIONS) + output = shell!("exiftool #{options} -json #{file.path.shellescape}") + json = JSON.parse(output).first + json = json.except("SourceFile") + json.with_indifferent_access + end + + def shell!(command) + output, status = Open3.capture2e(command) + raise Error, "`#{command}` failed: #{output}" if !status.success? 
+ output + end + + memoize :metadata +end diff --git a/app/logical/media_file.rb b/app/logical/media_file.rb index 043c7f35b..1702e0102 100644 --- a/app/logical/media_file.rb +++ b/app/logical/media_file.rb @@ -102,6 +102,10 @@ class MediaFile file.size end + def metadata + ExifTool.new(file).metadata + end + # @return [Boolean] true if the file is an image def is_image? file_ext.in?([:jpg, :png, :gif]) @@ -164,5 +168,5 @@ class MediaFile nil end - memoize :file_ext, :file_size, :md5 + memoize :file_ext, :file_size, :md5, :metadata end diff --git a/app/logical/upload_service.rb b/app/logical/upload_service.rb index 5279d0325..12e95d90f 100644 --- a/app/logical/upload_service.rb +++ b/app/logical/upload_service.rb @@ -104,13 +104,6 @@ class UploadService p.uploader_id = upload.uploader_id p.uploader_ip_addr = upload.uploader_ip_addr p.parent_id = upload.parent_id - p.media_asset = MediaAsset.new( - md5: upload.md5, - file_ext: upload.file_ext, - file_size: upload.file_size, - image_width: upload.image_width, - image_height: upload.image_height, - ) if !upload.uploader.can_upload_free? || upload.upload_as_pending? 
p.is_pending = true diff --git a/app/logical/upload_service/utils.rb b/app/logical/upload_service/utils.rb index bbc194f49..e82fa1950 100644 --- a/app/logical/upload_service/utils.rb +++ b/app/logical/upload_service/utils.rb @@ -62,6 +62,8 @@ class UploadService upload.tag_string = "#{upload.tag_string} #{Utils.automatic_tags(media_file)}" process_resizes(upload, file, original_post_id) + + MediaAsset.create_from_media_file!(media_file) end def automatic_tags(media_file) diff --git a/app/models/media_asset.rb b/app/models/media_asset.rb index 5dd15c7ba..6a2d8d1ae 100644 --- a/app/models/media_asset.rb +++ b/app/models/media_asset.rb @@ -1,4 +1,17 @@ class MediaAsset < ApplicationRecord + has_one :media_metadata, dependent: :destroy + + def self.create_from_media_file!(media_file) + create!( + md5: media_file.md5, + file_ext: media_file.file_ext, + file_size: media_file.file_size, + image_width: media_file.width, + image_height: media_file.height, + media_metadata: MediaMetadata.new(metadata: media_file.metadata), + ) + end + def self.search(params) q = search_attributes(params, :id, :created_at, :updated_at, :md5, :file_ext, :file_size, :image_width, :image_height) q = q.apply_default_order(params) diff --git a/app/models/media_metadata.rb b/app/models/media_metadata.rb new file mode 100644 index 000000000..d22c4e24e --- /dev/null +++ b/app/models/media_metadata.rb @@ -0,0 +1,22 @@ +# MediaMetadata represents the EXIF and other metadata associated with a +# MediaAsset (an uploaded image or video file). The `metadata` field contains a +# JSON hash of the file's metadata as returned by ExifTool. 
+# +# @see ExifTool +# @see https://exiftool.org/TagNames/index.html +class MediaMetadata < ApplicationRecord + self.table_name = "media_metadata" + + attribute :id + attribute :created_at + attribute :updated_at + attribute :media_asset_id + attribute :metadata + belongs_to :media_asset + + def self.search(params) + q = search_attributes(params, :id, :created_at, :updated_at, :media_asset_id) + q = q.apply_default_order(params) + q + end +end diff --git a/app/models/upload.rb b/app/models/upload.rb index 592696ee1..d926579e5 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -65,6 +65,7 @@ class Upload < ApplicationRecord belongs_to :uploader, :class_name => "User" belongs_to :post, optional: true + has_one :media_asset, foreign_key: :md5, primary_key: :md5 before_validation :initialize_attributes, on: :create before_validation :assign_rating_from_tags @@ -114,6 +115,7 @@ class Upload < ApplicationRecord return end + media_asset&.destroy! DanbooruLogger.info("Uploads: Deleting files for upload md5=#{md5}") Danbooru.config.storage_manager.delete_file(nil, md5, file_ext, :original) Danbooru.config.storage_manager.delete_file(nil, md5, file_ext, :large) diff --git a/app/policies/media_metadata_policy.rb b/app/policies/media_metadata_policy.rb new file mode 100644 index 000000000..fe0039160 --- /dev/null +++ b/app/policies/media_metadata_policy.rb @@ -0,0 +1,5 @@ +class MediaMetadataPolicy < ApplicationPolicy + def index? 
+ true + end +end diff --git a/config/docker/Dockerfile.danbooru b/config/docker/Dockerfile.danbooru index 9aab5c33d..d2844a00f 100644 --- a/config/docker/Dockerfile.danbooru +++ b/config/docker/Dockerfile.danbooru @@ -2,6 +2,7 @@ ARG RUBY_VERSION=2.7.1 ARG NODE_VERSION=14.15.5 ARG VIPS_VERSION=8.10.6 ARG FFMPEG_VERSION=4.3.2 +ARG EXIFTOOL_VERSION=12.30 diff --git a/config/docker/build-base-image.sh b/config/docker/build-base-image.sh index f3a805596..abf7e9a78 100755 --- a/config/docker/build-base-image.sh +++ b/config/docker/build-base-image.sh @@ -5,6 +5,7 @@ set -xeuo pipefail RUBY_VERSION="${RUBY_VERSION:-2.7.1}" VIPS_VERSION="${VIPS_VERSION:-8.10.6}" FFMPEG_VERSION="${FFMPEG_VERSION:-4.3.2}" +EXIFTOOL_VERSION="${EXIFTOOL_VERSION:-12.30}" COMMON_BUILD_DEPS=" curl ca-certificates build-essential pkg-config git @@ -59,6 +60,17 @@ install_ffmpeg() { ffprobe -version } +install_exiftool() { + EXIFTOOL_URL="https://github.com/exiftool/exiftool/archive/refs/tags/${EXIFTOOL_VERSION}.tar.gz" + curl -L "$EXIFTOOL_URL" | tar -C /usr/local/src -xzvf - + cd /usr/local/src/exiftool-${EXIFTOOL_VERSION} + + perl Makefile.PL + make -j "$(nproc)" install + + exiftool -ver +} + install_ruby() { apt_install $RUBY_BUILD_DEPS @@ -73,7 +85,7 @@ cleanup() { apt-get purge -y $RUBY_BUILD_DEPS $VIPS_BUILD_DEPS $FFMPEG_BUILD_DEPS apt-get purge -y --allow-remove-essential \ build-essential pkg-config e2fsprogs git libglib2.0-bin libglib2.0-doc \ - mount perl-modules-5.30 procps python3 readline-common shared-mime-info tzdata + mount procps python3 readline-common shared-mime-info tzdata apt-get autoremove -y rm -rf \ @@ -91,6 +103,7 @@ cleanup() { apt-get update apt_install $COMMON_BUILD_DEPS $DANBOORU_RUNTIME_DEPS install_asdf +install_exiftool install_ffmpeg install_vips install_ruby diff --git a/config/routes.rb b/config/routes.rb index c63eb34f6..0bead6648 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -155,6 +155,7 @@ Rails.application.routes.draw do end end resources 
:media_assets, only: [:index] + resources :media_metadata, only: [:index] resources :mod_actions resources :moderation_reports, only: [:new, :create, :index, :show] resources :modqueue, only: [:index] diff --git a/db/migrate/20210908015203_create_media_metadata.rb b/db/migrate/20210908015203_create_media_metadata.rb new file mode 100644 index 000000000..f0a3be190 --- /dev/null +++ b/db/migrate/20210908015203_create_media_metadata.rb @@ -0,0 +1,17 @@ +class CreateMediaMetadata < ActiveRecord::Migration[6.1] + def change + create_table :media_metadata do |t| + t.timestamps null: false + + t.references :media_asset, null: false, index: { unique: true } + t.jsonb :metadata, null: false, default: {} + t.index :metadata, using: "gin" + end + + reversible do |dir| + dir.up do + execute "INSERT INTO media_metadata (created_at, updated_at, media_asset_id, metadata) SELECT created_at, updated_at, id, '{}' FROM media_assets ORDER BY id ASC" + end + end + end +end diff --git a/db/structure.sql b/db/structure.sql index cadf7455a..1c40ccca7 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -2496,6 +2496,38 @@ CREATE SEQUENCE public.media_assets_id_seq ALTER SEQUENCE public.media_assets_id_seq OWNED BY public.media_assets.id; +-- +-- Name: media_metadata; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.media_metadata ( + id bigint NOT NULL, + created_at timestamp(6) without time zone NOT NULL, + updated_at timestamp(6) without time zone NOT NULL, + media_asset_id bigint NOT NULL, + metadata jsonb DEFAULT '{}'::jsonb NOT NULL +); + + +-- +-- Name: media_metadata_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.media_metadata_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: media_metadata_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.media_metadata_id_seq OWNED BY public.media_metadata.id; + + -- -- Name: mod_actions; Type: TABLE; Schema: 
public; Owner: - -- @@ -4321,6 +4353,13 @@ ALTER TABLE ONLY public.ip_geolocations ALTER COLUMN id SET DEFAULT nextval('pub ALTER TABLE ONLY public.media_assets ALTER COLUMN id SET DEFAULT nextval('public.media_assets_id_seq'::regclass); +-- +-- Name: media_metadata id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.media_metadata ALTER COLUMN id SET DEFAULT nextval('public.media_metadata_id_seq'::regclass); + + -- -- Name: mod_actions id; Type: DEFAULT; Schema: public; Owner: - -- @@ -4709,6 +4748,14 @@ ALTER TABLE ONLY public.media_assets ADD CONSTRAINT media_assets_pkey PRIMARY KEY (id); +-- +-- Name: media_metadata media_metadata_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.media_metadata + ADD CONSTRAINT media_metadata_pkey PRIMARY KEY (id); + + -- -- Name: mod_actions mod_actions_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- @@ -7034,6 +7081,20 @@ CREATE INDEX index_media_assets_on_md5 ON public.media_assets USING btree (md5); CREATE INDEX index_media_assets_on_updated_at ON public.media_assets USING btree (updated_at); +-- +-- Name: index_media_metadata_on_media_asset_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX index_media_metadata_on_media_asset_id ON public.media_metadata USING btree (media_asset_id); + + +-- +-- Name: index_media_metadata_on_metadata; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_media_metadata_on_metadata ON public.media_metadata USING gin (metadata); + + -- -- Name: index_mod_actions_on_created_at; Type: INDEX; Schema: public; Owner: - -- @@ -8115,6 +8176,7 @@ INSERT INTO "schema_migrations" (version) VALUES ('20210310221248'), ('20210330003356'), ('20210330093133'), -('20210901230931'); +('20210901230931'), +('20210908015203'); diff --git a/test/factories/media_asset.rb b/test/factories/media_asset.rb index 9fe45e086..e1d087964 100644 --- a/test/factories/media_asset.rb +++ b/test/factories/media_asset.rb @@ -5,5 +5,6 @@ 
FactoryBot.define do file_size { 1_000_000 } image_width { 1000 } image_height { 1000 } + media_metadata { build(:media_metadata, media_asset: instance) } end end diff --git a/test/factories/media_metadata.rb b/test/factories/media_metadata.rb new file mode 100644 index 000000000..19521133d --- /dev/null +++ b/test/factories/media_metadata.rb @@ -0,0 +1,6 @@ +FactoryBot.define do + factory(:media_metadata) do + media_asset { build(:media_asset, media_metadata: instance) } + metadata { MediaFile.open("test/files/test.jpg").metadata } + end +end diff --git a/test/functional/media_metadata_controller_test.rb b/test/functional/media_metadata_controller_test.rb new file mode 100644 index 000000000..1fb9e9313 --- /dev/null +++ b/test/functional/media_metadata_controller_test.rb @@ -0,0 +1,14 @@ +require 'test_helper' + +class MediaMetadataControllerTest < ActionDispatch::IntegrationTest + context "The media metadata controller" do + context "index action" do + should "render" do + create(:media_metadata) + get media_metadata_path, as: :json + + assert_response :success + end + end + end +end diff --git a/test/test_helpers/upload_test_helper.rb b/test/test_helpers/upload_test_helper.rb index ef7937780..fc05a9989 100644 --- a/test/test_helpers/upload_test_helper.rb +++ b/test/test_helpers/upload_test_helper.rb @@ -32,6 +32,8 @@ module UploadTestHelper assert_equal("completed", upload.status) assert_equal(Post.last, upload.post) assert_equal(upload.post.md5, upload.md5) + assert_not_nil(upload.media_asset) + assert_operator(upload.media_asset.media_metadata.metadata.count, :>=, 1) upload end diff --git a/test/unit/upload_service_test.rb b/test/unit/upload_service_test.rb index 89c8859b2..ea818716f 100644 --- a/test/unit/upload_service_test.rb +++ b/test/unit/upload_service_test.rb @@ -85,6 +85,22 @@ class UploadServiceTest < ActiveSupport::TestCase assert_equal(335, @upload.image_height) assert_equal(500, @upload.image_width) end + + should "create a media asset" do + 
UploadService::Utils.expects(:distribute_files).times(3) + UploadService::Utils.process_file(@upload, @upload.file.tempfile) + + @media_asset = @upload.media_asset + assert_not_nil(@media_asset) + assert_equal("ecef68c44edb8a0d6a3070b5f8e8ee76", @media_asset.md5) + assert_equal("jpg", @media_asset.file_ext) + assert_equal(28086, @media_asset.file_size) + assert_equal(500, @media_asset.image_width) + assert_equal(335, @media_asset.image_height) + + metadata = @media_asset.media_metadata.metadata + assert_equal(91, metadata.count) + end end end @@ -935,17 +951,6 @@ class UploadServiceTest < ActiveSupport::TestCase assert_equal([], post.errors.full_messages) assert_not_nil(post.id) end - - should "create a media asset" do - post = subject.new({}).create_post_from_upload(@upload) - - assert_not_nil(post.media_asset) - assert_equal("12345", post.media_asset.md5) - assert_equal("jpg", post.media_asset.file_ext) - assert_equal(1000, post.media_asset.file_size) - assert_equal(100, post.media_asset.image_width) - assert_equal(100, post.media_asset.image_height) - end end end