Add MediaMetadata model.

Add a model for storing image and video metadata for uploaded files.

Metadata is extracted using ExifTool. You will need to install ExifTool
after this commit. ExifTool 12.22 is the minimum required version
because we use the `--binary` option, which was added in this release.

The MediaMetadata model is separate from the MediaAsset model because
some files contain tons of metadata, and most of it is non-essential.
The MediaAsset model represents an uploaded file and contains essential
metadata, like the file's size and type, while the MediaMetadata model
represents all the other non-essential metadata associated with a file.

Metadata is stored as a JSON column in the database.

ExifTool returns all of a file's metadata, not just its EXIF metadata.
EXIF is only one of several types of image metadata, which is why the
model is called MediaMetadata instead of EXIFMetadata.
This commit is contained in:
evazion
2021-09-07 18:22:34 -05:00
parent 291758ddb7
commit 3d660953d4
20 changed files with 235 additions and 21 deletions

View File

@@ -49,6 +49,7 @@ apt-get -y install $LIBSSL_DEV_PKG build-essential automake libxml2-dev libxslt-
apt-get -y install libpq-dev postgresql-client apt-get -y install libpq-dev postgresql-client
apt-get -y install liblcms2-dev $LIBJPEG_TURBO_DEV_PKG libexpat1-dev libgif-dev libpng-dev libexif-dev apt-get -y install liblcms2-dev $LIBJPEG_TURBO_DEV_PKG libexpat1-dev libgif-dev libpng-dev libexif-dev
apt-get -y install gcc g++ apt-get -y install gcc g++
apt-get -y install exiftool
curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add -
echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list

View File

@@ -0,0 +1,8 @@
# Read-only API endpoint for listing MediaMetadata records.
class MediaMetadataController < ApplicationController
  respond_to :json, :xml

  # List the media metadata records visible to the current user, filtered and
  # paginated according to the request params.
  def index
    visible_metadata = MediaMetadata.visible(CurrentUser.user)
    @media_metadata = authorize(visible_metadata.paginated_search(params, count_pages: true))

    respond_with(@media_metadata)
  end
end

42
app/logical/exif_tool.rb Normal file
View File

@@ -0,0 +1,42 @@
require "open3"
require "shellwords"
# A wrapper for the exiftool command.
class ExifTool
  extend Memoist

  # Raised when the exiftool command exits with a non-zero status.
  class Error < StandardError; end

  # The default command line options passed to exiftool. The `-x` options
  # exclude tags from the output.
  #
  # @see https://exiftool.org/exiftool_pod.html#OPTIONS
  DEFAULT_OPTIONS = %q(
    -G1 -duplicates -unknown -struct --binary
    -x 'System:*' -x ExifToolVersion -x FileType -x FileTypeExtension
    -x MIMEType -x ImageWidth -x ImageHeight -x ImageSize -x MegaPixels
  ).squish

  attr_reader :file

  # Open a file with ExifTool.
  #
  # When given a String path, the file is opened here; this class never
  # closes it.
  #
  # @param file [File, String] an image or video file
  def initialize(file)
    @file = file.is_a?(String) ? File.open(file) : file
  end

  # Get the file's metadata. The result is memoized per set of arguments.
  #
  # @see https://exiftool.org/TagNames/index.html
  # @param options [String] the options to pass to exiftool
  # @return [Hash] the file's metadata
  # @raise [ExifTool::Error] if the exiftool command fails
  def metadata(options: DEFAULT_OPTIONS)
    output = shell!("exiftool #{options} -json #{file.path.shellescape}")

    # exiftool returns a JSON array with one object per input file.
    json = JSON.parse(output).first
    json = json.except("SourceFile")
    json.with_indifferent_access
  end

  # Run a shell command and return its standard output.
  #
  # Standard error is captured separately so that anything exiftool prints
  # there can't end up in the text handed to the JSON parser.
  #
  # @param command [String] the shell command to run
  # @return [String] the command's standard output
  # @raise [ExifTool::Error] if the command exits with a non-zero status
  def shell!(command)
    stdout, stderr, status = Open3.capture3(command)
    raise Error, "`#{command}` failed: #{stderr}#{stdout}" unless status.success?

    stdout
  end

  memoize :metadata
end

View File

@@ -102,6 +102,10 @@ class MediaFile
file.size file.size
end end
# Extract the file's metadata using ExifTool.
# @return [Hash] the file's metadata, as returned by ExifTool#metadata
def metadata
  exiftool = ExifTool.new(file)
  exiftool.metadata
end
# @return [Boolean] true if the file is an image # @return [Boolean] true if the file is an image
def is_image? def is_image?
file_ext.in?([:jpg, :png, :gif]) file_ext.in?([:jpg, :png, :gif])
@@ -164,5 +168,5 @@ class MediaFile
nil nil
end end
memoize :file_ext, :file_size, :md5 memoize :file_ext, :file_size, :md5, :metadata
end end

View File

@@ -104,13 +104,6 @@ class UploadService
p.uploader_id = upload.uploader_id p.uploader_id = upload.uploader_id
p.uploader_ip_addr = upload.uploader_ip_addr p.uploader_ip_addr = upload.uploader_ip_addr
p.parent_id = upload.parent_id p.parent_id = upload.parent_id
p.media_asset = MediaAsset.new(
md5: upload.md5,
file_ext: upload.file_ext,
file_size: upload.file_size,
image_width: upload.image_width,
image_height: upload.image_height,
)
if !upload.uploader.can_upload_free? || upload.upload_as_pending? if !upload.uploader.can_upload_free? || upload.upload_as_pending?
p.is_pending = true p.is_pending = true

View File

@@ -62,6 +62,8 @@ class UploadService
upload.tag_string = "#{upload.tag_string} #{Utils.automatic_tags(media_file)}" upload.tag_string = "#{upload.tag_string} #{Utils.automatic_tags(media_file)}"
process_resizes(upload, file, original_post_id) process_resizes(upload, file, original_post_id)
MediaAsset.create_from_media_file!(media_file)
end end
def automatic_tags(media_file) def automatic_tags(media_file)

View File

@@ -1,4 +1,17 @@
class MediaAsset < ApplicationRecord class MediaAsset < ApplicationRecord
has_one :media_metadata, dependent: :destroy
# Create a MediaAsset, together with its MediaMetadata record, from a media file.
#
# @param media_file [MediaFile] the file to read the attributes and metadata from
# @return [MediaAsset] the newly created asset
def self.create_from_media_file!(media_file)
  asset_attributes = {
    md5: media_file.md5,
    file_ext: media_file.file_ext,
    file_size: media_file.file_size,
    image_width: media_file.width,
    image_height: media_file.height,
    media_metadata: MediaMetadata.new(metadata: media_file.metadata),
  }

  create!(asset_attributes)
end
def self.search(params) def self.search(params)
q = search_attributes(params, :id, :created_at, :updated_at, :md5, :file_ext, :file_size, :image_width, :image_height) q = search_attributes(params, :id, :created_at, :updated_at, :md5, :file_ext, :file_size, :image_width, :image_height)
q = q.apply_default_order(params) q = q.apply_default_order(params)

View File

@@ -0,0 +1,22 @@
# MediaMetadata represents the EXIF and other metadata associated with a
# MediaAsset (an uploaded image or video file). The `metadata` field contains a
# JSON hash of the file's metadata as returned by ExifTool.
#
# @see ExifTool
# @see https://exiftool.org/TagNames/index.html
class MediaMetadata < ApplicationRecord
  self.table_name = "media_metadata"

  attribute :id
  attribute :created_at
  attribute :updated_at
  attribute :media_asset_id
  attribute :metadata

  belongs_to :media_asset

  # Search media metadata records by id, timestamps, or media asset id, and
  # apply the default ordering.
  #
  # @param params [Hash] the search params
  # @return the search result, as returned by `apply_default_order`
  def self.search(params)
    search_attributes(params, :id, :created_at, :updated_at, :media_asset_id)
      .apply_default_order(params)
  end
end

View File

@@ -65,6 +65,7 @@ class Upload < ApplicationRecord
belongs_to :uploader, :class_name => "User" belongs_to :uploader, :class_name => "User"
belongs_to :post, optional: true belongs_to :post, optional: true
has_one :media_asset, foreign_key: :md5, primary_key: :md5
before_validation :initialize_attributes, on: :create before_validation :initialize_attributes, on: :create
before_validation :assign_rating_from_tags before_validation :assign_rating_from_tags
@@ -114,6 +115,7 @@ class Upload < ApplicationRecord
return return
end end
media_asset.destroy!
DanbooruLogger.info("Uploads: Deleting files for upload md5=#{md5}") DanbooruLogger.info("Uploads: Deleting files for upload md5=#{md5}")
Danbooru.config.storage_manager.delete_file(nil, md5, file_ext, :original) Danbooru.config.storage_manager.delete_file(nil, md5, file_ext, :original)
Danbooru.config.storage_manager.delete_file(nil, md5, file_ext, :large) Danbooru.config.storage_manager.delete_file(nil, md5, file_ext, :large)

View File

@@ -0,0 +1,5 @@
# Authorization policy for MediaMetadata records.
class MediaMetadataPolicy < ApplicationPolicy
# The index action is always permitted by this policy.
def index?
true
end
end

View File

@@ -2,6 +2,7 @@ ARG RUBY_VERSION=2.7.1
ARG NODE_VERSION=14.15.5 ARG NODE_VERSION=14.15.5
ARG VIPS_VERSION=8.10.6 ARG VIPS_VERSION=8.10.6
ARG FFMPEG_VERSION=4.3.2 ARG FFMPEG_VERSION=4.3.2
ARG EXIFTOOL_VERSION=12.30

View File

@@ -5,6 +5,7 @@ set -xeuo pipefail
RUBY_VERSION="${RUBY_VERSION:-2.7.1}" RUBY_VERSION="${RUBY_VERSION:-2.7.1}"
VIPS_VERSION="${VIPS_VERSION:-8.10.6}" VIPS_VERSION="${VIPS_VERSION:-8.10.6}"
FFMPEG_VERSION="${FFMPEG_VERSION:-4.3.2}" FFMPEG_VERSION="${FFMPEG_VERSION:-4.3.2}"
EXIFTOOL_VERSION="${EXIFTOOL_VERSION:-12.30}"
COMMON_BUILD_DEPS=" COMMON_BUILD_DEPS="
curl ca-certificates build-essential pkg-config git curl ca-certificates build-essential pkg-config git
@@ -59,6 +60,17 @@ install_ffmpeg() {
ffprobe -version ffprobe -version
} }
# Download the ExifTool source tarball for $EXIFTOOL_VERSION from GitHub,
# build and install it, then print the installed version as a sanity check.
install_exiftool() {
  EXIFTOOL_URL="https://github.com/exiftool/exiftool/archive/refs/tags/${EXIFTOOL_VERSION}.tar.gz"

  # --fail makes curl exit non-zero on an HTTP error instead of piping the
  # server's error page into tar.
  curl --fail -L "$EXIFTOOL_URL" | tar -C /usr/local/src -xzvf -
  cd "/usr/local/src/exiftool-${EXIFTOOL_VERSION}"

  perl Makefile.PL
  make -j "$(nproc)" install

  exiftool -ver
}
install_ruby() { install_ruby() {
apt_install $RUBY_BUILD_DEPS apt_install $RUBY_BUILD_DEPS
@@ -73,7 +85,7 @@ cleanup() {
apt-get purge -y $RUBY_BUILD_DEPS $VIPS_BUILD_DEPS $FFMPEG_BUILD_DEPS apt-get purge -y $RUBY_BUILD_DEPS $VIPS_BUILD_DEPS $FFMPEG_BUILD_DEPS
apt-get purge -y --allow-remove-essential \ apt-get purge -y --allow-remove-essential \
build-essential pkg-config e2fsprogs git libglib2.0-bin libglib2.0-doc \ build-essential pkg-config e2fsprogs git libglib2.0-bin libglib2.0-doc \
mount perl-modules-5.30 procps python3 readline-common shared-mime-info tzdata mount procps python3 readline-common shared-mime-info tzdata
apt-get autoremove -y apt-get autoremove -y
rm -rf \ rm -rf \
@@ -91,6 +103,7 @@ cleanup() {
apt-get update apt-get update
apt_install $COMMON_BUILD_DEPS $DANBOORU_RUNTIME_DEPS apt_install $COMMON_BUILD_DEPS $DANBOORU_RUNTIME_DEPS
install_asdf install_asdf
install_exiftool
install_ffmpeg install_ffmpeg
install_vips install_vips
install_ruby install_ruby

View File

@@ -155,6 +155,7 @@ Rails.application.routes.draw do
end end
end end
resources :media_assets, only: [:index] resources :media_assets, only: [:index]
resources :media_metadata, only: [:index]
resources :mod_actions resources :mod_actions
resources :moderation_reports, only: [:new, :create, :index, :show] resources :moderation_reports, only: [:new, :create, :index, :show]
resources :modqueue, only: [:index] resources :modqueue, only: [:index]

View File

@@ -0,0 +1,17 @@
# Create the media_metadata table, which stores one JSON metadata record per
# media asset, and backfill an empty record for every existing media asset.
class CreateMediaMetadata < ActiveRecord::Migration[6.1]
  def change
    create_table :media_metadata do |t|
      t.timestamps null: false
      t.references :media_asset, null: false, index: { unique: true }

      # The default must be a Hash, not the String '{}'. A String default is
      # serialized as the JSON string "{}" rather than as an empty JSON object.
      t.jsonb :metadata, null: false, default: {}
      t.index :metadata, using: "gin"
    end

    reversible do |dir|
      dir.up do
        # Backfill an empty metadata record for each existing media asset.
        execute "INSERT INTO media_metadata (created_at, updated_at, media_asset_id, metadata) SELECT created_at, updated_at, id, '{}' FROM media_assets ORDER BY id ASC"
      end
    end
  end
end

View File

@@ -2496,6 +2496,38 @@ CREATE SEQUENCE public.media_assets_id_seq
ALTER SEQUENCE public.media_assets_id_seq OWNED BY public.media_assets.id; ALTER SEQUENCE public.media_assets_id_seq OWNED BY public.media_assets.id;
--
-- Name: media_metadata; Type: TABLE; Schema: public; Owner: -
--
-- NOTE(review): the metadata default below is the JSON *string* "{}", not an
-- empty JSON object. This looks like it comes from the migration declaring the
-- default as the Ruby string '{}' -- confirm and regenerate this dump if the
-- migration is changed.
CREATE TABLE public.media_metadata (
id bigint NOT NULL,
created_at timestamp(6) without time zone NOT NULL,
updated_at timestamp(6) without time zone NOT NULL,
media_asset_id bigint NOT NULL,
metadata jsonb DEFAULT '"{}"'::jsonb NOT NULL
);
--
-- Name: media_metadata_id_seq; Type: SEQUENCE; Schema: public; Owner: -
--
CREATE SEQUENCE public.media_metadata_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
--
-- Name: media_metadata_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
--
ALTER SEQUENCE public.media_metadata_id_seq OWNED BY public.media_metadata.id;
-- --
-- Name: mod_actions; Type: TABLE; Schema: public; Owner: - -- Name: mod_actions; Type: TABLE; Schema: public; Owner: -
-- --
@@ -4321,6 +4353,13 @@ ALTER TABLE ONLY public.ip_geolocations ALTER COLUMN id SET DEFAULT nextval('pub
ALTER TABLE ONLY public.media_assets ALTER COLUMN id SET DEFAULT nextval('public.media_assets_id_seq'::regclass); ALTER TABLE ONLY public.media_assets ALTER COLUMN id SET DEFAULT nextval('public.media_assets_id_seq'::regclass);
--
-- Name: media_metadata id; Type: DEFAULT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.media_metadata ALTER COLUMN id SET DEFAULT nextval('public.media_metadata_id_seq'::regclass);
-- --
-- Name: mod_actions id; Type: DEFAULT; Schema: public; Owner: - -- Name: mod_actions id; Type: DEFAULT; Schema: public; Owner: -
-- --
@@ -4709,6 +4748,14 @@ ALTER TABLE ONLY public.media_assets
ADD CONSTRAINT media_assets_pkey PRIMARY KEY (id); ADD CONSTRAINT media_assets_pkey PRIMARY KEY (id);
--
-- Name: media_metadata media_metadata_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.media_metadata
ADD CONSTRAINT media_metadata_pkey PRIMARY KEY (id);
-- --
-- Name: mod_actions mod_actions_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- Name: mod_actions mod_actions_pkey; Type: CONSTRAINT; Schema: public; Owner: -
-- --
@@ -7034,6 +7081,20 @@ CREATE INDEX index_media_assets_on_md5 ON public.media_assets USING btree (md5);
CREATE INDEX index_media_assets_on_updated_at ON public.media_assets USING btree (updated_at); CREATE INDEX index_media_assets_on_updated_at ON public.media_assets USING btree (updated_at);
--
-- Name: index_media_metadata_on_media_asset_id; Type: INDEX; Schema: public; Owner: -
--
CREATE UNIQUE INDEX index_media_metadata_on_media_asset_id ON public.media_metadata USING btree (media_asset_id);
--
-- Name: index_media_metadata_on_metadata; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_media_metadata_on_metadata ON public.media_metadata USING gin (metadata);
-- --
-- Name: index_mod_actions_on_created_at; Type: INDEX; Schema: public; Owner: - -- Name: index_mod_actions_on_created_at; Type: INDEX; Schema: public; Owner: -
-- --
@@ -8115,6 +8176,7 @@ INSERT INTO "schema_migrations" (version) VALUES
('20210310221248'), ('20210310221248'),
('20210330003356'), ('20210330003356'),
('20210330093133'), ('20210330093133'),
('20210901230931'); ('20210901230931'),
('20210908015203');

View File

@@ -5,5 +5,6 @@ FactoryBot.define do
file_size { 1_000_000 } file_size { 1_000_000 }
image_width { 1000 } image_width { 1000 }
image_height { 1000 } image_height { 1000 }
media_metadata { build(:media_metadata, media_asset: instance) }
end end
end end

View File

@@ -0,0 +1,6 @@
FactoryBot.define do
  # Builds a MediaMetadata record whose metadata is extracted from the
  # test/files/test.jpg fixture.
  factory :media_metadata do
    # Build the associated asset with this record already attached to it.
    media_asset do
      build(:media_asset, media_metadata: instance)
    end

    metadata do
      MediaFile.open("test/files/test.jpg").metadata
    end
  end
end

View File

@@ -0,0 +1,14 @@
require 'test_helper'
# Integration tests for the media metadata listing endpoint.
class MediaMetadataControllerTest < ActionDispatch::IntegrationTest
  context "The media metadata controller" do
    context "index action" do
      should "render" do
        # Make sure there is at least one record to list.
        create(:media_metadata)

        get(media_metadata_path, as: :json)

        assert_response(:success)
      end
    end
  end
end

View File

@@ -32,6 +32,8 @@ module UploadTestHelper
assert_equal("completed", upload.status) assert_equal("completed", upload.status)
assert_equal(Post.last, upload.post) assert_equal(Post.last, upload.post)
assert_equal(upload.post.md5, upload.md5) assert_equal(upload.post.md5, upload.md5)
assert_not_nil(upload.media_asset)
assert_operator(upload.media_asset.media_metadata.metadata.count, :>=, 1)
upload upload
end end

View File

@@ -85,6 +85,22 @@ class UploadServiceTest < ActiveSupport::TestCase
assert_equal(335, @upload.image_height) assert_equal(335, @upload.image_height)
assert_equal(500, @upload.image_width) assert_equal(500, @upload.image_width)
end end
# Processing an uploaded file should create a MediaAsset for it, along with
# the metadata extracted from the file.
should "create a media asset" do
# The expectation must be set before the file is processed.
UploadService::Utils.expects(:distribute_files).times(3)
UploadService::Utils.process_file(@upload, @upload.file.tempfile)
@media_asset = @upload.media_asset
assert_not_nil(@media_asset)
assert_equal("ecef68c44edb8a0d6a3070b5f8e8ee76", @media_asset.md5)
assert_equal("jpg", @media_asset.file_ext)
assert_equal(28086, @media_asset.file_size)
assert_equal(500, @media_asset.image_width)
assert_equal(335, @media_asset.image_height)
metadata = @media_asset.media_metadata.metadata
# NOTE(review): the exact number of tags presumably depends on the installed
# ExifTool version -- confirm this count is stable across environments.
assert_equal(91, metadata.count)
end
end end
end end
@@ -935,17 +951,6 @@ class UploadServiceTest < ActiveSupport::TestCase
assert_equal([], post.errors.full_messages) assert_equal([], post.errors.full_messages)
assert_not_nil(post.id) assert_not_nil(post.id)
end end
should "create a media asset" do
post = subject.new({}).create_post_from_upload(@upload)
assert_not_nil(post.media_asset)
assert_equal("12345", post.media_asset.md5)
assert_equal("jpg", post.media_asset.file_ext)
assert_equal(1000, post.media_asset.file_size)
assert_equal(100, post.media_asset.image_width)
assert_equal(100, post.media_asset.image_height)
end
end end
end end