uploads: allow searching uploads and media assets by metatag.

Allow searching the /uploads and /media_assets pages by the following metatags:

* id:
* md5:
* width:
* height:
* duration:
* mpixels:
* ratio:
* filesize:
* filetype:
* date:
* age:
* status:<processing|active|deleted|expunged|failed> (for /media_assets)
* status:<pending|processing|active|failed> (for /uploads)
* is:<filetype>, is:<status>
* exif:

Examples:

* https://betabooru.donmai.us/media_assets?search[ai_tags_match]=filetype:png
* https://betabooru.donmai.us/uploads?search[ai_tags_match]=filetype:png

Note that in /uploads search, the id:, date:, and age: metatags refer to the upload media asset, not
the upload itself.

Note also that uploads may contain multiple assets, so for example searching uploads by
`filetype:png` will return all uploads containing at least one PNG file, even if they contain other
non-PNG files.
This commit is contained in:
evazion
2022-12-07 00:53:04 -06:00
parent 062a67086e
commit 2c33539be7
8 changed files with 232 additions and 18 deletions

View File

@@ -1,8 +1,7 @@
# frozen_string_literal: true
# An AITagQuery is a tag search performed on media assets using AI tags. Only
# basic tags are allowed, no metatags.
class AITagQuery
# A MediaAssetQuery is a tag search performed on media assets (or upload media assets) using AI tags and metatags.
class MediaAssetQuery
extend Memoist
attr_reader :search_string
@@ -36,7 +35,7 @@ class AITagQuery
ai_tag = AITag.named(node.name).where(score: score_range)
relation.where(ai_tag.where(AITag.arel_table[:media_asset_id].eq(relation.arel_table[foreign_key])).arel.exists)
in :metatag
relation.none
metatag_matches(node.name, node.value, relation)
in :wildcard
relation.none
in :not
@@ -54,4 +53,37 @@ class AITagQuery
end
end
end
# Translates a single metatag (e.g. `width:1000`) into a condition on `relation`.
#
# Columns that live on the media asset are referenced with an explicit
# "media_assets." prefix. `id`, `date`, `age` and `status` use bare column
# names instead; per the commit notes, for upload searches `id:`, `date:` and
# `age:` refer to the upload media asset.
#
# @param name [String] the metatag name, e.g. "md5", "filetype" or "exif"
# @param value [String] the raw metatag value, handed to the matcher as-is
# @param relation the relation being searched; it must respond to
#   `attribute_matches`, `is_matches`, `exif_matches` and `none`
# @return the narrowed relation, or `relation.none` for an unrecognized metatag
def metatag_matches(name, value, relation)
  case name
  when "id"
    relation.attribute_matches(value, :id)
  when "md5"
    relation.attribute_matches(value, "media_assets.md5", :md5)
  when "width"
    relation.attribute_matches(value, "media_assets.image_width")
  when "height"
    relation.attribute_matches(value, "media_assets.image_height")
  when "duration"
    relation.attribute_matches(value, "media_assets.duration", :float)
  when "mpixels"
    relation.attribute_matches(value, "(media_assets.image_width * media_assets.image_height) / 1000000.0", :float)
  when "ratio"
    relation.attribute_matches(value, "ROUND(media_assets.image_width::numeric / media_assets.image_height::numeric, 2)", :ratio)
  when "filesize"
    relation.attribute_matches(value, "media_assets.file_size", :filesize)
  when "filetype"
    relation.attribute_matches(value, "media_assets.file_ext", :enum)
  when "date"
    relation.attribute_matches(value, :created_at, :date)
  when "age"
    relation.attribute_matches(value, :created_at, :age)
  when "status"
    relation.attribute_matches(value, :status, :enum)
  when "is"
    relation.is_matches(value)
  when "exif"
    relation.exif_matches(value)
  else
    # An unsupported metatag matches nothing. Without this branch the method
    # would return nil, which the caller cannot treat as a relation.
    relation.none
  end
end
end

View File

@@ -204,7 +204,30 @@ class MediaAsset < ApplicationRecord
concerning :SearchMethods do
class_methods do
# Searches this relation with a tag query string by delegating to the query
# class's `search`, passing `self` as the relation.
#
# @param tag_string the search string to run
# @param score_range forwarded to the query as `score_range:`; defaults to 50 and above
#
# NOTE(review): this diff hunk lists both the previous AITagQuery call and its
# MediaAssetQuery replacement. Read as plain Ruby, only the last expression
# (the MediaAssetQuery call) is the return value.
def ai_tags_match(tag_string, score_range: (50..))
AITagQuery.search(tag_string, relation: self, score_range: score_range)
MediaAssetQuery.search(tag_string, relation: self, score_range: score_range)
end
# Handles the `is:` metatag for media assets. The value may name either a
# status (any key of `MediaAsset.statuses`) or a file type (any entry of
# `FILE_TYPES`); anything else matches nothing.
#
# The value is lowercased once and that normalized form is used both to pick
# the branch and to build the condition, so `is:Active` and `is:active`
# produce the same query.
#
# @param value [String] the raw metatag value, e.g. "active" or "png"
# @return a relation filtered by status or file type, or `none`
def is_matches(value)
  normalized = value.to_s.downcase

  case normalized
  when *MediaAsset.statuses.keys
    where(status: normalized)
  when *FILE_TYPES
    attribute_matches(normalized, :file_ext, :enum)
  else
    none
  end
end
# Handles the `exif:` metatag by joining the asset's media metadata and
# filtering on the "media_metadata.metadata" JSON column.
#
# Two forms are accepted:
# * "File:ColorComponents=3" - key and value, split on the first "="
# * "File:ColorComponents"   - key only
#
# @param string [String] the raw metatag value
# @return the relation joined to media metadata and filtered accordingly
def exif_matches(string)
  with_metadata = joins(:media_metadata)
  key, separator, value = string.partition("=")

  # string = File:ColorComponents
  return with_metadata.where_json_has_key("media_metadata.metadata", string) if separator.empty?

  # string = File:ColorComponents=3
  with_metadata.where_json_contains("media_metadata.metadata", { key => value })
end
def search(params, current_user)

View File

@@ -1284,17 +1284,7 @@ class Post < ApplicationRecord
end
# Handles the `exif:` metatag for posts by restricting posts to those whose
# md5 appears in a matching metadata/asset subquery.
#
# NOTE(review): this diff hunk (17 old lines -> 7 new lines) interleaves the
# previous inline implementation with its one-line replacement. Read as plain
# Ruby, the `metadata` branches and the first `where` are evaluated but their
# result is discarded; only the final `where` is the return value.
def exif_matches(string)
# string = exif:File:ColorComponents=3
if string.include?("=")
key, value = string.split(/=/, 2)
hash = { key => value }
metadata = MediaMetadata.joins(:media_asset).where_json_contains(:metadata, hash)
# string = exif:File:ColorComponents
else
metadata = MediaMetadata.joins(:media_asset).where_json_has_key(:metadata, string)
end
# Previous form: filter by the md5s selected from the metadata subquery built above.
where(md5: metadata.select(:md5))
# Replacement form: delegate the exif parsing to MediaAsset.exif_matches and filter by its md5s.
where(md5: MediaAsset.exif_matches(string).select(:md5))
end
def ai_tags_include(value, default_confidence: ">=50")

View File

@@ -126,7 +126,7 @@ class Upload < ApplicationRecord
end
# Searches uploads with a tag query string. The query is run against upload
# media assets (matched to AI tags through `media_asset_id`), and an upload is
# kept when an EXISTS subquery finds at least one matching upload media asset
# with `upload_media_assets.upload_id = uploads.id`.
#
# @param tag_string the search string to run
# @param score_range forwarded to the query as `score_range:`; defaults to 50 and above
#
# NOTE(review): this diff hunk lists both the previous AITagQuery assignment and
# its MediaAssetQuery replacement. Read as plain Ruby, the second assignment
# overwrites the first, so only the relation built on
# `UploadMediaAsset.joins(:media_asset)` is used.
def self.ai_tags_match(tag_string, score_range: (50..))
upload_media_assets = AITagQuery.search(tag_string, relation: UploadMediaAsset.all, foreign_key: :media_asset_id, score_range: score_range)
upload_media_assets = MediaAssetQuery.search(tag_string, relation: UploadMediaAsset.joins(:media_asset), foreign_key: :media_asset_id, score_range: score_range)
where(upload_media_assets.where("upload_media_assets.upload_id = uploads.id").arel.exists)
end

View File

@@ -41,6 +41,21 @@ class UploadMediaAsset < ApplicationRecord
expired.update_all(status: :failed, error: "Stuck processing for more than 4 hours")
end
# Handles the `is:` metatag for upload media assets. The value may name either
# a status (any key of `UploadMediaAsset.statuses`) or a file type (any entry
# of `MediaAsset::FILE_TYPES`); anything else matches nothing.
#
# The value is lowercased once and that normalized form is used both to pick
# the branch and to build the condition, so `is:Active` and `is:active`
# produce the same query.
#
# NOTE(review): `:file_ext` is unqualified here, whereas the `filetype:` metatag
# uses "media_assets.file_ext"; confirm it resolves to the joined media asset
# column when this is called on `UploadMediaAsset.joins(:media_asset)`.
#
# @param value [String] the raw metatag value, e.g. "active" or "png"
# @return a relation filtered by status or file type, or `none`
def self.is_matches(value)
  normalized = value.to_s.downcase

  case normalized
  when *UploadMediaAsset.statuses.keys
    where(status: normalized)
  when *MediaAsset::FILE_TYPES
    attribute_matches(normalized, :file_ext, :enum)
  else
    none
  end
end
# Handles the `exif:` metatag for upload media assets by merging in the
# conditions built by `MediaAsset.exif_matches` for the same string.
#
# @param string [String] the raw metatag value, e.g. "File:FileType=PNG"
# @return this relation merged with the media asset exif scope
def self.exif_matches(string)
  asset_scope = MediaAsset.exif_matches(string)
  merge(asset_scope)
end
def self.search(params, current_user)
q = search_attributes(params, [:id, :created_at, :updated_at, :status, :source_url, :page_url, :error, :upload, :media_asset, :post], current_user: current_user)

View File

@@ -1,6 +1,6 @@
FactoryBot.define do
factory(:media_asset) do
md5 { SecureRandom.hex(32) }
md5 { SecureRandom.hex(16) }
file_ext { "jpg" }
file_size { 1_000_000 }
image_width { 1000 }

View File

@@ -0,0 +1,74 @@
require 'test_helper'
# Unit tests for searching media assets by metatag through MediaAsset.ai_tags_match.
class MediaAssetTest < ActiveSupport::TestCase
  # Asserts that `query` returns exactly `assets`. Results are ordered by id
  # descending, so `assets` must be listed newest-first.
  def assert_tag_match(assets, query)
    assert_equal(assets.map(&:id), MediaAsset.ai_tags_match(query).order(id: :desc).pluck("id"))
  end

  context "MediaAsset" do
    context "searching" do
      setup do
        @asset1 = create(:media_asset, image_width: 720, image_height: 1280, file_size: 1.megabyte, file_ext: "jpg", created_at: Time.zone.now, media_metadata: build(:media_metadata, metadata: { "File:FileType" => "JPEG" }))
        # Time.zone.parse (rather than Time.parse) builds the fixture date in the
        # same zone as Time.zone.now above, so the date: test does not depend on
        # the time zone of the machine running the suite.
        @asset2 = create(:media_asset, image_width: 1920, image_height: 1080, file_size: 2.megabytes, file_ext: "png", duration: 3.0, created_at: Time.zone.parse("2022-01-01"), media_metadata: build(:media_metadata, metadata: { "File:FileType" => "PNG" }))
      end

      should "return assets for the id: metatag" do
        assert_tag_match([@asset1], "id:#{@asset1.id}")
      end

      should "return assets for the md5: metatag" do
        assert_tag_match([@asset1], "md5:#{@asset1.md5}")
      end

      should "return assets for the width: metatag" do
        assert_tag_match([@asset1], "width:#{@asset1.image_width}")
      end

      should "return assets for the height: metatag" do
        assert_tag_match([@asset1], "height:#{@asset1.image_height}")
      end

      should "return assets for the duration: metatag" do
        assert_tag_match([@asset2], "duration:3")
      end

      should "return assets for the mpixels: metatag" do
        assert_tag_match([@asset1], "mpixels:#{(@asset1.image_width * @asset1.image_height) / 1_000_000.0}")
      end

      should "return assets for the ratio: metatag" do
        assert_tag_match([@asset1], "ratio:#{@asset1.image_width.to_f / @asset1.image_height}")
      end

      should "return assets for the filesize: metatag" do
        assert_tag_match([@asset1], "filesize:1mb")
      end

      should "return assets for the filetype: metatag" do
        assert_tag_match([@asset1], "filetype:jpg")
      end

      should "return assets for the date: metatag" do
        assert_tag_match([@asset2], "date:2022-01-01")
      end

      should "return assets for the age: metatag" do
        assert_tag_match([@asset1], "age:<1minute")
      end

      should "return assets for the status: metatag" do
        assert_tag_match([@asset2, @asset1], "status:active")
      end

      should "return assets for the is: metatag" do
        assert_tag_match([@asset1], "is:jpg")
        assert_tag_match([@asset2, @asset1], "is:active")
      end

      should "return assets for the exif: metatag" do
        assert_tag_match([@asset2, @asset1], "exif:File:FileType")
        assert_tag_match([@asset1], "exif:File:FileType=JPEG")
      end
    end
  end
end

80
test/unit/upload_test.rb Normal file
View File

@@ -0,0 +1,80 @@
require 'test_helper'
# Unit tests for searching uploads by metatag through Upload.ai_tags_match.
class UploadTest < ActiveSupport::TestCase
  # Asserts that `query` returns exactly `uploads`. Results are ordered by id
  # descending, so `uploads` must be listed newest-first.
  def assert_tag_match(uploads, query)
    assert_equal(uploads.map(&:id), Upload.ai_tags_match(query).order(id: :desc).pluck("id"))
  end

  context "Upload" do
    context "searching" do
      setup do
        @asset1 = create(:media_asset, image_width: 720, image_height: 1280, file_size: 1.megabyte, file_ext: "jpg", media_metadata: build(:media_metadata, metadata: { "File:FileType" => "JPEG" }))
        @asset2 = create(:media_asset, image_width: 1920, image_height: 1080, file_size: 2.megabytes, file_ext: "png", duration: 3.0, media_metadata: build(:media_metadata, metadata: { "File:FileType" => "PNG" }))

        # Time.zone.parse (rather than Time.parse) builds the fixture dates in the
        # same zone as Time.zone.now, so the date: test does not depend on the
        # time zone of the machine running the suite.
        @uma1 = build(:upload_media_asset, media_asset: @asset1, status: "active", created_at: Time.zone.now)
        @uma2 = build(:upload_media_asset, media_asset: @asset2, status: "active", created_at: Time.zone.parse("2022-01-01"))

        @upload1 = create(:upload, created_at: Time.zone.now, upload_media_assets: [@uma1])
        @upload2 = create(:upload, created_at: Time.zone.parse("2022-01-01"), upload_media_assets: [@uma2])
      end

      # id: is given the upload media asset's id, not the upload's id.
      should "return uploads for the id: metatag" do
        assert_tag_match([@upload1], "id:#{@upload1.upload_media_assets.sole.id}")
      end

      should "return uploads for the md5: metatag" do
        assert_tag_match([@upload1], "md5:#{@asset1.md5}")
      end

      should "return uploads for the width: metatag" do
        assert_tag_match([@upload1], "width:#{@asset1.image_width}")
      end

      should "return uploads for the height: metatag" do
        assert_tag_match([@upload1], "height:#{@asset1.image_height}")
      end

      should "return uploads for the duration: metatag" do
        assert_tag_match([@upload2], "duration:3")
      end

      should "return uploads for the mpixels: metatag" do
        assert_tag_match([@upload1], "mpixels:#{(@asset1.image_width * @asset1.image_height) / 1_000_000.0}")
      end

      should "return uploads for the ratio: metatag" do
        assert_tag_match([@upload1], "ratio:#{@asset1.image_width.to_f / @asset1.image_height}")
      end

      should "return uploads for the filesize: metatag" do
        assert_tag_match([@upload1], "filesize:1mb")
      end

      should "return uploads for the filetype: metatag" do
        assert_tag_match([@upload1], "filetype:jpg")
      end

      should "return uploads for the date: metatag" do
        assert_tag_match([@upload2], "date:2022-01-01")
      end

      should "return uploads for the age: metatag" do
        assert_tag_match([@upload1], "age:<1minute")
      end

      should "return uploads for the status: metatag" do
        assert_tag_match([@upload2, @upload1], "status:active")
      end

      should "return uploads for the is: metatag" do
        assert_tag_match([@upload1], "is:jpg")
        assert_tag_match([@upload2, @upload1], "is:active")
      end

      should "return uploads for the exif: metatag" do
        assert_tag_match([@upload2, @upload1], "exif:File:FileType")
        assert_tag_match([@upload1], "exif:File:FileType=JPEG")
      end
    end
  end
end