Add AI tag model and UI.

Add a database model for storing AI-predicted tags, and add a UI for browsing and searching these tags.

AI tags are generated by the Danbooru Autotagger (https://github.com/danbooru/autotagger). See that
repo for details about the model.

The database schema is `ai_tags (media_asset_id integer, tag_id integer, score smallint)`. This is
designed to be as space-efficient as possible, since in production we have over 300 million
AI-generated tags (6 million images at 50 tags per image). This amounts to over 10GB in size, plus
indexes.

You can search for AI tags using e.g. `ai:scenery`. You can do `ai:scenery -scenery` to find posts
where the scenery tag is potentially missing, or `scenery -ai:scenery` to find posts that are
potentially mistagged (or more likely where the AI missed the tag).

You can browse AI tags at https://danbooru.donmai.us/ai_tags. On this page you can filter by
confidence level. You can also search unposted media assets by AI tag.

To generate tags, use the `autotag` script from the Autotagger repo, something like this:

  docker run --rm -v ~/danbooru/public/data/360x360:/images ghcr.io/danbooru/autotagger ./autotag -c -f /images | gzip > tags.csv.gz

To import tags, use the fix script in script/fixes/. Expect a Danbooru-sized dataset to take
hours to days to generate tags, then 20-30 minutes to import. Currently this all has to be done by hand.
This commit is contained in:
evazion
2022-06-24 04:35:29 -05:00
parent ae9495ec7c
commit 1aeb52186e
20 changed files with 247 additions and 3 deletions

View File

@@ -0,0 +1,15 @@
# frozen_string_literal: true
# Controller for browsing and searching AI-predicted tags.
class AITagsController < ApplicationController
  respond_to :js, :html, :json, :xml

  # Lists the AI tags visible to the current user, filtered and paginated
  # according to the request params.
  #
  # Sets @ai_tags (the result page), @mode (the `mode` param, "gallery" when
  # absent) and @preview_size (the `size` param, else the
  # `post_preview_size` cookie, else the gallery component's default).
  def index
    visible_tags = AITag.visible(CurrentUser.user)
    @ai_tags = authorize visible_tags.paginated_search(params, count_pages: false)

    # The HTML templates read the asset, tag and post of every row, so load
    # them together with the page; other formats skip the extra loading.
    if request.format.html?
      @ai_tags = @ai_tags.includes(:media_asset, :tag, :post)
    end

    @mode = params.fetch(:mode, "gallery")
    @preview_size = params[:size].presence ||
                    cookies[:post_preview_size].presence ||
                    MediaAssetGalleryComponent::DEFAULT_SIZE

    respond_with(@ai_tags)
  end
end

View File

@@ -211,6 +211,8 @@ class AutocompleteService
autocomplete_favorite_group(value) autocomplete_favorite_group(value)
when :search when :search
autocomplete_saved_search_label(value) autocomplete_saved_search_label(value)
when :ai, :unaliased
autocomplete_tag(value)
when *STATIC_METATAGS.keys when *STATIC_METATAGS.keys
autocomplete_static_metatag(metatag, value) autocomplete_static_metatag(metatag, value)
else else

View File

@@ -430,7 +430,13 @@ module Searchable
end end
if model == Post && params["#{attr}_tags_match"].present? if model == Post && params["#{attr}_tags_match"].present?
relation = relation.where(attr => Post.user_tag_match(params["#{attr}_tags_match"], current_user).reorder(nil)) posts = Post.user_tag_match(params["#{attr}_tags_match"], current_user).reorder(nil)
if association.through_reflection?
relation = relation.includes(association.through_reflection.name).where(association.through_reflection.name => { attr => posts })
else
relation = relation.where(attr => posts)
end
end end
if params["has_#{attr}"].to_s.truthy? || params["has_#{attr}"].to_s.falsy? if params["has_#{attr}"].to_s.truthy? || params["has_#{attr}"].to_s.falsy?

View File

@@ -38,7 +38,7 @@ class PostQueryBuilder
ordpool note comment commentary id rating source status filetype ordpool note comment commentary id rating source status filetype
disapproved parent child search embedded md5 width height mpixels ratio disapproved parent child search embedded md5 width height mpixels ratio
score upvotes downvotes favcount filesize date age order limit tagcount pixiv_id pixiv score upvotes downvotes favcount filesize date age order limit tagcount pixiv_id pixiv
unaliased exif duration random is has unaliased exif duration random is has ai
] + COUNT_METATAGS + COUNT_METATAG_SYNONYMS + CATEGORY_COUNT_METATAGS ] + COUNT_METATAGS + COUNT_METATAG_SYNONYMS + CATEGORY_COUNT_METATAGS
ORDER_METATAGS = %w[ ORDER_METATAGS = %w[
@@ -163,6 +163,8 @@ class PostQueryBuilder
relation.tags_include(value) relation.tags_include(value)
when "exif" when "exif"
relation.exif_matches(value) relation.exif_matches(value)
when "ai"
relation.ai_tags_include(value)
when "user" when "user"
relation.uploader_matches(value) relation.uploader_matches(value)
when "approver" when "approver"

40
app/models/ai_tag.rb Normal file
View File

@@ -0,0 +1,40 @@
# frozen_string_literal: true
# A tag predicted by the AI autotagger for a media asset, together with the
# confidence of the prediction.
class AITag < ApplicationRecord
  belongs_to :tag
  belongs_to :media_asset
  has_one :post, through: :media_asset

  # The score is a whole-number percentage: the column is a smallint, the
  # import script stores `score * 100`, and searches compare it against
  # values such as 50. A 0.0..1.0 range would reject nearly every real row.
  validates :score, inclusion: { in: (0..100) }

  # Searches AI tags.
  #
  # Beyond the generic attribute filters, two params are recognized:
  #
  # * `tag_name`  - restrict to the tag with this name (or alias).
  # * `is_posted` - truthy: only tags whose media asset has a post;
  #                 falsy: only tags whose media asset has no post.
  #
  # Returns the filtered relation with the default ordering applied.
  def self.search(params)
    q = search_attributes(params, :media_asset, :tag, :post, :score)

    if params[:tag_name].present?
      q = q.where(tag_id: Tag.find_by_name_or_alias(params[:tag_name])&.id)
    end

    if params[:is_posted].to_s.truthy?
      q = q.where.associated(:post)
    elsif params[:is_posted].to_s.falsy?
      q = q.where.missing(:post)
    end

    q.apply_default_order(params)
  end

  # Default ordering: highest media asset id first, then ascending tag id.
  def self.default_order
    order(media_asset_id: :desc, tag_id: :asc)
  end

  # Returns true if the prediction agrees with the post.
  #
  # Always false when the media asset has no post. For `rating:<x>` tags the
  # single-character rating is compared with the post's rating; every other
  # tag is checked with `post.has_tag?`.
  def correct?
    if post.nil?
      false
    elsif tag.name =~ /\Arating:(.)\z/
      post.rating == Regexp.last_match(1)
    else
      post.has_tag?(tag.name)
    end
  end
end

View File

@@ -20,6 +20,7 @@ class MediaAsset < ApplicationRecord
has_many :upload_media_assets, dependent: :destroy has_many :upload_media_assets, dependent: :destroy
has_many :uploads, through: :upload_media_assets has_many :uploads, through: :upload_media_assets
has_many :uploaders, through: :uploads, class_name: "User", foreign_key: :uploader_id has_many :uploaders, through: :uploads, class_name: "User", foreign_key: :uploader_id
has_many :ai_tags
delegate :metadata, to: :media_metadata delegate :metadata, to: :media_metadata
delegate :is_non_repeating_animation?, :is_greyscale?, :is_rotated?, to: :metadata delegate :is_non_repeating_animation?, :is_greyscale?, :is_rotated?, to: :metadata

View File

@@ -1307,6 +1307,14 @@ class Post < ApplicationRecord
where(md5: metadata.select(:md5)) where(md5: metadata.select(:md5))
end end
# Finds posts whose media asset has the given AI-predicted tag.
#
# value     - a tag name or alias, resolved with Tag.find_by_name_or_alias.
# min_score - lowest AI score (inclusive) that counts as a match.
#             Defaults to 50, the threshold that used to be hard-coded.
#
# Returns an empty relation when the tag does not exist; otherwise posts for
# which an AI tag with that tag and score exists on a media asset with the
# same MD5 as the post.
def ai_tags_include(value, min_score: 50)
  tag = Tag.find_by_name_or_alias(value)
  return none if tag.nil?

  ai_tags = AITag.joins(:media_asset).where(tag: tag, score: (min_score..))

  # Correlated EXISTS subquery: AI tags hang off media assets, which are
  # matched to posts by MD5.
  where(ai_tags.where("media_assets.md5 = posts.md5").arel.exists)
end
def uploader_matches(username) def uploader_matches(username)
case username.downcase case username.downcase
when "any" when "any"

View File

@@ -13,6 +13,7 @@ class Tag < ApplicationRecord
has_many :antecedent_implications, -> {active}, :class_name => "TagImplication", :foreign_key => "antecedent_name", :primary_key => "name" has_many :antecedent_implications, -> {active}, :class_name => "TagImplication", :foreign_key => "antecedent_name", :primary_key => "name"
has_many :consequent_implications, -> {active}, :class_name => "TagImplication", :foreign_key => "consequent_name", :primary_key => "name" has_many :consequent_implications, -> {active}, :class_name => "TagImplication", :foreign_key => "consequent_name", :primary_key => "name"
has_many :dtext_links, foreign_key: :link_target, primary_key: :name has_many :dtext_links, foreign_key: :link_target, primary_key: :name
has_many :ai_tags
validates :name, tag_name: true, uniqueness: true, on: :create validates :name, tag_name: true, uniqueness: true, on: :create
validates :name, tag_name: true, on: :name validates :name, tag_name: true, on: :name

View File

@@ -0,0 +1,7 @@
# frozen_string_literal: true
# Authorization rules for AI tags.
class AITagPolicy < ApplicationPolicy
  # The AI tag listing is always allowed.
  def index?
    true
  end
end

View File

@@ -0,0 +1,9 @@
<%# Gallery view of AI tags: renders one preview per AI tag inside a MediaAssetGalleryComponent. %>
<%# Locals: ai_tags (the AI tags to show), size (preview size passed to the gallery component). %>
<%= render(MediaAssetGalleryComponent.new(size: size)) do |gallery| %>
<% ai_tags.each do |ai_tag| %>
<%# Skip assets for which the media asset policy's can_see_image? check fails. %>
<% if policy(ai_tag.media_asset).can_see_image? %>
<% gallery.media_asset do %>
<%= render "ai_tags/preview", ai_tag: ai_tag, media_asset: ai_tag.media_asset, size: gallery.size %>
<% end %>
<% end %>
<% end %>
<% end %>

View File

@@ -0,0 +1,14 @@
<%# Preview of a single AI tag: the media asset thumbnail (linking to its post, if any) with a footer %>
<%# showing the post link, the tag name, and the confidence score. %>
<%# Locals: ai_tag, media_asset, size. %>
<%# NOTE(review): the footer links call params[:search].except(...), which assumes params[:search] is %>
<%# always present on this page - confirm that the controller layer guarantees this. %>
<%= render(MediaAssetPreviewComponent.new(media_asset: media_asset, size: size, link_target: media_asset.post, html: { **data_attributes_for(media_asset) })) do |preview| %>
<% preview.footer do %>
<div class="text-center text-xs h-8">
<% if media_asset.post.present? %>
<%= link_to "post ##{media_asset.post.id}", media_asset.post %>
<% end %>
<div>
<%# Tag link: keeps the current search but switches the tag_name filter to this tag. %>
<%= link_to ai_tag.tag.pretty_name, ai_tags_path(search: { tag_name: ai_tag.tag.name, **params[:search].except(:tag_name) }), class: "tag-type-#{ai_tag.tag.category}", "data-tag-name": ai_tag.tag.name %>
<%# Score link: same as the tag link, but also restricts results to scores >= this tag's score. %>
<%= link_to "#{ai_tag.score}%", ai_tags_path(search: { tag_name: ai_tag.tag.name, score: ">=#{ai_tag.score}", **params[:search].except(:tag_name, :score) }), class: "tag-type-#{ai_tag.tag.category}", "data-tag-name": ai_tag.tag.name %>
</div>
</div>
<% end %>
<% end %>

View File

@@ -0,0 +1,24 @@
<%# Table view of @ai_tags: one row per AI tag showing the tag, its media asset, the post (if any), %>
<%# the confidence score, and whether the prediction matches the post. %>
<%= table_for @ai_tags, class: "striped autofit" do |t| %>
<% t.column :tag do |ai_tag| %>
<%= link_to_wiki "?", ai_tag.tag.name %>
<%= link_to ai_tag.tag.pretty_name, ai_tags_path(search: { tag_name: ai_tag.tag.name }), class: "tag-type-#{ai_tag.tag.category}", "data-tag-name": ai_tag.tag.name %>
<% end %>
<% t.column :asset do |ai_tag| %>
<%= link_to "asset ##{ai_tag.media_asset_id}", ai_tag.media_asset %>
<% end %>
<% t.column :post do |ai_tag| %>
<% if ai_tag.post.present? %>
<%= link_to "post ##{ai_tag.post.id}", ai_tag.post %>
<% end %>
<% end %>
<% t.column :confidence do |ai_tag| %>
<%= ai_tag.score %>%
<% end %>
<%# Shows "Yes" when AITag#correct? is true; blank otherwise (including unposted assets). %>
<% t.column "Present?" do |ai_tag| %>
<%= "Yes" if ai_tag.correct? %>
<% end %>
<% end %>

View File

@@ -0,0 +1,37 @@
<%# AI tags index page: search form, All/Posted filter tabs, preview size menu, gallery/table toggle, %>
<%# and the results rendered in the mode chosen by the controller (@mode). %>
<%= render "tags/secondary_links" %>

<div id="c-ai-tags">
  <div id="a-index">
    <h1>AI Tags</h1>

    <%= search_form_for(ai_tags_path) do |f| %>
      <%# Prefill the tag name from search[tag_name], falling back to the name of the tag in search[tag_id]. %>
      <%= f.input :tag_name, label: "AI Tag", input_html: { value: params.dig(:search, :tag_name).presence || Tag.find_by(id: params.dig(:search, :tag_id))&.name, data: { autocomplete: "tag" } } %>
      <%= f.input :post_tags_match, label: "Post Search", input_html: { value: params.dig(:search, :post_tags_match), data: { autocomplete: "tag-query" } } %>
      <%= f.input :score, label: "Confidence", input_html: { value: params.dig(:search, :score) } %>
      <%= f.input :is_posted, as: :hidden, input_html: { value: params.dig(:search, :is_posted) } %>
      <%= f.submit "Search" %>
    <% end %>

    <div class="border-b mb-4 flex flex-wrap gap-4">
      <%= link_to "All", current_page_path(search: search_params.to_h.without("is_posted")), class: ["inline-block p-1 pb-2", (search_params[:is_posted].nil? ? "border-current border-b-2 -mb-px" : "inactive-link")] %>
      <%# Merge is_posted last so it replaces any is_posted already present in the current search, %>
      <%# instead of being overridden (or duplicated) by it. %>
      <%= link_to "Posted", current_page_path(search: search_params.to_h.merge("is_posted" => true)), class: ["inline-block p-1 pb-2", (search_params[:is_posted].to_s.truthy? ? "border-current border-b-2 -mb-px" : "inactive-link")] %>
      <%# link_to "Unposted", current_page_path(search: search_params.to_h.merge("is_posted" => false)), class: ["inline-block p-1 pb-2", (search_params[:is_posted].to_s.falsy? ? "border-current border-b-2 -mb-px" : "inactive-link")] %>

      <span class="flex-grow-1"></span>
      <%= render PreviewSizeMenuComponent.new(current_size: @preview_size) %>

      <% if @mode == "table" %>
        <%= link_to grid_icon, current_page_path(mode: nil), title: "Gallery", class: "inline-block p-1 pb-2 rounded inactive-link" %>
      <% else %>
        <%= link_to list_icon, current_page_path(mode: "table"), title: "Table", class: "inline-block p-1 pb-2 rounded inactive-link" %>
      <% end %>
    </div>

    <%# Use @mode here as well, so the rendered view and the toggle icon above are driven by the same value. %>
    <% if @mode == "table" %>
      <%= render "ai_tags/table" %>
    <% else %>
      <%= render "ai_tags/gallery", ai_tags: @ai_tags, size: @preview_size %>
    <% end %>

    <%= numbered_paginator(@ai_tags) %>
  </div>
</div>

View File

@@ -51,6 +51,7 @@
<li><%= link_to("Aliases", tag_aliases_path) %></li> <li><%= link_to("Aliases", tag_aliases_path) %></li>
<li><%= link_to("Implications", tag_implications_path) %></li> <li><%= link_to("Implications", tag_implications_path) %></li>
<li><%= link_to("Listing", tags_path) %></li> <li><%= link_to("Listing", tags_path) %></li>
<li><%= link_to("AI Tags", ai_tags_path) %></li>
<li><%= link_to("Related Tags", related_tag_path) %></li> <li><%= link_to("Related Tags", related_tag_path) %></li>
</ul> </ul>
<ul> <ul>

View File

@@ -4,6 +4,7 @@
<%= subnav_link_to("Aliases", tag_aliases_path) %> <%= subnav_link_to("Aliases", tag_aliases_path) %>
<%= subnav_link_to("Implications", tag_implications_path) %> <%= subnav_link_to("Implications", tag_implications_path) %>
<%= subnav_link_to "Request alias/implication", new_bulk_update_request_path %> <%= subnav_link_to "Request alias/implication", new_bulk_update_request_path %>
<%= subnav_link_to "AI tags", ai_tags_path %>
<%= subnav_link_to "Related tags", related_tag_path %> <%= subnav_link_to "Related tags", related_tag_path %>
<%= subnav_link_to "Help", wiki_page_path("help:tags") %> <%= subnav_link_to "Help", wiki_page_path("help:tags") %>

View File

@@ -9,6 +9,7 @@ ActiveSupport::Inflector.inflections(:en) do |inflect|
inflect.acronym "URL" inflect.acronym "URL"
inflect.acronym "URLs" inflect.acronym "URLs"
inflect.acronym "AST" inflect.acronym "AST"
inflect.acronym "AI"
# inflect.plural /^(ox)$/i, '\1en' # inflect.plural /^(ox)$/i, '\1en'
# inflect.singular /^(ox)en/i, '\1' # inflect.singular /^(ox)en/i, '\1'
# inflect.irregular 'person', 'people' # inflect.irregular 'person', 'people'

View File

@@ -158,6 +158,7 @@ Rails.application.routes.draw do
end end
resources :media_assets, only: [:index, :show] resources :media_assets, only: [:index, :show]
resources :media_metadata, only: [:index] resources :media_metadata, only: [:index]
resources :ai_tags, only: [:index]
resources :mod_actions resources :mod_actions
resources :moderation_reports, only: [:new, :create, :index, :show, :update] resources :moderation_reports, only: [:new, :create, :index, :show, :update]
resources :modqueue, only: [:index] resources :modqueue, only: [:index]

View File

@@ -0,0 +1,13 @@
# Creates the `ai_tags` table: one row per (media asset, tag) prediction with
# its score. The table has no `id` column, and each of the three columns gets
# its own single-column index.
class CreateAITags < ActiveRecord::Migration[7.0]
  def change
    create_table(:ai_tags, id: false) do |t|
      t.integer :media_asset_id, null: false, index: true
      t.integer :tag_id, null: false, index: true
      t.column :score, :smallint, null: false, index: true
    end
  end
end

View File

@@ -89,6 +89,17 @@ SET default_tablespace = '';
SET default_table_access_method = heap; SET default_table_access_method = heap;
--
-- Name: ai_tags; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE public.ai_tags (
media_asset_id integer NOT NULL,
tag_id integer NOT NULL,
score smallint NOT NULL
);
-- --
-- Name: api_keys; Type: TABLE; Schema: public; Owner: - -- Name: api_keys; Type: TABLE; Schema: public; Owner: -
-- --
@@ -3123,6 +3134,27 @@ ALTER TABLE ONLY public.wiki_pages
ADD CONSTRAINT wiki_pages_pkey PRIMARY KEY (id); ADD CONSTRAINT wiki_pages_pkey PRIMARY KEY (id);
--
-- Name: index_ai_tags_on_media_asset_id; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_ai_tags_on_media_asset_id ON public.ai_tags USING btree (media_asset_id);
--
-- Name: index_ai_tags_on_score; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_ai_tags_on_score ON public.ai_tags USING btree (score);
--
-- Name: index_ai_tags_on_tag_id; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_ai_tags_on_tag_id ON public.ai_tags USING btree (tag_id);
-- --
-- Name: index_api_keys_on_key; Type: INDEX; Schema: public; Owner: - -- Name: index_api_keys_on_key; Type: INDEX; Schema: public; Owner: -
-- --
@@ -5942,6 +5974,7 @@ INSERT INTO "schema_migrations" (version) VALUES
('20220410050628'), ('20220410050628'),
('20220504235329'), ('20220504235329'),
('20220514175125'), ('20220514175125'),
('20220525214746'); ('20220525214746'),
('20220623052547');

View File

@@ -0,0 +1,28 @@
-- Imports AI tags from tags.csv.gz and swaps them in as the new contents of the ai_tags table.
-- Intended to be run with psql (it uses the \copy meta-command).

-- Stage the raw CSV rows (md5, tag name, score) in a temporary table.
create temporary table ai_tags_import (md5 text, tag text, score real);
-- Read the gzipped CSV through zcat; the file has no header row.
\copy ai_tags_import (md5, tag, score) from program 'zcat tags.csv.gz' with (format csv, header off);
-- Resolve md5 -> media asset id and tag name -> tag id, and convert the score to score * 100 stored as a smallint.
-- These are inner joins, so rows whose md5 or tag name has no match are dropped.
-- NOTE(review): the table is created unlogged and switched to logged afterwards, presumably to speed up the bulk load - confirm.
create unlogged table ai_tags_temp as (select ma.id::integer as media_asset_id, t.id::integer as tag_id, (score * 100)::smallint as score from media_assets ma join ai_tags_import mli on mli.md5 = ma.md5 join tags t on t.name = mli.tag);
alter table ai_tags_temp set logged;
-- Build the same indexes and NOT NULL constraints that the real ai_tags table has.
create index index_ai_tags_temp_on_media_asset_id on ai_tags_temp (media_asset_id);
create index index_ai_tags_temp_on_tag_id on ai_tags_temp (tag_id);
create index index_ai_tags_temp_on_score on ai_tags_temp (score);
alter table ai_tags_temp alter column media_asset_id set not null;
alter table ai_tags_temp alter column tag_id set not null;
alter table ai_tags_temp alter column score set not null;
-- Swap the new table in for the old one within a single transaction by renaming both tables and their indexes.
begin;
alter table ai_tags rename to ai_tags_old;
alter index index_ai_tags_on_media_asset_id rename to index_ai_tags_old_on_media_asset_id;
alter index index_ai_tags_on_tag_id rename to index_ai_tags_old_on_tag_id;
alter index index_ai_tags_on_score rename to index_ai_tags_old_on_score;
alter table ai_tags_temp rename to ai_tags;
alter index index_ai_tags_temp_on_media_asset_id rename to index_ai_tags_on_media_asset_id;
alter index index_ai_tags_temp_on_tag_id rename to index_ai_tags_on_tag_id;
alter index index_ai_tags_temp_on_score rename to index_ai_tags_on_score;
commit;
-- Clean up the previous table and the staging table.
drop table ai_tags_old;
drop table ai_tags_import;