From 1aeb52186e80348eba61c76e1fd4dbfbf47a1ade Mon Sep 17 00:00:00 2001 From: evazion Date: Fri, 24 Jun 2022 04:35:29 -0500 Subject: [PATCH] Add AI tag model and UI. Add a database model for storing AI-predicted tags, and add a UI for browsing and searching these tags. AI tags are generated by the Danbooru Autotagger (https://github.com/danbooru/autotagger). See that repo for details about the model. The database schema is `ai_tags (media_asset_id integer, tag_id integer, score smallint)`. This is designed to be as space-efficient as possible, since in production we have over 300 million AI-generated tags (6 million images at roughly 50 tags per image). This amounts to over 10GB in size, plus indexes. You can search for AI tags using e.g. `ai:scenery`. You can do `ai:scenery -scenery` to find posts where the scenery tag is potentially missing, or `scenery -ai:scenery` to find posts that are potentially mistagged (or more likely where the AI missed the tag). You can browse AI tags at https://danbooru.donmai.us/ai_tags. On this page you can filter by confidence level. You can also search unposted media assets by AI tag. To generate tags, use the `autotag` script from the Autotagger repo, something like this: docker run --rm -v ~/danbooru/public/data/360x360:/images ghcr.io/danbooru/autotagger ./autotag -c -f /images | gzip > tags.csv.gz To import tags, use the fix script in script/fixes/. Expect a Danbooru-sized dataset to take hours to days to generate tags, then 20-30 minutes to import. Currently this all has to be done by hand. 
--- app/controllers/ai_tags_controller.rb | 15 ++++++++ app/logical/autocomplete_service.rb | 2 ++ app/logical/concerns/searchable.rb | 8 ++++- app/logical/post_query_builder.rb | 4 ++- app/models/ai_tag.rb | 40 +++++++++++++++++++++ app/models/media_asset.rb | 1 + app/models/post.rb | 8 +++++ app/models/tag.rb | 1 + app/policies/ai_tag_policy.rb | 7 ++++ app/views/ai_tags/_gallery.html.erb | 9 +++++ app/views/ai_tags/_preview.html.erb | 14 ++++++++ app/views/ai_tags/_table.html.erb | 24 +++++++++++++ app/views/ai_tags/index.html.erb | 37 +++++++++++++++++++ app/views/static/site_map.html.erb | 1 + app/views/tags/_secondary_links.html.erb | 1 + config/initializers/inflections.rb | 1 + config/routes.rb | 1 + db/migrate/20220623052547_create_ai_tags.rb | 13 +++++++ db/structure.sql | 35 +++++++++++++++++- script/fixes/112_import_ai_tags.sql | 28 +++++++++++++++ 20 files changed, 247 insertions(+), 3 deletions(-) create mode 100644 app/controllers/ai_tags_controller.rb create mode 100644 app/models/ai_tag.rb create mode 100644 app/policies/ai_tag_policy.rb create mode 100644 app/views/ai_tags/_gallery.html.erb create mode 100644 app/views/ai_tags/_preview.html.erb create mode 100644 app/views/ai_tags/_table.html.erb create mode 100644 app/views/ai_tags/index.html.erb create mode 100644 db/migrate/20220623052547_create_ai_tags.rb create mode 100755 script/fixes/112_import_ai_tags.sql diff --git a/app/controllers/ai_tags_controller.rb b/app/controllers/ai_tags_controller.rb new file mode 100644 index 000000000..ed028dcdb --- /dev/null +++ b/app/controllers/ai_tags_controller.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class AITagsController < ApplicationController + respond_to :js, :html, :json, :xml + + def index + @ai_tags = authorize AITag.visible(CurrentUser.user).paginated_search(params, count_pages: false) + @ai_tags = @ai_tags.includes(:media_asset, :tag, :post) if request.format.html? 
+ + @mode = params.fetch(:mode, "gallery") + @preview_size = params[:size].presence || cookies[:post_preview_size].presence || MediaAssetGalleryComponent::DEFAULT_SIZE + + respond_with(@ai_tags) + end +end diff --git a/app/logical/autocomplete_service.rb b/app/logical/autocomplete_service.rb index 58c756f82..5cf34cc6a 100644 --- a/app/logical/autocomplete_service.rb +++ b/app/logical/autocomplete_service.rb @@ -211,6 +211,8 @@ class AutocompleteService autocomplete_favorite_group(value) when :search autocomplete_saved_search_label(value) + when :ai, :unaliased + autocomplete_tag(value) when *STATIC_METATAGS.keys autocomplete_static_metatag(metatag, value) else diff --git a/app/logical/concerns/searchable.rb b/app/logical/concerns/searchable.rb index b91fe8643..232ffbb9f 100644 --- a/app/logical/concerns/searchable.rb +++ b/app/logical/concerns/searchable.rb @@ -430,7 +430,13 @@ module Searchable end if model == Post && params["#{attr}_tags_match"].present? - relation = relation.where(attr => Post.user_tag_match(params["#{attr}_tags_match"], current_user).reorder(nil)) + posts = Post.user_tag_match(params["#{attr}_tags_match"], current_user).reorder(nil) + + if association.through_reflection? + relation = relation.includes(association.through_reflection.name).where(association.through_reflection.name => { attr => posts }) + else + relation = relation.where(attr => posts) + end end if params["has_#{attr}"].to_s.truthy? || params["has_#{attr}"].to_s.falsy? 
diff --git a/app/logical/post_query_builder.rb b/app/logical/post_query_builder.rb index de602fd96..340c19b5c 100644 --- a/app/logical/post_query_builder.rb +++ b/app/logical/post_query_builder.rb @@ -38,7 +38,7 @@ class PostQueryBuilder ordpool note comment commentary id rating source status filetype disapproved parent child search embedded md5 width height mpixels ratio score upvotes downvotes favcount filesize date age order limit tagcount pixiv_id pixiv - unaliased exif duration random is has + unaliased exif duration random is has ai ] + COUNT_METATAGS + COUNT_METATAG_SYNONYMS + CATEGORY_COUNT_METATAGS ORDER_METATAGS = %w[ @@ -163,6 +163,8 @@ class PostQueryBuilder relation.tags_include(value) when "exif" relation.exif_matches(value) + when "ai" + relation.ai_tags_include(value) when "user" relation.uploader_matches(value) when "approver" diff --git a/app/models/ai_tag.rb b/app/models/ai_tag.rb new file mode 100644 index 000000000..753769343 --- /dev/null +++ b/app/models/ai_tag.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +class AITag < ApplicationRecord + belongs_to :tag + belongs_to :media_asset + has_one :post, through: :media_asset + + validates :score, inclusion: { in: (0..100) } + + def self.search(params) + q = search_attributes(params, :media_asset, :tag, :post, :score) + + if params[:tag_name].present? + q = q.where(tag_id: Tag.find_by_name_or_alias(params[:tag_name])&.id) + end + + if params[:is_posted].to_s.truthy? + q = q.where.associated(:post) + elsif params[:is_posted].to_s.falsy? + q = q.where.missing(:post) + end + + q = q.apply_default_order(params) + q + end + + def self.default_order + order(media_asset_id: :desc, tag_id: :asc) + end + + def correct? + if post.nil? 
+ false + elsif tag.name =~ /\Arating:(.)\z/ + post.rating == $1 + else + post.has_tag?(tag.name) + end + end +end diff --git a/app/models/media_asset.rb b/app/models/media_asset.rb index 8f77b1d94..a54ab154d 100644 --- a/app/models/media_asset.rb +++ b/app/models/media_asset.rb @@ -20,6 +20,7 @@ class MediaAsset < ApplicationRecord has_many :upload_media_assets, dependent: :destroy has_many :uploads, through: :upload_media_assets has_many :uploaders, through: :uploads, class_name: "User", foreign_key: :uploader_id + has_many :ai_tags delegate :metadata, to: :media_metadata delegate :is_non_repeating_animation?, :is_greyscale?, :is_rotated?, to: :metadata diff --git a/app/models/post.rb b/app/models/post.rb index da7ba4701..94a8b9fa6 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -1307,6 +1307,14 @@ class Post < ApplicationRecord where(md5: metadata.select(:md5)) end + def ai_tags_include(value) + tag = Tag.find_by_name_or_alias(value) + return none if tag.nil? + + ai_tags = AITag.joins(:media_asset).where(tag: tag, score: (50..)) + where(ai_tags.where("media_assets.md5 = posts.md5").arel.exists) + end + def uploader_matches(username) case username.downcase when "any" diff --git a/app/models/tag.rb b/app/models/tag.rb index 79f1e058e..ad642511c 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -13,6 +13,7 @@ class Tag < ApplicationRecord has_many :antecedent_implications, -> {active}, :class_name => "TagImplication", :foreign_key => "antecedent_name", :primary_key => "name" has_many :consequent_implications, -> {active}, :class_name => "TagImplication", :foreign_key => "consequent_name", :primary_key => "name" has_many :dtext_links, foreign_key: :link_target, primary_key: :name + has_many :ai_tags validates :name, tag_name: true, uniqueness: true, on: :create validates :name, tag_name: true, on: :name diff --git a/app/policies/ai_tag_policy.rb b/app/policies/ai_tag_policy.rb new file mode 100644 index 000000000..debd7d4fd --- /dev/null +++ 
b/app/policies/ai_tag_policy.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +class AITagPolicy < ApplicationPolicy + def index? + true + end +end diff --git a/app/views/ai_tags/_gallery.html.erb b/app/views/ai_tags/_gallery.html.erb new file mode 100644 index 000000000..0f0e48587 --- /dev/null +++ b/app/views/ai_tags/_gallery.html.erb @@ -0,0 +1,9 @@ +<%= render(MediaAssetGalleryComponent.new(size: size)) do |gallery| %> + <% ai_tags.each do |ai_tag| %> + <% if policy(ai_tag.media_asset).can_see_image? %> + <% gallery.media_asset do %> + <%= render "ai_tags/preview", ai_tag: ai_tag, media_asset: ai_tag.media_asset, size: gallery.size %> + <% end %> + <% end %> + <% end %> +<% end %> diff --git a/app/views/ai_tags/_preview.html.erb b/app/views/ai_tags/_preview.html.erb new file mode 100644 index 000000000..8e1762947 --- /dev/null +++ b/app/views/ai_tags/_preview.html.erb @@ -0,0 +1,14 @@ +<%= render(MediaAssetPreviewComponent.new(media_asset: media_asset, size: size, link_target: media_asset.post, html: { **data_attributes_for(media_asset) })) do |preview| %> + <% preview.footer do %> +
+ <% if media_asset.post.present? %> + <%= link_to "post ##{media_asset.post.id}", media_asset.post %> + <% end %> + +
+ <%= link_to ai_tag.tag.pretty_name, ai_tags_path(search: { tag_name: ai_tag.tag.name, **params[:search].except(:tag_name) }), class: "tag-type-#{ai_tag.tag.category}", "data-tag-name": ai_tag.tag.name %> + <%= link_to "#{ai_tag.score}%", ai_tags_path(search: { tag_name: ai_tag.tag.name, score: ">=#{ai_tag.score}", **params[:search].except(:tag_name, :score) }), class: "tag-type-#{ai_tag.tag.category}", "data-tag-name": ai_tag.tag.name %> +
+
+ <% end %> +<% end %> diff --git a/app/views/ai_tags/_table.html.erb b/app/views/ai_tags/_table.html.erb new file mode 100644 index 000000000..42e33e0ce --- /dev/null +++ b/app/views/ai_tags/_table.html.erb @@ -0,0 +1,24 @@ +<%= table_for @ai_tags, class: "striped autofit" do |t| %> + <% t.column :tag do |ai_tag| %> + <%= link_to_wiki "?", ai_tag.tag.name %> + <%= link_to ai_tag.tag.pretty_name, ai_tags_path(search: { tag_name: ai_tag.tag.name }), class: "tag-type-#{ai_tag.tag.category}", "data-tag-name": ai_tag.tag.name %> + <% end %> + + <% t.column :asset do |ai_tag| %> + <%= link_to "asset ##{ai_tag.media_asset_id}", ai_tag.media_asset %> + <% end %> + + <% t.column :post do |ai_tag| %> + <% if ai_tag.post.present? %> + <%= link_to "post ##{ai_tag.post.id}", ai_tag.post %> + <% end %> + <% end %> + + <% t.column :confidence do |ai_tag| %> + <%= ai_tag.score %>% + <% end %> + + <% t.column "Present?" do |ai_tag| %> + <%= "Yes" if ai_tag.correct? %> + <% end %> +<% end %> diff --git a/app/views/ai_tags/index.html.erb b/app/views/ai_tags/index.html.erb new file mode 100644 index 000000000..1ed77a8a4 --- /dev/null +++ b/app/views/ai_tags/index.html.erb @@ -0,0 +1,37 @@ +<%= render "tags/secondary_links" %> + +
+
+

AI Tags

+ + <%= search_form_for(ai_tags_path) do |f| %> + <%= f.input :tag_name, label: "AI Tag", input_html: { value: params.dig(:search, :tag_name).presence || Tag.find_by(id: params.dig(:search, :tag_id))&.name, data: { autocomplete: "tag" } } %> + <%= f.input :post_tags_match, label: "Post Search", input_html: { value: params.dig(:search, :post_tags_match), data: { autocomplete: "tag-query" } } %> + <%= f.input :score, label: "Confidence", input_html: { value: params.dig(:search, :score) } %> + <%= f.input :is_posted, as: :hidden, input_html: { value: params.dig(:search, :is_posted) } %> + <%= f.submit "Search" %> + <% end %> + +
+ <%= link_to "All", current_page_path(search: search_params.to_h.without("is_posted")), class: ["inline-block p-1 pb-2", (search_params[:is_posted].nil? ? "border-current border-b-2 -mb-px" : "inactive-link")] %> + <%= link_to "Posted", current_page_path(search: { is_posted: true, **search_params }), class: ["inline-block p-1 pb-2", (search_params[:is_posted].to_s.truthy? ? "border-current border-b-2 -mb-px" : "inactive-link")] %> + <%# link_to "Unposted", current_page_path(search: { is_posted: false, **search_params }), class: ["inline-block p-1 pb-2", (search_params[:is_posted].to_s.falsy? ? "border-current border-b-2 -mb-px" : "inactive-link")] %> + + <%= render PreviewSizeMenuComponent.new(current_size: @preview_size) %> + + <% if @mode == "table" %> + <%= link_to grid_icon, current_page_path(mode: nil), title: "Gallery", class: "inline-block p-1 pb-2 rounded inactive-link" %> + <% else %> + <%= link_to list_icon, current_page_path(mode: "table"), title: "Table", class: "inline-block p-1 pb-2 rounded inactive-link" %> + <% end %> +
+ + <% if @mode == "table" %> + <%= render "ai_tags/table" %> + <% else %> + <%= render "ai_tags/gallery", ai_tags: @ai_tags, size: @preview_size %> + <% end %> + + <%= numbered_paginator(@ai_tags) %> +
+
diff --git a/app/views/static/site_map.html.erb b/app/views/static/site_map.html.erb index bb7df4078..27e90aa8b 100644 --- a/app/views/static/site_map.html.erb +++ b/app/views/static/site_map.html.erb @@ -51,6 +51,7 @@
  • <%= link_to("Aliases", tag_aliases_path) %>
  • <%= link_to("Implications", tag_implications_path) %>
  • <%= link_to("Listing", tags_path) %>
  • +
  • <%= link_to("AI Tags", ai_tags_path) %>
  • <%= link_to("Related Tags", related_tag_path) %>