Add AI tag model and UI.
Add a database model for storing AI-predicted tags, and add a UI for browsing and searching these tags. AI tags are generated by the Danbooru Autotagger (https://github.com/danbooru/autotagger). See that repo for details about the model. The database schema is `ai_tags (media_asset_id integer, tag_id integer, score smallint)`. This is designed to be as space-efficient as possible, since in production we have over 300 million AI-generated tags (6 million images and 50 tags per post). This amounts to over 10GB in size, plus indexes. You can search for AI tags using e.g. `ai:scenery`. You can do `ai:scenery -scenery` to find posts where the scenery tag is potentially missing, or `scenery -ai:scenery` to find posts that are potentially mistagged (or more likely where the AI missed the tag). You can browse AI tags at https://danbooru.donmai.us/ai_tags. On this page you can filter by confidence level. You can also search unposted media assets by AI tag. To generate tags, use the `autotag` script from the Autotagger repo, something like this: `docker run --rm -v ~/danbooru/public/data/360x360:/images ghcr.io/danbooru/autotagger ./autotag -c -f /images | gzip > tags.csv.gz`. To import tags, use the fix script in script/fixes/. Expect a Danbooru-size dataset to take hours to days to generate tags, then 20-30 minutes to import. Currently this all has to be done by hand.
This commit is contained in:
28
script/fixes/112_import_ai_tags.sql
Executable file
28
script/fixes/112_import_ai_tags.sql
Executable file
@@ -0,0 +1,28 @@
|
||||
-- Abort on the first error. By default psql keeps executing after a failed
-- statement, so a failed load (e.g. tags.csv.gz missing or corrupt) would still
-- build an empty ai_tags_temp, swap it in for ai_tags, and drop the old data.
\set ON_ERROR_STOP on

-- Staging table for the raw autotagger output: one row per (md5, tag, score)
-- line of the CSV. Temporary, so it is private to this psql session.
create temporary table ai_tags_import (md5 text, tag text, score real);

-- Client-side copy: zcat runs on the machine running psql and reads
-- tags.csv.gz from the current working directory. The CSV has no header row.
-- The whole meta-command must stay on a single line.
\copy ai_tags_import (md5, tag, score) from program 'zcat tags.csv.gz' with (format csv, header off);
|
||||
|
||||
-- Resolve every imported (md5, tag) pair to its media asset id and tag id, and
-- store the score multiplied by 100 as a smallint. Imported rows whose md5 or
-- tag name has no match are dropped by the inner joins. The table is created
-- unlogged for the bulk load.
create unlogged table ai_tags_temp as
select
  assets.id::integer as media_asset_id,
  known_tags.id::integer as tag_id,
  (predictions.score * 100)::smallint as score
from media_assets as assets
inner join ai_tags_import as predictions
  on predictions.md5 = assets.md5
inner join tags as known_tags
  on known_tags.name = predictions.tag;

-- Turn the table into a regular logged table now that it has been populated.
alter table ai_tags_temp
  set logged;
|
||||
-- One single-column index per column of ai_tags_temp. The index names carry
-- the "temp" infix until the table is renamed to ai_tags later in this script.
create index index_ai_tags_temp_on_media_asset_id
  on ai_tags_temp (media_asset_id);

create index index_ai_tags_temp_on_tag_id
  on ai_tags_temp (tag_id);

create index index_ai_tags_temp_on_score
  on ai_tags_temp (score);
|
||||
|
||||
-- Mark every column as required. All three constraints are applied by a single
-- alter table so they can be validated together in one pass over the table,
-- instead of one separate validation per column.
alter table ai_tags_temp
  alter column media_asset_id set not null,
  alter column tag_id set not null,
  alter column score set not null;
|
||||
|
||||
-- Swap the freshly built table in for the live one. All renames run inside one
-- transaction, so they take effect together or not at all.
begin;

-- Step 1: move the current table and its indexes out of the way. This has to
-- come first because it frees the names that step 2 reuses.
alter table ai_tags
  rename to ai_tags_old;
alter index index_ai_tags_on_media_asset_id
  rename to index_ai_tags_old_on_media_asset_id;
alter index index_ai_tags_on_tag_id
  rename to index_ai_tags_old_on_tag_id;
alter index index_ai_tags_on_score
  rename to index_ai_tags_old_on_score;

-- Step 2: give the new table and its indexes the canonical names.
alter table ai_tags_temp
  rename to ai_tags;
alter index index_ai_tags_temp_on_media_asset_id
  rename to index_ai_tags_on_media_asset_id;
alter index index_ai_tags_temp_on_tag_id
  rename to index_ai_tags_on_tag_id;
alter index index_ai_tags_temp_on_score
  rename to index_ai_tags_on_score;

commit;
|
||||
|
||||
-- Cleanup after the swap has been committed.

-- Remove the previous generation of the table (its indexes go with it).
drop table ai_tags_old;

-- Remove the CSV staging table explicitly rather than waiting for the session
-- to end.
drop table ai_tags_import;
|
||||
Reference in New Issue
Block a user