From e3b836b5060a573f39a105cfdf2b816d5230e4f3 Mon Sep 17 00:00:00 2001 From: evazion Date: Sat, 16 Oct 2021 05:38:07 -0500 Subject: [PATCH] Refactor full-text search to get rid of tsvector columns. Refactor full-text search on several tables (comments, dmails, forum_posts, forum_topics, notes, and wiki_pages) to use to_tsvector expression indexes instead of dedicated tsvector columns. This way full-text search works the same way across all tables. API changes: * Changed /wiki_pages.json?search[body_matches] to match against only the body. Before `body_matches` matched against both the title and the body. * Added /wiki_pages.json?search[title_or_body_matches] to match against both the title and the body. * Fixed /dmails.json?search[message_matches] to match against both the title and body when doing a wildcard search. Before a wildcard search only matched against the body. * Added /dmails.json?search[body_matches] to match against only the dmail body. --- app/logical/concerns/searchable.rb | 54 +++++++++++++++---- app/models/comment.rb | 2 +- app/models/dmail.rb | 3 +- app/models/forum_post.rb | 2 +- app/models/forum_topic.rb | 2 +- app/models/note.rb | 2 +- app/models/wiki_page.rb | 3 +- app/views/wiki_pages/_search.html.erb | 2 +- ...15223510_add_tsvector_index_to_multiple.rb | 28 ++++++++++ db/structure.sql | 45 +++++++++++++++- 10 files changed, 125 insertions(+), 18 deletions(-) create mode 100644 db/migrate/20211015223510_add_tsvector_index_to_multiple.rb diff --git a/app/logical/concerns/searchable.rb b/app/logical/concerns/searchable.rb index aefeda4ad..691cff8a3 100644 --- a/app/logical/concerns/searchable.rb +++ b/app/logical/concerns/searchable.rb @@ -160,6 +160,16 @@ module Searchable where("#{qualified_column_for(attr)} ? :key", key: key) end + # https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-DOCUMENTS + # https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES + def where_tsvector_matches(columns, query) + tsvectors = Array.wrap(columns).map do |column| + to_tsvector("pg_catalog.english", arel_table[column]) + end.reduce(:concat) + + where("(#{tsvectors.to_sql}) @@ plainto_tsquery('pg_catalog.english', :query)", query: query) + end + def search_boolean_attribute(attr, params) if params[attr].present? boolean_attribute_matches(attr, params[attr]) @@ -194,18 +204,17 @@ module Searchable end end - def text_attribute_matches(attribute, value, index_column: nil) - return all unless value.present? + def text_attribute_matches(columns, query) + columns = Array.wrap(columns) - column = column_for_attribute(attribute) - qualified_column = "#{table_name}.#{column.name}" - - if value =~ /\*/ - where("lower(#{qualified_column}) LIKE :value ESCAPE E'\\\\'", value: value.mb_chars.downcase.to_escaped_for_sql_like) - elsif index_column.present? - where("#{table_name}.#{index_column} @@ plainto_tsquery('english', :value)", value: value) + if query.nil? + all + elsif query =~ /\*/ + columns.map do |column| + where_ilike(column, query) + end.reduce(:or) else - where("to_tsvector('english', #{qualified_column}) @@ plainto_tsquery('english', :value)", value: value) + where_tsvector_matches(columns, query) end end @@ -596,6 +605,20 @@ module Searchable end end + def sql_value(value) + if Arel.arel_node?(value) + value + elsif value.is_a?(String) + Arel::Nodes.build_quoted(value) + elsif value.is_a?(Symbol) + arel_table[value] + elsif value.is_a?(Array) + sql_array(value) + else + raise ArgumentError + end + end + # Convert a Ruby array to an SQL array. # # @param values [Array] @@ -603,4 +626,15 @@ module Searchable def sql_array(array) Arel.sql(ActiveRecord::Base.sanitize_sql(["ARRAY[?]", array])) end + + # @example Tag.sql_function(:sum, Tag.arel_table[:post_count]).to_sql == "SUM(tags.post_count)" + def sql_function(name, *args) + Arel::Nodes::NamedFunction.new(name.to_s, args.map { |arg| sql_value(arg) }) + end + + # @example Note.to_tsvector("pg_catalog.english", :body).to_sql == "to_tsvector('pg_catalog.english', notes.body)" + # https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-DOCUMENTS + def to_tsvector(config, column) + sql_function(:to_tsvector, config, column) + end end diff --git a/app/models/comment.rb b/app/models/comment.rb index e2686bc27..47237df12 100644 --- a/app/models/comment.rb +++ b/app/models/comment.rb @@ -28,7 +28,7 @@ class Comment < ApplicationRecord module SearchMethods def search(params) q = search_attributes(params, :id, :created_at, :updated_at, :is_deleted, :is_sticky, :do_not_bump_post, :body, :score, :post, :creator, :updater) - q = q.text_attribute_matches(:body, params[:body_matches], index_column: :body_index) + q = q.text_attribute_matches(:body, params[:body_matches]) case params[:order] when "post_id", "post_id_desc" diff --git a/app/models/dmail.rb b/app/models/dmail.rb index 2dbc1498d..0a44e58b6 100644 --- a/app/models/dmail.rb +++ b/app/models/dmail.rb @@ -99,7 +99,8 @@ class Dmail < ApplicationRecord def search(params) q = search_attributes(params, :id, :created_at, :updated_at, :is_read, :is_deleted, :title, :body, :to, :from) q = q.text_attribute_matches(:title, params[:title_matches]) - q = q.text_attribute_matches(:body, params[:message_matches], index_column: :message_index) + q = q.text_attribute_matches(:body, params[:body_matches]) + q = q.text_attribute_matches([:title, :body], params[:message_matches]) q = q.folder_matches(params[:folder]) diff --git a/app/models/forum_post.rb b/app/models/forum_post.rb index 93567f80e..fa8872e78 100644 --- a/app/models/forum_post.rb +++ b/app/models/forum_post.rb @@ -53,7 +53,7 @@ class ForumPost < ApplicationRecord def search(params) q = search_attributes(params, :id, :created_at, :updated_at, :is_deleted, :body, :creator, :updater, :topic, :dtext_links, :votes, :tag_alias, :tag_implication, :bulk_update_request) - q = q.text_attribute_matches(:body, params[:body_matches], index_column: :text_index) + q = q.text_attribute_matches(:body, params[:body_matches]) if params[:linked_to].present? q = q.wiki_link_matches(params[:linked_to]) diff --git a/app/models/forum_topic.rb b/app/models/forum_topic.rb index 2b63bf081..05de1643d 100644 --- a/app/models/forum_topic.rb +++ b/app/models/forum_topic.rb @@ -86,7 +86,7 @@ class ForumTopic < ApplicationRecord def search(params) q = search_attributes(params, :id, :created_at, :updated_at, :is_sticky, :is_locked, :is_deleted, :category_id, :title, :response_count, :creator, :updater, :forum_posts, :bulk_update_requests, :tag_aliases, :tag_implications) - q = q.text_attribute_matches(:title, params[:title_matches], index_column: :text_index) + q = q.text_attribute_matches(:title, params[:title_matches]) if params[:is_private].to_s.truthy? q = q.private_only diff --git a/app/models/note.rb b/app/models/note.rb index 8693f3196..36094f28e 100644 --- a/app/models/note.rb +++ b/app/models/note.rb @@ -19,7 +19,7 @@ class Note < ApplicationRecord module SearchMethods def search(params) q = search_attributes(params, :id, :created_at, :updated_at, :is_active, :x, :y, :width, :height, :body, :version, :post) - q = q.text_attribute_matches(:body, params[:body_matches], index_column: :body_index) + q = q.text_attribute_matches(:body, params[:body_matches]) q.apply_default_order(params) end diff --git a/app/models/wiki_page.rb b/app/models/wiki_page.rb index 10f7165e2..ab7906a0c 100644 --- a/app/models/wiki_page.rb +++ b/app/models/wiki_page.rb @@ -70,7 +70,8 @@ class WikiPage < ApplicationRecord def search(params = {}) q = search_attributes(params, :id, :created_at, :updated_at, :is_locked, :is_deleted, :body, :title, :other_names, :tag, :artist, :dtext_links) - q = q.text_attribute_matches(:body, params[:body_matches], index_column: :body_index) + q = q.text_attribute_matches(:body, params[:body_matches]) + q = q.text_attribute_matches([:title, :body], params[:title_or_body_matches]) if params[:title_normalize].present? q = q.where_like(:title, normalize_title(params[:title_normalize])) diff --git a/app/views/wiki_pages/_search.html.erb b/app/views/wiki_pages/_search.html.erb index cf9bd1834..f8f7d61c8 100644 --- a/app/views/wiki_pages/_search.html.erb +++ b/app/views/wiki_pages/_search.html.erb @@ -1,7 +1,7 @@ <%= search_form_for(wiki_pages_path) do |f| %> <%= f.input :title_normalize, label: "Title", hint: "Use * for wildcard", input_html: { value: params[:search][:title_normalize], "data-autocomplete": "wiki-page" } %> <%= f.input :other_names_match, label: "Other names", hint: "Use * for wildcard", input_html: { value: params[:search][:other_names_match] } %> - <%= f.input :body_matches, label: "Body", hint: "Use * for wildcard", input_html: { value: params[:search][:body_matches] } %> + <%= f.input :title_or_body_matches, label: "Text", hint: "Use * for wildcard", input_html: { value: params[:search][:title_or_body_matches] } %> <%= f.input :linked_to, hint: "Find wikis linking to this wiki", input_html: { value: params[:search][:linked_to], "data-autocomplete": "wiki-page" } %> <%= f.input :is_deleted, label: "Deleted?", as: :select, include_blank: true, selected: params[:search][:is_deleted] %> <%= f.input :order, collection: [%w[Newest created_at], %w[Title title], %w[Posts post_count]], include_blank: true, selected: params[:search][:order] %> diff --git a/db/migrate/20211015223510_add_tsvector_index_to_multiple.rb b/db/migrate/20211015223510_add_tsvector_index_to_multiple.rb new file mode 100644 index 000000000..85c45be82 --- /dev/null +++ b/db/migrate/20211015223510_add_tsvector_index_to_multiple.rb @@ -0,0 +1,28 @@ +class AddTsvectorIndexToMultiple < ActiveRecord::Migration[6.1] + disable_ddl_transaction! + + def up + add_index :comments, "to_tsvector('pg_catalog.english', body)", using: :gin, algorithm: :concurrently, name: "index_comments_on_body_tsvector" + add_index :dmails, "(to_tsvector('pg_catalog.english', title) || to_tsvector('pg_catalog.english', body))", using: :gin, algorithm: :concurrently, name: "index_dmails_on_title_and_body_tsvector" + add_index :forum_posts, "to_tsvector('pg_catalog.english', body)", using: :gin, algorithm: :concurrently, name: "index_forum_posts_on_body_tsvector" + add_index :forum_topics, "to_tsvector('pg_catalog.english', title)", using: :gin, algorithm: :concurrently, name: "index_forum_topics_on_title_tsvector" + add_index :notes, "to_tsvector('pg_catalog.english', body)", using: :gin, algorithm: :concurrently, name: "index_notes_on_body_tsvector" + add_index :wiki_pages, "(to_tsvector('pg_catalog.english', title) || to_tsvector('pg_catalog.english', body))", using: :gin, algorithm: :concurrently, name: "index_wiki_pages_on_title_and_body_tsvector" + + execute("VACUUM (VERBOSE, ANALYZE) comments") + execute("VACUUM (VERBOSE, ANALYZE) dmails") + execute("VACUUM (VERBOSE, ANALYZE) forum_posts") + execute("VACUUM (VERBOSE, ANALYZE) forum_topics") + execute("VACUUM (VERBOSE, ANALYZE) notes") + execute("VACUUM (VERBOSE, ANALYZE) wiki_pages") + end + + def down + remove_index :comments, algorithm: :concurrently, name: "index_comments_on_body_tsvector" + remove_index :dmails, algorithm: :concurrently, name: "index_dmails_on_title_and_body_tsvector" + remove_index :forum_posts, algorithm: :concurrently, name: "index_forum_posts_on_body_tsvector" + remove_index :forum_topics, algorithm: :concurrently, name: "index_forum_topics_on_title_tsvector" + remove_index :notes, algorithm: :concurrently, name: "index_notes_on_body_tsvector" + remove_index :wiki_pages, algorithm: :concurrently, name: "index_wiki_pages_on_title_and_body_tsvector" + end +end diff --git a/db/structure.sql b/db/structure.sql index 076394b7d..68fd062eb 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -3281,6 +3281,13 @@ CREATE UNIQUE INDEX index_comment_votes_on_user_id_and_comment_id ON public.comm CREATE INDEX index_comments_on_body_index ON public.comments USING gin (body_index); +-- +-- Name: index_comments_on_body_tsvector; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_comments_on_body_tsvector ON public.comments USING gin (to_tsvector('english'::regconfig, body)); + + -- -- Name: index_comments_on_created_at; Type: INDEX; Schema: public; Owner: - -- @@ -3379,6 +3386,13 @@ CREATE INDEX index_dmails_on_message_index ON public.dmails USING gin (message_i CREATE INDEX index_dmails_on_owner_id ON public.dmails USING btree (owner_id); +-- +-- Name: index_dmails_on_title_and_body_tsvector; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_dmails_on_title_and_body_tsvector ON public.dmails USING gin (((to_tsvector('english'::regconfig, title) || to_tsvector('english'::regconfig, body)))); + + -- -- Name: index_dtext_links_on_link_target; Type: INDEX; Schema: public; Owner: - -- @@ -3498,6 +3512,13 @@ CREATE INDEX index_forum_post_votes_on_forum_post_id ON public.forum_post_votes CREATE UNIQUE INDEX index_forum_post_votes_on_forum_post_id_and_creator_id ON public.forum_post_votes USING btree (forum_post_id, creator_id); +-- +-- Name: index_forum_posts_on_body_tsvector; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_forum_posts_on_body_tsvector ON public.forum_posts USING gin (to_tsvector('english'::regconfig, body)); + + -- -- Name: index_forum_posts_on_creator_id; Type: INDEX; Schema: public; Owner: - -- @@ -3568,6 +3589,13 @@ CREATE INDEX index_forum_topics_on_is_sticky_and_updated_at ON public.forum_topi CREATE INDEX index_forum_topics_on_text_index ON public.forum_topics USING gin (text_index); +-- +-- Name: index_forum_topics_on_title_tsvector; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_forum_topics_on_title_tsvector ON public.forum_topics USING gin (to_tsvector('english'::regconfig, (title)::text)); + + -- -- Name: index_forum_topics_on_updated_at; Type: INDEX; Schema: public; Owner: - -- @@ -3855,6 +3883,13 @@ CREATE INDEX index_note_versions_on_updater_ip_addr ON public.note_versions USIN CREATE INDEX index_notes_on_body_index ON public.notes USING gin (body_index); +-- +-- Name: index_notes_on_body_tsvector; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_notes_on_body_tsvector ON public.notes USING gin (to_tsvector('english'::regconfig, body)); + + -- -- Name: index_notes_on_post_id; Type: INDEX; Schema: public; Owner: - -- @@ -4675,6 +4710,13 @@ CREATE INDEX index_wiki_pages_on_other_names ON public.wiki_pages USING gin (oth CREATE UNIQUE INDEX index_wiki_pages_on_title ON public.wiki_pages USING btree (title); +-- +-- Name: index_wiki_pages_on_title_and_body_tsvector; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_wiki_pages_on_title_and_body_tsvector ON public.wiki_pages USING gin (((to_tsvector('english'::regconfig, (title)::text) || to_tsvector('english'::regconfig, body)))); + + -- -- Name: index_wiki_pages_on_title_pattern; Type: INDEX; Schema: public; Owner: - -- @@ -4996,6 +5038,7 @@ INSERT INTO "schema_migrations" (version) VALUES ('20211010181657'), ('20211011044400'), ('20211013011619'), -('20211014063943'); +('20211014063943'), +('20211015223510');