Refactor full-text search to get rid of tsvector columns.

Refactor full-text search on several tables (comments, dmails,
forum_posts, forum_topics, notes, and wiki_pages) to use to_tsvector
expression indexes instead of dedicated tsvector columns. This way
full-text search works the same way across all tables.

API changes:

* Changed /wiki_pages.json?search[body_matches] to match against only
  the body. Previously, `body_matches` matched against both the title and the body.

* Added /wiki_pages.json?search[title_or_body_matches] to match against
  both the title and the body.

* Fixed /dmails.json?search[message_matches] to match against both the
  title and body when doing a wildcard search. Previously, a wildcard search
  only matched against the body.

* Added /dmails.json?search[body_matches] to match against only the dmail body.
This commit is contained in:
evazion
2021-10-16 05:38:07 -05:00
parent 300bc6941e
commit e3b836b506
10 changed files with 125 additions and 18 deletions

View File

@@ -160,6 +160,16 @@ module Searchable
where("#{qualified_column_for(attr)} ? :key", key: key)
end
# Full-text search one or more columns using PostgreSQL's `@@` match operator.
#
# Each column is wrapped in `to_tsvector('pg_catalog.english', column)`; when
# several columns are given the resulting tsvectors are joined with Arel's
# `concat`, so the query is matched against all of them at once. The query
# string is bound as a parameter and parsed with `plainto_tsquery`.
#
# @param columns [Symbol, Array<Symbol>] the column name(s) to search
# @param query [String] the search string
# @return the relation produced by `where`
#
# https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-DOCUMENTS
# https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
def where_tsvector_matches(columns, query)
  document = Array.wrap(columns)
    .map { |name| to_tsvector("pg_catalog.english", arel_table[name]) }
    .reduce(:concat)

  where("(#{document.to_sql}) @@ plainto_tsquery('pg_catalog.english', :query)", query: query)
end
def search_boolean_attribute(attr, params)
if params[attr].present?
boolean_attribute_matches(attr, params[attr])
@@ -194,18 +204,17 @@ module Searchable
end
end
# NOTE(review): this hunk is shown without +/- markers, so two versions of
# `text_attribute_matches` are interleaved below and the lines do not form
# valid Ruby as written. Lines that use `attribute` / `value` / `index_column` /
# `qualified_column` appear to belong to the removed
# `(attribute, value, index_column: nil)` version; lines that use `columns` /
# `query` appear to belong to the added `(columns, query)` version.
#
# Read on its own, the `(columns, query)` version returns `all` when `query`
# is nil, ORs together one `where_ilike` per column when the query contains
# `*`, and otherwise delegates to `where_tsvector_matches`.
def text_attribute_matches(attribute, value, index_column: nil)
return all unless value.present?
def text_attribute_matches(columns, query)
columns = Array.wrap(columns)
column = column_for_attribute(attribute)
qualified_column = "#{table_name}.#{column.name}"
if value =~ /\*/
where("lower(#{qualified_column}) LIKE :value ESCAPE E'\\\\'", value: value.mb_chars.downcase.to_escaped_for_sql_like)
elsif index_column.present?
where("#{table_name}.#{index_column} @@ plainto_tsquery('english', :value)", value: value)
if query.nil?
all
elsif query =~ /\*/
columns.map do |column|
where_ilike(column, query)
end.reduce(:or)
else
where("to_tsvector('english', #{qualified_column}) @@ plainto_tsquery('english', :value)", value: value)
where_tsvector_matches(columns, query)
end
end
@@ -596,6 +605,20 @@ module Searchable
end
end
# Convert a Ruby value into something usable as an argument in an Arel
# expression.
#
# * Arel nodes are returned unchanged. This check runs first, before the
#   String check, so a value Arel already recognises is never re-quoted.
# * Strings become quoted SQL literals (`Arel::Nodes.build_quoted`).
# * Symbols are treated as column names on this model's `arel_table`.
# * Arrays are converted with `sql_array`.
#
# @param value [Object] an Arel node, String, Symbol, or Array
# @return the value itself (Arel node) or its Arel/SQL representation
# @raise [ArgumentError] if the value is of any other type
def sql_value(value)
  return value if Arel.arel_node?(value)

  case value
  when String
    Arel::Nodes.build_quoted(value)
  when Symbol
    arel_table[value]
  when Array
    sql_array(value)
  else
    # Name the offending value so a bad call site can be found from the error alone.
    raise ArgumentError, "can't convert #{value.inspect} (#{value.class}) to an SQL value"
  end
end
# Convert a Ruby array to an SQL array.
#
# @param values [Array]
@@ -603,4 +626,15 @@ module Searchable
def sql_array(array)
  # Let ActiveRecord quote the elements into the `ARRAY[...]` placeholder,
  # then wrap the resulting string with `Arel.sql`.
  fragment = ActiveRecord::Base.sanitize_sql(["ARRAY[?]", array])
  Arel.sql(fragment)
end
# Build an Arel node for a call to the SQL function `name`.
#
# Every argument is first converted with `sql_value`, so callers may pass
# Arel nodes, strings, symbols (column names), or arrays.
#
# @example Tag.sql_function(:sum, Tag.arel_table[:post_count]).to_sql == "SUM(tags.post_count)"
# @param name [Symbol, String] the SQL function name (converted with `to_s`)
# @param args [Array] the function arguments
# @return [Arel::Nodes::NamedFunction]
def sql_function(name, *args)
  sql_args = args.map { |arg| sql_value(arg) }
  Arel::Nodes::NamedFunction.new(name.to_s, sql_args)
end
# Build an Arel node for a `to_tsvector(config, column)` SQL function call.
#
# Both arguments are passed through `sql_function`, which converts them with
# `sql_value` (so a String becomes a quoted literal and a Symbol becomes a
# column on this model's table).
#
# @example Note.to_tsvector("pg_catalog.english", :body).to_sql == "to_tsvector('pg_catalog.english', notes.body)"
# @param config the text search configuration, e.g. "pg_catalog.english"
# @param column the column (or expression) to convert to a tsvector
# @return the node returned by `sql_function`
# https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-DOCUMENTS
def to_tsvector(config, column)
sql_function(:to_tsvector, config, column)
end
end

View File

@@ -28,7 +28,7 @@ class Comment < ApplicationRecord
module SearchMethods
def search(params)
q = search_attributes(params, :id, :created_at, :updated_at, :is_deleted, :is_sticky, :do_not_bump_post, :body, :score, :post, :creator, :updater)
q = q.text_attribute_matches(:body, params[:body_matches], index_column: :body_index)
q = q.text_attribute_matches(:body, params[:body_matches])
case params[:order]
when "post_id", "post_id_desc"

View File

@@ -99,7 +99,8 @@ class Dmail < ApplicationRecord
def search(params)
q = search_attributes(params, :id, :created_at, :updated_at, :is_read, :is_deleted, :title, :body, :to, :from)
q = q.text_attribute_matches(:title, params[:title_matches])
q = q.text_attribute_matches(:body, params[:message_matches], index_column: :message_index)
q = q.text_attribute_matches(:body, params[:body_matches])
q = q.text_attribute_matches([:title, :body], params[:message_matches])
q = q.folder_matches(params[:folder])

View File

@@ -53,7 +53,7 @@ class ForumPost < ApplicationRecord
def search(params)
q = search_attributes(params, :id, :created_at, :updated_at, :is_deleted, :body, :creator, :updater, :topic, :dtext_links, :votes, :tag_alias, :tag_implication, :bulk_update_request)
q = q.text_attribute_matches(:body, params[:body_matches], index_column: :text_index)
q = q.text_attribute_matches(:body, params[:body_matches])
if params[:linked_to].present?
q = q.wiki_link_matches(params[:linked_to])

View File

@@ -86,7 +86,7 @@ class ForumTopic < ApplicationRecord
def search(params)
q = search_attributes(params, :id, :created_at, :updated_at, :is_sticky, :is_locked, :is_deleted, :category_id, :title, :response_count, :creator, :updater, :forum_posts, :bulk_update_requests, :tag_aliases, :tag_implications)
q = q.text_attribute_matches(:title, params[:title_matches], index_column: :text_index)
q = q.text_attribute_matches(:title, params[:title_matches])
if params[:is_private].to_s.truthy?
q = q.private_only

View File

@@ -19,7 +19,7 @@ class Note < ApplicationRecord
module SearchMethods
def search(params)
q = search_attributes(params, :id, :created_at, :updated_at, :is_active, :x, :y, :width, :height, :body, :version, :post)
q = q.text_attribute_matches(:body, params[:body_matches], index_column: :body_index)
q = q.text_attribute_matches(:body, params[:body_matches])
q.apply_default_order(params)
end

View File

@@ -70,7 +70,8 @@ class WikiPage < ApplicationRecord
def search(params = {})
q = search_attributes(params, :id, :created_at, :updated_at, :is_locked, :is_deleted, :body, :title, :other_names, :tag, :artist, :dtext_links)
q = q.text_attribute_matches(:body, params[:body_matches], index_column: :body_index)
q = q.text_attribute_matches(:body, params[:body_matches])
q = q.text_attribute_matches([:title, :body], params[:title_or_body_matches])
if params[:title_normalize].present?
q = q.where_like(:title, normalize_title(params[:title_normalize]))

View File

@@ -1,7 +1,7 @@
<%= search_form_for(wiki_pages_path) do |f| %>
<%= f.input :title_normalize, label: "Title", hint: "Use * for wildcard", input_html: { value: params[:search][:title_normalize], "data-autocomplete": "wiki-page" } %>
<%= f.input :other_names_match, label: "Other names", hint: "Use * for wildcard", input_html: { value: params[:search][:other_names_match] } %>
<%= f.input :body_matches, label: "Body", hint: "Use * for wildcard", input_html: { value: params[:search][:body_matches] } %>
<%= f.input :title_or_body_matches, label: "Text", hint: "Use * for wildcard", input_html: { value: params[:search][:title_or_body_matches] } %>
<%= f.input :linked_to, hint: "Find wikis linking to this wiki", input_html: { value: params[:search][:linked_to], "data-autocomplete": "wiki-page" } %>
<%= f.input :is_deleted, label: "Deleted?", as: :select, include_blank: true, selected: params[:search][:is_deleted] %>
<%= f.input :order, collection: [%w[Newest created_at], %w[Title title], %w[Posts post_count]], include_blank: true, selected: params[:search][:order] %>

View File

@@ -0,0 +1,28 @@
# Adds GIN expression indexes over `to_tsvector('pg_catalog.english', ...)`
# for the full-text searchable tables, then vacuums/analyzes each table.
class AddTsvectorIndexToMultiple < ActiveRecord::Migration[6.1]
  # The indexes are built and dropped with `algorithm: :concurrently`, and `up`
  # issues VACUUM; PostgreSQL does not allow either inside a transaction block.
  disable_ddl_transaction!

  # [table, indexed expression, index name], in the order they are created.
  TSVECTOR_INDEXES = [
    [:comments, "to_tsvector('pg_catalog.english', body)", "index_comments_on_body_tsvector"],
    [:dmails, "(to_tsvector('pg_catalog.english', title) || to_tsvector('pg_catalog.english', body))", "index_dmails_on_title_and_body_tsvector"],
    [:forum_posts, "to_tsvector('pg_catalog.english', body)", "index_forum_posts_on_body_tsvector"],
    [:forum_topics, "to_tsvector('pg_catalog.english', title)", "index_forum_topics_on_title_tsvector"],
    [:notes, "to_tsvector('pg_catalog.english', body)", "index_notes_on_body_tsvector"],
    [:wiki_pages, "(to_tsvector('pg_catalog.english', title) || to_tsvector('pg_catalog.english', body))", "index_wiki_pages_on_title_and_body_tsvector"],
  ].freeze
  private_constant :TSVECTOR_INDEXES

  # Create every index first, then vacuum/analyze every table.
  def up
    TSVECTOR_INDEXES.each do |table, expression, index_name|
      add_index table, expression, using: :gin, algorithm: :concurrently, name: index_name
    end

    TSVECTOR_INDEXES.each do |table, _expression, _index_name|
      execute("VACUUM (VERBOSE, ANALYZE) #{table}")
    end
  end

  # Drop the indexes by name, in the same order they were created.
  def down
    TSVECTOR_INDEXES.each do |table, _expression, index_name|
      remove_index table, algorithm: :concurrently, name: index_name
    end
  end
end

View File

@@ -3281,6 +3281,13 @@ CREATE UNIQUE INDEX index_comment_votes_on_user_id_and_comment_id ON public.comm
CREATE INDEX index_comments_on_body_index ON public.comments USING gin (body_index);
--
-- Name: index_comments_on_body_tsvector; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_comments_on_body_tsvector ON public.comments USING gin (to_tsvector('english'::regconfig, body));
--
-- Name: index_comments_on_created_at; Type: INDEX; Schema: public; Owner: -
--
@@ -3379,6 +3386,13 @@ CREATE INDEX index_dmails_on_message_index ON public.dmails USING gin (message_i
CREATE INDEX index_dmails_on_owner_id ON public.dmails USING btree (owner_id);
--
-- Name: index_dmails_on_title_and_body_tsvector; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_dmails_on_title_and_body_tsvector ON public.dmails USING gin (((to_tsvector('english'::regconfig, title) || to_tsvector('english'::regconfig, body))));
--
-- Name: index_dtext_links_on_link_target; Type: INDEX; Schema: public; Owner: -
--
@@ -3498,6 +3512,13 @@ CREATE INDEX index_forum_post_votes_on_forum_post_id ON public.forum_post_votes
CREATE UNIQUE INDEX index_forum_post_votes_on_forum_post_id_and_creator_id ON public.forum_post_votes USING btree (forum_post_id, creator_id);
--
-- Name: index_forum_posts_on_body_tsvector; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_forum_posts_on_body_tsvector ON public.forum_posts USING gin (to_tsvector('english'::regconfig, body));
--
-- Name: index_forum_posts_on_creator_id; Type: INDEX; Schema: public; Owner: -
--
@@ -3568,6 +3589,13 @@ CREATE INDEX index_forum_topics_on_is_sticky_and_updated_at ON public.forum_topi
CREATE INDEX index_forum_topics_on_text_index ON public.forum_topics USING gin (text_index);
--
-- Name: index_forum_topics_on_title_tsvector; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_forum_topics_on_title_tsvector ON public.forum_topics USING gin (to_tsvector('english'::regconfig, (title)::text));
--
-- Name: index_forum_topics_on_updated_at; Type: INDEX; Schema: public; Owner: -
--
@@ -3855,6 +3883,13 @@ CREATE INDEX index_note_versions_on_updater_ip_addr ON public.note_versions USIN
CREATE INDEX index_notes_on_body_index ON public.notes USING gin (body_index);
--
-- Name: index_notes_on_body_tsvector; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_notes_on_body_tsvector ON public.notes USING gin (to_tsvector('english'::regconfig, body));
--
-- Name: index_notes_on_post_id; Type: INDEX; Schema: public; Owner: -
--
@@ -4675,6 +4710,13 @@ CREATE INDEX index_wiki_pages_on_other_names ON public.wiki_pages USING gin (oth
CREATE UNIQUE INDEX index_wiki_pages_on_title ON public.wiki_pages USING btree (title);
--
-- Name: index_wiki_pages_on_title_and_body_tsvector; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_wiki_pages_on_title_and_body_tsvector ON public.wiki_pages USING gin (((to_tsvector('english'::regconfig, (title)::text) || to_tsvector('english'::regconfig, body))));
--
-- Name: index_wiki_pages_on_title_pattern; Type: INDEX; Schema: public; Owner: -
--
@@ -4996,6 +5038,7 @@ INSERT INTO "schema_migrations" (version) VALUES
('20211010181657'),
('20211011044400'),
('20211013011619'),
('20211014063943');
('20211014063943'),
('20211015223510');