Refactor full-text search to get rid of tsvector columns.

Refactor full-text search on several tables (comments, dmails,
forum_posts, forum_topics, notes, and wiki_pages) to use to_tsvector
expression indexes instead of dedicated tsvector columns. This way
full-text search works the same way across all tables.

API changes:

* Changed /wiki_pages.json?search[body_matches] to match against only
  the body. Previously, `body_matches` matched against both the title and the body.

* Added /wiki_pages.json?search[title_or_body_matches] to match against
  both the title and the body.

* Fixed /dmails.json?search[message_matches] to match against both the
  title and body when doing a wildcard search. Previously, a wildcard search
  only matched against the body.

* Added /dmails.json?search[body_matches] to match against only the dmail body.
This commit is contained in:
evazion
2021-10-16 05:38:07 -05:00
parent 300bc6941e
commit e3b836b506
10 changed files with 125 additions and 18 deletions

View File

@@ -160,6 +160,16 @@ module Searchable
where("#{qualified_column_for(attr)} ? :key", key: key)
end
# Filter records by a full-text search over one or more columns.
#
# Each column is wrapped in to_tsvector using the "pg_catalog.english"
# configuration, the per-column tsvectors are joined with `concat`, and the
# result is matched (`@@`) against plainto_tsquery of the query.
#
# https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-DOCUMENTS
# https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
#
# @param columns [Symbol, Array<Symbol>] the column(s) to search
# @param query [String] the search text, passed as a bind parameter
# @return the result of `where` with the full-text condition applied
def where_tsvector_matches(columns, query)
  document = Array.wrap(columns)
    .map { |name| to_tsvector("pg_catalog.english", arel_table[name]) }
    .reduce(:concat)

  where("(#{document.to_sql}) @@ plainto_tsquery('pg_catalog.english', :query)", query: query)
end
def search_boolean_attribute(attr, params)
if params[attr].present?
boolean_attribute_matches(attr, params[attr])
@@ -194,18 +204,17 @@ module Searchable
end
end
# NOTE(review): this span is a diff hunk whose +/- markers were lost, so two
# versions of `text_attribute_matches` are interleaved below and the text is
# not runnable as shown. Which lines were removed and which were added is
# inferred from the variable names each line uses — confirm against the
# actual commit.
#
# Version taking (attribute, value, index_column: nil) — presumably the
# removed one:
#   * returns `all` unless `value` is present;
#   * when `value` contains `*`, does a LIKE match on the lowercased column
#     using the downcased, LIKE-escaped value;
#   * when `index_column` is given, matches that column against
#     plainto_tsquery('english', value);
#   * otherwise matches to_tsvector('english', column) against
#     plainto_tsquery('english', value).
#
# Version taking (columns, query) — presumably the added one:
#   * returns `all` when `query` is nil;
#   * when `query` contains `*`, builds one `where_ilike` condition per column
#     and combines them with `or`;
#   * otherwise delegates to `where_tsvector_matches(columns, query)`.
#
# NOTE(review): the (columns, query) version only short-circuits on nil, while
# the other version used `present?`; an empty-string query therefore appears
# to reach `where_tsvector_matches` — verify callers filter blank input.
def text_attribute_matches(attribute, value, index_column: nil)
return all unless value.present?
def text_attribute_matches(columns, query)
columns = Array.wrap(columns)
column = column_for_attribute(attribute)
qualified_column = "#{table_name}.#{column.name}"
if value =~ /\*/
where("lower(#{qualified_column}) LIKE :value ESCAPE E'\\\\'", value: value.mb_chars.downcase.to_escaped_for_sql_like)
elsif index_column.present?
where("#{table_name}.#{index_column} @@ plainto_tsquery('english', :value)", value: value)
if query.nil?
all
elsif query =~ /\*/
# One condition per column, OR-ed together, so a match in any column counts.
columns.map do |column|
where_ilike(column, query)
end.reduce(:or)
else
where("to_tsvector('english', #{qualified_column}) @@ plainto_tsquery('english', :value)", value: value)
where_tsvector_matches(columns, query)
end
end
@@ -596,6 +605,20 @@ module Searchable
end
end
# Coerce a Ruby value into something usable as an SQL expression argument.
#
# Values that `Arel.arel_node?` accepts pass through unchanged, Strings are
# turned into quoted literals, Symbols are looked up as columns on this
# model's `arel_table`, and Arrays are converted with `sql_array`.
#
# @param value [Object] an Arel node, String, Symbol, or Array
# @return the corresponding Arel node or SQL fragment
# @raise [ArgumentError] if the value is of any other type
def sql_value(value)
  # Checked before the String branch, preserving the original precedence.
  return value if Arel.arel_node?(value)

  case value
  when String then Arel::Nodes.build_quoted(value)
  when Symbol then arel_table[value]
  when Array then sql_array(value)
  else
    # Name the offending value so a bad call site can be diagnosed.
    raise ArgumentError, "can't convert #{value.inspect} (#{value.class}) to an SQL value"
  end
end
# Convert a Ruby array to an SQL array.
#
# @param array [Array]
@@ -603,4 +626,15 @@ module Searchable
def sql_array(array)
  # Let ActiveRecord expand and quote the elements for the `?` placeholder,
  # then wrap the resulting string with Arel.sql.
  template = ["ARRAY[?]", array]
  sanitized = ActiveRecord::Base.sanitize_sql(template)
  Arel.sql(sanitized)
end
# Build an Arel node for a call to the named SQL function.
#
# Every argument is first converted with `sql_value`.
#
# @example Tag.sql_function(:sum, Tag.arel_table[:post_count]).to_sql == "SUM(tags.post_count)"
# @param name [Symbol, String] the SQL function name
# @param args [Array] the function arguments, in order
# @return [Arel::Nodes::NamedFunction]
def sql_function(name, *args)
  function_name = name.to_s
  operands = args.map { |operand| sql_value(operand) }
  Arel::Nodes::NamedFunction.new(function_name, operands)
end
# Build a call to PostgreSQL's to_tsvector(config, document) via `sql_function`.
#
# @example Note.to_tsvector("pg_catalog.english", :body).to_sql == "to_tsvector('pg_catalog.english', notes.body)"
# https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-DOCUMENTS
#
# @param config the text search configuration, e.g. "pg_catalog.english"
# @param column the document to parse; any value `sql_function` accepts
# @return the node produced by `sql_function`
def to_tsvector(config, column)
  arguments = [config, column]
  sql_function(:to_tsvector, *arguments)
end
end