posts: use string_to_array index for tag searches.

Use the `string_to_array(tag_string, ' ')` index instead of the
`tag_index` for tag searches. The string_to_array index lets us treat
the tag_string as an array for searching purposes. This lets us get rid
of the tag_index column and the test_parser dependency in the future.
This commit is contained in:
evazion
2021-10-10 16:34:15 -05:00
parent 51e9ea2772
commit 37a8dc5dbd
4 changed files with 71 additions and 26 deletions

View File

@@ -24,27 +24,34 @@ module Searchable
q
end
# Search a table column by an Arel operator.
#
# @see https://github.com/rails/rails/blob/master/activerecord/lib/arel/predications.rb
#
# @example SELECT * FROM posts WHERE id <= 42
#   Post.where_operator(:id, :lteq, 42)
#
# @param field [String, Symbol, Arel::Nodes::Node] the name of a table
#   column, an Arel node, or raw SQL
# @param operator [Symbol] the name of an Arel::Predications method (:eq,
#   :gt, :lt, :between, :in, :matches (LIKE), etc).
# @param args [Array] positional arguments forwarded to the operator method
# @param options [Hash] keyword arguments forwarded to the operator method
# @return [ActiveRecord::Relation]
def where_operator(field, operator, *args, **options)
  # arel_node already handles the "Arel node / column name / raw SQL"
  # dispatch, so the field is resolved exactly once, through that helper.
  arel = arel_node(field).send(operator, *args, **options)
  where(arel)
end
# Negated form of an operator search: builds the predicate
# `field <operator> args` and filters on its negation.
#
# @param field [String, Symbol, Arel::Nodes::Node] anything accepted by arel_node
# @param operator [Symbol] the name of the predicate method to call on the
#   resolved node
# @param args [Array] positional arguments forwarded to the operator method
# @param options [Hash] keyword arguments forwarded to the operator method
# @return the result of `where.not(predicate)`
def where_not_operator(field, operator, *args, **options)
  predicate = arel_node(field).send(operator, *args, **options)
  where.not(predicate)
end
# Search an array-valued column or SQL expression with an array operator.
#
# @param attr [String, Symbol, Arel::Nodes::Node] the column, Arel node, or
#   raw SQL expression to search (anything where_operator accepts)
# @param operator [Symbol] the array predicate to apply, e.g. :contains or
#   :overlaps
# @param values [Array] Ruby values, converted to an SQL array by sql_array
# @return whatever where_operator returns
def where_array_operator(attr, operator, values)
  # Build the SQL array once, via the shared helper, and issue a single query
  # condition instead of constructing a throwaway relation first.
  where_operator(attr, operator, sql_array(values))
end
# Negated counterpart of where_array_operator: converts `values` to an SQL
# array with sql_array and passes it to where_not_operator.
#
# @param attr the column, Arel node, or raw SQL expression to search
# @param operator [Symbol] the array predicate to negate (e.g. :overlaps)
# @param values [Array] Ruby values to convert to an SQL array
def where_not_array_operator(attr, operator, values)
where_not_operator(attr, operator, sql_array(values))
end
def where_like(attr, value)
@@ -97,6 +104,10 @@ module Searchable
where_array_operator(attr, :contains, values)
end
# Match rows whose array `attr` shares no element with `values`, by negating
# the :overlaps array operator.
#
# @param attr the array column or SQL expression to search
# @param values [Array] the values that must all be absent
def where_array_includes_none(attr, values)
where_not_array_operator(attr, :overlaps, values)
end
# Case-insensitive "includes any" search on an array column. The column is
# cast to text, lowercased, and cast back to text[] before being tested for
# overlap (&&) with the lowercased `values`.
#
# @param attr the attribute whose qualified column name is searched
# @param values [Array<String>] values to match; each is downcased first
def where_array_includes_any_lower(attr, values)
  column = qualified_column_for(attr)
  lowered_values = values.map(&:downcase)
  where("lower(#{column}::text)::text[] && ARRAY[?]", lowered_values)
end
@@ -561,4 +572,27 @@ module Searchable
# Build the table-qualified column name ("table.column") for an attribute.
#
# @param attr the attribute to look up with column_for_attribute
# @return [String] the table name and column name joined by a dot
def qualified_column_for(attr)
  table = table_name
  column = column_for_attribute(attr)
  "#{table}.#{column.name}"
end
# Turn a field reference into something Arel predicates can be called on.
#
# Resolution order:
# 1. an Arel node is returned untouched;
# 2. a name for which has_attribute? is true is looked up in arel_table;
# 3. anything else is stringified and wrapped as a raw SQL fragment.
#
# @param field [String, Symbol, Arel::Nodes::Node] an Arel node, the name of
#   a table column, or a raw SQL fragment
# @return the Arel node, table attribute, or SQL literal for the field
def arel_node(field)
  return field if field.is_a?(Arel::Nodes::Node)
  return arel_table[field] if has_attribute?(field)

  Arel.sql(field.to_s)
end
# Convert a Ruby array to an SQL `ARRAY[...]` literal.
#
# The values are bound to the `?` placeholder through
# ActiveRecord::Base.sanitize_sql rather than interpolated by hand.
#
# @param array [Array] the values to embed in the SQL array
# @return [Arel::Nodes::SqlLiteral]
def sql_array(array)
  array_literal = ActiveRecord::Base.sanitize_sql(["ARRAY[?]", array])
  Arel.sql(array_literal)
end
end

View File

@@ -107,12 +107,10 @@ class PostQueryBuilder
optional_tags += (matched_optional_wildcard_tags.empty? && !optional_wildcard_tags.empty?) ? optional_wildcard_tags.map(&:name) : matched_optional_wildcard_tags
optional_tags += (matched_required_wildcard_tags.empty? && !required_wildcard_tags.empty?) ? required_wildcard_tags.map(&:name) : matched_required_wildcard_tags
tsquery << "!(#{negated_tags.sort.uniq.map(&:to_escaped_for_tsquery).join(" | ")})" if negated_tags.present?
tsquery << "(#{optional_tags.sort.uniq.map(&:to_escaped_for_tsquery).join(" | ")})" if optional_tags.present?
tsquery << "(#{required_tags.sort.uniq.map(&:to_escaped_for_tsquery).join(" & ")})" if required_tags.present?
return relation if tsquery.empty?
relation.where("posts.tag_index @@ to_tsquery('danbooru', E?)", tsquery.join(" & "))
relation = relation.where_array_includes_all("string_to_array(posts.tag_string, ' ')", required_tags) if required_tags.present?
relation = relation.where_array_includes_any("string_to_array(posts.tag_string, ' ')", optional_tags) if optional_tags.present?
relation = relation.where_array_includes_none("string_to_array(posts.tag_string, ' ')", negated_tags) if negated_tags.present?
relation
end
def metatags_match(metatags, relation)
@@ -232,8 +230,7 @@ class PostQueryBuilder
end
# Find posts that are tagged with every one of the given tags.
#
# The search runs against `string_to_array(posts.tag_string, ' ')`, i.e. the
# space-separated tag_string treated as an array, rather than the tag_index
# column.
#
# @param tags [Array<String>] the tag names that must all be present
# @return the result of Post.where_array_includes_all
def tags_include(*tags)
  Post.where_array_includes_all("string_to_array(posts.tag_string, ' ')", tags)
end
def unaliased_matches(tag)

View File

@@ -1132,7 +1132,7 @@ class Post < ApplicationRecord
end
# Find posts whose tag_string contains the given tag.
#
# The tag is matched as-is against `string_to_array(posts.tag_string, ' ')`;
# no other processing of `tag` happens here.
#
# @param tag [String] the tag name to look for
# @return the result of Post.where_array_includes_all
def raw_tag_match(tag)
  Post.where_array_includes_all("string_to_array(posts.tag_string, ' ')", [tag])
end
# Perform a tag search as an anonymous user. No tag limit is enforced.

View File

@@ -1,8 +1,8 @@
require 'test_helper'
class PostQueryBuilderTest < ActiveSupport::TestCase
# Assert that a tag search for `query`, run as CurrentUser.user, returns
# exactly the given posts in the given order (compared by id).
#
# @param posts [Array] the expected posts, in expected result order
# @param query [String] the tag search to run
# @param options [Hash] extra keyword options forwarded to
#   Post.user_tag_match (e.g. tag_limit:)
def assert_tag_match(posts, query, **options)
  expected_ids = posts.map(&:id)
  actual_ids = Post.user_tag_match(query, CurrentUser.user, **options).pluck(:id)
  assert_equal(expected_ids, actual_ids)
end
def assert_fast_count(count, query, query_options = {}, fast_count_options = {})
@@ -144,6 +144,20 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
assert_tag_match([post2], "-*c -a*a")
end
# Combines all three tag kinds in one query: plain tags that must all be
# present, `~` tags of which presumably at least one must be present (OR), and
# `-` tags that presumably must all be absent (NOT).
should "return posts for a complex search with multiple AND, OR, and NOT tags" do
# post1-post3 lack both 1girl and multiple_girls.
post1 = create(:post, tag_string: "original")
post2 = create(:post, tag_string: "smile")
post3 = create(:post, tag_string: "original smile")
# post4 has original, smile and 1girl, and neither boy tag.
post4 = create(:post, tag_string: "original smile 1girl")
# post5 and post6 carry one of the negated boy tags.
post5 = create(:post, tag_string: "original smile 1girl 1boy")
post6 = create(:post, tag_string: "original smile 1girl multiple_boys")
# post7 has original, smile and multiple_girls, and neither boy tag.
post7 = create(:post, tag_string: "original smile multiple_girls")
# post8 and post9 carry one of the negated boy tags.
post8 = create(:post, tag_string: "original smile multiple_girls 1boy")
post9 = create(:post, tag_string: "original smile multiple_girls multiple_boys")
# Only post4 and post7 are expected. post7 is listed first, so results
# presumably come back newest first — confirm against the default ordering.
# NOTE(review): tag_limit: 100 presumably lifts the per-user tag limit so this
# six-tag query is allowed — confirm against Post.user_tag_match.
assert_tag_match([post7, post4], "original smile ~1girl ~multiple_girls -1boy -multiple_boys", tag_limit: 100)
end
should "ignore invalid operator syntax" do
assert_nothing_raised do
assert_tag_match([], "-")