post queries: switch to new post search engine.

Switch to the post search engine using the new PostQuery parser. The new
engine fully supports AND, OR, and NOT operators and grouping expressions
with parentheses.

Highlights:

New OR operator:

* `skirt or dress` (same as `~skirt ~dress`)

Tags can be grouped with parentheses:

* `1girl (skirt or dress)`
* `(blonde_hair blue_eyes) or (red_hair green_eyes)`
* `~(blonde_hair blue_eyes) ~(red_hair green_eyes)` (same as above)
* `(pantyhose or thighhighs) (black_legwear or brown_legwear)`
* `(~pantyhose ~thighhighs) (~black_legwear ~brown_legwear)` (same as above)

Metatags can be OR'd together:

* `user:evazion or fav:evazion`
* `~user:evazion ~fav:evazion` (same as above)

Wildcard tags can be combined with either AND or OR:

* `black_* white_*` (find posts with at least one black_* tag AND one white_* tag)
* `black_* or white_*` (find posts with at least one black_* tag OR one white_* tag)
* `~black_* ~white_*` (same as above)

See 4c7cfc73 for more syntax examples.

Fixes #4949: And+or search?
Fixes #5056: Wildcard searches return unexpected results when combined with OR searches
This commit is contained in:
evazion
2022-04-04 16:52:11 -05:00
parent 703fd05025
commit af183467b6
11 changed files with 227 additions and 387 deletions

View File

@@ -3,11 +3,18 @@
class PostQuery
# Class-level declarations: memoization support, attribute readers, delegation
# to the AST, and aliases.
#
# NOTE(review): the reader and delegate declarations each appear twice below in
# slightly different forms. This looks like the before/after sides of a diff
# whose +/- markers were lost -- confirm against the real file before relying on it.
extend Memoist
private attr_reader :current_user, :tag_limit, :safe_mode, :hide_deleted_posts, :builder
# In this variant `current_user` is declared without `private`; the remaining
# readers are declared private.
attr_reader :current_user
private attr_reader :tag_limit, :safe_mode, :hide_deleted_posts, :builder
# The listed methods are forwarded to `ast`.
# NOTE(review): `:metatags` is listed twice in both delegate lists; the repeat
# looks redundant.
delegate :tag?, :metatag?, :wildcard?, :metatags, :wildcards, :tag_names, :metatags, to: :ast
delegate :tag?, :metatag?, :wildcard?, :metatags, :wildcards, :tag_names, :metatags, :to_infix, to: :ast
# `?`-suffixed names for the two flag readers.
alias_method :safe_mode?, :safe_mode
alias_method :hide_deleted_posts?, :hide_deleted_posts
# `to_s` is the delegated `to_infix`.
alias_method :to_s, :to_infix
# Build a PostQuery from the given arguments, then return the result of
# replacing its aliases and trimming it.
def self.normalize(...)
  query = PostQuery.new(...)
  query.replace_aliases.trim
end
def initialize(search_or_ast, current_user: User.anonymous, tag_limit: nil, safe_mode: false, hide_deleted_posts: false)
if search_or_ast.is_a?(AST)
@@ -39,10 +46,25 @@ class PostQuery
@ast ||= Parser.parse(search)
end
def fast_count(...)
builder.normalized_query.fast_count(...)
# Ask the builder for the posts matching this query, handing it the result of
# `to_cnf`.
def posts
  engine = builder
  engine.posts(to_cnf)
end
# Ask the builder for a paginated result. The result of `to_cnf` is passed
# first, followed by every argument given to this method, forwarded unchanged.
def paginated_posts(...)
  engine = builder
  engine.paginated_posts(to_cnf, ...)
end
# Name of the query's only tag, or nil unless `has_single_tag?` holds.
# The tag may not exist, it may be negated, and the query may still contain
# other metatags or wildcards.
def tag_name
  return unless has_single_tag?

  tag_names.first
end
# The query's only tag (first entry of `tags`), or nil unless `has_single_tag?`
# holds. The tag may be negated, and the query may still contain other
# metatags or wildcards.
def tag
  return unless has_single_tag?

  tags.first
end
# All tags contained in the query: a `Tag.where` lookup on the names in
# `tag_names`.
def tags
  names = tag_names
  Tag.where(name: names)
end
@@ -57,10 +79,39 @@ class PostQuery
ast.none?
end
def is_single_tag?
# True if the search is a single-term search for the given metatag.
#
# Without a value, the metatag only has to be present (`has_metatag?`). With a
# value, the metatag found by `find_metatag` must equal the value's string form.
def is_metatag?(name, value = nil)
  return is_single_term? && has_metatag?(name) if value.nil?

  is_single_term? && find_metatag(name) == value.to_s
end
# True when the search has exactly one term in total, counting tags, metatags,
# and wildcards together.
def is_single_term?
  [tag_names, metatags, wildcards].sum(&:size) == 1
end
# True if this search consists only of a single non-negated tag, with no other
# metatags or operators. The answer is whatever the AST reports from `tag?`.
def is_simple_tag?
  root = ast
  root.tag?
end
# True if the search contains a single tag. Other metatags or wildcards may be
# present as well, and the tag may be negated.
def has_single_tag?
  names = tag_names
  names.one?
end
# True if the search depends on the current user because of permissions or
# privacy settings. A search qualifies when any metatag is:
# * one of the names listed below,
# * `status:unmoderated`, or
# * `disapproved:<value>` where the downcased value is not one of
#   PostDisapproval::REASONS.
def is_user_dependent_search?
  user_scoped = %w[upvoter upvote downvoter downvote search flagger fav ordfav favgroup ordfavgroup]

  metatags.any? do |term|
    next true if user_scoped.include?(term.name)
    next true if term.name == "status" && term.value == "unmoderated"

    term.name == "disapproved" && !PostDisapproval::REASONS.include?(term.value.downcase)
  end
end
# Return the metatags whose name is one of the given names.
#
# Each requested name is stringified and downcased before comparison; the
# metatag's own name is compared as-is.
#
# @param names [Array<#to_s>] the metatag names to keep
# @return the entries of `metatags` whose `name` is among the requested names
def select_metatags(*names)
  # Normalize the requested names once, rather than once per metatag.
  wanted = names.map { |name| name.to_s.downcase }
  metatags.select { |metatag| wanted.include?(metatag.name) }
end
@@ -73,9 +124,9 @@ class PostQuery
select_metatags(*names).first&.value
end
# Return a new PostQuery with aliases replaced, implicit metatags added, and the query converted to conjunctive normal form.
def normalize
replace_aliases.with_implicit_metatags.to_cnf
# Return a new PostQuery with unnecessary AND and OR clauses eliminated: the
# AST is trimmed and the result is passed to `build`.
def trim
  trimmed_ast = ast.trim
  build(trimmed_ast)
end
# Return a new PostQuery with aliases replaced.
@@ -115,5 +166,75 @@ class PostQuery
hide_deleted_posts? && !has_status_metatag
end
memoize :tags, :normalize, :replace_aliases, :with_implicit_metatags, :to_cnf, :aliases, :implicit_metatags, :hide_deleted?
concerning :CountMethods do
# Post count for this query, taken from `fast_count` and kept in @post_count.
# A nil (or false) result is not retained, so the next call asks again.
def post_count
  return @post_count if @post_count

  @post_count = fast_count
end
# Count the posts returned by the search, preferring cheap answers.
#
# The estimate is tried first (when enabled). If it yields nothing, the count
# comes from the cache, or from an exact count when the cache is skipped.
#
# @param timeout [Integer] The database timeout in milliseconds
# @param estimate_count [Boolean] If true, estimate the count with inexact methods.
# @param skip_cache [Boolean] If true, don't use the cached count.
# @return [Integer, nil] The number of posts, or nil on timeout.
def fast_count(timeout: 1_000, estimate_count: true, skip_cache: false)
  total = estimate_count ? estimated_count : nil
  return total unless total.nil?

  skip_cache ? exact_count(timeout) : cached_count(timeout)
end
# Cheap count for a handful of query shapes, or nil when none of them applies.
#
# * empty search or a lone rating metatag: `estimated_row_count`
# * a simple tag: the tag's `post_count` (nil if there is no tag)
# * a lone pool/ordpool metatag: the pool's `post_count`, or 0 if no pool is found
# * a lone fav/ordfav metatag: 0 if no user is found; the user's
#   `favorite_count` if the policy for `current_user` allows seeing the
#   favorites; nil otherwise
def estimated_count
  return estimated_row_count if is_empty_search?
  return tag.try(:post_count) if is_simple_tag?
  return estimated_row_count if is_metatag?(:rating)

  if is_metatag?(:pool) || is_metatag?(:ordpool)
    pool = Pool.find_by_name(find_metatag(:pool, :ordpool))
    return pool&.post_count || 0
  end

  return unless is_metatag?(:fav) || is_metatag?(:ordfav)

  owner = User.find_by_name(find_metatag(:fav, :ordfav))
  return 0 if owner.nil?

  owner.favorite_count if Pundit.policy!(current_user, owner).can_see_favorites?
end
# Estimated number of rows for `posts`, as reported by ExplainParser (i.e.
# obtained by parsing the Postgres EXPLAIN output).
def estimated_row_count
  parser = ExplainParser.new(posts)
  parser.row_count
end
# Look the count up in Cache under `count_cache_key`, supplying an exact count
# as the block value.
# NOTE(review): presumably Cache.get only runs the block on a miss and keeps
# the result for `duration` -- confirm against Cache.
#
# @param timeout [Integer] passed through to `exact_count`
# @param duration the second argument given to Cache.get (5 minutes by default)
def cached_count(timeout, duration: 5.minutes)
  key = count_cache_key
  Cache.get(key, duration) { exact_count(timeout) }
end
# Count `posts` inside `Post.with_timeout(timeout)` and return what that call
# returns.
#
# @param timeout [Integer] handed to Post.with_timeout
def exact_count(timeout)
  Post.with_timeout(timeout) { posts.count }
end
# Cache key for this search's count: "pfc:" followed by the query string.
# When the search is user dependent, the current user's id (as an integer) is
# embedded as well, giving "pfc[<id>]:<query>".
def count_cache_key
  return "pfc:#{to_s}" unless is_user_dependent_search?

  "pfc[#{current_user.id.to_i}]:#{to_s}"
end
end
# Memoize the listed instance methods.
# NOTE(review): `memoize` presumably comes from Memoist, which the class extends -- confirm.
memoize :tags, :replace_aliases, :with_implicit_metatags, :to_cnf, :aliases, :implicit_metatags, :hide_deleted?
end