Files
danbooru/app/logical/post_query/parser.rb
evazion 906ac25221 autocomplete: fix metatags not completing until a character is typed.
Fix metatags not showing autocomplete results until after the first
letter was typed. For example, typing `filetype:` didn't show any
completions until another letter was typed. Now typing `filetype:` shows
all available file types.

This was because `filetype:` by itself wasn't recognized as a valid
search before, since metatags always required a value. Now it is a valid
search, so it's technically possible to search for `filetype:` by
itself. In this case the metatag value will be the empty string, which
will return no results because there are no posts where the filetype is
the empty string.

This sounds nonsensical, but it's potentially useful for metatags like
the `source:` metatag, where searching for posts with an empty source
does make sense. It was also technically possible before by searching
for `source:""`, so making the value optional doesn't change anything.
2022-04-29 22:22:11 -05:00

228 lines
5.8 KiB
Ruby

# frozen_string_literal: true
# A PostQuery::Parser parses a search string into a PostQuery::AST.
#
# @example
#
# ast = PostQuery.new("1girl or 1boy").parse
#
# Grammar:
#
# root = or_clause [root]
# or_clause = and_clause "or" or_clause
# | and_clause
# and_clause = factor_list "and" and_clause
# | factor_list
# factor_list = factor [factor_list]
# factor = "-" expr
# | "~" expr
# | expr
# expr = "(" or_clause ")" | term
# term = metatag | tag | wildcard
# metatag = metatag_name ":" quoted_string
# metatag_name = "user" | "fav" | "pool" | "order" | ...
# quoted_string = '"' /[^"]+/ '"'
# | "'" /[^']+/ "'"
# tag = /[^ *]+/
# wildcard = /[^ ]+/
class PostQuery
class Parser
extend Memoist
METATAG_NAME_REGEX = /(#{PostQueryBuilder::METATAGS.join("|")}):/i
attr_reader :parser
delegate :error, :rest, :eos?, :accept, :expect, :rewind, :zero_or_more, :one_or_more, :one_of, to: :parser
# @param input [String] The search string to parse.
def initialize(input)
@parser = StringParser.new(input, state: 0) # 0 is the initial number of unclosed parens.
end
# Parse a search and return the AST.
#
# @param string [String] The search string to parse.
# @returns [PostQuery::AST] The AST of the parsed search.
def self.parse(string)
new(string).parse
end
concerning :ParserMethods do
# Parse the search and return the AST, or return a search that matches nothing if the parse failed.
#
# @return [PostQuery::AST] The AST of the parsed search.
def parse
parse!
rescue StringParser::Error
AST.none
end
# Parse the search and return the AST, or raise an error if the parse failed.
#
# @return [PostQuery::AST] The AST of the parsed search.
def parse!
ast = root
error("Unexpected EOS (rest: '#{rest}')") unless eos?
error("Unclosed parentheses (#{unclosed_parens})") unless unclosed_parens == 0
ast
end
private
# root = or_clause [root]
def root
a = zero_or_more { or_clause }
space
if a.empty?
AST.all
elsif a.size == 1
a.first
else
AST.new(:and, a)
end
end
# or_clause = and_clause "or" or_clause | and_clause
def or_clause
a = and_clause
space
if accept(/or +/i)
b = or_clause
AST.new(:or, [a, b])
else
a
end
end
# and_clause = factor_list "and" and_clause | factor_list
def and_clause
a = factor_list
space
if accept(/and +/i)
b = and_clause
AST.new(:and, [a, b])
else
a
end
end
# factor_list = factor [factor_list]
def factor_list
a = one_or_more { factor }
AST.new(:and, a)
end
# factor = "-" expr | "~" expr | expr
def factor
space
if accept("-")
AST.not(expr)
elsif accept("~")
AST.opt(expr)
else
expr
end
end
# expr = "(" or_clause ")" | term
def expr
space
if accept("(")
self.unclosed_parens += 1
a = or_clause
expect(")")
self.unclosed_parens -= 1
a
else
term
end
end
# term = metatag | tag | wildcard
def term
one_of [
method(:tag),
method(:metatag),
method(:wildcard),
]
end
# metatag = metatag_name ":" quoted_string
# metatag_name = "user" | "fav" | "pool" | "order" | ...
def metatag
name = expect(METATAG_NAME_REGEX).delete_suffix(":")
quoted, value = quoted_string
AST.metatag(name, value, quoted)
end
def quoted_string
if accept('"')
a = accept(/([^"\\]|\\")*/).gsub(/\\"/, '"') # handle backslash escaped quotes
expect('"')
[true, a]
elsif accept("'")
a = accept(/([^'\\]|\\')*/).gsub(/\\'/, "'") # handle backslash escaped quotes
expect("'")
[true, a]
else
[false, string(/[^ ]*/)]
end
end
# A wildcard is a string that contains a '*' character and that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters.
def wildcard
t = string(/(?=[^ ]*\*)[^ \)~-][^ ]*/, skip_balanced_parens: true)
error("Invalid tag name: #{t}") if t.match?(/\A#{METATAG_NAME_REGEX}/)
space
AST.wildcard(t)
end
# A tag is a string that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters.
def tag
t = string(/[^ \)~-][^ ]*/, skip_balanced_parens: true)
error("Invalid tag name: #{t}") if t.downcase.in?(%w[and or]) || t.include?("*") || t.match?(/\A#{METATAG_NAME_REGEX}/)
space
AST.tag(t)
end
def string(pattern, skip_balanced_parens: false)
str = expect(pattern)
# XXX: Now put back any trailing right parens we mistakenly consumed.
n = unclosed_parens
while n > 0 && str.ends_with?(")")
break if skip_balanced_parens && (str.has_balanced_parens? || str.in?(Tag::PERMITTED_UNBALANCED_TAGS))
str.chop!
rewind
n -= 1
end
str
end
def space
expect(/ */)
end
end
# The current number of '(' characters without a matching ')'. Used for
# determining whether a trailing ')' is part of a tag or not.
private def unclosed_parens
parser.state
end
private def unclosed_parens=(n)
parser.state = n
end
memoize :parse, :parse!
end
end