Fix metatags not showing autocomplete results until after the first letter was typed. For example, typing `filetype:` didn't show any completions until another letter was typed. Now typing `filetype:` shows all available file types. This was because `filetype:` by itself wasn't recognized as a valid search before, since metatags always required a value. Now it is a valid search, so it's technically possible to search for `filetype:` by itself. In this case the metatag value will be the empty string, which will return no results because there are no posts where the filetype is the empty string. This sounds nonsensical, but it's potentially useful for metatags like the `source:` metatag, where searching for posts with an empty source does make sense. It was also technically possible before by searching for `source:""`, so making the value optional doesn't change anything.
228 lines
5.8 KiB
Ruby
228 lines
5.8 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# A PostQuery::Parser parses a search string into a PostQuery::AST.
#
# @example
#
#   ast = PostQuery::Parser.parse("1girl or 1boy")
#
# Grammar:
#
#   root          = or_clause [root]
#   or_clause     = and_clause "or" or_clause
#                 | and_clause
#   and_clause    = factor_list "and" and_clause
#                 | factor_list
#   factor_list   = factor [factor_list]
#   factor        = "-" expr
#                 | "~" expr
#                 | expr
#   expr          = "(" or_clause ")" | term
#   term          = metatag | tag | wildcard
#   metatag       = metatag_name ":" quoted_string
#   metatag_name  = "user" | "fav" | "pool" | "order" | ...
#   quoted_string = '"' /([^"\\]|\\")*/ '"'
#                 | "'" /([^'\\]|\\')*/ "'"
#                 | /[^ ]*/
#   tag           = /[^ *]+/
#   wildcard      = /[^ ]+/
#
# The metatag value is optional: `filetype:` by itself parses as a metatag whose value is the
# empty string. Inside a quoted value, the quote character can be escaped with a backslash.
|
|
|
|
class PostQuery
  # A recursive-descent parser that turns a search string into a PostQuery::AST.
  #
  # Low-level scanning (accept, expect, rewind, one_of, ...) is delegated to a StringParser.
  # The StringParser's `state` slot holds the number of currently unclosed "(" characters.
  class Parser
    extend Memoist

    # Matches a metatag name followed by the ":" separator, case-insensitively (e.g. "filetype:").
    METATAG_NAME_REGEX = /(#{PostQueryBuilder::METATAGS.join("|")}):/i

    # METATAG_NAME_REGEX anchored to the start of the string. Built once here because an
    # interpolated regex literal would otherwise be recompiled on every #tag / #wildcard call.
    METATAG_PREFIX_REGEX = /\A#{METATAG_NAME_REGEX}/

    attr_reader :parser
    delegate :error, :rest, :eos?, :accept, :expect, :rewind, :zero_or_more, :one_or_more, :one_of, to: :parser

    # @param input [String] The search string to parse.
    def initialize(input)
      @parser = StringParser.new(input, state: 0) # 0 is the initial number of unclosed parens.
    end

    # Parse a search and return the AST.
    #
    # @param string [String] The search string to parse.
    # @return [PostQuery::AST] The AST of the parsed search.
    def self.parse(string)
      new(string).parse
    end

    concerning :ParserMethods do
      # Parse the search and return the AST, or return a search that matches nothing if the parse failed.
      #
      # @return [PostQuery::AST] The AST of the parsed search.
      def parse
        parse!
      rescue StringParser::Error
        AST.none
      end

      # Parse the search and return the AST, or raise an error if the parse failed.
      #
      # @return [PostQuery::AST] The AST of the parsed search.
      # @raise [StringParser::Error] If the search is malformed (this is the error class that #parse rescues).
      def parse!
        ast = root
        # Input left over after the root rule means the grammar stopped before the end of the string.
        error("Expected EOS (rest: '#{rest}')") unless eos?
        error("Unclosed parentheses (#{unclosed_parens})") unless unclosed_parens == 0
        ast
      end

      private

      # root = or_clause [root]
      #
      # Returns AST.all for a search with no clauses, the clause itself when there is exactly
      # one, and otherwise an :and node over all the clauses.
      def root
        a = zero_or_more { or_clause }
        space # consume trailing spaces so that #parse! sees EOS

        if a.empty?
          AST.all
        elsif a.size == 1
          a.first
        else
          AST.new(:and, a)
        end
      end

      # or_clause = and_clause "or" or_clause | and_clause
      def or_clause
        a = and_clause

        space
        # The "or" keyword must be followed by at least one space.
        if accept(/or +/i)
          b = or_clause
          AST.new(:or, [a, b])
        else
          a
        end
      end

      # and_clause = factor_list "and" and_clause | factor_list
      def and_clause
        a = factor_list

        space
        # The "and" keyword must be followed by at least one space.
        if accept(/and +/i)
          b = and_clause
          AST.new(:and, [a, b])
        else
          a
        end
      end

      # factor_list = factor [factor_list]
      #
      # The factors are always wrapped in an :and node, even when there is only one.
      def factor_list
        a = one_or_more { factor }
        AST.new(:and, a)
      end

      # factor = "-" expr | "~" expr | expr
      def factor
        space

        if accept("-")
          AST.not(expr)
        elsif accept("~")
          AST.opt(expr)
        else
          expr
        end
      end

      # expr = "(" or_clause ")" | term
      def expr
        space

        if accept("(")
          # Track the open paren so that #string knows a trailing ")" may close this group
          # instead of being part of a tag name.
          self.unclosed_parens += 1
          a = or_clause
          expect(")")
          self.unclosed_parens -= 1
          a
        else
          term
        end
      end

      # term = tag | metatag | wildcard
      #
      # The alternatives don't overlap: #tag rejects strings containing a "*" and strings that
      # start with a metatag name, and #wildcard rejects strings that start with a metatag name.
      # NOTE(review): presumably one_of tries the alternatives in the order listed and moves on
      # to the next one when an alternative fails - confirm against StringParser.
      def term
        one_of [
          method(:tag),
          method(:metatag),
          method(:wildcard),
        ]
      end

      # metatag = metatag_name ":" quoted_string
      # metatag_name = "user" | "fav" | "pool" | "order" | ...
      #
      # The value may be empty, so a bare `filetype:` parses as a metatag with the value "".
      def metatag
        name = expect(METATAG_NAME_REGEX).delete_suffix(":")
        quoted, value = quoted_string

        AST.metatag(name, value, quoted)
      end

      # Parse a metatag value: a double-quoted string, a single-quoted string, or an unquoted
      # run of nonspace characters. Any of the three forms may be empty.
      #
      # @return [Array(Boolean, String)] Whether the value was quoted, followed by the value
      #   with the surrounding quotes removed and backslash-escaped quotes unescaped.
      def quoted_string
        if accept('"')
          a = accept(/([^"\\]|\\")*/).gsub(/\\"/, '"') # handle backslash escaped quotes
          expect('"')
          [true, a]
        elsif accept("'")
          a = accept(/([^'\\]|\\')*/).gsub(/\\'/, "'") # handle backslash escaped quotes
          expect("'")
          [true, a]
        else
          [false, string(/[^ ]*/)]
        end
      end

      # A wildcard is a string that contains a '*' character and that begins with a character
      # other than space, ')', '~', and '-', followed by nonspace characters.
      def wildcard
        t = string(/(?=[^ ]*\*)[^ \)~-][^ ]*/, skip_balanced_parens: true)
        error("Invalid tag name: #{t}") if t.match?(METATAG_PREFIX_REGEX)
        space
        AST.wildcard(t)
      end

      # A tag is a string that begins with a character other than space, ')', '~', and '-',
      # followed by nonspace characters. The keywords "and" and "or", strings containing a '*',
      # and strings that start with a metatag name are not tags.
      def tag
        t = string(/[^ \)~-][^ ]*/, skip_balanced_parens: true)
        error("Invalid tag name: #{t}") if t.downcase.in?(%w[and or]) || t.include?("*") || t.match?(METATAG_PREFIX_REGEX)
        space
        AST.tag(t)
      end

      # Match `pattern`, then give back trailing ")" characters that may belong to an enclosing
      # parenthesized group rather than to the matched string.
      #
      # @param pattern [Regexp] The pattern to match at the current position.
      # @param skip_balanced_parens [Boolean] If true, stop giving back parens as soon as the
      #   string has balanced parens or is one of Tag::PERMITTED_UNBALANCED_TAGS.
      # @return [String] The matched string, minus any trailing parens that were given back.
      def string(pattern, skip_balanced_parens: false)
        str = expect(pattern)

        # XXX: Now put back any trailing right parens we mistakenly consumed.
        # At most one ")" is put back per currently unclosed "(".
        n = unclosed_parens
        while n > 0 && str.ends_with?(")")
          break if skip_balanced_parens && (str.has_balanced_parens? || str.in?(Tag::PERMITTED_UNBALANCED_TAGS))
          str.chop!
          rewind # NOTE(review): presumably moves the parser back by the one character just chopped - confirm
          n -= 1
        end

        str
      end

      # Skip zero or more spaces.
      def space
        expect(/ */)
      end
    end

    # The current number of '(' characters without a matching ')'. Used for
    # determining whether a trailing ')' is part of a tag or not.
    private def unclosed_parens
      parser.state
    end

    # Store the number of unclosed '(' characters in the parser's state slot.
    private def unclosed_parens=(n)
      parser.state = n
    end

    memoize :parse, :parse!
  end
end
|