post queries: fix parsing of trailing parentheses.

Fix queries like `(fate_(series) saber)` being parsed as `fate_(series` + `saber)`
instead of `fate_(series)` + `saber`.

This is pretty hacky. We assume that parentheses in tags are balanced.
So the rule is that trailing parentheses are part of the tag as long as
they're balanced, and not part of the tag if they're unbalanced.
This commit is contained in:
evazion
2022-04-07 21:24:52 -05:00
parent 5f1c296011
commit c45d1d42c2
2 changed files with 86 additions and 21 deletions

View File

@@ -154,27 +154,32 @@ class PostQuery
end end
end end
# term = metatag | tag | wildcard
def term def term
metatag || wildcard || tag one_of [
method(:tag),
method(:metatag),
method(:wildcard),
]
end end
# metatag = metatag_name ":" quoted_string # metatag = metatag_name ":" quoted_string
# metatag_name = "user" | "fav" | "pool" | "order" | ... # metatag_name = "user" | "fav" | "pool" | "order" | ...
def metatag def metatag
if accept(METATAG_NAME_REGEX) name = expect(METATAG_NAME_REGEX)
name = @scanner.matched.delete_suffix(":").downcase quoted, value = quoted_string
name = name.singularize + "_count" if name.in?(PostQueryBuilder::COUNT_METATAG_SYNONYMS)
quoted, value = quoted_string
if name == "order" name = name.delete_suffix(":").downcase
attribute, direction, _tail = value.to_s.downcase.partition(/_(asc|desc)\z/i) name = name.singularize + "_count" if name.in?(PostQueryBuilder::COUNT_METATAG_SYNONYMS)
if attribute.in?(PostQueryBuilder::COUNT_METATAG_SYNONYMS)
value = attribute.singularize + "_count" + direction if name == "order"
end attribute, direction, _tail = value.to_s.downcase.partition(/_(asc|desc)\z/i)
if attribute.in?(PostQueryBuilder::COUNT_METATAG_SYNONYMS)
value = attribute.singularize + "_count" + direction
end end
node(:metatag, name, value, quoted)
end end
node(:metatag, name, value, quoted)
end end
def quoted_string def quoted_string
@@ -193,26 +198,27 @@ class PostQuery
# A wildcard is a string that contains a '*' character and that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters. # A wildcard is a string that contains a '*' character and that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters.
def wildcard def wildcard
if t = accept(/(?=[^ ]*\*)[^ \)~-][^ ]*/) t = string(/(?=[^ ]*\*)[^ \)~-][^ ]*/, skip_balanced_parens: true)
space raise Error if t.match?(/\A#{METATAG_NAME_REGEX}/)
node(:wildcard, t.downcase) space
end node(:wildcard, t.downcase)
end end
# A tag is a string that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters. # A tag is a string that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters.
def tag def tag
t = string(/[^ \)~-][^ ]*/) t = string(/[^ \)~-][^ ]*/, skip_balanced_parens: true)
raise Error if t.downcase.in?(%w[and or]) raise Error if t.downcase.in?(%w[and or]) || t.include?("*") || t.match?(/\A#{METATAG_NAME_REGEX}/)
space space
node(:tag, t.downcase) node(:tag, t.downcase)
end end
def string(pattern) def string(pattern, skip_balanced_parens: false)
str = expect(pattern) str = expect(pattern)
# XXX: Now put back any trailing right parens we mistakenly consumed. # XXX: Now put back any trailing right parens we mistakenly consumed.
n = @unclosed_parens n = @unclosed_parens
while n > 0 && str.ends_with?(")") while n > 0 && str.ends_with?(")")
break if skip_balanced_parens && (str.has_balanced_parens? || str.in?(Tag::PERMITTED_UNBALANCED_TAGS))
str.chop! str.chop!
scanner.pos -= 1 scanner.pos -= 1
n -= 1 n -= 1
@@ -276,6 +282,17 @@ class PostQuery
[first, *rest] [first, *rest]
end end
# Given a list of parsers, return the first one that succeeds.
def one_of(parsers)
parsers.each do |parser|
return backtrack { parser.call }
rescue Error
next
end
raise Error, "expected one of: #{parsers}"
end
# Build an AST node of the given type. # Build an AST node of the given type.
def node(type, *args) def node(type, *args)
AST.new(type, args) AST.new(type, args)

View File

@@ -19,14 +19,62 @@ class PostQueryParserTest < ActiveSupport::TestCase
assert_parse_equals("a", "a") assert_parse_equals("a", "a")
assert_parse_equals("a", "A") assert_parse_equals("a", "A")
assert_parse_equals(";)", ";)")
assert_parse_equals("9", "(9)")
end
should "parse parentheses correctly" do
assert_parse_equals("foo_(bar)", "foo_(bar)") assert_parse_equals("foo_(bar)", "foo_(bar)")
assert_parse_equals("foo_(bar)", "(foo_(bar))") assert_parse_equals("foo_(bar)", "(foo_(bar))")
assert_parse_equals("foo_(bar)", "((foo_(bar)))")
assert_parse_equals("foo_(bar_(baz))", "foo_(bar_(baz))") assert_parse_equals("foo_(bar_(baz))", "foo_(bar_(baz))")
assert_parse_equals("foo_(bar_(baz))", "(foo_(bar_(baz)))") assert_parse_equals("foo_(bar_(baz))", "(foo_(bar_(baz)))")
assert_parse_equals("foo_(bar_baz))", "(foo_(bar_baz)))")
assert_parse_equals(";)", ";)") assert_parse_equals("(and abc_(def) ghi)", "abc_(def) ghi")
assert_parse_equals("9", "(9)") assert_parse_equals("(and abc_(def) ghi)", "(abc_(def) ghi)")
assert_parse_equals("(and abc_(def) ghi)", "((abc_(def)) ghi)")
assert_parse_equals("(and abc def_(ghi))", "abc def_(ghi)")
assert_parse_equals("(and abc def_(ghi))", "(abc def_(ghi))")
assert_parse_equals("(and abc def_(ghi))", "(abc (def_(ghi)))")
assert_parse_equals("(and abc_(def) ghi_(jkl))", "abc_(def) ghi_(jkl)")
assert_parse_equals("(and abc_(def) ghi_(jkl))", "(abc_(def) ghi_(jkl))")
assert_parse_equals(":)", ":)")
assert_parse_equals(":)", "(:))")
assert_parse_equals("none", "(:)")
assert_parse_equals("(and :) >:))", "(:) >:))")
assert_parse_equals("none", "(:) >:)")
assert_parse_equals("(wildcard *))", "*)")
assert_parse_equals("(wildcard *)", "(*)")
assert_parse_equals("(wildcard foo*)", "(foo*)")
assert_parse_equals("(wildcard foo*))", "foo*)")
assert_parse_equals("(and bar (wildcard foo*)))", "foo*) bar")
assert_parse_equals("(and bar (wildcard foo*))", "(foo*) bar")
assert_parse_equals("(and bar) (wildcard foo*))", "(foo*) bar)")
assert_parse_equals("(wildcard *_(foo))", "*_(foo)")
assert_parse_equals("(wildcard *_(foo))", "(*_(foo))")
assert_parse_equals("(and bar (wildcard *_(foo)))", "(*_(foo) bar)")
assert_parse_equals("(and bar (wildcard *_(foo)))", "((*_(foo)) bar)")
assert_parse_equals("(and bar (wildcard *_(foo)))", "(bar *_(foo))")
assert_parse_equals("(and bar (wildcard *_(foo)))", "(bar (*_(foo)))")
assert_parse_equals("note:a", "(note:a)")
assert_parse_equals("note:(a", "(note:(a)")
assert_parse_equals("note:(a)", "(note:(a))")
assert_parse_equals("(and note:a note:b)", "(note:a note:b)")
assert_parse_equals("(and note:a note:b))", "(note:a) note:b)")
assert_parse_equals('(and note:"a)" note:b)', '(note:"a)" note:b)')
end end
should "parse basic queries correctly" do should "parse basic queries correctly" do