post queries: fix parsing of trailing parentheses.

Fix queries like `(fate_(series) saber)` being parsed as `fate_(series` + `saber)`
instead of `fate_(series)` + `saber`.

This is pretty hacky. We assume that parentheses in tags are balanced.
So the rule is that trailing parentheses are part of the tag as long as
they're balanced, and not part of the tag if they're unbalanced.
This commit is contained in:
evazion
2022-04-07 21:24:52 -05:00
parent 5f1c296011
commit c45d1d42c2
2 changed files with 86 additions and 21 deletions

View File

@@ -154,18 +154,24 @@ class PostQuery
end
end
# term = metatag | tag | wildcard
def term
metatag || wildcard || tag
one_of [
method(:tag),
method(:metatag),
method(:wildcard),
]
end
# metatag = metatag_name ":" quoted_string
# metatag_name = "user" | "fav" | "pool" | "order" | ...
def metatag
if accept(METATAG_NAME_REGEX)
name = @scanner.matched.delete_suffix(":").downcase
name = name.singularize + "_count" if name.in?(PostQueryBuilder::COUNT_METATAG_SYNONYMS)
name = expect(METATAG_NAME_REGEX)
quoted, value = quoted_string
name = name.delete_suffix(":").downcase
name = name.singularize + "_count" if name.in?(PostQueryBuilder::COUNT_METATAG_SYNONYMS)
if name == "order"
attribute, direction, _tail = value.to_s.downcase.partition(/_(asc|desc)\z/i)
if attribute.in?(PostQueryBuilder::COUNT_METATAG_SYNONYMS)
@@ -175,7 +181,6 @@ class PostQuery
node(:metatag, name, value, quoted)
end
end
def quoted_string
if accept('"')
@@ -193,26 +198,27 @@ class PostQuery
# A wildcard is a string that contains a '*' character and that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters.
def wildcard
if t = accept(/(?=[^ ]*\*)[^ \)~-][^ ]*/)
t = string(/(?=[^ ]*\*)[^ \)~-][^ ]*/, skip_balanced_parens: true)
raise Error if t.match?(/\A#{METATAG_NAME_REGEX}/)
space
node(:wildcard, t.downcase)
end
end
# A tag is a string that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters.
def tag
t = string(/[^ \)~-][^ ]*/)
raise Error if t.downcase.in?(%w[and or])
t = string(/[^ \)~-][^ ]*/, skip_balanced_parens: true)
raise Error if t.downcase.in?(%w[and or]) || t.include?("*") || t.match?(/\A#{METATAG_NAME_REGEX}/)
space
node(:tag, t.downcase)
end
def string(pattern)
def string(pattern, skip_balanced_parens: false)
str = expect(pattern)
# XXX: Now put back any trailing right parens we mistakenly consumed.
n = @unclosed_parens
while n > 0 && str.ends_with?(")")
break if skip_balanced_parens && (str.has_balanced_parens? || str.in?(Tag::PERMITTED_UNBALANCED_TAGS))
str.chop!
scanner.pos -= 1
n -= 1
@@ -276,6 +282,17 @@ class PostQuery
[first, *rest]
end
# Given a list of parsers, return the first one that succeeds.
def one_of(parsers)
parsers.each do |parser|
return backtrack { parser.call }
rescue Error
next
end
raise Error, "expected one of: #{parsers}"
end
# Build an AST node of the given type.
def node(type, *args)
AST.new(type, args)

View File

@@ -19,14 +19,62 @@ class PostQueryParserTest < ActiveSupport::TestCase
assert_parse_equals("a", "a")
assert_parse_equals("a", "A")
assert_parse_equals(";)", ";)")
assert_parse_equals("9", "(9)")
end
should "parse parentheses correctly" do
assert_parse_equals("foo_(bar)", "foo_(bar)")
assert_parse_equals("foo_(bar)", "(foo_(bar))")
assert_parse_equals("foo_(bar)", "((foo_(bar)))")
assert_parse_equals("foo_(bar_(baz))", "foo_(bar_(baz))")
assert_parse_equals("foo_(bar_(baz))", "(foo_(bar_(baz)))")
assert_parse_equals("foo_(bar_baz))", "(foo_(bar_baz)))")
assert_parse_equals(";)", ";)")
assert_parse_equals("9", "(9)")
assert_parse_equals("(and abc_(def) ghi)", "abc_(def) ghi")
assert_parse_equals("(and abc_(def) ghi)", "(abc_(def) ghi)")
assert_parse_equals("(and abc_(def) ghi)", "((abc_(def)) ghi)")
assert_parse_equals("(and abc def_(ghi))", "abc def_(ghi)")
assert_parse_equals("(and abc def_(ghi))", "(abc def_(ghi))")
assert_parse_equals("(and abc def_(ghi))", "(abc (def_(ghi)))")
assert_parse_equals("(and abc_(def) ghi_(jkl))", "abc_(def) ghi_(jkl)")
assert_parse_equals("(and abc_(def) ghi_(jkl))", "(abc_(def) ghi_(jkl))")
assert_parse_equals(":)", ":)")
assert_parse_equals(":)", "(:))")
assert_parse_equals("none", "(:)")
assert_parse_equals("(and :) >:))", "(:) >:))")
assert_parse_equals("none", "(:) >:)")
assert_parse_equals("(wildcard *))", "*)")
assert_parse_equals("(wildcard *)", "(*)")
assert_parse_equals("(wildcard foo*)", "(foo*)")
assert_parse_equals("(wildcard foo*))", "foo*)")
assert_parse_equals("(and bar (wildcard foo*)))", "foo*) bar")
assert_parse_equals("(and bar (wildcard foo*))", "(foo*) bar")
assert_parse_equals("(and bar) (wildcard foo*))", "(foo*) bar)")
assert_parse_equals("(wildcard *_(foo))", "*_(foo)")
assert_parse_equals("(wildcard *_(foo))", "(*_(foo))")
assert_parse_equals("(and bar (wildcard *_(foo)))", "(*_(foo) bar)")
assert_parse_equals("(and bar (wildcard *_(foo)))", "((*_(foo)) bar)")
assert_parse_equals("(and bar (wildcard *_(foo)))", "(bar *_(foo))")
assert_parse_equals("(and bar (wildcard *_(foo)))", "(bar (*_(foo)))")
assert_parse_equals("note:a", "(note:a)")
assert_parse_equals("note:(a", "(note:(a)")
assert_parse_equals("note:(a)", "(note:(a))")
assert_parse_equals("(and note:a note:b)", "(note:a note:b)")
assert_parse_equals("(and note:a note:b))", "(note:a) note:b)")
assert_parse_equals('(and note:"a)" note:b)', '(note:"a)" note:b)')
end
should "parse basic queries correctly" do