From c45d1d42c2cdf146743b814bfcdad218c2583f23 Mon Sep 17 00:00:00 2001 From: evazion Date: Thu, 7 Apr 2022 21:24:52 -0500 Subject: [PATCH] post queries: fix parsing of trailing parentheses. Fix queries like `(fate_(series) saber)` being parsed as `fate_(series` + `saber)` instead of `fate_(series)` + `saber`. This is pretty hacky. We assume that parentheses in tags are balanced. So the rule is that trailing parentheses are part of the tag as long as they're balanced, and not part of the tag if they're unbalanced. --- app/logical/post_query/parser.rb | 55 +++++++++++++++++++---------- test/unit/post_query_parser_test.rb | 52 +++++++++++++++++++++++++-- 2 files changed, 86 insertions(+), 21 deletions(-) diff --git a/app/logical/post_query/parser.rb b/app/logical/post_query/parser.rb index c09cd36a6..736e9e40c 100644 --- a/app/logical/post_query/parser.rb +++ b/app/logical/post_query/parser.rb @@ -154,27 +154,32 @@ class PostQuery end end + # term = metatag | tag | wildcard def term - metatag || wildcard || tag + one_of [ + method(:tag), + method(:metatag), + method(:wildcard), + ] end # metatag = metatag_name ":" quoted_string # metatag_name = "user" | "fav" | "pool" | "order" | ... def metatag - if accept(METATAG_NAME_REGEX) - name = @scanner.matched.delete_suffix(":").downcase - name = name.singularize + "_count" if name.in?(PostQueryBuilder::COUNT_METATAG_SYNONYMS) - quoted, value = quoted_string + name = expect(METATAG_NAME_REGEX) + quoted, value = quoted_string - if name == "order" - attribute, direction, _tail = value.to_s.downcase.partition(/_(asc|desc)\z/i) - if attribute.in?(PostQueryBuilder::COUNT_METATAG_SYNONYMS) - value = attribute.singularize + "_count" + direction - end + name = name.delete_suffix(":").downcase + name = name.singularize + "_count" if name.in?(PostQueryBuilder::COUNT_METATAG_SYNONYMS) + + if name == "order" + attribute, direction, _tail = value.to_s.downcase.partition(/_(asc|desc)\z/i) + if attribute.in?(PostQueryBuilder::COUNT_METATAG_SYNONYMS) + value = attribute.singularize + "_count" + direction end - - node(:metatag, name, value, quoted) end + + node(:metatag, name, value, quoted) end def quoted_string @@ -193,26 +198,27 @@ class PostQuery # A wildcard is a string that contains a '*' character and that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters. def wildcard - if t = accept(/(?=[^ ]*\*)[^ \)~-][^ ]*/) - space - node(:wildcard, t.downcase) - end + t = string(/(?=[^ ]*\*)[^ \)~-][^ ]*/, skip_balanced_parens: true) + raise Error if t.match?(/\A#{METATAG_NAME_REGEX}/) + space + node(:wildcard, t.downcase) end # A tag is a string that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters. def tag - t = string(/[^ \)~-][^ ]*/) - raise Error if t.downcase.in?(%w[and or]) + t = string(/[^ \)~-][^ ]*/, skip_balanced_parens: true) + raise Error if t.downcase.in?(%w[and or]) || t.include?("*") || t.match?(/\A#{METATAG_NAME_REGEX}/) space node(:tag, t.downcase) end - def string(pattern) + def string(pattern, skip_balanced_parens: false) str = expect(pattern) # XXX: Now put back any trailing right parens we mistakenly consumed. n = @unclosed_parens while n > 0 && str.ends_with?(")") + break if skip_balanced_parens && (str.has_balanced_parens? || str.in?(Tag::PERMITTED_UNBALANCED_TAGS)) str.chop! scanner.pos -= 1 n -= 1 @@ -276,6 +282,17 @@ class PostQuery [first, *rest] end + # Given a list of parsers, return the first one that succeeds. + def one_of(parsers) + parsers.each do |parser| + return backtrack { parser.call } + rescue Error + next + end + + raise Error, "expected one of: #{parsers}" + end + # Build an AST node of the given type. def node(type, *args) AST.new(type, args) diff --git a/test/unit/post_query_parser_test.rb b/test/unit/post_query_parser_test.rb index 4301f226c..72a325c38 100644 --- a/test/unit/post_query_parser_test.rb +++ b/test/unit/post_query_parser_test.rb @@ -19,14 +19,62 @@ class PostQueryParserTest < ActiveSupport::TestCase assert_parse_equals("a", "a") assert_parse_equals("a", "A") + assert_parse_equals(";)", ";)") + assert_parse_equals("9", "(9)") + end + + should "parse parentheses correctly" do assert_parse_equals("foo_(bar)", "foo_(bar)") assert_parse_equals("foo_(bar)", "(foo_(bar))") + assert_parse_equals("foo_(bar)", "((foo_(bar)))") assert_parse_equals("foo_(bar_(baz))", "foo_(bar_(baz))") assert_parse_equals("foo_(bar_(baz))", "(foo_(bar_(baz)))") + assert_parse_equals("foo_(bar_baz))", "(foo_(bar_baz)))") - assert_parse_equals(";)", ";)") - assert_parse_equals("9", "(9)") + assert_parse_equals("(and abc_(def) ghi)", "abc_(def) ghi") + assert_parse_equals("(and abc_(def) ghi)", "(abc_(def) ghi)") + assert_parse_equals("(and abc_(def) ghi)", "((abc_(def)) ghi)") + + assert_parse_equals("(and abc def_(ghi))", "abc def_(ghi)") + assert_parse_equals("(and abc def_(ghi))", "(abc def_(ghi))") + assert_parse_equals("(and abc def_(ghi))", "(abc (def_(ghi)))") + + assert_parse_equals("(and abc_(def) ghi_(jkl))", "abc_(def) ghi_(jkl)") + assert_parse_equals("(and abc_(def) ghi_(jkl))", "(abc_(def) ghi_(jkl))") + + assert_parse_equals(":)", ":)") + assert_parse_equals(":)", "(:))") + assert_parse_equals("none", "(:)") + + assert_parse_equals("(and :) >:))", "(:) >:))") + assert_parse_equals("none", "(:) >:)") + + assert_parse_equals("(wildcard *))", "*)") + assert_parse_equals("(wildcard *)", "(*)") + + assert_parse_equals("(wildcard foo*)", "(foo*)") + assert_parse_equals("(wildcard foo*))", "foo*)") + + assert_parse_equals("(and bar (wildcard foo*)))", "foo*) bar") + assert_parse_equals("(and bar (wildcard foo*))", "(foo*) bar") + assert_parse_equals("(and bar) (wildcard foo*))", "(foo*) bar)") + + assert_parse_equals("(wildcard *_(foo))", "*_(foo)") + assert_parse_equals("(wildcard *_(foo))", "(*_(foo))") + + assert_parse_equals("(and bar (wildcard *_(foo)))", "(*_(foo) bar)") + assert_parse_equals("(and bar (wildcard *_(foo)))", "((*_(foo)) bar)") + assert_parse_equals("(and bar (wildcard *_(foo)))", "(bar *_(foo))") + assert_parse_equals("(and bar (wildcard *_(foo)))", "(bar (*_(foo)))") + + assert_parse_equals("note:a", "(note:a)") + assert_parse_equals("note:(a", "(note:(a)") + assert_parse_equals("note:(a)", "(note:(a))") + + assert_parse_equals("(and note:a note:b)", "(note:a note:b)") + assert_parse_equals("(and note:a note:b))", "(note:a) note:b)") + assert_parse_equals('(and note:"a)" note:b)', '(note:"a)" note:b)') end should "parse basic queries correctly" do