Factor out StringParser from PostQuery::Parser.

Factor out StringParser class from PostQuery::Parser so it can be used
for other parsing tasks.
This commit is contained in:
evazion
2022-04-26 22:14:14 -05:00
parent 9eaea22fac
commit d4b448dd94
2 changed files with 120 additions and 81 deletions

View File

@@ -1,7 +1,5 @@
# frozen_string_literal: true # frozen_string_literal: true
require "strscan"
# A PostQuery::Parser parses a search string into a PostQuery::AST. # A PostQuery::Parser parses a search string into a PostQuery::AST.
# #
# @example # @example
@@ -27,27 +25,19 @@ require "strscan"
# | "'" /[^']+/ "'" # | "'" /[^']+/ "'"
# tag = /[^ *]+/ # tag = /[^ *]+/
# wildcard = /[^ ]+/ # wildcard = /[^ ]+/
#
# Ref:
#
# * https://hmac.dev/posts/2019-05-19-ruby-parser-combinators.html
class PostQuery class PostQuery
class Parser class Parser
extend Memoist extend Memoist
class Error < StandardError; end
METATAG_NAME_REGEX = /(#{PostQueryBuilder::METATAGS.join("|")}):/i METATAG_NAME_REGEX = /(#{PostQueryBuilder::METATAGS.join("|")}):/i
attr_reader :input attr_reader :parser
private attr_reader :scanner, :unclosed_parens delegate :error, :rest, :eos?, :accept, :expect, :rewind, :zero_or_more, :one_or_more, :one_of, to: :parser
# @param input [String] The search string to parse. # @param input [String] The search string to parse.
def initialize(input) def initialize(input)
@input = input.to_s.clone.freeze @parser = StringParser.new(input, state: 0) # 0 is the initial number of unclosed parens.
@scanner = StringScanner.new(@input)
@unclosed_parens = 0
end end
# Parse a search and return the AST. # Parse a search and return the AST.
@@ -64,7 +54,7 @@ class PostQuery
# @return [PostQuery::AST] The AST of the parsed search. # @return [PostQuery::AST] The AST of the parsed search.
def parse def parse
parse! parse!
rescue Error rescue StringParser::Error
AST.none AST.none
end end
@@ -73,8 +63,8 @@ class PostQuery
# @return [PostQuery::AST] The AST of the parsed search. # @return [PostQuery::AST] The AST of the parsed search.
def parse! def parse!
ast = root ast = root
raise Error, "Unexpected EOS (rest: '#{scanner.rest}')" unless scanner.eos? error("Unexpected EOS (rest: '#{rest}')") unless eos?
raise Error, "Unclosed parentheses (#{@unclosed_parens})" unless @unclosed_parens == 0 error("Unclosed parentheses (#{unclosed_parens})") unless unclosed_parens == 0
ast ast
end end
@@ -144,10 +134,10 @@ class PostQuery
space space
if accept("(") if accept("(")
@unclosed_parens += 1 self.unclosed_parens += 1
a = or_clause a = or_clause
expect(")") expect(")")
@unclosed_parens -= 1 self.unclosed_parens -= 1
a a
else else
term term
@@ -189,7 +179,7 @@ class PostQuery
# A wildcard is a string that contains a '*' character and that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters. # A wildcard is a string that contains a '*' character and that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters.
def wildcard def wildcard
t = string(/(?=[^ ]*\*)[^ \)~-][^ ]*/, skip_balanced_parens: true) t = string(/(?=[^ ]*\*)[^ \)~-][^ ]*/, skip_balanced_parens: true)
raise Error if t.match?(/\A#{METATAG_NAME_REGEX}/) error("Invalid tag name: #{t}") if t.match?(/\A#{METATAG_NAME_REGEX}/)
space space
AST.wildcard(t) AST.wildcard(t)
end end
@@ -197,7 +187,7 @@ class PostQuery
# A tag is a string that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters. # A tag is a string that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters.
def tag def tag
t = string(/[^ \)~-][^ ]*/, skip_balanced_parens: true) t = string(/[^ \)~-][^ ]*/, skip_balanced_parens: true)
raise Error if t.downcase.in?(%w[and or]) || t.include?("*") || t.match?(/\A#{METATAG_NAME_REGEX}/) error("Invalid tag name: #{t}") if t.downcase.in?(%w[and or]) || t.include?("*") || t.match?(/\A#{METATAG_NAME_REGEX}/)
space space
AST.tag(t) AST.tag(t)
end end
@@ -206,11 +196,11 @@ class PostQuery
str = expect(pattern) str = expect(pattern)
# XXX: Now put back any trailing right parens we mistakenly consumed. # XXX: Now put back any trailing right parens we mistakenly consumed.
n = @unclosed_parens n = unclosed_parens
while n > 0 && str.ends_with?(")") while n > 0 && str.ends_with?(")")
break if skip_balanced_parens && (str.has_balanced_parens? || str.in?(Tag::PERMITTED_UNBALANCED_TAGS)) break if skip_balanced_parens && (str.has_balanced_parens? || str.in?(Tag::PERMITTED_UNBALANCED_TAGS))
str.chop! str.chop!
scanner.pos -= 1 rewind
n -= 1 n -= 1
end end
@@ -222,66 +212,14 @@ class PostQuery
end end
end end
concerning :HelperMethods do # The current number of '(' characters without a matching ')'. Used for
private # determining whether a trailing ')' is part of a tag or not.
private def unclosed_parens
parser.state
end
# Try to match `pattern`, returning the string if it matched or nil if it didn't. private def unclosed_parens=(n)
# parser.state = n
# @param pattern [Regexp, String] The pattern to match.
# @return [String, nil] The matched string, or nil
def accept(pattern)
@scanner.scan(pattern)
end
# Try to match `pattern`, returning the string if it matched or raising an Error if it didn't.
#
# @param pattern [Regexp, String] The pattern to match.
# @return [String] The matched string
def expect(pattern)
str = accept(pattern)
raise Error, "Expected '#{pattern}'; got '#{str}'" if str.nil?
str
end
# Try to parse the given block, backtracking to the original state if the parse failed.
def backtrack(&block)
saved_pos = @scanner.pos
saved_unclosed_parens = @unclosed_parens
raise Error if @scanner.eos?
yield
rescue Error
@scanner.pos = saved_pos
@unclosed_parens = saved_unclosed_parens
raise
end
# Parse the block zero or more times, returning an array of parse results.
def zero_or_more(&block)
matches = []
loop do
matches << backtrack { yield }
end
rescue Error
matches
end
# Parse the block one or more times, returning an array of parse results.
def one_or_more(&block)
first = yield
rest = zero_or_more(&block)
[first, *rest]
end
# Given a list of parsers, return the first one that succeeds.
def one_of(parsers)
parsers.each do |parser|
return backtrack { parser.call }
rescue Error
next
end
raise Error, "expected one of: #{parsers}"
end
end end
memoize :parse, :parse! memoize :parse, :parse!

View File

@@ -0,0 +1,101 @@
# frozen_string_literal: true
require "strscan"
# A StringParser is a wrapper around StringScanner that adds extra
# helper methods for writing parser-combinator style parsers.
#
# @see StringScanner
# @see https://hmac.dev/posts/2019-05-19-ruby-parser-combinators.html
class StringParser
  # Raised when the input can't be parsed. The combinators below also rescue
  # it to detect that an alternative failed and the parser must backtrack.
  class Error < StandardError; end

  # @return [String] A frozen copy of the string being parsed.
  attr_reader :input

  # @return [Object] The user-defined state passed to {#initialize}. Restored
  #   to its previous value whenever a parse backtracks.
  attr_accessor :state

  private attr_reader :scanner

  # @param input [String] The string to parse. Converted with `to_s`, copied, and frozen.
  # @param state [Object] An arbitrary piece of user-defined state. Will be
  #   rolled back when the parser backtracks.
  def initialize(input, state: nil)
    @input = input.to_s.clone.freeze
    @state = state
    @scanner = StringScanner.new(@input)
  end

  # @return [String] The part of the input that hasn't been consumed yet.
  def rest
    scanner.rest
  end

  # @return [Boolean] True if the scan pointer is at the end of the input.
  def eos?
    scanner.eos?
  end

  # Try to match `pattern`, returning the string if it matched or nil if it didn't.
  #
  # @param pattern [Regexp, String] The pattern to match.
  # @return [String, nil] The matched string, or nil
  def accept(pattern)
    scanner.scan(pattern)
  end

  # Try to match `pattern`, returning the string if it matched or raising an Error if it didn't.
  #
  # @param pattern [Regexp, String] The pattern to match.
  # @return [String] The matched string
  # @raise [Error] If the pattern didn't match
  def expect(pattern)
    str = accept(pattern)
    # The match result is always nil on this path, so report the unconsumed
    # input as what we "got" instead of interpolating the nil.
    error("Expected '#{pattern}'; got '#{rest}'") if str.nil?
    str
  end

  # Move the scan pointer back N characters (default: 1).
  #
  # StringScanner#pos is a byte offset, so the target is computed as a
  # character index and then converted back to bytes. This keeps the pointer on
  # a character boundary when the input contains multibyte characters. A
  # negative `n` moves the pointer forward.
  #
  # @param n [Integer] The number of characters to move back (default: 1).
  # @return [Integer] The new byte offset of the scan pointer.
  # @raise [RangeError] If the move would leave the bounds of the input. Raised
  #   instead of setting a negative position, which StringScanner would
  #   interpret as an offset from the end of the string.
  def rewind(n = 1)
    target = scanner.charpos - n
    length = scanner.string.length

    unless target.between?(0, length)
      raise RangeError, "can't rewind #{n} characters from character #{scanner.charpos} of #{length}"
    end

    scanner.pos = scanner.string[0, target].bytesize
  end

  # Raise a parse error.
  #
  # @param message [String] The parse error message.
  # @raise [Error]
  def error(message)
    raise Error, message
  end

  # Try to parse the given block, backtracking to the previous state if the parse failed.
  #
  # The scan position and a copy of `state` are saved before yielding and are
  # restored if the block raises an Error, which is then re-raised.
  #
  # NOTE(review): `deep_dup` is not core Ruby; presumably this relies on
  # ActiveSupport's Object#deep_dup being loaded - confirm before using this
  # class outside the Rails app.
  #
  # @yield The parse to attempt.
  # @return [Object] The block's return value.
  # @raise [Error] If the block fails, or if the input is already exhausted.
  def backtrack
    saved_pos = scanner.pos
    saved_state = state.deep_dup

    # Failing at the end of the input is what ends the loop in #zero_or_more
    # once everything has been consumed.
    error("Unexpected EOS") if scanner.eos?

    yield
  rescue Error
    scanner.pos = saved_pos
    self.state = saved_state
    raise
  end

  # Parse the block zero or more times, returning an array of parse results.
  #
  # Repeats until the block raises an Error or the input is exhausted.
  # NOTE(review): a block that succeeds without consuming any input would never
  # trigger either condition, so callers should make sure each successful
  # iteration advances the scan pointer.
  #
  # @yield The parse to repeat.
  # @return [Array] The results of each successful parse (possibly empty).
  def zero_or_more
    matches = []

    loop do
      matches << backtrack { yield }
    end
  rescue Error
    matches
  end

  # Parse the block one or more times, returning an array of parse results.
  #
  # The first parse is not wrapped in #backtrack, so if it fails the Error
  # propagates to the caller.
  #
  # @yield The parse to repeat.
  # @return [Array] The results of each successful parse (at least one).
  # @raise [Error] If the first parse fails.
  def one_or_more(&block)
    first = yield
    rest = zero_or_more(&block)
    [first, *rest]
  end

  # Given a list of parsers, try each in sequence and return the first one that succeeds.
  #
  # @param parsers [Array<#call>] The parsers to try, in order.
  # @return [Object] The result of the first parser that didn't raise an Error.
  # @raise [Error] If every parser failed.
  def one_of(parsers)
    parsers.each do |parser|
      return backtrack { parser.call }
    rescue Error
      next
    end

    error("expected one of: #{parsers}")
  end
end