Factor out StringParser from PostQuery::Parser.

Factor out StringParser class from PostQuery::Parser so it can be used
for other parsing tasks.
This commit is contained in:
evazion
2022-04-26 22:14:14 -05:00
parent 9eaea22fac
commit d4b448dd94
2 changed files with 120 additions and 81 deletions

View File

@@ -1,7 +1,5 @@
# frozen_string_literal: true
require "strscan"
# A PostQuery::Parser parses a search string into a PostQuery::AST.
#
# @example
@@ -27,27 +25,19 @@ require "strscan"
# | "'" /[^']+/ "'"
# tag = /[^ *]+/
# wildcard = /[^ ]+/
#
# Ref:
#
# * https://hmac.dev/posts/2019-05-19-ruby-parser-combinators.html
class PostQuery
class Parser
extend Memoist
class Error < StandardError; end
METATAG_NAME_REGEX = /(#{PostQueryBuilder::METATAGS.join("|")}):/i
attr_reader :input
private attr_reader :scanner, :unclosed_parens
attr_reader :parser
delegate :error, :rest, :eos?, :accept, :expect, :rewind, :zero_or_more, :one_or_more, :one_of, to: :parser
# @param input [String] The search string to parse.
def initialize(input)
@input = input.to_s.clone.freeze
@scanner = StringScanner.new(@input)
@unclosed_parens = 0
@parser = StringParser.new(input, state: 0) # 0 is the initial number of unclosed parens.
end
# Parse a search and return the AST.
@@ -64,7 +54,7 @@ class PostQuery
# @return [PostQuery::AST] The AST of the parsed search.
def parse
parse!
rescue Error
rescue StringParser::Error
AST.none
end
@@ -73,8 +63,8 @@ class PostQuery
# @return [PostQuery::AST] The AST of the parsed search.
def parse!
ast = root
raise Error, "Unexpected EOS (rest: '#{scanner.rest}')" unless scanner.eos?
raise Error, "Unclosed parentheses (#{@unclosed_parens})" unless @unclosed_parens == 0
error("Unexpected EOS (rest: '#{rest}')") unless eos?
error("Unclosed parentheses (#{unclosed_parens})") unless unclosed_parens == 0
ast
end
@@ -144,10 +134,10 @@ class PostQuery
space
if accept("(")
@unclosed_parens += 1
self.unclosed_parens += 1
a = or_clause
expect(")")
@unclosed_parens -= 1
self.unclosed_parens -= 1
a
else
term
@@ -189,7 +179,7 @@ class PostQuery
# A wildcard is a string that contains a '*' character and that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters.
def wildcard
t = string(/(?=[^ ]*\*)[^ \)~-][^ ]*/, skip_balanced_parens: true)
raise Error if t.match?(/\A#{METATAG_NAME_REGEX}/)
error("Invalid tag name: #{t}") if t.match?(/\A#{METATAG_NAME_REGEX}/)
space
AST.wildcard(t)
end
@@ -197,7 +187,7 @@ class PostQuery
# A tag is a string that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters.
def tag
t = string(/[^ \)~-][^ ]*/, skip_balanced_parens: true)
raise Error if t.downcase.in?(%w[and or]) || t.include?("*") || t.match?(/\A#{METATAG_NAME_REGEX}/)
error("Invalid tag name: #{t}") if t.downcase.in?(%w[and or]) || t.include?("*") || t.match?(/\A#{METATAG_NAME_REGEX}/)
space
AST.tag(t)
end
@@ -206,11 +196,11 @@ class PostQuery
str = expect(pattern)
# XXX: Now put back any trailing right parens we mistakenly consumed.
n = @unclosed_parens
n = unclosed_parens
while n > 0 && str.ends_with?(")")
break if skip_balanced_parens && (str.has_balanced_parens? || str.in?(Tag::PERMITTED_UNBALANCED_TAGS))
str.chop!
scanner.pos -= 1
rewind
n -= 1
end
@@ -222,66 +212,14 @@ class PostQuery
end
end
concerning :HelperMethods do
private
# The current number of '(' characters without a matching ')'. Used for
# determining whether a trailing ')' is part of a tag or not.
private def unclosed_parens
parser.state
end
# Try to match `pattern`, returning the string if it matched or nil if it didn't.
#
# @param pattern [Regexp, String] The pattern to match.
# @return [String, nil] The matched string, or nil
def accept(pattern)
@scanner.scan(pattern)
end
# Try to match `pattern`, returning the string if it matched or raising an Error if it didn't.
#
# @param pattern [Regexp, String] The pattern to match.
# @return [String] The matched string
def expect(pattern)
str = accept(pattern)
raise Error, "Expected '#{pattern}'; got '#{str}'" if str.nil?
str
end
# Try to parse the given block, backtracking to the original state if the parse failed.
def backtrack(&block)
saved_pos = @scanner.pos
saved_unclosed_parens = @unclosed_parens
raise Error if @scanner.eos?
yield
rescue Error
@scanner.pos = saved_pos
@unclosed_parens = saved_unclosed_parens
raise
end
# Parse the block zero or more times, returning an array of parse results.
def zero_or_more(&block)
matches = []
loop do
matches << backtrack { yield }
end
rescue Error
matches
end
# Parse the block one or more times, returning an array of parse results.
def one_or_more(&block)
first = yield
rest = zero_or_more(&block)
[first, *rest]
end
# Given a list of parsers, return the first one that succeeds.
def one_of(parsers)
parsers.each do |parser|
return backtrack { parser.call }
rescue Error
next
end
raise Error, "expected one of: #{parsers}"
end
private def unclosed_parens=(n)
parser.state = n
end
memoize :parse, :parse!

View File

@@ -0,0 +1,101 @@
# frozen_string_literal: true
require "strscan"
# A StringParser is a wrapper around StringScanner that adds extra
# helper methods for writing parser-combinator style parsers.
#
# @see StringScanner
# @see https://hmac.dev/posts/2019-05-19-ruby-parser-combinators.html
class StringParser
class Error < StandardError; end
attr_reader :input
attr_accessor :state
private attr_reader :scanner
delegate :rest, :eos?, to: :scanner
# @param input [String] The string to parse.
# @param state [Object] An arbitrary piece of user-defined state. Will be
# rolled back when the parser backtracks or is reset.
def initialize(input, state: nil)
@input = input.to_s.clone.freeze
@state = state
@scanner = StringScanner.new(@input)
end
# Try to match `pattern`, returning the string if it matched or nil if it didn't.
#
# @param pattern [Regexp, String] The pattern to match.
# @return [String, nil] The matched string, or nil
def accept(pattern)
scanner.scan(pattern)
end
# Try to match `pattern`, returning the string if it matched or raising an Error if it didn't.
#
# @param pattern [Regexp, String] The pattern to match.
# @return [String] The matched string
# @raise [Error] If the pattern didn't match
def expect(pattern)
str = scanner.scan(pattern)
error("Expected '#{pattern}'; got '#{str}'") if str.nil?
str
end
# Move the scan pointer back N characters (default: 1)
#
# @param n [Integer] The number of characters to move back (default: 1).
def rewind(n = 1)
scanner.pos -= n
end
# Raise a parse error.
#
# @param message [String] The parse error message.
# @raise [Error]
def error(message)
raise Error, message
end
# Try to parse the given block, backtracking to the previous state if the parse failed.
def backtrack(&block)
saved_pos = scanner.pos
saved_state = state.deep_dup
error("Unexpected EOS") if scanner.eos?
yield
rescue Error
scanner.pos = saved_pos
self.state = saved_state
raise
end
# Parse the block zero or more times, returning an array of parse results.
def zero_or_more(&block)
matches = []
loop do
matches << backtrack { yield }
end
rescue Error
matches
end
# Parse the block one or more times, returning an array of parse results.
def one_or_more(&block)
first = yield
rest = zero_or_more(&block)
[first, *rest]
end
# Given a list of parsers, try each in sequence and return the first one that succeeds.
def one_of(parsers)
parsers.each do |parser|
return backtrack { parser.call }
rescue Error
next
end
error("expected one of: #{parsers}")
end
end