Factor out StringParser from PostQuery::Parser.
Factor out StringParser class from PostQuery::Parser so it can be used for other parsing tasks.
This commit is contained in:
@@ -1,7 +1,5 @@
|
|||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
require "strscan"
|
|
||||||
|
|
||||||
# A PostQuery::Parser parses a search string into a PostQuery::AST.
|
# A PostQuery::Parser parses a search string into a PostQuery::AST.
|
||||||
#
|
#
|
||||||
# @example
|
# @example
|
||||||
@@ -27,27 +25,19 @@ require "strscan"
|
|||||||
# | "'" /[^']+/ "'"
|
# | "'" /[^']+/ "'"
|
||||||
# tag = /[^ *]+/
|
# tag = /[^ *]+/
|
||||||
# wildcard = /[^ ]+/
|
# wildcard = /[^ ]+/
|
||||||
#
|
|
||||||
# Ref:
|
|
||||||
#
|
|
||||||
# * https://hmac.dev/posts/2019-05-19-ruby-parser-combinators.html
|
|
||||||
|
|
||||||
class PostQuery
|
class PostQuery
|
||||||
class Parser
|
class Parser
|
||||||
extend Memoist
|
extend Memoist
|
||||||
|
|
||||||
class Error < StandardError; end
|
|
||||||
|
|
||||||
METATAG_NAME_REGEX = /(#{PostQueryBuilder::METATAGS.join("|")}):/i
|
METATAG_NAME_REGEX = /(#{PostQueryBuilder::METATAGS.join("|")}):/i
|
||||||
|
|
||||||
attr_reader :input
|
attr_reader :parser
|
||||||
private attr_reader :scanner, :unclosed_parens
|
delegate :error, :rest, :eos?, :accept, :expect, :rewind, :zero_or_more, :one_or_more, :one_of, to: :parser
|
||||||
|
|
||||||
# @param input [String] The search string to parse.
|
# @param input [String] The search string to parse.
|
||||||
def initialize(input)
|
def initialize(input)
|
||||||
@input = input.to_s.clone.freeze
|
@parser = StringParser.new(input, state: 0) # 0 is the initial number of unclosed parens.
|
||||||
@scanner = StringScanner.new(@input)
|
|
||||||
@unclosed_parens = 0
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse a search and return the AST.
|
# Parse a search and return the AST.
|
||||||
@@ -64,7 +54,7 @@ class PostQuery
|
|||||||
# @return [PostQuery::AST] The AST of the parsed search.
|
# @return [PostQuery::AST] The AST of the parsed search.
|
||||||
def parse
|
def parse
|
||||||
parse!
|
parse!
|
||||||
rescue Error
|
rescue StringParser::Error
|
||||||
AST.none
|
AST.none
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -73,8 +63,8 @@ class PostQuery
|
|||||||
# @return [PostQuery::AST] The AST of the parsed search.
|
# @return [PostQuery::AST] The AST of the parsed search.
|
||||||
def parse!
|
def parse!
|
||||||
ast = root
|
ast = root
|
||||||
raise Error, "Unexpected EOS (rest: '#{scanner.rest}')" unless scanner.eos?
|
error("Unexpected EOS (rest: '#{rest}')") unless eos?
|
||||||
raise Error, "Unclosed parentheses (#{@unclosed_parens})" unless @unclosed_parens == 0
|
error("Unclosed parentheses (#{unclosed_parens})") unless unclosed_parens == 0
|
||||||
ast
|
ast
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -144,10 +134,10 @@ class PostQuery
|
|||||||
space
|
space
|
||||||
|
|
||||||
if accept("(")
|
if accept("(")
|
||||||
@unclosed_parens += 1
|
self.unclosed_parens += 1
|
||||||
a = or_clause
|
a = or_clause
|
||||||
expect(")")
|
expect(")")
|
||||||
@unclosed_parens -= 1
|
self.unclosed_parens -= 1
|
||||||
a
|
a
|
||||||
else
|
else
|
||||||
term
|
term
|
||||||
@@ -189,7 +179,7 @@ class PostQuery
|
|||||||
# A wildcard is a string that contains a '*' character and that begins with any character other than a space, ')', '~', or '-', followed by nonspace characters.
|
# A wildcard is a string that contains a '*' character and that begins with any character other than a space, ')', '~', or '-', followed by nonspace characters.
|
||||||
def wildcard
|
def wildcard
|
||||||
t = string(/(?=[^ ]*\*)[^ \)~-][^ ]*/, skip_balanced_parens: true)
|
t = string(/(?=[^ ]*\*)[^ \)~-][^ ]*/, skip_balanced_parens: true)
|
||||||
raise Error if t.match?(/\A#{METATAG_NAME_REGEX}/)
|
error("Invalid tag name: #{t}") if t.match?(/\A#{METATAG_NAME_REGEX}/)
|
||||||
space
|
space
|
||||||
AST.wildcard(t)
|
AST.wildcard(t)
|
||||||
end
|
end
|
||||||
@@ -197,7 +187,7 @@ class PostQuery
|
|||||||
# A tag is a string that begins with any character other than a space, ')', '~', or '-', followed by nonspace characters.
|
# A tag is a string that begins with any character other than a space, ')', '~', or '-', followed by nonspace characters.
|
||||||
def tag
|
def tag
|
||||||
t = string(/[^ \)~-][^ ]*/, skip_balanced_parens: true)
|
t = string(/[^ \)~-][^ ]*/, skip_balanced_parens: true)
|
||||||
raise Error if t.downcase.in?(%w[and or]) || t.include?("*") || t.match?(/\A#{METATAG_NAME_REGEX}/)
|
error("Invalid tag name: #{t}") if t.downcase.in?(%w[and or]) || t.include?("*") || t.match?(/\A#{METATAG_NAME_REGEX}/)
|
||||||
space
|
space
|
||||||
AST.tag(t)
|
AST.tag(t)
|
||||||
end
|
end
|
||||||
@@ -206,11 +196,11 @@ class PostQuery
|
|||||||
str = expect(pattern)
|
str = expect(pattern)
|
||||||
|
|
||||||
# XXX: Now put back any trailing right parens we mistakenly consumed.
|
# XXX: Now put back any trailing right parens we mistakenly consumed.
|
||||||
n = @unclosed_parens
|
n = unclosed_parens
|
||||||
while n > 0 && str.ends_with?(")")
|
while n > 0 && str.ends_with?(")")
|
||||||
break if skip_balanced_parens && (str.has_balanced_parens? || str.in?(Tag::PERMITTED_UNBALANCED_TAGS))
|
break if skip_balanced_parens && (str.has_balanced_parens? || str.in?(Tag::PERMITTED_UNBALANCED_TAGS))
|
||||||
str.chop!
|
str.chop!
|
||||||
scanner.pos -= 1
|
rewind
|
||||||
n -= 1
|
n -= 1
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -222,66 +212,14 @@ class PostQuery
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
concerning :HelperMethods do
|
# The current number of '(' characters without a matching ')'. Used for
|
||||||
private
|
# determining whether a trailing ')' is part of a tag or not.
|
||||||
|
private def unclosed_parens
|
||||||
|
parser.state
|
||||||
|
end
|
||||||
|
|
||||||
# Try to match `pattern`, returning the string if it matched or nil if it didn't.
|
private def unclosed_parens=(n)
|
||||||
#
|
parser.state = n
|
||||||
# @param pattern [Regexp, String] The pattern to match.
|
|
||||||
# @return [String, nil] The matched string, or nil
|
|
||||||
def accept(pattern)
|
|
||||||
@scanner.scan(pattern)
|
|
||||||
end
|
|
||||||
|
|
||||||
# Try to match `pattern`, returning the string if it matched or raising an Error if it didn't.
|
|
||||||
#
|
|
||||||
# @param pattern [Regexp, String] The pattern to match.
|
|
||||||
# @return [String] The matched string
|
|
||||||
def expect(pattern)
|
|
||||||
str = accept(pattern)
|
|
||||||
raise Error, "Expected '#{pattern}'; got '#{str}'" if str.nil?
|
|
||||||
str
|
|
||||||
end
|
|
||||||
|
|
||||||
# Try to parse the given block, backtracking to the original state if the parse failed.
|
|
||||||
def backtrack(&block)
|
|
||||||
saved_pos = @scanner.pos
|
|
||||||
saved_unclosed_parens = @unclosed_parens
|
|
||||||
raise Error if @scanner.eos?
|
|
||||||
yield
|
|
||||||
rescue Error
|
|
||||||
@scanner.pos = saved_pos
|
|
||||||
@unclosed_parens = saved_unclosed_parens
|
|
||||||
raise
|
|
||||||
end
|
|
||||||
|
|
||||||
# Parse the block zero or more times, returning an array of parse results.
|
|
||||||
def zero_or_more(&block)
|
|
||||||
matches = []
|
|
||||||
loop do
|
|
||||||
matches << backtrack { yield }
|
|
||||||
end
|
|
||||||
rescue Error
|
|
||||||
matches
|
|
||||||
end
|
|
||||||
|
|
||||||
# Parse the block one or more times, returning an array of parse results.
|
|
||||||
def one_or_more(&block)
|
|
||||||
first = yield
|
|
||||||
rest = zero_or_more(&block)
|
|
||||||
[first, *rest]
|
|
||||||
end
|
|
||||||
|
|
||||||
# Given a list of parsers, return the first one that succeeds.
|
|
||||||
def one_of(parsers)
|
|
||||||
parsers.each do |parser|
|
|
||||||
return backtrack { parser.call }
|
|
||||||
rescue Error
|
|
||||||
next
|
|
||||||
end
|
|
||||||
|
|
||||||
raise Error, "expected one of: #{parsers}"
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
memoize :parse, :parse!
|
memoize :parse, :parse!
|
||||||
|
|||||||
101
app/logical/string_parser.rb
Normal file
101
app/logical/string_parser.rb
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require "strscan"
|
||||||
|
|
||||||
|
# A StringParser is a wrapper around StringScanner that adds extra
|
||||||
|
# helper methods for writing parser-combinator style parsers.
|
||||||
|
#
|
||||||
|
# @see StringScanner
|
||||||
|
# @see https://hmac.dev/posts/2019-05-19-ruby-parser-combinators.html
|
||||||
|
class StringParser
  # Raised whenever the input cannot be parsed.
  class Error < StandardError; end

  # @return [String] The frozen copy of the string being parsed.
  attr_reader :input

  # @return [Object] The user-defined state. Restored by {#backtrack} when a
  #   parse fails.
  attr_accessor :state

  private attr_reader :scanner

  # @param input [String] The string to parse.
  # @param state [Object] An arbitrary piece of user-defined state. Will be
  #   rolled back when the parser backtracks.
  def initialize(input, state: nil)
    @input = input.to_s.clone.freeze
    @state = state
    @scanner = StringScanner.new(@input)
  end

  # @return [String] The part of the input that hasn't been consumed yet.
  def rest
    scanner.rest
  end

  # @return [Boolean] True if the whole input has been consumed.
  def eos?
    scanner.eos?
  end

  # Try to match `pattern`, returning the string if it matched or nil if it didn't.
  #
  # @param pattern [Regexp, String] The pattern to match.
  # @return [String, nil] The matched string, or nil
  def accept(pattern)
    scanner.scan(pattern)
  end

  # Try to match `pattern`, returning the string if it matched or raising an Error if it didn't.
  #
  # @param pattern [Regexp, String] The pattern to match.
  # @return [String] The matched string
  # @raise [Error] If the pattern didn't match
  def expect(pattern)
    str = accept(pattern)
    # On failure `str` is always nil, so report the unparsed remainder instead.
    error("Expected '#{pattern}'; got '#{rest}'") if str.nil?
    str
  end

  # Move the scan pointer back N characters (default: 1).
  #
  # The scanner position is a byte offset, so the distance is computed from the
  # byte size of the last N consumed characters. This keeps the pointer on a
  # character boundary for multibyte input. Rewinding further than the number
  # of consumed characters stops at the start of the input.
  #
  # @param n [Integer] The number of characters to move back (default: 1).
  # @return [Integer] The new scanner position.
  def rewind(n = 1)
    consumed = input.byteslice(0, scanner.pos)
    scanner.pos -= consumed.chars.last(n).sum(&:bytesize)
  end

  # Raise a parse error.
  #
  # @param message [String] The parse error message.
  # @raise [Error]
  def error(message)
    raise Error, message
  end

  # Try to parse the given block, backtracking to the previous state if the parse failed.
  #
  # The scanner position and a deep copy of `state` are saved before the block
  # runs. If the block raises an Error, both are restored and the error is
  # re-raised. Being at the end of the input counts as a failure.
  #
  # @return [Object] The value returned by the block.
  # @raise [Error] If the block failed, or if there is no input left.
  def backtrack(&block)
    saved_pos = scanner.pos
    saved_state = state.deep_dup # Object#deep_dup comes from ActiveSupport.
    error("Unexpected EOS") if scanner.eos?
    yield
  rescue Error
    scanner.pos = saved_pos
    self.state = saved_state
    raise
  end

  # Parse the block zero or more times, returning an array of parse results.
  #
  # Stops at the first iteration that raises an Error. An iteration that
  # succeeds without consuming any input is also treated as a failure (and
  # rolled back); otherwise the loop would repeat forever at the same position.
  #
  # @return [Array] The results of each successful iteration.
  def zero_or_more(&block)
    matches = []

    loop do
      result = backtrack do
        start_pos = scanner.pos
        value = yield
        error("Parser succeeded without consuming input") if scanner.pos == start_pos
        value
      end

      matches << result
    end
  rescue Error
    matches
  end

  # Parse the block one or more times, returning an array of parse results.
  #
  # @return [Array] The results of each successful iteration.
  # @raise [Error] If the first iteration failed.
  def one_or_more(&block)
    first = yield
    remaining = zero_or_more(&block)
    [first, *remaining]
  end

  # Given a list of parsers, try each in sequence and return the first one that succeeds.
  #
  # @param parsers [Array<#call>] The parsers to try, in order.
  # @return [Object] The result of the first parser that succeeded.
  # @raise [Error] If none of the parsers succeeded.
  def one_of(parsers)
    parsers.each do |parser|
      return backtrack { parser.call }
    rescue Error
      next
    end

    error("expected one of: #{parsers}")
  end
end
|
||||||
Reference in New Issue
Block a user