279 lines
6.9 KiB
Ruby
279 lines
6.9 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "strscan"
|
|
|
|
# A PostQuery::Parser parses a search string into a PostQuery::AST.
|
|
#
|
|
# @example
|
|
#
|
|
# ast = PostQuery.new("1girl or 1boy").parse
|
|
#
|
|
# Grammar:
|
|
#
|
|
# root = or_clause [root]
|
|
# or_clause = and_clause "or" or_clause
|
|
# | and_clause
|
|
# and_clause = factor_list "and" and_clause
|
|
# | factor_list
|
|
# factor_list = factor [factor_list]
|
|
# factor = "-" expr
|
|
# | "~" expr
|
|
# | expr
|
|
# expr = "(" or_clause ")" | term
|
|
# term = metatag | tag | wildcard
|
|
# metatag = metatag_name ":" quoted_string
|
|
# metatag_name = "user" | "fav" | "pool" | "order" | ...
|
|
# quoted_string = '"' /[^"]*/ '"' (an embedded '"' is written as \")
# | "'" /[^']*/ "'" (an embedded "'" is written as \')
# | /[^ ]+/ (unquoted value)
|
|
# tag = /[^ *]+/
|
|
# wildcard = /[^ ]+/
|
|
#
|
|
# Ref:
|
|
#
|
|
# * https://hmac.dev/posts/2019-05-19-ruby-parser-combinators.html
|
|
|
|
class PostQuery
|
|
class Parser
|
|
extend Memoist

# Raised when the search string can't be parsed.
class Error < StandardError
end

# Matches a metatag name immediately followed by its ":" separator, ignoring case.
METATAG_NAME_REGEX = /(#{PostQueryBuilder::METATAGS.join("|")}):/i

# The search string being parsed.
attr_reader :input

# Parser state; readable only from inside the parser.
attr_reader :scanner, :unclosed_parens
private :scanner, :unclosed_parens
|
|
|
|
# Set up a parser for one search string.
#
# @param input [String] The search string to parse. It is converted with `to_s`,
#   copied, and frozen, so the caller's object is never modified.
def initialize(input)
  @unclosed_parens = 0 # how many "(" are currently open; maintained by #expr
  text = input.to_s.clone
  @input = text.freeze
  @scanner = StringScanner.new(@input)
end
|
|
|
|
# Parse a search string and return its AST. Shorthand for `new(string).parse`.
#
# @param string [String] The search string to parse.
# @return [PostQuery::AST] The AST of the parsed search.
def self.parse(string)
  parser = new(string)
  parser.parse
end
|
|
|
|
concerning :ParserMethods do
|
|
# Parse the search, falling back to a search that matches nothing when the input is invalid.
#
# @return [PostQuery::AST] The AST of the parsed search, or a `:none` node if #parse! raised an Error.
def parse
  ast = parse!
rescue Error
  node(:none)
else
  ast
end
|
|
|
|
# Parse the search and return the AST, or raise an error if the parse failed.
#
# @return [PostQuery::AST] The AST of the parsed search.
# @raise [Error] If some of the input couldn't be parsed, or if a "(" was left open.
def parse!
  ast = root

  # `root` stops at the first thing it can't parse, so any input left over here is a
  # syntax error: the end of the string was expected but not reached.
  raise Error, "Expected EOS (rest: '#{@scanner.rest}')" unless @scanner.eos?
  raise Error, "Unclosed parentheses (#{@unclosed_parens})" unless @unclosed_parens.zero?

  ast
end
|
|
|
|
private
|
|
|
|
# root = or_clause [root]
#
# Parses as many or_clauses as possible. An empty search becomes an `:all` node, a single
# clause is returned as-is, and several clauses are combined under one `:and` node.
def root
  clauses = zero_or_more { or_clause }
  space

  case clauses.size
  when 0 then node(:all)
  when 1 then clauses.first
  else node(:and, *clauses)
  end
end
|
|
|
|
# or_clause = and_clause "or" or_clause | and_clause
#
# The "or" keyword is case-insensitive and must be followed by at least one space.
def or_clause
  left = and_clause
  space

  return left unless accept(/or +/i)

  node(:or, left, or_clause)
end
|
|
|
|
# and_clause = factor_list "and" and_clause | factor_list
#
# The "and" keyword is case-insensitive and must be followed by at least one space.
def and_clause
  left = factor_list
  space

  return left unless accept(/and +/i)

  node(:and, left, and_clause)
end
|
|
|
|
# factor_list = factor [factor_list]
#
# One or more adjacent factors, always wrapped in an `:and` node (even when there is only one).
def factor_list
  node(:and, *one_or_more { factor })
end
|
|
|
|
# factor = "-" expr | "~" expr | expr
#
# A leading "-" wraps the expression in a `:not` node and a leading "~" in an `:opt` node.
def factor
  space

  return node(:not, expr) if accept("-")
  return node(:opt, expr) if accept("~")

  expr
end
|
|
|
|
# expr = "(" or_clause ")" | term
#
# While a group is being parsed, @unclosed_parens is one higher, which tells #string how
# many trailing ")" characters it is allowed to hand back to the scanner.
def expr
  space

  return term unless accept("(")

  @unclosed_parens += 1
  inner = or_clause
  expect(")")
  @unclosed_parens -= 1
  inner
end
|
|
|
|
# term = metatag | wildcard | tag
#
# Alternatives are tried in this order; the first one that returns a node wins.
def term
  parsed = metatag
  parsed ||= wildcard
  parsed || tag
end
|
|
|
|
# metatag = metatag_name ":" quoted_string
# metatag_name = "user" | "fav" | "pool" | "order" | ...
#
# @return [PostQuery::AST, nil] A `:metatag` node with the lowercased name and the value,
#   or nil if the input doesn't start with a metatag name.
def metatag
  prefix = accept(METATAG_NAME_REGEX)
  return if prefix.nil?

  name = prefix.delete_suffix(":").downcase
  value = quoted_string
  node(:metatag, name, value)
end
|
|
|
|
# Parse a metatag value: a double-quoted string, a single-quoted string, or an unquoted
# run of non-space characters.
#
# Inside quotes, the quote character can be embedded by escaping it with a backslash
# (`\"` or `\'`); the backslash is removed from the returned value. Any other backslash
# sequence is kept verbatim, so values such as "C:\dir" still parse.
#
# @return [String] The value, without the surrounding quotes.
# @raise [Error] If the closing quote is missing, or an unquoted value is empty.
def quoted_string
  if accept('"')
    # `\\.` consumes a backslash together with whatever follows it, so a backslash can
    # never end the match early; only escaped quotes are unescaped.
    value = accept(/([^"\\]|\\.)*/m).gsub(/\\"/, '"')
    expect('"')
    value
  elsif accept("'")
    value = accept(/([^'\\]|\\.)*/m).gsub(/\\'/, "'")
    expect("'")
    value
  else
    string(/[^ ]+/)
  end
end
|
|
|
|
# A wildcard is a run of non-space characters that contains a '*' and whose first
# character is not ')', '~', or '-'.
#
# The token is read through #string, so closing parens that belong to an enclosing group
# (as in "(foo*)") are handed back to the scanner instead of becoming part of the wildcard.
#
# @return [PostQuery::AST, nil] A `:wildcard` node holding the lowercased pattern, or nil
#   (with nothing consumed) if the input at this position isn't a wildcard.
def wildcard
  pattern = /(?=[^ ]*\*)[^ \)~-][^ ]*/

  # Look ahead without consuming, so that a non-wildcard returns nil rather than raising.
  return unless @scanner.check(pattern)

  token = string(pattern)
  space
  node(:wildcard, token.downcase)
end
|
|
|
|
# A tag is a run of non-space characters whose first character is not ')', '~', or '-'.
# The bare words "and" and "or" (in any case) are rejected rather than parsed as tags.
#
# @return [PostQuery::AST] A `:tag` node holding the lowercased tag name.
# @raise [Error] If no tag starts at this position, or the word is "and" / "or".
def tag
  name = string(/[^ \)~-][^ ]*/).downcase
  raise Error if %w[and or].include?(name)

  space
  node(:tag, name)
end
|
|
|
|
# Match `pattern`, then give back any trailing ")" characters that close a currently open
# group rather than belonging to the token.
#
# At most @unclosed_parens parens are returned to the scanner, one per open group.
#
# @param pattern [Regexp, String] The pattern to match.
# @return [String] The matched string, minus the parens that were handed back.
# @raise [Error] If `pattern` doesn't match at the current position.
def string(pattern)
  token = expect(pattern)

  # XXX: The pattern may have greedily consumed right parens; count how many to put back.
  trailing_parens = token[/\)*\z/].length
  handed_back = [trailing_parens, @unclosed_parens].min

  if handed_back.positive?
    token.slice!(-handed_back, handed_back)
    @scanner.pos -= handed_back
  end

  token
end
|
|
|
|
# Skip any spaces at the current position.
#
# The pattern also matches the empty string, so this always succeeds.
#
# @return [String] The spaces that were skipped (possibly empty).
def space
  accept(/ */)
end
|
|
end
|
|
|
|
concerning :HelperMethods do
|
|
private
|
|
|
|
# Try to match `pattern` at the current scanner position, consuming it on success.
#
# @param pattern [Regexp, String] The pattern to match.
# @return [String, nil] The matched string, or nil (with nothing consumed) if it didn't match.
def accept(pattern) = @scanner.scan(pattern)
|
|
|
|
# Try to match `pattern`, returning the string if it matched or raising an Error if it didn't.
#
# @param pattern [Regexp, String] The pattern to match.
# @return [String] The matched string.
# @raise [Error] If `pattern` doesn't match at the current position. The message includes
#   the unparsed remainder of the input.
def expect(pattern)
  str = accept(pattern)
  return str unless str.nil?

  # `str` is nil on this path, so report what was actually found: the rest of the input.
  raise Error, "Expected '#{pattern}'; got '#{@scanner.rest}'"
end
|
|
|
|
# Run the block, rewinding the parser to its starting state if the block raises an Error.
#
# Both the scanner position and the open-paren counter are restored before the Error is
# re-raised. At the end of the input an Error is raised without running the block.
#
# @return [Object] Whatever the block returns.
# @raise [Error] If the input is exhausted or the block fails.
def backtrack(&block)
  checkpoint = [@scanner.pos, @unclosed_parens]

  begin
    raise Error if @scanner.eos?

    yield
  rescue Error
    @scanner.pos, @unclosed_parens = checkpoint
    raise
  end
end
|
|
|
|
# Parse the block repeatedly until it fails, returning an array of the successful results.
#
# Each attempt runs inside #backtrack, so the failing attempt leaves the parser where the
# last successful one stopped.
#
# @return [Array] The parse results, possibly empty.
def zero_or_more(&block)
  results = []

  begin
    loop { results << backtrack(&block) }
  rescue Error
    # The first failed attempt ends the repetition.
  end

  results
end
|
|
|
|
# Parse the block one or more times, returning an array of parse results.
#
# The first attempt runs directly, so its Error (if any) propagates to the caller; further
# attempts go through #zero_or_more.
#
# @return [Array] The parse results; always at least one element.
def one_or_more(&block)
  [yield].concat(zero_or_more(&block))
end
|
|
|
|
# Build an AST node of the given type.
#
# @param type [Symbol] The node type, e.g. `:and`, `:or`, `:tag`.
# @param args [Array] The node's arguments; passed to AST.new as a single array.
# @return [AST] The new node.
def node(type, *args) = AST.new(type, args)
|
|
end
|
|
|
|
# Memoize both entry points (via Memoist). #parse! advances the scanner as it goes, so
# running it again on the same instance would not start from the beginning of the input.
memoize :parse, :parse!
|
|
end
|
|
end
|