Files
danbooru/app/logical/post_query/parser.rb

279 lines
6.9 KiB
Ruby

# frozen_string_literal: true
require "strscan"
# A PostQuery::Parser parses a search string into a PostQuery::AST.
#
# @example
#
# ast = PostQuery.new("1girl or 1boy").parse
#
# Grammar:
#
# root = or_clause [root]
# or_clause = and_clause "or" or_clause
# | and_clause
# and_clause = factor_list "and" and_clause
# | factor_list
# factor_list = factor [factor_list]
# factor = "-" expr
# | "~" expr
# | expr
# expr = "(" or_clause ")" | term
# term = metatag | wildcard | tag
# metatag = metatag_name ":" quoted_string
# metatag_name = "user" | "fav" | "pool" | "order" | ...
# quoted_string = '"' /([^"\\]|\\")*/ '"'
# | "'" /([^'\\]|\\')*/ "'"
# | /[^ ]+/
# tag = /[^ )~-][^ ]*/
# wildcard = /(?=[^ ]*\*)[^ )~-][^ ]*/
#
# Ref:
#
# * https://hmac.dev/posts/2019-05-19-ruby-parser-combinators.html
class PostQuery
  class Parser
    extend Memoist

    # Raised when the input can't be parsed. The same error is used internally to signal a
    # failed parse attempt to the backtracking helpers.
    class Error < StandardError; end

    # Matches a metatag name followed by a colon, case-insensitively.
    METATAG_NAME_REGEX = /(#{PostQueryBuilder::METATAGS.join("|")}):/i

    # @return [String] A frozen copy of the search string being parsed.
    attr_reader :input

    private attr_reader :scanner, :unclosed_parens

    # @param input [String] The search string to parse.
    def initialize(input)
      @input = input.to_s.clone.freeze
      @scanner = StringScanner.new(@input)
      @unclosed_parens = 0 # number of "(" consumed whose matching ")" hasn't been seen yet
    end

    # Parse a search and return the AST.
    #
    # @param string [String] The search string to parse.
    # @return [PostQuery::AST] The AST of the parsed search.
    def self.parse(string)
      new(string).parse
    end

    concerning :ParserMethods do
      # Parse the search and return the AST, or return a search that matches nothing if the parse failed.
      #
      # @return [PostQuery::AST] The AST of the parsed search.
      def parse
        parse!
      rescue Error
        node(:none)
      end

      # Parse the search and return the AST, or raise an error if the parse failed.
      #
      # @raise [Error] If input is left over after parsing, or a parenthesis was left unclosed.
      # @return [PostQuery::AST] The AST of the parsed search.
      def parse!
        ast = root
        # Anything still unconsumed here is input that no grammar rule could match.
        raise Error, "Unexpected trailing input (rest: '#{scanner.rest}')" unless scanner.eos?
        raise Error, "Unclosed parentheses (#{@unclosed_parens})" unless @unclosed_parens.zero?
        ast
      end

      private

      # root = or_clause [root]
      #
      # @return [PostQuery::AST] An :all node for an empty search, the single clause if there is
      #   only one, or an :and node wrapping all of the clauses.
      def root
        clauses = zero_or_more { or_clause }
        space

        if clauses.empty?
          node(:all)
        elsif clauses.size == 1
          clauses.first
        else
          node(:and, *clauses)
        end
      end

      # or_clause = and_clause "or" or_clause | and_clause
      def or_clause
        left = and_clause
        space

        if accept(/or +/i)
          node(:or, left, or_clause)
        else
          left
        end
      end

      # and_clause = factor_list "and" and_clause | factor_list
      def and_clause
        left = factor_list
        space

        if accept(/and +/i)
          node(:and, left, and_clause)
        else
          left
        end
      end

      # factor_list = factor [factor_list]
      #
      # Adjacent factors are wrapped in a single :and node.
      def factor_list
        factors = one_or_more { factor }
        node(:and, *factors)
      end

      # factor = "-" expr | "~" expr | expr
      def factor
        space

        if accept("-")
          node(:not, expr)
        elsif accept("~")
          node(:opt, expr)
        else
          expr
        end
      end

      # expr = "(" or_clause ")" | term
      def expr
        space

        if accept("(")
          # Track the open paren so that `string` knows how many trailing ")" to give back.
          @unclosed_parens += 1
          inner = or_clause
          expect(")")
          @unclosed_parens -= 1
          inner
        else
          term
        end
      end

      # term = metatag | wildcard | tag
      #
      # The wildcard rule is tried before the tag rule because the tag pattern also matches
      # strings that contain a '*'.
      def term
        metatag || wildcard || tag
      end

      # metatag = metatag_name ":" quoted_string
      # metatag_name = "user" | "fav" | "pool" | "order" | ...
      #
      # @return [PostQuery::AST, nil] A :metatag node with the downcased name and the value, or nil
      #   if the input doesn't start with a metatag name.
      def metatag
        return unless accept(METATAG_NAME_REGEX)

        name = scanner.matched.delete_suffix(":")
        value = quoted_string
        node(:metatag, name.downcase, value)
      end

      # A double-quoted string, a single-quoted string, or a run of nonspace characters. Inside a
      # quoted string, the quote character may be escaped with a backslash.
      #
      # @raise [Error] If a quoted string isn't terminated, or if there is nothing to match.
      # @return [String] The string, without the surrounding quotes and with escaped quotes unescaped.
      def quoted_string
        if accept('"')
          value = accept(/([^"\\]|\\")*/).gsub(/\\"/, '"') # handle backslash escaped quotes
          expect('"')
          value
        elsif accept("'")
          value = accept(/([^'\\]|\\')*/).gsub(/\\'/, "'") # handle backslash escaped quotes
          expect("'")
          value
        else
          string(/[^ ]+/)
        end
      end

      # A wildcard is a string that contains a '*' character and that begins with a character other
      # than a space, ')', '~', or '-', followed by nonspace characters.
      #
      # @return [PostQuery::AST, nil] A :wildcard node with the downcased string, or nil if the
      #   input doesn't start with a wildcard.
      def wildcard
        t = accept(/(?=[^ ]*\*)[^ \)~-][^ ]*/)
        return unless t

        space
        node(:wildcard, t.downcase)
      end

      # A tag is a string that begins with a character other than a space, ')', '~', or '-',
      # followed by nonspace characters.
      #
      # @raise [Error] If the input doesn't start with a tag, or if the tag is a bare "and" / "or".
      # @return [PostQuery::AST] A :tag node with the downcased tag name.
      def tag
        t = string(/[^ \)~-][^ ]*/)
        name = t.downcase
        raise Error, "Unexpected operator '#{t}'" if name.in?(%w[and or])

        space
        node(:tag, name)
      end

      # Match `pattern`, then give back trailing ')' characters, up to the number of currently
      # unclosed parentheses, so that they can close the enclosing groups.
      #
      # @param pattern [Regexp, String] The pattern to match.
      # @raise [Error] If the pattern doesn't match.
      # @return [String] The matched string, minus any right parens that were given back.
      def string(pattern)
        str = expect(pattern)

        # XXX: Now put back any trailing right parens we mistakenly consumed.
        n = @unclosed_parens
        while n > 0 && str.end_with?(")")
          str.chop!
          scanner.pos -= 1
          n -= 1
        end

        str
      end

      # Consume zero or more spaces. The pattern can match the empty string, so this never fails.
      def space
        expect(/ */)
      end
    end

    concerning :HelperMethods do
      private

      # Try to match `pattern`, returning the string if it matched or nil if it didn't.
      #
      # @param pattern [Regexp, String] The pattern to match.
      # @return [String, nil] The matched string, or nil
      def accept(pattern)
        scanner.scan(pattern)
      end

      # Try to match `pattern`, returning the string if it matched or raising an Error if it didn't.
      #
      # @param pattern [Regexp, String] The pattern to match.
      # @raise [Error] If the pattern doesn't match at the current position.
      # @return [String] The matched string
      def expect(pattern)
        str = accept(pattern)
        # `str` is always nil on failure, so report the input we were looking at instead.
        raise Error, "Expected '#{pattern}'; got '#{scanner.rest}'" if str.nil?
        str
      end

      # Try to parse the given block, backtracking to the original state if the parse failed.
      #
      # @raise [Error] If the scanner is at the end of the input, or if the block raised an Error.
      #   The scanner position and the paren counter are restored before the error is re-raised.
      # @return [Object] Whatever the block returned.
      def backtrack
        saved_pos = scanner.pos
        saved_unclosed_parens = @unclosed_parens

        # Treat end of input as a failed attempt; this is what terminates `zero_or_more`.
        raise Error if scanner.eos?
        yield
      rescue Error
        scanner.pos = saved_pos
        @unclosed_parens = saved_unclosed_parens
        raise
      end

      # Parse the block zero or more times, returning an array of parse results.
      #
      # The block is retried until an attempt fails; the failed attempt is undone by `backtrack`.
      #
      # @return [Array] The results of the successful attempts.
      def zero_or_more(&block)
        matches = []
        loop do
          matches << backtrack(&block)
        end
      rescue Error
        matches
      end

      # Parse the block one or more times, returning an array of parse results.
      #
      # @raise [Error] If the first attempt fails.
      # @return [Array] The results of the successful attempts.
      def one_or_more(&block)
        first = yield
        rest = zero_or_more(&block)
        [first, *rest]
      end

      # Build an AST node of the given type.
      #
      # @param type [Symbol] The node type.
      # @param args [Array] The node's arguments.
      def node(type, *args)
        AST.new(type, args)
      end
    end

    memoize :parse, :parse!
  end
end