# frozen_string_literal: true

require "strscan"

# A PostQuery::Parser parses a search string into a PostQuery::AST.
#
# @example
#
#   ast = PostQuery::Parser.parse("1girl or 1boy")
#
# Grammar:
#
#   root          = or_clause [root]
#   or_clause     = and_clause "or" or_clause
#                 | and_clause
#   and_clause    = factor_list "and" and_clause
#                 | factor_list
#   factor_list   = factor [factor_list]
#   factor        = "-" expr
#                 | "~" expr
#                 | expr
#   expr          = "(" or_clause ")" | term
#   term          = metatag | wildcard | tag
#   metatag       = metatag_name ":" quoted_string
#   metatag_name  = "user" | "fav" | "pool" | "order" | ...
#   quoted_string = '"' /([^"\\]|\\")*/ '"' | /[^ ]+/
#   tag           = /[^ *]+/
#   wildcard      = /[^ ]+/
#
# Ref:
#
# * https://hmac.dev/posts/2019-05-19-ruby-parser-combinators.html
class PostQuery
  class Parser
    extend Memoist

    # Raised whenever a grammar rule fails to match. Also used internally to drive backtracking.
    class Error < StandardError; end

    # Matches a metatag name followed by its ':' separator, case-insensitively.
    METATAG_NAME_REGEX = /(#{PostQueryBuilder::METATAGS.join("|")}):/i

    attr_reader :input
    private attr_reader :scanner, :unclosed_parens

    # @param input [String] The search string to parse.
    def initialize(input)
      @input = input.to_s.clone.freeze
      @scanner = StringScanner.new(@input)
      @unclosed_parens = 0
    end

    # Parse a search and return the AST.
    #
    # @param string [String] The search string to parse.
    # @return [PostQuery::AST] The AST of the parsed search.
    def self.parse(string)
      new(string).parse
    end

    concerning :ParserMethods do
      # Parse the search and return the AST, or return a search that matches nothing if the parse failed.
      #
      # @return [PostQuery::AST] The AST of the parsed search.
      def parse
        parse!
      rescue Error
        node(:none)
      end

      # Parse the search and return the AST, or raise an error if the parse failed.
      #
      # @raise [Error] If input is left over after parsing, or if a parenthesis was left open.
      # @return [PostQuery::AST] The AST of the parsed search.
      def parse!
        ast = root

        # `root` stops at the first thing it can't parse; anything left over is a syntax error.
        raise Error, "Unexpected input (rest: '#{scanner.rest}')" unless scanner.eos?
        raise Error, "Unclosed parentheses (#{@unclosed_parens})" unless @unclosed_parens == 0

        ast
      end

      private

      # root = or_clause [root]
      #
      # No clauses yields an :all node, one clause is returned as-is, several are ANDed together.
      def root
        clauses = zero_or_more { or_clause }
        space

        if clauses.empty?
          node(:all)
        elsif clauses.size == 1
          clauses.first
        else
          node(:and, *clauses)
        end
      end

      # or_clause = and_clause "or" or_clause | and_clause
      def or_clause
        a = and_clause
        space

        if accept(/or +/i)
          b = or_clause
          node(:or, a, b)
        else
          a
        end
      end

      # and_clause = factor_list "and" and_clause | factor_list
      def and_clause
        a = factor_list
        space

        if accept(/and +/i)
          b = and_clause
          node(:and, a, b)
        else
          a
        end
      end

      # factor_list = factor [factor_list]
      #
      # Always wraps the factors in an :and node, even when there is only one.
      def factor_list
        factors = one_or_more { factor }
        node(:and, *factors)
      end

      # factor = "-" expr | "~" expr | expr
      def factor
        space

        if accept("-")
          node(:not, expr)
        elsif accept("~")
          node(:opt, expr)
        else
          expr
        end
      end

      # expr = "(" or_clause ")" | term
      def expr
        space

        if accept("(")
          # Track nesting depth so that terms know how many trailing ')' belong to us rather than to them.
          @unclosed_parens += 1
          a = or_clause
          expect(")")
          @unclosed_parens -= 1
          a
        else
          term
        end
      end

      # term = metatag | wildcard | tag
      #
      # `metatag` and `wildcard` return nil when they don't match; `tag` raises instead.
      def term
        metatag || wildcard || tag
      end

      # metatag = metatag_name ":" quoted_string
      # metatag_name = "user" | "fav" | "pool" | "order" | ...
      #
      # @return [PostQuery::AST, nil] A :metatag node with the downcased name and the value, or nil if
      #   the input doesn't start with a metatag name.
      def metatag
        return unless accept(METATAG_NAME_REGEX)

        name = scanner.matched.delete_suffix(":")
        value = quoted_string
        node(:metatag, name.downcase, value)
      end

      # Parse a metatag value: either a double-quoted string (in which `\"` stands for a literal quote),
      # or a bare run of non-space characters.
      #
      # NOTE(review): inside a quoted string, a backslash that isn't followed by a double quote stops
      # the match, so the closing-quote `expect` below raises and the whole parse fails.
      #
      # @return [String] The value, with the surrounding quotes removed and `\"` unescaped.
      def quoted_string
        if accept('"')
          a = accept(/([^"\\]|\\")*/).gsub(/\\"/, '"') # handle backslash escaped quotes
          expect('"')
          a
        else
          string(/[^ ]+/)
        end
      end

      # A wildcard is a string that contains a '*' character and that begins with a character that is
      # not a space, ')', '~', or '-', followed by nonspace characters.
      #
      # @return [PostQuery::AST, nil] A :wildcard node with the downcased pattern, or nil if the input
      #   doesn't start with a wildcard.
      def wildcard
        t = accept(/(?=[^ ]*\*)[^ \)~-][^ ]*/)
        return if t.nil?

        # The pattern is greedy about ')' just like a tag, so `(foo*)` would otherwise swallow the
        # paren that closes the group.
        t = unread_trailing_parens(t)
        space
        node(:wildcard, t.downcase)
      end

      # A tag is a string that begins with a character that is not a space, ')', '~', or '-', followed
      # by nonspace characters.
      #
      # @raise [Error] If the input doesn't start with a tag, or if the tag is the bare word `and` or `or`.
      # @return [PostQuery::AST] A :tag node with the downcased tag name.
      def tag
        t = string(/[^ \)~-][^ ]*/)
        raise Error, "Unexpected keyword '#{t}'" if %w[and or].include?(t.downcase)

        space
        node(:tag, t.downcase)
      end

      # Match `pattern`, then give back any trailing ')' that belong to an enclosing group.
      #
      # @param pattern [Regexp, String] The pattern to match.
      # @raise [Error] If the pattern doesn't match.
      # @return [String] The matched string, minus any parens that were given back.
      def string(pattern)
        unread_trailing_parens(expect(pattern))
      end

      # Put back any trailing right parens we mistakenly consumed. At most one paren is returned to the
      # scanner per currently open group; any further ')' are kept as part of the string.
      #
      # @param str [String] The string that was just scanned. It is modified in place.
      # @return [String] The string without the parens that were given back.
      def unread_trailing_parens(str)
        n = @unclosed_parens

        while n > 0 && str.end_with?(")")
          str.chop!
          scanner.pos -= 1 # ')' is a single byte, so stepping the byte position back by one is safe.
          n -= 1
        end

        str
      end

      # Skip zero or more spaces. This never fails.
      def space
        expect(/ */)
      end
    end

    concerning :HelperMethods do
      private

      # Try to match `pattern`, returning the string if it matched or nil if it didn't.
      #
      # @param pattern [Regexp, String] The pattern to match.
      # @return [String, nil] The matched string, or nil
      def accept(pattern)
        @scanner.scan(pattern)
      end

      # Try to match `pattern`, returning the string if it matched or raising an Error if it didn't.
      #
      # @param pattern [Regexp, String] The pattern to match.
      # @raise [Error] If the pattern doesn't match at the current position.
      # @return [String] The matched string
      def expect(pattern)
        str = accept(pattern)
        raise Error, "Expected '#{pattern}'; got '#{@scanner.rest}'" if str.nil?

        str
      end

      # Try to parse the given block, backtracking to the original state if the parse failed.
      #
      # @raise [Error] If the scanner is already at the end of the input, or if the block fails. The
      #   error is re-raised after the scanner position and paren depth have been restored.
      def backtrack
        saved_pos = @scanner.pos
        saved_unclosed_parens = @unclosed_parens

        raise Error, "Unexpected EOS" if @scanner.eos?

        yield
      rescue Error
        @scanner.pos = saved_pos
        @unclosed_parens = saved_unclosed_parens
        raise
      end

      # Parse the block zero or more times, returning an array of parse results.
      def zero_or_more(&block)
        matches = []

        # Keep going until the block fails; the failed attempt is rolled back by `backtrack`.
        loop do
          matches << backtrack(&block)
        end
      rescue Error
        matches
      end

      # Parse the block one or more times, returning an array of parse results.
      #
      # @raise [Error] If the block fails on the first attempt.
      def one_or_more(&block)
        first = yield
        rest = zero_or_more(&block)
        [first, *rest]
      end

      # Build an AST node of the given type.
      def node(type, *args)
        AST.new(type, args)
      end
    end

    memoize :parse, :parse!
  end
end