From 4c7cfc73c62b967ab9008b51aa900528adf2797e Mon Sep 17 00:00:00 2001 From: evazion Date: Mon, 28 Mar 2022 05:00:04 -0500 Subject: [PATCH] search: add new tag search parser. Add a new tag search parser that supports full boolean expressions, including `and`, `or`, and `not` operators and parenthesized subexpressions. This is only the parser itself, not the code for converting the search into SQL. The new parser isn't used yet for actual searches. Searches still use the old parser. Some example syntax: * `1girl 1boy` * `1girl and 1boy` (same as `1girl 1boy`) * `1girl or 1boy` * `~1girl ~1boy` (same as `1girl or 1boy`) * `1girl and ((blonde_hair blue_eyes) or (red_hair green_eyes))` * `1girl ~(blonde_hair blue_eyes) ~(red_hair green_eyes)` (same as above) * `1girl -(blonde_hair blue_eyes)` * `*_hair *_eyes` * `*_hair or *_eyes` * `user:evazion or fav:evazion` * `~user:evazion ~fav:evazion` Rules: AND is implicit between terms, but may be written explicitly: * `a b c` is `a and b and c` AND has higher precedence (binds tighter) than OR: * `a or b and c or d` is `a or (b and c) or d` * `a or b c or d e` is `a or (b and c) or (d and e)` All `~` operators in the same subexpression are combined into a single OR: * `a b ~c ~d` is `a b (c or d)` * `~a ~b and ~c ~d` is `(a or b) (c or d)` * `(~a ~b) (~c ~d)` is `(a or b) (c or d)` A single `~` operator in a subexpression by itself is ignored: * `a ~b` is `a b` * `~a and ~b` is `a and b`, which is `a b` * `(~a) ~b` is `a ~b`, which is `a b` The parser is written as a backtracking recursive descent parser built on top of StringScanner and a handful of parser combinators. The parser generates an AST, which is then simplified using Boolean algebra to remove redundant nodes and to convert the expression to conjunctive normal form (that is, a product of sums, or an AND of ORs).
---
 app/logical/post_query.rb           |  20 ++
 app/logical/post_query/ast.rb       | 291 +++++++++++++++++++++++++++
 app/logical/post_query/parser.rb    | 269 +++++++++++++++++++++++++
 config/initializers/inflections.rb  |   1 +
 test/unit/post_query_parser_test.rb | 297 ++++++++++++++++++++++++++++
 5 files changed, 878 insertions(+)
 create mode 100644 app/logical/post_query.rb
 create mode 100644 app/logical/post_query/ast.rb
 create mode 100644 app/logical/post_query/parser.rb
 create mode 100644 test/unit/post_query_parser_test.rb

diff --git a/app/logical/post_query.rb b/app/logical/post_query.rb
new file mode 100644
index 000000000..4d0fed5ca
--- /dev/null
+++ b/app/logical/post_query.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+class PostQuery
+  extend Memoist
+
+  attr_reader :search, :parser, :ast
+  delegate :tag_names, to: :ast
+
+  def initialize(search)
+    @search = search
+    @parser = Parser.new(search)
+    @ast = parser.parse.simplify
+  end
+
+  def tags
+    Tag.where(name: tag_names)
+  end
+
+  memoize :tags
+end
diff --git a/app/logical/post_query/ast.rb b/app/logical/post_query/ast.rb
new file mode 100644
index 000000000..80b08573d
--- /dev/null
+++ b/app/logical/post_query/ast.rb
@@ -0,0 +1,291 @@
+# frozen_string_literal: true
+
+# A PostQuery::AST is an abstract syntax tree representing a search parsed by
+# `PostQuery::Parser#parse`. It has methods for printing, manipulating, and
+# simplifying ASTs returned by the parser.
+#
+# There are nine AST node types:
+#
+# * :all (representing the search that returns everything, aka the empty search)
+# * :none (representing the search that returns nothing)
+# * :tag (a single tag)
+# * :metatag (a metatag with a name and value)
+# * :wildcard (a wildcard tag, e.g. `blue_*`)
+# * :and (an n-ary AND clause)
+# * :or (an n-ary OR clause)
+# * :not (a unary NOT clause)
+# * :opt (the unary `~`, or 'optional' operator)
+#
+# The AST returned by the parser is normally simplified with `#simplify` before
+# it's used. This is for several reasons:
+#
+# * To replace the `~` operator with `or` clauses.
+# * To remove redundant `and` and `or` nodes.
+# * To normalize the AST to conjunctive normal form.
+#
+# @example
+#
+#   PostQuery::AST.new(:or, [PostQuery::AST.new(:tag, ["1girl"]), PostQuery::AST.new(:metatag, ["rating", "s"])]).to_sexp
+#   => "(or 1girl rating:s)"
+#
+#   PostQuery::Parser.parse("cat_girl or (cat_ears tail)").to_sexp
+#   => "(or (and cat_girl) (and (and cat_ears tail)))"
+#
+#   PostQuery::Parser.parse("cat_girl or (cat_ears tail)").simplify.to_sexp
+#   => "(and (or cat_ears cat_girl) (or cat_girl tail))"
+
+class PostQuery
+  class AST
+    extend Memoist
+    include Comparable
+    include Enumerable
+
+    attr_reader :type, :args
+    delegate :all?, :none?, :and?, :or?, :not?, :opt?, :tag?, :metatag?, :wildcard?, to: :inquirer
+
+    # Create an AST node.
+    #
+    # @param type [Symbol] The type of the AST node.
+    # @param args [Array] The arguments for the node (either a list of child nodes for
+    #   AND/OR/NOT/OPT nodes, or the name and/or value for tag, metatag, or wildcard nodes).
+    def initialize(type, args)
+      @type = type
+      @args = args
+    end
+
+    concerning :SimplificationMethods do
+      # Simplify the AST by rewriting `~` to `or` clauses, and by reducing it to
+      # conjunctive normal form (that is, product-of-sums form, or an AND of ORs).
+      #
+      # The algorithm is to repeatedly apply the rules of Boolean algebra, one
+      # at a time in a top-down fashion, until the AST can't be simplified any more.
+      #
+      # @return [AST] A new simplified AST
+      def simplify
+        old_ast = nil
+        new_ast = rewrite_opts
+
+        until new_ast == old_ast
+          old_ast = new_ast
+          new_ast = old_ast.simplify_once
+        end
+
+        new_ast
+      end
+
+      # Simplify the AST once by applying the rules of Boolean algebra in a single top-down pass.
+      #
+      # @return [AST] A new simplified AST
+      def simplify_once
+        case self
+
+        # (and A) = A
+        in [:and, a]
+          a
+
+        # (or A) = A
+        in [:or, a]
+          a
+
+        # Double negation: -(-A) = A
+        in [:not, [:not, a]]
+          a
+
+        # DeMorgan's law: -(A and B) = -A or -B
+        in [:not, [:and, *args]]
+          node(:or, *args.map { node(:not, _1) })
+
+        # DeMorgan's law: -(A or B) = -A and -B
+        in [:not, [:or, *args]]
+          node(:and, *args.map { node(:not, _1) })
+
+        # Associative law: (or (or A B) C) = (or A B C)
+        in [:or, *args] if args.any?(&:or?)
+          ors, others = args.partition(&:or?)
+          node(:or, *ors.flat_map(&:args), *others)
+
+        # Associative law: (and (and A B) C) = (and A B C)
+        in [:and, *args] if args.any?(&:and?)
+          ands, others = args.partition(&:and?)
+          node(:and, *ands.flat_map(&:args), *others)
+
+        # Distributive law: A or (B and C) = (A or B) and (A or C)
+        # (or A (and B C ...) ... = (and (or A B ...) (or A C ...) ...
+        in [:or, *args] if args.any?(&:and?)
+          ands, others = args.partition(&:and?)
+          first, rest = ands.first, ands[1..] + others
+          node(:and, *first.args.map { node(:or, _1, *rest) })
+
+        in [:not, arg]
+          node(:not, arg.simplify_once)
+
+        in [:and, *args]
+          node(:and, *args.map(&:simplify_once))
+
+        in [:or, *args]
+          node(:or, *args.map(&:simplify_once))
+
+        else
+          self
+        end
+      end
+
+      # Rewrite the `~` operator to `or` clauses.
+      #
+      # @return [AST] A new AST with `:opt` nodes replaced with `:or` nodes.
+      def rewrite_opts
+        # ... ~A ~B ... = ... (or A B) ...
+        # ... ~A ... = ... (or A) ... = ... A ...
+        if children.any?(&:opt?)
+          opts, non_opts = children.partition(&:opt?)
+          or_node = node(:or, *opts.flat_map(&:children))
+          node(type, or_node, *non_opts).rewrite_opts
+        elsif children.any?
+          node(type, *children.map(&:rewrite_opts))
+        else
+          self
+        end
+      end
+
+      # Create a new AST node, sorting the child nodes so that the AST is normalized to a consistent form.
+      def node(type, *args)
+        AST.new(type, args.sort)
+      end
+    end
+
+    concerning :OutputMethods do
+      def inspect
+        to_sexp
+      end
+
+      # Display the AST as an S-expression.
+      def to_sexp
+        case self
+        in [:all]
+          "all"
+        in [:none]
+          "none"
+        in [:tag, name]
+          name
+        in [:metatag, name, value]
+          "#{name}:#{value}"
+        in [:wildcard, name]
+          "(wildcard #{name})"
+        in [type, *args]
+          "(#{type} #{args.map(&:to_sexp).join(" ")})"
+        end
+      end
+
+      # Display the AST in infix notation.
+      def to_infix
+        case self
+        in [:all]
+          "all"
+        in [:none]
+          "none"
+        in [:wildcard, name]
+          name
+        in [:tag, name]
+          name
+        in [:metatag, name, value]
+          "#{name}:#{value}"
+        in [:not, a]
+          "-#{a.to_infix}"
+        in [:opt, a]
+          "~#{a.to_infix}"
+        in [:and, a]
+          a.to_infix
+        in [:or, a]
+          a.to_infix
+        in [:and, *a]
+          "(#{a.map(&:to_infix).join(" ")})"
+        in [:or, *a]
+          "(#{a.map(&:to_infix).join(" or ")})"
+        end
+      end
+
+      # Convert the AST to a series of nested arrays.
+      def to_tree
+        if term?
+          [type, *args]
+        else
+          [type, *args.map(&:to_tree)]
+        end
+      end
+    end
+
+    concerning :UtilityMethods do
+      # Traverse the AST in depth-first left-to-right order, calling the block on each
+      # node and passing it the current node and the results from visiting each subtree.
+      def visit(&block)
+        return enum_for(:visit) unless block_given?
+
+        results = children.map { _1.visit(&block) }
+        yield self, *results
+      end
+
+      # Traverse the AST in depth-first left-to-right order, calling the block on each node.
+      def each(&block)
+        return enum_for(:each) unless block_given?
+        visit { |node| yield node }
+        self
+      end
+
+      # @return [Array] A flat list of all the nodes in the AST, in depth-first left-to-right order.
+      def nodes
+        each.to_a
+      end
+
+      # @return [Array] A list of all unique tag nodes in the AST.
+      def tags
+        nodes.select(&:tag?).uniq(&:deconstruct).sort # dedupe by value: AST defines `<=>` but not `hash`/`eql?`
+      end
+
+      # @return [Array] A list of all unique metatag nodes in the AST.
+      def metatags
+        nodes.select(&:metatag?).uniq(&:deconstruct).sort # dedupe by value: AST defines `<=>` but not `hash`/`eql?`
+      end
+
+      # @return [Array] The names of all unique tags in the AST.
+      def tag_names
+        tags.map(&:name)
+      end
+
+      # True if the AST is a simple node, that is a leaf node with no child nodes.
+      def term?
+        type.in?(%i[tag metatag wildcard all none])
+      end
+
+      # @return [String, nil] The name of the tag, metatag, or wildcard, if one of these nodes.
+      def name
+        args.first if tag? || metatag? || wildcard?
+      end
+
+      # @return [String, nil] The value of the metatag, if a metatag node.
+      def value
+        args.second if metatag?
+      end
+
+      # @return [Array] The child nodes, if the node has children.
+      def children
+        term? ? [] : args
+      end
+
+      def <=>(other)
+        return nil unless other.is_a?(AST)
+        deconstruct <=> other.deconstruct
+      end
+
+      # Deconstruct the node into an array (used for pattern matching).
+      def deconstruct
+        [type, *args]
+      end
+
+      def inquirer
+        ActiveSupport::StringInquirer.new(type.to_s)
+      end
+    end
+
+    memoize :simplify, :simplify_once, :rewrite_opts, :inquirer, :deconstruct, :inspect, :to_sexp, :to_infix, :to_tree, :nodes, :tags, :metatags, :tag_names
+  end
+end
diff --git a/app/logical/post_query/parser.rb b/app/logical/post_query/parser.rb
new file mode 100644
index 000000000..fb19217b8
--- /dev/null
+++ b/app/logical/post_query/parser.rb
@@ -0,0 +1,269 @@
+# frozen_string_literal: true
+
+require "strscan"
+
+# A PostQuery::Parser parses a search string into a PostQuery::AST.
+#
+# @example
+#
+#   ast = PostQuery::Parser.new("1girl or 1boy").parse
+#
+# Grammar:
+#
+#   root          = or_clause [root]
+#   or_clause     = and_clause "or" or_clause
+#                 | and_clause
+#   and_clause    = factor_list "and" and_clause
+#                 | factor_list
+#   factor_list   = factor [factor_list]
+#   factor        = "-" expr
+#                 | "~" expr
+#                 | expr
+#   expr          = "(" or_clause ")" | term
+#   term          = metatag | wildcard | tag
+#   metatag       = metatag_name ":" quoted_string
+#   metatag_name  = "user" | "fav" | "pool" | "order" | ...
+#   quoted_string = '"' /[^"]+/ '"'
+#   tag           = /[^ *]+/
+#   wildcard      = /[^ ]+/
+#
+# Ref:
+#
+# * https://hmac.dev/posts/2019-05-19-ruby-parser-combinators.html
+
+class PostQuery
+  class Parser
+    class Error < StandardError; end
+
+    METATAG_NAME_REGEX = /(#{PostQueryBuilder::METATAGS.join("|")}):/i
+
+    attr_reader :input
+    private attr_reader :scanner, :unclosed_parens
+
+    # @param input [String] The search string to parse.
+    def initialize(input)
+      @input = input.to_s.clone.freeze
+      @scanner = StringScanner.new(@input)
+      @unclosed_parens = 0
+    end
+
+    # Parse a search and return the AST.
+    #
+    # @param string [String] The search string to parse.
+    # @return [PostQuery::AST] The AST of the parsed search.
+    def self.parse(string)
+      new(string).parse
+    end
+
+    concerning :ParserMethods do
+      # Parse the search and return the AST, or return a search that matches nothing if the parse failed.
+      #
+      # @return [PostQuery::AST] The AST of the parsed search.
+      def parse
+        parse!
+      rescue Error
+        node(:none)
+      end
+
+      # Parse the search and return the AST, or raise an error if the parse failed.
+      #
+      # @return [PostQuery::AST] The AST of the parsed search.
+      def parse!
+        ast = root
+        raise Error, "Unexpected input; expected EOS (rest: '#{scanner.rest}')" unless scanner.eos?
+        raise Error, "Unclosed parentheses (#{@unclosed_parens})" unless @unclosed_parens == 0
+        ast
+      end
+
+      private
+
+      # root = or_clause [root]
+      def root
+        a = zero_or_more { or_clause }
+        space
+
+        if a.empty?
+          node(:all)
+        elsif a.size == 1
+          a.first
+        else
+          node(:and, *a)
+        end
+      end
+
+      # or_clause = and_clause "or" or_clause | and_clause
+      def or_clause
+        a = and_clause
+
+        space
+        if accept(/or +/i)
+          b = or_clause
+          node(:or, a, b)
+        else
+          a
+        end
+      end
+
+      # and_clause = factor_list "and" and_clause | factor_list
+      def and_clause
+        a = factor_list
+
+        space
+        if accept(/and +/i)
+          b = and_clause
+          node(:and, a, b)
+        else
+          a
+        end
+      end
+
+      # factor_list = factor [factor_list]
+      def factor_list
+        a = one_or_more { factor }
+        node(:and, *a)
+      end
+
+      # factor = "-" expr | "~" expr | expr
+      def factor
+        space
+
+        if accept("-")
+          node(:not, expr)
+        elsif accept("~")
+          node(:opt, expr)
+        else
+          expr
+        end
+      end
+
+      # expr = "(" or_clause ")" | term
+      def expr
+        space
+
+        if accept("(")
+          @unclosed_parens += 1
+          a = or_clause
+          expect(")")
+          @unclosed_parens -= 1
+          a
+        else
+          term
+        end
+      end
+
+      def term
+        metatag || wildcard || tag
+      end
+
+      # metatag = metatag_name ":" quoted_string
+      # metatag_name = "user" | "fav" | "pool" | "order" | ...
+      def metatag
+        if accept(METATAG_NAME_REGEX)
+          name = @scanner.matched.delete_suffix(":")
+          value = quoted_string
+          node(:metatag, name.downcase, value)
+        end
+      end
+
+      def quoted_string
+        if accept('"')
+          a = accept(/([^"\\]|\\")*/).gsub(/\\"/, '"') # handle backslash escaped quotes
+          expect('"')
+          a
+        else
+          string(/[^ ]+/)
+        end
+      end
+
+      # A wildcard is a string that contains a '*' character and that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters. Like a tag, it is read with `string`, so trailing ')' that close open parentheses are put back.
+      def wildcard
+        return unless scanner.check(/(?=[^ ]*\*)[^ \)~-]/)
+        t = string(/[^ ]+/)
+        space
+        node(:wildcard, t.downcase)
+      end
+
+      # A tag is a string that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters.
+      def tag
+        t = string(/[^ \)~-][^ ]*/)
+        raise Error if t.downcase.in?(%w[and or])
+        space
+        node(:tag, t.downcase)
+      end
+
+      def string(pattern)
+        str = expect(pattern)
+
+        # XXX: Now put back any trailing right parens we mistakenly consumed.
+        n = @unclosed_parens
+        while n > 0 && str.ends_with?(")")
+          str.chop!
+          scanner.pos -= 1
+          n -= 1
+        end
+
+        str
+      end
+
+      def space
+        expect(/ */)
+      end
+    end
+
+    concerning :HelperMethods do
+      private
+
+      # Try to match `pattern`, returning the string if it matched or nil if it didn't.
+      #
+      # @param pattern [Regexp, String] The pattern to match.
+      # @return [String, nil] The matched string, or nil
+      def accept(pattern)
+        @scanner.scan(pattern)
+      end
+
+      # Try to match `pattern`, returning the string if it matched or raising an Error if it didn't.
+      #
+      # @param pattern [Regexp, String] The pattern to match.
+      # @return [String] The matched string
+      def expect(pattern)
+        str = accept(pattern)
+        raise Error, "Expected '#{pattern}'; got '#{scanner.rest}'" if str.nil?
+        str
+      end
+
+      # Try to parse the given block, backtracking to the original state if the parse failed.
+      def backtrack(&block)
+        saved_pos = @scanner.pos
+        saved_unclosed_parens = @unclosed_parens
+        raise Error if @scanner.eos?
+        yield
+      rescue Error
+        @scanner.pos = saved_pos
+        @unclosed_parens = saved_unclosed_parens
+        raise
+      end
+
+      # Parse the block zero or more times, returning an array of parse results.
+      def zero_or_more(&block)
+        matches = []
+        loop do
+          matches << backtrack { yield }
+        end
+      rescue Error
+        matches
+      end
+
+      # Parse the block one or more times, returning an array of parse results.
+      def one_or_more(&block)
+        first = yield
+        rest = zero_or_more(&block)
+        [first, *rest]
+      end
+
+      # Build an AST node of the given type.
+      def node(type, *args)
+        AST.new(type, args)
+      end
+    end
+  end
+end
diff --git a/config/initializers/inflections.rb b/config/initializers/inflections.rb
index fef5847da..67f9ccb64 100644
--- a/config/initializers/inflections.rb
+++ b/config/initializers/inflections.rb
@@ -8,6 +8,7 @@ ActiveSupport::Inflector.inflections(:en) do |inflect|
   inflect.acronym "FFmpeg"
   inflect.acronym "URL"
   inflect.acronym "URLs"
+  inflect.acronym "AST"
   # inflect.plural /^(ox)$/i, '\1en'
   # inflect.singular /^(ox)en/i, '\1'
   # inflect.irregular 'person', 'people'
diff --git a/test/unit/post_query_parser_test.rb b/test/unit/post_query_parser_test.rb
new file mode 100644
index 000000000..c55df6338
--- /dev/null
+++ b/test/unit/post_query_parser_test.rb
@@ -0,0 +1,300 @@
+require 'test_helper'
+
+class PostQueryParserTest < ActiveSupport::TestCase
+  def assert_parse_equals(expected, input)
+    assert_equal(expected, PostQuery::Parser.parse(input).simplify.to_sexp)
+  end
+
+  context "PostQueryParser:" do
+    should "parse empty queries correctly" do
+      assert_parse_equals("all", "")
+      assert_parse_equals("all", " ")
+    end
+
+    should "parse basic tags correctly" do
+      assert_parse_equals("a", "a")
+      assert_parse_equals("a", "A")
+
+      assert_parse_equals("foo_(bar)", "foo_(bar)")
+      assert_parse_equals("foo_(bar)", "(foo_(bar))")
+
+      assert_parse_equals("foo_(bar_(baz))", "foo_(bar_(baz))")
+      assert_parse_equals("foo_(bar_(baz))", "(foo_(bar_(baz)))")
+
+      assert_parse_equals(";)", ";)")
+      assert_parse_equals("9", "(9)")
+    end
+
+    should "parse basic queries correctly" do
+      assert_parse_equals("(and a b)", "a b")
+      assert_parse_equals("(or a b)", "a or b")
+      assert_parse_equals("(or a b)", "~a ~b")
+
+      assert_parse_equals("(not a)", "-a")
+      assert_parse_equals("(and (not b) a)", "a -b")
+
+      assert_parse_equals("fav:a", "fav:a")
+      assert_parse_equals("(not fav:a)", "-fav:a")
+
+      assert_parse_equals("(and fav:a fav:b)", "fav:a fav:b")
+    end
+
+    should "parse metatags correctly" do
+      assert_parse_equals("fav:a", "fav:a")
+      assert_parse_equals("user:a", "user:a")
+      assert_parse_equals("pool:a", "pool:a")
+      assert_parse_equals("order:a", "order:a")
+      assert_parse_equals("source:a", "source:a")
+
+      assert_parse_equals("fav:a", "FAV:a")
+      assert_parse_equals("fav:A", "fav:A")
+
+      assert_parse_equals("fav:a", "~fav:a")
+      assert_parse_equals("(not fav:a)", "-fav:a")
+
+      assert_parse_equals("(and fav:a fav:b)", "fav:a fav:b")
+      assert_parse_equals("(or fav:a fav:b)", "~fav:a ~fav:b")
+      assert_parse_equals("(or fav:a fav:b)", "fav:a or fav:b")
+
+      assert_parse_equals("fav:a", "(fav:a)")
+      assert_parse_equals("fav:(a)", "fav:(a)")
+      assert_parse_equals("fav:(a", "(fav:(a)")
+
+      assert_parse_equals('source:foo bar', 'source:"foo bar"')
+      assert_parse_equals('source:foobar"(', 'source:foobar"(')
+      assert_parse_equals('source:', 'source:""')
+      assert_parse_equals(%q{source:don't say "lazy" okay}, %q{source:"don't say \"lazy\" okay"})
+      assert_parse_equals(%q{(and source:foo)bar a)}, %q{(a (source:"foo)bar"))})
+    end
+
+    should "parse wildcard tags correctly" do
+      assert_parse_equals("(wildcard *)", "*")
+      assert_parse_equals("(wildcard *a)", "*a")
+      assert_parse_equals("(wildcard a*)", "a*")
+      assert_parse_equals("(wildcard *a*)", "*a*")
+      assert_parse_equals("(wildcard a*b)", "a*b")
+
+      assert_parse_equals("(and b (wildcard *))", "* b")
+      assert_parse_equals("(and b (wildcard *a))", "*a b")
+      assert_parse_equals("(and b (wildcard a*))", "a* b")
+      assert_parse_equals("(and b (wildcard *a*))", "*a* b")
+
+      assert_parse_equals("(and a (wildcard *))", "a *")
+      assert_parse_equals("(and a (wildcard *b))", "a *b")
+      assert_parse_equals("(and a (wildcard b*))", "a b*")
+      assert_parse_equals("(and a (wildcard *b*))", "a *b*")
+
+      assert_parse_equals("(and (not (wildcard *)) a)", "a -*")
+      assert_parse_equals("(and (not (wildcard b*)) a)", "a -b*")
+      assert_parse_equals("(and (not (wildcard *b)) a)", "a -*b")
+      assert_parse_equals("(and (not (wildcard *b*)) a)", "a -*b*")
+
+      assert_parse_equals("(or a (wildcard *))", "~a ~*")
+      assert_parse_equals("(or a (wildcard *))", "~* ~a")
+      assert_parse_equals("(or a (wildcard *a))", "~a ~*a")
+      assert_parse_equals("(or a (wildcard *a))", "~*a ~a")
+
+      assert_parse_equals("(or a (wildcard a*))", "a or a*")
+      assert_parse_equals("(and a (wildcard a*))", "a and a*")
+
+      assert_parse_equals("(and (wildcard a*) (wildcard b*))", "a* b*")
+      assert_parse_equals("(or (wildcard a*) (wildcard b*))", "a* or b*")
+
+      assert_parse_equals("(and a c (wildcard b*))", "a b* c")
+      assert_parse_equals("(and (not (wildcard *)) a c)", "a -* c")
+
+      assert_parse_equals("(wildcard a*)", "(a*)")
+      assert_parse_equals("(and b (wildcard a*))", "(b a*)")
+    end
+
+    should "parse single tag queries correctly" do
+      assert_parse_equals("a", "a")
+      assert_parse_equals("a", "a ")
+      assert_parse_equals("a", " a")
+      assert_parse_equals("a", " a ")
+      assert_parse_equals("a", "(a)")
+      assert_parse_equals("a", "( a)")
+      assert_parse_equals("a", "(a )")
+      assert_parse_equals("a", " ( a ) ")
+      assert_parse_equals("a", "((a))")
+      assert_parse_equals("a", "( ( a ) )")
+      assert_parse_equals("a", " ( ( a ) ) ")
+    end
+
+    should "parse nested AND queries correctly" do
+      assert_parse_equals("(and a b)", "a b")
+      assert_parse_equals("(and a b)", "(a b)")
+      assert_parse_equals("(and a b)", "a (b)")
+      assert_parse_equals("(and a b)", "(a) b")
+      assert_parse_equals("(and a b)", "(a) (b)")
+      assert_parse_equals("(and a b)", "((a) (b))")
+
+      assert_parse_equals("(and a b c)", "a b c")
+      assert_parse_equals("(and a b c)", "(a b) c")
+      assert_parse_equals("(and a b c)", "((a) b) c")
+      assert_parse_equals("(and a b c)", "(((a) b) c)")
+      assert_parse_equals("(and a b c)", "((a b) c)")
+      assert_parse_equals("(and a b c)", "((a) (b) (c))")
+
+      assert_parse_equals("(and a b c)", "a (b c)")
+      assert_parse_equals("(and a b c)", "a (b (c))")
+      assert_parse_equals("(and a b c)", "(a (b (c)))")
+      assert_parse_equals("(and a b c)", "(a (b c))")
+      assert_parse_equals("(and a b c)", "(a b c)")
+
+      assert_parse_equals("(and a b)", "a and b")
+      assert_parse_equals("(and a b)", "a AND b")
+      assert_parse_equals("(and a b)", "(a and b)")
+      assert_parse_equals("(and a b c)", "a and b and c")
+      assert_parse_equals("(and a b c)", "(a and b) and c")
+      assert_parse_equals("(and a b c)", "a and (b and c)")
+      assert_parse_equals("(and a b c)", "(a and b and c)")
+    end
+
+    should "parse nested OR queries correctly" do
+      assert_parse_equals("(or a b)", "a or b")
+      assert_parse_equals("(or a b)", "a OR b")
+      assert_parse_equals("(or a b)", "(a or b)")
+      assert_parse_equals("(or a b)", "(a) or (b)")
+
+      assert_parse_equals("(or a b c)", "a or b or c")
+      assert_parse_equals("(or a b c)", "(a or b) or c")
+      assert_parse_equals("(or a b c)", "a or (b or c)")
+      assert_parse_equals("(or a b c)", "(a or b or c)")
+
+      assert_parse_equals("(or a b c d)", "a or (b or (c or d))")
+      assert_parse_equals("(or a b c d)", "((a or b) or c) or d")
+      assert_parse_equals("(or a b c d)", "(a or b) or (c or d)")
+    end
+
+    should "parse the '~' operator correctly" do
+      assert_parse_equals("(or a b)", "~a ~b")
+      assert_parse_equals("(or a b c)", "~a ~b ~c")
+      assert_parse_equals("(or a b c d)", "~a ~b ~c ~d")
+
+      assert_parse_equals("a", "~a")
+      assert_parse_equals("a", "(~a)")
+      assert_parse_equals("a", "~(a)")
+      assert_parse_equals("a", "~(~a)")
+      assert_parse_equals("a", "~(~(~a))")
+
+      assert_parse_equals("(not a)", "~(-a)")
+      assert_parse_equals("(not a)", "-(~a)")
+      assert_parse_equals("a", "-(~(-(~a)))")
+      assert_parse_equals("a", "~(-(~(-a)))")
+
+      assert_parse_equals("(and a b)", "a ~b")
+      assert_parse_equals("(and a b)", "~a b")
+      assert_parse_equals("(and a b)", "((a) ~b)")
+      assert_parse_equals("(and a b)", "~(a b)")
+
+      assert_parse_equals("(and a b)", "~a and ~b")
+      assert_parse_equals("(or a b)", "~a or ~b")
+      assert_parse_equals("(or (not a) (not b))", "~(-a) or ~(-b)")
+
+      assert_parse_equals("(or a b)", "~(a) ~(b)")
+      assert_parse_equals("(and a b)", "(~a) (~b)")
+
+      assert_parse_equals("(and (or b c) a)", "(~a) ~b ~c")
+      assert_parse_equals("(and (or b c) a)", "~a (~b ~c)")
+
+      assert_parse_equals("(or a b c d)", "~a ~b or ~c ~d")
+      assert_parse_equals("(and (or a b) (or c d))", "~a ~b and ~c ~d")
+      assert_parse_equals("(and (or a b) (or c d))", "(~a ~b) (~c ~d)")
+      assert_parse_equals("(and (or a c) (or a d) (or b c) (or b d))", "~(a b) ~(c d)")
+      assert_parse_equals("(and (or a c) (or a d) (or b c) (or b d))", "(a b) or (c d)")
+
+      assert_parse_equals("(and a b c d)", " a b c d")
+      assert_parse_equals("(and a b c d)", " a b c ~d")
+      assert_parse_equals("(and a b c d)", " a b ~c d")
+      assert_parse_equals("(and (or c d) a b)", " a b ~c ~d")
+      assert_parse_equals("(and a b c d)", " a ~b c d")
+      assert_parse_equals("(and (or b d) a c)", " a ~b c ~d")
+      assert_parse_equals("(and (or b c) a d)", " a ~b ~c d")
+      assert_parse_equals("(and (or b c d) a)", " a ~b ~c ~d")
+      assert_parse_equals("(and a b c d)", "~a b c d")
+      assert_parse_equals("(and (or a d) b c)", "~a b c ~d")
+      assert_parse_equals("(and (or a c) b d)", "~a b ~c d")
+      assert_parse_equals("(and (or a c d) b)", "~a b ~c ~d")
+      assert_parse_equals("(and (or a b) c d)", "~a ~b c d")
+      assert_parse_equals("(and (or a b d) c)", "~a ~b c ~d")
+      assert_parse_equals("(and (or a b c) d)", "~a ~b ~c d")
+      assert_parse_equals("(or a b c d)", "~a ~b ~c ~d")
+    end
+
+    should "parse NOT queries correctly" do
+      assert_parse_equals("(not a)", "-a")
+
+      assert_parse_equals("(and (not b) a)", "(a -b)")
+      assert_parse_equals("(and (not b) a)", "a (-b)")
+      assert_parse_equals("(and (not b) a)", "((a) -b)")
+    end
+
+    should "eliminate double negations" do
+      assert_parse_equals("(not a)", "-a")
+      assert_parse_equals("(not a)", "-(-(-a))")
+
+      assert_parse_equals("a", "-(-a)")
+      assert_parse_equals("a", "-(-(-(-a)))")
+
+      assert_parse_equals("(and a b c)", "a -(-(b)) c")
+      assert_parse_equals("(and a b c d)", "a -(-(b -(-c))) d")
+    end
+
+    should "apply DeMorgan's law" do
+      assert_parse_equals("(or (not a) (not b))", "-(a b)")
+      assert_parse_equals("(and (not a) (not b))", "-(a or b)")
+
+      assert_parse_equals("(or (not a) (not b) (not c))", "-(a b c)")
+      assert_parse_equals("(and (not a) (not b) (not c))", "-(a or b or c)")
+
+      assert_parse_equals("(or a b c)", "-(-a -b -c)")
+      assert_parse_equals("(and a b c)", "-(-a or -b or -c)")
+
+      assert_parse_equals("(and (or (not a) (not c) (not d)) (or (not a) b))", "-(a -(b -(c d)))")
+    end
+
+    should "apply the distributive law" do
+      assert_parse_equals("(and (or a b) (or a c))", "a or (b c)")
+      assert_parse_equals("(and (or a b) (or a c))", "(b c) or a")
+
+      assert_parse_equals("(and (or a c) (or a d) (or b c) (or b d))", "(a b) or (c d)")
+
+      assert_parse_equals("(and (or a c e) (or a c f) (or a d e) (or a d f) (or b c e) (or b c f) (or b d e) (or b d f))", "(a b) or (c d) or (e f)")
+    end
+
+    should "return the empty search for syntax errors" do
+      assert_parse_equals("none", "(")
+      assert_parse_equals("none", ")")
+      assert_parse_equals("none", "-")
+      assert_parse_equals("none", "~")
+
+      assert_parse_equals("none", "(a")
+      assert_parse_equals("none", ")a")
+      assert_parse_equals("none", "-~a")
+      assert_parse_equals("none", "~-a")
+      assert_parse_equals("none", "~~a")
+      assert_parse_equals("none", "--a")
+
+      assert_parse_equals("none", "and")
+      assert_parse_equals("none", "-and")
+      assert_parse_equals("none", "~and")
+      assert_parse_equals("none", "or")
+      assert_parse_equals("none", "-or")
+      assert_parse_equals("none", "~or")
+      assert_parse_equals("none", "a and")
+      assert_parse_equals("none", "a or")
+      assert_parse_equals("none", "and a")
+      assert_parse_equals("none", "or a")
+
+      assert_parse_equals("none", "a -")
+      assert_parse_equals("none", "a ~")
+
+      assert_parse_equals("none", "(a b")
+      assert_parse_equals("none", "(a (b)")
+
+      assert_parse_equals("none", 'source:"foo')
+      assert_parse_equals("none", 'source:"foo bar')
+    end
+  end
+end