diff --git a/app/logical/post_query.rb b/app/logical/post_query.rb new file mode 100644 index 000000000..4d0fed5ca --- /dev/null +++ b/app/logical/post_query.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +class PostQuery + extend Memoist + + attr_reader :search, :parser, :ast + delegate :tag_names, to: :ast + + def initialize(search) + @search = search + @parser = Parser.new(search) + @ast = parser.parse.simplify + end + + def tags + Tag.where(name: tag_names) + end + + memoize :tags +end diff --git a/app/logical/post_query/ast.rb b/app/logical/post_query/ast.rb new file mode 100644 index 000000000..80b08573d --- /dev/null +++ b/app/logical/post_query/ast.rb @@ -0,0 +1,291 @@ +# frozen_string_literal: true + +# A PostQuery::AST is an abstract syntax tree representing a search parsed by +# `PostQuery::Parser#parse`. It has methods for printing, manipulating, and +# simpifying ASTs returned by the parser. +# +# There are nine AST node types: +# +# * :all (representing the search that returns everything, aka the empty search) +# * :none (representing the search that returns nothing) +# * :tag (a single tag) +# * :metatag (a metatag with a name and value) +# * :wildcard (a wildcard tag, e.g. `blue_*`) +# * :and (an n-ary AND clause) +# * :or (an n-nary OR clause) +# * :not (a unary NOT clause) +# * :opt (the unary `~`, or 'optional' operator) +# +# The AST returned by the parser is normally simplified with `#simplify` before +# it's used. This is for several reasons: +# +# * To replace the `~` operator with `or` clauses. +# * To remove redundant `and` and `or` nodes. +# * To normalize the AST to conjunctive normal form. +# +# @example +# +# PostQuery::AST.new(:or, [PostQuery::AST.new(:tag, "1girl"), PostQuery::AST.new(:metatag, "rating", "s")]).to_sexp +# => "(or 1girl rating:s)" +# +# PostQuery::Parser.parse("cat_girl or (cat_ears tail)").to_sexp +# => "(or (and cat_girl) (and (and cat_ears tail)))" +# +# PostQuery::Parser.parse("cat_girl or (cat_ears tail)").simplify.to_sexp +# => "(and (or cat_ears cat_girl) (or cat_girl tail))" + +class PostQuery + class AST + extend Memoist + include Comparable + include Enumerable + + attr_reader :type, :args + delegate :all?, :none?, :and?, :or?, :not?, :opt?, :tag?, :metatag?, :wildcard?, to: :inquirer + + # Create an AST node. + # + # @param type [Symbol] The type of the AST node. + # @param args [Array] The arguments for the node (either a list of child nodes for + # AND/OR/NOT/OPT nodes, or the name and/or value for tag, metatag, or wildcard nodes). + def initialize(type, args) + @type = type + @args = args + end + + concerning :SimplificationMethods do + # Simplify the AST by rewriting `~` to `or` clauses, and by reducing it to + # conjunctive normal form (that is, product-of-sums form, or an AND of ORs). + # + # The algorithm is to repeatedly apply the rules of Boolean algebra, one + # at a time in a top-down fashion, until the AST can't be simplified any more. + # + # @return [AST] A new simplified AST + def simplify + old_ast = nil + new_ast = rewrite_opts + + until new_ast == old_ast + old_ast = new_ast + new_ast = old_ast.simplify_once + end + + new_ast + end + + # Simplify the AST once by applying the rules of Boolean algebra in a single top-down pass. + # + # @return [AST] A new simplified AST + def simplify_once + case self + + # (and A) = A + in [:and, a] + a + + # (or A) = A + in [:or, a] + a + + # Double negation: -(-A) = A + in [:not, [:not, a]] + a + + # DeMorgan's law: -(A and B) = -A or -B + in [:not, [:and, *args]] + node(:or, *args.map { node(:not, _1) }) + + # DeMorgan's law: -(A or B) = -A and -B + in [:not, [:or, *args]] + node(:and, *args.map { node(:not, _1) }) + + # Associative law: (or (or A B) C) = (or A B C) + in [:or, *args] if args.any?(&:or?) + ors, others = args.partition(&:or?) + node(:or, *ors.flat_map(&:args), *others) + + # Associative law: (and (and A B) C) = (and A B C) + in [:and, *args] if args.any?(&:and?) + ands, others = args.partition(&:and?) + node(:and, *ands.flat_map(&:args), *others) + + # Distributive law: A or (B and C) = (A or B) and (A or C) + # (or A (and B C ...) ... = (and (or A B ...) (or A C ...) ... + in [:or, *args] if args.any?(&:and?) + ands, others = args.partition(&:and?) + first, rest = ands.first, ands[1..] + others + node(:and, *first.args.map { node(:or, _1, *rest) }) + + in [:not, arg] + node(:not, arg.simplify_once) + + in [:and, *args] + node(:and, *args.map(&:simplify_once)) + + in [:or, *args] + node(:or, *args.map(&:simplify_once)) + + else + self + end + end + + # Rewrite the `~` operator to `or` clauses. + # + # @return [AST] A new AST with `:opt` nodes replaced with `:or` nodes. + def rewrite_opts + # ... ~A ~B ... = ... (or A B) ... + # ... ~A ... = ... (or A) ... = ... A ... + if children.any?(&:opt?) + opts, non_opts = children.partition(&:opt?) + or_node = node(:or, *opts.flat_map(&:children)) + node(type, or_node, *non_opts).rewrite_opts + elsif children.any? + node(type, *children.map(&:rewrite_opts)) + else + self + end + end + + # Create a new AST node, sorting the child nodes so that the AST is normalized to a consistent form. + def node(type, *args) + AST.new(type, args.sort) + end + end + + concerning :OutputMethods do + def inspect + to_sexp + end + + # Display the AST as an S-expression. + def to_sexp + case self + in [:all] + "all" + in [:none] + "none" + in [:tag, name] + name + in [:metatag, name, value] + "#{name}:#{value}" + in [:wildcard, name] + "(wildcard #{name})" + in [type, *args] + "(#{type} #{args.map(&:to_sexp).join(" ")})" + end + end + + # Display the AST in infix notation. + def to_infix + case self + in [:all] + "all" + in [:none] + "none" + in [:wildcard, name] + name + in [:tag, name] + name + in [:metatag, name, value] + "#{name}:#{value}" + in [:not, a] + "-#{a.to_infix}" + in [:opt, a] + "~#{a.to_infix}" + in [:and, a] + a.to_infix + in [:or, a] + a.to_infix + in [:and, *a] + "(#{a.map(&:to_infix).join(" ")})" + in [:or, *a] + "(#{a.map(&:to_infix).join(" or ")})" + end + end + + # Convert the AST to a series of nested arrays. + def to_tree + if term? + [type, *args] + else + [type, *args.map(&:to_tree)] + end + end + end + + concerning :UtilityMethods do + # Traverse the AST in depth-first left-to-right order, calling the block on each + # node and passing it the current node and the results from visiting each subtree. + def visit(&block) + return enum_for(:visit) unless block_given? + + results = children.map { _1.visit(&block) } + yield self, *results + end + + # Traverse the AST in depth-first left-to-right order, calling the block on each node. + def each(&block) + return enum_for(:each) unless block_given? + visit { |node| yield node } + self + end + + # @return [Array] A flat list of all the nodes in the AST, in depth-first left-to-right order. + def nodes + each.map + end + + # @return [Array] A list of all unique tag nodes in the AST. + def tags + nodes.select(&:tag?).uniq.sort + end + + # @return [Array] A list of all unique metatag nodes in the AST. + def metatags + nodes.select(&:metatag?).uniq.sort + end + + # @return [Array] The names of all unique tags in the AST. + def tag_names + tags.map(&:name) + end + + # True if the AST is a simple node, that is a leaf node with no child nodes. + def term? + type.in?(%i[tag metatag wildcard all none]) + end + + # @return [String, nil] The name of the tag, metatag, or wildcard, if one of these nodes. + def name + args.first if tag? || metatag? || wildcard? + end + + # @return [String, nil] The value of the metatag, if a metatag node. + def value + args.second if metatag? + end + + # @return [Array] The child nodes, if the node has children. + def children + term? ? [] : args + end + + def <=>(other) + return nil unless other.is_a?(AST) + deconstruct <=> other.deconstruct + end + + # Deconstruct the node into an array (used for pattern matching). + def deconstruct + [type, *args] + end + + def inquirer + ActiveSupport::StringInquirer.new(type.to_s) + end + end + + memoize :simplify, :simplify_once, :rewrite_opts, :inquirer, :deconstruct, :inspect, :to_sexp, :to_infix, :to_tree, :nodes, :tags, :metatags, :tag_names + end +end diff --git a/app/logical/post_query/parser.rb b/app/logical/post_query/parser.rb new file mode 100644 index 000000000..fb19217b8 --- /dev/null +++ b/app/logical/post_query/parser.rb @@ -0,0 +1,269 @@ +# frozen_string_literal: true + +require "strscan" + +# A PostQuery::Parser parses a search string into a PostQuery::AST. +# +# @example +# +# ast = PostQuery.new("1girl or 1boy").parse +# +# Grammar: +# +# root = or_clause [root] +# or_clause = and_clause "or" or_clause +# | and_clause +# and_clause = factor_list "and" and_clause +# | factor_list +# factor_list = factor [factor_list] +# factor = "-" expr +# | "~" expr +# | expr +# expr = "(" or_clause ")" | term +# term = metatag | tag | wildcard +# metatag = metatag_name ":" quoted_string +# metatag_name = "user" | "fav" | "pool" | "order" | ... +# quoted_string = '"' /[^"]+/ '"' +# tag = /[^ *]+/ +# wildcard = /[^ ]+/ +# +# Ref: +# +# * https://hmac.dev/posts/2019-05-19-ruby-parser-combinators.html + +class PostQuery + class Parser + class Error < StandardError; end + + METATAG_NAME_REGEX = /(#{PostQueryBuilder::METATAGS.join("|")}):/i + + attr_reader :input + private attr_reader :scanner, :unclosed_parens + + # @param input [String] The search string to parse. + def initialize(input) + @input = input.to_s.clone.freeze + @scanner = StringScanner.new(@input) + @unclosed_parens = 0 + end + + # Parse a search and return the AST. + # + # @param string [String] The search string to parse. + # @returns [PostQuery::AST] The AST of the parsed search. + def self.parse(string) + new(string).parse + end + + concerning :ParserMethods do + # Parse the search and return the AST, or return a search that matches nothing if the parse failed. + # + # @return [PostQuery::AST] The AST of the parsed search. + def parse + parse! + rescue Error + node(:none) + end + + # Parse the search and return the AST, or raise an error if the parse failed. + # + # @return [PostQuery::AST] The AST of the parsed search. + def parse! + ast = root + raise Error, "Unexpected EOS (rest: '#{scanner.rest}')" unless scanner.eos? + raise Error, "Unclosed parentheses (#{@unclosed_parens})" unless @unclosed_parens == 0 + ast + end + + private + + # root = or_clause [root] + def root + a = zero_or_more { or_clause } + space + + if a.empty? + node(:all) + elsif a.size == 1 + a.first + else + node(:and, *a) + end + end + + # or_clause = and_clause "or" or_clause | and_clause + def or_clause + a = and_clause + + space + if accept(/or +/i) + b = or_clause + node(:or, a, b) + else + a + end + end + + # and_clause = factor_list "and" and_clause | factor_list + def and_clause + a = factor_list + + space + if accept(/and +/i) + b = and_clause + node(:and, a, b) + else + a + end + end + + # factor_list = factor [factor_list] + def factor_list + a = one_or_more { factor } + node(:and, *a) + end + + # factor = "-" expr | "~" expr | expr + def factor + space + + if accept("-") + node(:not, expr) + elsif accept("~") + node(:opt, expr) + else + expr + end + end + + # expr = "(" or_clause ")" | term + def expr + space + + if accept("(") + @unclosed_parens += 1 + a = or_clause + expect(")") + @unclosed_parens -= 1 + a + else + term + end + end + + def term + metatag || wildcard || tag + end + + # metatag = metatag_name ":" quoted_string + # metatag_name = "user" | "fav" | "pool" | "order" | ... + def metatag + if accept(METATAG_NAME_REGEX) + name = @scanner.matched.delete_suffix(":") + value = quoted_string + node(:metatag, name.downcase, value) + end + end + + def quoted_string + if accept('"') + a = accept(/([^"\\]|\\")*/).gsub(/\\"/, '"') # handle backslash escaped quotes + expect('"') + a + else + string(/[^ ]+/) + end + end + + # A wildcard is a string that contains a '*' character and that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters. + def wildcard + if t = accept(/(?=[^ ]*\*)[^ \)~-][^ ]*/) + space + node(:wildcard, t.downcase) + end + end + + # A tag is a string that begins with a nonspace, non-')', non-'~', or non-'-' character, followed by nonspace characters. + def tag + t = string(/[^ \)~-][^ ]*/) + raise Error if t.downcase.in?(%w[and or]) + space + node(:tag, t.downcase) + end + + def string(pattern) + str = expect(pattern) + + # XXX: Now put back any trailing right parens we mistakenly consumed. + n = @unclosed_parens + while n > 0 && str.ends_with?(")") + str.chop! + scanner.pos -= 1 + n -= 1 + end + + str + end + + def space + expect(/ */) + end + end + + concerning :HelperMethods do + private + + # Try to match `pattern`, returning the string if it matched or nil if it didn't. + # + # @param pattern [Regexp, String] The pattern to match. + # @return [String, nil] The matched string, or nil + def accept(pattern) + @scanner.scan(pattern) + end + + # Try to match `pattern`, returning the string if it matched or raising an Error if it didn't. + # + # @param pattern [Regexp, String] The pattern to match. + # @return [String] The matched string + def expect(pattern) + str = accept(pattern) + raise Error, "Expected '#{pattern}'; got '#{str}'" if str.nil? + str + end + + # Try to parse the given block, backtracking to the original state if the parse failed. + def backtrack(&block) + saved_pos = @scanner.pos + saved_unclosed_parens = @unclosed_parens + raise Error if @scanner.eos? + yield + rescue Error + @scanner.pos = saved_pos + @unclosed_parens = saved_unclosed_parens + raise + end + + # Parse the block zero or more times, returning an array of parse results. + def zero_or_more(&block) + matches = [] + loop do + matches << backtrack { yield } + end + rescue Error + matches + end + + # Parse the block one or more times, returning an array of parse results. + def one_or_more(&block) + first = yield + rest = zero_or_more(&block) + [first, *rest] + end + + # Build an AST node of the given type. + def node(type, *args) + AST.new(type, args) + end + end + end +end diff --git a/config/initializers/inflections.rb b/config/initializers/inflections.rb index fef5847da..67f9ccb64 100644 --- a/config/initializers/inflections.rb +++ b/config/initializers/inflections.rb @@ -8,6 +8,7 @@ ActiveSupport::Inflector.inflections(:en) do |inflect| inflect.acronym "FFmpeg" inflect.acronym "URL" inflect.acronym "URLs" + inflect.acronym "AST" # inflect.plural /^(ox)$/i, '\1en' # inflect.singular /^(ox)en/i, '\1' # inflect.irregular 'person', 'people' diff --git a/test/unit/post_query_parser_test.rb b/test/unit/post_query_parser_test.rb new file mode 100644 index 000000000..c55df6338 --- /dev/null +++ b/test/unit/post_query_parser_test.rb @@ -0,0 +1,297 @@ +require 'test_helper' + +class PostQueryParserTest < ActiveSupport::TestCase + def assert_parse_equals(expected, input) + assert_equal(expected, PostQuery::Parser.parse(input).simplify.to_sexp) + end + + context "PostQueryParser:" do + should "parse empty queries correctly" do + assert_parse_equals("all", "") + assert_parse_equals("all", " ") + end + + should "parse basic tags correctly" do + assert_parse_equals("a", "a") + assert_parse_equals("a", "A") + + assert_parse_equals("foo_(bar)", "foo_(bar)") + assert_parse_equals("foo_(bar)", "(foo_(bar))") + + assert_parse_equals("foo_(bar_(baz))", "foo_(bar_(baz))") + assert_parse_equals("foo_(bar_(baz))", "(foo_(bar_(baz)))") + + assert_parse_equals(";)", ";)") + assert_parse_equals("9", "(9)") + end + + should "parse basic queries correctly" do + assert_parse_equals("(and a b)", "a b") + assert_parse_equals("(or a b)", "a or b") + assert_parse_equals("(or a b)", "~a ~b") + + assert_parse_equals("(not a)", "-a") + assert_parse_equals("(and (not b) a)", "a -b") + + assert_parse_equals("fav:a", "fav:a") + assert_parse_equals("(not fav:a)", "-fav:a") + + assert_parse_equals("(and fav:a fav:b)", "fav:a fav:b") + end + + should "parse metatags correctly" do + assert_parse_equals("fav:a", "fav:a") + assert_parse_equals("user:a", "user:a") + assert_parse_equals("pool:a", "pool:a") + assert_parse_equals("order:a", "order:a") + assert_parse_equals("source:a", "source:a") + + assert_parse_equals("fav:a", "FAV:a") + assert_parse_equals("fav:A", "fav:A") + + assert_parse_equals("fav:a", "~fav:a") + assert_parse_equals("(not fav:a)", "-fav:a") + + assert_parse_equals("(and fav:a fav:b)", "fav:a fav:b") + assert_parse_equals("(or fav:a fav:b)", "~fav:a ~fav:b") + assert_parse_equals("(or fav:a fav:b)", "fav:a or fav:b") + + assert_parse_equals("fav:a", "(fav:a)") + assert_parse_equals("fav:(a)", "fav:(a)") + assert_parse_equals("fav:(a", "(fav:(a)") + + assert_parse_equals('source:foo bar', 'source:"foo bar"') + assert_parse_equals('source:foobar"(', 'source:foobar"(') + assert_parse_equals('source:', 'source:""') + assert_parse_equals(%q{source:don't say "lazy" okay}, %q{source:"don't say \"lazy\" okay"}) + assert_parse_equals(%q{(and source:foo)bar a)}, %q{(a (source:"foo)bar"))}) + end + + should "parse wildcard tags correctly" do + assert_parse_equals("(wildcard *)", "*") + assert_parse_equals("(wildcard *a)", "*a") + assert_parse_equals("(wildcard a*)", "a*") + assert_parse_equals("(wildcard *a*)", "*a*") + assert_parse_equals("(wildcard a*b)", "a*b") + + assert_parse_equals("(and b (wildcard *))", "* b") + assert_parse_equals("(and b (wildcard *a))", "*a b") + assert_parse_equals("(and b (wildcard a*))", "a* b") + assert_parse_equals("(and b (wildcard *a*))", "*a* b") + + assert_parse_equals("(and a (wildcard *))", "a *") + assert_parse_equals("(and a (wildcard *b))", "a *b") + assert_parse_equals("(and a (wildcard b*))", "a b*") + assert_parse_equals("(and a (wildcard *b*))", "a *b*") + + assert_parse_equals("(and (not (wildcard *)) a)", "a -*") + assert_parse_equals("(and (not (wildcard b*)) a)", "a -b*") + assert_parse_equals("(and (not (wildcard *b)) a)", "a -*b") + assert_parse_equals("(and (not (wildcard *b*)) a)", "a -*b*") + + assert_parse_equals("(or a (wildcard *))", "~a ~*") + assert_parse_equals("(or a (wildcard *))", "~* ~a") + assert_parse_equals("(or a (wildcard *a))", "~a ~*a") + assert_parse_equals("(or a (wildcard *a))", "~*a ~a") + + assert_parse_equals("(or a (wildcard a*))", "a or a*") + assert_parse_equals("(and a (wildcard a*))", "a and a*") + + assert_parse_equals("(and (wildcard a*) (wildcard b*))", "a* b*") + assert_parse_equals("(or (wildcard a*) (wildcard b*))", "a* or b*") + + assert_parse_equals("(and a c (wildcard b*))", "a b* c") + assert_parse_equals("(and (not (wildcard *)) a c)", "a -* c") + end + + should "parse single tag queries correctly" do + assert_parse_equals("a", "a") + assert_parse_equals("a", "a ") + assert_parse_equals("a", " a") + assert_parse_equals("a", " a ") + assert_parse_equals("a", "(a)") + assert_parse_equals("a", "( a)") + assert_parse_equals("a", "(a )") + assert_parse_equals("a", " ( a ) ") + assert_parse_equals("a", "((a))") + assert_parse_equals("a", "( ( a ) )") + assert_parse_equals("a", " ( ( a ) ) ") + end + + should "parse nested AND queries correctly" do + assert_parse_equals("(and a b)", "a b") + assert_parse_equals("(and a b)", "(a b)") + assert_parse_equals("(and a b)", "a (b)") + assert_parse_equals("(and a b)", "(a) b") + assert_parse_equals("(and a b)", "(a) (b)") + assert_parse_equals("(and a b)", "((a) (b))") + + assert_parse_equals("(and a b c)", "a b c") + assert_parse_equals("(and a b c)", "(a b) c") + assert_parse_equals("(and a b c)", "((a) b) c") + assert_parse_equals("(and a b c)", "(((a) b) c)") + assert_parse_equals("(and a b c)", "((a b) c)") + assert_parse_equals("(and a b c)", "((a) (b) (c))") + + assert_parse_equals("(and a b c)", "a (b c)") + assert_parse_equals("(and a b c)", "a (b (c))") + assert_parse_equals("(and a b c)", "(a (b (c)))") + assert_parse_equals("(and a b c)", "(a (b c))") + assert_parse_equals("(and a b c)", "(a b c)") + + assert_parse_equals("(and a b)", "a and b") + assert_parse_equals("(and a b)", "a AND b") + assert_parse_equals("(and a b)", "(a and b)") + assert_parse_equals("(and a b c)", "a and b and c") + assert_parse_equals("(and a b c)", "(a and b) and c") + assert_parse_equals("(and a b c)", "a and (b and c)") + assert_parse_equals("(and a b c)", "(a and b and c)") + end + + should "parse nested OR queries correctly" do + assert_parse_equals("(or a b)", "a or b") + assert_parse_equals("(or a b)", "a OR b") + assert_parse_equals("(or a b)", "(a or b)") + assert_parse_equals("(or a b)", "(a) or (b)") + + assert_parse_equals("(or a b c)", "a or b or c") + assert_parse_equals("(or a b c)", "(a or b) or c") + assert_parse_equals("(or a b c)", "a or (b or c)") + assert_parse_equals("(or a b c)", "(a or b or c)") + + assert_parse_equals("(or a b c d)", "a or (b or (c or d))") + assert_parse_equals("(or a b c d)", "((a or b) or c) or d") + assert_parse_equals("(or a b c d)", "(a or b) or (c or d)") + end + + should "parse the '~' operator correctly" do + assert_parse_equals("(or a b)", "~a ~b") + assert_parse_equals("(or a b c)", "~a ~b ~c") + assert_parse_equals("(or a b c d)", "~a ~b ~c ~d") + + assert_parse_equals("a", "~a") + assert_parse_equals("a", "(~a)") + assert_parse_equals("a", "~(a)") + assert_parse_equals("a", "~(~a)") + assert_parse_equals("a", "~(~(~a))") + + assert_parse_equals("(not a)", "~(-a)") + assert_parse_equals("(not a)", "-(~a)") + assert_parse_equals("a", "-(~(-(~a)))") + assert_parse_equals("a", "~(-(~(-a)))") + + assert_parse_equals("(and a b)", "a ~b") + assert_parse_equals("(and a b)", "~a b") + assert_parse_equals("(and a b)", "((a) ~b)") + assert_parse_equals("(and a b)", "~(a b)") + + assert_parse_equals("(and a b)", "~a and ~b") + assert_parse_equals("(or a b)", "~a or ~b") + assert_parse_equals("(or (not a) (not b))", "~(-a) or ~(-b)") + + assert_parse_equals("(or a b)", "~(a) ~(b)") + assert_parse_equals("(and a b)", "(~a) (~b)") + + assert_parse_equals("(and (or b c) a)", "(~a) ~b ~c") + assert_parse_equals("(and (or b c) a)", "~a (~b ~c)") + + assert_parse_equals("(or a b c d)", "~a ~b or ~c ~d") + assert_parse_equals("(and (or a b) (or c d))", "~a ~b and ~c ~d") + assert_parse_equals("(and (or a b) (or c d))", "(~a ~b) (~c ~d)") + assert_parse_equals("(and (or a c) (or a d) (or b c) (or b d))", "~(a b) ~(c d)") + assert_parse_equals("(and (or a c) (or a d) (or b c) (or b d))", "(a b) or (c d)") + + assert_parse_equals("(and a b c d)", " a b c d") + assert_parse_equals("(and a b c d)", " a b c ~d") + assert_parse_equals("(and a b c d)", " a b ~c d") + assert_parse_equals("(and (or c d) a b)", " a b ~c ~d") + assert_parse_equals("(and a b c d)", " a ~b c d") + assert_parse_equals("(and (or b d) a c)", " a ~b c ~d") + assert_parse_equals("(and (or b c) a d)", " a ~b ~c d") + assert_parse_equals("(and (or b c d) a)", " a ~b ~c ~d") + assert_parse_equals("(and a b c d)", "~a b c d") + assert_parse_equals("(and (or a d) b c)", "~a b c ~d") + assert_parse_equals("(and (or a c) b d)", "~a b ~c d") + assert_parse_equals("(and (or a c d) b)", "~a b ~c ~d") + assert_parse_equals("(and (or a b) c d)", "~a ~b c d") + assert_parse_equals("(and (or a b d) c)", "~a ~b c ~d") + assert_parse_equals("(and (or a b c) d)", "~a ~b ~c d") + assert_parse_equals("(or a b c d)", "~a ~b ~c ~d") + end + + should "parse NOT queries correctly" do + assert_parse_equals("(not a)", "-a") + + assert_parse_equals("(and (not b) a)", "(a -b)") + assert_parse_equals("(and (not b) a)", "a (-b)") + assert_parse_equals("(and (not b) a)", "((a) -b)") + end + + should "eliminate double negations" do + assert_parse_equals("(not a)", "-a") + assert_parse_equals("(not a)", "-(-(-a))") + + assert_parse_equals("a", "-(-a)") + assert_parse_equals("a", "-(-(-(-a)))") + + assert_parse_equals("(and a b c)", "a -(-(b)) c") + assert_parse_equals("(and a b c d)", "a -(-(b -(-c))) d") + end + + should "apply DeMorgan's law" do + assert_parse_equals("(or (not a) (not b))", "-(a b)") + assert_parse_equals("(and (not a) (not b))", "-(a or b)") + + assert_parse_equals("(or (not a) (not b) (not c))", "-(a b c)") + assert_parse_equals("(and (not a) (not b) (not c))", "-(a or b or c)") + + assert_parse_equals("(or a b c)", "-(-a -b -c)") + assert_parse_equals("(and a b c)", "-(-a or -b or -c)") + + assert_parse_equals("(and (or (not a) (not c) (not d)) (or (not a) b))", "-(a -(b -(c d)))") + end + + should "apply the distributive law" do + assert_parse_equals("(and (or a b) (or a c))", "a or (b c)") + assert_parse_equals("(and (or a b) (or a c))", "(b c) or a") + + assert_parse_equals("(and (or a c) (or a d) (or b c) (or b d))", "(a b) or (c d)") + + assert_parse_equals("(and (or a c e) (or a c f) (or a d e) (or a d f) (or b c e) (or b c f) (or b d e) (or b d f))", "(a b) or (c d) or (e f)") + end + + should "return the empty search for syntax errors" do + assert_parse_equals("none", "(") + assert_parse_equals("none", ")") + assert_parse_equals("none", "-") + assert_parse_equals("none", "~") + + assert_parse_equals("none", "(a") + assert_parse_equals("none", ")a") + assert_parse_equals("none", "-~a") + assert_parse_equals("none", "~-a") + assert_parse_equals("none", "~~a") + assert_parse_equals("none", "--a") + + assert_parse_equals("none", "and") + assert_parse_equals("none", "-and") + assert_parse_equals("none", "~and") + assert_parse_equals("none", "or") + assert_parse_equals("none", "-or") + assert_parse_equals("none", "~or") + assert_parse_equals("none", "a and") + assert_parse_equals("none", "a or") + assert_parse_equals("none", "and a") + assert_parse_equals("none", "or a") + + assert_parse_equals("none", "a -") + assert_parse_equals("none", "a ~") + + assert_parse_equals("none", "(a b") + assert_parse_equals("none", "(a (b)") + + assert_parse_equals("none", 'source:"foo') + assert_parse_equals("none", 'source:"foo bar') + end + end +end