From bf7c721815752836a0d665d0f5666eb611ec5eda Mon Sep 17 00:00:00 2001 From: evazion Date: Mon, 4 Apr 2022 00:48:40 -0500 Subject: [PATCH] post queries: refactor AST `#simplify` method. Refactor the `PostQuery::AST#simplify` method to split it into three methods: `#trim` to eliminate redundant AND and OR clauses, `#simplify` to expand deeply nested subexpressions, and `#sort` to sort the query into alphabetical order. This is so we can normalize queries written by users by parsing and rewriting them, but without expanding out nested subexpressions, which can substantially alter the way the query is written. --- app/logical/post_query/ast.rb | 227 +++++++++++++++++----------- test/unit/post_query_parser_test.rb | 2 +- 2 files changed, 137 insertions(+), 92 deletions(-) diff --git a/app/logical/post_query/ast.rb b/app/logical/post_query/ast.rb index b5a1ef969..56266b89e 100644 --- a/app/logical/post_query/ast.rb +++ b/app/logical/post_query/ast.rb @@ -16,12 +16,13 @@ # * :not (a unary NOT clause) # * :opt (the unary `~`, or 'optional' operator) # -# The AST returned by the parser is normally simplified with `#simplify` before +# The AST returned by the parser is normally rewritten with `#to_cnf` before # it's used. This is for several reasons: # # * To replace the `~` operator with `or` clauses. # * To remove redundant `and` and `or` nodes. -# * To normalize the AST to conjunctive normal form. +# * To transform the AST to conjunctive normal form. +# * To sort the AST into alphabetical order. # # @example # @@ -31,7 +32,7 @@ # PostQuery::Parser.parse("cat_girl or (cat_ears tail)").to_sexp # => "(or (and cat_girl) (and (and cat_ears tail)))" # -# PostQuery::Parser.parse("cat_girl or (cat_ears tail)").simplify.to_sexp +# PostQuery::Parser.parse("cat_girl or (cat_ears tail)").to_cnf.to_sexp # => "(and (or cat_ears cat_girl) (or cat_girl tail))" class PostQuery @@ -53,103 +54,118 @@ class PostQuery @args = args end + # Create an AST node. + def node(type, *args) + AST.new(type, args) + end + concerning :SimplificationMethods do - # Simplify the AST by rewriting `~` to `or` clauses, and by reducing it to - # conjunctive normal form (that is, product-of-sums form, or an AND of ORs). + # Convert the AST to conjunctive normal form, that is, product-of-sums + # form, or an AND of ORs. The result is a single top-level AND clause, + # containing a series of tags, metatags, and OR clauses, with no deeply + # nested subexpressions. # - # The algorithm is to repeatedly apply the rules of Boolean algebra, one - # at a time in a top-down fashion, until the AST can't be simplified any more. - # - # @return [AST] A new simplified AST - def simplify - old_ast = nil - new_ast = rewrite_opts - - until new_ast == old_ast - old_ast = new_ast - new_ast = old_ast.simplify_once - end - - new_ast - end - - # Simplify the AST once by applying the rules of Boolean algebra in a single top-down pass. - # - # @return [AST] A new simplified AST - def simplify_once - case self - - # (and A) = A - in [:and, a] - a - - # (or A) = A - in [:or, a] - a - - # Double negation: -(-A) = A - in [:not, [:not, a]] - a - - # DeMorgan's law: -(A and B) = -A or -B - in [:not, [:and, *args]] - node(:or, *args.map { node(:not, _1) }) - - # DeMorgan's law: -(A or B) = -A and -B - in [:not, [:or, *args]] - node(:and, *args.map { node(:not, _1) }) - - # Associative law: (or (or A B) C) = (or A B C) - in [:or, *args] if args.any?(&:or?) - ors, others = args.partition(&:or?) - node(:or, *ors.flat_map(&:args), *others) - - # Associative law: (and (and A B) C) = (and A B C) - in [:and, *args] if args.any?(&:and?) - ands, others = args.partition(&:and?) - node(:and, *ands.flat_map(&:args), *others) - - # Distributive law: A or (B and C) = (A or B) and (A or C) - # (or A (and B C ...) ... = (and (or A B ...) (or A C ...) ... - in [:or, *args] if args.any?(&:and?) - ands, others = args.partition(&:and?) - first, rest = ands.first, ands[1..] + others - node(:and, *first.args.map { node(:or, _1, *rest) }) - - in [:not, arg] - node(:not, arg.simplify_once) - - in [:and, *args] - node(:and, *args.map(&:simplify_once)) - - in [:or, *args] - node(:or, *args.map(&:simplify_once)) - - else - self - end + # @return [AST] A new AST in conjunctive normal form. + def to_cnf + rewrite_opts.simplify.sort end # Rewrite the `~` operator to `or` clauses. # # @return [AST] A new AST with `:opt` nodes replaced with `:or` nodes. def rewrite_opts - # ... ~A ~B ... = ... (or A B) ... - # ... ~A ... = ... (or A) ... = ... A ... - if children.any?(&:opt?) - opts, non_opts = children.partition(&:opt?) - or_node = node(:or, *opts.flat_map(&:children)) - node(type, or_node, *non_opts).rewrite_opts - elsif children.any? - node(type, *children.map(&:rewrite_opts)) - else - self + rewrite do |ast| + # ... ~A ~B ... = ... (or A B) ... + # ... ~A ... = ... (or A) ... = ... A ... + if ast.children.any?(&:opt?) + opts, non_opts = ast.children.partition(&:opt?) + or_node = node(:or, *opts.flat_map(&:children)) + node(ast.type, or_node, *non_opts) + else + ast + end end end - # Create a new AST node, sorting the child nodes so that the AST is normalized to a consistent form. - def node(type, *args) - AST.new(type, args.sort) + # Simplify the AST by eliminating unnecessary AND and OR nodes, and by + # expanding out deeply nested subexpressions. The result is an AST in + # conjunctive normal form. + # + # @return [AST] A new AST in conjunctive normal form. + def simplify + repeat_until_unchanged do |ast| + ast.trim_once.simplify_once + end + end + + # Simplify the AST once in a single top-down pass by applying the double + # negation law, DeMorgan's law, and the distributive law. This expands + # out deeply nested subexpressions. + # + # @return [AST] A new simplified AST + def simplify_once + rewrite do |ast| + case ast + + # Double negation: -(-A) = A + in [:not, [:not, a]] + a + + # DeMorgan's law: -(A and B) = -A or -B + in [:not, [:and, *children]] + node(:or, *children.map { node(:not, _1) }) + + # DeMorgan's law: -(A or B) = -A and -B + in [:not, [:or, *children]] + node(:and, *children.map { node(:not, _1) }) + + # Distributive law: A or (B and C) = (A or B) and (A or C) + # (or A (and B C ...) ... = (and (or A B ...) (or A C ...) ... + in [:or, *children] if children.any?(&:and?) + ands, non_ands = children.partition(&:and?) + first_and, rest = ands.first, ands[1..] + non_ands + node(:and, *first_and.children.map { node(:or, _1, *rest) }) + + else + ast + end + end + end + + # Trim the AST by eliminating redundant AND and OR clauses. + def trim + repeat_until_unchanged(&:trim_once) + end + + def trim_once + rewrite do |ast| + case ast + + # (and A) = A; (or A) = A + in :and | :or, a + a + + # Associative law: (and (and A B) C) = (and A B C) + in :and, *children + node(:and, *children.flat_map { _1.and? ? _1.children : _1 }) + + # Associative law: (or (or A B) C) = (or A B C) + in :or, *children + node(:or, *children.flat_map { _1.or? ? _1.children : _1 }) + + else + ast + end + end + end + + # Sort the AST into alphabetical order. + def sort + if children.present? + node(type, *children.map(&:sort).sort) + else + self + end end end @@ -210,7 +226,7 @@ class PostQuery end end - concerning :UtilityMethods do + concerning :TraversalMethods do # Traverse the AST in depth-first left-to-right order, calling the block on each # node and passing it the current node and the results from visiting each subtree. def visit(&block) @@ -227,6 +243,35 @@ class PostQuery self end + # Rewrite the AST by calling the block on each node and replacing the node with the result. + def rewrite(&block) + ast = yield self + + if ast.children.any? + node(ast.type, *ast.children.map { _1.rewrite(&block) } ) + else + ast + end + end + + # Call the block on the AST repeatedly until the output stops changing. + # + # `ast.repeat_until_unchanged(&:trim)` is like doing `ast.trim.trim.trim...` + # until the AST can't be trimmed any more. + def repeat_until_unchanged(&block) + old = nil + new = self + + until new == old + old = new + new = yield old + end + + new + end + end + + concerning :UtilityMethods do # @return [Array] A flat list of all the nodes in the AST, in depth-first left-to-right order. def nodes each.map @@ -298,6 +343,6 @@ class PostQuery end end - memoize :simplify, :simplify_once, :rewrite_opts, :inquirer, :deconstruct, :inspect, :to_sexp, :to_infix, :to_tree, :nodes, :tags, :metatags, :tag_names + memoize :to_cnf, :simplify, :simplify_once, :rewrite_opts, :trim, :trim_once, :sort, :inquirer, :deconstruct, :inspect, :to_sexp, :to_infix, :to_tree, :nodes, :tags, :metatags, :tag_names end end diff --git a/test/unit/post_query_parser_test.rb b/test/unit/post_query_parser_test.rb index 9b477b01e..4f812da83 100644 --- a/test/unit/post_query_parser_test.rb +++ b/test/unit/post_query_parser_test.rb @@ -2,7 +2,7 @@ require 'test_helper' class PostQueryParserTest < ActiveSupport::TestCase def assert_parse_equals(expected, input) - assert_equal(expected, PostQuery::Parser.parse(input).simplify.to_sexp) + assert_equal(expected, PostQuery::Parser.parse(input).to_cnf.to_sexp) end def to_infix(string)