post queries: refactor AST #simplify method.

Refactor the `PostQuery::AST#simplify` method to split it into three
methods: `#trim` to eliminate redundant AND and OR clauses, `#simplify`
to expand deeply nested subexpressions, and `#sort` to sort the query
into alphabetical order.

This is so we can normalize queries written by users by parsing and
rewriting them, but without expanding out nested subexpressions, which
can substantially alter the way the query is written.
This commit is contained in:
evazion
2022-04-04 00:48:40 -05:00
parent 8055c4f172
commit bf7c721815
2 changed files with 137 additions and 92 deletions

View File

@@ -16,12 +16,13 @@
# * :not (a unary NOT clause)
# * :opt (the unary `~`, or 'optional' operator)
#
# The AST returned by the parser is normally simplified with `#simplify` before
# The AST returned by the parser is normally rewritten with `#to_cnf` before
# it's used. This is for several reasons:
#
# * To replace the `~` operator with `or` clauses.
# * To remove redundant `and` and `or` nodes.
# * To normalize the AST to conjunctive normal form.
# * To transform the AST to conjunctive normal form.
# * To sort the AST into alphabetical order.
#
# @example
#
@@ -31,7 +32,7 @@
# PostQuery::Parser.parse("cat_girl or (cat_ears tail)").to_sexp
# => "(or (and cat_girl) (and (and cat_ears tail)))"
#
# PostQuery::Parser.parse("cat_girl or (cat_ears tail)").simplify.to_sexp
# PostQuery::Parser.parse("cat_girl or (cat_ears tail)").to_cnf.to_sexp
# => "(and (or cat_ears cat_girl) (or cat_girl tail))"
class PostQuery
@@ -53,103 +54,118 @@ class PostQuery
@args = args
end
# Create an AST node.
def node(type, *args)
AST.new(type, args)
end
concerning :SimplificationMethods do
# Simplify the AST by rewriting `~` to `or` clauses, and by reducing it to
# conjunctive normal form (that is, product-of-sums form, or an AND of ORs).
# Convert the AST to conjunctive normal form, that is, product-of-sums
# form, or an AND of ORs. The result is a single top-level AND clause,
# containing a series of tags, metatags, and OR clauses, with no deeply
# nested subexpressions.
#
# The algorithm is to repeatedly apply the rules of Boolean algebra, one
# at a time in a top-down fashion, until the AST can't be simplified any more.
#
# @return [AST] A new simplified AST
def simplify
old_ast = nil
new_ast = rewrite_opts
until new_ast == old_ast
old_ast = new_ast
new_ast = old_ast.simplify_once
end
new_ast
end
# Simplify the AST once by applying the rules of Boolean algebra in a single top-down pass.
#
# @return [AST] A new simplified AST
def simplify_once
case self
# (and A) = A
in [:and, a]
a
# (or A) = A
in [:or, a]
a
# Double negation: -(-A) = A
in [:not, [:not, a]]
a
# DeMorgan's law: -(A and B) = -A or -B
in [:not, [:and, *args]]
node(:or, *args.map { node(:not, _1) })
# DeMorgan's law: -(A or B) = -A and -B
in [:not, [:or, *args]]
node(:and, *args.map { node(:not, _1) })
# Associative law: (or (or A B) C) = (or A B C)
in [:or, *args] if args.any?(&:or?)
ors, others = args.partition(&:or?)
node(:or, *ors.flat_map(&:args), *others)
# Associative law: (and (and A B) C) = (and A B C)
in [:and, *args] if args.any?(&:and?)
ands, others = args.partition(&:and?)
node(:and, *ands.flat_map(&:args), *others)
# Distributive law: A or (B and C) = (A or B) and (A or C)
# (or A (and B C ...) ... = (and (or A B ...) (or A C ...) ...
in [:or, *args] if args.any?(&:and?)
ands, others = args.partition(&:and?)
first, rest = ands.first, ands[1..] + others
node(:and, *first.args.map { node(:or, _1, *rest) })
in [:not, arg]
node(:not, arg.simplify_once)
in [:and, *args]
node(:and, *args.map(&:simplify_once))
in [:or, *args]
node(:or, *args.map(&:simplify_once))
else
self
end
# @return [AST] A new AST in conjunctive normal form.
def to_cnf
rewrite_opts.simplify.sort
end
# Rewrite the `~` operator to `or` clauses.
#
# @return [AST] A new AST with `:opt` nodes replaced with `:or` nodes.
def rewrite_opts
# ... ~A ~B ... = ... (or A B) ...
# ... ~A ... = ... (or A) ... = ... A ...
if children.any?(&:opt?)
opts, non_opts = children.partition(&:opt?)
or_node = node(:or, *opts.flat_map(&:children))
node(type, or_node, *non_opts).rewrite_opts
elsif children.any?
node(type, *children.map(&:rewrite_opts))
else
self
rewrite do |ast|
# ... ~A ~B ... = ... (or A B) ...
# ... ~A ... = ... (or A) ... = ... A ...
if ast.children.any?(&:opt?)
opts, non_opts = ast.children.partition(&:opt?)
or_node = node(:or, *opts.flat_map(&:children))
node(ast.type, or_node, *non_opts)
else
ast
end
end
end
# Create a new AST node, sorting the child nodes so that the AST is normalized to a consistent form.
def node(type, *args)
AST.new(type, args.sort)
# Simplify the AST by eliminating unnecessary AND and OR nodes, and by
# expanding out deeply nested subexpressions. The result is an AST in
# conjunctive normal form.
#
# @return [AST] A new AST in conjunctive normal form.
def simplify
repeat_until_unchanged do |ast|
ast.trim_once.simplify_once
end
end
# Simplify the AST once in a single top-down pass by applying the double
# negation law, DeMorgan's law, and the distributive law. This expands
# out deeply nested subexpressions.
#
# @return [AST] A new simplified AST
def simplify_once
rewrite do |ast|
case ast
# Double negation: -(-A) = A
in [:not, [:not, a]]
a
# DeMorgan's law: -(A and B) = -A or -B
in [:not, [:and, *children]]
node(:or, *children.map { node(:not, _1) })
# DeMorgan's law: -(A or B) = -A and -B
in [:not, [:or, *children]]
node(:and, *children.map { node(:not, _1) })
# Distributive law: A or (B and C) = (A or B) and (A or C)
# (or A (and B C ...) ... = (and (or A B ...) (or A C ...) ...
in [:or, *children] if children.any?(&:and?)
ands, non_ands = children.partition(&:and?)
first_and, rest = ands.first, ands[1..] + non_ands
node(:and, *first_and.children.map { node(:or, _1, *rest) })
else
ast
end
end
end
# Trim the AST by eliminating redundant AND and OR clauses.
def trim
repeat_until_unchanged(&:trim_once)
end
def trim_once
rewrite do |ast|
case ast
# (and A) = A; (or A) = A
in :and | :or, a
a
# Associative law: (and (and A B) C) = (and A B C)
in :and, *children
node(:and, *children.flat_map { _1.and? ? _1.children : _1 })
# Associative law: (or (or A B) C) = (or A B C)
in :or, *children
node(:or, *children.flat_map { _1.or? ? _1.children : _1 })
else
ast
end
end
end
# Sort the AST into alphabetical order.
def sort
if children.present?
node(type, *children.map(&:sort).sort)
else
self
end
end
end
@@ -210,7 +226,7 @@ class PostQuery
end
end
concerning :UtilityMethods do
concerning :TraversalMethods do
# Traverse the AST in depth-first left-to-right order, calling the block on each
# node and passing it the current node and the results from visiting each subtree.
def visit(&block)
@@ -227,6 +243,35 @@ class PostQuery
self
end
# Rewrite the AST by calling the block on each node and replacing the node with the result.
def rewrite(&block)
ast = yield self
if ast.children.any?
node(ast.type, *ast.children.map { _1.rewrite(&block) } )
else
ast
end
end
# Call the block on the AST repeatedly until the output stops changing.
#
# `ast.repeat_until_unchanged(&:trim)` is like doing `ast.trim.trim.trim...`
# until the AST can't be trimmed any more.
def repeat_until_unchanged(&block)
old = nil
new = self
until new == old
old = new
new = yield old
end
new
end
end
concerning :UtilityMethods do
# @return [Array<AST>] A flat list of all the nodes in the AST, in depth-first left-to-right order.
def nodes
each.map
@@ -298,6 +343,6 @@ class PostQuery
end
end
memoize :simplify, :simplify_once, :rewrite_opts, :inquirer, :deconstruct, :inspect, :to_sexp, :to_infix, :to_tree, :nodes, :tags, :metatags, :tag_names
memoize :to_cnf, :simplify, :simplify_once, :rewrite_opts, :trim, :trim_once, :sort, :inquirer, :deconstruct, :inspect, :to_sexp, :to_infix, :to_tree, :nodes, :tags, :metatags, :tag_names
end
end

View File

@@ -2,7 +2,7 @@ require 'test_helper'
class PostQueryParserTest < ActiveSupport::TestCase
def assert_parse_equals(expected, input)
assert_equal(expected, PostQuery::Parser.parse(input).simplify.to_sexp)
assert_equal(expected, PostQuery::Parser.parse(input).to_cnf.to_sexp)
end
def to_infix(string)