Files
danbooru/test/unit/post_query_parser_test.rb
evazion bf7c721815 post queries: refactor AST #simplify method.
Refactor the `PostQuery::AST#simplify` method to split it into three
methods: `#trim` to eliminate redundant AND and OR clauses, `#simplify`
to expand deeply nested subexpressions, and `#sort` to sort the query
into alphabetical order.

This is so we can normalize queries written by users by parsing and
rewriting them, but without expanding out nested subexpressions, which
can substantially alter the way the query is written.
2022-04-04 00:48:40 -05:00

343 lines
13 KiB
Ruby

require 'test_helper'
class PostQueryParserTest < ActiveSupport::TestCase
def assert_parse_equals(expected, input)
assert_equal(expected, PostQuery::Parser.parse(input).to_cnf.to_sexp)
end
def to_infix(string)
PostQuery::Parser.parse(string).to_infix
end
context "PostQueryParser:" do
should "parse empty queries correctly" do
assert_parse_equals("all", "")
assert_parse_equals("all", " ")
end
should "parse basic tags correctly" do
assert_parse_equals("a", "a")
assert_parse_equals("a", "A")
assert_parse_equals("foo_(bar)", "foo_(bar)")
assert_parse_equals("foo_(bar)", "(foo_(bar))")
assert_parse_equals("foo_(bar_(baz))", "foo_(bar_(baz))")
assert_parse_equals("foo_(bar_(baz))", "(foo_(bar_(baz)))")
assert_parse_equals(";)", ";)")
assert_parse_equals("9", "(9)")
end
should "parse basic queries correctly" do
assert_parse_equals("(and a b)", "a b")
assert_parse_equals("(or a b)", "a or b")
assert_parse_equals("(or a b)", "~a ~b")
assert_parse_equals("(not a)", "-a")
assert_parse_equals("(and (not b) a)", "a -b")
assert_parse_equals("fav:a", "fav:a")
assert_parse_equals("(not fav:a)", "-fav:a")
assert_parse_equals("(and fav:a fav:b)", "fav:a fav:b")
end
should "parse metatags correctly" do
assert_parse_equals("fav:a", "fav:a")
assert_parse_equals("user:a", "user:a")
assert_parse_equals("pool:a", "pool:a")
assert_parse_equals("order:a", "order:a")
assert_parse_equals("source:a", "source:a")
assert_parse_equals("fav:a", "FAV:a")
assert_parse_equals("fav:A", "fav:A")
assert_parse_equals("fav:a", "~fav:a")
assert_parse_equals("(not fav:a)", "-fav:a")
assert_parse_equals("(and fav:a fav:b)", "fav:a fav:b")
assert_parse_equals("(or fav:a fav:b)", "~fav:a ~fav:b")
assert_parse_equals("(or fav:a fav:b)", "fav:a or fav:b")
assert_parse_equals("fav:a", "(fav:a)")
assert_parse_equals("fav:(a)", "fav:(a)")
assert_parse_equals("fav:(a", "(fav:(a)")
assert_parse_equals('source:"foo bar"', 'source:"foo bar"')
assert_parse_equals('source:foobar"(', 'source:foobar"(')
assert_parse_equals('source:""', 'source:""')
assert_parse_equals('source:"\""', 'source:"\""')
assert_parse_equals(%q{source:"don't say \"lazy\" okay"}, %q{source:"don't say \"lazy\" okay"})
assert_parse_equals(%q{(and source:foo)bar a)}, %q{(a (source:"foo)bar"))})
end
should "parse wildcard tags correctly" do
assert_parse_equals("(wildcard *)", "*")
assert_parse_equals("(wildcard *a)", "*a")
assert_parse_equals("(wildcard a*)", "a*")
assert_parse_equals("(wildcard *a*)", "*a*")
assert_parse_equals("(wildcard a*b)", "a*b")
assert_parse_equals("(and b (wildcard *))", "* b")
assert_parse_equals("(and b (wildcard *a))", "*a b")
assert_parse_equals("(and b (wildcard a*))", "a* b")
assert_parse_equals("(and b (wildcard *a*))", "*a* b")
assert_parse_equals("(and a (wildcard *))", "a *")
assert_parse_equals("(and a (wildcard *b))", "a *b")
assert_parse_equals("(and a (wildcard b*))", "a b*")
assert_parse_equals("(and a (wildcard *b*))", "a *b*")
assert_parse_equals("(and (not (wildcard *)) a)", "a -*")
assert_parse_equals("(and (not (wildcard b*)) a)", "a -b*")
assert_parse_equals("(and (not (wildcard *b)) a)", "a -*b")
assert_parse_equals("(and (not (wildcard *b*)) a)", "a -*b*")
assert_parse_equals("(or a (wildcard *))", "~a ~*")
assert_parse_equals("(or a (wildcard *))", "~* ~a")
assert_parse_equals("(or a (wildcard *a))", "~a ~*a")
assert_parse_equals("(or a (wildcard *a))", "~*a ~a")
assert_parse_equals("(or a (wildcard a*))", "a or a*")
assert_parse_equals("(and a (wildcard a*))", "a and a*")
assert_parse_equals("(and (wildcard a*) (wildcard b*))", "a* b*")
assert_parse_equals("(or (wildcard a*) (wildcard b*))", "a* or b*")
assert_parse_equals("(and a c (wildcard b*))", "a b* c")
assert_parse_equals("(and (not (wildcard *)) a c)", "a -* c")
end
should "parse single tag queries correctly" do
assert_parse_equals("a", "a")
assert_parse_equals("a", "a ")
assert_parse_equals("a", " a")
assert_parse_equals("a", " a ")
assert_parse_equals("a", "(a)")
assert_parse_equals("a", "( a)")
assert_parse_equals("a", "(a )")
assert_parse_equals("a", " ( a ) ")
assert_parse_equals("a", "((a))")
assert_parse_equals("a", "( ( a ) )")
assert_parse_equals("a", " ( ( a ) ) ")
end
should "parse nested AND queries correctly" do
assert_parse_equals("(and a b)", "a b")
assert_parse_equals("(and a b)", "(a b)")
assert_parse_equals("(and a b)", "a (b)")
assert_parse_equals("(and a b)", "(a) b")
assert_parse_equals("(and a b)", "(a) (b)")
assert_parse_equals("(and a b)", "((a) (b))")
assert_parse_equals("(and a b c)", "a b c")
assert_parse_equals("(and a b c)", "(a b) c")
assert_parse_equals("(and a b c)", "((a) b) c")
assert_parse_equals("(and a b c)", "(((a) b) c)")
assert_parse_equals("(and a b c)", "((a b) c)")
assert_parse_equals("(and a b c)", "((a) (b) (c))")
assert_parse_equals("(and a b c)", "a (b c)")
assert_parse_equals("(and a b c)", "a (b (c))")
assert_parse_equals("(and a b c)", "(a (b (c)))")
assert_parse_equals("(and a b c)", "(a (b c))")
assert_parse_equals("(and a b c)", "(a b c)")
assert_parse_equals("(and a b)", "a and b")
assert_parse_equals("(and a b)", "a AND b")
assert_parse_equals("(and a b)", "(a and b)")
assert_parse_equals("(and a b c)", "a and b and c")
assert_parse_equals("(and a b c)", "(a and b) and c")
assert_parse_equals("(and a b c)", "a and (b and c)")
assert_parse_equals("(and a b c)", "(a and b and c)")
end
should "parse nested OR queries correctly" do
assert_parse_equals("(or a b)", "a or b")
assert_parse_equals("(or a b)", "a OR b")
assert_parse_equals("(or a b)", "(a or b)")
assert_parse_equals("(or a b)", "(a) or (b)")
assert_parse_equals("(or a b c)", "a or b or c")
assert_parse_equals("(or a b c)", "(a or b) or c")
assert_parse_equals("(or a b c)", "a or (b or c)")
assert_parse_equals("(or a b c)", "(a or b or c)")
assert_parse_equals("(or a b c d)", "a or (b or (c or d))")
assert_parse_equals("(or a b c d)", "((a or b) or c) or d")
assert_parse_equals("(or a b c d)", "(a or b) or (c or d)")
end
should "parse the '~' operator correctly" do
assert_parse_equals("(or a b)", "~a ~b")
assert_parse_equals("(or a b c)", "~a ~b ~c")
assert_parse_equals("(or a b c d)", "~a ~b ~c ~d")
assert_parse_equals("a", "~a")
assert_parse_equals("a", "(~a)")
assert_parse_equals("a", "~(a)")
assert_parse_equals("a", "~(~a)")
assert_parse_equals("a", "~(~(~a))")
assert_parse_equals("(not a)", "~(-a)")
assert_parse_equals("(not a)", "-(~a)")
assert_parse_equals("a", "-(~(-(~a)))")
assert_parse_equals("a", "~(-(~(-a)))")
assert_parse_equals("(and a b)", "a ~b")
assert_parse_equals("(and a b)", "~a b")
assert_parse_equals("(and a b)", "((a) ~b)")
assert_parse_equals("(and a b)", "~(a b)")
assert_parse_equals("(and a b)", "~a and ~b")
assert_parse_equals("(or a b)", "~a or ~b")
assert_parse_equals("(or (not a) (not b))", "~(-a) or ~(-b)")
assert_parse_equals("(or a b)", "~(a) ~(b)")
assert_parse_equals("(and a b)", "(~a) (~b)")
assert_parse_equals("(and (or b c) a)", "(~a) ~b ~c")
assert_parse_equals("(and (or b c) a)", "~a (~b ~c)")
assert_parse_equals("(or a b c d)", "~a ~b or ~c ~d")
assert_parse_equals("(and (or a b) (or c d))", "~a ~b and ~c ~d")
assert_parse_equals("(and (or a b) (or c d))", "(~a ~b) (~c ~d)")
assert_parse_equals("(and (or a c) (or a d) (or b c) (or b d))", "~(a b) ~(c d)")
assert_parse_equals("(and (or a c) (or a d) (or b c) (or b d))", "(a b) or (c d)")
assert_parse_equals("(and a b c d)", " a b c d")
assert_parse_equals("(and a b c d)", " a b c ~d")
assert_parse_equals("(and a b c d)", " a b ~c d")
assert_parse_equals("(and (or c d) a b)", " a b ~c ~d")
assert_parse_equals("(and a b c d)", " a ~b c d")
assert_parse_equals("(and (or b d) a c)", " a ~b c ~d")
assert_parse_equals("(and (or b c) a d)", " a ~b ~c d")
assert_parse_equals("(and (or b c d) a)", " a ~b ~c ~d")
assert_parse_equals("(and a b c d)", "~a b c d")
assert_parse_equals("(and (or a d) b c)", "~a b c ~d")
assert_parse_equals("(and (or a c) b d)", "~a b ~c d")
assert_parse_equals("(and (or a c d) b)", "~a b ~c ~d")
assert_parse_equals("(and (or a b) c d)", "~a ~b c d")
assert_parse_equals("(and (or a b d) c)", "~a ~b c ~d")
assert_parse_equals("(and (or a b c) d)", "~a ~b ~c d")
assert_parse_equals("(or a b c d)", "~a ~b ~c ~d")
end
should "parse NOT queries correctly" do
assert_parse_equals("(not a)", "-a")
assert_parse_equals("(and (not b) a)", "(a -b)")
assert_parse_equals("(and (not b) a)", "a (-b)")
assert_parse_equals("(and (not b) a)", "((a) -b)")
end
should "eliminate double negations" do
assert_parse_equals("(not a)", "-a")
assert_parse_equals("(not a)", "-(-(-a))")
assert_parse_equals("a", "-(-a)")
assert_parse_equals("a", "-(-(-(-a)))")
assert_parse_equals("(and a b c)", "a -(-(b)) c")
assert_parse_equals("(and a b c d)", "a -(-(b -(-c))) d")
end
should "apply DeMorgan's law" do
assert_parse_equals("(or (not a) (not b))", "-(a b)")
assert_parse_equals("(and (not a) (not b))", "-(a or b)")
assert_parse_equals("(or (not a) (not b) (not c))", "-(a b c)")
assert_parse_equals("(and (not a) (not b) (not c))", "-(a or b or c)")
assert_parse_equals("(or a b c)", "-(-a -b -c)")
assert_parse_equals("(and a b c)", "-(-a or -b or -c)")
assert_parse_equals("(and (or (not a) (not c) (not d)) (or (not a) b))", "-(a -(b -(c d)))")
end
should "apply the distributive law" do
assert_parse_equals("(and (or a b) (or a c))", "a or (b c)")
assert_parse_equals("(and (or a b) (or a c))", "(b c) or a")
assert_parse_equals("(and (or a c) (or a d) (or b c) (or b d))", "(a b) or (c d)")
assert_parse_equals("(and (or a c e) (or a c f) (or a d e) (or a d f) (or b c e) (or b c f) (or b d e) (or b d f))", "(a b) or (c d) or (e f)")
end
should "return the empty search for syntax errors" do
assert_parse_equals("none", "(")
assert_parse_equals("none", ")")
assert_parse_equals("none", "-")
assert_parse_equals("none", "~")
assert_parse_equals("none", "(a")
assert_parse_equals("none", ")a")
assert_parse_equals("none", "-~a")
assert_parse_equals("none", "~-a")
assert_parse_equals("none", "~~a")
assert_parse_equals("none", "--a")
assert_parse_equals("none", "and")
assert_parse_equals("none", "-and")
assert_parse_equals("none", "~and")
assert_parse_equals("none", "or")
assert_parse_equals("none", "-or")
assert_parse_equals("none", "~or")
assert_parse_equals("none", "a and")
assert_parse_equals("none", "a or")
assert_parse_equals("none", "and a")
assert_parse_equals("none", "or a")
assert_parse_equals("none", "a -")
assert_parse_equals("none", "a ~")
assert_parse_equals("none", "(a b")
assert_parse_equals("none", "(a (b)")
assert_parse_equals("none", 'source:"foo')
assert_parse_equals("none", 'source:"foo bar')
end
context "#to_infix" do
should "work" do
assert_equal("", to_infix(""))
assert_equal("", to_infix(" "))
assert_equal("a", to_infix("a"))
assert_equal("a b", to_infix("a b"))
assert_equal("a b c", to_infix("a b c"))
assert_equal("-a", to_infix("-a"))
assert_equal("-(-a)", to_infix("-(-a)"))
assert_equal("-(-(-a))", to_infix("-(-(-a))"))
assert_equal("~a", to_infix("~a"))
assert_equal("~a ~b", to_infix("~a ~b"))
assert_equal("~a ~(b ~c)", to_infix("~a ~(b ~c)"))
assert_equal("a b", to_infix("a and b"))
assert_equal("a (b c)", to_infix("a and b and c"))
assert_equal("a (b (c d))", to_infix("a and b and c and d"))
assert_equal("a or b", to_infix("a or b"))
assert_equal("a or (b or c)", to_infix("a or b or c"))
assert_equal("a or (b or (c or d))", to_infix("a or b or c or d"))
assert_equal("(a b) or (c d)", to_infix("a b or c d"))
assert_equal("(~a ~b) (~c ~d)", to_infix("~a ~b and ~c ~d"))
assert_equal("(a or b) (c or d)", to_infix("(a or b) and (c or d)"))
assert_equal("a source:b", to_infix("a source:b"))
assert_equal('a source:"b c"', to_infix('a source:"b c"'))
assert_equal('a source:"\\""', to_infix('a source:"\\""'))
assert_equal('a source:""', to_infix('a source:""'))
assert_equal("a* b", to_infix("a* b"))
assert_equal("none", to_infix("("))
end
end
end
end