Fix #4978: -approver:username implicitly adds approver:any

Fix the approver:, parent:, and pixiv: metatags not working correctly when negated:

* Fix -approver:<name> not including posts that don't have an approver (the approver_id is NULL)
* Fix -parent:<id> not including posts that don't have a parent (the parent_id is NULL)
* Fix -pixiv:<id> not including posts that aren't from Pixiv (the pixiv_id is NULL)

The problem lies how the equality operator is negated when the column contains NULL values;
`approver_id != 52664` doesn't match posts where the `approver_id` is NULL.

The search `approver:evazion` boils down to:

    # Post.where(approver_id: 52664).to_sql
    SELECT * FROM posts WHERE approver_id = 52664;

When that is negated with `-approver:evazion`, it becomes:

    # Post.where(approver_id: 52664).invert_where.to_sql
    SELECT * FROM posts WHERE approver_id != 52664;

But in SQL, `approver_id != 52664` doesn't match when the approver_id IS NULL, so the search doesn't
include posts without an approver.

We could use `a IS NOT DISTINCT FROM b` instead of `a = b`:

    # Post.where(Post.arel_table[:approver_id].is_not_distinct_from(52664)).to_sql
    SELECT * FROM posts WHERE approver_id IS NOT DISTINCT FROM 52664;

This way when it's inverted it becomes `IS DISTINCT FROM`:

    # Post.where(Post.arel_table[:approver_id].is_not_distinct_from(52664)).invert_where.to_sql
    SELECT * FROM posts WHERE approver_id IS NOT DISTINCT FROM 52664;

`approver_id IS DISTINCT FROM 52664` is like `approver_id != 52664`, except it matches when
approver_id is NULL [1].

This works correctly, however the problem is that `IS NOT DISTINCT FROM` can't use indexes because
of a long-standing Postgres limitation [2]. This makes searches too slow. So instead we do this:

    # Post.where(approver_id: 52664).where.not(approver_id: nil).to_sql
    SELECT * FROM posts WHERE approver_id = 52664 AND approver_id IS NOT NULL;

That way when negated it becomes:

    # Post.where(approver_id: 52664).where.not(approver_id: nil).invert_where.to_sql
    SELECT * FROM posts WHERE approver_id != 52664 OR approver_id IS NULL;

Which is the correct behavior.

[1] https://modern-sql.com/feature/is-distinct-from
[2] https://www.postgresql.org/message-id/6FC83909-5DB1-420F-9191-DBE533A3CEDE@excoventures.com
This commit is contained in:
evazion
2022-09-26 19:28:10 -05:00
parent a399ab1060
commit 65dbd89e25
3 changed files with 61 additions and 25 deletions

View File

@@ -132,8 +132,7 @@ module Searchable
def where_array_count(attr, value)
qualified_column = "cardinality(#{qualified_column_for(attr)})"
range = PostQueryBuilder.parse_range(value, :integer)
where_operator(qualified_column, *range)
where_numeric_matches(qualified_column, value)
end
# @param attr [String] the name of the JSON field
@@ -172,8 +171,7 @@ module Searchable
# value: "5", ">5", "<5", ">=5", "<=5", "5..10", "5,6,7"
def where_numeric_matches(attribute, value, type = :integer)
range = PostQueryBuilder.parse_range(value, type)
where_operator(attribute, *range)
attribute_matches(value, attribute, type)
end
def where_boolean_matches(attribute, value)
@@ -202,6 +200,25 @@ module Searchable
end
end
def attribute_matches(value, field, type = :integer)
operator, *args = PostQueryBuilder.parse_metatag_value(value, type)
relation = where_operator(field, operator, *args)
# XXX Hack to make negating the equality operator work correctly on nullable columns.
#
# This makes `Post.attribute_matches(1, :approver_id)` produce `WHERE approver_id = 1 AND approver_id IS NOT NULL`.
# This way if the relation is negated with `Post.attribute_matches(1, :approver_id).negate_relation`, it will
# produce `WHERE approver_id != 1 OR approver_id IS NULL`. This is so the search includes NULL values; if it
# was just `approver_id != 1`, then it would not include when approver_id is NULL.
if (operator in :eq | :not_eq) && args[0] != nil && has_attribute?(field) && column_for_attribute(field).null
relation = relation.where.not(field => nil)
end
relation
rescue PostQueryBuilder::ParseError
none
end
def search_attributes(params, attributes, current_user:)
SearchContext.new(all, params, current_user).search_attributes(attributes)
end

View File

@@ -1061,13 +1061,6 @@ class Post < ApplicationRecord
end
end
def attribute_matches(value, field, type = :integer)
operator, *args = PostQueryBuilder.parse_metatag_value(value, type)
where_operator(field, operator, *args)
rescue PostQueryBuilder::ParseError
none
end
def is_matches(value, current_user = User.anonymous)
case value.downcase
when "parent"
@@ -1148,7 +1141,8 @@ class Post < ApplicationRecord
when "pending", "flagged", "appealed", "modqueue", "deleted", "banned", "active", "unmoderated"
where.not(parent: nil).where(parent: status_matches(parent))
when /\A\d+\z/
where(id: parent).or(where(parent: parent))
# XXX must use `attribute_matches(parent, :parent_id)` instead of `where(parent_id: parent)` so that `-parent:1` works
where(id: parent).or(attribute_matches(parent, :parent_id))
else
none
end
@@ -1348,7 +1342,9 @@ class Post < ApplicationRecord
else
user = User.find_by_name(username)
return none if user.nil?
where(approver: user)
# XXX must use `attribute_matches(user.id, :approver_id)` instead of `where(approver: user)` so that `-approver:evazion` works
attribute_matches(user.id, :approver_id)
end
end

View File

@@ -331,30 +331,34 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
end
should "return posts for the parent:<N> metatag" do
post = create(:post)
parent = create(:post)
child = create(:post, tag_string: "parent:#{parent.id}")
child = create(:post, parent: parent)
assert_tag_match([parent], "parent:none")
assert_tag_match([parent, post], "parent:none")
assert_tag_match([child], "-parent:none")
assert_tag_match([child], "parent:any")
assert_tag_match([parent], "-parent:any")
assert_tag_match([parent, post], "-parent:any")
assert_tag_match([child, parent], "parent:#{parent.id}")
assert_tag_match([child], "parent:#{child.id}")
assert_tag_match([], "-parent:#{parent.id}")
assert_tag_match([], "-parent:#{child.id}")
assert_tag_match([post], "-parent:#{parent.id}")
assert_tag_match([parent, post], "-parent:#{child.id}")
assert_tag_match([child], "parent:#{parent.id} parent:#{child.id}")
assert_tag_match([child], "child:none")
assert_tag_match([], "parent:garbage")
assert_tag_match([child, parent, post], "-parent:garbage")
assert_tag_match([child, post], "child:none")
assert_tag_match([parent], "child:any")
assert_tag_match([], "child:garbage")
assert_tag_match([parent], "-child:none")
assert_tag_match([child], "-child:any")
assert_tag_match([child, parent], "-child:garbage")
assert_tag_match([child, post], "-child:any")
assert_tag_match([child, parent, post], "-child:garbage")
end
should "return posts when using the status of the parent/child" do
@@ -442,7 +446,8 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
posts << create(:post, approver: nil)
assert_tag_match([posts[0]], "approver:#{users[0].name}")
assert_tag_match([posts[1]], "-approver:#{users[0].name}")
assert_tag_match([posts[2], posts[1]], "-approver:#{users[0].name}")
assert_tag_match([posts[2], posts[0]], "-approver:#{users[1].name}")
assert_tag_match([posts[1], posts[0]], "approver:any")
assert_tag_match([posts[2]], "approver:none")
assert_tag_match([posts[2]], "approver:NONE")
@@ -1022,9 +1027,27 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
assert_tag_match([post], "pixiv_id:any")
end
should "return posts for a pixiv_id:none search" do
post = create(:post)
assert_tag_match([post], "pixiv_id:none")
should "return posts for a pixiv_id: search" do
post1 = create(:post, pixiv_id: nil)
post2 = create(:post, pixiv_id: 42, source: "http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/42_p0.png")
assert_tag_match([post2], "pixiv_id:42")
assert_tag_match([post1], "-pixiv_id:42")
assert_tag_match([post2], "pixiv_id:>=42")
assert_tag_match([], "pixiv_id:<42")
assert_tag_match([], "-pixiv_id:>=42")
assert_tag_match([post2], "-pixiv_id:<42")
assert_tag_match([post1], "pixiv_id:none")
assert_tag_match([post2], "pixiv_id:any")
assert_tag_match([post2], "-pixiv_id:none")
assert_tag_match([post1], "-pixiv_id:any")
assert_tag_match([post1], "pixiv:none")
assert_tag_match([post2], "pixiv:any")
end
should "return posts for the search: metatag" do