post queries: switch to new post search engine.

Switch to the post search engine using the new PostQuery parser. The new
engine fully supports AND, OR, and NOT operators and grouping expressions
with parentheses.

Highlights:

New OR operator:

* `skirt or dress` (same as `~skirt ~dress`)

Tags can be grouped with parentheses:

* `1girl (skirt or dress)`
* `(blonde_hair blue_eyes) or (red_hair green_eyes)`
* `~(blonde_hair blue_eyes) ~(red_hair green_eyes)` (same as above)
* `(pantyhose or thighhighs) (black_legwear or brown_legwear)`
* `(~pantyhose ~thighhighs) (~black_legwear ~brown_legwear)` (same as above)

Metatags can be OR'd together:

* `user:evazion or fav:evazion`
* `~user:evazion ~fav:evazion` (same as above)

Wildcard tags can be combined with either AND or OR:

* `black_* white_*` (find posts with at least one black_* tag AND one white_* tag)
* `black_* or white_*` (find posts with at least one black_* tag OR one white_* tag)
* `~black_* ~white_*` (same as above)

See 4c7cfc73 for more syntax examples.

Fixes #4949: And+or search?
Fixes #5056: Wildcard searches return unexpected results when combined with OR searches
This commit is contained in:
evazion
2022-04-04 16:52:11 -05:00
parent 703fd05025
commit af183467b6
11 changed files with 227 additions and 387 deletions

View File

@@ -273,7 +273,7 @@ class BulkUpdateRequestProcessor
"mass update {{#{args[0]}}} -> {{#{args[1]}}}" "mass update {{#{args[0]}}} -> {{#{args[1]}}}"
when :nuke when :nuke
if PostQuery.new(args[0]).is_single_tag? if PostQuery.normalize(args[0]).is_simple_tag?
"nuke [[#{args[0]}]]" "nuke [[#{args[0]}]]"
else else
"nuke {{#{args[0]}}}" "nuke {{#{args[0]}}}"
@@ -292,7 +292,7 @@ class BulkUpdateRequestProcessor
def self.nuke(tag_name) def self.nuke(tag_name)
# Reject existing implications from any other tag to the one we're nuking # Reject existing implications from any other tag to the one we're nuking
# otherwise the tag won't be removed from posts that have those other tags # otherwise the tag won't be removed from posts that have those other tags
if PostQuery.new(tag_name).is_single_tag? if PostQuery.normalize(tag_name).is_simple_tag?
TagImplication.active.where(consequent_name: tag_name).each { |ti| ti.reject!(User.system) } TagImplication.active.where(consequent_name: tag_name).each { |ti| ti.reject!(User.system) }
TagImplication.active.where(antecedent_name: tag_name).each { |ti| ti.reject!(User.system) } TagImplication.active.where(antecedent_name: tag_name).each { |ti| ti.reject!(User.system) }
end end

View File

@@ -13,7 +13,9 @@ module Searchable
end end
def negate_relation def negate_relation
unscoped.where(all.where_clause.invert.ast) relation = unscoped
relation = relation.from(all.from_clause.value) if all.from_clause.value.present?
relation.where(all.where_clause.invert.ast)
end end
# XXX hacky method to AND two relations together. # XXX hacky method to AND two relations together.

View File

@@ -3,11 +3,18 @@
class PostQuery class PostQuery
extend Memoist extend Memoist
private attr_reader :current_user, :tag_limit, :safe_mode, :hide_deleted_posts, :builder attr_reader :current_user
private attr_reader :tag_limit, :safe_mode, :hide_deleted_posts, :builder
delegate :tag?, :metatag?, :wildcard?, :metatags, :wildcards, :tag_names, :metatags, to: :ast delegate :tag?, :metatag?, :wildcard?, :metatags, :wildcards, :tag_names, :metatags, :to_infix, to: :ast
alias_method :safe_mode?, :safe_mode alias_method :safe_mode?, :safe_mode
alias_method :hide_deleted_posts?, :hide_deleted_posts alias_method :hide_deleted_posts?, :hide_deleted_posts
alias_method :to_s, :to_infix
# Return a new PostQuery with aliases replaced.
def self.normalize(...)
PostQuery.new(...).replace_aliases.trim
end
def initialize(search_or_ast, current_user: User.anonymous, tag_limit: nil, safe_mode: false, hide_deleted_posts: false) def initialize(search_or_ast, current_user: User.anonymous, tag_limit: nil, safe_mode: false, hide_deleted_posts: false)
if search_or_ast.is_a?(AST) if search_or_ast.is_a?(AST)
@@ -39,10 +46,25 @@ class PostQuery
@ast ||= Parser.parse(search) @ast ||= Parser.parse(search)
end end
def fast_count(...) def posts
builder.normalized_query.fast_count(...) builder.posts(to_cnf)
end end
def paginated_posts(...)
builder.paginated_posts(to_cnf, ...)
end
# The name of the only tag in the query, if the query contains a single tag. The tag may not exist. The query may contain other metatags or wildcards, and the tag may be negated.
def tag_name
tag_names.first if has_single_tag?
end
# The only tag in the query, if the query contains a single tag. The query may contain other metatags or wildcards, and the tag may be negated.
def tag
tags.first if has_single_tag?
end
# The list of all tags contained in the query.
def tags def tags
Tag.where(name: tag_names) Tag.where(name: tag_names)
end end
@@ -57,10 +79,39 @@ class PostQuery
ast.none? ast.none?
end end
def is_single_tag? # True if the search is a single metatag search for the given metatag.
def is_metatag?(name, value = nil)
if value.nil?
is_single_term? && has_metatag?(name)
else
is_single_term? && find_metatag(name) == value.to_s
end
end
# True if the search consists of a single tag, metatag, or wildcard.
def is_single_term?
tag_names.size + metatags.size + wildcards.size == 1
end
# True if this search consists only of a single non-negated tag, with no other metatags or operators.
def is_simple_tag?
ast.tag? ast.tag?
end end
# True if the search contains a single tag. It may have other metatags or wildcards, and the tag may be negated.
def has_single_tag?
tag_names.one?
end
# True if the search depends on the current user because of permissions or privacy settings.
def is_user_dependent_search?
metatags.any? do |metatag|
metatag.name.in?(%w[upvoter upvote downvoter downvote search flagger fav ordfav favgroup ordfavgroup]) ||
metatag.name == "status" && metatag.value == "unmoderated" ||
metatag.name == "disapproved" && !metatag.value.downcase.in?(PostDisapproval::REASONS)
end
end
def select_metatags(*names) def select_metatags(*names)
metatags.select { |metatag| metatag.name.in?(names.map(&:to_s).map(&:downcase)) } metatags.select { |metatag| metatag.name.in?(names.map(&:to_s).map(&:downcase)) }
end end
@@ -73,9 +124,9 @@ class PostQuery
select_metatags(*names).first&.value select_metatags(*names).first&.value
end end
# Return a new PostQuery with aliases replaced, implicit metatags added, and the query converted to conjunctive normal form. # Return a new PostQuery with unnecessary AND and OR clauses eliminated.
def normalize def trim
replace_aliases.with_implicit_metatags.to_cnf build(ast.trim)
end end
# Return a new PostQuery with aliases replaced. # Return a new PostQuery with aliases replaced.
@@ -115,5 +166,75 @@ class PostQuery
hide_deleted_posts? && !has_status_metatag hide_deleted_posts? && !has_status_metatag
end end
memoize :tags, :normalize, :replace_aliases, :with_implicit_metatags, :to_cnf, :aliases, :implicit_metatags, :hide_deleted? concerning :CountMethods do
def post_count
@post_count ||= fast_count
end
# Return an estimate of the number of posts returned by the search. By default, we try to use an
# estimated or cached count before doing an exact count.
#
# @param timeout [Integer] The database timeout in milliseconds
# @param estimate_count [Boolean] If true, estimate the count with inexact methods.
# @param skip_cache [Boolean] If true, don't use the cached count.
# @return [Integer, nil] The number of posts, or nil on timeout.
def fast_count(timeout: 1_000, estimate_count: true, skip_cache: false)
count = nil
count = estimated_count if estimate_count
count = cached_count(timeout) if count.nil? && !skip_cache
count = exact_count(timeout) if count.nil? && skip_cache
count
end
def estimated_count
if is_empty_search?
estimated_row_count
elsif is_simple_tag?
tag.try(:post_count)
elsif is_metatag?(:rating)
estimated_row_count
elsif is_metatag?(:pool) || is_metatag?(:ordpool)
name = find_metatag(:pool, :ordpool)
Pool.find_by_name(name)&.post_count || 0
elsif is_metatag?(:fav) || is_metatag?(:ordfav)
name = find_metatag(:fav, :ordfav)
user = User.find_by_name(name)
if user.nil?
0
elsif Pundit.policy!(current_user, user).can_see_favorites?
user.favorite_count
else
nil
end
end
end
# Estimate the count by parsing the Postgres EXPLAIN output.
def estimated_row_count
ExplainParser.new(posts).row_count
end
def cached_count(timeout, duration: 5.minutes)
Cache.get(count_cache_key, duration) do
exact_count(timeout)
end
end
def exact_count(timeout)
Post.with_timeout(timeout) do
posts.count
end
end
def count_cache_key
if is_user_dependent_search?
"pfc[#{current_user.id.to_i}]:#{to_s}"
else
"pfc:#{to_s}"
end
end
end
memoize :tags, :replace_aliases, :with_implicit_metatags, :to_cnf, :aliases, :implicit_metatags, :hide_deleted?
end end

View File

@@ -93,49 +93,6 @@ class PostQueryBuilder
@hide_deleted_posts = hide_deleted_posts @hide_deleted_posts = hide_deleted_posts
end end
def tags_match(tags, relation)
negated_wildcard_tags, negated_tags = tags.select(&:negated).partition(&:wildcard)
optional_wildcard_tags, optional_tags = tags.select(&:optional).partition(&:wildcard)
required_wildcard_tags, required_tags = tags.reject(&:negated).reject(&:optional).partition(&:wildcard)
negated_tags = negated_tags.map(&:name)
optional_tags = optional_tags.map(&:name)
required_tags = required_tags.map(&:name)
matched_negated_wildcard_tags = negated_wildcard_tags.flat_map { |tag| Tag.wildcard_matches(tag.name).limit(MAX_WILDCARD_TAGS).pluck(:name) }
matched_optional_wildcard_tags = optional_wildcard_tags.flat_map { |tag| Tag.wildcard_matches(tag.name).limit(MAX_WILDCARD_TAGS).pluck(:name) }
matched_required_wildcard_tags = required_wildcard_tags.flat_map { |tag| Tag.wildcard_matches(tag.name).limit(MAX_WILDCARD_TAGS).pluck(:name) }
negated_tags += (matched_negated_wildcard_tags.empty? && !negated_wildcard_tags.empty?) ? negated_wildcard_tags.map(&:name) : matched_negated_wildcard_tags
optional_tags += (matched_optional_wildcard_tags.empty? && !optional_wildcard_tags.empty?) ? optional_wildcard_tags.map(&:name) : matched_optional_wildcard_tags
optional_tags += (matched_required_wildcard_tags.empty? && !required_wildcard_tags.empty?) ? required_wildcard_tags.map(&:name) : matched_required_wildcard_tags
relation = relation.where_array_includes_all("string_to_array(posts.tag_string, ' ')", required_tags) if required_tags.present?
relation = relation.where_array_includes_any("string_to_array(posts.tag_string, ' ')", optional_tags) if optional_tags.present?
relation = relation.where_array_includes_none("string_to_array(posts.tag_string, ' ')", negated_tags) if negated_tags.present?
relation
end
def metatags_match(metatags, relation)
metatags.each do |metatag|
metatag_name = metatags_without_ord[metatag.name] if metatag.negated && metatags_without_ord.key?(metatag.name)
clause = metatag_matches(metatag_name || metatag.name, metatag.value, quoted: metatag.quoted)
clause = clause.negate_relation if metatag.negated
relation = relation.and_relation(clause)
end
relation
end
def metatags_without_ord
{
"ordfav" => "fav",
"ordfavgroup" => "favgroup",
"ordpool" => "pool",
}
end
def metatag_matches(name, value, relation = Post.all, quoted: false) def metatag_matches(name, value, relation = Post.all, quoted: false)
case name case name
when "id" when "id"
@@ -256,53 +213,77 @@ class PostQueryBuilder
end end
end end
def tables_for_query def tables_for_query(post_query)
metatag_names = metatags.map(&:name) metatag_names = post_query.metatags.map(&:name)
metatag_names << find_metatag(:order).remove(/_(asc|desc)\z/i) if has_metatag?(:order) metatag_names << post_query.find_metatag(:order).remove(/_(asc|desc)\z/i) if post_query.has_metatag?(:order)
tables = metatag_names.map { |metatag| table_for_metatag(metatag.to_s) } tables = metatag_names.map { |metatag| table_for_metatag(metatag.to_s) }
tables.compact.uniq tables.compact.uniq
end end
def add_joins(relation) def add_joins(post_query, relation)
tables = tables_for_query tables = tables_for_query(post_query)
relation = relation.with_stats(tables) relation = relation.with_stats(tables)
relation relation
end end
def build(includes: nil)
validate!
relation = Post.includes(includes) # Generate a SQL relation from a PostQuery.
relation = add_joins(relation) def build_relation(post_query, relation = Post.all)
relation = metatags_match(metatags, relation) post_query.ast.visit do |node, *children|
relation = tags_match(tags, relation) case node.type
in :all
relation.all
in :none
relation.none
in :tag
relation.tags_include(node.name)
in :metatag
metatag_matches(node.name, node.value, relation, quoted: node.quoted?)
in :wildcard
tag_names = Tag.wildcard_matches(node.name).limit(MAX_WILDCARD_TAGS).pluck(:name)
relation.where_array_includes_any("string_to_array(posts.tag_string, ' ')", tag_names)
in :not
children.first.negate_relation
in :and
children.reduce(&:and)
in :or
children.reduce(&:or)
end
end
end
def posts(post_query, includes: nil)
relation = Post.all
relation = add_joins(post_query, relation)
relation = build_relation(post_query, relation)
# HACK: if we're using a date: or age: metatag, default to ordering by # HACK: if we're using a date: or age: metatag, default to ordering by
# created_at instead of id so that the query will use the created_at index. # created_at instead of id so that the query will use the created_at index.
if has_metatag?(:date, :age) && find_metatag(:order).in?(["id", "id_asc"]) if post_query.has_metatag?(:date, :age) && post_query.find_metatag(:order).in?(["id", "id_asc"])
relation = search_order(relation, "created_at_asc") relation = search_order(relation, "created_at_asc")
elsif has_metatag?(:date, :age) && find_metatag(:order).in?(["id_desc", nil]) elsif post_query.has_metatag?(:date, :age) && post_query.find_metatag(:order).in?(["id_desc", nil])
relation = search_order(relation, "created_at_desc") relation = search_order(relation, "created_at_desc")
elsif find_metatag(:order) == "custom" elsif post_query.find_metatag(:order) == "custom"
relation = search_order_custom(relation, select_metatags(:id).map(&:value)) relation = search_order_custom(relation, post_query.select_metatags(:id).map(&:value))
elsif has_metatag?(:ordfav) elsif post_query.has_metatag?(:ordfav)
# no-op # no-op
else else
relation = search_order(relation, find_metatag(:order)) relation = search_order(relation, post_query.find_metatag(:order))
end end
if count = find_metatag(:random) if count = post_query.find_metatag(:random)
count = Integer(count).clamp(0, PostSets::Post::MAX_PER_PAGE) count = Integer(count).clamp(0, PostSets::Post::MAX_PER_PAGE)
relation = relation.random(count) relation = relation.random(count)
end end
relation = relation.includes(includes)
relation relation
end end
def paginated_posts(page, small_search_threshold: Danbooru.config.small_search_threshold.to_i, includes: nil, **options) def paginated_posts(post_query, page, count:, small_search_threshold: Danbooru.config.small_search_threshold.to_i, includes: nil, **options)
posts = build(includes: includes).paginate(page, **options) posts = posts(post_query, includes: includes).paginate(page, count: count, **options)
posts = optimize_search(posts, small_search_threshold) posts = optimize_search(posts, count, small_search_threshold)
posts.load posts.load
end end
@@ -315,7 +296,7 @@ class PostQueryBuilder
# tags, Postgres sometimes assumes tags in the 10k-50k range are large enough # tags, Postgres sometimes assumes tags in the 10k-50k range are large enough
# for a post id index scan, when in reality a tag index bitmap scan would be # for a post id index scan, when in reality a tag index bitmap scan would be
# better. # better.
def optimize_search(relation, small_search_threshold) def optimize_search(relation, post_count, small_search_threshold)
return relation unless small_search_threshold.present? return relation unless small_search_threshold.present?
order_values = relation.order_values.map { |order| order.try(:to_sql) || order.to_s }.map(&:downcase) order_values = relation.order_values.map { |order| order.try(:to_sql) || order.to_s }.map(&:downcase)
@@ -745,131 +726,6 @@ class PostQueryBuilder
end end
end end
concerning :CountMethods do
def post_count
@post_count ||= fast_count
end
# Return an estimate of the number of posts returned by the search. By
# default, we try to use an estimated or cached count before doing an exact
# count.
#
# @param timeout [Integer] the database timeout
# @param estimate_count [Boolean] if true, estimate the count with inexact methods
# @param skip_cache [Boolean] if true, don't use the cached count
# @return [Integer, nil] the number of posts, or nil on timeout
def fast_count(timeout: 1_000, estimate_count: true, skip_cache: false)
count = nil
count = estimated_count if estimate_count
count = cached_count(timeout) if count.nil? && !skip_cache
count = exact_count(timeout) if count.nil? && skip_cache
count
end
def estimated_count
if is_empty_search?
estimated_row_count
elsif is_simple_tag?
Tag.find_by(name: tags.first.name).try(:post_count)
elsif is_metatag?(:rating)
estimated_row_count
elsif is_metatag?(:pool) || is_metatag?(:ordpool)
name = find_metatag(:pool, :ordpool)
Pool.find_by_name(name)&.post_count || 0
elsif is_metatag?(:fav) || is_metatag?(:ordfav)
name = find_metatag(:fav, :ordfav)
user = User.find_by_name(name)
if user.nil?
0
elsif Pundit.policy!(current_user, user).can_see_favorites?
user.favorite_count
else
nil
end
end
end
# Estimate the count by parsing the Postgres EXPLAIN output.
def estimated_row_count
ExplainParser.new(build).row_count
end
def cached_count(timeout, duration: 5.minutes)
Cache.get(count_cache_key, duration) do
exact_count(timeout)
end
end
def exact_count(timeout)
Post.with_timeout(timeout) do
build.count
end
end
def count_cache_key
if is_user_dependent_search?
"pfc[#{current_user.id.to_i}]:#{to_s}"
else
"pfc:#{to_s}"
end
end
# @return [Boolean] true if the search depends on the current user because
# of permissions or privacy settings.
def is_user_dependent_search?
metatags.any? do |metatag|
metatag.name.in?(%w[upvoter upvote downvoter downvote search flagger fav ordfav favgroup ordfavgroup]) ||
metatag.name == "status" && metatag.value == "unmoderated" ||
metatag.name == "disapproved" && !metatag.value.downcase.in?(PostDisapproval::REASONS)
end
end
end
concerning :NormalizationMethods do
# Normalize a search by sorting tags and applying aliases.
# @return [PostQueryBuilder] the normalized query
def normalized_query(implicit: true, sort: true)
post_query = dup
post_query.terms.concat(implicit_metatags) if implicit
post_query.normalize_aliases!
post_query.normalize_order! if sort
post_query
end
# Apply aliases to all tags in the query.
def normalize_aliases!
tag_names = tags.map(&:name)
tag_aliases = tag_names.zip(TagAlias.to_aliased(tag_names)).to_h
terms.map! do |term|
term.name = tag_aliases[term.name] if term.type == :tag
term
end
end
# Normalize the tag order.
def normalize_order!
terms.sort_by!(&:to_s).uniq!
end
# Implicit metatags are metatags added by the user's account settings.
# rating:s is implicit under safe mode. -status:deleted is implicit when the
# "hide deleted posts" setting is on.
def implicit_metatags
metatags = []
metatags << OpenStruct.new(type: :metatag, name: "rating", value: "s") if safe_mode?
metatags << OpenStruct.new(type: :metatag, name: "status", value: "deleted", negated: true) if hide_deleted?
metatags
end
# XXX unify with PostSets::Post#show_deleted?
def hide_deleted?
has_status_metatag = select_metatags(:status).any? { |metatag| metatag.value.downcase.in?(%w[deleted active any all unmoderated modqueue appealed]) }
hide_deleted_posts? && !has_status_metatag
end
end
concerning :UtilityMethods do concerning :UtilityMethods do
def to_s def to_s
split_query.join(" ") split_query.join(" ")
@@ -879,78 +735,7 @@ class PostQueryBuilder
def terms def terms
@terms ||= scan_query @terms ||= scan_query
end end
# The list of regular tags in the search.
def tags
terms.select { |term| term.type == :tag }
end
# The list of metatags in the search.
def metatags
terms.select { |term| term.type == :metatag }
end
# Find all metatags with the given names.
def select_metatags(*names)
metatags.select { |term| term.name.in?(names.map(&:to_s)) }
end
# Find the first metatag with any of the given names.
def find_metatag(*metatags)
select_metatags(*metatags).first.try(:value)
end
# @return [Boolean] true if the search has a metatag with any of the given names.
def has_metatag?(*metatag_names)
metatags.any? { |term| term.name.in?(metatag_names.map(&:to_s).map(&:downcase)) }
end
# @return [Boolean] true if the search has a single regular tag, with any number of metatags.
def has_single_tag?
tags.size == 1 && !tags.first.wildcard
end
# @return [Boolean] true if the search is a single metatag search for the given metatag.
def is_metatag?(name, value = nil)
if value.nil?
is_single_term? && has_metatag?(name)
else
is_single_term? && find_metatag(name) == value.to_s
end
end
# @return [Boolean] true if the search doesn't have any tags or metatags.
def is_empty_search?
terms.size == 0
end
# @return [Boolean] true if the search consists of a single tag or metatag.
def is_single_term?
terms.size == 1
end
# @return [Boolean] true if the search has a single tag, possibly with wildcards or negation.
def is_single_tag?
is_single_term? && tags.size == 1
end
# @return [Boolean] true if the search has a single tag, without any wildcards or operators.
def is_simple_tag?
tag = tags.first
is_single_tag? && !tag.negated && !tag.optional && !tag.wildcard
end
# @return [Boolean] true if the search has a single tag with a wildcard
def is_wildcard_search?
is_single_tag? && tags.first.wildcard
end
# @return [Tag, nil] the tag if the search is for a simple tag, otherwise nil
def simple_tag
return nil if !is_simple_tag?
Tag.find_by_name(tags.first.name)
end
end end
memoize :split_query, :post_count memoize :split_query
end end

View File

@@ -8,14 +8,16 @@ module PostSets
class Post class Post
MAX_PER_PAGE = 200 MAX_PER_PAGE = 200
MAX_SIDEBAR_TAGS = 25 MAX_SIDEBAR_TAGS = 25
MAX_WILDCARD_TAGS = PostQueryBuilder::MAX_WILDCARD_TAGS
attr_reader :page, :format, :tag_string, :query, :normalized_query, :show_votes attr_reader :page, :format, :tag_string, :query, :post_query, :normalized_query, :show_votes
delegate :post_count, to: :normalized_query delegate :tag, to: :post_query
alias_method :show_votes?, :show_votes alias_method :show_votes?, :show_votes
def initialize(tags, page = 1, per_page = nil, user: CurrentUser.user, format: "html", show_votes: false) def initialize(tags, page = 1, per_page = nil, user: CurrentUser.user, format: "html", show_votes: false)
@query = PostQueryBuilder.new(tags, user, tag_limit: user.tag_query_limit, safe_mode: CurrentUser.safe_mode?, hide_deleted_posts: user.hide_deleted_posts?) @query = PostQueryBuilder.new(tags, user, tag_limit: user.tag_query_limit, safe_mode: CurrentUser.safe_mode?, hide_deleted_posts: user.hide_deleted_posts?)
@normalized_query = query.normalized_query @post_query = PostQuery.normalize(tags, current_user: user, tag_limit: user.tag_query_limit, safe_mode: CurrentUser.safe_mode?, hide_deleted_posts: user.hide_deleted_posts?)
@normalized_query = post_query.with_implicit_metatags
@tag_string = tags @tag_string = tags
@page = page @page = page
@per_page = per_page @per_page = per_page
@@ -32,13 +34,8 @@ module PostSets
end end
def wiki_page def wiki_page
return nil unless normalized_query.has_single_tag? return nil unless post_query.has_single_tag?
@wiki_page ||= WikiPage.undeleted.find_by(title: normalized_query.tags.first.name) @wiki_page ||= WikiPage.undeleted.find_by(title: post_query.tag_name)
end
def tag
return nil unless normalized_query.has_single_tag?
@tag ||= Tag.find_by(name: normalized_query.tags.first.name)
end end
def artist def artist
@@ -48,7 +45,7 @@ module PostSets
end end
def pool def pool
pool_names = normalized_query.select_metatags(:pool, :ordpool).map(&:value) pool_names = post_query.select_metatags(:pool, :ordpool).map(&:value)
name = pool_names.first name = pool_names.first
return nil unless pool_names.size == 1 return nil unless pool_names.size == 1
@@ -56,7 +53,7 @@ module PostSets
end end
def favgroup def favgroup
favgroup_names = normalized_query.select_metatags(:favgroup, :ordfavgroup).map(&:value) favgroup_names = post_query.select_metatags(:favgroup, :ordfavgroup).map(&:value)
name = favgroup_names.first name = favgroup_names.first
return nil unless favgroup_names.size == 1 return nil unless favgroup_names.size == 1
@@ -84,7 +81,7 @@ module PostSets
end end
def per_page def per_page
(@per_page || query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, max_per_page) (@per_page || post_query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, max_per_page)
end end
def max_per_page def max_per_page
@@ -95,11 +92,15 @@ module PostSets
@posts ||= normalized_query.paginated_posts(page, includes: includes, count: post_count, search_count: !post_count.nil?, limit: per_page, max_limit: max_per_page).load @posts ||= normalized_query.paginated_posts(page, includes: includes, count: post_count, search_count: !post_count.nil?, limit: per_page, max_limit: max_per_page).load
end end
def post_count
normalized_query.post_count
end
def hide_from_crawler? def hide_from_crawler?
return true if current_page > 50 return true if current_page > 50
return true if show_votes? return true if show_votes?
return true if artist.present? && artist.is_banned? return true if artist.present? && artist.is_banned?
return false if query.is_empty_search? || query.is_simple_tag? || query.is_metatag?(:order, :rank) return false if post_query.is_empty_search? || post_query.is_simple_tag? || post_query.is_metatag?(:order, :rank)
true true
end end
@@ -118,7 +119,7 @@ module PostSets
end end
def show_deleted? def show_deleted?
query.select_metatags("status").any? do |metatag| post_query.select_metatags("status").any? do |metatag|
metatag.value.downcase.in?(%w[all any active unmoderated modqueue deleted appealed]) metatag.value.downcase.in?(%w[all any active unmoderated modqueue deleted appealed])
end end
end end
@@ -133,13 +134,13 @@ module PostSets
concerning :TagListMethods do concerning :TagListMethods do
def related_tags def related_tags
if query.is_wildcard_search? if post_query.wildcards.one? && post_query.tags.none?
wildcard_tags wildcard_tags
elsif query.is_metatag?(:search) elsif post_query.is_metatag?(:search)
saved_search_tags saved_search_tags
elsif query.is_empty_search? || query.is_metatag?(:order, :rank) elsif post_query.is_empty_search? || post_query.is_metatag?(:order, :rank)
popular_tags.presence || frequent_tags popular_tags.presence || frequent_tags
elsif query.is_single_term? elsif post_query.is_single_term?
similar_tags.presence || frequent_tags similar_tags.presence || frequent_tags
else else
frequent_tags frequent_tags
@@ -151,7 +152,7 @@ module PostSets
end end
def similar_tags def similar_tags
RelatedTagCalculator.cached_similar_tags_for_search(query.normalized_query(implicit: false), MAX_SIDEBAR_TAGS) RelatedTagCalculator.cached_similar_tags_for_search(post_query, MAX_SIDEBAR_TAGS)
end end
def frequent_tags def frequent_tags
@@ -161,7 +162,7 @@ module PostSets
# Wildcard searches can show up to 100 tags in the sidebar, not 25, # Wildcard searches can show up to 100 tags in the sidebar, not 25,
# because that's how many tags the search itself will use. # because that's how many tags the search itself will use.
def wildcard_tags def wildcard_tags
Tag.wildcard_matches(tag_string).limit(PostQueryBuilder::MAX_WILDCARD_TAGS).pluck(:name) Tag.wildcard_matches(post_query.wildcards.first).limit(MAX_WILDCARD_TAGS).pluck(:name)
end end
def saved_search_tags def saved_search_tags

View File

@@ -19,7 +19,7 @@
# @see https://en.wikipedia.org/wiki/Cosine_similarity # @see https://en.wikipedia.org/wiki/Cosine_similarity
module RelatedTagCalculator module RelatedTagCalculator
# Return the set of tags similar to the given search. # Return the set of tags similar to the given search.
# @param post_query [PostQueryBuilder] the search to find similar tags for. # @param post_query [PostQuery] the search to find similar tags for.
# @param search_sample_size [Integer] the number of posts to sample from the search # @param search_sample_size [Integer] the number of posts to sample from the search
# @param tag_sample_size [Integer] the number of tags to calculate similarity for # @param tag_sample_size [Integer] the number of tags to calculate similarity for
# @param category [Integer] an optional tag category, to restrict the tags to a given category. # @param category [Integer] an optional tag category, to restrict the tags to a given category.
@@ -41,12 +41,12 @@ module RelatedTagCalculator
end end
# Return the set of tags most frequently appearing in the given search. # Return the set of tags most frequently appearing in the given search.
# @param post_query [PostQueryBuilder] the search to find frequent tags for. # @param post_query [PostQuery] the search to find frequent tags for.
# @param search_sample_size [Integer] the number of posts to sample from the search # @param search_sample_size [Integer] the number of posts to sample from the search
# @param category [Integer] an optional tag category, to restrict the tags to a given category. # @param category [Integer] an optional tag category, to restrict the tags to a given category.
# @return [Array<Tag>] the set of frequent tags, ordered by most frequent # @return [Array<Tag>] the set of frequent tags, ordered by most frequent
def self.frequent_tags_for_search(post_query, search_sample_size: 1000, category: nil) def self.frequent_tags_for_search(post_query, search_sample_size: 1000, category: nil)
sample_posts = post_query.build.reorder(:md5).limit(search_sample_size) sample_posts = post_query.posts.reorder(:md5).limit(search_sample_size)
frequent_tags_for_post_relation(sample_posts, category: category) frequent_tags_for_post_relation(sample_posts, category: category)
end end
@@ -74,7 +74,7 @@ module RelatedTagCalculator
end end
# Return a cached set of tags similar to the given search. # Return a cached set of tags similar to the given search.
# @param post_query [PostQueryBuilder] the search to find similar tags for. # @param post_query [PostQuery] the search to find similar tags for.
# @param max_tags [Integer] the maximum number of tags to return # @param max_tags [Integer] the maximum number of tags to return
# @param search_timeout [Integer] the database timeout for the search # @param search_timeout [Integer] the database timeout for the search
# @param cache_timeout [Integer] the length of time to cache the results # @param cache_timeout [Integer] the length of time to cache the results
@@ -90,7 +90,7 @@ module RelatedTagCalculator
# Return a cache key for the given search. Some searches are cached on a # Return a cache key for the given search. Some searches are cached on a
# per-user basis because they depend on the current user (for example, # per-user basis because they depend on the current user (for example,
# searches for private favorites, favgroups, or saved searches). # searches for private favorites, favgroups, or saved searches).
# @param post_query [PostQueryBuilder] the post search # @param post_query [PostQuery] the post search
# @return [String] the cache key # @return [String] the cache key
def self.cache_key(post_query) def self.cache_key(post_query)
if post_query.is_user_dependent_search? if post_query.is_user_dependent_search?

View File

@@ -10,7 +10,7 @@ class RelatedTagQuery
def initialize(query:, user: User.anonymous, category: nil, type: nil, limit: nil) def initialize(query:, user: User.anonymous, category: nil, type: nil, limit: nil)
@user = user @user = user
@post_query = PostQueryBuilder.new(query, user).normalized_query @post_query = PostQuery.normalize(query, current_user: user) # XXX This query does not include implicit metatags (rating:s, -status:deleted)
@query = @post_query.to_s @query = @post_query.to_s
@category = category @category = category
@type = type @type = type
@@ -75,7 +75,7 @@ class RelatedTagQuery
end end
def other_wiki_pages def other_wiki_pages
tag = post_query.simple_tag tag = post_query.tag
return [] if tag.nil? return [] if tag.nil?
if tag.copyright? if tag.copyright?

View File

@@ -1384,8 +1384,8 @@ class Post < ApplicationRecord
# @param hide_deleted_posts [Boolean] if true, automatically add -status:deleted to the search # @param hide_deleted_posts [Boolean] if true, automatically add -status:deleted to the search
# @return [ActiveRecord::Relation<Post>] the set of resulting posts # @return [ActiveRecord::Relation<Post>] the set of resulting posts
def user_tag_match(query, user = CurrentUser.user, tag_limit: user.tag_query_limit, safe_mode: CurrentUser.safe_mode?, hide_deleted_posts: user.hide_deleted_posts?) def user_tag_match(query, user = CurrentUser.user, tag_limit: user.tag_query_limit, safe_mode: CurrentUser.safe_mode?, hide_deleted_posts: user.hide_deleted_posts?)
post_query = PostQueryBuilder.new(query, user, tag_limit: tag_limit, safe_mode: safe_mode, hide_deleted_posts: hide_deleted_posts) post_query = PostQuery.normalize(query, current_user: user, tag_limit: tag_limit, safe_mode: safe_mode, hide_deleted_posts: hide_deleted_posts)
post_query.normalized_query.build post_query.with_implicit_metatags.posts
end end
def search(params) def search(params)

View File

@@ -46,9 +46,9 @@
<li><%= link_to "Deleted", posts_path(tags: "#{params[:tags]} status:deleted"), rel: "nofollow" %></li> <li><%= link_to "Deleted", posts_path(tags: "#{params[:tags]} status:deleted"), rel: "nofollow" %></li>
<li><%= link_to "Random", random_posts_path(tags: params[:tags]), id: "random-post", "data-shortcut": "r", rel: "nofollow" %></li> <li><%= link_to "Random", random_posts_path(tags: params[:tags]), id: "random-post", "data-shortcut": "r", rel: "nofollow" %></li>
<% if @post_set.normalized_query.has_single_tag? %> <% if @post_set.post_query.has_single_tag? %>
<li><%= link_to "History", post_versions_path(search: { changed_tags: @post_set.normalized_query.tags.first.name }), rel: "nofollow" %></li> <li><%= link_to "History", post_versions_path(search: { changed_tags: @post_set.post_query.tag_name }), rel: "nofollow" %></li>
<li><%= link_to "Discussions", forum_posts_path(search: { linked_to: @post_set.normalized_query.tags.first.name }), rel: "nofollow" %></li> <li><%= link_to "Discussions", forum_posts_path(search: { linked_to: @post_set.post_query.tag_name }), rel: "nofollow" %></li>
<% end %> <% end %>
<li><%= link_to "Count", posts_counts_path(tags: params[:tags]), rel: "nofollow" %></li> <li><%= link_to "Count", posts_counts_path(tags: params[:tags]), rel: "nofollow" %></li>
</ul> </ul>
@@ -222,7 +222,7 @@
<% end %> <% end %>
<% content_for(:html_header) do %> <% content_for(:html_header) do %>
<% if @post_set.query.is_empty_search? %> <% if @post_set.post_query.is_empty_search? %>
<% page_title("#{Danbooru.config.app_name}: Anime Image Board", suffix: nil) %> <% page_title("#{Danbooru.config.app_name}: Anime Image Board", suffix: nil) %>
<% meta_description site_description %> <% meta_description site_description %>

View File

@@ -6,14 +6,14 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
end end
def assert_fast_count(count, query, query_options = {}, fast_count_options = {}) def assert_fast_count(count, query, query_options = {}, fast_count_options = {})
assert_equal(count, PostQueryBuilder.new(query, **query_options).normalized_query.fast_count(**fast_count_options)) assert_equal(count, PostQuery.normalize(query, **query_options).with_implicit_metatags.fast_count(**fast_count_options))
end end
def assert_parse_equals(expected, query) def assert_parse_equals(expected, query)
assert_equal(expected, PostQueryBuilder.new(query).split_query) assert_equal(expected, PostQueryBuilder.new(query).split_query)
# parsing, serializing, then parsing again should produce the same result. # parsing, serializing, then parsing again should produce the same result.
assert_equal(PostQueryBuilder.new(query).to_s, PostQueryBuilder.new(PostQueryBuilder.new(query).to_s).to_s) assert_equal(PostQuery.new(query).to_s, PostQuery.new(PostQuery.new(query).to_s).to_s)
end end
setup do setup do
@@ -1322,75 +1322,6 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
assert_equal(%w(aaa bbb), PostQueryBuilder.new("aaa bbb").split_query) assert_equal(%w(aaa bbb), PostQueryBuilder.new("aaa bbb").split_query)
assert_equal(%w(favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%), PostQueryBuilder.new("favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%").split_query) assert_equal(%w(favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%), PostQueryBuilder.new("favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%").split_query)
end end
should "parse single tags correctly" do
assert_equal(true, PostQueryBuilder.new("foo").is_single_tag?)
assert_equal(true, PostQueryBuilder.new("-foo").is_single_tag?)
assert_equal(true, PostQueryBuilder.new("~foo").is_single_tag?)
assert_equal(true, PostQueryBuilder.new("foo*").is_single_tag?)
assert_equal(false, PostQueryBuilder.new("fav:1234").is_single_tag?)
assert_equal(false, PostQueryBuilder.new("pool:1234").is_single_tag?)
assert_equal(false, PostQueryBuilder.new('source:"foo bar baz"').is_single_tag?)
assert_equal(false, PostQueryBuilder.new("foo bar").is_single_tag?)
end
should "parse simple tags correctly" do
assert_equal(true, PostQueryBuilder.new("foo").is_simple_tag?)
assert_equal(false, PostQueryBuilder.new("-foo").is_simple_tag?)
assert_equal(false, PostQueryBuilder.new("~foo").is_simple_tag?)
assert_equal(false, PostQueryBuilder.new("foo*").is_simple_tag?)
assert_equal(false, PostQueryBuilder.new("fav:1234").is_simple_tag?)
assert_equal(false, PostQueryBuilder.new("FAV:1234").is_simple_tag?)
assert_equal(false, PostQueryBuilder.new("pool:1234").is_simple_tag?)
assert_equal(false, PostQueryBuilder.new('source:"foo bar baz"').is_simple_tag?)
assert_equal(false, PostQueryBuilder.new("foo bar").is_simple_tag?)
end
should "parse quoted metatags correctly" do
assert_parse_equals(%w[status:"active" source:"https"], %q(status:'active' source:'https'))
assert_parse_equals(%w[source:"https" status:"active"], %q(source:'https' status:'active'))
assert_parse_equals(%w[status:"active" source:"https"], %q(status:"active" source:'https'))
assert_parse_equals(%w[status:"active" source:"https"], %q(status:'active' source:"https"))
assert_parse_equals(%w[status:"active" source:https], %q(status:'active' source:https))
assert_parse_equals(%w[status:active source:"https"], %q(status:active source:'https'))
assert_parse_equals(%w[limit:"5" status:"active" source:"x"], %q(limit:"5" status:"active" source:"x"))
assert_parse_equals(%w[source:"" limit:"1" status:"deleted"], %q(source:"" limit:'1' status:'deleted'))
assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:"bar baz" don't_say_"lazy"))
assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:"bar baz" don't_say_"lazy"))
assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:'bar baz' don't_say_"lazy"))
assert_parse_equals([%q(source:"foo")], %q(source:"\f\o\o"))
assert_parse_equals([%q(source:"foo")], %q(source:'\f\o\o'))
assert_parse_equals([%q(source:foo\bar)], %q(source:foo\bar))
assert_parse_equals([%q(source:"foo)], %q(source:"foo))
assert_parse_equals([%q(source:'foo)], %q(source:'foo))
assert_parse_equals([%q(source:"foo bar")], %q(source:foo\ bar))
assert_parse_equals([%q(source:"\"foo bar\\\\")], %q(source:"foo\ bar\\))
assert_parse_equals(['source:"don\'t_say_\\"lazy\\""', 'don\'t_say_"lazy"'], %q(source:"don't_say_\"lazy\"" don't_say_"lazy"))
assert_parse_equals(['source:"don\'t_say_\\"lazy\\""', 'don\'t_say_"lazy"'], %q(source:'don\'t_say_"lazy"' don't_say_"lazy"))
end
end
context "The normalized_query method" do
should "work" do
create(:tag_alias, antecedent_name: "gray", consequent_name: "grey")
assert_equal("foo", PostQueryBuilder.new("foo").normalized_query.to_s)
assert_equal("foo", PostQueryBuilder.new(" foo ").normalized_query.to_s)
assert_equal("foo", PostQueryBuilder.new("FOO").normalized_query.to_s)
assert_equal("foo", PostQueryBuilder.new("foo foo").normalized_query.to_s)
assert_equal("grey", PostQueryBuilder.new("gray").normalized_query.to_s)
assert_equal("aaa bbb", PostQueryBuilder.new("bbb aaa").normalized_query.to_s)
assert_equal("-aaa bbb", PostQueryBuilder.new("bbb -aaa").normalized_query.to_s)
assert_equal("~aaa ~bbb", PostQueryBuilder.new("~bbb ~aaa").normalized_query.to_s)
assert_equal("commentary:true bbb", PostQueryBuilder.new("bbb commentary:true").normalized_query.to_s)
assert_equal('commentary:"true" bbb', PostQueryBuilder.new("bbb commentary:'true'").normalized_query.to_s)
assert_equal('-commentary:true bbb', PostQueryBuilder.new("bbb -commentary:true").normalized_query.to_s)
assert_equal('-commentary:"true" bbb', PostQueryBuilder.new("bbb -commentary:'true'").normalized_query.to_s)
end
end end
context "#fast_count" do context "#fast_count" do
@@ -1452,7 +1383,7 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
context "for a multi-tag search" do context "for a multi-tag search" do
should "return the cached count, if it exists" do should "return the cached count, if it exists" do
Cache.put("pfc:score:42 aaa", 100) Cache.put("pfc:aaa score:42", 100)
assert_fast_count(100, "aaa score:42") assert_fast_count(100, "aaa score:42")
end end
@@ -1470,7 +1401,7 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
context "a blank search" do context "a blank search" do
should "should execute a search" do should "should execute a search" do
assert_fast_count(1, "", {}, { estimate_count: false }) assert_fast_count(1, "", {}, { estimate_count: false })
assert_nothing_raised { PostQueryBuilder.new("").normalized_query.fast_count(estimate_count: true) } assert_nothing_raised { PostQuery.new("").fast_count(estimate_count: true) }
end end
should "return 0 for a nonexisting tag" do should "return 0 for a nonexisting tag" do
@@ -1480,12 +1411,12 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
context "in safe mode" do context "in safe mode" do
should "work for a blank search" do should "work for a blank search" do
assert_fast_count(0, "", { safe_mode: true }, { estimate_count: false }) assert_fast_count(0, "", { safe_mode: true }, { estimate_count: false })
assert_nothing_raised { PostQueryBuilder.new("", safe_mode: true).normalized_query.fast_count(estimate_count: true) } assert_nothing_raised { PostQuery.new("", safe_mode: true).fast_count(estimate_count: true) }
end end
should "work for a nil search" do should "work for a nil search" do
assert_fast_count(0, nil, { safe_mode: true }, { estimate_count: false }) assert_fast_count(0, nil, { safe_mode: true }, { estimate_count: false })
assert_nothing_raised { PostQueryBuilder.new("", safe_mode: true).normalized_query.fast_count(estimate_count: true) } assert_nothing_raised { PostQuery.new("", safe_mode: true).fast_count(estimate_count: true) }
end end
should "not fail for a two tag search by a member" do should "not fail for a two tag search by a member" do
@@ -1502,8 +1433,8 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
@user = create(:user, enable_private_favorites: true) @user = create(:user, enable_private_favorites: true)
@post = as(@user) { create(:post, tag_string: "fav:#{@user.name}") } @post = as(@user) { create(:post, tag_string: "fav:#{@user.name}") }
assert_equal(1, PostQueryBuilder.new("fav:#{@user.name}", @user).fast_count) assert_equal(1, PostQuery.new("fav:#{@user.name}", current_user: @user).fast_count)
assert_equal(0, PostQueryBuilder.new("fav:#{@user.name}").fast_count) assert_equal(0, PostQuery.new("fav:#{@user.name}").fast_count)
end end
end end
end end

View File

@@ -2,12 +2,12 @@ require 'test_helper'
class RelatedTagCalculatorTest < ActiveSupport::TestCase class RelatedTagCalculatorTest < ActiveSupport::TestCase
def frequent_tags_for_search(tag_search, user = CurrentUser.user, **options) def frequent_tags_for_search(tag_search, user = CurrentUser.user, **options)
post_query = PostQueryBuilder.new(tag_search, user) post_query = PostQuery.normalize(tag_search, current_user: user)
RelatedTagCalculator.frequent_tags_for_search(post_query, **options).pluck(:name) RelatedTagCalculator.frequent_tags_for_search(post_query, **options).pluck(:name)
end end
def similar_tags_for_search(tag_search, user = CurrentUser.user, **options) def similar_tags_for_search(tag_search, user = CurrentUser.user, **options)
post_query = PostQueryBuilder.new(tag_search, user).normalized_query post_query = PostQuery.normalize(tag_search, current_user: user)
RelatedTagCalculator.similar_tags_for_search(post_query, **options).pluck(:name) RelatedTagCalculator.similar_tags_for_search(post_query, **options).pluck(:name)
end end