post queries: switch to new post search engine.

Switch to the post search engine using the new PostQuery parser. The new
engine fully supports AND, OR, and NOT operators and grouping expressions
with parentheses.

Highlights:

New OR operator:

* `skirt or dress` (same as `~skirt ~dress`)

Tags can be grouped with parentheses:

* `1girl (skirt or dress)`
* `(blonde_hair blue_eyes) or (red_hair green_eyes)`
* `~(blonde_hair blue_eyes) ~(red_hair green_eyes)` (same as above)
* `(pantyhose or thighhighs) (black_legwear or brown_legwear)`
* `(~pantyhose ~thighhighs) (~black_legwear ~brown_legwear)` (same as above)

Metatags can be OR'd together:

* `user:evazion or fav:evazion`
* `~user:evazion ~fav:evazion`

Wildcard tags can be combined with either AND or OR:

* `black_* white_*` (find posts with at least one black_* tag AND one white_* tag)
* `black_* or white_*` (find posts with at least one black_* tag OR one white_* tag)
* `~black_* ~white_*` (same as above)

See 4c7cfc73 for more syntax examples.

Fixes #4949: And+or search?
Fixes #5056: Wildcard searches return unexpected results when combined with OR searches
This commit is contained in:
evazion
2022-04-04 16:52:11 -05:00
parent 703fd05025
commit af183467b6
11 changed files with 227 additions and 387 deletions

View File

@@ -273,7 +273,7 @@ class BulkUpdateRequestProcessor
"mass update {{#{args[0]}}} -> {{#{args[1]}}}"
when :nuke
if PostQuery.new(args[0]).is_single_tag?
if PostQuery.normalize(args[0]).is_simple_tag?
"nuke [[#{args[0]}]]"
else
"nuke {{#{args[0]}}}"
@@ -292,7 +292,7 @@ class BulkUpdateRequestProcessor
def self.nuke(tag_name)
# Reject existing implications from any other tag to the one we're nuking
# otherwise the tag won't be removed from posts that have those other tags
if PostQuery.new(tag_name).is_single_tag?
if PostQuery.normalize(tag_name).is_simple_tag?
TagImplication.active.where(consequent_name: tag_name).each { |ti| ti.reject!(User.system) }
TagImplication.active.where(antecedent_name: tag_name).each { |ti| ti.reject!(User.system) }
end

View File

@@ -13,7 +13,9 @@ module Searchable
end
def negate_relation
unscoped.where(all.where_clause.invert.ast)
relation = unscoped
relation = relation.from(all.from_clause.value) if all.from_clause.value.present?
relation.where(all.where_clause.invert.ast)
end
# XXX hacky method to AND two relations together.

View File

@@ -3,11 +3,18 @@
class PostQuery
extend Memoist
private attr_reader :current_user, :tag_limit, :safe_mode, :hide_deleted_posts, :builder
attr_reader :current_user
private attr_reader :tag_limit, :safe_mode, :hide_deleted_posts, :builder
delegate :tag?, :metatag?, :wildcard?, :metatags, :wildcards, :tag_names, :metatags, to: :ast
delegate :tag?, :metatag?, :wildcard?, :metatags, :wildcards, :tag_names, :metatags, :to_infix, to: :ast
alias_method :safe_mode?, :safe_mode
alias_method :hide_deleted_posts?, :hide_deleted_posts
alias_method :to_s, :to_infix
# Return a new PostQuery with aliases replaced.
def self.normalize(...)
PostQuery.new(...).replace_aliases.trim
end
def initialize(search_or_ast, current_user: User.anonymous, tag_limit: nil, safe_mode: false, hide_deleted_posts: false)
if search_or_ast.is_a?(AST)
@@ -39,10 +46,25 @@ class PostQuery
@ast ||= Parser.parse(search)
end
def fast_count(...)
builder.normalized_query.fast_count(...)
def posts
builder.posts(to_cnf)
end
def paginated_posts(...)
builder.paginated_posts(to_cnf, ...)
end
# The name of the only tag in the query, if the query contains a single tag. The tag may not exist. The query may contain other metatags or wildcards, and the tag may be negated.
def tag_name
tag_names.first if has_single_tag?
end
# The only tag in the query, if the query contains a single tag. The query may contain other metatags or wildcards, and the tag may be negated.
def tag
tags.first if has_single_tag?
end
# The list of all tags contained in the query.
def tags
Tag.where(name: tag_names)
end
@@ -57,10 +79,39 @@ class PostQuery
ast.none?
end
def is_single_tag?
# True if the search is a single metatag search for the given metatag.
def is_metatag?(name, value = nil)
if value.nil?
is_single_term? && has_metatag?(name)
else
is_single_term? && find_metatag(name) == value.to_s
end
end
# True if the search consists of a single tag, metatag, or wildcard.
def is_single_term?
tag_names.size + metatags.size + wildcards.size == 1
end
# True if this search consists only of a single non-negated tag, with no other metatags or operators.
def is_simple_tag?
ast.tag?
end
# True if the search contains a single tag. It may have other metatags or wildcards, and the tag may be negated.
def has_single_tag?
tag_names.one?
end
# True if the search depends on the current user because of permissions or privacy settings.
def is_user_dependent_search?
metatags.any? do |metatag|
metatag.name.in?(%w[upvoter upvote downvoter downvote search flagger fav ordfav favgroup ordfavgroup]) ||
metatag.name == "status" && metatag.value == "unmoderated" ||
metatag.name == "disapproved" && !metatag.value.downcase.in?(PostDisapproval::REASONS)
end
end
def select_metatags(*names)
metatags.select { |metatag| metatag.name.in?(names.map(&:to_s).map(&:downcase)) }
end
@@ -73,9 +124,9 @@ class PostQuery
select_metatags(*names).first&.value
end
# Return a new PostQuery with aliases replaced, implicit metatags added, and the query converted to conjunctive normal form.
def normalize
replace_aliases.with_implicit_metatags.to_cnf
# Return a new PostQuery with unnecessary AND and OR clauses eliminated.
def trim
build(ast.trim)
end
# Return a new PostQuery with aliases replaced.
@@ -115,5 +166,75 @@ class PostQuery
hide_deleted_posts? && !has_status_metatag
end
memoize :tags, :normalize, :replace_aliases, :with_implicit_metatags, :to_cnf, :aliases, :implicit_metatags, :hide_deleted?
concerning :CountMethods do
def post_count
@post_count ||= fast_count
end
# Return an estimate of the number of posts returned by the search. By default, we try to use an
# estimated or cached count before doing an exact count.
#
# @param timeout [Integer] The database timeout in milliseconds
# @param estimate_count [Boolean] If true, estimate the count with inexact methods.
# @param skip_cache [Boolean] If true, don't use the cached count.
# @return [Integer, nil] The number of posts, or nil on timeout.
def fast_count(timeout: 1_000, estimate_count: true, skip_cache: false)
count = nil
# Cheapest path first. estimated_count returns nil when none of its shortcuts apply.
count = estimated_count if estimate_count
# No estimate available: go through the cache (cached_count wraps exact_count in Cache.get).
count = cached_count(timeout) if count.nil? && !skip_cache
# The caller asked to bypass the cache: run the count directly.
count = exact_count(timeout) if count.nil? && skip_cache
count
end
# Try to get the post count without running the full search, using a shortcut that depends
# on the shape of the query.
#
# @return [Integer, nil] presumably an Integer when a shortcut applies (the types of
#   estimated_row_count, post_count and favorite_count aren't visible here); nil when no
#   branch matches, when the simple tag has no Tag record, or when the favorites of the
#   user being searched aren't visible to current_user.
def estimated_count
if is_empty_search?
estimated_row_count
elsif is_simple_tag?
# `tag` is nil when the tag doesn't exist, hence `try`.
tag.try(:post_count)
elsif is_metatag?(:rating)
estimated_row_count
elsif is_metatag?(:pool) || is_metatag?(:ordpool)
name = find_metatag(:pool, :ordpool)
# A pool that can't be found counts as zero posts.
Pool.find_by_name(name)&.post_count || 0
elsif is_metatag?(:fav) || is_metatag?(:ordfav)
name = find_metatag(:fav, :ordfav)
user = User.find_by_name(name)
if user.nil?
0
elsif Pundit.policy!(current_user, user).can_see_favorites?
user.favorite_count
else
# Not allowed to see this user's favorites: give no estimate, so fast_count falls through
# to its cached or exact count.
nil
end
end
end
# Estimate the count by parsing the Postgres EXPLAIN output.
def estimated_row_count
ExplainParser.new(posts).row_count
end
# Return the exact count, going through Cache under count_cache_key.
#
# @param timeout [Integer] passed through to exact_count
# @param duration [ActiveSupport::Duration] passed to Cache.get; presumably how long the
#   count stays cached — confirm against Cache.get
# @return whatever Cache.get returns; presumably the cached value, or the result of the
#   block on a miss — confirm against Cache.get
def cached_count(timeout, duration: 5.minutes)
Cache.get(count_cache_key, duration) do
exact_count(timeout)
end
end
def exact_count(timeout)
Post.with_timeout(timeout) do
posts.count
end
end
# Build the cache key under which this search's post count is stored.
#
# @return [String] "pfc:<query>", or "pfc[<user id>]:<query>" for user-dependent searches
def count_cache_key
if is_user_dependent_search?
# The result depends on who is searching, so the current user's id is part of the key
# and counts are not shared between users.
"pfc[#{current_user.id.to_i}]:#{to_s}"
else
"pfc:#{to_s}"
end
end
end
memoize :tags, :replace_aliases, :with_implicit_metatags, :to_cnf, :aliases, :implicit_metatags, :hide_deleted?
end

View File

@@ -93,49 +93,6 @@ class PostQueryBuilder
@hide_deleted_posts = hide_deleted_posts
end
def tags_match(tags, relation)
negated_wildcard_tags, negated_tags = tags.select(&:negated).partition(&:wildcard)
optional_wildcard_tags, optional_tags = tags.select(&:optional).partition(&:wildcard)
required_wildcard_tags, required_tags = tags.reject(&:negated).reject(&:optional).partition(&:wildcard)
negated_tags = negated_tags.map(&:name)
optional_tags = optional_tags.map(&:name)
required_tags = required_tags.map(&:name)
matched_negated_wildcard_tags = negated_wildcard_tags.flat_map { |tag| Tag.wildcard_matches(tag.name).limit(MAX_WILDCARD_TAGS).pluck(:name) }
matched_optional_wildcard_tags = optional_wildcard_tags.flat_map { |tag| Tag.wildcard_matches(tag.name).limit(MAX_WILDCARD_TAGS).pluck(:name) }
matched_required_wildcard_tags = required_wildcard_tags.flat_map { |tag| Tag.wildcard_matches(tag.name).limit(MAX_WILDCARD_TAGS).pluck(:name) }
negated_tags += (matched_negated_wildcard_tags.empty? && !negated_wildcard_tags.empty?) ? negated_wildcard_tags.map(&:name) : matched_negated_wildcard_tags
optional_tags += (matched_optional_wildcard_tags.empty? && !optional_wildcard_tags.empty?) ? optional_wildcard_tags.map(&:name) : matched_optional_wildcard_tags
optional_tags += (matched_required_wildcard_tags.empty? && !required_wildcard_tags.empty?) ? required_wildcard_tags.map(&:name) : matched_required_wildcard_tags
relation = relation.where_array_includes_all("string_to_array(posts.tag_string, ' ')", required_tags) if required_tags.present?
relation = relation.where_array_includes_any("string_to_array(posts.tag_string, ' ')", optional_tags) if optional_tags.present?
relation = relation.where_array_includes_none("string_to_array(posts.tag_string, ' ')", negated_tags) if negated_tags.present?
relation
end
def metatags_match(metatags, relation)
metatags.each do |metatag|
metatag_name = metatags_without_ord[metatag.name] if metatag.negated && metatags_without_ord.key?(metatag.name)
clause = metatag_matches(metatag_name || metatag.name, metatag.value, quoted: metatag.quoted)
clause = clause.negate_relation if metatag.negated
relation = relation.and_relation(clause)
end
relation
end
def metatags_without_ord
{
"ordfav" => "fav",
"ordfavgroup" => "favgroup",
"ordpool" => "pool",
}
end
def metatag_matches(name, value, relation = Post.all, quoted: false)
case name
when "id"
@@ -256,53 +213,77 @@ class PostQueryBuilder
end
end
def tables_for_query
metatag_names = metatags.map(&:name)
metatag_names << find_metatag(:order).remove(/_(asc|desc)\z/i) if has_metatag?(:order)
def tables_for_query(post_query)
metatag_names = post_query.metatags.map(&:name)
metatag_names << post_query.find_metatag(:order).remove(/_(asc|desc)\z/i) if post_query.has_metatag?(:order)
tables = metatag_names.map { |metatag| table_for_metatag(metatag.to_s) }
tables.compact.uniq
end
def add_joins(relation)
tables = tables_for_query
def add_joins(post_query, relation)
tables = tables_for_query(post_query)
relation = relation.with_stats(tables)
relation
end
def build(includes: nil)
validate!
relation = Post.includes(includes)
relation = add_joins(relation)
relation = metatags_match(metatags, relation)
relation = tags_match(tags, relation)
# Generate a SQL relation from a PostQuery.
#
# @param post_query [PostQuery] the query whose AST is converted
# @param relation [ActiveRecord::Relation] the base relation each leaf node is applied to
#   (defaults to Post.all)
# @return the value of ast.visit; presumably the relation built for the root node —
#   confirm against AST#visit
def build_relation(post_query, relation = Post.all)
# NOTE(review): the :not, :and and :or branches call relation methods on `children`, so
# `visit` appears to yield each node together with the already-converted results of its
# child nodes — confirm against AST#visit.
post_query.ast.visit do |node, *children|
# `case/in` has no else branch, so an unrecognized node type raises NoMatchingPatternError.
case node.type
in :all
relation.all
in :none
relation.none
in :tag
relation.tags_include(node.name)
in :metatag
metatag_matches(node.name, node.value, relation, quoted: node.quoted?)
in :wildcard
# Expand the wildcard to at most MAX_WILDCARD_TAGS tag names, then match posts that have
# any of those tags.
tag_names = Tag.wildcard_matches(node.name).limit(MAX_WILDCARD_TAGS).pluck(:name)
relation.where_array_includes_any("string_to_array(posts.tag_string, ' ')", tag_names)
in :not
children.first.negate_relation
in :and
children.reduce(&:and)
in :or
children.reduce(&:or)
end
end
end
def posts(post_query, includes: nil)
relation = Post.all
relation = add_joins(post_query, relation)
relation = build_relation(post_query, relation)
# HACK: if we're using a date: or age: metatag, default to ordering by
# created_at instead of id so that the query will use the created_at index.
if has_metatag?(:date, :age) && find_metatag(:order).in?(["id", "id_asc"])
if post_query.has_metatag?(:date, :age) && post_query.find_metatag(:order).in?(["id", "id_asc"])
relation = search_order(relation, "created_at_asc")
elsif has_metatag?(:date, :age) && find_metatag(:order).in?(["id_desc", nil])
elsif post_query.has_metatag?(:date, :age) && post_query.find_metatag(:order).in?(["id_desc", nil])
relation = search_order(relation, "created_at_desc")
elsif find_metatag(:order) == "custom"
relation = search_order_custom(relation, select_metatags(:id).map(&:value))
elsif has_metatag?(:ordfav)
elsif post_query.find_metatag(:order) == "custom"
relation = search_order_custom(relation, post_query.select_metatags(:id).map(&:value))
elsif post_query.has_metatag?(:ordfav)
# no-op
else
relation = search_order(relation, find_metatag(:order))
relation = search_order(relation, post_query.find_metatag(:order))
end
if count = find_metatag(:random)
if count = post_query.find_metatag(:random)
count = Integer(count).clamp(0, PostSets::Post::MAX_PER_PAGE)
relation = relation.random(count)
end
relation = relation.includes(includes)
relation
end
def paginated_posts(page, small_search_threshold: Danbooru.config.small_search_threshold.to_i, includes: nil, **options)
posts = build(includes: includes).paginate(page, **options)
posts = optimize_search(posts, small_search_threshold)
def paginated_posts(post_query, page, count:, small_search_threshold: Danbooru.config.small_search_threshold.to_i, includes: nil, **options)
posts = posts(post_query, includes: includes).paginate(page, count: count, **options)
posts = optimize_search(posts, count, small_search_threshold)
posts.load
end
@@ -315,7 +296,7 @@ class PostQueryBuilder
# tags, Postgres sometimes assumes tags in the 10k-50k range are large enough
# for a post id index scan, when in reality a tag index bitmap scan would be
# better.
def optimize_search(relation, small_search_threshold)
def optimize_search(relation, post_count, small_search_threshold)
return relation unless small_search_threshold.present?
order_values = relation.order_values.map { |order| order.try(:to_sql) || order.to_s }.map(&:downcase)
@@ -745,131 +726,6 @@ class PostQueryBuilder
end
end
concerning :CountMethods do
def post_count
@post_count ||= fast_count
end
# Return an estimate of the number of posts returned by the search. By
# default, we try to use an estimated or cached count before doing an exact
# count.
#
# @param timeout [Integer] the database timeout
# @param estimate_count [Boolean] if true, estimate the count with inexact methods
# @param skip_cache [Boolean] if true, don't use the cached count
# @return [Integer, nil] the number of posts, or nil on timeout
def fast_count(timeout: 1_000, estimate_count: true, skip_cache: false)
count = nil
count = estimated_count if estimate_count
count = cached_count(timeout) if count.nil? && !skip_cache
count = exact_count(timeout) if count.nil? && skip_cache
count
end
def estimated_count
if is_empty_search?
estimated_row_count
elsif is_simple_tag?
Tag.find_by(name: tags.first.name).try(:post_count)
elsif is_metatag?(:rating)
estimated_row_count
elsif is_metatag?(:pool) || is_metatag?(:ordpool)
name = find_metatag(:pool, :ordpool)
Pool.find_by_name(name)&.post_count || 0
elsif is_metatag?(:fav) || is_metatag?(:ordfav)
name = find_metatag(:fav, :ordfav)
user = User.find_by_name(name)
if user.nil?
0
elsif Pundit.policy!(current_user, user).can_see_favorites?
user.favorite_count
else
nil
end
end
end
# Estimate the count by parsing the Postgres EXPLAIN output.
def estimated_row_count
ExplainParser.new(build).row_count
end
def cached_count(timeout, duration: 5.minutes)
Cache.get(count_cache_key, duration) do
exact_count(timeout)
end
end
def exact_count(timeout)
Post.with_timeout(timeout) do
build.count
end
end
def count_cache_key
if is_user_dependent_search?
"pfc[#{current_user.id.to_i}]:#{to_s}"
else
"pfc:#{to_s}"
end
end
# @return [Boolean] true if the search depends on the current user because
# of permissions or privacy settings.
def is_user_dependent_search?
metatags.any? do |metatag|
metatag.name.in?(%w[upvoter upvote downvoter downvote search flagger fav ordfav favgroup ordfavgroup]) ||
metatag.name == "status" && metatag.value == "unmoderated" ||
metatag.name == "disapproved" && !metatag.value.downcase.in?(PostDisapproval::REASONS)
end
end
end
concerning :NormalizationMethods do
# Normalize a search by sorting tags and applying aliases.
# @return [PostQueryBuilder] the normalized query
def normalized_query(implicit: true, sort: true)
post_query = dup
post_query.terms.concat(implicit_metatags) if implicit
post_query.normalize_aliases!
post_query.normalize_order! if sort
post_query
end
# Apply aliases to all tags in the query.
def normalize_aliases!
tag_names = tags.map(&:name)
tag_aliases = tag_names.zip(TagAlias.to_aliased(tag_names)).to_h
terms.map! do |term|
term.name = tag_aliases[term.name] if term.type == :tag
term
end
end
# Normalize the tag order.
def normalize_order!
terms.sort_by!(&:to_s).uniq!
end
# Implicit metatags are metatags added by the user's account settings.
# rating:s is implicit under safe mode. -status:deleted is implicit when the
# "hide deleted posts" setting is on.
def implicit_metatags
metatags = []
metatags << OpenStruct.new(type: :metatag, name: "rating", value: "s") if safe_mode?
metatags << OpenStruct.new(type: :metatag, name: "status", value: "deleted", negated: true) if hide_deleted?
metatags
end
# XXX unify with PostSets::Post#show_deleted?
def hide_deleted?
has_status_metatag = select_metatags(:status).any? { |metatag| metatag.value.downcase.in?(%w[deleted active any all unmoderated modqueue appealed]) }
hide_deleted_posts? && !has_status_metatag
end
end
concerning :UtilityMethods do
def to_s
split_query.join(" ")
@@ -879,78 +735,7 @@ class PostQueryBuilder
def terms
@terms ||= scan_query
end
# The list of regular tags in the search.
def tags
terms.select { |term| term.type == :tag }
end
# The list of metatags in the search.
def metatags
terms.select { |term| term.type == :metatag }
end
# Find all metatags with the given names.
def select_metatags(*names)
metatags.select { |term| term.name.in?(names.map(&:to_s)) }
end
# Find the first metatag with any of the given names.
def find_metatag(*metatags)
select_metatags(*metatags).first.try(:value)
end
# @return [Boolean] true if the search has a metatag with any of the given names.
def has_metatag?(*metatag_names)
metatags.any? { |term| term.name.in?(metatag_names.map(&:to_s).map(&:downcase)) }
end
# @return [Boolean] true if the search has a single regular tag, with any number of metatags.
def has_single_tag?
tags.size == 1 && !tags.first.wildcard
end
# @return [Boolean] true if the search is a single metatag search for the given metatag.
def is_metatag?(name, value = nil)
if value.nil?
is_single_term? && has_metatag?(name)
else
is_single_term? && find_metatag(name) == value.to_s
end
end
# @return [Boolean] true if the search doesn't have any tags or metatags.
def is_empty_search?
terms.size == 0
end
# @return [Boolean] true if the search consists of a single tag or metatag.
def is_single_term?
terms.size == 1
end
# @return [Boolean] true if the search has a single tag, possibly with wildcards or negation.
def is_single_tag?
is_single_term? && tags.size == 1
end
# @return [Boolean] true if the search has a single tag, without any wildcards or operators.
def is_simple_tag?
tag = tags.first
is_single_tag? && !tag.negated && !tag.optional && !tag.wildcard
end
# @return [Boolean] true if the search has a single tag with a wildcard
def is_wildcard_search?
is_single_tag? && tags.first.wildcard
end
# @return [Tag, nil] the tag if the search is for a simple tag, otherwise nil
def simple_tag
return nil if !is_simple_tag?
Tag.find_by_name(tags.first.name)
end
end
memoize :split_query, :post_count
memoize :split_query
end

View File

@@ -8,14 +8,16 @@ module PostSets
class Post
MAX_PER_PAGE = 200
MAX_SIDEBAR_TAGS = 25
MAX_WILDCARD_TAGS = PostQueryBuilder::MAX_WILDCARD_TAGS
attr_reader :page, :format, :tag_string, :query, :normalized_query, :show_votes
delegate :post_count, to: :normalized_query
attr_reader :page, :format, :tag_string, :query, :post_query, :normalized_query, :show_votes
delegate :tag, to: :post_query
alias_method :show_votes?, :show_votes
def initialize(tags, page = 1, per_page = nil, user: CurrentUser.user, format: "html", show_votes: false)
@query = PostQueryBuilder.new(tags, user, tag_limit: user.tag_query_limit, safe_mode: CurrentUser.safe_mode?, hide_deleted_posts: user.hide_deleted_posts?)
@normalized_query = query.normalized_query
@post_query = PostQuery.normalize(tags, current_user: user, tag_limit: user.tag_query_limit, safe_mode: CurrentUser.safe_mode?, hide_deleted_posts: user.hide_deleted_posts?)
@normalized_query = post_query.with_implicit_metatags
@tag_string = tags
@page = page
@per_page = per_page
@@ -32,13 +34,8 @@ module PostSets
end
def wiki_page
return nil unless normalized_query.has_single_tag?
@wiki_page ||= WikiPage.undeleted.find_by(title: normalized_query.tags.first.name)
end
def tag
return nil unless normalized_query.has_single_tag?
@tag ||= Tag.find_by(name: normalized_query.tags.first.name)
return nil unless post_query.has_single_tag?
@wiki_page ||= WikiPage.undeleted.find_by(title: post_query.tag_name)
end
def artist
@@ -48,7 +45,7 @@ module PostSets
end
def pool
pool_names = normalized_query.select_metatags(:pool, :ordpool).map(&:value)
pool_names = post_query.select_metatags(:pool, :ordpool).map(&:value)
name = pool_names.first
return nil unless pool_names.size == 1
@@ -56,7 +53,7 @@ module PostSets
end
def favgroup
favgroup_names = normalized_query.select_metatags(:favgroup, :ordfavgroup).map(&:value)
favgroup_names = post_query.select_metatags(:favgroup, :ordfavgroup).map(&:value)
name = favgroup_names.first
return nil unless favgroup_names.size == 1
@@ -84,7 +81,7 @@ module PostSets
end
def per_page
(@per_page || query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, max_per_page)
(@per_page || post_query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, max_per_page)
end
def max_per_page
@@ -95,11 +92,15 @@ module PostSets
@posts ||= normalized_query.paginated_posts(page, includes: includes, count: post_count, search_count: !post_count.nil?, limit: per_page, max_limit: max_per_page).load
end
def post_count
normalized_query.post_count
end
def hide_from_crawler?
return true if current_page > 50
return true if show_votes?
return true if artist.present? && artist.is_banned?
return false if query.is_empty_search? || query.is_simple_tag? || query.is_metatag?(:order, :rank)
return false if post_query.is_empty_search? || post_query.is_simple_tag? || post_query.is_metatag?(:order, :rank)
true
end
@@ -118,7 +119,7 @@ module PostSets
end
def show_deleted?
query.select_metatags("status").any? do |metatag|
post_query.select_metatags("status").any? do |metatag|
metatag.value.downcase.in?(%w[all any active unmoderated modqueue deleted appealed])
end
end
@@ -133,13 +134,13 @@ module PostSets
concerning :TagListMethods do
def related_tags
if query.is_wildcard_search?
if post_query.wildcards.one? && post_query.tags.none?
wildcard_tags
elsif query.is_metatag?(:search)
elsif post_query.is_metatag?(:search)
saved_search_tags
elsif query.is_empty_search? || query.is_metatag?(:order, :rank)
elsif post_query.is_empty_search? || post_query.is_metatag?(:order, :rank)
popular_tags.presence || frequent_tags
elsif query.is_single_term?
elsif post_query.is_single_term?
similar_tags.presence || frequent_tags
else
frequent_tags
@@ -151,7 +152,7 @@ module PostSets
end
def similar_tags
RelatedTagCalculator.cached_similar_tags_for_search(query.normalized_query(implicit: false), MAX_SIDEBAR_TAGS)
RelatedTagCalculator.cached_similar_tags_for_search(post_query, MAX_SIDEBAR_TAGS)
end
def frequent_tags
@@ -161,7 +162,7 @@ module PostSets
# Wildcard searches can show up to 100 tags in the sidebar, not 25,
# because that's how many tags the search itself will use.
def wildcard_tags
Tag.wildcard_matches(tag_string).limit(PostQueryBuilder::MAX_WILDCARD_TAGS).pluck(:name)
Tag.wildcard_matches(post_query.wildcards.first).limit(MAX_WILDCARD_TAGS).pluck(:name)
end
def saved_search_tags

View File

@@ -19,7 +19,7 @@
# @see https://en.wikipedia.org/wiki/Cosine_similarity
module RelatedTagCalculator
# Return the set of tags similar to the given search.
# @param post_query [PostQueryBuilder] the search to find similar tags for.
# @param post_query [PostQuery] the search to find similar tags for.
# @param search_sample_size [Integer] the number of posts to sample from the search
# @param tag_sample_size [Integer] the number of tags to calculate similarity for
# @param category [Integer] an optional tag category, to restrict the tags to a given category.
@@ -41,12 +41,12 @@ module RelatedTagCalculator
end
# Return the set of tags most frequently appearing in the given search.
# @param post_query [PostQueryBuilder] the search to find frequent tags for.
# @param post_query [PostQuery] the search to find frequent tags for.
# @param search_sample_size [Integer] the number of posts to sample from the search
# @param category [Integer] an optional tag category, to restrict the tags to a given category.
# @return [Array<Tag>] the set of frequent tags, ordered by most frequent
def self.frequent_tags_for_search(post_query, search_sample_size: 1000, category: nil)
sample_posts = post_query.build.reorder(:md5).limit(search_sample_size)
sample_posts = post_query.posts.reorder(:md5).limit(search_sample_size)
frequent_tags_for_post_relation(sample_posts, category: category)
end
@@ -74,7 +74,7 @@ module RelatedTagCalculator
end
# Return a cached set of tags similar to the given search.
# @param post_query [PostQueryBuilder] the search to find similar tags for.
# @param post_query [PostQuery] the search to find similar tags for.
# @param max_tags [Integer] the maximum number of tags to return
# @param search_timeout [Integer] the database timeout for the search
# @param cache_timeout [Integer] the length of time to cache the results
@@ -90,7 +90,7 @@ module RelatedTagCalculator
# Return a cache key for the given search. Some searches are cached on a
# per-user basis because they depend on the current user (for example,
# searches for private favorites, favgroups, or saved searches).
# @param post_query [PostQueryBuilder] the post search
# @param post_query [PostQuery] the post search
# @return [String] the cache key
def self.cache_key(post_query)
if post_query.is_user_dependent_search?

View File

@@ -10,7 +10,7 @@ class RelatedTagQuery
def initialize(query:, user: User.anonymous, category: nil, type: nil, limit: nil)
@user = user
@post_query = PostQueryBuilder.new(query, user).normalized_query
@post_query = PostQuery.normalize(query, current_user: user) # XXX This query does not include implicit metatags (rating:s, -status:deleted)
@query = @post_query.to_s
@category = category
@type = type
@@ -75,7 +75,7 @@ class RelatedTagQuery
end
def other_wiki_pages
tag = post_query.simple_tag
tag = post_query.tag
return [] if tag.nil?
if tag.copyright?

View File

@@ -1384,8 +1384,8 @@ class Post < ApplicationRecord
# @param hide_deleted_posts [Boolean] if true, automatically add -status:deleted to the search
# @return [ActiveRecord::Relation<Post>] the set of resulting posts
def user_tag_match(query, user = CurrentUser.user, tag_limit: user.tag_query_limit, safe_mode: CurrentUser.safe_mode?, hide_deleted_posts: user.hide_deleted_posts?)
post_query = PostQueryBuilder.new(query, user, tag_limit: tag_limit, safe_mode: safe_mode, hide_deleted_posts: hide_deleted_posts)
post_query.normalized_query.build
post_query = PostQuery.normalize(query, current_user: user, tag_limit: tag_limit, safe_mode: safe_mode, hide_deleted_posts: hide_deleted_posts)
post_query.with_implicit_metatags.posts
end
def search(params)

View File

@@ -46,9 +46,9 @@
<li><%= link_to "Deleted", posts_path(tags: "#{params[:tags]} status:deleted"), rel: "nofollow" %></li>
<li><%= link_to "Random", random_posts_path(tags: params[:tags]), id: "random-post", "data-shortcut": "r", rel: "nofollow" %></li>
<% if @post_set.normalized_query.has_single_tag? %>
<li><%= link_to "History", post_versions_path(search: { changed_tags: @post_set.normalized_query.tags.first.name }), rel: "nofollow" %></li>
<li><%= link_to "Discussions", forum_posts_path(search: { linked_to: @post_set.normalized_query.tags.first.name }), rel: "nofollow" %></li>
<% if @post_set.post_query.has_single_tag? %>
<li><%= link_to "History", post_versions_path(search: { changed_tags: @post_set.post_query.tag_name }), rel: "nofollow" %></li>
<li><%= link_to "Discussions", forum_posts_path(search: { linked_to: @post_set.post_query.tag_name }), rel: "nofollow" %></li>
<% end %>
<li><%= link_to "Count", posts_counts_path(tags: params[:tags]), rel: "nofollow" %></li>
</ul>
@@ -222,7 +222,7 @@
<% end %>
<% content_for(:html_header) do %>
<% if @post_set.query.is_empty_search? %>
<% if @post_set.post_query.is_empty_search? %>
<% page_title("#{Danbooru.config.app_name}: Anime Image Board", suffix: nil) %>
<% meta_description site_description %>

View File

@@ -6,14 +6,14 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
end
def assert_fast_count(count, query, query_options = {}, fast_count_options = {})
assert_equal(count, PostQueryBuilder.new(query, **query_options).normalized_query.fast_count(**fast_count_options))
assert_equal(count, PostQuery.normalize(query, **query_options).with_implicit_metatags.fast_count(**fast_count_options))
end
def assert_parse_equals(expected, query)
assert_equal(expected, PostQueryBuilder.new(query).split_query)
# parsing, serializing, then parsing again should produce the same result.
assert_equal(PostQueryBuilder.new(query).to_s, PostQueryBuilder.new(PostQueryBuilder.new(query).to_s).to_s)
assert_equal(PostQuery.new(query).to_s, PostQuery.new(PostQuery.new(query).to_s).to_s)
end
setup do
@@ -1322,75 +1322,6 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
assert_equal(%w(aaa bbb), PostQueryBuilder.new("aaa bbb").split_query)
assert_equal(%w(favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%), PostQueryBuilder.new("favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%").split_query)
end
should "parse single tags correctly" do
  # Each row pairs a search string with the value `is_single_tag?` must return
  # for it. Rows are checked in the order listed.
  [
    ["foo", true],
    ["-foo", true],
    ["~foo", true],
    ["foo*", true],
    ["fav:1234", false],
    ["pool:1234", false],
    ['source:"foo bar baz"', false],
    ["foo bar", false],
  ].each do |search, single|
    assert_equal(single, PostQueryBuilder.new(search).is_single_tag?)
  end
end
should "parse simple tags correctly" do
  # Each row pairs a search string with the value `is_simple_tag?` must return
  # for it. Only the bare tag "foo" is expected to be simple; negated, OR'd,
  # wildcard, metatag, and multi-term searches are not.
  [
    ["foo", true],
    ["-foo", false],
    ["~foo", false],
    ["foo*", false],
    ["fav:1234", false],
    ["FAV:1234", false],
    ["pool:1234", false],
    ['source:"foo bar baz"', false],
    ["foo bar", false],
  ].each do |search, simple|
    assert_equal(simple, PostQueryBuilder.new(search).is_simple_tag?)
  end
end
# Checks how quoted and backslash-escaped metatag values are split into terms.
# Every case goes through `assert_parse_equals(expected_terms, raw_query)`.
# The expected terms show single-quoted values coming back double-quoted,
# unquoted values staying unquoted, and an unterminated quote kept verbatim.
should "parse quoted metatags correctly" do
# Single vs. double quotes around metatag values, in either order.
assert_parse_equals(%w[status:"active" source:"https"], %q(status:'active' source:'https'))
assert_parse_equals(%w[source:"https" status:"active"], %q(source:'https' status:'active'))
assert_parse_equals(%w[status:"active" source:"https"], %q(status:"active" source:'https'))
assert_parse_equals(%w[status:"active" source:"https"], %q(status:'active' source:"https"))
assert_parse_equals(%w[status:"active" source:https], %q(status:'active' source:https))
assert_parse_equals(%w[status:active source:"https"], %q(status:active source:'https'))
assert_parse_equals(%w[limit:"5" status:"active" source:"x"], %q(limit:"5" status:"active" source:"x"))
assert_parse_equals(%w[source:"" limit:"1" status:"deleted"], %q(source:"" limit:'1' status:'deleted'))
# Quoted values containing spaces, next to a plain tag that itself contains quote characters.
assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:"bar baz" don't_say_"lazy"))
assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:"bar baz" don't_say_"lazy"))
assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:'bar baz' don't_say_"lazy"))
# Backslash escapes inside quoted and unquoted values, and unterminated quotes.
assert_parse_equals([%q(source:"foo")], %q(source:"\f\o\o"))
assert_parse_equals([%q(source:"foo")], %q(source:'\f\o\o'))
assert_parse_equals([%q(source:foo\bar)], %q(source:foo\bar))
assert_parse_equals([%q(source:"foo)], %q(source:"foo))
assert_parse_equals([%q(source:'foo)], %q(source:'foo))
assert_parse_equals([%q(source:"foo bar")], %q(source:foo\ bar))
assert_parse_equals([%q(source:"\"foo bar\\\\")], %q(source:"foo\ bar\\))
# Escaped quote characters inside a quoted value.
assert_parse_equals(['source:"don\'t_say_\\"lazy\\""', 'don\'t_say_"lazy"'], %q(source:"don't_say_\"lazy\"" don't_say_"lazy"))
assert_parse_equals(['source:"don\'t_say_\\"lazy\\""', 'don\'t_say_"lazy"'], %q(source:'don\'t_say_"lazy"' don't_say_"lazy"))
end
end
context "The normalized_query method" do
  should "work" do
    # The gray -> grey alias must exist before the "gray" row below is checked.
    create(:tag_alias, antecedent_name: "gray", consequent_name: "grey")

    # Each row is [raw search, expected normalized string]. The rows cover
    # whitespace trimming, downcasing, de-duplication, alias resolution, and
    # term ordering with negated, OR'd, and metatag terms.
    cases = [
      ["foo", "foo"],
      [" foo ", "foo"],
      ["FOO", "foo"],
      ["foo foo", "foo"],
      ["gray", "grey"],
      ["bbb aaa", "aaa bbb"],
      ["bbb -aaa", "-aaa bbb"],
      ["~bbb ~aaa", "~aaa ~bbb"],
      ["bbb commentary:true", "commentary:true bbb"],
      ["bbb commentary:'true'", 'commentary:"true" bbb'],
      ["bbb -commentary:true", '-commentary:true bbb'],
      ["bbb -commentary:'true'", '-commentary:"true" bbb'],
    ]

    cases.each do |raw, normalized|
      assert_equal(normalized, PostQueryBuilder.new(raw).normalized_query.to_s)
    end
  end
end
context "#fast_count" do
@@ -1452,7 +1383,7 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
context "for a multi-tag search" do
# Seeds the cache with a count of 100 and expects `assert_fast_count` to see
# that value for the search "aaa score:42".
# NOTE(review): the two Cache.put keys differ only in term order
# ("score:42 aaa" vs. "aaa score:42"). They look like the before/after versions
# of a single line; presumably the key is "pfc:" plus the normalized query
# string. Verify against the fast_count implementation.
should "return the cached count, if it exists" do
Cache.put("pfc:score:42 aaa", 100)
Cache.put("pfc:aaa score:42", 100)
assert_fast_count(100, "aaa score:42")
end
@@ -1470,7 +1401,7 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
context "a blank search" do
# For an empty search string: the count with `estimate_count: false` must be 1,
# and calling `fast_count(estimate_count: true)` must not raise.
# NOTE(review): the test name repeats "should" ("should should execute a search").
# NOTE(review): the two assert_nothing_raised lines look like the before/after
# versions of a single line (PostQueryBuilder vs. PostQuery); confirm which one
# is live.
should "should execute a search" do
assert_fast_count(1, "", {}, { estimate_count: false })
assert_nothing_raised { PostQueryBuilder.new("").normalized_query.fast_count(estimate_count: true) }
assert_nothing_raised { PostQuery.new("").fast_count(estimate_count: true) }
end
should "return 0 for a nonexisting tag" do
@@ -1480,12 +1411,12 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
context "in safe mode" do
# With `safe_mode: true` and an empty search string: the count with
# `estimate_count: false` must be 0, and `fast_count(estimate_count: true)`
# must not raise.
# NOTE(review): the two assert_nothing_raised lines look like the before/after
# versions of a single line (PostQueryBuilder vs. PostQuery); confirm which one
# is live.
should "work for a blank search" do
assert_fast_count(0, "", { safe_mode: true }, { estimate_count: false })
assert_nothing_raised { PostQueryBuilder.new("", safe_mode: true).normalized_query.fast_count(estimate_count: true) }
assert_nothing_raised { PostQuery.new("", safe_mode: true).fast_count(estimate_count: true) }
end
# With `safe_mode: true` and a nil search: the count with
# `estimate_count: false` must be 0.
# NOTE(review): only the first assertion passes nil. Both assert_nothing_raised
# guards pass "" instead, so the estimate_count path is never exercised with
# nil here. Confirm whether that is intended.
# NOTE(review): the two assert_nothing_raised lines look like the before/after
# versions of a single line (PostQueryBuilder vs. PostQuery); confirm which one
# is live.
should "work for a nil search" do
assert_fast_count(0, nil, { safe_mode: true }, { estimate_count: false })
assert_nothing_raised { PostQueryBuilder.new("", safe_mode: true).normalized_query.fast_count(estimate_count: true) }
assert_nothing_raised { PostQuery.new("", safe_mode: true).fast_count(estimate_count: true) }
end
should "not fail for a two tag search by a member" do
@@ -1502,8 +1433,8 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
@user = create(:user, enable_private_favorites: true)
@post = as(@user) { create(:post, tag_string: "fav:#{@user.name}") }
assert_equal(1, PostQueryBuilder.new("fav:#{@user.name}", @user).fast_count)
assert_equal(0, PostQueryBuilder.new("fav:#{@user.name}").fast_count)
assert_equal(1, PostQuery.new("fav:#{@user.name}", current_user: @user).fast_count)
assert_equal(0, PostQuery.new("fav:#{@user.name}").fast_count)
end
end
end

View File

@@ -2,12 +2,12 @@ require 'test_helper'
class RelatedTagCalculatorTest < ActiveSupport::TestCase
# Test helper: builds a post query from `tag_search` for `user`, passes it to
# RelatedTagCalculator.frequent_tags_for_search, and returns the `name` of each
# result.
#
# @param tag_search [String] the search string
# @param user the user the query is built for (defaults to CurrentUser.user)
# @param options keyword options forwarded to RelatedTagCalculator.frequent_tags_for_search
#
# NOTE(review): `post_query` is assigned twice, so as written only the second
# assignment (PostQuery.normalize) is used. The two lines look like the
# before/after versions of a single line; confirm which one is live.
def frequent_tags_for_search(tag_search, user = CurrentUser.user, **options)
post_query = PostQueryBuilder.new(tag_search, user)
post_query = PostQuery.normalize(tag_search, current_user: user)
RelatedTagCalculator.frequent_tags_for_search(post_query, **options).pluck(:name)
end
# Test helper: builds a post query from `tag_search` for `user`, passes it to
# RelatedTagCalculator.similar_tags_for_search, and returns the `name` of each
# result.
#
# @param tag_search [String] the search string
# @param user the user the query is built for (defaults to CurrentUser.user)
# @param options keyword options forwarded to RelatedTagCalculator.similar_tags_for_search
#
# NOTE(review): `post_query` is assigned twice, so as written only the second
# assignment (PostQuery.normalize) is used. The two lines look like the
# before/after versions of a single line; confirm which one is live.
def similar_tags_for_search(tag_search, user = CurrentUser.user, **options)
post_query = PostQueryBuilder.new(tag_search, user).normalized_query
post_query = PostQuery.normalize(tag_search, current_user: user)
RelatedTagCalculator.similar_tags_for_search(post_query, **options).pluck(:name)
end