From af183467b64ece45c598ffcb823cb657a94053e6 Mon Sep 17 00:00:00 2001 From: evazion Date: Mon, 4 Apr 2022 16:52:11 -0500 Subject: [PATCH] post queries: switch to new post search engine. Switch to the post search engine using the new PostQuery parser. The new engine fully supports AND, OR, and NOT operators and grouping expressions with parentheses. Highlights: New OR operator: * `skirt or dress` (same as `~skirt ~dress`) Tags can be grouped with parentheses: * `1girl (skirt or dress)` * `(blonde_hair blue_eyes) or (red_hair green_eyes)` * `~(blonde_hair blue_eyes) ~(red_hair green_eyes)` (same as above) * `(pantyhose or thighhighs) (black_legwear or brown_legwear)` * `(~pantyhose ~thighhighs) (~black_legwear ~brown_legwear)` (same as above) Metatags can be OR'd together: * `user:evazion or fav:evazion` * `~user:evazion ~fav:evazion` Wildcard tags can be combined with either AND or OR: * `black_* white_*` (find posts with at least one black_* tag AND one white_* tag) * `black_* or white_*` (find posts with at least one black_* tag OR one white_* tag) * `~black_* ~white_*` (same as above) See 4c7cfc73 for more syntax examples. Fixes #4949: And+or search? 
Fixes #5056: Wildcard searches return unexpected results when combined with OR searches --- app/logical/bulk_update_request_processor.rb | 4 +- app/logical/concerns/searchable.rb | 4 +- app/logical/post_query.rb | 139 ++++++++- app/logical/post_query_builder.rb | 309 +++---------------- app/logical/post_sets/post.rb | 43 +-- app/logical/related_tag_calculator.rb | 10 +- app/logical/related_tag_query.rb | 4 +- app/models/post.rb | 4 +- app/views/posts/index.html.erb | 8 +- test/unit/post_query_builder_test.rb | 85 +---- test/unit/related_tag_calculator_test.rb | 4 +- 11 files changed, 227 insertions(+), 387 deletions(-) diff --git a/app/logical/bulk_update_request_processor.rb b/app/logical/bulk_update_request_processor.rb index aec15aa61..165e4084e 100644 --- a/app/logical/bulk_update_request_processor.rb +++ b/app/logical/bulk_update_request_processor.rb @@ -273,7 +273,7 @@ class BulkUpdateRequestProcessor "mass update {{#{args[0]}}} -> {{#{args[1]}}}" when :nuke - if PostQuery.new(args[0]).is_single_tag? + if PostQuery.normalize(args[0]).is_simple_tag? "nuke [[#{args[0]}]]" else "nuke {{#{args[0]}}}" @@ -292,7 +292,7 @@ class BulkUpdateRequestProcessor def self.nuke(tag_name) # Reject existing implications from any other tag to the one we're nuking # otherwise the tag won't be removed from posts that have those other tags - if PostQuery.new(tag_name).is_single_tag? + if PostQuery.normalize(tag_name).is_simple_tag? 
TagImplication.active.where(consequent_name: tag_name).each { |ti| ti.reject!(User.system) } TagImplication.active.where(antecedent_name: tag_name).each { |ti| ti.reject!(User.system) } end diff --git a/app/logical/concerns/searchable.rb b/app/logical/concerns/searchable.rb index f90a8e604..b91fe8643 100644 --- a/app/logical/concerns/searchable.rb +++ b/app/logical/concerns/searchable.rb @@ -13,7 +13,9 @@ module Searchable end def negate_relation - unscoped.where(all.where_clause.invert.ast) + relation = unscoped + relation = relation.from(all.from_clause.value) if all.from_clause.value.present? + relation.where(all.where_clause.invert.ast) end # XXX hacky method to AND two relations together. diff --git a/app/logical/post_query.rb b/app/logical/post_query.rb index 5d8a21594..dce0f5b18 100644 --- a/app/logical/post_query.rb +++ b/app/logical/post_query.rb @@ -3,11 +3,18 @@ class PostQuery extend Memoist - private attr_reader :current_user, :tag_limit, :safe_mode, :hide_deleted_posts, :builder + attr_reader :current_user + private attr_reader :tag_limit, :safe_mode, :hide_deleted_posts, :builder - delegate :tag?, :metatag?, :wildcard?, :metatags, :wildcards, :tag_names, :metatags, to: :ast + delegate :tag?, :metatag?, :wildcard?, :metatags, :wildcards, :tag_names, :metatags, :to_infix, to: :ast alias_method :safe_mode?, :safe_mode alias_method :hide_deleted_posts?, :hide_deleted_posts + alias_method :to_s, :to_infix + + # Return a new PostQuery with aliases replaced. + def self.normalize(...) + PostQuery.new(...).replace_aliases.trim + end def initialize(search_or_ast, current_user: User.anonymous, tag_limit: nil, safe_mode: false, hide_deleted_posts: false) if search_or_ast.is_a?(AST) @@ -39,10 +46,25 @@ class PostQuery @ast ||= Parser.parse(search) end - def fast_count(...) - builder.normalized_query.fast_count(...) + def posts + builder.posts(to_cnf) end + def paginated_posts(...) + builder.paginated_posts(to_cnf, ...) 
+ end + + # The name of the only tag in the query, if the query contains a single tag. The tag may not exist. The query may contain other metatags or wildcards, and the tag may be negated. + def tag_name + tag_names.first if has_single_tag? + end + + # The only tag in the query, if the query contains a single tag. The query may contain other metatags or wildcards, and the tag may be negated. + def tag + tags.first if has_single_tag? + end + + # The list of all tags contained in the query. def tags Tag.where(name: tag_names) end @@ -57,10 +79,39 @@ class PostQuery ast.none? end - def is_single_tag? + # True if the search is a single metatag search for the given metatag. + def is_metatag?(name, value = nil) + if value.nil? + is_single_term? && has_metatag?(name) + else + is_single_term? && find_metatag(name) == value.to_s + end + end + + # True if the search consists of a single tag, metatag, or wildcard. + def is_single_term? + tag_names.size + metatags.size + wildcards.size == 1 + end + + # True if this search consists only of a single non-negated tag, with no other metatags or operators. + def is_simple_tag? ast.tag? end + # True if the search contains a single tag. It may have other metatags or wildcards, and the tag may be negated. + def has_single_tag? + tag_names.one? + end + + # True if the search depends on the current user because of permissions or privacy settings. + def is_user_dependent_search? + metatags.any? 
do |metatag| + metatag.name.in?(%w[upvoter upvote downvoter downvote search flagger fav ordfav favgroup ordfavgroup]) || + metatag.name == "status" && metatag.value == "unmoderated" || + metatag.name == "disapproved" && !metatag.value.downcase.in?(PostDisapproval::REASONS) + end + end + def select_metatags(*names) metatags.select { |metatag| metatag.name.in?(names.map(&:to_s).map(&:downcase)) } end @@ -73,9 +124,9 @@ class PostQuery select_metatags(*names).first&.value end - # Return a new PostQuery with aliases replaced, implicit metatags added, and the query converted to conjunctive normal form. - def normalize - replace_aliases.with_implicit_metatags.to_cnf + # Return a new PostQuery with unnecessary AND and OR clauses eliminated. + def trim + build(ast.trim) end # Return a new PostQuery with aliases replaced. @@ -115,5 +166,75 @@ class PostQuery hide_deleted_posts? && !has_status_metatag end - memoize :tags, :normalize, :replace_aliases, :with_implicit_metatags, :to_cnf, :aliases, :implicit_metatags, :hide_deleted? + concerning :CountMethods do + def post_count + @post_count ||= fast_count + end + + # Return an estimate of the number of posts returned by the search. By default, we try to use an + # estimated or cached count before doing an exact count. + # + # @param timeout [Integer] The database timeout in milliseconds + # @param estimate_count [Boolean] If true, estimate the count with inexact methods. + # @param skip_cache [Boolean] If true, don't use the cached count. + # @return [Integer, nil] The number of posts, or nil on timeout. + def fast_count(timeout: 1_000, estimate_count: true, skip_cache: false) + count = nil + count = estimated_count if estimate_count + count = cached_count(timeout) if count.nil? && !skip_cache + count = exact_count(timeout) if count.nil? && skip_cache + count + end + + def estimated_count + if is_empty_search? + estimated_row_count + elsif is_simple_tag? 
+ tag.try(:post_count) + elsif is_metatag?(:rating) + estimated_row_count + elsif is_metatag?(:pool) || is_metatag?(:ordpool) + name = find_metatag(:pool, :ordpool) + Pool.find_by_name(name)&.post_count || 0 + elsif is_metatag?(:fav) || is_metatag?(:ordfav) + name = find_metatag(:fav, :ordfav) + user = User.find_by_name(name) + + if user.nil? + 0 + elsif Pundit.policy!(current_user, user).can_see_favorites? + user.favorite_count + else + nil + end + end + end + + # Estimate the count by parsing the Postgres EXPLAIN output. + def estimated_row_count + ExplainParser.new(posts).row_count + end + + def cached_count(timeout, duration: 5.minutes) + Cache.get(count_cache_key, duration) do + exact_count(timeout) + end + end + + def exact_count(timeout) + Post.with_timeout(timeout) do + posts.count + end + end + + def count_cache_key + if is_user_dependent_search? + "pfc[#{current_user.id.to_i}]:#{to_s}" + else + "pfc:#{to_s}" + end + end + end + + memoize :tags, :replace_aliases, :with_implicit_metatags, :to_cnf, :aliases, :implicit_metatags, :hide_deleted? 
end diff --git a/app/logical/post_query_builder.rb b/app/logical/post_query_builder.rb index 5a4f7245c..b5bdcdb1a 100644 --- a/app/logical/post_query_builder.rb +++ b/app/logical/post_query_builder.rb @@ -93,49 +93,6 @@ class PostQueryBuilder @hide_deleted_posts = hide_deleted_posts end - def tags_match(tags, relation) - negated_wildcard_tags, negated_tags = tags.select(&:negated).partition(&:wildcard) - optional_wildcard_tags, optional_tags = tags.select(&:optional).partition(&:wildcard) - required_wildcard_tags, required_tags = tags.reject(&:negated).reject(&:optional).partition(&:wildcard) - - negated_tags = negated_tags.map(&:name) - optional_tags = optional_tags.map(&:name) - required_tags = required_tags.map(&:name) - - matched_negated_wildcard_tags = negated_wildcard_tags.flat_map { |tag| Tag.wildcard_matches(tag.name).limit(MAX_WILDCARD_TAGS).pluck(:name) } - matched_optional_wildcard_tags = optional_wildcard_tags.flat_map { |tag| Tag.wildcard_matches(tag.name).limit(MAX_WILDCARD_TAGS).pluck(:name) } - matched_required_wildcard_tags = required_wildcard_tags.flat_map { |tag| Tag.wildcard_matches(tag.name).limit(MAX_WILDCARD_TAGS).pluck(:name) } - - negated_tags += (matched_negated_wildcard_tags.empty? && !negated_wildcard_tags.empty?) ? negated_wildcard_tags.map(&:name) : matched_negated_wildcard_tags - optional_tags += (matched_optional_wildcard_tags.empty? && !optional_wildcard_tags.empty?) ? optional_wildcard_tags.map(&:name) : matched_optional_wildcard_tags - optional_tags += (matched_required_wildcard_tags.empty? && !required_wildcard_tags.empty?) ? required_wildcard_tags.map(&:name) : matched_required_wildcard_tags - - relation = relation.where_array_includes_all("string_to_array(posts.tag_string, ' ')", required_tags) if required_tags.present? - relation = relation.where_array_includes_any("string_to_array(posts.tag_string, ' ')", optional_tags) if optional_tags.present? 
- relation = relation.where_array_includes_none("string_to_array(posts.tag_string, ' ')", negated_tags) if negated_tags.present? - relation - end - - def metatags_match(metatags, relation) - metatags.each do |metatag| - metatag_name = metatags_without_ord[metatag.name] if metatag.negated && metatags_without_ord.key?(metatag.name) - - clause = metatag_matches(metatag_name || metatag.name, metatag.value, quoted: metatag.quoted) - clause = clause.negate_relation if metatag.negated - relation = relation.and_relation(clause) - end - - relation - end - - def metatags_without_ord - { - "ordfav" => "fav", - "ordfavgroup" => "favgroup", - "ordpool" => "pool", - } - end - def metatag_matches(name, value, relation = Post.all, quoted: false) case name when "id" @@ -256,53 +213,77 @@ class PostQueryBuilder end end - def tables_for_query - metatag_names = metatags.map(&:name) - metatag_names << find_metatag(:order).remove(/_(asc|desc)\z/i) if has_metatag?(:order) + def tables_for_query(post_query) + metatag_names = post_query.metatags.map(&:name) + metatag_names << post_query.find_metatag(:order).remove(/_(asc|desc)\z/i) if post_query.has_metatag?(:order) tables = metatag_names.map { |metatag| table_for_metatag(metatag.to_s) } tables.compact.uniq end - def add_joins(relation) - tables = tables_for_query + def add_joins(post_query, relation) + tables = tables_for_query(post_query) relation = relation.with_stats(tables) relation end - def build(includes: nil) - validate! - relation = Post.includes(includes) - relation = add_joins(relation) - relation = metatags_match(metatags, relation) - relation = tags_match(tags, relation) + # Generate a SQL relation from a PostQuery. + def build_relation(post_query, relation = Post.all) + post_query.ast.visit do |node, *children| + case node.type + in :all + relation.all + in :none + relation.none + in :tag + relation.tags_include(node.name) + in :metatag + metatag_matches(node.name, node.value, relation, quoted: node.quoted?) 
+ in :wildcard + tag_names = Tag.wildcard_matches(node.name).limit(MAX_WILDCARD_TAGS).pluck(:name) + relation.where_array_includes_any("string_to_array(posts.tag_string, ' ')", tag_names) + in :not + children.first.negate_relation + in :and + children.reduce(&:and) + in :or + children.reduce(&:or) + end + end + end + + def posts(post_query, includes: nil) + relation = Post.all + relation = add_joins(post_query, relation) + relation = build_relation(post_query, relation) # HACK: if we're using a date: or age: metatag, default to ordering by # created_at instead of id so that the query will use the created_at index. - if has_metatag?(:date, :age) && find_metatag(:order).in?(["id", "id_asc"]) + if post_query.has_metatag?(:date, :age) && post_query.find_metatag(:order).in?(["id", "id_asc"]) relation = search_order(relation, "created_at_asc") - elsif has_metatag?(:date, :age) && find_metatag(:order).in?(["id_desc", nil]) + elsif post_query.has_metatag?(:date, :age) && post_query.find_metatag(:order).in?(["id_desc", nil]) relation = search_order(relation, "created_at_desc") - elsif find_metatag(:order) == "custom" - relation = search_order_custom(relation, select_metatags(:id).map(&:value)) - elsif has_metatag?(:ordfav) + elsif post_query.find_metatag(:order) == "custom" + relation = search_order_custom(relation, post_query.select_metatags(:id).map(&:value)) + elsif post_query.has_metatag?(:ordfav) # no-op else - relation = search_order(relation, find_metatag(:order)) + relation = search_order(relation, post_query.find_metatag(:order)) end - if count = find_metatag(:random) + if count = post_query.find_metatag(:random) count = Integer(count).clamp(0, PostSets::Post::MAX_PER_PAGE) relation = relation.random(count) end + relation = relation.includes(includes) relation end - def paginated_posts(page, small_search_threshold: Danbooru.config.small_search_threshold.to_i, includes: nil, **options) - posts = build(includes: includes).paginate(page, **options) - posts = 
optimize_search(posts, small_search_threshold) + def paginated_posts(post_query, page, count:, small_search_threshold: Danbooru.config.small_search_threshold.to_i, includes: nil, **options) + posts = posts(post_query, includes: includes).paginate(page, count: count, **options) + posts = optimize_search(posts, count, small_search_threshold) posts.load end @@ -315,7 +296,7 @@ class PostQueryBuilder # tags, Postgres sometimes assumes tags in the 10k-50k range are large enough # for a post id index scan, when in reality a tag index bitmap scan would be # better. - def optimize_search(relation, small_search_threshold) + def optimize_search(relation, post_count, small_search_threshold) return relation unless small_search_threshold.present? order_values = relation.order_values.map { |order| order.try(:to_sql) || order.to_s }.map(&:downcase) @@ -745,131 +726,6 @@ class PostQueryBuilder end end - concerning :CountMethods do - def post_count - @post_count ||= fast_count - end - - # Return an estimate of the number of posts returned by the search. By - # default, we try to use an estimated or cached count before doing an exact - # count. - # - # @param timeout [Integer] the database timeout - # @param estimate_count [Boolean] if true, estimate the count with inexact methods - # @param skip_cache [Boolean] if true, don't use the cached count - # @return [Integer, nil] the number of posts, or nil on timeout - def fast_count(timeout: 1_000, estimate_count: true, skip_cache: false) - count = nil - count = estimated_count if estimate_count - count = cached_count(timeout) if count.nil? && !skip_cache - count = exact_count(timeout) if count.nil? && skip_cache - count - end - - def estimated_count - if is_empty_search? - estimated_row_count - elsif is_simple_tag? 
- Tag.find_by(name: tags.first.name).try(:post_count) - elsif is_metatag?(:rating) - estimated_row_count - elsif is_metatag?(:pool) || is_metatag?(:ordpool) - name = find_metatag(:pool, :ordpool) - Pool.find_by_name(name)&.post_count || 0 - elsif is_metatag?(:fav) || is_metatag?(:ordfav) - name = find_metatag(:fav, :ordfav) - user = User.find_by_name(name) - - if user.nil? - 0 - elsif Pundit.policy!(current_user, user).can_see_favorites? - user.favorite_count - else - nil - end - end - end - - # Estimate the count by parsing the Postgres EXPLAIN output. - def estimated_row_count - ExplainParser.new(build).row_count - end - - def cached_count(timeout, duration: 5.minutes) - Cache.get(count_cache_key, duration) do - exact_count(timeout) - end - end - - def exact_count(timeout) - Post.with_timeout(timeout) do - build.count - end - end - - def count_cache_key - if is_user_dependent_search? - "pfc[#{current_user.id.to_i}]:#{to_s}" - else - "pfc:#{to_s}" - end - end - - # @return [Boolean] true if the search depends on the current user because - # of permissions or privacy settings. - def is_user_dependent_search? - metatags.any? do |metatag| - metatag.name.in?(%w[upvoter upvote downvoter downvote search flagger fav ordfav favgroup ordfavgroup]) || - metatag.name == "status" && metatag.value == "unmoderated" || - metatag.name == "disapproved" && !metatag.value.downcase.in?(PostDisapproval::REASONS) - end - end - end - - concerning :NormalizationMethods do - # Normalize a search by sorting tags and applying aliases. - # @return [PostQueryBuilder] the normalized query - def normalized_query(implicit: true, sort: true) - post_query = dup - post_query.terms.concat(implicit_metatags) if implicit - post_query.normalize_aliases! - post_query.normalize_order! if sort - post_query - end - - # Apply aliases to all tags in the query. - def normalize_aliases! - tag_names = tags.map(&:name) - tag_aliases = tag_names.zip(TagAlias.to_aliased(tag_names)).to_h - - terms.map! 
do |term| - term.name = tag_aliases[term.name] if term.type == :tag - term - end - end - - # Normalize the tag order. - def normalize_order! - terms.sort_by!(&:to_s).uniq! - end - - # Implicit metatags are metatags added by the user's account settings. - # rating:s is implicit under safe mode. -status:deleted is implicit when the - # "hide deleted posts" setting is on. - def implicit_metatags - metatags = [] - metatags << OpenStruct.new(type: :metatag, name: "rating", value: "s") if safe_mode? - metatags << OpenStruct.new(type: :metatag, name: "status", value: "deleted", negated: true) if hide_deleted? - metatags - end - - # XXX unify with PostSets::Post#show_deleted? - def hide_deleted? - has_status_metatag = select_metatags(:status).any? { |metatag| metatag.value.downcase.in?(%w[deleted active any all unmoderated modqueue appealed]) } - hide_deleted_posts? && !has_status_metatag - end - end - concerning :UtilityMethods do def to_s split_query.join(" ") @@ -879,78 +735,7 @@ class PostQueryBuilder def terms @terms ||= scan_query end - - # The list of regular tags in the search. - def tags - terms.select { |term| term.type == :tag } - end - - # The list of metatags in the search. - def metatags - terms.select { |term| term.type == :metatag } - end - - # Find all metatags with the given names. - def select_metatags(*names) - metatags.select { |term| term.name.in?(names.map(&:to_s)) } - end - - # Find the first metatag with any of the given names. - def find_metatag(*metatags) - select_metatags(*metatags).first.try(:value) - end - - # @return [Boolean] true if the search has a metatag with any of the given names. - def has_metatag?(*metatag_names) - metatags.any? { |term| term.name.in?(metatag_names.map(&:to_s).map(&:downcase)) } - end - - # @return [Boolean] true if the search has a single regular tag, with any number of metatags. - def has_single_tag? 
- tags.size == 1 && !tags.first.wildcard - end - - # @return [Boolean] true if the search is a single metatag search for the given metatag. - def is_metatag?(name, value = nil) - if value.nil? - is_single_term? && has_metatag?(name) - else - is_single_term? && find_metatag(name) == value.to_s - end - end - - # @return [Boolean] true if the search doesn't have any tags or metatags. - def is_empty_search? - terms.size == 0 - end - - # @return [Boolean] true if the search consists of a single tag or metatag. - def is_single_term? - terms.size == 1 - end - - # @return [Boolean] true if the search has a single tag, possibly with wildcards or negation. - def is_single_tag? - is_single_term? && tags.size == 1 - end - - # @return [Boolean] true if the search has a single tag, without any wildcards or operators. - def is_simple_tag? - tag = tags.first - is_single_tag? && !tag.negated && !tag.optional && !tag.wildcard - end - - # @return [Boolean] true if the search has a single tag with a wildcard - def is_wildcard_search? - is_single_tag? && tags.first.wildcard - end - - # @return [Tag, nil] the tag if the search is for a simple tag, otherwise nil - def simple_tag - return nil if !is_simple_tag? 
- Tag.find_by_name(tags.first.name) - end end - memoize :split_query, :post_count + memoize :split_query end diff --git a/app/logical/post_sets/post.rb b/app/logical/post_sets/post.rb index f879be6ab..9e136848d 100644 --- a/app/logical/post_sets/post.rb +++ b/app/logical/post_sets/post.rb @@ -8,14 +8,16 @@ module PostSets class Post MAX_PER_PAGE = 200 MAX_SIDEBAR_TAGS = 25 + MAX_WILDCARD_TAGS = PostQueryBuilder::MAX_WILDCARD_TAGS - attr_reader :page, :format, :tag_string, :query, :normalized_query, :show_votes - delegate :post_count, to: :normalized_query + attr_reader :page, :format, :tag_string, :query, :post_query, :normalized_query, :show_votes + delegate :tag, to: :post_query alias_method :show_votes?, :show_votes def initialize(tags, page = 1, per_page = nil, user: CurrentUser.user, format: "html", show_votes: false) @query = PostQueryBuilder.new(tags, user, tag_limit: user.tag_query_limit, safe_mode: CurrentUser.safe_mode?, hide_deleted_posts: user.hide_deleted_posts?) - @normalized_query = query.normalized_query + @post_query = PostQuery.normalize(tags, current_user: user, tag_limit: user.tag_query_limit, safe_mode: CurrentUser.safe_mode?, hide_deleted_posts: user.hide_deleted_posts?) + @normalized_query = post_query.with_implicit_metatags @tag_string = tags @page = page @per_page = per_page @@ -32,13 +34,8 @@ module PostSets end def wiki_page - return nil unless normalized_query.has_single_tag? - @wiki_page ||= WikiPage.undeleted.find_by(title: normalized_query.tags.first.name) - end - - def tag - return nil unless normalized_query.has_single_tag? - @tag ||= Tag.find_by(name: normalized_query.tags.first.name) + return nil unless post_query.has_single_tag? 
+ @wiki_page ||= WikiPage.undeleted.find_by(title: post_query.tag_name) end def artist @@ -48,7 +45,7 @@ module PostSets end def pool - pool_names = normalized_query.select_metatags(:pool, :ordpool).map(&:value) + pool_names = post_query.select_metatags(:pool, :ordpool).map(&:value) name = pool_names.first return nil unless pool_names.size == 1 @@ -56,7 +53,7 @@ module PostSets end def favgroup - favgroup_names = normalized_query.select_metatags(:favgroup, :ordfavgroup).map(&:value) + favgroup_names = post_query.select_metatags(:favgroup, :ordfavgroup).map(&:value) name = favgroup_names.first return nil unless favgroup_names.size == 1 @@ -84,7 +81,7 @@ module PostSets end def per_page - (@per_page || query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, max_per_page) + (@per_page || post_query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, max_per_page) end def max_per_page @@ -95,11 +92,15 @@ module PostSets @posts ||= normalized_query.paginated_posts(page, includes: includes, count: post_count, search_count: !post_count.nil?, limit: per_page, max_limit: max_per_page).load end + def post_count + normalized_query.post_count + end + def hide_from_crawler? return true if current_page > 50 return true if show_votes? return true if artist.present? && artist.is_banned? - return false if query.is_empty_search? || query.is_simple_tag? || query.is_metatag?(:order, :rank) + return false if post_query.is_empty_search? || post_query.is_simple_tag? || post_query.is_metatag?(:order, :rank) true end @@ -118,7 +119,7 @@ module PostSets end def show_deleted? - query.select_metatags("status").any? do |metatag| + post_query.select_metatags("status").any? do |metatag| metatag.value.downcase.in?(%w[all any active unmoderated modqueue deleted appealed]) end end @@ -133,13 +134,13 @@ module PostSets concerning :TagListMethods do def related_tags - if query.is_wildcard_search? + if post_query.wildcards.one? && post_query.tags.none? 
wildcard_tags - elsif query.is_metatag?(:search) + elsif post_query.is_metatag?(:search) saved_search_tags - elsif query.is_empty_search? || query.is_metatag?(:order, :rank) + elsif post_query.is_empty_search? || post_query.is_metatag?(:order, :rank) popular_tags.presence || frequent_tags - elsif query.is_single_term? + elsif post_query.is_single_term? similar_tags.presence || frequent_tags else frequent_tags @@ -151,7 +152,7 @@ module PostSets end def similar_tags - RelatedTagCalculator.cached_similar_tags_for_search(query.normalized_query(implicit: false), MAX_SIDEBAR_TAGS) + RelatedTagCalculator.cached_similar_tags_for_search(post_query, MAX_SIDEBAR_TAGS) end def frequent_tags @@ -161,7 +162,7 @@ module PostSets # Wildcard searches can show up to 100 tags in the sidebar, not 25, # because that's how many tags the search itself will use. def wildcard_tags - Tag.wildcard_matches(tag_string).limit(PostQueryBuilder::MAX_WILDCARD_TAGS).pluck(:name) + Tag.wildcard_matches(post_query.wildcards.first).limit(MAX_WILDCARD_TAGS).pluck(:name) end def saved_search_tags diff --git a/app/logical/related_tag_calculator.rb b/app/logical/related_tag_calculator.rb index b143a3c89..5564b2c63 100644 --- a/app/logical/related_tag_calculator.rb +++ b/app/logical/related_tag_calculator.rb @@ -19,7 +19,7 @@ # @see https://en.wikipedia.org/wiki/Cosine_similarity module RelatedTagCalculator # Return the set of tags similar to the given search. - # @param post_query [PostQueryBuilder] the search to find similar tags for. + # @param post_query [PostQuery] the search to find similar tags for. # @param search_sample_size [Integer] the number of posts to sample from the search # @param tag_sample_size [Integer] the number of tags to calculate similarity for # @param category [Integer] an optional tag category, to restrict the tags to a given category. @@ -41,12 +41,12 @@ module RelatedTagCalculator end # Return the set of tags most frequently appearing in the given search. 
- # @param post_query [PostQueryBuilder] the search to find frequent tags for. + # @param post_query [PostQuery] the search to find frequent tags for. # @param search_sample_size [Integer] the number of posts to sample from the search # @param category [Integer] an optional tag category, to restrict the tags to a given category. # @return [Array] the set of frequent tags, ordered by most frequent def self.frequent_tags_for_search(post_query, search_sample_size: 1000, category: nil) - sample_posts = post_query.build.reorder(:md5).limit(search_sample_size) + sample_posts = post_query.posts.reorder(:md5).limit(search_sample_size) frequent_tags_for_post_relation(sample_posts, category: category) end @@ -74,7 +74,7 @@ module RelatedTagCalculator end # Return a cached set of tags similar to the given search. - # @param post_query [PostQueryBuilder] the search to find similar tags for. + # @param post_query [PostQuery] the search to find similar tags for. # @param max_tags [Integer] the maximum number of tags to return # @param search_timeout [Integer] the database timeout for the search # @param cache_timeout [Integer] the length of time to cache the results @@ -90,7 +90,7 @@ module RelatedTagCalculator # Return a cache key for the given search. Some searches are cached on a # per-user basis because they depend on the current user (for example, # searches for private favorites, favgroups, or saved searches). - # @param post_query [PostQueryBuilder] the post search + # @param post_query [PostQuery] the post search # @return [String] the cache key def self.cache_key(post_query) if post_query.is_user_dependent_search? 
diff --git a/app/logical/related_tag_query.rb b/app/logical/related_tag_query.rb index a77719682..d8868faa2 100644 --- a/app/logical/related_tag_query.rb +++ b/app/logical/related_tag_query.rb @@ -10,7 +10,7 @@ class RelatedTagQuery def initialize(query:, user: User.anonymous, category: nil, type: nil, limit: nil) @user = user - @post_query = PostQueryBuilder.new(query, user).normalized_query + @post_query = PostQuery.normalize(query, current_user: user) # XXX This query does not include implicit metatags (rating:s, -status:deleted) @query = @post_query.to_s @category = category @type = type @@ -75,7 +75,7 @@ class RelatedTagQuery end def other_wiki_pages - tag = post_query.simple_tag + tag = post_query.tag return [] if tag.nil? if tag.copyright? diff --git a/app/models/post.rb b/app/models/post.rb index 11c460efc..af62cabd4 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -1384,8 +1384,8 @@ class Post < ApplicationRecord # @param hide_deleted_posts [Boolean] if true, automatically add -status:deleted to the search # @return [ActiveRecord::Relation] the set of resulting posts def user_tag_match(query, user = CurrentUser.user, tag_limit: user.tag_query_limit, safe_mode: CurrentUser.safe_mode?, hide_deleted_posts: user.hide_deleted_posts?) - post_query = PostQueryBuilder.new(query, user, tag_limit: tag_limit, safe_mode: safe_mode, hide_deleted_posts: hide_deleted_posts) - post_query.normalized_query.build + post_query = PostQuery.normalize(query, current_user: user, tag_limit: tag_limit, safe_mode: safe_mode, hide_deleted_posts: hide_deleted_posts) + post_query.with_implicit_metatags.posts end def search(params) diff --git a/app/views/posts/index.html.erb b/app/views/posts/index.html.erb index 3653efbe8..c46c39647 100644 --- a/app/views/posts/index.html.erb +++ b/app/views/posts/index.html.erb @@ -46,9 +46,9 @@
  • <%= link_to "Deleted", posts_path(tags: "#{params[:tags]} status:deleted"), rel: "nofollow" %>
  • <%= link_to "Random", random_posts_path(tags: params[:tags]), id: "random-post", "data-shortcut": "r", rel: "nofollow" %>
  • - <% if @post_set.normalized_query.has_single_tag? %> -
  • <%= link_to "History", post_versions_path(search: { changed_tags: @post_set.normalized_query.tags.first.name }), rel: "nofollow" %>
  • -
  • <%= link_to "Discussions", forum_posts_path(search: { linked_to: @post_set.normalized_query.tags.first.name }), rel: "nofollow" %>
  • + <% if @post_set.post_query.has_single_tag? %> +
  • <%= link_to "History", post_versions_path(search: { changed_tags: @post_set.post_query.tag_name }), rel: "nofollow" %>
  • +
  • <%= link_to "Discussions", forum_posts_path(search: { linked_to: @post_set.post_query.tag_name }), rel: "nofollow" %>
  • <% end %>
  • <%= link_to "Count", posts_counts_path(tags: params[:tags]), rel: "nofollow" %>
  • @@ -222,7 +222,7 @@ <% end %> <% content_for(:html_header) do %> - <% if @post_set.query.is_empty_search? %> + <% if @post_set.post_query.is_empty_search? %> <% page_title("#{Danbooru.config.app_name}: Anime Image Board", suffix: nil) %> <% meta_description site_description %> diff --git a/test/unit/post_query_builder_test.rb b/test/unit/post_query_builder_test.rb index e2af7cc5b..0fd9afee1 100644 --- a/test/unit/post_query_builder_test.rb +++ b/test/unit/post_query_builder_test.rb @@ -6,14 +6,14 @@ class PostQueryBuilderTest < ActiveSupport::TestCase end def assert_fast_count(count, query, query_options = {}, fast_count_options = {}) - assert_equal(count, PostQueryBuilder.new(query, **query_options).normalized_query.fast_count(**fast_count_options)) + assert_equal(count, PostQuery.normalize(query, **query_options).with_implicit_metatags.fast_count(**fast_count_options)) end def assert_parse_equals(expected, query) assert_equal(expected, PostQueryBuilder.new(query).split_query) # parsing, serializing, then parsing again should produce the same result. - assert_equal(PostQueryBuilder.new(query).to_s, PostQueryBuilder.new(PostQueryBuilder.new(query).to_s).to_s) + assert_equal(PostQuery.new(query).to_s, PostQuery.new(PostQuery.new(query).to_s).to_s) end setup do @@ -1322,75 +1322,6 @@ class PostQueryBuilderTest < ActiveSupport::TestCase assert_equal(%w(aaa bbb), PostQueryBuilder.new("aaa bbb").split_query) assert_equal(%w(favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%), PostQueryBuilder.new("favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%").split_query) end - - should "parse single tags correctly" do - assert_equal(true, PostQueryBuilder.new("foo").is_single_tag?) - assert_equal(true, PostQueryBuilder.new("-foo").is_single_tag?) - assert_equal(true, PostQueryBuilder.new("~foo").is_single_tag?) - assert_equal(true, PostQueryBuilder.new("foo*").is_single_tag?) - assert_equal(false, PostQueryBuilder.new("fav:1234").is_single_tag?) 
- assert_equal(false, PostQueryBuilder.new("pool:1234").is_single_tag?) - assert_equal(false, PostQueryBuilder.new('source:"foo bar baz"').is_single_tag?) - assert_equal(false, PostQueryBuilder.new("foo bar").is_single_tag?) - end - - should "parse simple tags correctly" do - assert_equal(true, PostQueryBuilder.new("foo").is_simple_tag?) - assert_equal(false, PostQueryBuilder.new("-foo").is_simple_tag?) - assert_equal(false, PostQueryBuilder.new("~foo").is_simple_tag?) - assert_equal(false, PostQueryBuilder.new("foo*").is_simple_tag?) - assert_equal(false, PostQueryBuilder.new("fav:1234").is_simple_tag?) - assert_equal(false, PostQueryBuilder.new("FAV:1234").is_simple_tag?) - assert_equal(false, PostQueryBuilder.new("pool:1234").is_simple_tag?) - assert_equal(false, PostQueryBuilder.new('source:"foo bar baz"').is_simple_tag?) - assert_equal(false, PostQueryBuilder.new("foo bar").is_simple_tag?) - end - - should "parse quoted metatags correctly" do - assert_parse_equals(%w[status:"active" source:"https"], %q(status:'active' source:'https')) - assert_parse_equals(%w[source:"https" status:"active"], %q(source:'https' status:'active')) - assert_parse_equals(%w[status:"active" source:"https"], %q(status:"active" source:'https')) - assert_parse_equals(%w[status:"active" source:"https"], %q(status:'active' source:"https")) - assert_parse_equals(%w[status:"active" source:https], %q(status:'active' source:https)) - assert_parse_equals(%w[status:active source:"https"], %q(status:active source:'https')) - - assert_parse_equals(%w[limit:"5" status:"active" source:"x"], %q(limit:"5" status:"active" source:"x")) - assert_parse_equals(%w[source:"" limit:"1" status:"deleted"], %q(source:"" limit:'1' status:'deleted')) - - assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:"bar baz" don't_say_"lazy")) - assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:"bar baz" don't_say_"lazy")) - assert_parse_equals(['source:"bar baz"', 
'don\'t_say_"lazy"'], %q(source:'bar baz' don't_say_"lazy")) - - assert_parse_equals([%q(source:"foo")], %q(source:"\f\o\o")) - assert_parse_equals([%q(source:"foo")], %q(source:'\f\o\o')) - assert_parse_equals([%q(source:foo\bar)], %q(source:foo\bar)) - assert_parse_equals([%q(source:"foo)], %q(source:"foo)) - assert_parse_equals([%q(source:'foo)], %q(source:'foo)) - assert_parse_equals([%q(source:"foo bar")], %q(source:foo\ bar)) - assert_parse_equals([%q(source:"\"foo bar\\\\")], %q(source:"foo\ bar\\)) - - assert_parse_equals(['source:"don\'t_say_\\"lazy\\""', 'don\'t_say_"lazy"'], %q(source:"don't_say_\"lazy\"" don't_say_"lazy")) - assert_parse_equals(['source:"don\'t_say_\\"lazy\\""', 'don\'t_say_"lazy"'], %q(source:'don\'t_say_"lazy"' don't_say_"lazy")) - end - end - - context "The normalized_query method" do - should "work" do - create(:tag_alias, antecedent_name: "gray", consequent_name: "grey") - - assert_equal("foo", PostQueryBuilder.new("foo").normalized_query.to_s) - assert_equal("foo", PostQueryBuilder.new(" foo ").normalized_query.to_s) - assert_equal("foo", PostQueryBuilder.new("FOO").normalized_query.to_s) - assert_equal("foo", PostQueryBuilder.new("foo foo").normalized_query.to_s) - assert_equal("grey", PostQueryBuilder.new("gray").normalized_query.to_s) - assert_equal("aaa bbb", PostQueryBuilder.new("bbb aaa").normalized_query.to_s) - assert_equal("-aaa bbb", PostQueryBuilder.new("bbb -aaa").normalized_query.to_s) - assert_equal("~aaa ~bbb", PostQueryBuilder.new("~bbb ~aaa").normalized_query.to_s) - assert_equal("commentary:true bbb", PostQueryBuilder.new("bbb commentary:true").normalized_query.to_s) - assert_equal('commentary:"true" bbb', PostQueryBuilder.new("bbb commentary:'true'").normalized_query.to_s) - assert_equal('-commentary:true bbb', PostQueryBuilder.new("bbb -commentary:true").normalized_query.to_s) - assert_equal('-commentary:"true" bbb', PostQueryBuilder.new("bbb -commentary:'true'").normalized_query.to_s) - end end context 
"#fast_count" do @@ -1452,7 +1383,7 @@ class PostQueryBuilderTest < ActiveSupport::TestCase context "for a multi-tag search" do should "return the cached count, if it exists" do - Cache.put("pfc:score:42 aaa", 100) + Cache.put("pfc:aaa score:42", 100) assert_fast_count(100, "aaa score:42") end @@ -1470,7 +1401,7 @@ class PostQueryBuilderTest < ActiveSupport::TestCase context "a blank search" do should "should execute a search" do assert_fast_count(1, "", {}, { estimate_count: false }) - assert_nothing_raised { PostQueryBuilder.new("").normalized_query.fast_count(estimate_count: true) } + assert_nothing_raised { PostQuery.new("").fast_count(estimate_count: true) } end should "return 0 for a nonexisting tag" do @@ -1480,12 +1411,12 @@ class PostQueryBuilderTest < ActiveSupport::TestCase context "in safe mode" do should "work for a blank search" do assert_fast_count(0, "", { safe_mode: true }, { estimate_count: false }) - assert_nothing_raised { PostQueryBuilder.new("", safe_mode: true).normalized_query.fast_count(estimate_count: true) } + assert_nothing_raised { PostQuery.new("", safe_mode: true).fast_count(estimate_count: true) } end should "work for a nil search" do assert_fast_count(0, nil, { safe_mode: true }, { estimate_count: false }) - assert_nothing_raised { PostQueryBuilder.new("", safe_mode: true).normalized_query.fast_count(estimate_count: true) } + assert_nothing_raised { PostQuery.new("", safe_mode: true).fast_count(estimate_count: true) } end should "not fail for a two tag search by a member" do @@ -1502,8 +1433,8 @@ class PostQueryBuilderTest < ActiveSupport::TestCase @user = create(:user, enable_private_favorites: true) @post = as(@user) { create(:post, tag_string: "fav:#{@user.name}") } - assert_equal(1, PostQueryBuilder.new("fav:#{@user.name}", @user).fast_count) - assert_equal(0, PostQueryBuilder.new("fav:#{@user.name}").fast_count) + assert_equal(1, PostQuery.new("fav:#{@user.name}", current_user: @user).fast_count) + assert_equal(0, 
PostQuery.new("fav:#{@user.name}").fast_count) end end end diff --git a/test/unit/related_tag_calculator_test.rb b/test/unit/related_tag_calculator_test.rb index c40d11b7a..63fd359da 100644 --- a/test/unit/related_tag_calculator_test.rb +++ b/test/unit/related_tag_calculator_test.rb @@ -2,12 +2,12 @@ require 'test_helper' class RelatedTagCalculatorTest < ActiveSupport::TestCase def frequent_tags_for_search(tag_search, user = CurrentUser.user, **options) - post_query = PostQueryBuilder.new(tag_search, user) + post_query = PostQuery.normalize(tag_search, current_user: user) RelatedTagCalculator.frequent_tags_for_search(post_query, **options).pluck(:name) end def similar_tags_for_search(tag_search, user = CurrentUser.user, **options) - post_query = PostQueryBuilder.new(tag_search, user).normalized_query + post_query = PostQuery.normalize(tag_search, current_user: user) RelatedTagCalculator.similar_tags_for_search(post_query, **options).pluck(:name) end