diff --git a/app/logical/autocomplete_service.rb b/app/logical/autocomplete_service.rb
index 424a00dd3..693b19684 100644
--- a/app/logical/autocomplete_service.rb
+++ b/app/logical/autocomplete_service.rb
@@ -78,8 +78,7 @@ class AutocompleteService
   #
   # @return [Array] the autocomplete results
   def autocomplete_tag_query
-    if parsed_query.metatags.one?
-      metatag = parsed_query.metatags.first
+    if metatag.present?
       autocomplete_metatag(metatag.name, metatag.value)
     else
       tag = Tag.normalize_name(query)
@@ -105,25 +104,67 @@ class AutocompleteService
       results = tag_other_name_matches(string)
     elsif string.starts_with?("/")
       results = tag_abbreviation_matches(string)
-      results = results.sort_by do |r|
-        [r[:antecedent].to_s.size, -r[:post_count]]
-      end
-
-      results = results.uniq { |r| r[:value] }.take(limit)
     elsif string.include?("*")
-      results = tag_matches(string)
-    else
-      results = tag_matches(string + "*")
+      results = tag_wildcard_matches(string)
+    elsif Tag.parsable_into_words?(string) # do a word match if the search contains at least 2 contiguous letters or numbers
+      results = tag_word_matches(string)
       results = tag_autocorrect_matches(string) if results.blank?
+    else
+      results = tag_prefix_matches(string)
     end
 
     results
   end
 
-  # Find tags or tag aliases matching a wildcard search.
+  # Find tags or tag aliases containing all the words in the search string, in any order.
+  # Example: "haruhi_suzumiya" => "suzumiya_haruhi_no_yuuutsu"
+  #
+  # Rank results with exact matches first (unless it's a small tag), then substring matches
+  # next (e.g. tags where the words are in the same order and next to each other), then word
+  # matches last (e.g. tags where the words are in a different order, or not next to each other).
+  #
   # @param string [String] the string to complete
   # @return [Array] the autocomplete results
-  def tag_matches(string)
+  def tag_word_matches(string)
+    query = Tag.parse_query(string)
+
+    name_matches = Tag.nonempty.where_all_in_array_like(:words, query)
+    alias_matches = Tag.nonempty.where(name: TagAlias.active.joins(:antecedent_tag).where_all_in_array_like("tags.words", query).select(:consequent_name))
+    union = "((#{name_matches.to_sql}) UNION (#{alias_matches.to_sql})) AS tags"
+    tags = Tag.from(union).includes(:consequent_aliases).order(post_count: :desc, name: :asc).limit(100)
+
+    results = tags.map do |tag|
+      antecedent = tag.tag_alias_for_word_pattern(string)&.antecedent_name
+      { type: "tag-word", label: tag.pretty_name, value: tag.name, category: tag.category, post_count: tag.post_count, antecedent: antecedent }
+    end
+
+    results = results.sort_by do |result|
+      name = result[:antecedent] || result[:value]
+      post_count = result[:post_count]
+
+      large = post_count > 100 ? 1 : 0
+      exact = name == string ? 1 : 0
+      substr = name.include?(string) ? 1 : 0
+
+      [-large, -exact, -substr, -post_count, result[:value]]
+    end
+
+    results.take(limit)
+  end
+
+  # Find tags or tag aliases starting with the given search string.
+  #
+  # @param string [String] the string to complete
+  # @return [Array] the autocomplete results
+  def tag_prefix_matches(string)
+    tag_wildcard_matches(string + "*")
+  end
+
+  # Find tags or tag aliases matching a wildcard search.
+  #
+  # @param string [String] the string to complete
+  # @return [Array] the autocomplete results
+  def tag_wildcard_matches(string)
     name_matches = Tag.nonempty.name_matches(string).order(post_count: :desc).limit(limit)
     alias_matches = Tag.nonempty.alias_matches(string).order(post_count: :desc).limit(limit)
     union = "((#{name_matches.to_sql}) UNION (#{alias_matches.to_sql})) AS tags"
@@ -148,9 +189,13 @@ class AutocompleteService
     string += "*" unless string.include?("*")
     tags = Tag.nonempty.abbreviation_matches(string).order(post_count: :desc).limit(limit)
 
-    tags.map do |tag|
+    results = tags.map do |tag|
       { type: "tag-abbreviation", label: tag.pretty_name, value: tag.name, category: tag.category, post_count: tag.post_count, antecedent: "/" + tag.abbreviation }
+    end.sort_by do |r|
+      [r[:antecedent].to_s.size, -r[:post_count]]
     end
+
+    results.uniq { |r| r[:value] }.take(limit)
   end
 
   # Find tags matching a mispelled tag.
@@ -346,5 +391,12 @@ class AutocompleteService
     PostQuery.new(query)
   end
 
-  memoize :autocomplete_results, :parsed_query
+  # The metatag being completed, if any.
+  #
+  # @return [Object, nil] the query's only metatag when this is a tag query with exactly one metatag, otherwise nil
+  def metatag
+    parsed_query.metatags.first if type == :tag_query && parsed_query.metatags.one?
+  end
+
+  memoize :autocomplete_results, :parsed_query, :metatag
 end
diff --git a/app/logical/concerns/searchable.rb b/app/logical/concerns/searchable.rb
index 232ffbb9f..85a722695 100644
--- a/app/logical/concerns/searchable.rb
+++ b/app/logical/concerns/searchable.rb
@@ -125,9 +125,26 @@ module Searchable
     where("? ~<< ANY(#{qualified_column_for(attr)})", "(?#{flags})#{regex}")
   end
 
+  # Perform a Postgres full-text search on an array of strings. Assumes the query is already escaped.
   # The column should have a `array_to_tsvector(column) using gin` index for best performance.
+  #
+  # @see https://www.postgresql.org/docs/current/datatype-textsearch.html#DATATYPE-TSQUERY
+  def where_array_to_tsvector_matches(attr, query)
+    where("array_to_tsvector(#{qualified_column_for(attr)}) @@ ?::tsquery", query)
+  end
+
   def where_any_in_array_starts_with(attr, value)
-    where("array_to_tsvector(#{qualified_column_for(attr)}) @@ ?", value.to_escaped_for_tsquery + ":*")
+    where_array_to_tsvector_matches(attr, value.to_escaped_for_tsquery + ":*")
+  end
+
+  # Match rows where the array column contains a match for every pattern. Only a trailing `*` is a wildcard (prefix match).
+  def where_all_in_array_like(attr, patterns)
+    where_array_to_tsvector_matches(attr, escape_patterns_for_tsquery(patterns).join(" & "))
+  end
+
+  # Match rows where the array column contains a match for at least one pattern. Only a trailing `*` is a wildcard (prefix match).
+  def where_any_in_array_like(attr, patterns)
+    where_array_to_tsvector_matches(attr, escape_patterns_for_tsquery(patterns).join(" | "))
   end
 
   def where_text_includes_lower(attr, values)
@@ -614,9 +631,23 @@ module Searchable
   private
 
   def qualified_column_for(attr)
+    # Names that already contain a dot (e.g. "tags.words") are returned as-is and interpolated raw into SQL; never pass user input here.
+    return attr if attr.to_s.include?(".")
     "#{table_name}.#{column_for_attribute(attr).name}"
   end
 
+  # @param patterns [Array] An array of wildcard patterns to escape for a tsquery search.
+  # @return [Array] The escaped patterns; a pattern ending in `*` becomes a `:*` prefix match.
+  def escape_patterns_for_tsquery(patterns)
+    patterns.map do |pattern|
+      if pattern.ends_with?("*")
+        pattern.delete_suffix("*").to_escaped_for_tsquery + ":*"
+      else
+        pattern.to_escaped_for_tsquery
+      end
+    end
+  end
+
   # Convert a column name or a raw SQL fragment to an Arel node.
   #
   # @param field [String, Arel::Nodes::Node] an Arel node, the name of a table
diff --git a/app/models/tag.rb b/app/models/tag.rb
index d968cd3ad..aad38d8ca 100644
--- a/app/models/tag.rb
+++ b/app/models/tag.rb
@@ -279,6 +279,25 @@ class Tag < ApplicationRecord
       def parsable_into_words?(name)
         name.match?(/[a-zA-Z0-9]{2}/)
       end
+
+      # True if the `string` contains all the words in the `query`.
+      #
+      #   Tag.includes_all_words?("holding_hands", ["hand*", "hold*"]) => true
+      def includes_all_words?(string, query)
+        words = parse_words(string)
+        query.all? { |pattern| words.any? { |word| word.ilike?(pattern) } }
+      end
+
+      # Parse a string into a query for performing a word-based search.
+      #
+      #   Tag.parse_query("holding_hand") => ["holding", "hand*"]
+      #   Tag.parse_query("looking_at_") => ["looking", "at"]
+      def parse_query(string)
+        query = parse_words(string)
+        # Skip the wildcard when there are no words: `nil + "*"` would raise NoMethodError.
+        query[-1] += "*" unless query.empty? || string.match?(/[#{WORD_DELIMITERS}]\z/)
+        query
+      end
     end
   end
 
@@ -452,6 +471,20 @@ class Tag < ApplicationRecord
     end
   end
 
+  # If this tag has aliases, find the shortest alias matching the given pattern.
+  #
+  # @param query [String] the search string; it is parsed into word patterns with Tag.parse_query
+  # @return [Object, nil] the shortest matching alias, or nil if the tag's own name already matches or no alias matches
+  def tag_alias_for_word_pattern(query)
+    query = Tag.parse_query(query)
+
+    # Whether the tag's own name matches doesn't depend on the alias, so check it once up front.
+    return nil if Tag.includes_all_words?(name, query)
+
+    aliases = consequent_aliases.sort_by { |ca| [ca.antecedent_name.size, ca.antecedent_name] }
+    aliases.find { |tag_alias| Tag.includes_all_words?(tag_alias.antecedent_name, query) }
+  end
+
   def is_aliased?
     aliased_tag.present?
   end