autocomplete: switch to word-based tag matching.
Switch autocomplete to match individual words in the tag, instead of only matching the start of the tag. For example, "hair" matches any tag containing the word "hair", not just tags starting with "hair". "long_hair" matches all tags containing the words "long" and "hair", which includes "very_long_hair" and "absurdly_long_hair". Words can be in any order and words can be left out. So "closed_eye" matches "one_eye_closed". "asuka_langley_souryuu" matches "souryuu_asuka_langley". This has several advantages: * You can search characters by first name. For example, "miku" matches "hatsune_miku". "zelda" matches both "princess_zelda" and "the_legend_of_zelda". * You can find the right tag even if you get the word order wrong, or forget a word. For example, "eyes_closed" matches "closed_eyes". "hair_over_eye" matches "hair_over_one_eye". * You can find more related tags. For example, searching "skirt" shows all tags containing the word "skirt", not just tags starting with "skirt". The downside is this may break muscle memory by changing the autocomplete order of some tags. This is an acceptable trade-off. You can get the old behavior by writing a "*" at the end of the tag. For example, searching "skirt*" gives the same results as before.
This commit is contained in:
@@ -78,8 +78,7 @@ class AutocompleteService
|
||||
#
|
||||
# @return [Array<Hash>] the autocomplete results
|
||||
def autocomplete_tag_query
|
||||
if parsed_query.metatags.one?
|
||||
metatag = parsed_query.metatags.first
|
||||
if metatag.present?
|
||||
autocomplete_metatag(metatag.name, metatag.value)
|
||||
else
|
||||
tag = Tag.normalize_name(query)
|
||||
@@ -105,25 +104,67 @@ class AutocompleteService
|
||||
results = tag_other_name_matches(string)
|
||||
elsif string.starts_with?("/")
|
||||
results = tag_abbreviation_matches(string)
|
||||
results = results.sort_by do |r|
|
||||
[r[:antecedent].to_s.size, -r[:post_count]]
|
||||
end
|
||||
|
||||
results = results.uniq { |r| r[:value] }.take(limit)
|
||||
elsif string.include?("*")
|
||||
results = tag_matches(string)
|
||||
else
|
||||
results = tag_matches(string + "*")
|
||||
results = tag_wildcard_matches(string)
|
||||
elsif Tag.parsable_into_words?(string) # do a word match if the search contains at least 2 contiguous letters or numbers
|
||||
results = tag_word_matches(string)
|
||||
results = tag_autocorrect_matches(string) if results.blank?
|
||||
else
|
||||
results = tag_prefix_matches(string)
|
||||
end
|
||||
|
||||
results
|
||||
end
|
||||
|
||||
# Find tags or tag aliases matching a wildcard search.
|
||||
# Find tags or tag aliases containing all the words in the search string, in any order.
|
||||
# Example: "haruhi_suzumiya" => "suzumiya_haruhi_no_yuuutsu"
|
||||
#
|
||||
# Rank results with exact matches first (unless it's a small tag), then substring matches
|
||||
# next (e.g. tags where the words are in the same order and next to each other), then word
|
||||
# matches last (e.g. tags where the words are in a different order, or not next to each other).
|
||||
#
|
||||
# @param string [String] the string to complete
|
||||
# @return [Array<Hash>] the autocomplete results
|
||||
def tag_matches(string)
|
||||
def tag_word_matches(string)
  words = Tag.parse_query(string)

  # Candidates come from two places: nonempty tags whose own words match, and nonempty tags
  # that are the consequent of an active alias whose antecedent tag's words match.
  by_name = Tag.nonempty.where_all_in_array_like(:words, words)
  matching_aliases = TagAlias.active.joins(:antecedent_tag).where_all_in_array_like("tags.words", words)
  by_alias = Tag.nonempty.where(name: matching_aliases.select(:consequent_name))

  union = "((#{by_name.to_sql}) UNION (#{by_alias.to_sql})) AS tags"
  candidates = Tag.from(union).includes(:consequent_aliases).order(post_count: :desc, name: :asc).limit(100)

  results = candidates.map do |tag|
    # Only set when the match came through an alias rather than through the tag's own name.
    alias_name = tag.tag_alias_for_word_pattern(string)&.antecedent_name

    {
      type: "tag-word",
      label: tag.pretty_name,
      value: tag.name,
      category: tag.category,
      post_count: tag.post_count,
      antecedent: alias_name,
    }
  end

  # Each flag is 0 when the condition holds so that those results sort first:
  # tags with more than 100 posts, then exact matches, then substring matches,
  # then higher post counts, with the tag name as the final tie-breaker.
  ranked = results.sort_by do |result|
    matched_name = result[:antecedent] || result[:value]
    count = result[:post_count]

    [
      count > 100 ? 0 : 1,
      matched_name == string ? 0 : 1,
      matched_name.include?(string) ? 0 : 1,
      -count,
      result[:value],
    ]
  end

  ranked.take(limit)
end
|
||||
|
||||
# Find tags or tag aliases starting with the given search string.
|
||||
#
|
||||
# @param string [String] the string to complete
|
||||
# @return [Array<Hash>] the autocomplete results
|
||||
def tag_prefix_matches(string)
  # A prefix search is just a wildcard search with a trailing "*".
  prefix_pattern = string + "*"
  tag_wildcard_matches(prefix_pattern)
end
|
||||
|
||||
# Find tags or tag aliases matching a wildcard search.
|
||||
#
|
||||
# @param string [String] the string to complete
|
||||
# @return [Array<Hash>] the autocomplete results
|
||||
def tag_wildcard_matches(string)
|
||||
name_matches = Tag.nonempty.name_matches(string).order(post_count: :desc).limit(limit)
|
||||
alias_matches = Tag.nonempty.alias_matches(string).order(post_count: :desc).limit(limit)
|
||||
union = "((#{name_matches.to_sql}) UNION (#{alias_matches.to_sql})) AS tags"
|
||||
@@ -148,9 +189,13 @@ class AutocompleteService
|
||||
string += "*" unless string.include?("*")
|
||||
tags = Tag.nonempty.abbreviation_matches(string).order(post_count: :desc).limit(limit)
|
||||
|
||||
tags.map do |tag|
|
||||
results = tags.map do |tag|
|
||||
{ type: "tag-abbreviation", label: tag.pretty_name, value: tag.name, category: tag.category, post_count: tag.post_count, antecedent: "/" + tag.abbreviation }
|
||||
end.sort_by do |r|
|
||||
[r[:antecedent].to_s.size, -r[:post_count]]
|
||||
end
|
||||
|
||||
results.uniq { |r| r[:value] }.take(limit)
|
||||
end
|
||||
|
||||
# Find tags matching a misspelled tag.
|
||||
@@ -346,5 +391,9 @@ class AutocompleteService
|
||||
PostQuery.new(query)
|
||||
end
|
||||
|
||||
memoize :autocomplete_results, :parsed_query
|
||||
# The query's single metatag, or nil.
#
# Only returns a value when `type` is :tag_query and the parsed query contains exactly one metatag.
def metatag
  return nil unless type == :tag_query

  metatags = parsed_query.metatags
  metatags.one? ? metatags.first : nil
end
|
||||
|
||||
memoize :autocomplete_results, :parsed_query, :metatag
|
||||
end
|
||||
|
||||
@@ -125,9 +125,24 @@ module Searchable
|
||||
where("? ~<< ANY(#{qualified_column_for(attr)})", "(?#{flags})#{regex}")
|
||||
end
|
||||
|
||||
# Perform a Postgres full-text search on an array of strings. Assumes the query is already escaped.
|
||||
# The column should have an `array_to_tsvector(column) using gin` index for best performance.
|
||||
#
|
||||
# @see https://www.postgresql.org/docs/current/datatype-textsearch.html#DATATYPE-TSQUERY
|
||||
def where_array_to_tsvector_matches(attr, query)
  column = qualified_column_for(attr)
  # The column name is interpolated into the SQL; the tsquery text is passed as a bind value.
  condition = "array_to_tsvector(#{column}) @@ ?::tsquery"

  where(condition, query)
end
|
||||
|
||||
# Match rows where any element of the array column starts with `value`.
#
# The value is escaped for use in a tsquery and given the `:*` prefix-match suffix, then
# handed to `where_array_to_tsvector_matches`. The earlier inline `where(...)` call was removed:
# its result was discarded, so it had no effect on the returned relation.
#
# @param attr [Symbol, String] the array column to search
# @param value [String] the prefix to search for
def where_any_in_array_starts_with(attr, value)
  where_array_to_tsvector_matches(attr, value.to_escaped_for_tsquery + ":*")
end
|
||||
|
||||
# Match rows whose array column satisfies every pattern (patterns are ANDed with " & ").
#
# @param attr [Symbol, String] the array column to search
# @param patterns [Array<String>] the patterns; a trailing "*" makes a pattern a prefix match
def where_all_in_array_like(attr, patterns)
  tsquery = escape_patterns_for_tsquery(patterns).join(" & ")
  where_array_to_tsvector_matches(attr, tsquery)
end
|
||||
|
||||
# Match rows whose array column satisfies at least one pattern (patterns are ORed with " | ").
#
# @param attr [Symbol, String] the array column to search
# @param patterns [Array<String>] the patterns; a trailing "*" makes a pattern a prefix match
def where_any_in_array_like(attr, patterns)
  tsquery = escape_patterns_for_tsquery(patterns).join(" | ")
  where_array_to_tsvector_matches(attr, tsquery)
end
|
||||
|
||||
def where_text_includes_lower(attr, values)
|
||||
@@ -614,9 +629,21 @@ module Searchable
|
||||
private
|
||||
|
||||
# Return a table-qualified column name for `attr`.
#
# Anything whose string form already contains a "." is returned untouched, so callers may pass
# an explicit "table.column" reference; otherwise the name is built from this model's table.
def qualified_column_for(attr)
  if attr.to_s.include?(".")
    attr
  else
    [table_name, column_for_attribute(attr).name].join(".")
  end
end
|
||||
|
||||
# @param patterns [Array<String>] An array of wildcard patterns to escape for a tsquery search.
|
||||
def escape_patterns_for_tsquery(patterns)
  patterns.map do |pattern|
    # Only a trailing "*" is treated as a wildcard; it becomes the tsquery prefix marker ":*".
    prefix_search = pattern.ends_with?("*")
    term = prefix_search ? pattern.delete_suffix("*") : pattern
    escaped = term.to_escaped_for_tsquery

    prefix_search ? escaped + ":*" : escaped
  end
end
|
||||
|
||||
# Convert a column name or a raw SQL fragment to an Arel node.
|
||||
#
|
||||
# @param field [String, Arel::Nodes::Node] an Arel node, the name of a table
|
||||
|
||||
@@ -279,6 +279,24 @@ class Tag < ApplicationRecord
|
||||
# True if `name` contains at least two consecutive ASCII letters or digits
# (the regex requires two characters in a row from a-z, A-Z, or 0-9).
#
# @param name [String] the tag name or search string to test
# @return [Boolean]
def parsable_into_words?(name)
|
||||
name.match?(/[a-zA-Z0-9]{2}/)
|
||||
end
|
||||
|
||||
# True if the `string` contains all the words in the `query`.
|
||||
#
|
||||
# Tag.includes_all_words?("holding_hands", ["hand*", "hold*"]) => true
|
||||
def includes_all_words?(string, query)
  words = parse_words(string)

  # A pattern is satisfied when at least one word of the string matches it.
  satisfied = ->(pattern) { words.any? { |word| word.ilike?(pattern) } }

  query.all?(&satisfied)
end
|
||||
|
||||
# Parse a string into a query for performing a word-based search.
|
||||
#
|
||||
# Tag.parse_query("holding_hand") => ["holding", "hand*"]
|
||||
# Tag.parse_query("looking_at_") => ["looking", "at"]
|
||||
def parse_query(string)
  words = parse_words(string)

  # With no words there is no last word to turn into a prefix pattern; return the empty list
  # instead of failing on `nil + "*"`.
  return words if words.empty?

  # A trailing delimiter means the user has finished typing the last word, so it is matched
  # as a whole word rather than as a prefix.
  return words if string.match?(/[#{WORD_DELIMITERS}]\z/)

  # Build a new array so the list returned by parse_words is left untouched.
  [*words[0...-1], words.last + "*"]
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -452,6 +470,19 @@ class Tag < ApplicationRecord
|
||||
end
|
||||
end
|
||||
|
||||
# If this tag has aliases, find the shortest alias matching the given pattern.
|
||||
def tag_alias_for_word_pattern(query)
  words = Tag.parse_query(query)

  # An alias is only reported when the tag's own name does NOT match the search. This check
  # does not depend on the alias, so do it once up front instead of once per alias.
  return nil if Tag.includes_all_words?(name, words)

  # Among the aliases whose antecedent matches, pick the shortest name (ties broken alphabetically).
  consequent_aliases
    .select { |tag_alias| Tag.includes_all_words?(tag_alias.antecedent_name, words) }
    .min_by { |tag_alias| [tag_alias.antecedent_name.size, tag_alias.antecedent_name] }
end
|
||||
|
||||
# True if this tag has an aliased tag, i.e. `aliased_tag` is not blank.
def is_aliased?
  !aliased_tag.blank?
end
|
||||
|
||||
Reference in New Issue
Block a user