autocomplete: optimize various types of bogus input.
Optimize autocomplete to ignore various types of bogus input that will never match anything. It turns out it's not uncommon for people to do things like paste random URLs into autocomplete, or hold down keys, or enter long strings of gibberish text (sometimes in other languages). Some things, like autocorrect and slash abbreviations, become pathologically slow when fed certain types of bad input. Autocomplete will abort and return nothing in the following situations: * Searching for URLs (tags that start with http:// or https://). * Overly long tags (strings longer than the 170 char tag name limit). * Slash abbreviations longer than 10 chars (e.g. typing `/qwoijqoiqogirqewgoi`). * Slash abbreviations that aren't alphanumeric (e.g. typing `/////////`). * Autocorrect input that contains too much punctuation and not enough actual letters.
This commit is contained in:
@@ -65,6 +65,9 @@ class AutocompleteService
|
||||
end
|
||||
|
||||
def autocomplete_tag(string)
|
||||
return [] if string.size > TagNameValidator::MAX_TAG_LENGTH
|
||||
return [] if string.start_with?("http://", "https://")
|
||||
|
||||
if !string.ascii_only?
|
||||
results = tag_other_name_matches(string)
|
||||
elsif string.starts_with?("/")
|
||||
@@ -88,8 +91,6 @@ class AutocompleteService
|
||||
end
|
||||
|
||||
def tag_matches(string)
|
||||
return [] unless string.ascii_only?
|
||||
|
||||
name_matches = Tag.nonempty.name_matches(string).order(post_count: :desc).limit(limit)
|
||||
alias_matches = Tag.nonempty.alias_matches(string).order(post_count: :desc).limit(limit)
|
||||
union = "((#{name_matches.to_sql}) UNION (#{alias_matches.to_sql})) AS tags"
|
||||
@@ -102,7 +103,9 @@ class AutocompleteService
|
||||
end
|
||||
end
|
||||
|
||||
def tag_abbreviation_matches(string)
|
||||
def tag_abbreviation_matches(string, max_length: 10)
|
||||
return [] if string.size > max_length
|
||||
|
||||
tags = Tag.nonempty.abbreviation_matches(string).order(post_count: :desc).limit(limit)
|
||||
|
||||
tags.map do |tag|
|
||||
@@ -111,6 +114,9 @@ class AutocompleteService
|
||||
end
|
||||
|
||||
def tag_autocorrect_matches(string)
|
||||
# autocorrect uses trigram indexing, which needs at least 3 alphanumeric characters to work.
|
||||
return [] if string.remove(/[^a-zA-Z0-9]/).size < 3
|
||||
|
||||
tags = Tag.nonempty.autocorrect_matches(string).limit(limit)
|
||||
|
||||
tags.map do |tag|
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
class TagNameValidator < ActiveModel::EachValidator
|
||||
MAX_TAG_LENGTH = 170
|
||||
|
||||
def validate_each(record, attribute, value)
|
||||
value = Tag.normalize_name(value)
|
||||
|
||||
if value.size > 170
|
||||
record.errors.add(attribute, "'#{value}' cannot be more than 255 characters long")
|
||||
if value.size > MAX_TAG_LENGTH
|
||||
record.errors.add(attribute, "'#{value}' cannot be more than #{MAX_TAG_LENGTH} characters long")
|
||||
end
|
||||
|
||||
case value
|
||||
@@ -30,6 +32,7 @@ class TagNameValidator < ActiveModel::EachValidator
|
||||
when "new", "search"
|
||||
record.errors.add(attribute, "'#{value}' is a reserved name and cannot be used")
|
||||
when /\A(.+)_\(cosplay\)\z/i
|
||||
# XXX don't allow aliases here?
|
||||
tag_name = TagAlias.to_aliased([$1]).first
|
||||
tag = Tag.find_by_name(tag_name)
|
||||
|
||||
|
||||
@@ -254,12 +254,13 @@ class Tag < ApplicationRecord
|
||||
end
|
||||
|
||||
def abbreviation_matches(abbrev)
|
||||
abbrev = abbrev.delete_prefix("/")
|
||||
abbrev = abbrev.downcase.delete_prefix("/")
|
||||
return none if abbrev !~ /\A[a-z0-9\*]*\z/
|
||||
|
||||
where("regexp_replace(tags.name, ?, '\\1', 'g') LIKE ?", ABBREVIATION_REGEXP.source, abbrev.to_escaped_for_sql_like)
|
||||
end
|
||||
|
||||
def find_by_abbreviation(abbrev)
|
||||
abbrev = abbrev.delete_prefix("/")
|
||||
abbreviation_matches(abbrev.escape_wildcards).order(post_count: :desc).first
|
||||
end
|
||||
|
||||
|
||||
@@ -87,15 +87,20 @@ class AutocompleteServiceTest < ActiveSupport::TestCase
|
||||
create(:tag, name: "mole", post_count: 150)
|
||||
create(:tag, name: "mole_under_eye", post_count: 100)
|
||||
create(:tag, name: "mole_under_mouth", post_count: 50)
|
||||
create(:tag, name: "x_x_x_x_x_x_x_x_x_x_x_x", post_count: 1)
|
||||
|
||||
assert_autocomplete_equals(%w[mole mole_under_eye mole_under_mouth], "/m", :tag_query)
|
||||
assert_autocomplete_equals(%w[mole_under_eye mole_under_mouth], "/mu", :tag_query)
|
||||
assert_autocomplete_equals(%w[mole_under_mouth], "/mum", :tag_query)
|
||||
assert_autocomplete_equals(%w[mole_under_eye], "/mue", :tag_query)
|
||||
assert_autocomplete_equals(%w[mole_under_eye], "/*ue", :tag_query)
|
||||
assert_autocomplete_equals(%w[mole_under_eye], "/MUE", :tag_query)
|
||||
|
||||
assert_autocomplete_includes("mole_under_eye", "-/mue", :tag_query)
|
||||
assert_autocomplete_includes("mole_under_eye", "~/mue", :tag_query)
|
||||
|
||||
assert_autocomplete_equals([], "/xxxxxxxxxx", :tag_query)
|
||||
assert_autocomplete_equals([], "/_", :tag_query)
|
||||
end
|
||||
|
||||
should "list aliases before abbreviations" do
|
||||
@@ -117,19 +122,19 @@ class AutocompleteServiceTest < ActiveSupport::TestCase
|
||||
assert_autocomplete_equals(["touhou"], "东", :tag_query)
|
||||
assert_autocomplete_equals(["touhou"], "동", :tag_query)
|
||||
|
||||
assert_autocomplete_equals(["touhou"], "*東*", :tag_query)
|
||||
assert_autocomplete_equals(["touhou"], "東*", :tag_query)
|
||||
assert_autocomplete_equals([], "*東*", :tag_query)
|
||||
assert_autocomplete_equals([], "東*", :tag_query)
|
||||
assert_autocomplete_equals([], "*東", :tag_query)
|
||||
|
||||
assert_autocomplete_equals(["touhou"], "*方*", :tag_query)
|
||||
assert_autocomplete_equals(["touhou"], "*方", :tag_query)
|
||||
assert_autocomplete_equals([], "*方*", :tag_query)
|
||||
assert_autocomplete_equals([], "*方", :tag_query)
|
||||
assert_autocomplete_equals([], "方", :tag_query)
|
||||
|
||||
assert_autocomplete_equals(["bkub"], "*大*", :tag_query)
|
||||
assert_autocomplete_equals([], "*大*", :tag_query)
|
||||
assert_autocomplete_equals(["bkub"], "大", :tag_query)
|
||||
assert_autocomplete_equals([], "*大", :tag_query)
|
||||
|
||||
assert_autocomplete_equals(["bkub"], "*川*", :tag_query)
|
||||
assert_autocomplete_equals([], "*川*", :tag_query)
|
||||
assert_autocomplete_equals([], "*川", :tag_query)
|
||||
assert_autocomplete_equals([], "川", :tag_query)
|
||||
end
|
||||
@@ -148,6 +153,8 @@ class AutocompleteServiceTest < ActiveSupport::TestCase
|
||||
create(:tag, name: "touhou")
|
||||
|
||||
assert_autocomplete_equals(%w[touhou], "touhuo", :tag_query)
|
||||
assert_autocomplete_equals(%w[], ".....", :tag_query)
|
||||
assert_autocomplete_equals(%w[], "t___", :tag_query)
|
||||
end
|
||||
|
||||
should "ignore unsupported metatags" do
|
||||
@@ -193,6 +200,11 @@ class AutocompleteServiceTest < ActiveSupport::TestCase
|
||||
|
||||
assert_autocomplete_equals(["order:score", "order:score_asc"], "order:sco", :tag_query)
|
||||
end
|
||||
|
||||
should "ignore bogus tags" do
|
||||
assert_autocomplete_equals([], "x"*200, :tag_query)
|
||||
assert_autocomplete_equals([], "http://www.google.com", :tag_query)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user