autocomplete: optimize various types of bogus input.

Optimize autocomplete to ignore various types of bogus input that will
never match anything. It turns out it's not uncommon for people to do
things like paste random URLs into autocomplete, or hold down keys, or
enter long strings of gibberish text (sometimes in other languages).
Some things, like autocorrect and slash abbreviations, become
pathologically slow when fed certain types of bad input.

Autocomplete will abort and return nothing in the following situations:

* Searching for URLs (tags that start with http:// or https://).
* Overly long tags (strings longer than the 170 char tag name limit).
* Slash abbreviations longer than 10 chars (e.g. typing `/qwoijqoiqogirqewgoi`).
* Slash abbreviations that aren't alphanumeric (e.g. typing `/////////`).
* Autocorrect input that contains too much punctuation and not enough actual letters.
This commit is contained in:
evazion
2021-01-11 02:11:28 -06:00
parent fc5db679e4
commit be1251b6be
4 changed files with 35 additions and 13 deletions

View File

@@ -65,6 +65,9 @@ class AutocompleteService
end
def autocomplete_tag(string)
return [] if string.size > TagNameValidator::MAX_TAG_LENGTH
return [] if string.start_with?("http://", "https://")
if !string.ascii_only?
results = tag_other_name_matches(string)
elsif string.starts_with?("/")
@@ -88,8 +91,6 @@ class AutocompleteService
end
def tag_matches(string)
return [] unless string.ascii_only?
name_matches = Tag.nonempty.name_matches(string).order(post_count: :desc).limit(limit)
alias_matches = Tag.nonempty.alias_matches(string).order(post_count: :desc).limit(limit)
union = "((#{name_matches.to_sql}) UNION (#{alias_matches.to_sql})) AS tags"
@@ -102,7 +103,9 @@ class AutocompleteService
end
end
def tag_abbreviation_matches(string)
def tag_abbreviation_matches(string, max_length: 10)
return [] if string.size > max_length
tags = Tag.nonempty.abbreviation_matches(string).order(post_count: :desc).limit(limit)
tags.map do |tag|
@@ -111,6 +114,9 @@ class AutocompleteService
end
def tag_autocorrect_matches(string)
# autocorrect uses trigram indexing, which needs at least 3 alphanumeric characters to work.
return [] if string.remove(/[^a-zA-Z0-9]/).size < 3
tags = Tag.nonempty.autocorrect_matches(string).limit(limit)
tags.map do |tag|

View File

@@ -1,9 +1,11 @@
class TagNameValidator < ActiveModel::EachValidator
MAX_TAG_LENGTH = 170
def validate_each(record, attribute, value)
value = Tag.normalize_name(value)
if value.size > 170
record.errors.add(attribute, "'#{value}' cannot be more than 255 characters long")
if value.size > MAX_TAG_LENGTH
record.errors.add(attribute, "'#{value}' cannot be more than #{MAX_TAG_LENGTH} characters long")
end
case value
@@ -30,6 +32,7 @@ class TagNameValidator < ActiveModel::EachValidator
when "new", "search"
record.errors.add(attribute, "'#{value}' is a reserved name and cannot be used")
when /\A(.+)_\(cosplay\)\z/i
# XXX don't allow aliases here?
tag_name = TagAlias.to_aliased([$1]).first
tag = Tag.find_by_name(tag_name)

View File

@@ -254,12 +254,13 @@ class Tag < ApplicationRecord
end
def abbreviation_matches(abbrev)
abbrev = abbrev.delete_prefix("/")
abbrev = abbrev.downcase.delete_prefix("/")
return none if abbrev !~ /\A[a-z0-9\*]*\z/
where("regexp_replace(tags.name, ?, '\\1', 'g') LIKE ?", ABBREVIATION_REGEXP.source, abbrev.to_escaped_for_sql_like)
end
def find_by_abbreviation(abbrev)
abbrev = abbrev.delete_prefix("/")
abbreviation_matches(abbrev.escape_wildcards).order(post_count: :desc).first
end

View File

@@ -87,15 +87,20 @@ class AutocompleteServiceTest < ActiveSupport::TestCase
create(:tag, name: "mole", post_count: 150)
create(:tag, name: "mole_under_eye", post_count: 100)
create(:tag, name: "mole_under_mouth", post_count: 50)
create(:tag, name: "x_x_x_x_x_x_x_x_x_x_x_x", post_count: 1)
assert_autocomplete_equals(%w[mole mole_under_eye mole_under_mouth], "/m", :tag_query)
assert_autocomplete_equals(%w[mole_under_eye mole_under_mouth], "/mu", :tag_query)
assert_autocomplete_equals(%w[mole_under_mouth], "/mum", :tag_query)
assert_autocomplete_equals(%w[mole_under_eye], "/mue", :tag_query)
assert_autocomplete_equals(%w[mole_under_eye], "/*ue", :tag_query)
assert_autocomplete_equals(%w[mole_under_eye], "/MUE", :tag_query)
assert_autocomplete_includes("mole_under_eye", "-/mue", :tag_query)
assert_autocomplete_includes("mole_under_eye", "~/mue", :tag_query)
assert_autocomplete_equals([], "/xxxxxxxxxx", :tag_query)
assert_autocomplete_equals([], "/_", :tag_query)
end
should "list aliases before abbreviations" do
@@ -117,19 +122,19 @@ class AutocompleteServiceTest < ActiveSupport::TestCase
assert_autocomplete_equals(["touhou"], "", :tag_query)
assert_autocomplete_equals(["touhou"], "", :tag_query)
assert_autocomplete_equals(["touhou"], "*東*", :tag_query)
assert_autocomplete_equals(["touhou"], "東*", :tag_query)
assert_autocomplete_equals([], "*東*", :tag_query)
assert_autocomplete_equals([], "東*", :tag_query)
assert_autocomplete_equals([], "*東", :tag_query)
assert_autocomplete_equals(["touhou"], "*方*", :tag_query)
assert_autocomplete_equals(["touhou"], "*方", :tag_query)
assert_autocomplete_equals([], "*方*", :tag_query)
assert_autocomplete_equals([], "*方", :tag_query)
assert_autocomplete_equals([], "", :tag_query)
assert_autocomplete_equals(["bkub"], "*大*", :tag_query)
assert_autocomplete_equals([], "*大*", :tag_query)
assert_autocomplete_equals(["bkub"], "", :tag_query)
assert_autocomplete_equals([], "*大", :tag_query)
assert_autocomplete_equals(["bkub"], "*川*", :tag_query)
assert_autocomplete_equals([], "*川*", :tag_query)
assert_autocomplete_equals([], "*川", :tag_query)
assert_autocomplete_equals([], "", :tag_query)
end
@@ -148,6 +153,8 @@ class AutocompleteServiceTest < ActiveSupport::TestCase
create(:tag, name: "touhou")
assert_autocomplete_equals(%w[touhou], "touhuo", :tag_query)
assert_autocomplete_equals(%w[], ".....", :tag_query)
assert_autocomplete_equals(%w[], "t___", :tag_query)
end
should "ignore unsupported metatags" do
@@ -193,6 +200,11 @@ class AutocompleteServiceTest < ActiveSupport::TestCase
assert_autocomplete_equals(["order:score", "order:score_asc"], "order:sco", :tag_query)
end
should "ignore bogus tags" do
assert_autocomplete_equals([], "x"*200, :tag_query)
assert_autocomplete_equals([], "http://www.google.com", :tag_query)
end
end
end
end