autocomplete: optimize various types of bogus input.

Optimize autocomplete to ignore various types of bogus input that will
never match anything. It turns out it's not uncommon for people to do
things like paste random URLs into autocomplete, or hold down keys, or
enter long strings of gibberish text (sometimes in other languages).
Some things, like autocorrect and slash abbreviations, become
pathologically slow when fed certain types of bad input.

Autocomplete will abort and return nothing in the following situations:

* Searching for URLs (tags that start with http:// or https://).
* Overly long tags (strings longer than the 170 char tag name limit).
* Slash abbreviations longer than 10 chars (e.g. typing `/qwoijqoiqogirqewgoi`).
* Slash abbreviations that aren't alphanumeric (e.g. typing `/////////`).
* Autocorrect input that contains too much punctuation and not enough actual letters.
This commit is contained in:
evazion
2021-01-11 02:11:28 -06:00
parent fc5db679e4
commit be1251b6be
4 changed files with 35 additions and 13 deletions

View File

@@ -65,6 +65,9 @@ class AutocompleteService
end end
def autocomplete_tag(string) def autocomplete_tag(string)
return [] if string.size > TagNameValidator::MAX_TAG_LENGTH
return [] if string.start_with?("http://", "https://")
if !string.ascii_only? if !string.ascii_only?
results = tag_other_name_matches(string) results = tag_other_name_matches(string)
elsif string.starts_with?("/") elsif string.starts_with?("/")
@@ -88,8 +91,6 @@ class AutocompleteService
end end
def tag_matches(string) def tag_matches(string)
return [] unless string.ascii_only?
name_matches = Tag.nonempty.name_matches(string).order(post_count: :desc).limit(limit) name_matches = Tag.nonempty.name_matches(string).order(post_count: :desc).limit(limit)
alias_matches = Tag.nonempty.alias_matches(string).order(post_count: :desc).limit(limit) alias_matches = Tag.nonempty.alias_matches(string).order(post_count: :desc).limit(limit)
union = "((#{name_matches.to_sql}) UNION (#{alias_matches.to_sql})) AS tags" union = "((#{name_matches.to_sql}) UNION (#{alias_matches.to_sql})) AS tags"
@@ -102,7 +103,9 @@ class AutocompleteService
end end
end end
def tag_abbreviation_matches(string) def tag_abbreviation_matches(string, max_length: 10)
return [] if string.size > max_length
tags = Tag.nonempty.abbreviation_matches(string).order(post_count: :desc).limit(limit) tags = Tag.nonempty.abbreviation_matches(string).order(post_count: :desc).limit(limit)
tags.map do |tag| tags.map do |tag|
@@ -111,6 +114,9 @@ class AutocompleteService
end end
def tag_autocorrect_matches(string) def tag_autocorrect_matches(string)
# autocorrect uses trigram indexing, which needs at least 3 alphanumeric characters to work.
return [] if string.remove(/[^a-zA-Z0-9]/).size < 3
tags = Tag.nonempty.autocorrect_matches(string).limit(limit) tags = Tag.nonempty.autocorrect_matches(string).limit(limit)
tags.map do |tag| tags.map do |tag|

View File

@@ -1,9 +1,11 @@
class TagNameValidator < ActiveModel::EachValidator class TagNameValidator < ActiveModel::EachValidator
MAX_TAG_LENGTH = 170
def validate_each(record, attribute, value) def validate_each(record, attribute, value)
value = Tag.normalize_name(value) value = Tag.normalize_name(value)
if value.size > 170 if value.size > MAX_TAG_LENGTH
record.errors.add(attribute, "'#{value}' cannot be more than 255 characters long") record.errors.add(attribute, "'#{value}' cannot be more than #{MAX_TAG_LENGTH} characters long")
end end
case value case value
@@ -30,6 +32,7 @@ class TagNameValidator < ActiveModel::EachValidator
when "new", "search" when "new", "search"
record.errors.add(attribute, "'#{value}' is a reserved name and cannot be used") record.errors.add(attribute, "'#{value}' is a reserved name and cannot be used")
when /\A(.+)_\(cosplay\)\z/i when /\A(.+)_\(cosplay\)\z/i
# XXX don't allow aliases here?
tag_name = TagAlias.to_aliased([$1]).first tag_name = TagAlias.to_aliased([$1]).first
tag = Tag.find_by_name(tag_name) tag = Tag.find_by_name(tag_name)

View File

@@ -254,12 +254,13 @@ class Tag < ApplicationRecord
end end
def abbreviation_matches(abbrev) def abbreviation_matches(abbrev)
abbrev = abbrev.delete_prefix("/") abbrev = abbrev.downcase.delete_prefix("/")
return none if abbrev !~ /\A[a-z0-9\*]*\z/
where("regexp_replace(tags.name, ?, '\\1', 'g') LIKE ?", ABBREVIATION_REGEXP.source, abbrev.to_escaped_for_sql_like) where("regexp_replace(tags.name, ?, '\\1', 'g') LIKE ?", ABBREVIATION_REGEXP.source, abbrev.to_escaped_for_sql_like)
end end
def find_by_abbreviation(abbrev) def find_by_abbreviation(abbrev)
abbrev = abbrev.delete_prefix("/")
abbreviation_matches(abbrev.escape_wildcards).order(post_count: :desc).first abbreviation_matches(abbrev.escape_wildcards).order(post_count: :desc).first
end end

View File

@@ -87,15 +87,20 @@ class AutocompleteServiceTest < ActiveSupport::TestCase
create(:tag, name: "mole", post_count: 150) create(:tag, name: "mole", post_count: 150)
create(:tag, name: "mole_under_eye", post_count: 100) create(:tag, name: "mole_under_eye", post_count: 100)
create(:tag, name: "mole_under_mouth", post_count: 50) create(:tag, name: "mole_under_mouth", post_count: 50)
create(:tag, name: "x_x_x_x_x_x_x_x_x_x_x_x", post_count: 1)
assert_autocomplete_equals(%w[mole mole_under_eye mole_under_mouth], "/m", :tag_query) assert_autocomplete_equals(%w[mole mole_under_eye mole_under_mouth], "/m", :tag_query)
assert_autocomplete_equals(%w[mole_under_eye mole_under_mouth], "/mu", :tag_query) assert_autocomplete_equals(%w[mole_under_eye mole_under_mouth], "/mu", :tag_query)
assert_autocomplete_equals(%w[mole_under_mouth], "/mum", :tag_query) assert_autocomplete_equals(%w[mole_under_mouth], "/mum", :tag_query)
assert_autocomplete_equals(%w[mole_under_eye], "/mue", :tag_query) assert_autocomplete_equals(%w[mole_under_eye], "/mue", :tag_query)
assert_autocomplete_equals(%w[mole_under_eye], "/*ue", :tag_query) assert_autocomplete_equals(%w[mole_under_eye], "/*ue", :tag_query)
assert_autocomplete_equals(%w[mole_under_eye], "/MUE", :tag_query)
assert_autocomplete_includes("mole_under_eye", "-/mue", :tag_query) assert_autocomplete_includes("mole_under_eye", "-/mue", :tag_query)
assert_autocomplete_includes("mole_under_eye", "~/mue", :tag_query) assert_autocomplete_includes("mole_under_eye", "~/mue", :tag_query)
assert_autocomplete_equals([], "/xxxxxxxxxx", :tag_query)
assert_autocomplete_equals([], "/_", :tag_query)
end end
should "list aliases before abbreviations" do should "list aliases before abbreviations" do
@@ -117,19 +122,19 @@ class AutocompleteServiceTest < ActiveSupport::TestCase
assert_autocomplete_equals(["touhou"], "", :tag_query) assert_autocomplete_equals(["touhou"], "", :tag_query)
assert_autocomplete_equals(["touhou"], "", :tag_query) assert_autocomplete_equals(["touhou"], "", :tag_query)
assert_autocomplete_equals(["touhou"], "*東*", :tag_query) assert_autocomplete_equals([], "*東*", :tag_query)
assert_autocomplete_equals(["touhou"], "東*", :tag_query) assert_autocomplete_equals([], "東*", :tag_query)
assert_autocomplete_equals([], "*東", :tag_query) assert_autocomplete_equals([], "*東", :tag_query)
assert_autocomplete_equals(["touhou"], "*方*", :tag_query) assert_autocomplete_equals([], "*方*", :tag_query)
assert_autocomplete_equals(["touhou"], "*方", :tag_query) assert_autocomplete_equals([], "*方", :tag_query)
assert_autocomplete_equals([], "", :tag_query) assert_autocomplete_equals([], "", :tag_query)
assert_autocomplete_equals(["bkub"], "*大*", :tag_query) assert_autocomplete_equals([], "*大*", :tag_query)
assert_autocomplete_equals(["bkub"], "", :tag_query) assert_autocomplete_equals(["bkub"], "", :tag_query)
assert_autocomplete_equals([], "*大", :tag_query) assert_autocomplete_equals([], "*大", :tag_query)
assert_autocomplete_equals(["bkub"], "*川*", :tag_query) assert_autocomplete_equals([], "*川*", :tag_query)
assert_autocomplete_equals([], "*川", :tag_query) assert_autocomplete_equals([], "*川", :tag_query)
assert_autocomplete_equals([], "", :tag_query) assert_autocomplete_equals([], "", :tag_query)
end end
@@ -148,6 +153,8 @@ class AutocompleteServiceTest < ActiveSupport::TestCase
create(:tag, name: "touhou") create(:tag, name: "touhou")
assert_autocomplete_equals(%w[touhou], "touhuo", :tag_query) assert_autocomplete_equals(%w[touhou], "touhuo", :tag_query)
assert_autocomplete_equals(%w[], ".....", :tag_query)
assert_autocomplete_equals(%w[], "t___", :tag_query)
end end
should "ignore unsupported metatags" do should "ignore unsupported metatags" do
@@ -193,6 +200,11 @@ class AutocompleteServiceTest < ActiveSupport::TestCase
assert_autocomplete_equals(["order:score", "order:score_asc"], "order:sco", :tag_query) assert_autocomplete_equals(["order:score", "order:score_asc"], "order:sco", :tag_query)
end end
should "ignore bogus tags" do
assert_autocomplete_equals([], "x"*200, :tag_query)
assert_autocomplete_equals([], "http://www.google.com", :tag_query)
end
end end
end end
end end