From be1251b6be01b6bc857523cfc4cb7d5cf16b79a6 Mon Sep 17 00:00:00 2001 From: evazion Date: Mon, 11 Jan 2021 02:11:28 -0600 Subject: [PATCH] autocomplete: optimize various types of bogus input. Optimize autocomplete to ignore various types of bogus input that will never match anything. It turns out it's not uncommon for people to do things like paste random URLs into autocomplete, or hold down keys, or enter long strings of gibberish text (sometimes in other languages). Some things, like autocorrect and slash abbreviations, become pathologically slow when fed certain types of bad input. Autocomplete will abort and return nothing in the following situations: * Searching for URLs (tags that start with http:// or https://). * Overly long tags (strings longer than the 170 char tag name limit). * Slash abbreviations longer than 10 chars (e.g. typing `/qwoijqoiqogirqewgoi`). * Slash abbreviations that aren't alphanumeric (e.g. typing `/////////`). * Autocorrect input that contains too much punctuation and not enough actual letters. --- app/logical/autocomplete_service.rb | 12 +++++++++--- app/logical/tag_name_validator.rb | 7 +++++-- app/models/tag.rb | 5 +++-- test/unit/autocomplete_service_test.rb | 24 ++++++++++++++++++------ 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/app/logical/autocomplete_service.rb b/app/logical/autocomplete_service.rb index fbc91d729..d9e0a5b30 100644 --- a/app/logical/autocomplete_service.rb +++ b/app/logical/autocomplete_service.rb @@ -65,6 +65,9 @@ class AutocompleteService end def autocomplete_tag(string) + return [] if string.size > TagNameValidator::MAX_TAG_LENGTH + return [] if string.start_with?("http://", "https://") + if !string.ascii_only? results = tag_other_name_matches(string) elsif string.starts_with?("/") @@ -88,8 +91,6 @@ class AutocompleteService end def tag_matches(string) - return [] unless string.ascii_only? - name_matches = Tag.nonempty.name_matches(string).order(post_count: :desc).limit(limit) alias_matches = Tag.nonempty.alias_matches(string).order(post_count: :desc).limit(limit) union = "((#{name_matches.to_sql}) UNION (#{alias_matches.to_sql})) AS tags" @@ -102,7 +103,9 @@ class AutocompleteService end end - def tag_abbreviation_matches(string) + def tag_abbreviation_matches(string, max_length: 10) + return [] if string.size > max_length + tags = Tag.nonempty.abbreviation_matches(string).order(post_count: :desc).limit(limit) tags.map do |tag| @@ -111,6 +114,9 @@ class AutocompleteService end def tag_autocorrect_matches(string) + # autocorrect uses trigram indexing, which needs at least 3 alphanumeric characters to work. + return [] if string.remove(/[^a-zA-Z0-9]/).size < 3 + tags = Tag.nonempty.autocorrect_matches(string).limit(limit) tags.map do |tag| diff --git a/app/logical/tag_name_validator.rb b/app/logical/tag_name_validator.rb index 029a8c5c0..1a48c53c9 100644 --- a/app/logical/tag_name_validator.rb +++ b/app/logical/tag_name_validator.rb @@ -1,9 +1,11 @@ class TagNameValidator < ActiveModel::EachValidator + MAX_TAG_LENGTH = 170 + def validate_each(record, attribute, value) value = Tag.normalize_name(value) - if value.size > 170 - record.errors.add(attribute, "'#{value}' cannot be more than 255 characters long") + if value.size > MAX_TAG_LENGTH + record.errors.add(attribute, "'#{value}' cannot be more than #{MAX_TAG_LENGTH} characters long") end case value @@ -30,6 +32,7 @@ class TagNameValidator < ActiveModel::EachValidator when "new", "search" record.errors.add(attribute, "'#{value}' is a reserved name and cannot be used") when /\A(.+)_\(cosplay\)\z/i + # XXX don't allow aliases here? tag_name = TagAlias.to_aliased([$1]).first tag = Tag.find_by_name(tag_name) diff --git a/app/models/tag.rb b/app/models/tag.rb index 6106b774e..25ab4e528 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -254,12 +254,13 @@ class Tag < ApplicationRecord end def abbreviation_matches(abbrev) - abbrev = abbrev.delete_prefix("/") + abbrev = abbrev.downcase.delete_prefix("/") + return none if abbrev !~ /\A[a-z0-9\*]*\z/ + where("regexp_replace(tags.name, ?, '\\1', 'g') LIKE ?", ABBREVIATION_REGEXP.source, abbrev.to_escaped_for_sql_like) end def find_by_abbreviation(abbrev) - abbrev = abbrev.delete_prefix("/") abbreviation_matches(abbrev.escape_wildcards).order(post_count: :desc).first end diff --git a/test/unit/autocomplete_service_test.rb b/test/unit/autocomplete_service_test.rb index d57ef52ce..9686f07df 100644 --- a/test/unit/autocomplete_service_test.rb +++ b/test/unit/autocomplete_service_test.rb @@ -87,15 +87,20 @@ class AutocompleteServiceTest < ActiveSupport::TestCase create(:tag, name: "mole", post_count: 150) create(:tag, name: "mole_under_eye", post_count: 100) create(:tag, name: "mole_under_mouth", post_count: 50) + create(:tag, name: "x_x_x_x_x_x_x_x_x_x_x_x", post_count: 1) assert_autocomplete_equals(%w[mole mole_under_eye mole_under_mouth], "/m", :tag_query) assert_autocomplete_equals(%w[mole_under_eye mole_under_mouth], "/mu", :tag_query) assert_autocomplete_equals(%w[mole_under_mouth], "/mum", :tag_query) assert_autocomplete_equals(%w[mole_under_eye], "/mue", :tag_query) assert_autocomplete_equals(%w[mole_under_eye], "/*ue", :tag_query) + assert_autocomplete_equals(%w[mole_under_eye], "/MUE", :tag_query) assert_autocomplete_includes("mole_under_eye", "-/mue", :tag_query) assert_autocomplete_includes("mole_under_eye", "~/mue", :tag_query) + + assert_autocomplete_equals([], "/xxxxxxxxxx", :tag_query) + assert_autocomplete_equals([], "/_", :tag_query) end should "list aliases before abbreviations" do @@ -117,19 +122,19 @@ class AutocompleteServiceTest < ActiveSupport::TestCase assert_autocomplete_equals(["touhou"], "东", :tag_query) assert_autocomplete_equals(["touhou"], "동", :tag_query) - assert_autocomplete_equals(["touhou"], "*東*", :tag_query) - assert_autocomplete_equals(["touhou"], "東*", :tag_query) + assert_autocomplete_equals([], "*東*", :tag_query) + assert_autocomplete_equals([], "東*", :tag_query) assert_autocomplete_equals([], "*東", :tag_query) - assert_autocomplete_equals(["touhou"], "*方*", :tag_query) - assert_autocomplete_equals(["touhou"], "*方", :tag_query) + assert_autocomplete_equals([], "*方*", :tag_query) + assert_autocomplete_equals([], "*方", :tag_query) assert_autocomplete_equals([], "方", :tag_query) - assert_autocomplete_equals(["bkub"], "*大*", :tag_query) + assert_autocomplete_equals([], "*大*", :tag_query) assert_autocomplete_equals(["bkub"], "大", :tag_query) assert_autocomplete_equals([], "*大", :tag_query) - assert_autocomplete_equals(["bkub"], "*川*", :tag_query) + assert_autocomplete_equals([], "*川*", :tag_query) assert_autocomplete_equals([], "*川", :tag_query) assert_autocomplete_equals([], "川", :tag_query) end @@ -148,6 +153,8 @@ class AutocompleteServiceTest < ActiveSupport::TestCase create(:tag, name: "touhou") assert_autocomplete_equals(%w[touhou], "touhuo", :tag_query) + assert_autocomplete_equals(%w[], ".....", :tag_query) + assert_autocomplete_equals(%w[], "t___", :tag_query) end should "ignore unsupported metatags" do @@ -193,6 +200,11 @@ class AutocompleteServiceTest < ActiveSupport::TestCase assert_autocomplete_equals(["order:score", "order:score_asc"], "order:sco", :tag_query) end + + should "ignore bogus tags" do + assert_autocomplete_equals([], "x"*200, :tag_query) + assert_autocomplete_equals([], "http://www.google.com", :tag_query) + end end end end