autocomplete: optimize searching by artist/wiki page other names.

Optimize searches for non-English phrases in autocomplete. These
searches were pretty slow, and could sometimes cause sitewide lag spikes
when users typed long strings of non-English text into the search box,
causing an unintentional DoS.

The trick is to use an `array_to_tsvector(other_names) USING gin` index
on other_names. This supports fast string prefix matching against all
elements of the array. The downside is that it doesn't allow infix or
suffix matches, so we can't support wildcards in general. Wildcards
didn't quite work anyway, since artist and wiki other names can contain
literal '*' characters.
This commit is contained in:
evazion
2021-01-10 03:35:12 -06:00
parent d18dc573fb
commit fc5db679e4
4 changed files with 35 additions and 13 deletions

View File

@@ -65,7 +65,9 @@ class AutocompleteService
end
def autocomplete_tag(string)
if string.starts_with?("/")
if !string.ascii_only?
results = tag_other_name_matches(string)
elsif string.starts_with?("/")
string = string + "*" unless string.include?("*")
results = tag_matches(string)
@@ -77,11 +79,8 @@ class AutocompleteService
results = results.uniq { |r| r[:value] }.take(limit)
elsif string.include?("*")
results = tag_matches(string)
results = tag_other_name_matches(string) if results.blank?
else
string += "*"
results = tag_matches(string)
results = tag_other_name_matches(string) if results.blank?
results = tag_matches(string + "*")
results = tag_autocorrect_matches(string) if results.blank?
end
@@ -89,7 +88,7 @@ class AutocompleteService
end
def tag_matches(string)
return [] if string =~ /[^[:ascii:]]/
return [] unless string.ascii_only?
name_matches = Tag.nonempty.name_matches(string).order(post_count: :desc).limit(limit)
alias_matches = Tag.nonempty.alias_matches(string).order(post_count: :desc).limit(limit)
@@ -112,7 +111,6 @@ class AutocompleteService
end
def tag_autocorrect_matches(string)
string = string.delete("*")
tags = Tag.nonempty.autocorrect_matches(string).limit(limit)
tags.map do |tag|
@@ -121,16 +119,14 @@ class AutocompleteService
end
def tag_other_name_matches(string)
return [] unless string =~ /[^[:ascii:]]/
artists = Artist.undeleted.any_other_name_like(string)
wikis = WikiPage.undeleted.other_names_match(string)
artists = Artist.undeleted.where_any_in_array_starts_with(:other_names, string)
wikis = WikiPage.undeleted.where_any_in_array_starts_with(:other_names, string)
tags = Tag.where(name: wikis.select(:title)).or(Tag.where(name: artists.select(:name)))
tags = tags.nonempty.order(post_count: :desc).limit(limit).includes(:wiki_page, :artist)
tags.map do |tag|
other_names = tag.artist&.other_names.to_a + tag.wiki_page&.other_names.to_a
antecedent = other_names.find { |other_name| other_name.ilike?(string) }
antecedent = other_names.find { |other_name| other_name.ilike?(string + "*") }
{ type: "tag-other-name", label: tag.pretty_name, value: tag.name, category: tag.category, post_count: tag.post_count, antecedent: antecedent }
end
end

View File

@@ -97,6 +97,11 @@ module Searchable
where("? ~<< ANY(#{qualified_column_for(attr)})", "(?#{flags})#{regex}")
end
# Filter on `array_to_tsvector(column) @@ query`, where the query is the escaped
# `value` with a `:*` prefix-match suffix appended, so rows match when an element
# of the array column `attr` starts with `value`.
#
# The column should have a `array_to_tsvector(column) using gin` index for best performance.
def where_any_in_array_starts_with(attr, value)
  column = qualified_column_for(attr)
  prefix_query = "#{value.to_escaped_for_tsquery}:*"

  where("array_to_tsvector(#{column}) @@ ?", prefix_query)
end
# Filter on `lower(column) IN (values)`, with every entry of `values` downcased
# before it is bound, so the comparison against the `attr` column ignores case.
def where_text_includes_lower(attr, values)
  column = qualified_column_for(attr)
  lowercased = values.map { |value| value.downcase }

  where("lower(#{column}) IN (?)", lowercased)
end

View File

@@ -0,0 +1,6 @@
# Adds a GIN expression index on `array_to_tsvector(other_names)` to both the
# wiki_pages and artists tables.
class AddArrayToTsvectorIndexOnWikiPagesAndArtists < ActiveRecord::Migration[6.1]
  def change
    # Same expression index on each table; wiki_pages is indexed first, then artists.
    %i[wiki_pages artists].each do |table|
      add_index table, "array_to_tsvector(other_names)", using: :gin
    end
  end
end

View File

@@ -4898,6 +4898,13 @@ CREATE INDEX index_artist_versions_on_updater_id ON public.artist_versions USING
CREATE INDEX index_artist_versions_on_updater_ip_addr ON public.artist_versions USING btree (updater_ip_addr);
--
-- Name: index_artists_on_array_to_tsvector_other_names; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_artists_on_array_to_tsvector_other_names ON public.artists USING gin (array_to_tsvector(other_names));
--
-- Name: index_artists_on_group_name; Type: INDEX; Schema: public; Owner: -
--
@@ -7535,6 +7542,13 @@ CREATE INDEX index_wiki_page_versions_on_updater_ip_addr ON public.wiki_page_ver
CREATE INDEX index_wiki_page_versions_on_wiki_page_id ON public.wiki_page_versions USING btree (wiki_page_id);
--
-- Name: index_wiki_pages_on_array_to_tsvector_other_names; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_wiki_pages_on_array_to_tsvector_other_names ON public.wiki_pages USING gin (array_to_tsvector(other_names));
--
-- Name: index_wiki_pages_on_body_index_index; Type: INDEX; Schema: public; Owner: -
--
@@ -7870,6 +7884,7 @@ INSERT INTO "schema_migrations" (version) VALUES
('20210108030722'),
('20210108030723'),
('20210108030724'),
('20210110015410');
('20210110015410'),
('20210110090656');