autocomplete: optimize searching by artist/wiki page other names.
Optimize searches for non-English phrases in autocomplete. These searches were pretty slow, and could sometimes cause sitewide lag spikes when users typed long strings of non-English text into the search box, resulting in an unintentional DoS. The trick is to create an `array_to_tsvector(other_names) USING gin` index on the other_names column. This supports fast string prefix matching against all elements of the array. The downside is that it doesn't allow infix or suffix matches, so we can't support wildcards in general. Wildcards didn't quite work anyway, since artist and wiki other names can contain literal '*' characters.
This commit is contained in:
@@ -65,7 +65,9 @@ class AutocompleteService
|
||||
end
|
||||
|
||||
def autocomplete_tag(string)
|
||||
if string.starts_with?("/")
|
||||
if !string.ascii_only?
|
||||
results = tag_other_name_matches(string)
|
||||
elsif string.starts_with?("/")
|
||||
string = string + "*" unless string.include?("*")
|
||||
|
||||
results = tag_matches(string)
|
||||
@@ -77,11 +79,8 @@ class AutocompleteService
|
||||
results = results.uniq { |r| r[:value] }.take(limit)
|
||||
elsif string.include?("*")
|
||||
results = tag_matches(string)
|
||||
results = tag_other_name_matches(string) if results.blank?
|
||||
else
|
||||
string += "*"
|
||||
results = tag_matches(string)
|
||||
results = tag_other_name_matches(string) if results.blank?
|
||||
results = tag_matches(string + "*")
|
||||
results = tag_autocorrect_matches(string) if results.blank?
|
||||
end
|
||||
|
||||
@@ -89,7 +88,7 @@ class AutocompleteService
|
||||
end
|
||||
|
||||
def tag_matches(string)
|
||||
return [] if string =~ /[^[:ascii:]]/
|
||||
return [] unless string.ascii_only?
|
||||
|
||||
name_matches = Tag.nonempty.name_matches(string).order(post_count: :desc).limit(limit)
|
||||
alias_matches = Tag.nonempty.alias_matches(string).order(post_count: :desc).limit(limit)
|
||||
@@ -112,7 +111,6 @@ class AutocompleteService
|
||||
end
|
||||
|
||||
def tag_autocorrect_matches(string)
|
||||
string = string.delete("*")
|
||||
tags = Tag.nonempty.autocorrect_matches(string).limit(limit)
|
||||
|
||||
tags.map do |tag|
|
||||
@@ -121,16 +119,14 @@ class AutocompleteService
|
||||
end
|
||||
|
||||
def tag_other_name_matches(string)
|
||||
return [] unless string =~ /[^[:ascii:]]/
|
||||
|
||||
artists = Artist.undeleted.any_other_name_like(string)
|
||||
wikis = WikiPage.undeleted.other_names_match(string)
|
||||
artists = Artist.undeleted.where_any_in_array_starts_with(:other_names, string)
|
||||
wikis = WikiPage.undeleted.where_any_in_array_starts_with(:other_names, string)
|
||||
tags = Tag.where(name: wikis.select(:title)).or(Tag.where(name: artists.select(:name)))
|
||||
tags = tags.nonempty.order(post_count: :desc).limit(limit).includes(:wiki_page, :artist)
|
||||
|
||||
tags.map do |tag|
|
||||
other_names = tag.artist&.other_names.to_a + tag.wiki_page&.other_names.to_a
|
||||
antecedent = other_names.find { |other_name| other_name.ilike?(string) }
|
||||
antecedent = other_names.find { |other_name| other_name.ilike?(string + "*") }
|
||||
{ type: "tag-other-name", label: tag.pretty_name, value: tag.name, category: tag.category, post_count: tag.post_count, antecedent: antecedent }
|
||||
end
|
||||
end
|
||||
|
||||
@@ -97,6 +97,11 @@ module Searchable
|
||||
where("? ~<< ANY(#{qualified_column_for(attr)})", "(?#{flags})#{regex}")
|
||||
end
|
||||
|
||||
# Filters to rows where the array column `attr`, converted with
# `array_to_tsvector`, matches `value` as a tsquery prefix (`value:*`).
# Only prefix matches are expressed this way; infix and suffix matches are not.
#
# The column should have an `array_to_tsvector(column) using gin` index for best performance.
#
# attr  - name of the array column, resolved through `qualified_column_for`.
# value - string to match; `to_escaped_for_tsquery` is defined elsewhere and
#         presumably escapes it for safe use inside a tsquery (TODO confirm).
|
||||
def where_any_in_array_starts_with(attr, value)
|
||||
where("array_to_tsvector(#{qualified_column_for(attr)}) @@ ?", value.to_escaped_for_tsquery + ":*")
|
||||
end
|
||||
|
||||
# Filters to rows where the lowercased value of column `attr` is one of
# `values`. Each value is downcased before being bound to the IN list.
#
# attr   - name of the text column, resolved through `qualified_column_for`.
# values - collection of strings (must respond to `map`, with elements that
#          respond to `downcase`).
def where_text_includes_lower(attr, values)
|
||||
where("lower(#{qualified_column_for(attr)}) IN (?)", values.map(&:downcase))
|
||||
end
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
# Adds GIN expression indexes on `array_to_tsvector(other_names)` to the
# wiki_pages and artists tables. These are the indexes that tsvector-based
# prefix searches over `other_names` rely on for speed.
class AddArrayToTsvectorIndexOnWikiPagesAndArtists < ActiveRecord::Migration[6.1]
|
||||
def change
|
||||
add_index :wiki_pages, "array_to_tsvector(other_names)", using: :gin
|
||||
add_index :artists, "array_to_tsvector(other_names)", using: :gin
|
||||
end
|
||||
end
|
||||
@@ -4898,6 +4898,13 @@ CREATE INDEX index_artist_versions_on_updater_id ON public.artist_versions USING
|
||||
CREATE INDEX index_artist_versions_on_updater_ip_addr ON public.artist_versions USING btree (updater_ip_addr);
|
||||
|
||||
|
||||
--
|
||||
-- Name: index_artists_on_array_to_tsvector_other_names; Type: INDEX; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
CREATE INDEX index_artists_on_array_to_tsvector_other_names ON public.artists USING gin (array_to_tsvector(other_names));
|
||||
|
||||
|
||||
--
|
||||
-- Name: index_artists_on_group_name; Type: INDEX; Schema: public; Owner: -
|
||||
--
|
||||
@@ -7535,6 +7542,13 @@ CREATE INDEX index_wiki_page_versions_on_updater_ip_addr ON public.wiki_page_ver
|
||||
CREATE INDEX index_wiki_page_versions_on_wiki_page_id ON public.wiki_page_versions USING btree (wiki_page_id);
|
||||
|
||||
|
||||
--
|
||||
-- Name: index_wiki_pages_on_array_to_tsvector_other_names; Type: INDEX; Schema: public; Owner: -
|
||||
--
|
||||
|
||||
CREATE INDEX index_wiki_pages_on_array_to_tsvector_other_names ON public.wiki_pages USING gin (array_to_tsvector(other_names));
|
||||
|
||||
|
||||
--
|
||||
-- Name: index_wiki_pages_on_body_index_index; Type: INDEX; Schema: public; Owner: -
|
||||
--
|
||||
@@ -7870,6 +7884,7 @@ INSERT INTO "schema_migrations" (version) VALUES
|
||||
('20210108030722'),
|
||||
('20210108030723'),
|
||||
('20210108030724'),
|
||||
('20210110015410');
|
||||
('20210110015410'),
|
||||
('20210110090656');
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user