From f4d279771fdc97e7e3d5819cf8fc992034f28c24 Mon Sep 17 00:00:00 2001 From: evazion Date: Mon, 27 Nov 2017 13:42:20 -0600 Subject: [PATCH 1/3] /tags: add fuzzy name search params (search[fuzzy_name_matches], search[order]=similarity). --- app/models/tag.rb | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/app/models/tag.rb b/app/models/tag.rb index 5f3a3fdc2..5534c4b9c 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -816,6 +816,18 @@ class Tag < ApplicationRecord where("tags.post_count > 0") end + # ref: https://www.postgresql.org/docs/current/static/pgtrgm.html#idm46428634524336 + def order_similarity(name) + # trunc(3 * sim) reduces the similarity score from a range of 0.0 -> 1.0 to just 0, 1, or 2. + # This groups tags first by approximate similarity, then by largest tags within groups of similar tags. + order("trunc(3 * similarity(name, #{sanitize(name)})) DESC", "post_count DESC", "name DESC") + end + + # ref: https://www.postgresql.org/docs/current/static/pgtrgm.html#idm46428634524336 + def fuzzy_name_matches(name) + where("tags.name % ?", name) + end + def name_matches(name) where("tags.name LIKE ? ESCAPE E'\\\\'", normalize_name(name).to_escaped_for_sql_like) end @@ -828,6 +840,10 @@ class Tag < ApplicationRecord q = where("true") params = {} if params.blank? + if params[:fuzzy_name_matches].present? + q = q.fuzzy_name_matches(params[:fuzzy_name_matches]) + end + if params[:name_matches].present? q = q.name_matches(params[:name_matches]) end @@ -864,6 +880,8 @@ class Tag < ApplicationRecord q = q.reorder("id desc") when "count" q = q.reorder("post_count desc") + when "similarity" + q = q.order_similarity(params[:fuzzy_name_matches]) if params[:fuzzy_name_matches].present? else q = q.reorder("id desc") end From 44f4befa5aca1ee3daeeb46285f8db433a6baf3b Mon Sep 17 00:00:00 2001 From: evazion Date: Mon, 27 Nov 2017 13:44:47 -0600 Subject: [PATCH 2/3] autocomplete: add automatic typo correction. If normal autocomplete fails to find any results, try doing a fuzzy name search instead. This will correct simple typos. --- app/assets/javascripts/autocomplete.js | 2 +- app/models/tag.rb | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/app/assets/javascripts/autocomplete.js b/app/assets/javascripts/autocomplete.js index 40279a8e6..3471b5de0 100644 --- a/app/assets/javascripts/autocomplete.js +++ b/app/assets/javascripts/autocomplete.js @@ -244,7 +244,7 @@ $.ajax({ url: "/tags/autocomplete.json", data: { - "search[name_matches]": term + "*" + "search[name_matches]": term }, method: "get", success: function(data) { diff --git a/app/models/tag.rb b/app/models/tag.rb index 5534c4b9c..7fdd56fed 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -890,21 +890,29 @@ class Tag < ApplicationRecord end def names_matches_with_aliases(name) - query1 = Tag.select("tags.name, tags.post_count, tags.category, null AS antecedent_name") - .search(:name_matches => name, :order => "count").limit(10) + name = normalize_name(name) + wildcard_name = name + '*' + + query1 = Tag.select("tags.name, tags.post_count, tags.category, null AS antecedent_name") + .search(:name_matches => wildcard_name, :order => "count").limit(10) - name = name.mb_chars.downcase.to_escaped_for_sql_like query2 = TagAlias.select("tags.name, tags.post_count, tags.category, tag_aliases.antecedent_name") .joins("INNER JOIN tags ON tags.name = tag_aliases.consequent_name") - .where("tag_aliases.antecedent_name LIKE ? ESCAPE E'\\\\'", name) + .where("tag_aliases.antecedent_name LIKE ? ESCAPE E'\\\\'", wildcard_name.to_escaped_for_sql_like) .active - .where("tags.name NOT LIKE ? ESCAPE E'\\\\'", name) + .where("tags.name NOT LIKE ? ESCAPE E'\\\\'", wildcard_name.to_escaped_for_sql_like) .where("tag_aliases.post_count > 0") .order("tag_aliases.post_count desc") .limit(20) # Get 20 records even though only 10 will be displayed in case some duplicates get filtered out. sql_query = "((#{query1.to_sql}) UNION ALL (#{query2.to_sql})) AS unioned_query" - Tag.select("DISTINCT ON (name, post_count) *").from(sql_query).order("post_count desc").limit(10) + tags = Tag.select("DISTINCT ON (name, post_count) *").from(sql_query).order("post_count desc").limit(10) + + if tags.empty? + tags = Tag.fuzzy_name_matches(name).order_similarity(name).nonempty.limit(10) + end + + tags end end From dd8e3bbb944418334388b03bc2ff5209dacc27b3 Mon Sep 17 00:00:00 2001 From: evazion Date: Mon, 27 Nov 2017 13:52:48 -0600 Subject: [PATCH 3/3] tags: add trigram index on tag names. --- db/migrate/20171127195124_add_trigram_index_to_tags.rb | 9 +++++++++ db/structure.sql | 9 +++++++++ 2 files changed, 18 insertions(+) create mode 100644 db/migrate/20171127195124_add_trigram_index_to_tags.rb diff --git a/db/migrate/20171127195124_add_trigram_index_to_tags.rb b/db/migrate/20171127195124_add_trigram_index_to_tags.rb new file mode 100644 index 000000000..a2d187c55 --- /dev/null +++ b/db/migrate/20171127195124_add_trigram_index_to_tags.rb @@ -0,0 +1,9 @@ +class AddTrigramIndexToTags < ActiveRecord::Migration + def up + execute "create index index_tags_on_name_trgm on tags using gin (name gin_trgm_ops)" + end + + def down + execute "drop index index_tags_on_name_trgm" + end +end diff --git a/db/structure.sql b/db/structure.sql index 5796ea19d..bd655ffd4 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -7008,6 +7008,13 @@ CREATE UNIQUE INDEX index_tags_on_name ON tags USING btree (name); CREATE INDEX index_tags_on_name_pattern ON tags USING btree (name text_pattern_ops); +-- +-- Name: index_tags_on_name_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX index_tags_on_name_trgm ON tags USING gin (name gin_trgm_ops); + + -- -- Name: index_token_buckets_on_user_id; Type: INDEX; Schema: public; Owner: - -- @@ -7521,3 +7528,5 @@ INSERT INTO schema_migrations (version) VALUES ('20170914200122'); INSERT INTO schema_migrations (version) VALUES ('20171106075030'); +INSERT INTO schema_migrations (version) VALUES ('20171127195124'); +