diff --git a/app/logical/autocomplete_service.rb b/app/logical/autocomplete_service.rb
index 8ec4d6a23..33f5b6757 100644
--- a/app/logical/autocomplete_service.rb
+++ b/app/logical/autocomplete_service.rb
@@ -100,7 +100,7 @@ class AutocompleteService
   end
 
   def tag_autocorrect_matches(string)
-    tags = Tag.nonempty.fuzzy_name_matches(string).order_similarity(string).limit(limit)
+    tags = Tag.nonempty.autocorrect_matches(string).limit(limit)
 
     tags.map do |tag|
       { type: "tag", label: tag.pretty_name, value: tag.name, category: tag.category, post_count: tag.post_count }
diff --git a/app/models/tag.rb b/app/models/tag.rb
index 899718add..3c3a3bf65 100644
--- a/app/models/tag.rb
+++ b/app/models/tag.rb
@@ -234,16 +234,26 @@ class Tag < ApplicationRecord
   end
 
   module SearchMethods
+    # Returns tags whose names are likely misspellings of `name`, closest match first.
+    def autocorrect_matches(name)
+      fuzzy_name_matches(name).order_similarity(name)
+    end
+
-    # ref: https://www.postgresql.org/docs/current/static/pgtrgm.html#idm46428634524336
+    # Orders by Levenshtein distance to `name` (closest first), then by post count, then by name.
+    # levenshtein() raises an error for arguments longer than 255 characters, so both sides are truncated.
+    # ref: https://www.postgresql.org/docs/current/static/fuzzystrmatch.html
     def order_similarity(name)
-      # trunc(3 * sim) reduces the similarity score from a range of 0.0 -> 1.0 to just 0, 1, or 2.
-      # This groups tags first by approximate similarity, then by largest tags within groups of similar tags.
-      order(Arel.sql("trunc(3 * similarity(name, #{connection.quote(name)})) DESC"), "post_count DESC", "name DESC")
+      query = connection.quote(name&.slice(0, 255))
+      order(Arel.sql("levenshtein(left(tags.name, 255), #{query}), tags.post_count DESC, tags.name ASC"))
     end
 
+    # Matches names that are trigram-similar to `name` (pg_trgm `%` operator) and whose Levenshtein
+    # distance to it is less than max(name.size / 4, 3).
     # ref: https://www.postgresql.org/docs/current/static/pgtrgm.html#idm46428634524336
+    # ref: https://www.postgresql.org/docs/current/static/fuzzystrmatch.html
     def fuzzy_name_matches(name)
-      where("tags.name % ?", name)
+      max_distance = [name.size / 4, 3].max
+      where("tags.name % ?", name).where("levenshtein(left(tags.name, 255), ?) < ?", name[0, 255], max_distance)
     end
 
     def name_matches(name)
diff --git a/db/migrate/20201213052805_add_extension_fuzzy_str_match.rb b/db/migrate/20201213052805_add_extension_fuzzy_str_match.rb
new file mode 100644
index 000000000..bea09b809
--- /dev/null
+++ b/db/migrate/20201213052805_add_extension_fuzzy_str_match.rb
@@ -0,0 +1,5 @@
+class AddExtensionFuzzyStrMatch < ActiveRecord::Migration[6.0]
+  def change
+    enable_extension "fuzzystrmatch"
+  end
+end
diff --git a/db/structure.sql b/db/structure.sql
index 7da011e05..d85f9a64c 100644
--- a/db/structure.sql
+++ b/db/structure.sql
@@ -9,6 +9,21 @@ SET xmloption = content;
 SET client_min_messages = warning;
 SET row_security = off;
 
+
+--
+-- Name: fuzzystrmatch; Type: EXTENSION; Schema: -; Owner: -
+--
+
+CREATE EXTENSION IF NOT EXISTS fuzzystrmatch WITH SCHEMA public;
+
+
+--
+-- Name: EXTENSION fuzzystrmatch; Type: COMMENT; Schema: -; Owner: -
+--
+
+COMMENT ON EXTENSION fuzzystrmatch IS 'determine similarities and distance between strings';
+
 --
 -- Name: pg_trgm; Type: EXTENSION; Schema: -; Owner: -
 --
@@ -7420,6 +7435,7 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20200520060951'),
 ('20200803022359'),
 ('20200816175151'),
-('20201201211748');
+('20201201211748'),
+('20201213052805');
 
 
diff --git a/test/functional/tags_controller_test.rb b/test/functional/tags_controller_test.rb
index 56dead38c..797412d90 100644
--- a/test/functional/tags_controller_test.rb
+++ b/test/functional/tags_controller_test.rb
@@ -58,7 +58,7 @@ class TagsControllerTest < ActionDispatch::IntegrationTest
         should respond_to_search(name_matches: "hatsune_miku").with { @miku }
         should respond_to_search(name_normalize: "HATSUNE_MIKU ").with { @miku }
         should respond_to_search(name_or_alias_matches: "miku").with { @miku }
-        should respond_to_search(fuzzy_name_matches: "miku_hatsune", order: "similarity").with { @miku }
+        should respond_to_search(fuzzy_name_matches: "hatsune_mika", order: "similarity").with { @miku }
         should respond_to_search(name: "empty", hide_empty: "true").with { [] }
         should respond_to_search(name: "empty", hide_empty: "false").with { [@empty] }
 