From 2d669899a476aeb905fd6a717514c4b9ea7f6931 Mon Sep 17 00:00:00 2001 From: BrokenEagle Date: Fri, 6 Mar 2020 06:42:05 +0000 Subject: [PATCH 1/5] Add tags overlap field This information is already available, so there's no extra processing. To avoid conflicting with the current tags field, it was made into its own field, and it uses a hash so that values can be quickly looked up. --- app/logical/related_tag_query.rb | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/app/logical/related_tag_query.rb b/app/logical/related_tag_query.rb index 2a46d985e..d755e3524 100644 --- a/app/logical/related_tag_query.rb +++ b/app/logical/related_tag_query.rb @@ -26,6 +26,14 @@ class RelatedTagQuery end end + def tags_overlap + if query =~ /\*/ + {} + else + tags.map { |v| [v.name, v.overlap_count] }.to_h + end + end + # Returns the top 20 most frequently added tags within the last 20 edits made by the user in the last hour. def recent_tags(since: 1.hour.ago, max_edits: 20, max_tags: 20) return [] unless user.present? && PostVersion.enabled? 
@@ -78,6 +86,7 @@ class RelatedTagQuery query: query, category: category, tags: tags_with_categories(tags.map(&:name)), + tags_overlap: tags_overlap, wiki_page_tags: tags_with_categories(wiki_page_tags), other_wikis: other_wiki_pages.map { |wiki| [wiki.title, tags_with_categories(wiki.tags)] }.to_h } From 66d2fd7b98fe190eb2fd6015bd017e02b88767e7 Mon Sep 17 00:00:00 2001 From: BrokenEagle Date: Fri, 6 Mar 2020 07:04:26 +0000 Subject: [PATCH 2/5] Add limit support for related tags --- app/controllers/related_tags_controller.rb | 3 ++- app/logical/related_tag_query.rb | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/app/controllers/related_tags_controller.rb b/app/controllers/related_tags_controller.rb index af329c0f7..4278c24f7 100644 --- a/app/controllers/related_tags_controller.rb +++ b/app/controllers/related_tags_controller.rb @@ -4,8 +4,9 @@ class RelatedTagsController < ApplicationController def show query = params[:query] || search_params[:query] category = params[:category] || search_params[:category] + limit = params[:limit] - @query = RelatedTagQuery.new(query: query, category: category, user: CurrentUser.user) + @query = RelatedTagQuery.new(query: query, category: category, user: CurrentUser.user, limit: limit) respond_with(@query) end end diff --git a/app/logical/related_tag_query.rb b/app/logical/related_tag_query.rb index d755e3524..cf89aa4ad 100644 --- a/app/logical/related_tag_query.rb +++ b/app/logical/related_tag_query.rb @@ -2,12 +2,13 @@ class RelatedTagQuery include ActiveModel::Serializers::JSON include ActiveModel::Serializers::Xml - attr_reader :query, :category, :user + attr_reader :query, :category, :user, :limit - def initialize(query: nil, category: nil, user: nil) + def initialize(query: nil, category: nil, user: nil, limit: nil) @user = user @query = TagAlias.to_aliased(query.to_s.downcase.strip).join(" ") @category = category + @limit = (limit =~ /^\d+/ ? 
limit.to_i : 25) end def pretty_name @@ -18,9 +19,9 @@ class RelatedTagQuery if query =~ /\*/ pattern_matching_tags elsif category.present? - RelatedTagCalculator.frequent_tags_for_search(query, category: Tag.categories.value_for(category)).take(25) + RelatedTagCalculator.frequent_tags_for_search(query, category: Tag.categories.value_for(category)).take(limit) elsif query.present? - RelatedTagCalculator.similar_tags_for_search(query).take(25) + RelatedTagCalculator.similar_tags_for_search(query).take(limit) else Tag.none end @@ -99,7 +100,7 @@ class RelatedTagQuery end def pattern_matching_tags - Tag.nonempty.name_matches(query).order("post_count desc, name asc").limit(50) + Tag.nonempty.name_matches(query).order("post_count desc, name asc").limit(limit) end def wiki_page From af96d68c0ba0c3bdaf9483f6c66af0f52fc29d42 Mon Sep 17 00:00:00 2001 From: BrokenEagle Date: Fri, 6 Mar 2020 07:14:08 +0000 Subject: [PATCH 3/5] Add support for using any of the current related tag types Regardless of category or query. This meant that the category value had to be passed in as either null or the value itself for both types of functions. It also fixes an issue where the category wasn't settable on the pattern matching type. 
--- app/controllers/related_tags_controller.rb | 3 +- app/logical/related_tag_query.rb | 40 +++++++++++++++++----- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/app/controllers/related_tags_controller.rb b/app/controllers/related_tags_controller.rb index 4278c24f7..75a226401 100644 --- a/app/controllers/related_tags_controller.rb +++ b/app/controllers/related_tags_controller.rb @@ -4,9 +4,10 @@ class RelatedTagsController < ApplicationController def show query = params[:query] || search_params[:query] category = params[:category] || search_params[:category] + type = params[:type] || search_params[:type] limit = params[:limit] - @query = RelatedTagQuery.new(query: query, category: category, user: CurrentUser.user, limit: limit) + @query = RelatedTagQuery.new(query: query, category: category, type: type, user: CurrentUser.user, limit: limit) respond_with(@query) end end diff --git a/app/logical/related_tag_query.rb b/app/logical/related_tag_query.rb index cf89aa4ad..d1113821f 100644 --- a/app/logical/related_tag_query.rb +++ b/app/logical/related_tag_query.rb @@ -2,12 +2,13 @@ class RelatedTagQuery include ActiveModel::Serializers::JSON include ActiveModel::Serializers::Xml - attr_reader :query, :category, :user, :limit + attr_reader :query, :category, :type, :user, :limit - def initialize(query: nil, category: nil, user: nil, limit: nil) + def initialize(query: nil, category: nil, type: nil, user: nil, limit: nil) @user = user @query = TagAlias.to_aliased(query.to_s.downcase.strip).join(" ") @category = category + @type = type @limit = (limit =~ /^\d+/ ? limit.to_i : 25) end @@ -16,25 +17,39 @@ class RelatedTagQuery end def tags - if query =~ /\*/ - pattern_matching_tags + if type == "frequent" + frequent_tags + elsif type == "similar" + similar_tags + elsif type == "like" + pattern_matching_tags("*#{query}*") + elsif query =~ /\*/ + pattern_matching_tags(query) elsif category.present? 
- RelatedTagCalculator.frequent_tags_for_search(query, category: Tag.categories.value_for(category)).take(limit) + frequent_tags elsif query.present? - RelatedTagCalculator.similar_tags_for_search(query).take(limit) + similar_tags else Tag.none end end def tags_overlap - if query =~ /\*/ + if type == "like" || query =~ /\*/ {} else tags.map { |v| [v.name, v.overlap_count] }.to_h end end + def frequent_tags + @frequent_tags ||= RelatedTagCalculator.frequent_tags_for_search(query, category: category_of).take(limit) + end + + def similar_tags + @similar_tags ||= RelatedTagCalculator.similar_tags_for_search(query, category: category_of).take(limit) + end + # Returns the top 20 most frequently added tags within the last 20 edits made by the user in the last hour. def recent_tags(since: 1.hour.ago, max_edits: 20, max_tags: 20) return [] unless user.present? && PostVersion.enabled? @@ -99,8 +114,15 @@ class RelatedTagQuery Tag.categories_for(list_of_tag_names).to_a end - def pattern_matching_tags - Tag.nonempty.name_matches(query).order("post_count desc, name asc").limit(limit) + def category_of + (category.present? ? Tag.categories.value_for(category) : nil) + end + + def pattern_matching_tags(tag_query) + tags = Tag.nonempty.name_matches(tag_query) + tags = tags.where(category: Tag.categories.value_for(category)) if category.present? + tags = tags.order("post_count desc, name asc").limit(limit) + tags end def wiki_page From be0bb42ba957c936eefbaccf06ae9df103fc87e9 Mon Sep 17 00:00:00 2001 From: BrokenEagle Date: Fri, 6 Mar 2020 07:24:12 +0000 Subject: [PATCH 4/5] Return the actual sample size used for related tag calculations This meant that both functions had to return two separate values, which need to be destructured by the function calling them. This isn't so much a huge issue as it's more something just to be made aware of in case either function needs to be used elsewhere. 
--- app/logical/related_tag_calculator.rb | 9 +++--- app/logical/related_tag_query.rb | 37 ++++++++++++++++++++++-- test/unit/related_tag_calculator_test.rb | 16 +++++----- 3 files changed, 48 insertions(+), 14 deletions(-) diff --git a/app/logical/related_tag_calculator.rb b/app/logical/related_tag_calculator.rb index e375fd567..32d58150f 100644 --- a/app/logical/related_tag_calculator.rb +++ b/app/logical/related_tag_calculator.rb @@ -6,13 +6,14 @@ module RelatedTagCalculator search_sample_size = [search_count, search_sample_size].min return [] if search_sample_size <= 0 - tags = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category).limit(tag_sample_size) + tags, sample_count = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category) + tags = tags.limit(tag_sample_size) tags = tags.sort_by do |tag| # cosine distance(tag1, tag2) = 1 - {{tag1 tag2}} / sqrt({{tag1}} * {{tag2}}) 1 - tag.overlap_count / Math.sqrt(tag.post_count * search_count.to_f) end - tags + [tags, sample_count] end def self.frequent_tags_for_search(tag_query, search_sample_size: 1000, category: nil) @@ -24,7 +25,7 @@ module RelatedTagCalculator tags = tags.where("tags.post_count > 0") tags = tags.where(category: category) if category.present? 
tags = tags.order("overlap_count DESC, tags.post_count DESC, tags.name") - tags + [tags, sample_posts.length] end def self.frequent_tags_for_posts(posts) @@ -36,7 +37,7 @@ module RelatedTagCalculator Cache.get("similar_tags:#{tag_query}", cache_timeout, race_condition_ttl: 60.seconds) do ApplicationRecord.with_timeout(search_timeout, []) do CurrentUser.without_safe_mode do - RelatedTagCalculator.similar_tags_for_search(tag_query).take(max_tags).pluck(:name) + RelatedTagCalculator.similar_tags_for_search(tag_query)[0].take(max_tags).pluck(:name) end end end diff --git a/app/logical/related_tag_query.rb b/app/logical/related_tag_query.rb index d1113821f..6fbb18d4c 100644 --- a/app/logical/related_tag_query.rb +++ b/app/logical/related_tag_query.rb @@ -42,12 +42,44 @@ class RelatedTagQuery end end + def sample_count + if type == "frequent" + frequent_count + elsif type == "similar" + similar_count + elsif type == "like" || query =~ /\*/ + 0 + elsif category.present? + frequent_count + elsif query.present? + similar_count + else + 0 + end + end + + def frequent_tags_query + @frequent_tags_query ||= RelatedTagCalculator.frequent_tags_for_search(query, category: category_of).take(limit) + end + def frequent_tags - @frequent_tags ||= RelatedTagCalculator.frequent_tags_for_search(query, category: category_of).take(limit) + frequent_tags_query[0] + end + + def frequent_count + frequent_tags_query[1] + end + + def similar_tags_query + @similar_tags_query ||= RelatedTagCalculator.similar_tags_for_search(query, category: category_of).take(limit) end def similar_tags - @similar_tags ||= RelatedTagCalculator.similar_tags_for_search(query, category: category_of).take(limit) + similar_tags_query[0] + end + + def similar_count + similar_tags_query[1] end # Returns the top 20 most frequently added tags within the last 20 edits made by the user in the last hour. 
@@ -101,6 +133,7 @@ class RelatedTagQuery { query: query, category: category, + sample_count: sample_count, tags: tags_with_categories(tags.map(&:name)), tags_overlap: tags_overlap, wiki_page_tags: tags_with_categories(wiki_page_tags), diff --git a/test/unit/related_tag_calculator_test.rb b/test/unit/related_tag_calculator_test.rb index 7ebc05f72..1d2f5ee53 100644 --- a/test/unit/related_tag_calculator_test.rb +++ b/test/unit/related_tag_calculator_test.rb @@ -30,7 +30,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase create(:post, tag_string: "aaa bbb ccc") create(:post, tag_string: "aaa bbb") - assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa").pluck(:name)) + assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa")[0].pluck(:name)) end should "calculate the most frequent tags for a multiple tag search" do @@ -38,7 +38,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase create(:post, tag_string: "aaa bbb ccc ddd") create(:post, tag_string: "aaa eee fff") - assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb").pluck(:name)) + assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb")[0].pluck(:name)) end should "calculate the most frequent tags with a category constraint" do @@ -46,8 +46,8 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase create(:post, tag_string: "aaa bbb art:ccc") create(:post, tag_string: "aaa bbb") - assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.general).pluck(:name)) - assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.artist).pluck(:name)) + assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.general)[0].pluck(:name)) + assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", category: 
Tag.categories.artist)[0].pluck(:name)) end end @@ -57,9 +57,9 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase create(:post, tag_string: "1girl solo", rating: "q") create(:post, tag_string: "1girl 1boy", rating: "q") - assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl").pluck(:name)) - assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q").pluck(:name)) - assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo").pluck(:name)) + assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl")[0].pluck(:name)) + assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q")[0].pluck(:name)) + assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo")[0].pluck(:name)) end should "calculate the similar tags for an aliased tag" do @@ -67,7 +67,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase create(:post, tag_string: "bunny dog") create(:post, tag_string: "bunny cat") - assert_equal(%w[bunny cat dog], RelatedTagCalculator.similar_tags_for_search("rabbit").pluck(:name)) + assert_equal(%w[bunny cat dog], RelatedTagCalculator.similar_tags_for_search("rabbit")[0].pluck(:name)) end end end From a70433e78fc3e938d3ac5bb9d65c780da9bafec5 Mon Sep 17 00:00:00 2001 From: BrokenEagle Date: Fri, 6 Mar 2020 07:27:00 +0000 Subject: [PATCH 5/5] Support numeric inputs for the category --- app/logical/tag_category.rb | 4 ++++ app/models/tag.rb | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/app/logical/tag_category.rb b/app/logical/tag_category.rb index efd094c1f..ea3d40b7c 100644 --- a/app/logical/tag_category.rb +++ b/app/logical/tag_category.rb @@ -71,6 +71,10 @@ class TagCategory def short_name_regex @@short_name_regex ||= short_name_list.join("|") end + + def category_ids_regex + @@category_ids_regex ||= "[#{category_ids.join("")}]" + end end extend Mappings diff 
--git a/app/models/tag.rb b/app/models/tag.rb index e6b3e4b03..aafe59dd2 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -87,7 +87,14 @@ class Tag < ApplicationRecord end def value_for(string) - TagCategory.mapping[string.to_s.downcase] || 0 + norm_string = string.to_s.downcase + if norm_string =~ /#{TagCategory.category_ids_regex}/ + norm_string.to_i + elsif TagCategory.mapping[string.to_s.downcase] + TagCategory.mapping[string.to_s.downcase] + else + 0 + end end end