From be0bb42ba957c936eefbaccf06ae9df103fc87e9 Mon Sep 17 00:00:00 2001
From: BrokenEagle
Date: Fri, 6 Mar 2020 07:24:12 +0000
Subject: [PATCH] Return the actual sample size used for related tag calculations

This meant that both functions had to return two separate values, which
need to be destructured by the function calling them. This isn't so much a
huge issue as it's more something just to be made aware of in case either
function needs to be used elsewhere.
---
Review notes (this section, up to the first "diff --git" line, is not part
of the commit message):

- RelatedTagCalculator.similar_tags_for_search and .frequent_tags_for_search
  return a [tags, sample_count] pair. similar_tags_for_search now returns
  [[], 0] instead of a bare [] when the search sample is empty, so callers
  can always destructure or index the result.
- RelatedTagQuery#frequent_tags_query and #similar_tags_query destructure
  the pair and apply `limit` to the tag list. Calling `.take(limit)` directly
  on the pair only truncated the two-element array and left the tag list
  unlimited.
- The "index" blob ids below predate the two fixes above.
- NOTE(review): `sample_posts.length` presumably loads the sampled posts
  when `sample_posts` is an unloaded relation (its definition is outside
  this patch) - confirm whether `count` or `size` would be cheaper.

 app/logical/related_tag_calculator.rb    | 11 ++++---
 app/logical/related_tag_query.rb         | 46 ++++++++++++++++++++++--
 test/unit/related_tag_calculator_test.rb | 16 +++++-----
 3 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/app/logical/related_tag_calculator.rb b/app/logical/related_tag_calculator.rb
index e375fd567..32d58150f 100644
--- a/app/logical/related_tag_calculator.rb
+++ b/app/logical/related_tag_calculator.rb
@@ -6,13 +6,14 @@ module RelatedTagCalculator
     search_sample_size = [search_count, search_sample_size].min
-    return [] if search_sample_size <= 0
+    return [[], 0] if search_sample_size <= 0
 
-    tags = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category).limit(tag_sample_size)
+    tags, sample_count = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category)
+    tags = tags.limit(tag_sample_size)
     tags = tags.sort_by do |tag|
       # cosine distance(tag1, tag2) = 1 - {{tag1 tag2}} / sqrt({{tag1}} * {{tag2}})
       1 - tag.overlap_count / Math.sqrt(tag.post_count * search_count.to_f)
     end
 
-    tags
+    [tags, sample_count]
   end
 
   def self.frequent_tags_for_search(tag_query, search_sample_size: 1000, category: nil)
@@ -24,7 +25,7 @@ module RelatedTagCalculator
     tags = tags.where("tags.post_count > 0")
     tags = tags.where(category: category) if category.present?
     tags = tags.order("overlap_count DESC, tags.post_count DESC, tags.name")
-    tags
+    [tags, sample_posts.length]
   end
 
   def self.frequent_tags_for_posts(posts)
@@ -36,7 +37,7 @@ module RelatedTagCalculator
     Cache.get("similar_tags:#{tag_query}", cache_timeout, race_condition_ttl: 60.seconds) do
       ApplicationRecord.with_timeout(search_timeout, []) do
         CurrentUser.without_safe_mode do
-          RelatedTagCalculator.similar_tags_for_search(tag_query).take(max_tags).pluck(:name)
+          RelatedTagCalculator.similar_tags_for_search(tag_query)[0].take(max_tags).pluck(:name)
         end
       end
     end
diff --git a/app/logical/related_tag_query.rb b/app/logical/related_tag_query.rb
index d1113821f..6fbb18d4c 100644
--- a/app/logical/related_tag_query.rb
+++ b/app/logical/related_tag_query.rb
@@ -42,12 +42,53 @@ class RelatedTagQuery
     end
   end
 
+  # Size of the post sample behind the frequent/similar tags; 0 when neither calculation applies.
+  def sample_count
+    if type == "frequent"
+      frequent_count
+    elsif type == "similar"
+      similar_count
+    elsif type == "like" || query =~ /\*/
+      0
+    elsif category.present?
+      frequent_count
+    elsif query.present?
+      similar_count
+    else
+      0
+    end
+  end
+
+  # Memoized [tags, sample_count] pair; the tag list is truncated to `limit`.
+  def frequent_tags_query
+    @frequent_tags_query ||= begin
+      tags, count = RelatedTagCalculator.frequent_tags_for_search(query, category: category_of)
+      [tags.take(limit), count]
+    end
+  end
+
   def frequent_tags
-    @frequent_tags ||= RelatedTagCalculator.frequent_tags_for_search(query, category: category_of).take(limit)
+    frequent_tags_query[0]
+  end
+
+  def frequent_count
+    frequent_tags_query[1]
+  end
+
+  # Memoized [tags, sample_count] pair; the tag list is truncated to `limit`.
+  def similar_tags_query
+    @similar_tags_query ||= begin
+      tags, count = RelatedTagCalculator.similar_tags_for_search(query, category: category_of)
+      [tags.take(limit), count]
+    end
   end
 
   def similar_tags
-    @similar_tags ||= RelatedTagCalculator.similar_tags_for_search(query, category: category_of).take(limit)
+    similar_tags_query[0]
+  end
+
+  def similar_count
+    similar_tags_query[1]
   end
 
   # Returns the top 20 most frequently added tags within the last 20 edits made by the user in the last hour.
@@ -101,6 +142,7 @@ class RelatedTagQuery
     {
       query: query,
       category: category,
+      sample_count: sample_count,
       tags: tags_with_categories(tags.map(&:name)),
       tags_overlap: tags_overlap,
       wiki_page_tags: tags_with_categories(wiki_page_tags),
diff --git a/test/unit/related_tag_calculator_test.rb b/test/unit/related_tag_calculator_test.rb
index 7ebc05f72..1d2f5ee53 100644
--- a/test/unit/related_tag_calculator_test.rb
+++ b/test/unit/related_tag_calculator_test.rb
@@ -30,7 +30,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
       create(:post, tag_string: "aaa bbb ccc")
       create(:post, tag_string: "aaa bbb")
 
-      assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa").pluck(:name))
+      assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa")[0].pluck(:name))
     end
 
     should "calculate the most frequent tags for a multiple tag search" do
@@ -38,7 +38,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
       create(:post, tag_string: "aaa bbb ccc ddd")
       create(:post, tag_string: "aaa eee fff")
 
-      assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb").pluck(:name))
+      assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb")[0].pluck(:name))
     end
 
     should "calculate the most frequent tags with a category constraint" do
@@ -46,8 +46,8 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
       create(:post, tag_string: "aaa bbb art:ccc")
       create(:post, tag_string: "aaa bbb")
 
-      assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.general).pluck(:name))
-      assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.artist).pluck(:name))
+      assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.general)[0].pluck(:name))
+      assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.artist)[0].pluck(:name))
     end
   end
 
@@ -57,9 +57,9 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
       create(:post, tag_string: "1girl solo", rating: "q")
       create(:post, tag_string: "1girl 1boy", rating: "q")
 
-      assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl").pluck(:name))
-      assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q").pluck(:name))
-      assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo").pluck(:name))
+      assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl")[0].pluck(:name))
+      assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q")[0].pluck(:name))
+      assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo")[0].pluck(:name))
     end
 
     should "calculate the similar tags for an aliased tag" do
@@ -67,7 +67,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
       create(:post, tag_string: "bunny dog")
       create(:post, tag_string: "bunny cat")
 
-      assert_equal(%w[bunny cat dog], RelatedTagCalculator.similar_tags_for_search("rabbit").pluck(:name))
+      assert_equal(%w[bunny cat dog], RelatedTagCalculator.similar_tags_for_search("rabbit")[0].pluck(:name))
     end
   end
 end