Return the actual sample size used for related tag calculations

This means that both functions now return two separate values,
which must be destructured by the function calling them. This
isn't a major issue, but it is something to be aware of in case
either function needs to be used elsewhere.
This commit is contained in:
BrokenEagle
2020-03-06 07:24:12 +00:00
parent af96d68c0b
commit be0bb42ba9
3 changed files with 48 additions and 14 deletions

View File

@@ -6,13 +6,14 @@ module RelatedTagCalculator
search_sample_size = [search_count, search_sample_size].min
return [] if search_sample_size <= 0
tags = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category).limit(tag_sample_size)
tags, sample_count = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category)
tags = tags.limit(tag_sample_size)
tags = tags.sort_by do |tag|
# cosine distance(tag1, tag2) = 1 - {{tag1 tag2}} / sqrt({{tag1}} * {{tag2}})
1 - tag.overlap_count / Math.sqrt(tag.post_count * search_count.to_f)
end
tags
[tags, sample_count]
end
def self.frequent_tags_for_search(tag_query, search_sample_size: 1000, category: nil)
@@ -24,7 +25,7 @@ module RelatedTagCalculator
tags = tags.where("tags.post_count > 0")
tags = tags.where(category: category) if category.present?
tags = tags.order("overlap_count DESC, tags.post_count DESC, tags.name")
tags
[tags, sample_posts.length]
end
def self.frequent_tags_for_posts(posts)
@@ -36,7 +37,7 @@ module RelatedTagCalculator
Cache.get("similar_tags:#{tag_query}", cache_timeout, race_condition_ttl: 60.seconds) do
ApplicationRecord.with_timeout(search_timeout, []) do
CurrentUser.without_safe_mode do
RelatedTagCalculator.similar_tags_for_search(tag_query).take(max_tags).pluck(:name)
RelatedTagCalculator.similar_tags_for_search(tag_query)[0].take(max_tags).pluck(:name)
end
end
end

View File

@@ -42,12 +42,44 @@ class RelatedTagQuery
end
end
# Sample size reported alongside the related tags.
#
# An explicit `type` wins: "frequent" and "similar" report their own
# counts, while "like" (or any query containing a `*` wildcard) reports 0.
# Otherwise a present category selects the frequent count, a present
# query selects the similar count, and anything else reports 0.
def sample_count
  return frequent_count if type == "frequent"
  return similar_count if type == "similar"
  return 0 if type == "like" || query =~ /\*/
  return frequent_count if category.present?
  return similar_count if query.present?

  0
end
# Memoized `[tags, sample_size]` pair for the "frequent" calculation.
#
# RelatedTagCalculator.frequent_tags_for_search returns a two-element
# array, so the limit must be applied to the tags element. Calling
# `take(limit)` on the pair itself leaves the tags unbounded and, for a
# limit below 2, discards the sample size.
#
# @return [Array] `[tags, sample_size]` with at most `limit` tags
def frequent_tags_query
  @frequent_tags_query ||= begin
    tags, sample_size = RelatedTagCalculator.frequent_tags_for_search(query, category: category_of)
    [tags.take(limit), sample_size]
  end
end
# Tags element of the pair built by `frequent_tags_query`.
#
# Reads from that memoized pair only, so the calculator is not invoked a
# second time just to fetch the tags.
def frequent_tags
  frequent_tags_query[0]
end
# Sample-size element of the pair built by `frequent_tags_query`.
def frequent_count
  _tags, sample_size = frequent_tags_query
  sample_size
end
# Memoized `[tags, sample_size]` pair for the "similar" calculation.
#
# The calculator returns a pair, so the limit must be applied to the
# tags element. Calling `take(limit)` on the pair itself leaves the tags
# unbounded and, for a limit below 2, discards the sample size.
#
# NOTE(review): the calculator appears to return a bare `[]` when there is
# nothing to sample; in that case both elements destructure to nil, so they
# are normalized to an empty tag list and a sample size of 0.
#
# @return [Array] `[tags, sample_size]` with at most `limit` tags
def similar_tags_query
  @similar_tags_query ||= begin
    tags, sample_size = RelatedTagCalculator.similar_tags_for_search(query, category: category_of)
    [(tags || []).take(limit), sample_size || 0]
  end
end
# Tags element of the pair built by `similar_tags_query`.
#
# Reads from that memoized pair only, so the calculator is not invoked a
# second time just to fetch the tags.
def similar_tags
  similar_tags_query[0]
end
# Sample-size element of the pair built by `similar_tags_query`.
def similar_count
  _tags, sample_size = similar_tags_query
  sample_size
end
# Returns the top 20 most frequently added tags within the last 20 edits made by the user in the last hour.
@@ -101,6 +133,7 @@ class RelatedTagQuery
{
query: query,
category: category,
sample_count: sample_count,
tags: tags_with_categories(tags.map(&:name)),
tags_overlap: tags_overlap,
wiki_page_tags: tags_with_categories(wiki_page_tags),

View File

@@ -30,7 +30,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
create(:post, tag_string: "aaa bbb ccc")
create(:post, tag_string: "aaa bbb")
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa").pluck(:name))
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa")[0].pluck(:name))
end
should "calculate the most frequent tags for a multiple tag search" do
@@ -38,7 +38,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
create(:post, tag_string: "aaa bbb ccc ddd")
create(:post, tag_string: "aaa eee fff")
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb").pluck(:name))
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb")[0].pluck(:name))
end
should "calculate the most frequent tags with a category constraint" do
@@ -46,8 +46,8 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
create(:post, tag_string: "aaa bbb art:ccc")
create(:post, tag_string: "aaa bbb")
assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.general).pluck(:name))
assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.artist).pluck(:name))
assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.general)[0].pluck(:name))
assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.artist)[0].pluck(:name))
end
end
@@ -57,9 +57,9 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
create(:post, tag_string: "1girl solo", rating: "q")
create(:post, tag_string: "1girl 1boy", rating: "q")
assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl").pluck(:name))
assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q").pluck(:name))
assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo").pluck(:name))
assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl")[0].pluck(:name))
assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q")[0].pluck(:name))
assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo")[0].pluck(:name))
end
should "calculate the similar tags for an aliased tag" do
@@ -67,7 +67,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
create(:post, tag_string: "bunny dog")
create(:post, tag_string: "bunny cat")
assert_equal(%w[bunny cat dog], RelatedTagCalculator.similar_tags_for_search("rabbit").pluck(:name))
assert_equal(%w[bunny cat dog], RelatedTagCalculator.similar_tags_for_search("rabbit")[0].pluck(:name))
end
end
end