Return the actual sample size used for related tag calculations

This means that both functions now return two separate values,
which must be destructured by the function calling them. This
isn't a huge issue so much as something to be aware of in case
either function needs to be used elsewhere.
This commit is contained in:
BrokenEagle
2020-03-06 07:24:12 +00:00
parent af96d68c0b
commit be0bb42ba9
3 changed files with 48 additions and 14 deletions

View File

@@ -6,13 +6,14 @@ module RelatedTagCalculator
search_sample_size = [search_count, search_sample_size].min
return [] if search_sample_size <= 0
tags = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category).limit(tag_sample_size)
tags, sample_count = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category)
tags = tags.limit(tag_sample_size)
tags = tags.sort_by do |tag|
# cosine distance(tag1, tag2) = 1 - {{tag1 tag2}} / sqrt({{tag1}} * {{tag2}})
1 - tag.overlap_count / Math.sqrt(tag.post_count * search_count.to_f)
end
tags
[tags, sample_count]
end
def self.frequent_tags_for_search(tag_query, search_sample_size: 1000, category: nil)
@@ -24,7 +25,7 @@ module RelatedTagCalculator
tags = tags.where("tags.post_count > 0")
tags = tags.where(category: category) if category.present?
tags = tags.order("overlap_count DESC, tags.post_count DESC, tags.name")
tags
[tags, sample_posts.length]
end
def self.frequent_tags_for_posts(posts)
@@ -36,7 +37,7 @@ module RelatedTagCalculator
Cache.get("similar_tags:#{tag_query}", cache_timeout, race_condition_ttl: 60.seconds) do
ApplicationRecord.with_timeout(search_timeout, []) do
CurrentUser.without_safe_mode do
RelatedTagCalculator.similar_tags_for_search(tag_query).take(max_tags).pluck(:name)
RelatedTagCalculator.similar_tags_for_search(tag_query)[0].take(max_tags).pluck(:name)
end
end
end

View File

@@ -42,12 +42,44 @@ class RelatedTagQuery
end
end
# Size of the post sample behind the current tag list.
#
# An explicit "frequent" or "similar" type picks the matching count. A "like"
# type or a wildcard query reports 0. Otherwise a present category selects the
# frequent count, a present query selects the similar count, and anything else
# reports 0.
def sample_count
  return frequent_count if type == "frequent"
  return similar_count if type == "similar"
  return 0 if type == "like" || query =~ /\*/
  return frequent_count if category.present?
  return similar_count if query.present?

  0
end
# Runs the frequent-tags search once and memoizes the pair it yields.
#
# RelatedTagCalculator.frequent_tags_for_search returns [tags, sample size], so
# the limit is applied to the tags element. Calling take(limit) on the pair
# itself would leave the tags unlimited and drop the count whenever limit < 2.
#
# @return [Array] two elements: at most `limit` tags, then the sample size
def frequent_tags_query
  @frequent_tags_query ||= begin
    tags, sampled = RelatedTagCalculator.frequent_tags_for_search(query, category: category_of)
    [tags.take(limit), sampled]
  end
end
# Tags element of the memoized frequent_tags_query pair.
#
# Reads only from frequent_tags_query so the search runs a single time; a
# separate @frequent_tags memo would trigger a second search whose result is
# thrown away.
def frequent_tags
  frequent_tags_query[0]
end
# Sample-size element (second slot) of the memoized frequent_tags_query pair.
def frequent_count
  _tags, sampled = frequent_tags_query
  sampled
end
# Runs the similar-tags search once and memoizes the pair it yields.
#
# RelatedTagCalculator.similar_tags_for_search returns [tags, sample size], or a
# bare [] when there is nothing to sample. The limit is applied to the tags
# element (not to the pair), and the empty result is normalized to no tags with
# a sample size of 0 so callers always get a usable pair.
#
# @return [Array] two elements: at most `limit` tags, then the sample size
def similar_tags_query
  @similar_tags_query ||= begin
    tags, sampled = RelatedTagCalculator.similar_tags_for_search(query, category: category_of)
    [(tags || []).take(limit), sampled || 0]
  end
end
# Tags element of the memoized similar_tags_query pair.
#
# Reads only from similar_tags_query so the search runs a single time; a
# separate @similar_tags memo would trigger a second search whose result is
# thrown away.
def similar_tags
  similar_tags_query[0]
end
# Sample-size element (second slot) of the memoized similar_tags_query pair.
def similar_count
  _tags, sampled = similar_tags_query
  sampled
end
# Returns the top 20 most frequently added tags within the last 20 edits made by the user in the last hour.
@@ -101,6 +133,7 @@ class RelatedTagQuery
{
query: query,
category: category,
sample_count: sample_count,
tags: tags_with_categories(tags.map(&:name)),
tags_overlap: tags_overlap,
wiki_page_tags: tags_with_categories(wiki_page_tags),