Return the actual sample size used for related tag calculations
This means that both functions now return two separate values, which must be destructured by the calling function. This isn't a huge issue, but it is something to be aware of in case either function needs to be used elsewhere.
This commit is contained in:
@@ -6,13 +6,14 @@ module RelatedTagCalculator
|
||||
search_sample_size = [search_count, search_sample_size].min
|
||||
return [] if search_sample_size <= 0
|
||||
|
||||
tags = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category).limit(tag_sample_size)
|
||||
tags, sample_count = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category)
|
||||
tags = tags.limit(tag_sample_size)
|
||||
tags = tags.sort_by do |tag|
|
||||
# cosine distance(tag1, tag2) = 1 - {{tag1 tag2}} / sqrt({{tag1}} * {{tag2}})
|
||||
1 - tag.overlap_count / Math.sqrt(tag.post_count * search_count.to_f)
|
||||
end
|
||||
|
||||
tags
|
||||
[tags, sample_count]
|
||||
end
|
||||
|
||||
def self.frequent_tags_for_search(tag_query, search_sample_size: 1000, category: nil)
|
||||
@@ -24,7 +25,7 @@ module RelatedTagCalculator
|
||||
tags = tags.where("tags.post_count > 0")
|
||||
tags = tags.where(category: category) if category.present?
|
||||
tags = tags.order("overlap_count DESC, tags.post_count DESC, tags.name")
|
||||
tags
|
||||
[tags, sample_posts.length]
|
||||
end
|
||||
|
||||
def self.frequent_tags_for_posts(posts)
|
||||
@@ -36,7 +37,7 @@ module RelatedTagCalculator
|
||||
Cache.get("similar_tags:#{tag_query}", cache_timeout, race_condition_ttl: 60.seconds) do
|
||||
ApplicationRecord.with_timeout(search_timeout, []) do
|
||||
CurrentUser.without_safe_mode do
|
||||
RelatedTagCalculator.similar_tags_for_search(tag_query).take(max_tags).pluck(:name)
|
||||
RelatedTagCalculator.similar_tags_for_search(tag_query)[0].take(max_tags).pluck(:name)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -42,12 +42,44 @@ class RelatedTagQuery
|
||||
end
|
||||
end
|
||||
|
||||
# Picks which count to report for the current request.
#
# Checks run in order and the first match wins:
#   1. an explicit type of "frequent" or "similar" selects that count;
#   2. a "like" type, or a query containing a literal "*", reports 0;
#   3. otherwise a present category selects the frequent count, and a
#      present query selects the similar count;
#   4. anything else reports 0.
def sample_count
  return frequent_count if type == "frequent"
  return similar_count if type == "similar"
  return 0 if type == "like" || query =~ /\*/
  return frequent_count if category.present?
  return similar_count if query.present?

  0
end
|
||||
|
||||
# Runs the frequent-tags search once and memoizes a two-element array:
# the tags, limited to `limit` entries, followed by the count that
# RelatedTagCalculator.frequent_tags_for_search returns alongside them.
#
# The calculator returns a [tags, count] pair, so the limit has to be
# applied to the tags element. Calling `.take(limit)` on the pair itself
# would trim the pair (dropping the count when limit is 1) and leave the
# tags unlimited.
def frequent_tags_query
  @frequent_tags_query ||= begin
    tags, count = RelatedTagCalculator.frequent_tags_for_search(query, category: category_of)
    [tags.take(limit), count]
  end
end
|
||||
|
||||
# Returns the tags element (index 0) of the memoized frequent_tags_query
# result.
#
# The search is not repeated here: frequent_tags_query already ran and
# cached it, so a second call to RelatedTagCalculator would only duplicate
# the work and memoize a value nothing reads.
def frequent_tags
  frequent_tags_query[0]
end
|
||||
|
||||
# Returns the second element of the memoized frequent_tags_query result
# (nil when that result has fewer than two elements).
def frequent_count
  _tags, count = frequent_tags_query
  count
end
|
||||
|
||||
# Runs the similar-tags search once and memoizes a two-element array:
# the tags, limited to `limit` entries, followed by the sample count.
#
# RelatedTagCalculator.similar_tags_for_search returns a [tags, count]
# pair, so the limit is applied to the tags element rather than to the
# pair. The calculator can also return a bare empty array when there is
# nothing to sample; that case is normalized to no tags and a count of 0
# so callers never see nil.
def similar_tags_query
  @similar_tags_query ||= begin
    tags, count = RelatedTagCalculator.similar_tags_for_search(query, category: category_of)
    [(tags || []).take(limit), count || 0]
  end
end
|
||||
|
||||
# Returns the tags element (index 0) of the memoized similar_tags_query
# result.
#
# The search is not repeated here: similar_tags_query already ran and
# cached it, so a second call to RelatedTagCalculator would only duplicate
# the work and memoize a value nothing reads.
def similar_tags
  similar_tags_query[0]
end
|
||||
|
||||
# Returns the second element of the memoized similar_tags_query result
# (nil when that result has fewer than two elements).
def similar_count
  _tags, count = similar_tags_query
  count
end
|
||||
|
||||
# Returns the top 20 most frequently added tags within the last 20 edits made by the user in the last hour.
|
||||
@@ -101,6 +133,7 @@ class RelatedTagQuery
|
||||
{
|
||||
query: query,
|
||||
category: category,
|
||||
sample_count: sample_count,
|
||||
tags: tags_with_categories(tags.map(&:name)),
|
||||
tags_overlap: tags_overlap,
|
||||
wiki_page_tags: tags_with_categories(wiki_page_tags),
|
||||
|
||||
Reference in New Issue
Block a user