Return the actual sample size used for related tag calculations
Returning the sample size meant that both `frequent_tags_for_search` and `similar_tags_for_search` now return two separate values (`[tags, sample_count]`), which must be destructured by the caller. This isn't a huge issue so much as something to be aware of in case either function needs to be used elsewhere.
This commit is contained in:
@@ -6,13 +6,14 @@ module RelatedTagCalculator
|
||||
search_sample_size = [search_count, search_sample_size].min
|
||||
return [] if search_sample_size <= 0
|
||||
|
||||
tags = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category).limit(tag_sample_size)
|
||||
tags, sample_count = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category)
|
||||
tags = tags.limit(tag_sample_size)
|
||||
tags = tags.sort_by do |tag|
|
||||
# cosine distance(tag1, tag2) = 1 - {{tag1 tag2}} / sqrt({{tag1}} * {{tag2}})
|
||||
1 - tag.overlap_count / Math.sqrt(tag.post_count * search_count.to_f)
|
||||
end
|
||||
|
||||
tags
|
||||
[tags, sample_count]
|
||||
end
|
||||
|
||||
def self.frequent_tags_for_search(tag_query, search_sample_size: 1000, category: nil)
|
||||
@@ -24,7 +25,7 @@ module RelatedTagCalculator
|
||||
tags = tags.where("tags.post_count > 0")
|
||||
tags = tags.where(category: category) if category.present?
|
||||
tags = tags.order("overlap_count DESC, tags.post_count DESC, tags.name")
|
||||
tags
|
||||
[tags, sample_posts.length]
|
||||
end
|
||||
|
||||
def self.frequent_tags_for_posts(posts)
|
||||
@@ -36,7 +37,7 @@ module RelatedTagCalculator
|
||||
Cache.get("similar_tags:#{tag_query}", cache_timeout, race_condition_ttl: 60.seconds) do
|
||||
ApplicationRecord.with_timeout(search_timeout, []) do
|
||||
CurrentUser.without_safe_mode do
|
||||
RelatedTagCalculator.similar_tags_for_search(tag_query).take(max_tags).pluck(:name)
|
||||
RelatedTagCalculator.similar_tags_for_search(tag_query)[0].take(max_tags).pluck(:name)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -42,12 +42,44 @@ class RelatedTagQuery
|
||||
end
|
||||
end
|
||||
|
||||
def sample_count
|
||||
if type == "frequent"
|
||||
frequent_count
|
||||
elsif type == "similar"
|
||||
similar_count
|
||||
elsif type == "like" || query =~ /\*/
|
||||
0
|
||||
elsif category.present?
|
||||
frequent_count
|
||||
elsif query.present?
|
||||
similar_count
|
||||
else
|
||||
0
|
||||
end
|
||||
end
|
||||
|
||||
def frequent_tags_query
|
||||
@frequent_tags_query ||= RelatedTagCalculator.frequent_tags_for_search(query, category: category_of).take(limit)
|
||||
end
|
||||
|
||||
def frequent_tags
|
||||
@frequent_tags ||= RelatedTagCalculator.frequent_tags_for_search(query, category: category_of).take(limit)
|
||||
frequent_tags_query[0]
|
||||
end
|
||||
|
||||
def frequent_count
|
||||
frequent_tags_query[1]
|
||||
end
|
||||
|
||||
def similar_tags_query
|
||||
@similar_tags_query ||= RelatedTagCalculator.similar_tags_for_search(query, category: category_of).take(limit)
|
||||
end
|
||||
|
||||
def similar_tags
|
||||
@similar_tags ||= RelatedTagCalculator.similar_tags_for_search(query, category: category_of).take(limit)
|
||||
similar_tags_query[0]
|
||||
end
|
||||
|
||||
def similar_count
|
||||
similar_tags_query[1]
|
||||
end
|
||||
|
||||
# Returns the top 20 most frequently added tags within the last 20 edits made by the user in the last hour.
|
||||
@@ -101,6 +133,7 @@ class RelatedTagQuery
|
||||
{
|
||||
query: query,
|
||||
category: category,
|
||||
sample_count: sample_count,
|
||||
tags: tags_with_categories(tags.map(&:name)),
|
||||
tags_overlap: tags_overlap,
|
||||
wiki_page_tags: tags_with_categories(wiki_page_tags),
|
||||
|
||||
@@ -30,7 +30,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
|
||||
create(:post, tag_string: "aaa bbb ccc")
|
||||
create(:post, tag_string: "aaa bbb")
|
||||
|
||||
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa").pluck(:name))
|
||||
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa")[0].pluck(:name))
|
||||
end
|
||||
|
||||
should "calculate the most frequent tags for a multiple tag search" do
|
||||
@@ -38,7 +38,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
|
||||
create(:post, tag_string: "aaa bbb ccc ddd")
|
||||
create(:post, tag_string: "aaa eee fff")
|
||||
|
||||
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb").pluck(:name))
|
||||
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb")[0].pluck(:name))
|
||||
end
|
||||
|
||||
should "calculate the most frequent tags with a category constraint" do
|
||||
@@ -46,8 +46,8 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
|
||||
create(:post, tag_string: "aaa bbb art:ccc")
|
||||
create(:post, tag_string: "aaa bbb")
|
||||
|
||||
assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.general).pluck(:name))
|
||||
assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.artist).pluck(:name))
|
||||
assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.general)[0].pluck(:name))
|
||||
assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.artist)[0].pluck(:name))
|
||||
end
|
||||
end
|
||||
|
||||
@@ -57,9 +57,9 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
|
||||
create(:post, tag_string: "1girl solo", rating: "q")
|
||||
create(:post, tag_string: "1girl 1boy", rating: "q")
|
||||
|
||||
assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl").pluck(:name))
|
||||
assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q").pluck(:name))
|
||||
assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo").pluck(:name))
|
||||
assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl")[0].pluck(:name))
|
||||
assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q")[0].pluck(:name))
|
||||
assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo")[0].pluck(:name))
|
||||
end
|
||||
|
||||
should "calculate the similar tags for an aliased tag" do
|
||||
@@ -67,7 +67,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
|
||||
create(:post, tag_string: "bunny dog")
|
||||
create(:post, tag_string: "bunny cat")
|
||||
|
||||
assert_equal(%w[bunny cat dog], RelatedTagCalculator.similar_tags_for_search("rabbit").pluck(:name))
|
||||
assert_equal(%w[bunny cat dog], RelatedTagCalculator.similar_tags_for_search("rabbit")[0].pluck(:name))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user