Add cosine similarity algorithm for the related tag calculator
This commit is contained in:
@@ -384,6 +384,3 @@ DEPENDENCIES
|
||||
vcr
|
||||
webmock
|
||||
whenever
|
||||
|
||||
BUNDLED WITH
|
||||
1.10.0
|
||||
|
||||
@@ -2,7 +2,7 @@ class RelatedTagCalculator
|
||||
# Fetches the raw tag strings of a sample of posts matching +tag+.
#
# tag   - tag query handed to Post.tag_match.
# limit - maximum number of posts to sample.
#
# Returns the tag_string column of up to +limit+ matching posts, ordered by
# posts.md5.
def self.find_tags(tag, limit)
  CurrentUser.without_safe_mode do
    # NOTE(review): the [] is presumably the value with_timeout falls back to
    # when the 5_000 timeout is hit -- confirm against Post.with_timeout.
    Post.with_timeout(5_000, []) do
      # pluck reads the single column directly; the older map(&:tag_string)
      # variant of this query is dropped so the query is issued only once.
      Post.tag_match(tag).limit(limit).select("posts.tag_string").reorder("posts.md5").pluck(:tag_string)
    end
  end
end
|
||||
@@ -34,6 +34,41 @@ class RelatedTagCalculator
|
||||
counts
|
||||
end
|
||||
|
||||
# Ranks the tags that co-occur with +tag+ by cosine similarity.
#
# This produces more useful related tags than raw co-occurrence counts, but
# is more db intensive: it runs one tag lookup and one count query per
# candidate tag.
#
# Pass 1 tallies every tag found on a sample of up to 400 posts matching
# +tag+. Pass 2 takes the 100 most frequent tags from that tally and scores
# each candidate as
#
#   (posts matching both tags) / sqrt(post_count(tag) * post_count(candidate))
#
# tag - name of the tag to find related tags for.
#
# Returns the result of convert_hash_to_array on the candidate => score hash,
# or an empty array when +tag+ has no Tag record.
def self.calculate_similar_from_sample(tag)
  # Without a Tag record there is no post_count to normalise by; bail out
  # before touching the posts table instead of failing on nil later.
  tag_record = Tag.find_by_name(tag)
  return [] if tag_record.nil?

  counts = Hash.new(0)

  CurrentUser.without_safe_mode do
    # NOTE(review): the [] is presumably with_timeout's fallback value when
    # the 5_000 timeout is hit -- confirm against Post.with_timeout.
    Post.with_timeout(5_000, []) do
      sample = Post.tag_match(tag).limit(400).select("posts.tag_string").reorder("posts.md5").pluck(:tag_string)
      sample.each do |tag_string|
        tag_string.scan(/\S+/).each { |name| counts[name] += 1 }
      end
    end
  end

  candidates = convert_hash_to_array(counts, 100)
  tag_post_count = tag_record.post_count
  similar_counts = {}

  CurrentUser.without_safe_mode do
    Post.with_timeout(5_000) do
      candidates.each do |ctag, _|
        # A tag can appear in a post's tag_string without having a Tag
        # record; such candidates cannot be scored, so skip them.
        ctag_record = Tag.find_by_name(ctag)
        next if ctag_record.nil?

        div = Math.sqrt(tag_post_count * ctag_record.post_count)
        next if div.zero?

        # Only pay for the count query once the candidate is known to be
        # scorable.
        acount = Post.tag_match("#{tag} #{ctag}").count
        similar_counts[ctag] = acount / div
      end
    end
  end

  convert_hash_to_array(similar_counts)
end
|
||||
|
||||
def self.calculate_from_sample(tags, limit, category_constraint = nil)
|
||||
counts = Hash.new {|h, k| h[k] = 0}
|
||||
|
||||
@@ -56,8 +91,8 @@ class RelatedTagCalculator
|
||||
counts
|
||||
end
|
||||
|
||||
# Turns a tag => score hash into a ranked list.
#
# hash  - Hash mapping a key to a numeric score.
# limit - maximum number of entries to keep (default: 25).
#
# Returns an Array of [key, score] pairs sorted by descending score and
# truncated to the first +limit+ entries.
def self.convert_hash_to_array(hash, limit = 25)
  # Negating the score gives a descending sort without a second reverse pass.
  hash.to_a.sort_by { |_key, score| -score }.slice(0, limit)
end
|
||||
|
||||
def self.convert_hash_to_string(hash)
|
||||
|
||||
Reference in New Issue
Block a user