Rewrite related tags implementation.
Rewrite the implementation of related tags to be simpler, faster, and more accurate:

* The related tags are now calculated by taking a random sample of 1000 posts, finding the top 250 most frequent tags among those posts, then ordering those tags by cosine similarity.
* Related tags can generally be calculated in 50-300ms at these sample sizes. Very high sample sizes (25000+ posts) are still relatively fast (1-3 seconds), but generally they don't improve accuracy much.
* Related tags are now cached in Redis rather than in the tags table. The related_tags column in the tags table is no longer used.
* Only the related tags in the search taglist are cached. The related tags returned by the 'Related tags' button are not cached.
* The cache lifetime is a fixed 4 hours.
* The 'Related tags' button now works with metatags.
* The /related_tag page now works with metatags and multitag searches.

Fixes #4134, #4146.
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
class Tag < ApplicationRecord
|
||||
COSINE_SIMILARITY_RELATED_TAG_THRESHOLD = 300
|
||||
COUNT_METATAGS = %w[
|
||||
comment_count deleted_comment_count active_comment_count
|
||||
note_count deleted_note_count active_note_count
|
||||
@@ -852,57 +851,6 @@ class Tag < ApplicationRecord
|
||||
end
|
||||
end
|
||||
|
||||
module RelationMethods
|
||||
# Recalculates this tag's related tags and saves the record.
#
# Does nothing (and returns nil) unless should_update_related? is true.
# Otherwise the result of RelatedTagCalculator.calculate_from_sample_to_array
# is joined with single spaces and stored in related_tags, the
# related_tags_updated_at timestamp is set to the current time, and the
# record is saved. Returns the result of save.
def update_related
  if should_update_related?
    # The calculation runs with User.first as the scoped current user and
    # the loopback address as the request IP.
    CurrentUser.scoped(User.first, "127.0.0.1") do
      sampled = RelatedTagCalculator.calculate_from_sample_to_array(name)
      self.related_tags = sampled.join(" ")
    end
    self.related_tags_updated_at = Time.now

    # For tags with more than 20 posts, fix_post_count is invoked only when
    # a random draw in 0...post_count comes up 0 or 1.
    wants_count_fix = post_count > 20 && rand(post_count) <= 1
    fix_post_count if wants_count_fix

    save
  end
rescue ActiveRecord::StatementInvalid
  # NOTE(review): statement errors are dropped without logging and the
  # method returns nil; presumably a best-effort update - confirm.
end
|
||||
|
||||
# Requests a refresh of this tag's related tags when no refresh was
# requested recently and should_update_related? is true.
#
# Tags with fewer than COSINE_SIMILARITY_RELATED_TAG_THRESHOLD posts are
# handed to UpdateRelatedTagsJob. For all other tags a "calculate <name>"
# message is sent through SqsService, and related_tags_updated_at is set to
# the current time and saved right away.
def update_related_if_outdated
  digest = Cache.hash(name)
  lock_key = "urt:#{digest}"

  # Skip when the marker entry is already cached or the data is still fresh.
  return unless Cache.get(lock_key).nil? && should_update_related?

  if post_count >= COSINE_SIMILARITY_RELATED_TAG_THRESHOLD
    queue = SqsService.new(Danbooru.config.aws_sqs_reltagcalc_url)
    queue.send_message("calculate #{name}")
    self.related_tags_updated_at = Time.now
    save
  else
    UpdateRelatedTagsJob.perform_later(self)
  end

  # Mutex to prevent redundant updates.
  # NOTE(review): 600 is presumably a lifetime in seconds - confirm against Cache.put.
  Cache.put(lock_key, true, 600)
end
|
||||
|
||||
# Returns how long the stored related tags stay valid, as a number that
# should_update_related? interprets as hours.
#
# The value is the square root of post_count (negative counts are treated as
# zero), limited to the range 24..720 (24 * 30). The bounds are returned as
# Integers; values in between are returned as the Float square root.
def related_cache_expiry
  min_hours = 24
  max_hours = 24 * 30

  # [post_count, 0].max guards Math.sqrt against a negative post_count.
  Math.sqrt([post_count, 0].max).clamp(min_hours, max_hours)
end
|
||||
|
||||
# Tells whether the stored related tags need to be recalculated.
#
# True when related_tags is blank, when related_tags_updated_at is blank, or
# when that timestamp is older than related_cache_expiry hours ago.
def should_update_related?
  return true if related_tags.blank?

  refreshed_at = related_tags_updated_at
  return true if refreshed_at.blank?

  stale_before = related_cache_expiry.hours.ago
  refreshed_at < stale_before
end
|
||||
|
||||
# Returns the stored related_tags string split on single spaces and grouped
# into arrays of two elements, after first calling
# update_related_if_outdated.
#
# NOTE(review): each pair is presumably [tag_name, score] - confirm against
# whatever writes related_tags.
def related_tag_array
  update_related_if_outdated

  # A nil related_tags becomes "" via to_s, which yields an empty result.
  tokens = related_tags.to_s.split(/ /)
  tokens.in_groups_of(2)
end
|
||||
end
|
||||
|
||||
module SearchMethods
|
||||
def empty
|
||||
where("tags.post_count <= 0")
|
||||
@@ -1023,6 +971,5 @@ class Tag < ApplicationRecord
|
||||
extend StatisticsMethods
|
||||
extend NameMethods
|
||||
extend ParseMethods
|
||||
include RelationMethods
|
||||
extend SearchMethods
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user