Rewrite related tags implementation.

Rewrite the implementation of related tags to be simpler, faster, and
more accurate:

* The related tags are now calculated by taking a random sample of 1000
  posts, finding the top 250 most frequent tags among those posts, then
  ordering those tags by cosine similarity.

* Related tags can generally be calculated in 50-300ms at these sample
  sizes. Very high sample sizes (25000+ posts) are still relatively fast
  (1-3 seconds), but generally they don't improve accuracy much.

* Related tags are now cached in redis rather than in the tags table.
  The related_tags column in the tags table is no longer used.

* Only the related tags in the search taglist are cached. The related
  tags returned by the 'Related tags' button are not cached.

* The cache lifetime is a fixed 4 hours.

* The 'Related tags' button now works with metatags.

* The /related_tag page now works with metatags and multitag searches.

Fixes #4134, #4146.
This commit is contained in:
evazion
2019-08-30 19:08:56 -05:00
parent 7b8584e3b0
commit 6dd331745a
11 changed files with 99 additions and 256 deletions

View File

@@ -1,5 +1,7 @@
module PostSetPresenters
class Post < Base
MAX_TAGS = 25
attr_accessor :post_set
delegate :posts, :to => :post_set
@@ -8,7 +10,7 @@ module PostSetPresenters
end
def tag_set_presenter
@tag_set_presenter ||= TagSetPresenter.new(related_tags)
@tag_set_presenter ||= TagSetPresenter.new(related_tags.take(MAX_TAGS))
end
def post_previews_html(template, options = {})
@@ -19,55 +21,42 @@ module PostSetPresenters
if post_set.is_pattern_search?
pattern_tags
elsif post_set.is_saved_search?
["search:all"] + SavedSearch.labels_for(CurrentUser.user.id).map {|x| "search:#{x}"}
saved_search_tags
elsif post_set.is_empty_tag? || post_set.tag_string == "order:rank"
popular_tags
elsif post_set.is_single_tag?
related_tags_for_single(post_set.tag_string)
elsif post_set.unordered_tag_array.size == 1
related_tags_for_single(post_set.unordered_tag_array.first)
elsif Tag.has_metatag?(post_set.tag_array, *Tag::SUBQUERY_METATAGS)
calculate_related_tags_from_post_set
similar_tags
else
calculate_related_tags_from_post_set
frequent_tags
end
end
def popular_tags
if PopularSearchService.enabled?
PopularSearchService.new(Date.today).tags.slice(0, 25)
PopularSearchService.new(Date.today).tags
else
Tag.trending
end
end
def similar_tags
Cache.get("similar_tags:#{post_set.tag_string}", 4.hours, race_condition_ttl: 60.seconds) do
ApplicationRecord.with_timeout(1_000, []) do
RelatedTagCalculator.similar_tags_for_search(post_set.tag_string).take(MAX_TAGS).pluck(:name)
end
end
end
def frequent_tags
RelatedTagCalculator.frequent_tags_for_posts(post_set.posts).take(MAX_TAGS)
end
def pattern_tags
Tag.name_matches(post_set.tag_string).select("name").limit(Danbooru.config.tag_query_limit).order("post_count DESC").map(&:name)
Tag.name_matches(post_set.tag_string).order(post_count: :desc).limit(MAX_TAGS).pluck(:name)
end
def related_tags_for_group
normalized_tags = Tag.normalize_query(post_set.tag_string, normalize_aliases: false)
Cache.get("PostSetPresenters::Post#related_tags_for_group(#{normalized_tags})", 5.minutes) do
RelatedTagCalculator.calculate_from_sample_to_array(normalized_tags).map(&:first)
end
end
def related_tags_for_single(tag_string)
tag = Tag.find_by_name(tag_string.downcase)
if tag
tag.related_tag_array.map(&:first)
else
calculate_related_tags_from_post_set
end
end
def calculate_related_tags_from_post_set
RelatedTagCalculator.calculate_from_posts_to_array(post_set.posts).map(&:first)
end
def saved_search_labels
SavedSearch.labels_for(CurrentUser.user.id).map {|x| "search:#{x}"}
def saved_search_tags
["search:all"] + SavedSearch.labels_for(CurrentUser.user.id).map {|x| "search:#{x}"}
end
def tag_list_html(**options)