related tags: refactor to take PostQuery instead of tag string.

Refactor RelatedTagCalculator and RelatedTagQuery to take a PostQuery
object instead of a raw tag string.

* Fixes the related tag sidebar on the post index page having to reparse
  the query and reevaluate aliases.
* Fixes related tags being affected by the current user's safe mode and
  hide deleted posts settings.
This commit is contained in:
evazion
2020-05-08 15:31:25 -05:00
parent 40e3ac14c0
commit 2749269d5b
4 changed files with 36 additions and 25 deletions

View File

@@ -1,12 +1,12 @@
module RelatedTagCalculator
def self.similar_tags_for_search(tag_query, current_user, search_sample_size: 1000, tag_sample_size: 250, category: nil)
search_count = PostQueryBuilder.new(tag_query, current_user).fast_count
def self.similar_tags_for_search(post_query, search_sample_size: 1000, tag_sample_size: 250, category: nil)
search_count = post_query.fast_count
return [] if search_count.nil?
search_sample_size = [search_count, search_sample_size].min
return [] if search_sample_size <= 0
tags = frequent_tags_for_search(tag_query, current_user, search_sample_size: search_sample_size, category: category).limit(tag_sample_size)
tags = frequent_tags_for_search(post_query, search_sample_size: search_sample_size, category: category).limit(tag_sample_size)
tags = tags.sort_by do |tag|
# cosine distance(tag1, tag2) = 1 - {{tag1 tag2}} / sqrt({{tag1}} * {{tag2}})
1 - tag.overlap_count / Math.sqrt(tag.post_count * search_count.to_f)
@@ -15,8 +15,8 @@ module RelatedTagCalculator
tags
end
def self.frequent_tags_for_search(tag_query, current_user, search_sample_size: 1000, category: nil)
sample_posts = Post.user_tag_match(tag_query, current_user).reorder(:md5).limit(search_sample_size)
def self.frequent_tags_for_search(post_query, search_sample_size: 1000, category: nil)
sample_posts = post_query.build.reorder(:md5).limit(search_sample_size)
frequent_tags_for_post_relation(sample_posts, category: category)
end
@@ -36,10 +36,10 @@ module RelatedTagCalculator
tags_with_counts.sort_by { |tag_name, count| [-count, tag_name] }.map(&:first)
end
def self.cached_similar_tags_for_search(tag_query, max_tags, current_user, search_timeout: 2000, cache_timeout: 8.hours)
Cache.get("similar_tags:#{tag_query}", cache_timeout, race_condition_ttl: 60.seconds) do
def self.cached_similar_tags_for_search(post_query, max_tags, search_timeout: 2000, cache_timeout: 8.hours)
Cache.get("similar_tags:#{post_query.to_s}", cache_timeout, race_condition_ttl: 60.seconds) do
ApplicationRecord.with_timeout(search_timeout, []) do
RelatedTagCalculator.similar_tags_for_search(tag_query, current_user).take(max_tags).pluck(:name)
similar_tags_for_search(post_query).take(max_tags).pluck(:name)
end
end
end

View File

@@ -2,11 +2,12 @@ class RelatedTagQuery
include ActiveModel::Serializers::JSON
include ActiveModel::Serializers::Xml
attr_reader :query, :category, :type, :user, :limit
attr_reader :query, :post_query, :category, :type, :user, :limit
def initialize(query: nil, category: nil, type: nil, user: nil, limit: nil)
def initialize(query:, user: User.anonymous, category: nil, type: nil, limit: nil)
@user = user
@query = TagAlias.to_aliased(query.to_s.downcase.strip).join(" ")
@post_query = PostQueryBuilder.new(query, user)
@query = @post_query.to_s
@category = category
@type = type
@limit = (limit =~ /^\d+/ ? limit.to_i : 25)
@@ -43,11 +44,11 @@ class RelatedTagQuery
end
def frequent_tags
@frequent_tags ||= RelatedTagCalculator.frequent_tags_for_search(query, category: category_of).take(limit)
@frequent_tags ||= RelatedTagCalculator.frequent_tags_for_search(post_query, category: category_of).take(limit)
end
def similar_tags
@similar_tags ||= RelatedTagCalculator.similar_tags_for_search(query, user, category: category_of).take(limit)
@similar_tags ||= RelatedTagCalculator.similar_tags_for_search(post_query, category: category_of).take(limit)
end
# Returns the top 20 most frequently added tags within the last 20 edits made by the user in the last hour.

View File

@@ -40,7 +40,7 @@ module PostSetPresenters
end
def similar_tags
RelatedTagCalculator.cached_similar_tags_for_search(post_set.tag_string, MAX_TAGS, CurrentUser.user)
RelatedTagCalculator.cached_similar_tags_for_search(post_set.query, MAX_TAGS)
end
def frequent_tags

View File

@@ -1,6 +1,16 @@
require 'test_helper'
class RelatedTagCalculatorTest < ActiveSupport::TestCase
def frequent_tags_for_search(tag_search, user = CurrentUser.user, **options)
post_query = PostQueryBuilder.new(tag_search, user)
RelatedTagCalculator.frequent_tags_for_search(post_query, **options).pluck(:name)
end
def similar_tags_for_search(tag_search, user = CurrentUser.user, **options)
post_query = PostQueryBuilder.new(tag_search, user)
RelatedTagCalculator.similar_tags_for_search(post_query, **options).pluck(:name)
end
setup do
@user = create(:user)
end
@@ -23,7 +33,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
create(:post, tag_string: "aaa bbb ccc")
create(:post, tag_string: "aaa bbb")
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa", @user).pluck(:name))
assert_equal(%w[aaa bbb ccc ddd], frequent_tags_for_search("aaa", @user))
end
should "calculate the most frequent tags for a multiple tag search" do
@@ -31,7 +41,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
create(:post, tag_string: "aaa bbb ccc ddd")
create(:post, tag_string: "aaa eee fff")
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb", @user).pluck(:name))
assert_equal(%w[aaa bbb ccc ddd], frequent_tags_for_search("aaa bbb", @user))
end
should "calculate the most frequent tags with a category constraint" do
@@ -39,20 +49,20 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
create(:post, tag_string: "aaa bbb ccc")
create(:post, tag_string: "aaa bbb")
assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", @user, category: Tag.categories.general).pluck(:name))
assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", @user, category: Tag.categories.artist).pluck(:name))
assert_equal(%w[aaa bbb], frequent_tags_for_search("aaa", @user, category: Tag.categories.general))
assert_equal(%w[ccc], frequent_tags_for_search("aaa", @user, category: Tag.categories.artist))
end
end
context "#similar_tags_for_search" do
should "calculate the most similar tags for a search" do
create(:post, tag_string: "1girl solo", rating: "s")
create(:post, tag_string: "1girl solo", rating: "q")
create(:post, tag_string: "1girl 1boy", rating: "q")
create(:post, tag_string: "1girl solo", rating: "s", score: 1)
create(:post, tag_string: "1girl solo", rating: "q", score: 2)
create(:post, tag_string: "1girl 1boy", rating: "q", score: 2)
assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl", @user).pluck(:name))
assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q", @user).pluck(:name))
assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo", @user).pluck(:name))
assert_equal(%w[1girl solo 1boy], similar_tags_for_search("1girl", @user))
assert_equal(%w[1girl 1boy solo], similar_tags_for_search("score:2", @user))
assert_equal(%w[solo 1girl], similar_tags_for_search("solo", @user))
end
should "calculate the similar tags for an aliased tag" do
@@ -60,7 +70,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
create(:post, tag_string: "bunny dog")
create(:post, tag_string: "bunny cat")
assert_equal(%w[bunny cat dog], RelatedTagCalculator.similar_tags_for_search("rabbit", @user).pluck(:name))
assert_equal(%w[bunny cat dog], similar_tags_for_search("rabbit", @user))
end
end
end