Rewrite related tags implementation.
Rewrite the implementation of related tags to be simpler, faster, and more accurate:

* The related tags are now calculated by taking a random sample of 1000 posts, finding the 250 most frequent tags among those posts, then ordering those tags by cosine similarity.
* Related tags can generally be calculated in 50-300ms at these sample sizes. Very high sample sizes (25000+ posts) are still relatively fast (1-3 seconds), but generally they don't improve accuracy much.
* Related tags are now cached in redis rather than in the tags table. The related_tags column in the tags table is no longer used.
* Only the related tags in the search taglist are cached. The related tags returned by the 'Related tags' button are not cached.
* The cache lifetime is a fixed 4 hours.
* The 'Related tags' button now works with metatags.
* The /related_tag page now works with metatags and multitag searches.

Fixes #4134, #4146.
This commit is contained in:
@@ -2,11 +2,20 @@ require 'test_helper'
|
||||
|
||||
class RelatedTagsControllerTest < ActionDispatch::IntegrationTest
|
||||
context "The related tags controller" do
|
||||
setup do
|
||||
create(:post, tag_string: "touhou")
|
||||
end
|
||||
|
||||
context "show action" do
|
||||
should "work" do
|
||||
get related_tag_path, params: { query: "touhou" }
|
||||
assert_response :success
|
||||
end
|
||||
|
||||
should "work for .json responses" do
|
||||
get related_tag_path(format: :json), params: { query: "touhou" }
|
||||
assert_response :success
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -12,57 +12,55 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
|
||||
CurrentUser.ip_addr = nil
|
||||
end
|
||||
|
||||
context "A related tag calculator" do
|
||||
context "for a post set" do
|
||||
setup do
|
||||
FactoryBot.create(:post, :tag_string => "aaa bbb ccc ddd")
|
||||
FactoryBot.create(:post, :tag_string => "aaa bbb ccc")
|
||||
FactoryBot.create(:post, :tag_string => "aaa bbb")
|
||||
@posts = Post.tag_match("aaa")
|
||||
end
|
||||
context "RelatedTagCalculator" do
|
||||
context "#frequent_tags_for_posts" do
|
||||
should "calculate the most frequent tags for a set of posts" do
|
||||
create(:post, tag_string: "aaa bbb ccc ddd")
|
||||
create(:post, tag_string: "aaa bbb ccc")
|
||||
create(:post, tag_string: "aaa bbb")
|
||||
posts = Post.tag_match("aaa")
|
||||
|
||||
should "calculate the related tags" do
|
||||
assert_equal({"aaa"=>3, "bbb"=>3, "ccc"=>2, "ddd"=>1}, RelatedTagCalculator.calculate_from_posts(@posts))
|
||||
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_posts(posts))
|
||||
end
|
||||
end
|
||||
|
||||
should "calculate related tags for a tag" do
|
||||
posts = []
|
||||
posts << FactoryBot.create(:post, :tag_string => "aaa bbb ccc ddd")
|
||||
posts << FactoryBot.create(:post, :tag_string => "aaa bbb ccc")
|
||||
posts << FactoryBot.create(:post, :tag_string => "aaa bbb")
|
||||
context "#frequent_tags_for_search" do
|
||||
should "calculate the most frequent tags for a single tag search" do
|
||||
create(:post, tag_string: "aaa bbb ccc ddd")
|
||||
create(:post, tag_string: "aaa bbb ccc")
|
||||
create(:post, tag_string: "aaa bbb")
|
||||
|
||||
assert_equal({"aaa" => 3, "bbb" => 3, "ccc" => 2, "ddd" => 1}, RelatedTagCalculator.calculate_from_sample("aaa", 10))
|
||||
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa").pluck(:name))
|
||||
end
|
||||
|
||||
should "calculate the most frequent tags for a multiple tag search" do
|
||||
create(:post, tag_string: "aaa bbb ccc")
|
||||
create(:post, tag_string: "aaa bbb ccc ddd")
|
||||
create(:post, tag_string: "aaa eee fff")
|
||||
|
||||
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb").pluck(:name))
|
||||
end
|
||||
|
||||
should "calculate the most frequent tags with a category constraint" do
|
||||
create(:post, tag_string: "aaa bbb art:ccc copy:ddd")
|
||||
create(:post, tag_string: "aaa bbb art:ccc")
|
||||
create(:post, tag_string: "aaa bbb")
|
||||
|
||||
assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.general).pluck(:name))
|
||||
assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.artist).pluck(:name))
|
||||
end
|
||||
end
|
||||
|
||||
should "calculate related tags for multiple tag" do
|
||||
posts = []
|
||||
posts << FactoryBot.create(:post, :tag_string => "aaa bbb ccc")
|
||||
posts << FactoryBot.create(:post, :tag_string => "aaa bbb ccc ddd")
|
||||
posts << FactoryBot.create(:post, :tag_string => "aaa eee fff")
|
||||
context "#similar_tags_for_search" do
|
||||
should "calculate the most similar tags for a search" do
|
||||
create(:post, tag_string: "1girl solo", rating: "s")
|
||||
create(:post, tag_string: "1girl solo", rating: "q")
|
||||
create(:post, tag_string: "1girl 1boy", rating: "q")
|
||||
|
||||
assert_equal({"aaa"=>2, "bbb"=>2, "ddd"=>1, "ccc"=>2}, RelatedTagCalculator.calculate_from_sample("aaa bbb", 10))
|
||||
end
|
||||
|
||||
should "calculate typed related tags for a tag" do
|
||||
posts = []
|
||||
posts << FactoryBot.create(:post, :tag_string => "aaa bbb art:ccc copy:ddd")
|
||||
posts << FactoryBot.create(:post, :tag_string => "aaa bbb art:ccc")
|
||||
posts << FactoryBot.create(:post, :tag_string => "aaa bbb")
|
||||
|
||||
assert_equal({"ccc" => 2}, RelatedTagCalculator.calculate_from_sample("aaa", 10, Tag.categories.artist))
|
||||
assert_equal({"ddd" => 1}, RelatedTagCalculator.calculate_from_sample("aaa", 10, Tag.categories.copyright))
|
||||
end
|
||||
|
||||
should "convert a hash into string format" do
|
||||
posts = []
|
||||
posts << FactoryBot.create(:post, :tag_string => "aaa bbb ccc ddd")
|
||||
posts << FactoryBot.create(:post, :tag_string => "aaa bbb ccc")
|
||||
posts << FactoryBot.create(:post, :tag_string => "aaa bbb")
|
||||
|
||||
tag = Tag.find_by_name("aaa")
|
||||
counts = RelatedTagCalculator.calculate_from_sample("aaa", 10)
|
||||
assert_equal("aaa 3 bbb 3 ccc 2 ddd 1", RelatedTagCalculator.convert_hash_to_string(counts))
|
||||
assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl").pluck(:name))
|
||||
assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q").pluck(:name))
|
||||
assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo").pluck(:name))
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -31,7 +31,6 @@ class RelatedTagQueryTest < ActiveSupport::TestCase
|
||||
|
||||
context "for a tag that already exists" do
|
||||
setup do
|
||||
Tag.find_by_name("aaa").update_related
|
||||
@query = RelatedTagQuery.new(query: "aaa")
|
||||
end
|
||||
|
||||
@@ -59,8 +58,6 @@ class RelatedTagQueryTest < ActiveSupport::TestCase
|
||||
@ta = FactoryBot.create(:tag_alias, antecedent_name: "xyz", consequent_name: "aaa")
|
||||
@wp = FactoryBot.create(:wiki_page, title: "aaa", body: "blah [[foo|blah]] [[FOO]] [[bar]] blah")
|
||||
@query = RelatedTagQuery.new(query: "xyz")
|
||||
|
||||
Tag.find_by_name("aaa").update_related
|
||||
end
|
||||
|
||||
should "take wiki tags from the consequent's wiki" do
|
||||
|
||||
@@ -283,15 +283,4 @@ class TagTest < ActiveSupport::TestCase
|
||||
assert_equal(1, tag.reload.post_count)
|
||||
end
|
||||
end
|
||||
|
||||
context "The #related_tag_array method" do
|
||||
should "update the related tags" do
|
||||
create(:post, tag_string: "bkub")
|
||||
tag = Tag.find_by_name("bkub")
|
||||
|
||||
assert_nil(tag.related_tags)
|
||||
perform_enqueued_jobs { tag.related_tag_array }
|
||||
assert_equal([["bkub", "1"]], tag.reload.related_tag_array)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user