Rewrite related tags implementation.

Rewrite the implementation of related tags to be simpler, faster, and
more accurate:

* The related tags are now calculated by taking a random sample of 1000
  posts, finding the top 250 most frequent tags among those posts, then
  ordering those tags by cosine similarity.

* Related tags can generally be calculated in 50-300ms at these sample
  sizes. Very large sample sizes (25000+ posts) are still relatively fast
  (1-3 seconds), but they generally don't improve accuracy much.

* Related tags are now cached in redis rather than in the tags table.
  The related_tags column in the tags table is no longer used.

* Only the related tags in the search taglist are cached. The related
  tags returned by the 'Related tags' button are not cached.

* The cache lifetime is a fixed 4 hours.

* The 'Related tags' button now works with metatags.

* The /related_tag page now works with metatags and multitag searches.

Fixes #4134, #4146.
This commit is contained in:
evazion
2019-08-30 19:08:56 -05:00
parent 7b8584e3b0
commit 6dd331745a
11 changed files with 99 additions and 256 deletions

View File

@@ -2,11 +2,20 @@ require 'test_helper'
# Integration tests for the related tags endpoint reached via related_tag_path.
class RelatedTagsControllerTest < ActionDispatch::IntegrationTest
context "The related tags controller" do
setup do
# Every test below queries for "touhou", so start with one post carrying that tag.
create(:post, tag_string: "touhou")
end
context "show action" do
# Default-format request with query "touhou"; only the response status is checked.
should "work" do
get related_tag_path, params: { query: "touhou" }
assert_response :success
end
# Same request, but asking for the JSON format via format: :json.
should "work for .json responses" do
get related_tag_path(format: :json), params: { query: "touhou" }
assert_response :success
end
end
end
end

View File

@@ -12,57 +12,55 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
CurrentUser.ip_addr = nil
end
context "A related tag calculator" do
context "for a post set" do
setup do
FactoryBot.create(:post, :tag_string => "aaa bbb ccc ddd")
FactoryBot.create(:post, :tag_string => "aaa bbb ccc")
FactoryBot.create(:post, :tag_string => "aaa bbb")
@posts = Post.tag_match("aaa")
end
context "RelatedTagCalculator" do
context "#frequent_tags_for_posts" do
should "calculate the most frequent tags for a set of posts" do
create(:post, tag_string: "aaa bbb ccc ddd")
create(:post, tag_string: "aaa bbb ccc")
create(:post, tag_string: "aaa bbb")
posts = Post.tag_match("aaa")
should "calculate the related tags" do
assert_equal({"aaa"=>3, "bbb"=>3, "ccc"=>2, "ddd"=>1}, RelatedTagCalculator.calculate_from_posts(@posts))
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_posts(posts))
end
end
should "calculate related tags for a tag" do
posts = []
posts << FactoryBot.create(:post, :tag_string => "aaa bbb ccc ddd")
posts << FactoryBot.create(:post, :tag_string => "aaa bbb ccc")
posts << FactoryBot.create(:post, :tag_string => "aaa bbb")
context "#frequent_tags_for_search" do
should "calculate the most frequent tags for a single tag search" do
create(:post, tag_string: "aaa bbb ccc ddd")
create(:post, tag_string: "aaa bbb ccc")
create(:post, tag_string: "aaa bbb")
assert_equal({"aaa" => 3, "bbb" => 3, "ccc" => 2, "ddd" => 1}, RelatedTagCalculator.calculate_from_sample("aaa", 10))
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa").pluck(:name))
end
should "calculate the most frequent tags for a multiple tag search" do
create(:post, tag_string: "aaa bbb ccc")
create(:post, tag_string: "aaa bbb ccc ddd")
create(:post, tag_string: "aaa eee fff")
assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb").pluck(:name))
end
should "calculate the most frequent tags with a category constraint" do
create(:post, tag_string: "aaa bbb art:ccc copy:ddd")
create(:post, tag_string: "aaa bbb art:ccc")
create(:post, tag_string: "aaa bbb")
assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.general).pluck(:name))
assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.artist).pluck(:name))
end
end
should "calculate related tags for multiple tag" do
posts = []
posts << FactoryBot.create(:post, :tag_string => "aaa bbb ccc")
posts << FactoryBot.create(:post, :tag_string => "aaa bbb ccc ddd")
posts << FactoryBot.create(:post, :tag_string => "aaa eee fff")
context "#similar_tags_for_search" do
should "calculate the most similar tags for a search" do
create(:post, tag_string: "1girl solo", rating: "s")
create(:post, tag_string: "1girl solo", rating: "q")
create(:post, tag_string: "1girl 1boy", rating: "q")
assert_equal({"aaa"=>2, "bbb"=>2, "ddd"=>1, "ccc"=>2}, RelatedTagCalculator.calculate_from_sample("aaa bbb", 10))
end
should "calculate typed related tags for a tag" do
posts = []
posts << FactoryBot.create(:post, :tag_string => "aaa bbb art:ccc copy:ddd")
posts << FactoryBot.create(:post, :tag_string => "aaa bbb art:ccc")
posts << FactoryBot.create(:post, :tag_string => "aaa bbb")
assert_equal({"ccc" => 2}, RelatedTagCalculator.calculate_from_sample("aaa", 10, Tag.categories.artist))
assert_equal({"ddd" => 1}, RelatedTagCalculator.calculate_from_sample("aaa", 10, Tag.categories.copyright))
end
should "convert a hash into string format" do
posts = []
posts << FactoryBot.create(:post, :tag_string => "aaa bbb ccc ddd")
posts << FactoryBot.create(:post, :tag_string => "aaa bbb ccc")
posts << FactoryBot.create(:post, :tag_string => "aaa bbb")
tag = Tag.find_by_name("aaa")
counts = RelatedTagCalculator.calculate_from_sample("aaa", 10)
assert_equal("aaa 3 bbb 3 ccc 2 ddd 1", RelatedTagCalculator.convert_hash_to_string(counts))
assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl").pluck(:name))
assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q").pluck(:name))
assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo").pluck(:name))
end
end
end
end

View File

@@ -31,7 +31,6 @@ class RelatedTagQueryTest < ActiveSupport::TestCase
context "for a tag that already exists" do
setup do
Tag.find_by_name("aaa").update_related
@query = RelatedTagQuery.new(query: "aaa")
end
@@ -59,8 +58,6 @@ class RelatedTagQueryTest < ActiveSupport::TestCase
@ta = FactoryBot.create(:tag_alias, antecedent_name: "xyz", consequent_name: "aaa")
@wp = FactoryBot.create(:wiki_page, title: "aaa", body: "blah [[foo|blah]] [[FOO]] [[bar]] blah")
@query = RelatedTagQuery.new(query: "xyz")
Tag.find_by_name("aaa").update_related
end
should "take wiki tags from the consequent's wiki" do

View File

@@ -283,15 +283,4 @@ class TagTest < ActiveSupport::TestCase
assert_equal(1, tag.reload.post_count)
end
end
context "The #related_tag_array method" do
should "update the related tags" do
create(:post, tag_string: "bkub")
tag = Tag.find_by_name("bkub")
assert_nil(tag.related_tags)
perform_enqueued_jobs { tag.related_tag_array }
assert_equal([["bkub", "1"]], tag.reload.related_tag_array)
end
end
end