diff --git a/app/controllers/related_tags_controller.rb b/app/controllers/related_tags_controller.rb
index af329c0f7..75a226401 100644
--- a/app/controllers/related_tags_controller.rb
+++ b/app/controllers/related_tags_controller.rb
@@ -4,8 +4,10 @@ class RelatedTagsController < ApplicationController
   def show
     query = params[:query] || search_params[:query]
     category = params[:category] || search_params[:category]
+    type = params[:type] || search_params[:type]
+    limit = params[:limit]
 
-    @query = RelatedTagQuery.new(query: query, category: category, user: CurrentUser.user)
+    @query = RelatedTagQuery.new(query: query, category: category, type: type, user: CurrentUser.user, limit: limit)
     respond_with(@query)
   end
 end
diff --git a/app/logical/related_tag_calculator.rb b/app/logical/related_tag_calculator.rb
index 898a2b308..741a7a212 100644
--- a/app/logical/related_tag_calculator.rb
+++ b/app/logical/related_tag_calculator.rb
@@ -6,13 +6,15 @@ module RelatedTagCalculator
     search_sample_size = [search_count, search_sample_size].min
-    return [] if search_sample_size <= 0
+    # Keep the [tags, sample_count] shape on the early exit so callers can always destructure the result.
+    return [[], 0] if search_sample_size <= 0
 
-    tags = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category).limit(tag_sample_size)
+    tags, sample_count = frequent_tags_for_search(tag_query, search_sample_size: search_sample_size, category: category)
+    tags = tags.limit(tag_sample_size)
     tags = tags.sort_by do |tag|
       # cosine distance(tag1, tag2) = 1 - {{tag1 tag2}} / sqrt({{tag1}} * {{tag2}})
       1 - tag.overlap_count / Math.sqrt(tag.post_count * search_count.to_f)
     end
 
-    tags
+    [tags, sample_count]
   end
 
   def self.frequent_tags_for_search(tag_query, search_sample_size: 1000, category: nil)
@@ -28,7 +30,7 @@ module RelatedTagCalculator
     tags = tags.where("tags.post_count > 0")
     tags = tags.where(category: category) if category.present?
     tags = tags.order("overlap_count DESC, tags.post_count DESC, tags.name")
-    tags
+    [tags, sample_posts.length]
   end
 
   def self.frequent_tags_for_post_array(posts)
@@ -40,7 +42,9 @@ module RelatedTagCalculator
     Cache.get("similar_tags:#{tag_query}", cache_timeout, race_condition_ttl: 60.seconds) do
       ApplicationRecord.with_timeout(search_timeout, []) do
         CurrentUser.without_safe_mode do
-          RelatedTagCalculator.similar_tags_for_search(tag_query).take(max_tags).pluck(:name)
+          # Only the tag list is cached here; Array() also tolerates a nil list from an empty result.
+          similar_tags, _sample_count = RelatedTagCalculator.similar_tags_for_search(tag_query)
+          Array(similar_tags).take(max_tags).pluck(:name)
         end
       end
     end
diff --git a/app/logical/related_tag_query.rb b/app/logical/related_tag_query.rb
index 2a46d985e..6fbb18d4c 100644
--- a/app/logical/related_tag_query.rb
+++ b/app/logical/related_tag_query.rb
@@ -2,12 +2,15 @@ class RelatedTagQuery
   include ActiveModel::Serializers::JSON
   include ActiveModel::Serializers::Xml
 
-  attr_reader :query, :category, :user
+  attr_reader :query, :category, :type, :user, :limit
 
-  def initialize(query: nil, category: nil, user: nil)
+  def initialize(query: nil, category: nil, type: nil, user: nil, limit: nil)
     @user = user
     @query = TagAlias.to_aliased(query.to_s.downcase.strip).join(" ")
     @category = category
+    @type = type
+    # Accept only a purely numeric limit (whole-string match); anything else falls back to 25.
+    @limit = (limit.to_s =~ /\A\d+\z/ ? limit.to_i : 25)
   end
 
   def pretty_name
@@ -15,17 +18,85 @@ class RelatedTagQuery
   end
 
   def tags
-    if query =~ /\*/
-      pattern_matching_tags
+    if type == "frequent"
+      frequent_tags
+    elsif type == "similar"
+      similar_tags
+    elsif type == "like"
+      pattern_matching_tags("*#{query}*")
+    elsif query =~ /\*/
+      pattern_matching_tags(query)
     elsif category.present?
-      RelatedTagCalculator.frequent_tags_for_search(query, category: Tag.categories.value_for(category)).take(25)
+      frequent_tags
     elsif query.present?
-      RelatedTagCalculator.similar_tags_for_search(query).take(25)
+      similar_tags
     else
       Tag.none
     end
   end
 
+  # Maps each related tag name to its overlap count, following the same precedence as #tags.
+  # Pattern searches are answered with {} instead of reading overlap_count from their tags.
+  def tags_overlap
+    explicit_calculation = type == "frequent" || type == "similar"
+    if !explicit_calculation && (type == "like" || query =~ /\*/)
+      {}
+    else
+      tags.map { |tag| [tag.name, tag.overlap_count] }.to_h
+    end
+  end
+
+  # Sample count reported by the calculator for the chosen search; 0 when no calculation runs.
+  def sample_count
+    if type == "frequent"
+      frequent_count
+    elsif type == "similar"
+      similar_count
+    elsif type == "like" || query =~ /\*/
+      0
+    elsif category.present?
+      frequent_count
+    elsif query.present?
+      similar_count
+    else
+      0
+    end
+  end
+
+  # Memoized [tags, sample_count] pair for the most frequent tags of the query.
+  def frequent_tags_query
+    @frequent_tags_query ||= begin
+      found, sampled = RelatedTagCalculator.frequent_tags_for_search(query, category: category_of)
+      # Limit the tag list itself, not the two-element result pair.
+      [found.take(limit), sampled]
+    end
+  end
+
+  def frequent_tags
+    frequent_tags_query[0]
+  end
+
+  def frequent_count
+    frequent_tags_query[1]
+  end
+
+  # Memoized [tags, sample_count] pair for the tags most similar to the query.
+  def similar_tags_query
+    @similar_tags_query ||= begin
+      found, sampled = RelatedTagCalculator.similar_tags_for_search(query, category: category_of)
+      # Limit the tag list, not the result pair; Array()/to_i tolerate a bare [] result.
+      [Array(found).take(limit), sampled.to_i]
+    end
+  end
+
+  def similar_tags
+    similar_tags_query[0]
+  end
+
+  def similar_count
+    similar_tags_query[1]
+  end
+
   # Returns the top 20 most frequently added tags within the last 20 edits made by the user in the last hour.
   def recent_tags(since: 1.hour.ago, max_edits: 20, max_tags: 20)
     return [] unless user.present? && PostVersion.enabled?
@@ -77,7 +148,9 @@ class RelatedTagQuery
     {
       query: query,
       category: category,
+      sample_count: sample_count,
       tags: tags_with_categories(tags.map(&:name)),
+      tags_overlap: tags_overlap,
       wiki_page_tags: tags_with_categories(wiki_page_tags),
       other_wikis: other_wiki_pages.map { |wiki| [wiki.title, tags_with_categories(wiki.tags)] }.to_h
     }
@@ -89,8 +162,17 @@ class RelatedTagQuery
     Tag.categories_for(list_of_tag_names).to_a
   end
 
-  def pattern_matching_tags
-    Tag.nonempty.name_matches(query).order("post_count desc, name asc").limit(50)
+  # Category value for the requested category, or nil when no category was given.
+  def category_of
+    (category.present? ? Tag.categories.value_for(category) : nil)
+  end
+
+  # Nonempty tags whose name matches the pattern, optionally restricted to the category.
+  def pattern_matching_tags(tag_query)
+    tags = Tag.nonempty.name_matches(tag_query)
+    tags = tags.where(category: category_of) if category.present?
+    tags = tags.order("post_count desc, name asc").limit(limit)
+    tags
   end
 
   def wiki_page
diff --git a/app/logical/tag_category.rb b/app/logical/tag_category.rb
index efd094c1f..ea3d40b7c 100644
--- a/app/logical/tag_category.rb
+++ b/app/logical/tag_category.rb
@@ -71,6 +71,10 @@ class TagCategory
     def short_name_regex
       @@short_name_regex ||= short_name_list.join("|")
     end
+
+    def category_ids_regex
+      @@category_ids_regex ||= "[#{category_ids.join("")}]"
+    end
   end
 
   extend Mappings
diff --git a/app/models/tag.rb b/app/models/tag.rb
index 993e58188..6ae4cc580 100644
--- a/app/models/tag.rb
+++ b/app/models/tag.rb
@@ -41,7 +41,14 @@ class Tag < ApplicationRecord
     end
 
     def value_for(string)
-      TagCategory.mapping[string.to_s.downcase] || 0
+      norm_string = string.to_s.downcase
+      # Anchor the match so only a string that is exactly one category id digit is
+      # treated as numeric; names that merely contain a digit fall through to the mapping.
+      if norm_string =~ /\A#{TagCategory.category_ids_regex}\z/
+        norm_string.to_i
+      else
+        TagCategory.mapping[norm_string] || 0
+      end
     end
   end
 
diff --git a/test/unit/related_tag_calculator_test.rb b/test/unit/related_tag_calculator_test.rb
index ec9d59b03..07d9dcb73 100644
--- a/test/unit/related_tag_calculator_test.rb
+++ b/test/unit/related_tag_calculator_test.rb
@@ -30,7 +30,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
         create(:post, tag_string: "aaa bbb ccc")
         create(:post, tag_string: "aaa bbb")
 
-        assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa").pluck(:name))
+        assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa")[0].pluck(:name))
       end
 
       should "calculate the most frequent tags for a multiple tag search" do
@@ -38,7 +38,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
         create(:post, tag_string: "aaa bbb ccc ddd")
         create(:post, tag_string: "aaa eee fff")
 
-        assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb").pluck(:name))
+        assert_equal(%w[aaa bbb ccc ddd], RelatedTagCalculator.frequent_tags_for_search("aaa bbb")[0].pluck(:name))
       end
 
       should "calculate the most frequent tags with a category constraint" do
@@ -46,8 +46,8 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
         create(:post, tag_string: "aaa bbb art:ccc")
         create(:post, tag_string: "aaa bbb")
 
-        assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.general).pluck(:name))
-        assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.artist).pluck(:name))
+        assert_equal(%w[aaa bbb], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.general)[0].pluck(:name))
+        assert_equal(%w[ccc], RelatedTagCalculator.frequent_tags_for_search("aaa", category: Tag.categories.artist)[0].pluck(:name))
       end
     end
 
@@ -57,9 +57,9 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
         create(:post, tag_string: "1girl solo", rating: "q")
         create(:post, tag_string: "1girl 1boy", rating: "q")
 
-        assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl").pluck(:name))
-        assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q").pluck(:name))
-        assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo").pluck(:name))
+        assert_equal(%w[1girl solo 1boy], RelatedTagCalculator.similar_tags_for_search("1girl")[0].pluck(:name))
+        assert_equal(%w[1girl 1boy solo], RelatedTagCalculator.similar_tags_for_search("rating:q")[0].pluck(:name))
+        assert_equal(%w[solo 1girl], RelatedTagCalculator.similar_tags_for_search("solo")[0].pluck(:name))
       end
 
       should "calculate the similar tags for an aliased tag" do
@@ -67,7 +67,7 @@ class RelatedTagCalculatorTest < ActiveSupport::TestCase
         create(:post, tag_string: "bunny dog")
         create(:post, tag_string: "bunny cat")
 
-        assert_equal(%w[bunny cat dog], RelatedTagCalculator.similar_tags_for_search("rabbit").pluck(:name))
+        assert_equal(%w[bunny cat dog], RelatedTagCalculator.similar_tags_for_search("rabbit")[0].pluck(:name))
       end
     end
   end