From 41c6c882c2296ba40824248621b284fd982c38d2 Mon Sep 17 00:00:00 2001 From: evazion Date: Thu, 7 May 2020 20:20:12 -0500 Subject: [PATCH] search: refactor fast_count to return nil on timeout. * Refactor fast_count to return nil instead of 1,000,000 if the exact count times out. * Remove the estimate_post_counts and blank_tag_search_fast_count global config options. * Replace the hardcoded post count estimates inside fast_count with a method that parses Postgres's estimated row count from EXPLAIN. * /counts/posts.json: ** Remove the `raise_on_timeout` parameter. ** Add an `estimate_count=` parameter. ** Return null instead of 1,000,000 if the exact count times out. --- app/controllers/counts_controller.rb | 4 +- app/logical/explain_parser.rb | 15 ++++ app/logical/post_query_builder.rb | 89 ++++++++--------------- app/logical/post_sets/post.rb | 8 +- app/views/counts/posts.html.erb | 2 +- config/danbooru_default_config.rb | 12 --- test/functional/counts_controller_test.rb | 6 -- test/unit/post_query_builder_test.rb | 45 ++++-------- test/unit/post_test.rb | 4 - 9 files changed, 64 insertions(+), 121 deletions(-) create mode 100644 app/logical/explain_parser.rb diff --git a/app/controllers/counts_controller.rb b/app/controllers/counts_controller.rb index 3a2801ca0..556d4a028 100644 --- a/app/controllers/counts_controller.rb +++ b/app/controllers/counts_controller.rb @@ -2,6 +2,8 @@ class CountsController < ApplicationController respond_to :xml, :json def posts - @count = PostQueryBuilder.new(params[:tags], CurrentUser.user).fast_count(timeout: CurrentUser.statement_timeout, raise_on_timeout: true, skip_cache: params[:skip_cache]) + estimate_count = params.fetch(:estimate_count, "true").truthy? + skip_cache = params.fetch(:skip_cache, "false").truthy? + @count = PostQueryBuilder.new(params[:tags], CurrentUser.user).fast_count(timeout: CurrentUser.statement_timeout, estimate_count: estimate_count, skip_cache: skip_cache) end end diff --git a/app/logical/explain_parser.rb b/app/logical/explain_parser.rb new file mode 100644 index 000000000..c9346b456 --- /dev/null +++ b/app/logical/explain_parser.rb @@ -0,0 +1,15 @@ +class ExplainParser < Struct.new(:sql) + extend Memoist + + def query_plan + result = ApplicationRecord.connection.select_one("EXPLAIN (FORMAT JSON) #{sql}") + json = JSON.parse(result["QUERY PLAN"]) + json.first["Plan"] + end + + def row_count + query_plan["Plan Rows"] + end + + memoize :query_plan +end diff --git a/app/logical/post_query_builder.rb b/app/logical/post_query_builder.rb index c79766e30..6c4556c3c 100644 --- a/app/logical/post_query_builder.rb +++ b/app/logical/post_query_builder.rb @@ -825,81 +825,50 @@ class PostQueryBuilder end concerning :CountMethods do - def fast_count(timeout: 1_000, raise_on_timeout: false, skip_cache: false) - tags = to_s - - # Optimize some cases. these are just estimates but at these - # quantities being off by a few hundred doesn't matter much - if Danbooru.config.estimate_post_counts - if tags == "" - return (Post.maximum(:id).to_i * (2200402.0 / 2232212)).floor - - elsif tags =~ /^rating:s(?:afe)?$/ - return (Post.maximum(:id).to_i * (1648652.0 / 2200402)).floor - - elsif tags =~ /^rating:q(?:uestionable)?$/ - return (Post.maximum(:id).to_i * (350101.0 / 2200402)).floor - - elsif tags =~ /^rating:e(?:xplicit)?$/ - return (Post.maximum(:id).to_i * (201650.0 / 2200402)).floor - - end - end - + def fast_count(timeout: 1_000, estimate_count: true, skip_cache: false) count = nil - - unless skip_cache - count = get_count_from_cache - end - - if count.nil? - count = fast_count_search(timeout: timeout, raise_on_timeout: raise_on_timeout) - end - + count = estimated_count if estimate_count + count = cached_count if count.nil? && !skip_cache + count = exact_count(timeout) if count.nil? count - rescue Post::SearchError - 0 end - def fast_count_search(timeout:, raise_on_timeout:) + def estimated_count + if is_empty_search? + estimated_row_count + elsif is_simple_tag? + Tag.find_by(name: tags.first.name).try(:post_count) + elsif is_metatag?(:rating) + estimated_row_count + end + end + + def estimated_row_count + ExplainParser.new(build.to_sql).row_count + end + + def cached_count + Cache.get(count_cache_key) + end + + def exact_count(timeout) count = Post.with_timeout(timeout, nil) do build.count end - if count.nil? - # give up - if raise_on_timeout - raise TimeoutError.new("timed out") - end - - count = Danbooru.config.blank_tag_search_fast_count - else - set_count_in_cache(count) - end - - count ? count.to_i : nil - rescue PG::ConnectionBad - return nil + set_cached_count(count) if count.present? + count + rescue Post::SearchError + nil end - def get_count_from_cache - if is_simple_tag? - count = Tag.find_by(name: tags.first.name).try(:post_count) - else - # this will only have a value for multi-tag searches or single metatag searches - count = Cache.get(count_cache_key) - end - - count.try(:to_i) - end - - def set_count_in_cache(count) + def set_cached_count(count) expiry = count.seconds.clamp(3.minutes, 20.hours).to_i Cache.put(count_cache_key, count, expiry) end def count_cache_key - "pfc:#{Cache.hash(to_s)}" + "pfc:#{to_s}" end end diff --git a/app/logical/post_sets/post.rb b/app/logical/post_sets/post.rb index 3f25df422..eff41c0cd 100644 --- a/app/logical/post_sets/post.rb +++ b/app/logical/post_sets/post.rb @@ -84,7 +84,7 @@ module PostSets def get_post_count if %w(json atom xml).include?(format.downcase) # no need to get counts for formats that don't use a paginator - return Danbooru.config.blank_tag_search_fast_count + nil else query.fast_count end @@ -103,15 +103,11 @@ module PostSets if is_random? temp = get_random_posts else - temp = query.build.paginate(page, count: post_count, limit: per_page) + temp = query.build.paginate(page, count: post_count, search_count: !post_count.nil?, limit: per_page) end end end - def unknown_post_count? - post_count == Danbooru.config.blank_tag_search_fast_count - end - def hide_from_crawler? return true if current_page > 1 return false if query.is_empty_search? || query.is_simple_tag? || query.is_metatag?(:order, :rank) diff --git a/app/views/counts/posts.html.erb b/app/views/counts/posts.html.erb index 8b1199c91..856e5bb95 100644 --- a/app/views/counts/posts.html.erb +++ b/app/views/counts/posts.html.erb @@ -4,7 +4,7 @@
Post count for <%= link_to params[:tags].present? ? params[:tags] : "/posts", posts_path(:tags => params[:tags]) %>: - <% if @count == (Danbooru.config.blank_tag_search_fast_count || 1_000_000) %> + <% if @count.nil? %> timed out <% else %> <%= @count %> diff --git a/config/danbooru_default_config.rb b/config/danbooru_default_config.rb index 0762e1edc..6a0b7f030 100644 --- a/config/danbooru_default_config.rb +++ b/config/danbooru_default_config.rb @@ -299,13 +299,6 @@ module Danbooru [] end - # Counting every post is typically expensive because it involves a sequential scan on - # potentially millions of rows. If this method returns a value, then blank searches - # will return that number for the fast_count call instead. - def blank_tag_search_fast_count - nil - end - # DeviantArt login cookies. Login to DeviantArt and extract these from the browser. # https://github.com/danbooru/danbooru/issues/4219 def deviantart_cookies @@ -440,11 +433,6 @@ module Danbooru false end - # enable some (donmai-specific) optimizations for post counts - def estimate_post_counts - false - end - # Enables recording of popular searches, missed searches, and post view # counts. Requires Reportbooru to be configured and running - see below. def enable_post_search_counts diff --git a/test/functional/counts_controller_test.rb b/test/functional/counts_controller_test.rb index 5aae7a945..24c6dad03 100644 --- a/test/functional/counts_controller_test.rb +++ b/test/functional/counts_controller_test.rb @@ -7,12 +7,6 @@ class CountsControllerTest < ActionDispatch::IntegrationTest get posts_counts_path assert_response :success end - - should "render an error during a timeout" do - PostQueryBuilder.any_instance.stubs(:fast_count).raises(Post::TimeoutError.new) - get posts_counts_path - assert_response :error - end end end end diff --git a/test/unit/post_query_builder_test.rb b/test/unit/post_query_builder_test.rb index 73e73f5e3..8878c3afb 100644 --- a/test/unit/post_query_builder_test.rb +++ b/test/unit/post_query_builder_test.rb @@ -5,8 +5,8 @@ class PostQueryBuilderTest < ActiveSupport::TestCase assert_equal(posts.map(&:id), Post.user_tag_match(query).pluck(:id)) end - def assert_fast_count(count, query, **options) - assert_equal(count, PostQueryBuilder.new(query, **options).fast_count) + def assert_fast_count(count, query, query_options = {}, fast_count_options = {}) + assert_equal(count, PostQueryBuilder.new(query, **query_options).fast_count(**fast_count_options)) end setup do @@ -1071,8 +1071,6 @@ class PostQueryBuilderTest < ActiveSupport::TestCase context "#fast_count" do setup do - Danbooru.config.stubs(:blank_tag_search_fast_count).returns(nil) - Danbooru.config.stubs(:estimate_post_counts).returns(false) create(:tag, name: "grey_skirt", post_count: 100) create(:tag_alias, antecedent_name: "gray_skirt", consequent_name: "grey_skirt") create(:post, tag_string: "aaa", score: 42) @@ -1093,20 +1091,20 @@ class PostQueryBuilderTest < ActiveSupport::TestCase context "for a single metatag" do should "return the correct cached count" do build(:tag, name: "score:42", post_count: -100).save(validate: false) - PostQueryBuilder.new("score:42").set_count_in_cache(100) + PostQueryBuilder.new("score:42").set_cached_count(100) assert_fast_count(100, "score:42") end should "return the correct cached count for a pool: search" do build(:tag, name: "pool:1234", post_count: -100).save(validate: false) - PostQueryBuilder.new("pool:1234").set_count_in_cache(100) + PostQueryBuilder.new("pool:1234").set_cached_count(100) assert_fast_count(100, "pool:1234") end end context "for a multi-tag search" do should "return the cached count, if it exists" do - PostQueryBuilder.new("score:42 aaa").set_count_in_cache(100) + PostQueryBuilder.new("score:42 aaa").set_cached_count(100) assert_fast_count(100, "aaa score:42") end @@ -1121,21 +1119,15 @@ class PostQueryBuilderTest < ActiveSupport::TestCase should "work with the hide_deleted_posts option turned on" do create(:post, tag_string: "aaa", score: 42, is_deleted: true) - assert_fast_count(1, "aaa score:42", hide_deleted_posts: true) - assert_fast_count(2, "aaa score:42", hide_deleted_posts: false) + assert_fast_count(1, "aaa score:42", { hide_deleted_posts: true }) + assert_fast_count(2, "aaa score:42", { hide_deleted_posts: false }) end end context "a blank search" do should "should execute a search" do - assert_fast_count(1, "") - end - - context "with a primed cache" do - should "fetch the value from the cache" do - PostQueryBuilder.new("").set_count_in_cache(100) - assert_fast_count(100, "") - end + assert_equal(1, PostQueryBuilder.new("").fast_count(estimate_count: false)) + assert_nothing_raised { PostQueryBuilder.new("").fast_count(estimate_count: true) } end should "return 0 for a nonexisting tag" do @@ -1143,30 +1135,21 @@ class PostQueryBuilderTest < ActiveSupport::TestCase end context "in safe mode" do - setup do - create(:post, rating: "s") - end - should "work for a blank search" do - assert_fast_count(1, "", safe_mode: true) + assert_equal(2, PostQueryBuilder.new("").fast_count(estimate_count: false)) + assert_nothing_raised { PostQueryBuilder.new("").fast_count(estimate_count: true) } end should "work for a nil search" do - assert_fast_count(1, nil, safe_mode: true) + assert_equal(2, PostQueryBuilder.new(nil).fast_count(estimate_count: false)) + assert_nothing_raised { PostQueryBuilder.new(nil).fast_count(estimate_count: true) } end should "not fail for a two tag search by a member" do post1 = create(:post, tag_string: "aaa bbb rating:s") post2 = create(:post, tag_string: "aaa bbb rating:e") - assert_fast_count(1, "aaa bbb", safe_mode: true) - end - - context "with a primed cache" do - should "fetch the value from the cache" do - PostQueryBuilder.new("rating:s").set_count_in_cache(100) - assert_fast_count(100, "", safe_mode: true) - end + assert_fast_count(1, "aaa bbb", { safe_mode: true }) end end end diff --git a/test/unit/post_test.rb b/test/unit/post_test.rb index b9e4da568..555509e4e 100644 --- a/test/unit/post_test.rb +++ b/test/unit/post_test.rb @@ -136,10 +136,6 @@ class PostTest < ActiveSupport::TestCase end context "Deleting a post" do - setup do - Danbooru.config.stubs(:blank_tag_search_fast_count).returns(nil) - end - context "that is status locked" do setup do @post = FactoryBot.create(:post, is_status_locked: true)