search: refactor fast_count to return nil on timeout.

* Refactor fast_count to return nil instead of 1,000,000 if the exact count times out.
* Remove the estimate_post_counts and blank_tag_search_fast_count global config options.
* Replace the hardcoded post count estimates inside fast_count with a
  method that parses Postgres's estimated row count from EXPLAIN.

* /counts/posts.json:
** Remove the `raise_on_timeout` parameter.
** Add an `estimate_count=<true|false>` parameter.
** Return null instead of 1,000,000 if the exact count times out.
This commit is contained in:
evazion
2020-05-07 20:20:12 -05:00
parent d3e4ac7c17
commit 41c6c882c2
9 changed files with 64 additions and 121 deletions

View File

@@ -2,6 +2,8 @@ class CountsController < ApplicationController
respond_to :xml, :json
def posts
@count = PostQueryBuilder.new(params[:tags], CurrentUser.user).fast_count(timeout: CurrentUser.statement_timeout, raise_on_timeout: true, skip_cache: params[:skip_cache])
estimate_count = params.fetch(:estimate_count, "true").truthy?
skip_cache = params.fetch(:skip_cache, "false").truthy?
@count = PostQueryBuilder.new(params[:tags], CurrentUser.user).fast_count(timeout: CurrentUser.statement_timeout, estimate_count: estimate_count, skip_cache: skip_cache)
end
end

View File

@@ -0,0 +1,15 @@
class ExplainParser < Struct.new(:sql)
extend Memoist
def query_plan
result = ApplicationRecord.connection.select_one("EXPLAIN (FORMAT JSON) #{sql}")
json = JSON.parse(result["QUERY PLAN"])
json.first["Plan"]
end
def row_count
query_plan["Plan Rows"]
end
memoize :query_plan
end

View File

@@ -825,81 +825,50 @@ class PostQueryBuilder
end
concerning :CountMethods do
def fast_count(timeout: 1_000, raise_on_timeout: false, skip_cache: false)
tags = to_s
# Optimize some cases. these are just estimates but at these
# quantities being off by a few hundred doesn't matter much
if Danbooru.config.estimate_post_counts
if tags == ""
return (Post.maximum(:id).to_i * (2200402.0 / 2232212)).floor
elsif tags =~ /^rating:s(?:afe)?$/
return (Post.maximum(:id).to_i * (1648652.0 / 2200402)).floor
elsif tags =~ /^rating:q(?:uestionable)?$/
return (Post.maximum(:id).to_i * (350101.0 / 2200402)).floor
elsif tags =~ /^rating:e(?:xplicit)?$/
return (Post.maximum(:id).to_i * (201650.0 / 2200402)).floor
end
end
def fast_count(timeout: 1_000, estimate_count: true, skip_cache: false)
count = nil
unless skip_cache
count = get_count_from_cache
end
if count.nil?
count = fast_count_search(timeout: timeout, raise_on_timeout: raise_on_timeout)
end
count = estimated_count if estimate_count
count = cached_count if count.nil? && !skip_cache
count = exact_count(timeout) if count.nil?
count
rescue Post::SearchError
0
end
def fast_count_search(timeout:, raise_on_timeout:)
def estimated_count
if is_empty_search?
estimated_row_count
elsif is_simple_tag?
Tag.find_by(name: tags.first.name).try(:post_count)
elsif is_metatag?(:rating)
estimated_row_count
end
end
def estimated_row_count
ExplainParser.new(build.to_sql).row_count
end
def cached_count
Cache.get(count_cache_key)
end
def exact_count(timeout)
count = Post.with_timeout(timeout, nil) do
build.count
end
if count.nil?
# give up
if raise_on_timeout
raise TimeoutError.new("timed out")
end
count = Danbooru.config.blank_tag_search_fast_count
else
set_count_in_cache(count)
end
count ? count.to_i : nil
rescue PG::ConnectionBad
return nil
set_cached_count(count) if count.present?
count
rescue Post::SearchError
nil
end
def get_count_from_cache
if is_simple_tag?
count = Tag.find_by(name: tags.first.name).try(:post_count)
else
# this will only have a value for multi-tag searches or single metatag searches
count = Cache.get(count_cache_key)
end
count.try(:to_i)
end
def set_count_in_cache(count)
def set_cached_count(count)
expiry = count.seconds.clamp(3.minutes, 20.hours).to_i
Cache.put(count_cache_key, count, expiry)
end
def count_cache_key
"pfc:#{Cache.hash(to_s)}"
"pfc:#{to_s}"
end
end

View File

@@ -84,7 +84,7 @@ module PostSets
def get_post_count
if %w(json atom xml).include?(format.downcase)
# no need to get counts for formats that don't use a paginator
return Danbooru.config.blank_tag_search_fast_count
nil
else
query.fast_count
end
@@ -103,15 +103,11 @@ module PostSets
if is_random?
temp = get_random_posts
else
temp = query.build.paginate(page, count: post_count, limit: per_page)
temp = query.build.paginate(page, count: post_count, search_count: !post_count.nil?, limit: per_page)
end
end
end
def unknown_post_count?
post_count == Danbooru.config.blank_tag_search_fast_count
end
def hide_from_crawler?
return true if current_page > 1
return false if query.is_empty_search? || query.is_simple_tag? || query.is_metatag?(:order, :rank)

View File

@@ -4,7 +4,7 @@
<div id="c-counts">
<div id="a-posts">
Post count for <%= link_to params[:tags].present? ? params[:tags] : "/posts", posts_path(:tags => params[:tags]) %>:
<% if @count == (Danbooru.config.blank_tag_search_fast_count || 1_000_000) %>
<% if @count.nil? %>
timed out
<% else %>
<%= @count %>

View File

@@ -299,13 +299,6 @@ module Danbooru
[]
end
# Counting every post is typically expensive because it involves a sequential scan on
# potentially millions of rows. If this method returns a value, then blank searches
# will return that number for the fast_count call instead.
def blank_tag_search_fast_count
nil
end
# DeviantArt login cookies. Login to DeviantArt and extract these from the browser.
# https://github.com/danbooru/danbooru/issues/4219
def deviantart_cookies
@@ -440,11 +433,6 @@ module Danbooru
false
end
# enable some (donmai-specific) optimizations for post counts
def estimate_post_counts
false
end
# Enables recording of popular searches, missed searches, and post view
# counts. Requires Reportbooru to be configured and running - see below.
def enable_post_search_counts

View File

@@ -7,12 +7,6 @@ class CountsControllerTest < ActionDispatch::IntegrationTest
get posts_counts_path
assert_response :success
end
should "render an error during a timeout" do
PostQueryBuilder.any_instance.stubs(:fast_count).raises(Post::TimeoutError.new)
get posts_counts_path
assert_response :error
end
end
end
end

View File

@@ -5,8 +5,8 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
assert_equal(posts.map(&:id), Post.user_tag_match(query).pluck(:id))
end
def assert_fast_count(count, query, **options)
assert_equal(count, PostQueryBuilder.new(query, **options).fast_count)
def assert_fast_count(count, query, query_options = {}, fast_count_options = {})
assert_equal(count, PostQueryBuilder.new(query, **query_options).fast_count(**fast_count_options))
end
setup do
@@ -1071,8 +1071,6 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
context "#fast_count" do
setup do
Danbooru.config.stubs(:blank_tag_search_fast_count).returns(nil)
Danbooru.config.stubs(:estimate_post_counts).returns(false)
create(:tag, name: "grey_skirt", post_count: 100)
create(:tag_alias, antecedent_name: "gray_skirt", consequent_name: "grey_skirt")
create(:post, tag_string: "aaa", score: 42)
@@ -1093,20 +1091,20 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
context "for a single metatag" do
should "return the correct cached count" do
build(:tag, name: "score:42", post_count: -100).save(validate: false)
PostQueryBuilder.new("score:42").set_count_in_cache(100)
PostQueryBuilder.new("score:42").set_cached_count(100)
assert_fast_count(100, "score:42")
end
should "return the correct cached count for a pool:<id> search" do
build(:tag, name: "pool:1234", post_count: -100).save(validate: false)
PostQueryBuilder.new("pool:1234").set_count_in_cache(100)
PostQueryBuilder.new("pool:1234").set_cached_count(100)
assert_fast_count(100, "pool:1234")
end
end
context "for a multi-tag search" do
should "return the cached count, if it exists" do
PostQueryBuilder.new("score:42 aaa").set_count_in_cache(100)
PostQueryBuilder.new("score:42 aaa").set_cached_count(100)
assert_fast_count(100, "aaa score:42")
end
@@ -1121,21 +1119,15 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
should "work with the hide_deleted_posts option turned on" do
create(:post, tag_string: "aaa", score: 42, is_deleted: true)
assert_fast_count(1, "aaa score:42", hide_deleted_posts: true)
assert_fast_count(2, "aaa score:42", hide_deleted_posts: false)
assert_fast_count(1, "aaa score:42", { hide_deleted_posts: true })
assert_fast_count(2, "aaa score:42", { hide_deleted_posts: false })
end
end
context "a blank search" do
should "should execute a search" do
assert_fast_count(1, "")
end
context "with a primed cache" do
should "fetch the value from the cache" do
PostQueryBuilder.new("").set_count_in_cache(100)
assert_fast_count(100, "")
end
assert_equal(1, PostQueryBuilder.new("").fast_count(estimate_count: false))
assert_nothing_raised { PostQueryBuilder.new("").fast_count(estimate_count: true) }
end
should "return 0 for a nonexisting tag" do
@@ -1143,30 +1135,21 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
end
context "in safe mode" do
setup do
create(:post, rating: "s")
end
should "work for a blank search" do
assert_fast_count(1, "", safe_mode: true)
assert_equal(2, PostQueryBuilder.new("").fast_count(estimate_count: false))
assert_nothing_raised { PostQueryBuilder.new("").fast_count(estimate_count: true) }
end
should "work for a nil search" do
assert_fast_count(1, nil, safe_mode: true)
assert_equal(2, PostQueryBuilder.new(nil).fast_count(estimate_count: false))
assert_nothing_raised { PostQueryBuilder.new(nil).fast_count(estimate_count: true) }
end
should "not fail for a two tag search by a member" do
post1 = create(:post, tag_string: "aaa bbb rating:s")
post2 = create(:post, tag_string: "aaa bbb rating:e")
assert_fast_count(1, "aaa bbb", safe_mode: true)
end
context "with a primed cache" do
should "fetch the value from the cache" do
PostQueryBuilder.new("rating:s").set_count_in_cache(100)
assert_fast_count(100, "", safe_mode: true)
end
assert_fast_count(1, "aaa bbb", { safe_mode: true })
end
end
end

View File

@@ -136,10 +136,6 @@ class PostTest < ActiveSupport::TestCase
end
context "Deleting a post" do
setup do
Danbooru.config.stubs(:blank_tag_search_fast_count).returns(nil)
end
context "that is status locked" do
setup do
@post = FactoryBot.create(:post, is_status_locked: true)