From 91587aeb6b7deb27679b165f85b9e2b1598cb002 Mon Sep 17 00:00:00 2001
From: evazion
Date: Fri, 12 Nov 2021 16:42:15 -0600
Subject: [PATCH] robots.txt: block Googlebot from crawling certain useless URLs.

Block Googlebot from crawling certain slow, useless URLs.

Sometimes Googlebot tries to crawl old source:, approver:, and ordfav:
searches in bulk, which tends to slow down the site because things like
source: are inherently slow, and because Google spends hours at a time
crawling them in parallel. This is despite the fact that these links are
already marked as nofollow and noindex, and source: links were removed
from posts a long time ago to try to stop Google from crawling them.
---
 app/views/robots/index.text.erb | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/app/views/robots/index.text.erb b/app/views/robots/index.text.erb
index 77e1a29a5..859841bc9 100644
--- a/app/views/robots/index.text.erb
+++ b/app/views/robots/index.text.erb
@@ -5,6 +5,9 @@ Allow: /$
 <% if !Rails.env.production? || Danbooru.config.hostname == request.host %>
 Disallow: /*.atom
 Disallow: /*.json
+Disallow: /posts?tags=source:*
+Disallow: /posts?tags=ordfav:*
+Disallow: /posts?tags=approver:*
 Disallow: <%= artist_urls_path %>
 Disallow: <%= artist_versions_path %>