From 42f0112c385ee6b47501186b8be18f1eeb238880 Mon Sep 17 00:00:00 2001 From: evazion Date: Thu, 9 Jul 2020 22:38:35 -0500 Subject: [PATCH] seo: increase sitemap coverage. Rework sitemaps to provide more coverage of the site. We want every important page on the site - including every post, tag, and wiki page - to be indexed by Google. We do this by generating sitemaps and sitemap indexes that contain links to every important page on the site. --- app/controllers/static_controller.rb | 34 ++++++++++++--- app/logical/pagination_extension.rb | 4 +- app/logical/post_sets/post.rb | 8 +++- app/models/application_record.rb | 3 +- app/views/application/index.sitemap.erb | 10 +++++ app/views/posts/index.sitemap.erb | 30 ++++++++++++++ app/views/robots/index.text.erb | 19 ++++++--- app/views/static/sitemap.xml.erb | 41 ------------------- app/views/static/sitemap_index.xml.erb | 21 ++++++++++ app/views/tags/index.sitemap.erb | 10 +++++ app/views/users/index.sitemap.erb | 9 ++++ config/initializers/mime_types.rb | 1 + config/routes.rb | 2 +- test/functional/artists_controller_test.rb | 6 +++ .../forum_topics_controller_test.rb | 6 +++ test/functional/pools_controller_test.rb | 7 ++++ test/functional/posts_controller_test.rb | 8 ++++ test/functional/static_controller_test.rb | 13 +++--- test/functional/tags_controller_test.rb | 6 +++ test/functional/users_controller_test.rb | 6 +++ test/functional/wiki_pages_controller_test.rb | 6 +++ 21 files changed, 187 insertions(+), 63 deletions(-) create mode 100644 app/views/application/index.sitemap.erb create mode 100644 app/views/posts/index.sitemap.erb delete mode 100644 app/views/static/sitemap.xml.erb create mode 100644 app/views/static/sitemap_index.xml.erb create mode 100644 app/views/tags/index.sitemap.erb create mode 100644 app/views/users/index.sitemap.erb diff --git a/app/controllers/static_controller.rb b/app/controllers/static_controller.rb index c3a992560..709be20f3 100644 --- a/app/controllers/static_controller.rb +++ b/app/controllers/static_controller.rb @@ -22,10 +22,34 @@ class StaticController < ApplicationController def site_map end - def sitemap - @reportbooru_service = ReportbooruService.new - @posts = Post.where("created_at > ?", 1.week.ago).order(score: :desc).limit(200) - @posts = @posts.select(&:visible?) - render layout: false + def sitemap_index + @sitemap = params[:sitemap] + @limit = params.fetch(:limit, 10000).to_i + + case @sitemap + when "artists" + @relation = Artist.undeleted + @search = { is_deleted: "false" } + when "forum_topics" + @relation = ForumTopic.undeleted + @search = { is_deleted: "false" } + when "pools" + @relation = Pool.undeleted + @search = { is_deleted: "false" } + when "posts" + @relation = Post.order(id: :asc) + @serach = {} + when "tags" + @relation = Tag.nonempty + @search = {} + when "users" + @relation = User.all + @search = {} + when "wiki_pages" + @relation = WikiPage.undeleted + @search = { is_deleted: "false" } + else + raise NotImplementedError + end end end diff --git a/app/logical/pagination_extension.rb b/app/logical/pagination_extension.rb index 686cdcd27..dce0f8ac5 100644 --- a/app/logical/pagination_extension.rb +++ b/app/logical/pagination_extension.rb @@ -3,9 +3,9 @@ module PaginationExtension attr_accessor :current_page, :records_per_page, :paginator_count, :paginator_mode - def paginate(page, limit: nil, count: nil, search_count: nil) + def paginate(page, limit: nil, max_limit: 1000, count: nil, search_count: nil) @records_per_page = limit || Danbooru.config.posts_per_page - @records_per_page = @records_per_page.to_i.clamp(1, 1000) + @records_per_page = @records_per_page.to_i.clamp(1, max_limit) if count.present? @paginator_count = count diff --git a/app/logical/post_sets/post.rb b/app/logical/post_sets/post.rb index c13a1f5fb..80052c527 100644 --- a/app/logical/post_sets/post.rb +++ b/app/logical/post_sets/post.rb @@ -76,7 +76,11 @@ module PostSets end def per_page - (@per_page || query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, MAX_PER_PAGE) + (@per_page || query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, max_per_page) + end + + def max_per_page + (format == "sitemap") ? 10_000 : MAX_PER_PAGE end def is_random? @@ -105,7 +109,7 @@ module PostSets if is_random? get_random_posts else - normalized_query.build.paginate(page, count: post_count, search_count: !post_count.nil?, limit: per_page).load + normalized_query.build.paginate(page, count: post_count, search_count: !post_count.nil?, limit: per_page, max_limit: max_per_page).load end end end diff --git a/app/models/application_record.rb b/app/models/application_record.rb index ee9fa44a0..ccd96df91 100644 --- a/app/models/application_record.rb +++ b/app/models/application_record.rb @@ -16,7 +16,8 @@ class ApplicationRecord < ActiveRecord::Base search_params = params.fetch(:search, {}).permit! search_params = defaults.merge(search_params).with_indifferent_access - search(search_params).paginate(params[:page], limit: params[:limit], search_count: count_pages) + max_limit = (params[:format] == "sitemap") ? 10_000 : 1_000 + search(search_params).paginate(params[:page], limit: params[:limit], max_limit: max_limit, search_count: count_pages) end end end diff --git a/app/views/application/index.sitemap.erb b/app/views/application/index.sitemap.erb new file mode 100644 index 000000000..a7ebe75fa --- /dev/null +++ b/app/views/application/index.sitemap.erb @@ -0,0 +1,10 @@ + + + + <% @current_item.each do |item| %> + + <%= polymorphic_url(item) %> + <%= item.updated_at.iso8601 %> + + <% end %> + diff --git a/app/views/posts/index.sitemap.erb b/app/views/posts/index.sitemap.erb new file mode 100644 index 000000000..260be89b1 --- /dev/null +++ b/app/views/posts/index.sitemap.erb @@ -0,0 +1,30 @@ + + +<%# https://support.google.com/webmasters/answer/178636 %> +<%# https://support.google.com/webmasters/answer/80471 %> + + <% @posts.each do |post| %> + + <%= post_url(post) %> + <%= post.updated_at.iso8601 %> + <% if post.visible? %> + <% if post.is_image? %> + + <%= post.file_url %> + + <% elsif post.is_video? %> + + <%= post.preview_file_url %> + <%= post.file_url %> + <%= post.created_at.iso8601 %> + <%= "Post ##{post.id}" %> + <%= post.tag_string %> + <%= post.rating == "s" ? "yes" : "no" %> + + <% end %> + <% end %> + + <% end %> + diff --git a/app/views/robots/index.text.erb b/app/views/robots/index.text.erb index 5df2f7222..57244f162 100644 --- a/app/views/robots/index.text.erb +++ b/app/views/robots/index.text.erb @@ -1,20 +1,19 @@ -Sitemap: <%= root_url %>sitemap.xml - User-agent: * Disallow: / +Allow: /$ -<% if Rails.env.production? && Danbooru.config.hostname == request.host %> +<% if !Rails.env.production? || Danbooru.config.hostname == request.host %> Disallow: /*.atom Disallow: /*.json -Disallow: /*.xml -Allow: /$ Allow: /artists Allow: /artist_commentaries Allow: /comments Allow: /explore +Allow: /favorite_groups Allow: /forum_posts Allow: /forum_topics +Allow: /iqdb_queries Allow: /login Allow: /notes Allow: /pools @@ -23,7 +22,7 @@ Allow: /sessions Allow: /static Allow: /tags Allow: /uploads -Allow: /user_upgrades +Allow: /user_upgrade Allow: /users Allow: /wiki_pages @@ -51,4 +50,12 @@ Allow: /packs Allow: /terms_of_service Allow: /privacy Allow: /sitemap.xml + +Sitemap: <%= sitemap_url(format: :xml, sitemap: "artists") %> +Sitemap: <%= sitemap_url(format: :xml, sitemap: "forum_topics") %> +Sitemap: <%= sitemap_url(format: :xml, sitemap: "pools") %> +Sitemap: <%= sitemap_url(format: :xml, sitemap: "posts") %> +Sitemap: <%= sitemap_url(format: :xml, sitemap: "tags") %> +Sitemap: <%= sitemap_url(format: :xml, sitemap: "users") %> +Sitemap: <%= sitemap_url(format: :xml, sitemap: "wiki_pages") %> <% end %> diff --git a/app/views/static/sitemap.xml.erb b/app/views/static/sitemap.xml.erb deleted file mode 100644 index dc44779c3..000000000 --- a/app/views/static/sitemap.xml.erb +++ /dev/null @@ -1,41 +0,0 @@ - - - - - <%= posts_url %> - daily - - - <%= wiki_pages_url %> - daily - - - <%= pools_url %> - daily - - - <% cache("sitemap", :expires_in => 24.hours) do %> - <% @reportbooru_service.post_search_rankings(Date.yesterday) do |tags, count| %> - - <%= posts_url(tags: tags) %> - <%= Date.today %> - - <% end %> - - <% @posts.each do |post| %> - - <%= post_url(post) %> - - - <%= post.file_url %> - - - <%= post.presenter.humanized_essential_tag_string %> - - - <%= post.created_at.to_date %> - - <% end %> - <% end %> - diff --git a/app/views/static/sitemap_index.xml.erb b/app/views/static/sitemap_index.xml.erb new file mode 100644 index 000000000..d779d41f7 --- /dev/null +++ b/app/views/static/sitemap_index.xml.erb @@ -0,0 +1,21 @@ + + + + <% 0.upto(@relation.maximum(:id) / @limit) do |page| %> + <% lo = page * @limit %> + <% hi = (page + 1) * @limit %> + <% lastmod = @relation.where(id: lo..hi).maximum(:updated_at).iso8601 %> + <% if @sitemap == "posts" %> + <% loc = posts_url(limit: @limit, format: :sitemap, tags: "id:#{lo}..#{hi}") %> + <% else %> + <% loc = polymorphic_url(@relation.klass, limit: @limit, format: :sitemap, search: { id: "#{lo}..#{hi}", **@search }) %> + <% end %> + + <% if lastmod.present? %> + + <%= loc %> + <%= lastmod %> + + <% end %> + <% end %> + diff --git a/app/views/tags/index.sitemap.erb b/app/views/tags/index.sitemap.erb new file mode 100644 index 000000000..c57765581 --- /dev/null +++ b/app/views/tags/index.sitemap.erb @@ -0,0 +1,10 @@ + + + + <% @tags.each do |tag| %> + + <%= posts_url(tags: tag.name) %> + <%= tag.updated_at.iso8601 %> + + <% end %> + diff --git a/app/views/users/index.sitemap.erb b/app/views/users/index.sitemap.erb new file mode 100644 index 000000000..9a9134097 --- /dev/null +++ b/app/views/users/index.sitemap.erb @@ -0,0 +1,9 @@ + + + + <% @users.each do |user| %> + + <%= user_url(user) %> + + <% end %> + diff --git a/config/initializers/mime_types.rb b/config/initializers/mime_types.rb index dc1899682..35490377a 100644 --- a/config/initializers/mime_types.rb +++ b/config/initializers/mime_types.rb @@ -2,3 +2,4 @@ # Add new mime types for use in respond_to blocks: # Mime::Type.register "text/richtext", :rtf +Mime::Type.register_alias "application/xml", :sitemap diff --git a/config/routes.rb b/config/routes.rb index b9ddd2bc5..38b8a87e7 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -365,7 +365,7 @@ Rails.application.routes.draw do get "/wiki/recent_changes" => redirect {|params, req| "/wiki_page_versions?search[updater_id]=#{req.params[:user_id]}"} get "/wiki/history/:title" => redirect("/wiki_page_versions?title=%{title}") - get "/sitemap" => "static#sitemap" + get "/sitemap" => "static#sitemap_index" get "/opensearch" => "static#opensearch", :as => "opensearch" get "/privacy" => "static#privacy_policy", :as => "privacy_policy" get "/terms_of_service" => "static#terms_of_service", :as => "terms_of_service" diff --git a/test/functional/artists_controller_test.rb b/test/functional/artists_controller_test.rb index 2ae68d78e..076d8fd41 100644 --- a/test/functional/artists_controller_test.rb +++ b/test/functional/artists_controller_test.rb @@ -135,6 +135,12 @@ class ArtistsControllerTest < ActionDispatch::IntegrationTest assert_response :success end + should "get the sitemap" do + get artists_path(format: :sitemap) + assert_response :success + assert_equal(Artist.count, response.parsed_body.css("urlset url loc").size) + end + context "when searching the index page" do should "find artists by name" do get artists_path(name: "masao", format: "json") diff --git a/test/functional/forum_topics_controller_test.rb b/test/functional/forum_topics_controller_test.rb index e8368ff08..4b3a0e2e4 100644 --- a/test/functional/forum_topics_controller_test.rb +++ b/test/functional/forum_topics_controller_test.rb @@ -114,6 +114,12 @@ class ForumTopicsControllerTest < ActionDispatch::IntegrationTest assert_response :success end + should "render for a sitemap" do + get forum_topics_path(format: :sitemap) + assert_response :success + assert_equal(ForumTopic.count, response.parsed_body.css("urlset url loc").size) + end + context "with private topics" do should "not show private topics to unprivileged users" do as(@user) { @topic2.update!(min_level: User::Levels::MODERATOR) } diff --git a/test/functional/pools_controller_test.rb b/test/functional/pools_controller_test.rb index abf1efe83..35b9e3fff 100644 --- a/test/functional/pools_controller_test.rb +++ b/test/functional/pools_controller_test.rb @@ -23,6 +23,13 @@ class PoolsControllerTest < ActionDispatch::IntegrationTest get pools_path, params: {:search => {:name_matches => @pool.name}} assert_response :success end + + should "render for a sitemap" do + get pools_path(format: :sitemap) + assert_response :success + assert_equal(Pool.count, response.parsed_body.css("urlset url loc").size) + end + end context "show action" do diff --git a/test/functional/posts_controller_test.rb b/test/functional/posts_controller_test.rb index 47f831550..67b03454f 100644 --- a/test/functional/posts_controller_test.rb +++ b/test/functional/posts_controller_test.rb @@ -337,6 +337,14 @@ class PostsControllerTest < ActionDispatch::IntegrationTest end end + context "with the .sitemap format" do + should "render" do + get posts_path(format: :sitemap) + assert_response :success + assert_equal(Post.count, response.parsed_body.css("urlset url loc").size) + end + end + context "with deleted posts" do setup do @post.update!(is_deleted: true) diff --git a/test/functional/static_controller_test.rb b/test/functional/static_controller_test.rb index 47b4f9e13..faca87b71 100644 --- a/test/functional/static_controller_test.rb +++ b/test/functional/static_controller_test.rb @@ -14,11 +14,14 @@ class StaticControllerTest < ActionDispatch::IntegrationTest end context "sitemap action" do - should "work" do - create_list(:post, 3) - mock_post_search_rankings(Time.zone.yesterday, [["1girl", 100.0], ["2girls", 50.0]]) - get sitemap_path, as: :xml - assert_response :success + [Artist, ForumTopic, Pool, Post, Tag, User, WikiPage].each do |klass| + should "work for #{klass.model_name.plural}" do + as(create(:user)) { create_list(klass.model_name.singular.to_sym, 3) } + get sitemap_path(sitemap: klass.model_name.plural), as: :xml + + assert_response :success + assert_equal(1, response.parsed_body.css("sitemap loc").size) + end end end diff --git a/test/functional/tags_controller_test.rb b/test/functional/tags_controller_test.rb index 5f0ab23a3..91eea376d 100644 --- a/test/functional/tags_controller_test.rb +++ b/test/functional/tags_controller_test.rb @@ -20,6 +20,12 @@ class TagsControllerTest < ActionDispatch::IntegrationTest assert_response :success end + should "render for a sitemap" do + get tags_path(format: :sitemap) + assert_response :success + assert_equal(Tag.count, response.parsed_body.css("urlset url loc").size) + end + context "with blank search parameters" do should "strip the blank parameters with a redirect" do get tags_path, params: { search: { name: "touhou", category: "" } } diff --git a/test/functional/users_controller_test.rb b/test/functional/users_controller_test.rb index e0454e074..54bc8716b 100644 --- a/test/functional/users_controller_test.rb +++ b/test/functional/users_controller_test.rb @@ -12,6 +12,12 @@ class UsersControllerTest < ActionDispatch::IntegrationTest assert_response :success end + should "render for a sitemap" do + get users_path(format: :sitemap) + assert_response :success + assert_equal(User.count, response.parsed_body.css("urlset url loc").size) + end + should "list all users for /users?name=" do get users_path, params: { name: @user.name } assert_redirected_to(@user) diff --git a/test/functional/wiki_pages_controller_test.rb b/test/functional/wiki_pages_controller_test.rb index 2ea1619c6..713b28e1e 100644 --- a/test/functional/wiki_pages_controller_test.rb +++ b/test/functional/wiki_pages_controller_test.rb @@ -23,6 +23,12 @@ class WikiPagesControllerTest < ActionDispatch::IntegrationTest assert_response :success end + should "render for a sitemap" do + get wiki_pages_path(format: :sitemap) + assert_response :success + assert_equal(WikiPage.count, response.parsed_body.css("urlset url loc").size) + end + should "redirect the legacy title param to the show page" do get wiki_pages_path(title: "tagme") assert_redirected_to wiki_pages_path(search: { title_normalize: "tagme" }, redirect: true)