seo: increase sitemap coverage.

Rework sitemaps to provide more coverage of the site. We want every
important page on the site - including every post, tag, and wiki page -
to be indexed by Google. We do this by generating sitemaps and sitemap
indexes that contain links to every important page on the site.
This commit is contained in:
evazion
2020-07-09 22:38:35 -05:00
parent d88a2a674f
commit 42f0112c38
21 changed files with 187 additions and 63 deletions

View File

@@ -22,10 +22,34 @@ class StaticController < ApplicationController
def site_map
end
def sitemap
@reportbooru_service = ReportbooruService.new
@posts = Post.where("created_at > ?", 1.week.ago).order(score: :desc).limit(200)
@posts = @posts.select(&:visible?)
render layout: false
# Prepares the sitemap index for one type of record.
#
# Reads params[:sitemap] to pick the model and params[:limit] for the number
# of ids covered by each linked sitemap, then sets:
#
#   @sitemap  - the requested sitemap name
#   @limit    - ids per sitemap page, clamped to 1..10_000
#   @relation - the relation whose id range the index view walks
#   @search   - extra search params the view merges into each sitemap link
#
# Raises NotImplementedError for an unrecognized sitemap name.
def sitemap_index
  @sitemap = params[:sitemap]

  # The index view divides by @limit, so it must be at least 1. The upper
  # bound matches the 10_000 max page size allowed for the sitemap format.
  @limit = params.fetch(:limit, 10_000).to_i.clamp(1, 10_000)

  case @sitemap
  when "artists"
    @relation = Artist.undeleted
    @search = { is_deleted: "false" }
  when "forum_topics"
    @relation = ForumTopic.undeleted
    @search = { is_deleted: "false" }
  when "pools"
    @relation = Pool.undeleted
    @search = { is_deleted: "false" }
  when "posts"
    @relation = Post.order(id: :asc)
    @search = {}
  when "tags"
    @relation = Tag.nonempty
    @search = {}
  when "users"
    @relation = User.all
    @search = {}
  when "wiki_pages"
    @relation = WikiPage.undeleted
    @search = { is_deleted: "false" }
  else
    raise NotImplementedError
  end
end
end

View File

@@ -3,9 +3,9 @@ module PaginationExtension
attr_accessor :current_page, :records_per_page, :paginator_count, :paginator_mode
def paginate(page, limit: nil, count: nil, search_count: nil)
def paginate(page, limit: nil, max_limit: 1000, count: nil, search_count: nil)
@records_per_page = limit || Danbooru.config.posts_per_page
@records_per_page = @records_per_page.to_i.clamp(1, 1000)
@records_per_page = @records_per_page.to_i.clamp(1, max_limit)
if count.present?
@paginator_count = count

View File

@@ -76,7 +76,11 @@ module PostSets
end
def per_page
(@per_page || query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, MAX_PER_PAGE)
(@per_page || query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, max_per_page)
end
# Upper bound used when clamping per_page: 10_000 for the sitemap format,
# MAX_PER_PAGE for every other format.
def max_per_page
  return 10_000 if format == "sitemap"

  MAX_PER_PAGE
end
def is_random?
@@ -105,7 +109,7 @@ module PostSets
if is_random?
get_random_posts
else
normalized_query.build.paginate(page, count: post_count, search_count: !post_count.nil?, limit: per_page).load
normalized_query.build.paginate(page, count: post_count, search_count: !post_count.nil?, limit: per_page, max_limit: max_per_page).load
end
end
end

View File

@@ -16,7 +16,8 @@ class ApplicationRecord < ActiveRecord::Base
search_params = params.fetch(:search, {}).permit!
search_params = defaults.merge(search_params).with_indifferent_access
search(search_params).paginate(params[:page], limit: params[:limit], search_count: count_pages)
max_limit = (params[:format] == "sitemap") ? 10_000 : 1_000
search(search_params).paginate(params[:page], limit: params[:limit], max_limit: max_limit, search_count: count_pages)
end
end
end

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Generic sitemap: emits one url entry for each record in @current_item. %>
<%# loc is built with polymorphic_url(item); lastmod is the record's updated_at in ISO 8601. %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<% @current_item.each do |item| %>
<url>
<loc><%= polymorphic_url(item) %></loc>
<lastmod><%= item.updated_at.iso8601 %></lastmod>
</url>
<% end %>
</urlset>

View File

@@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Posts sitemap: one url entry per post in @posts. %>
<%# Image/video extension tags are only emitted when post.visible? is true. %>
<%# https://support.google.com/webmasters/answer/178636 %>
<%# https://support.google.com/webmasters/answer/80471 %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
<% @posts.each do |post| %>
<url>
<loc><%= post_url(post) %></loc>
<lastmod><%= post.updated_at.iso8601 %></lastmod>
<% if post.visible? %>
<% if post.is_image? %>
<image:image>
<image:loc><%= post.file_url %></image:loc>
</image:image>
<% elsif post.is_video? %>
<video:video>
<video:thumbnail_loc><%= post.preview_file_url %></video:thumbnail_loc>
<video:content_loc><%= post.file_url %></video:content_loc>
<video:publication_date><%= post.created_at.iso8601 %></video:publication_date>
<video:title><%= "Post ##{post.id}" %></video:title>
<video:description><%= post.tag_string %></video:description>
<%# Only posts with rating "s" are marked family friendly. %>
<video:family_friendly><%= post.rating == "s" ? "yes" : "no" %></video:family_friendly>
</video:video>
<% end %>
<% end %>
</url>
<% end %>
</urlset>

View File

@@ -1,20 +1,19 @@
Sitemap: <%= root_url %>sitemap.xml
User-agent: *
Disallow: /
Allow: /$
<% if Rails.env.production? && Danbooru.config.hostname == request.host %>
<% if !Rails.env.production? || Danbooru.config.hostname == request.host %>
Disallow: /*.atom
Disallow: /*.json
Disallow: /*.xml
Allow: /$
Allow: /artists
Allow: /artist_commentaries
Allow: /comments
Allow: /explore
Allow: /favorite_groups
Allow: /forum_posts
Allow: /forum_topics
Allow: /iqdb_queries
Allow: /login
Allow: /notes
Allow: /pools
@@ -23,7 +22,7 @@ Allow: /sessions
Allow: /static
Allow: /tags
Allow: /uploads
Allow: /user_upgrades
Allow: /user_upgrade
Allow: /users
Allow: /wiki_pages
@@ -51,4 +50,12 @@ Allow: /packs
Allow: /terms_of_service
Allow: /privacy
Allow: /sitemap.xml
Sitemap: <%= sitemap_url(format: :xml, sitemap: "artists") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "forum_topics") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "pools") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "posts") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "tags") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "users") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "wiki_pages") %>
<% end %>

View File

@@ -1,41 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">
<url>
<loc><%= posts_url %></loc>
<changefreq>daily</changefreq>
</url>
<url>
<loc><%= wiki_pages_url %></loc>
<changefreq>daily</changefreq>
</url>
<url>
<loc><%= pools_url %></loc>
<changefreq>daily</changefreq>
</url>
<% cache("sitemap", :expires_in => 24.hours) do %>
<% @reportbooru_service.post_search_rankings(Date.yesterday) do |tags, count| %>
<url>
<loc><%= posts_url(tags: tags) %></loc>
<lastmod><%= Date.today %></lastmod>
</url>
<% end %>
<% @posts.each do |post| %>
<url>
<loc><%= post_url(post) %></loc>
<image:image>
<image:loc>
<%= post.file_url %>
</image:loc>
<image:caption>
<%= post.presenter.humanized_essential_tag_string %>
</image:caption>
</image:image>
<lastmod><%= post.created_at.to_date %></lastmod>
</url>
<% end %>
<% end %>
</urlset>

View File

@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Sitemap index: walks the id space of @relation in blocks of @limit ids and %>
<%# emits one sitemap entry per block, linking to the .sitemap page for that id range. %>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<%# maximum(:id) is nil when the relation is empty; to_i turns that into a single (empty) page. %>
<% 0.upto(@relation.maximum(:id).to_i / @limit) do |page| %>
<% lo = page * @limit %>
<% hi = (page + 1) * @limit %>
<%# maximum(:updated_at) is nil when no records fall in this id range. Use &. so the %>
<%# empty range is skipped by the lastmod.present? check below instead of raising. %>
<% lastmod = @relation.where(id: lo..hi).maximum(:updated_at)&.iso8601 %>
<% if @sitemap == "posts" %>
<% loc = posts_url(limit: @limit, format: :sitemap, tags: "id:#{lo}..#{hi}") %>
<% else %>
<% loc = polymorphic_url(@relation.klass, limit: @limit, format: :sitemap, search: { id: "#{lo}..#{hi}", **@search }) %>
<% end %>
<% if lastmod.present? %>
<sitemap>
<loc><%= loc %></loc>
<lastmod><%= lastmod %></lastmod>
</sitemap>
<% end %>
<% end %>
</sitemapindex>

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Tags sitemap: one url entry per tag in @tags. %>
<%# Each loc points at the post search for that tag (posts_url with tags: tag.name). %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<% @tags.each do |tag| %>
<url>
<loc><%= posts_url(tags: tag.name) %></loc>
<lastmod><%= tag.updated_at.iso8601 %></lastmod>
</url>
<% end %>
</urlset>

View File

@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Users sitemap: one url entry per user in @users. No lastmod is emitted for users. %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<% @users.each do |user| %>
<url>
<loc><%= user_url(user) %></loc>
</url>
<% end %>
</urlset>

View File

@@ -2,3 +2,4 @@
# Add new mime types for use in respond_to blocks:
# Mime::Type.register "text/richtext", :rtf
Mime::Type.register_alias "application/xml", :sitemap

View File

@@ -365,7 +365,7 @@ Rails.application.routes.draw do
get "/wiki/recent_changes" => redirect {|params, req| "/wiki_page_versions?search[updater_id]=#{req.params[:user_id]}"}
get "/wiki/history/:title" => redirect("/wiki_page_versions?title=%{title}")
get "/sitemap" => "static#sitemap"
get "/sitemap" => "static#sitemap_index"
get "/opensearch" => "static#opensearch", :as => "opensearch"
get "/privacy" => "static#privacy_policy", :as => "privacy_policy"
get "/terms_of_service" => "static#terms_of_service", :as => "terms_of_service"

View File

@@ -135,6 +135,12 @@ class ArtistsControllerTest < ActionDispatch::IntegrationTest
assert_response :success
end
# The .sitemap format of the artists index should render successfully and
# contain one <loc> per Artist record.
should "get the sitemap" do
get artists_path(format: :sitemap)
assert_response :success
assert_equal(Artist.count, response.parsed_body.css("urlset url loc").size)
end
context "when searching the index page" do
should "find artists by name" do
get artists_path(name: "masao", format: "json")

View File

@@ -114,6 +114,12 @@ class ForumTopicsControllerTest < ActionDispatch::IntegrationTest
assert_response :success
end
# The .sitemap format of the forum topics index should render successfully
# and contain one <loc> per ForumTopic record.
should "render for a sitemap" do
get forum_topics_path(format: :sitemap)
assert_response :success
assert_equal(ForumTopic.count, response.parsed_body.css("urlset url loc").size)
end
context "with private topics" do
should "not show private topics to unprivileged users" do
as(@user) { @topic2.update!(min_level: User::Levels::MODERATOR) }

View File

@@ -23,6 +23,13 @@ class PoolsControllerTest < ActionDispatch::IntegrationTest
get pools_path, params: {:search => {:name_matches => @pool.name}}
assert_response :success
end
# The .sitemap format of the pools index should render successfully and
# contain one <loc> per Pool record.
should "render for a sitemap" do
get pools_path(format: :sitemap)
assert_response :success
assert_equal(Pool.count, response.parsed_body.css("urlset url loc").size)
end
end
context "show action" do

View File

@@ -337,6 +337,14 @@ class PostsControllerTest < ActionDispatch::IntegrationTest
end
end
# The .sitemap format of the posts index should render successfully and
# contain one <loc> per Post record.
context "with the .sitemap format" do
should "render" do
get posts_path(format: :sitemap)
assert_response :success
assert_equal(Post.count, response.parsed_body.css("urlset url loc").size)
end
end
context "with deleted posts" do
setup do
@post.update!(is_deleted: true)

View File

@@ -14,11 +14,14 @@ class StaticControllerTest < ActionDispatch::IntegrationTest
end
context "sitemap action" do
should "work" do
create_list(:post, 3)
mock_post_search_rankings(Time.zone.yesterday, [["1girl", 100.0], ["2girls", 50.0]])
get sitemap_path, as: :xml
assert_response :success
[Artist, ForumTopic, Pool, Post, Tag, User, WikiPage].each do |klass|
should "work for #{klass.model_name.plural}" do
as(create(:user)) { create_list(klass.model_name.singular.to_sym, 3) }
get sitemap_path(sitemap: klass.model_name.plural), as: :xml
assert_response :success
assert_equal(1, response.parsed_body.css("sitemap loc").size)
end
end
end

View File

@@ -20,6 +20,12 @@ class TagsControllerTest < ActionDispatch::IntegrationTest
assert_response :success
end
# The .sitemap format of the tags index should render successfully and
# contain one <loc> per Tag record.
should "render for a sitemap" do
get tags_path(format: :sitemap)
assert_response :success
assert_equal(Tag.count, response.parsed_body.css("urlset url loc").size)
end
context "with blank search parameters" do
should "strip the blank parameters with a redirect" do
get tags_path, params: { search: { name: "touhou", category: "" } }

View File

@@ -12,6 +12,12 @@ class UsersControllerTest < ActionDispatch::IntegrationTest
assert_response :success
end
# The .sitemap format of the users index should render successfully and
# contain one <loc> per User record.
should "render for a sitemap" do
get users_path(format: :sitemap)
assert_response :success
assert_equal(User.count, response.parsed_body.css("urlset url loc").size)
end
should "list all users for /users?name=<name>" do
get users_path, params: { name: @user.name }
assert_redirected_to(@user)

View File

@@ -23,6 +23,12 @@ class WikiPagesControllerTest < ActionDispatch::IntegrationTest
assert_response :success
end
# The .sitemap format of the wiki pages index should render successfully and
# contain one <loc> per WikiPage record.
should "render for a sitemap" do
get wiki_pages_path(format: :sitemap)
assert_response :success
assert_equal(WikiPage.count, response.parsed_body.css("urlset url loc").size)
end
should "redirect the legacy title param to the show page" do
get wiki_pages_path(title: "tagme")
assert_redirected_to wiki_pages_path(search: { title_normalize: "tagme" }, redirect: true)