seo: increase sitemap coverage.

Rework sitemaps to provide more coverage of the site. We want every
important page on the site - including every post, tag, and wiki page -
to be indexed by Google. We do this by generating sitemaps and sitemap
indexes that contain links to every important page on the site.
This commit is contained in:
evazion
2020-07-09 22:38:35 -05:00
parent d88a2a674f
commit 42f0112c38
21 changed files with 187 additions and 63 deletions

View File

@@ -22,10 +22,34 @@ class StaticController < ApplicationController
def site_map def site_map
end end
def sitemap def sitemap_index
@reportbooru_service = ReportbooruService.new @sitemap = params[:sitemap]
@posts = Post.where("created_at > ?", 1.week.ago).order(score: :desc).limit(200) @limit = params.fetch(:limit, 10000).to_i
@posts = @posts.select(&:visible?)
render layout: false case @sitemap
when "artists"
@relation = Artist.undeleted
@search = { is_deleted: "false" }
when "forum_topics"
@relation = ForumTopic.undeleted
@search = { is_deleted: "false" }
when "pools"
@relation = Pool.undeleted
@search = { is_deleted: "false" }
when "posts"
@relation = Post.order(id: :asc)
@search = {}
when "tags"
@relation = Tag.nonempty
@search = {}
when "users"
@relation = User.all
@search = {}
when "wiki_pages"
@relation = WikiPage.undeleted
@search = { is_deleted: "false" }
else
raise NotImplementedError
end
end end
end end

View File

@@ -3,9 +3,9 @@ module PaginationExtension
attr_accessor :current_page, :records_per_page, :paginator_count, :paginator_mode attr_accessor :current_page, :records_per_page, :paginator_count, :paginator_mode
def paginate(page, limit: nil, count: nil, search_count: nil) def paginate(page, limit: nil, max_limit: 1000, count: nil, search_count: nil)
@records_per_page = limit || Danbooru.config.posts_per_page @records_per_page = limit || Danbooru.config.posts_per_page
@records_per_page = @records_per_page.to_i.clamp(1, 1000) @records_per_page = @records_per_page.to_i.clamp(1, max_limit)
if count.present? if count.present?
@paginator_count = count @paginator_count = count

View File

@@ -76,7 +76,11 @@ module PostSets
end end
def per_page def per_page
(@per_page || query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, MAX_PER_PAGE) (@per_page || query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, max_per_page)
end
def max_per_page
(format == "sitemap") ? 10_000 : MAX_PER_PAGE
end end
def is_random? def is_random?
@@ -105,7 +109,7 @@ module PostSets
if is_random? if is_random?
get_random_posts get_random_posts
else else
normalized_query.build.paginate(page, count: post_count, search_count: !post_count.nil?, limit: per_page).load normalized_query.build.paginate(page, count: post_count, search_count: !post_count.nil?, limit: per_page, max_limit: max_per_page).load
end end
end end
end end

View File

@@ -16,7 +16,8 @@ class ApplicationRecord < ActiveRecord::Base
search_params = params.fetch(:search, {}).permit! search_params = params.fetch(:search, {}).permit!
search_params = defaults.merge(search_params).with_indifferent_access search_params = defaults.merge(search_params).with_indifferent_access
search(search_params).paginate(params[:page], limit: params[:limit], search_count: count_pages) max_limit = (params[:format] == "sitemap") ? 10_000 : 1_000
search(search_params).paginate(params[:page], limit: params[:limit], max_limit: max_limit, search_count: count_pages)
end end
end end
end end

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Generic sitemap: one <url> entry per record in @current_item. %>
<%# Each entry carries the record's canonical URL and its last-update timestamp. %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <% @current_item.each do |record| %>
    <url>
      <loc><%= polymorphic_url(record) %></loc>
      <lastmod><%= record.updated_at.iso8601 %></lastmod>
    </url>
  <% end %>
</urlset>

View File

@@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Posts sitemap: one <url> entry per post in @posts, using Google's image and video extensions. %>
<%# Image sitemaps: https://support.google.com/webmasters/answer/178636 %>
<%# Video sitemaps: https://support.google.com/webmasters/answer/80471 %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
        xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
  <% @posts.each do |entry| %>
    <url>
      <loc><%= post_url(entry) %></loc>
      <lastmod><%= entry.updated_at.iso8601 %></lastmod>
      <%# Media details are emitted only when post.visible? is true; otherwise just the page URL is listed. %>
      <% if entry.visible? %>
        <% if entry.is_image? %>
          <image:image>
            <image:loc><%= entry.file_url %></image:loc>
          </image:image>
        <% elsif entry.is_video? %>
          <video:video>
            <video:thumbnail_loc><%= entry.preview_file_url %></video:thumbnail_loc>
            <video:content_loc><%= entry.file_url %></video:content_loc>
            <video:publication_date><%= entry.created_at.iso8601 %></video:publication_date>
            <video:title><%= "Post ##{entry.id}" %></video:title>
            <video:description><%= entry.tag_string %></video:description>
            <%# Only posts with rating "s" are flagged as family friendly. %>
            <video:family_friendly><%= entry.rating == "s" ? "yes" : "no" %></video:family_friendly>
          </video:video>
        <% end %>
      <% end %>
    </url>
  <% end %>
</urlset>

View File

@@ -1,20 +1,19 @@
Sitemap: <%= root_url %>sitemap.xml
User-agent: * User-agent: *
Disallow: / Disallow: /
Allow: /$
<% if Rails.env.production? && Danbooru.config.hostname == request.host %> <% if !Rails.env.production? || Danbooru.config.hostname == request.host %>
Disallow: /*.atom Disallow: /*.atom
Disallow: /*.json Disallow: /*.json
Disallow: /*.xml
Allow: /$
Allow: /artists Allow: /artists
Allow: /artist_commentaries Allow: /artist_commentaries
Allow: /comments Allow: /comments
Allow: /explore Allow: /explore
Allow: /favorite_groups
Allow: /forum_posts Allow: /forum_posts
Allow: /forum_topics Allow: /forum_topics
Allow: /iqdb_queries
Allow: /login Allow: /login
Allow: /notes Allow: /notes
Allow: /pools Allow: /pools
@@ -23,7 +22,7 @@ Allow: /sessions
Allow: /static Allow: /static
Allow: /tags Allow: /tags
Allow: /uploads Allow: /uploads
Allow: /user_upgrades Allow: /user_upgrade
Allow: /users Allow: /users
Allow: /wiki_pages Allow: /wiki_pages
@@ -51,4 +50,12 @@ Allow: /packs
Allow: /terms_of_service Allow: /terms_of_service
Allow: /privacy Allow: /privacy
Allow: /sitemap.xml Allow: /sitemap.xml
Sitemap: <%= sitemap_url(format: :xml, sitemap: "artists") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "forum_topics") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "pools") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "posts") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "tags") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "users") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "wiki_pages") %>
<% end %> <% end %>

View File

@@ -1,41 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Legacy sitemap: static links to the post, wiki page and pool indexes, followed by a cached %>
<%# section with one entry per ranked search and one entry per post in @posts. %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">
<url>
<loc><%= posts_url %></loc>
<changefreq>daily</changefreq>
</url>
<url>
<loc><%= wiki_pages_url %></loc>
<changefreq>daily</changefreq>
</url>
<url>
<loc><%= pools_url %></loc>
<changefreq>daily</changefreq>
</url>
<%# Everything below is fragment-cached under the "sitemap" key with a 24 hour expiry. %>
<% cache("sitemap", :expires_in => 24.hours) do %>
<%# One search-results URL per (tags, count) pair yielded for yesterday's rankings; count is unused. %>
<% @reportbooru_service.post_search_rankings(Date.yesterday) do |tags, count| %>
<url>
<loc><%= posts_url(tags: tags) %></loc>
<lastmod><%= Date.today %></lastmod>
</url>
<% end %>
<%# One entry per post, with an image extension giving the file URL and a tag-based caption. %>
<% @posts.each do |post| %>
<url>
<loc><%= post_url(post) %></loc>
<image:image>
<image:loc>
<%= post.file_url %>
</image:loc>
<image:caption>
<%= post.presenter.humanized_essential_tag_string %>
</image:caption>
</image:image>
<lastmod><%= post.created_at.to_date %></lastmod>
</url>
<% end %>
<% end %>
</urlset>

View File

@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Sitemap index: emits one <sitemap> entry per block of @limit consecutive ids in @relation. %>
<%# Each entry links to the model's index page in .sitemap format, restricted to that id block. %>
<%# NOTE(review): assumes @limit is a positive integer; the division below raises ZeroDivisionError for 0. %>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <%# maximum(:id) is nil for an empty relation; to_i yields a single empty page instead of raising. %>
  <% 0.upto(@relation.maximum(:id).to_i / @limit) do |page| %>
    <%# Inclusive id bounds covering exactly @limit ids, so adjacent pages never share a boundary id. %>
    <% lo = page * @limit %>
    <% hi = ((page + 1) * @limit) - 1 %>
    <%# nil when the block holds no records (e.g. a gap in the ids); formatted only after the presence check. %>
    <% lastmod = @relation.where(id: lo..hi).maximum(:updated_at) %>
    <% if lastmod.present? %>
      <%# Posts are filtered through the tag search syntax; every other model uses search[id] plus @search. %>
      <% if @sitemap == "posts" %>
        <% loc = posts_url(limit: @limit, format: :sitemap, tags: "id:#{lo}..#{hi}") %>
      <% else %>
        <% loc = polymorphic_url(@relation.klass, limit: @limit, format: :sitemap, search: { id: "#{lo}..#{hi}", **@search }) %>
      <% end %>
      <sitemap>
        <loc><%= loc %></loc>
        <lastmod><%= lastmod.iso8601 %></lastmod>
      </sitemap>
    <% end %>
  <% end %>
</sitemapindex>

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Tags sitemap: one <url> entry per tag in @tags, pointing at the post search for that tag's name. %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <% @tags.each do |entry| %>
    <url>
      <loc><%= posts_url(tags: entry.name) %></loc>
      <lastmod><%= entry.updated_at.iso8601 %></lastmod>
    </url>
  <% end %>
</urlset>

View File

@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Users sitemap: one <url> entry per user in @users; no <lastmod> is emitted for users. %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <% @users.each do |member| %>
    <url>
      <loc><%= user_url(member) %></loc>
    </url>
  <% end %>
</urlset>

View File

@@ -2,3 +2,4 @@
# Add new mime types for use in respond_to blocks: # Add new mime types for use in respond_to blocks:
# Mime::Type.register "text/richtext", :rtf # Mime::Type.register "text/richtext", :rtf
Mime::Type.register_alias "application/xml", :sitemap

View File

@@ -365,7 +365,7 @@ Rails.application.routes.draw do
get "/wiki/recent_changes" => redirect {|params, req| "/wiki_page_versions?search[updater_id]=#{req.params[:user_id]}"} get "/wiki/recent_changes" => redirect {|params, req| "/wiki_page_versions?search[updater_id]=#{req.params[:user_id]}"}
get "/wiki/history/:title" => redirect("/wiki_page_versions?title=%{title}") get "/wiki/history/:title" => redirect("/wiki_page_versions?title=%{title}")
get "/sitemap" => "static#sitemap" get "/sitemap" => "static#sitemap_index"
get "/opensearch" => "static#opensearch", :as => "opensearch" get "/opensearch" => "static#opensearch", :as => "opensearch"
get "/privacy" => "static#privacy_policy", :as => "privacy_policy" get "/privacy" => "static#privacy_policy", :as => "privacy_policy"
get "/terms_of_service" => "static#terms_of_service", :as => "terms_of_service" get "/terms_of_service" => "static#terms_of_service", :as => "terms_of_service"

View File

@@ -135,6 +135,12 @@ class ArtistsControllerTest < ActionDispatch::IntegrationTest
assert_response :success assert_response :success
end end
should "get the sitemap" do
get artists_path(format: :sitemap)
assert_response :success
assert_equal(Artist.count, response.parsed_body.css("urlset url loc").size)
end
context "when searching the index page" do context "when searching the index page" do
should "find artists by name" do should "find artists by name" do
get artists_path(name: "masao", format: "json") get artists_path(name: "masao", format: "json")

View File

@@ -114,6 +114,12 @@ class ForumTopicsControllerTest < ActionDispatch::IntegrationTest
assert_response :success assert_response :success
end end
should "render for a sitemap" do
get forum_topics_path(format: :sitemap)
assert_response :success
assert_equal(ForumTopic.count, response.parsed_body.css("urlset url loc").size)
end
context "with private topics" do context "with private topics" do
should "not show private topics to unprivileged users" do should "not show private topics to unprivileged users" do
as(@user) { @topic2.update!(min_level: User::Levels::MODERATOR) } as(@user) { @topic2.update!(min_level: User::Levels::MODERATOR) }

View File

@@ -23,6 +23,13 @@ class PoolsControllerTest < ActionDispatch::IntegrationTest
get pools_path, params: {:search => {:name_matches => @pool.name}} get pools_path, params: {:search => {:name_matches => @pool.name}}
assert_response :success assert_response :success
end end
should "render for a sitemap" do
get pools_path(format: :sitemap)
assert_response :success
assert_equal(Pool.count, response.parsed_body.css("urlset url loc").size)
end
end end
context "show action" do context "show action" do

View File

@@ -337,6 +337,14 @@ class PostsControllerTest < ActionDispatch::IntegrationTest
end end
end end
context "with the .sitemap format" do
should "render" do
get posts_path(format: :sitemap)
assert_response :success
assert_equal(Post.count, response.parsed_body.css("urlset url loc").size)
end
end
context "with deleted posts" do context "with deleted posts" do
setup do setup do
@post.update!(is_deleted: true) @post.update!(is_deleted: true)

View File

@@ -14,11 +14,14 @@ class StaticControllerTest < ActionDispatch::IntegrationTest
end end
context "sitemap action" do context "sitemap action" do
should "work" do [Artist, ForumTopic, Pool, Post, Tag, User, WikiPage].each do |klass|
create_list(:post, 3) should "work for #{klass.model_name.plural}" do
mock_post_search_rankings(Time.zone.yesterday, [["1girl", 100.0], ["2girls", 50.0]]) as(create(:user)) { create_list(klass.model_name.singular.to_sym, 3) }
get sitemap_path, as: :xml get sitemap_path(sitemap: klass.model_name.plural), as: :xml
assert_response :success
assert_response :success
assert_equal(1, response.parsed_body.css("sitemap loc").size)
end
end end
end end

View File

@@ -20,6 +20,12 @@ class TagsControllerTest < ActionDispatch::IntegrationTest
assert_response :success assert_response :success
end end
should "render for a sitemap" do
get tags_path(format: :sitemap)
assert_response :success
assert_equal(Tag.count, response.parsed_body.css("urlset url loc").size)
end
context "with blank search parameters" do context "with blank search parameters" do
should "strip the blank parameters with a redirect" do should "strip the blank parameters with a redirect" do
get tags_path, params: { search: { name: "touhou", category: "" } } get tags_path, params: { search: { name: "touhou", category: "" } }

View File

@@ -12,6 +12,12 @@ class UsersControllerTest < ActionDispatch::IntegrationTest
assert_response :success assert_response :success
end end
should "render for a sitemap" do
get users_path(format: :sitemap)
assert_response :success
assert_equal(User.count, response.parsed_body.css("urlset url loc").size)
end
should "list all users for /users?name=<name>" do should "list all users for /users?name=<name>" do
get users_path, params: { name: @user.name } get users_path, params: { name: @user.name }
assert_redirected_to(@user) assert_redirected_to(@user)

View File

@@ -23,6 +23,12 @@ class WikiPagesControllerTest < ActionDispatch::IntegrationTest
assert_response :success assert_response :success
end end
should "render for a sitemap" do
get wiki_pages_path(format: :sitemap)
assert_response :success
assert_equal(WikiPage.count, response.parsed_body.css("urlset url loc").size)
end
should "redirect the legacy title param to the show page" do should "redirect the legacy title param to the show page" do
get wiki_pages_path(title: "tagme") get wiki_pages_path(title: "tagme")
assert_redirected_to wiki_pages_path(search: { title_normalize: "tagme" }, redirect: true) assert_redirected_to wiki_pages_path(search: { title_normalize: "tagme" }, redirect: true)