seo: increase sitemap coverage.

Rework sitemaps to provide more coverage of the site. We want every
important page on the site - including every post, tag, and wiki page -
to be indexed by Google. We do this by generating sitemaps and sitemap
indexes that contain links to every important page on the site.
This commit is contained in:
evazion
2020-07-09 22:38:35 -05:00
parent d88a2a674f
commit 42f0112c38
21 changed files with 187 additions and 63 deletions

View File

@@ -22,10 +22,34 @@ class StaticController < ApplicationController
# Action with no controller-side logic of its own.
# NOTE(review): presumably relies on the framework's default template
# rendering for this action — confirm.
def site_map
end
def sitemap
@reportbooru_service = ReportbooruService.new
@posts = Post.where("created_at > ?", 1.week.ago).order(score: :desc).limit(200)
@posts = @posts.select(&:visible?)
render layout: false
# Prepares the data for a sitemap index covering one kind of record.
#
# Reads params[:sitemap] to pick the record type and params[:limit]
# (default 10000) for how many ids each linked sitemap page spans, then sets:
#
#   @sitemap  - the requested sitemap name
#   @limit    - ids per sitemap page, clamped to 1..10_000
#   @relation - the records the index is built from
#   @search   - extra search params to add to each sitemap page link
#
# Raises NotImplementedError when the sitemap name is not recognised.
def sitemap_index
  @sitemap = params[:sitemap]

  # The index divides the highest id by @limit, so a zero or non-numeric
  # limit (to_i => 0) must not get through. The upper bound mirrors the
  # 10_000 per-page cap applied to sitemap-format listings; a larger value
  # would yield id ranges wider than a single sitemap page can return.
  @limit = params.fetch(:limit, 10000).to_i.clamp(1, 10_000)

  case @sitemap
  when "artists"
    @relation = Artist.undeleted
    @search = { is_deleted: "false" }
  when "forum_topics"
    @relation = ForumTopic.undeleted
    @search = { is_deleted: "false" }
  when "pools"
    @relation = Pool.undeleted
    @search = { is_deleted: "false" }
  when "posts"
    @relation = Post.order(id: :asc)
    # Was misspelled as @serach, which left @search nil for posts.
    @search = {}
  when "tags"
    @relation = Tag.nonempty
    @search = {}
  when "users"
    @relation = User.all
    @search = {}
  when "wiki_pages"
    @relation = WikiPage.undeleted
    @search = { is_deleted: "false" }
  else
    raise NotImplementedError
  end
end
end

View File

@@ -3,9 +3,9 @@ module PaginationExtension
attr_accessor :current_page, :records_per_page, :paginator_count, :paginator_mode
def paginate(page, limit: nil, count: nil, search_count: nil)
def paginate(page, limit: nil, max_limit: 1000, count: nil, search_count: nil)
@records_per_page = limit || Danbooru.config.posts_per_page
@records_per_page = @records_per_page.to_i.clamp(1, 1000)
@records_per_page = @records_per_page.to_i.clamp(1, max_limit)
if count.present?
@paginator_count = count

View File

@@ -76,7 +76,11 @@ module PostSets
end
def per_page
(@per_page || query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, MAX_PER_PAGE)
(@per_page || query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, max_per_page)
end
# Upper bound for the per-page post count: 10_000 when the requested format
# is "sitemap", otherwise MAX_PER_PAGE.
def max_per_page
  return 10_000 if format == "sitemap"

  MAX_PER_PAGE
end
def is_random?
@@ -105,7 +109,7 @@ module PostSets
if is_random?
get_random_posts
else
normalized_query.build.paginate(page, count: post_count, search_count: !post_count.nil?, limit: per_page).load
normalized_query.build.paginate(page, count: post_count, search_count: !post_count.nil?, limit: per_page, max_limit: max_per_page).load
end
end
end

View File

@@ -16,7 +16,8 @@ class ApplicationRecord < ActiveRecord::Base
search_params = params.fetch(:search, {}).permit!
search_params = defaults.merge(search_params).with_indifferent_access
search(search_params).paginate(params[:page], limit: params[:limit], search_count: count_pages)
max_limit = (params[:format] == "sitemap") ? 10_000 : 1_000
search(search_params).paginate(params[:page], limit: params[:limit], max_limit: max_limit, search_count: count_pages)
end
end
end

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Generic sitemap: one <url> entry per record in @current_item. %>
<%# Each entry links to the record via polymorphic_url and reports its updated_at as lastmod. %>
<%# NOTE(review): @current_item is iterated as a collection; what sets it is not visible here - confirm. %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<% @current_item.each do |item| %>
<url>
<loc><%= polymorphic_url(item) %></loc>
<lastmod><%= item.updated_at.iso8601 %></lastmod>
</url>
<% end %>
</urlset>

View File

@@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Sitemap for the posts in @posts, using the image and video sitemap extension namespaces. %>
<%# Every post gets a loc and lastmod; media details are added only when post.visible? is true. %>
<%# https://support.google.com/webmasters/answer/178636 %>
<%# https://support.google.com/webmasters/answer/80471 %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
<% @posts.each do |post| %>
<url>
<loc><%= post_url(post) %></loc>
<lastmod><%= post.updated_at.iso8601 %></lastmod>
<%# Posts that are not visible still get the entry above, but no file URLs. %>
<% if post.visible? %>
<% if post.is_image? %>
<image:image>
<image:loc><%= post.file_url %></image:loc>
</image:image>
<% elsif post.is_video? %>
<video:video>
<video:thumbnail_loc><%= post.preview_file_url %></video:thumbnail_loc>
<video:content_loc><%= post.file_url %></video:content_loc>
<video:publication_date><%= post.created_at.iso8601 %></video:publication_date>
<video:title><%= "Post ##{post.id}" %></video:title>
<video:description><%= post.tag_string %></video:description>
<%# Only rating "s" is reported as family friendly. %>
<video:family_friendly><%= post.rating == "s" ? "yes" : "no" %></video:family_friendly>
</video:video>
<% end %>
<% end %>
</url>
<% end %>
</urlset>

View File

@@ -1,20 +1,19 @@
Sitemap: <%= root_url %>sitemap.xml
User-agent: *
Disallow: /
Allow: /$
<% if Rails.env.production? && Danbooru.config.hostname == request.host %>
<% if !Rails.env.production? || Danbooru.config.hostname == request.host %>
Disallow: /*.atom
Disallow: /*.json
Disallow: /*.xml
Allow: /$
Allow: /artists
Allow: /artist_commentaries
Allow: /comments
Allow: /explore
Allow: /favorite_groups
Allow: /forum_posts
Allow: /forum_topics
Allow: /iqdb_queries
Allow: /login
Allow: /notes
Allow: /pools
@@ -23,7 +22,7 @@ Allow: /sessions
Allow: /static
Allow: /tags
Allow: /uploads
Allow: /user_upgrades
Allow: /user_upgrade
Allow: /users
Allow: /wiki_pages
@@ -51,4 +50,12 @@ Allow: /packs
Allow: /terms_of_service
Allow: /privacy
Allow: /sitemap.xml
Sitemap: <%= sitemap_url(format: :xml, sitemap: "artists") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "forum_topics") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "pools") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "posts") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "tags") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "users") %>
Sitemap: <%= sitemap_url(format: :xml, sitemap: "wiki_pages") %>
<% end %>

View File

@@ -1,41 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">
<url>
<loc><%= posts_url %></loc>
<changefreq>daily</changefreq>
</url>
<url>
<loc><%= wiki_pages_url %></loc>
<changefreq>daily</changefreq>
</url>
<url>
<loc><%= pools_url %></loc>
<changefreq>daily</changefreq>
</url>
<% cache("sitemap", :expires_in => 24.hours) do %>
<% @reportbooru_service.post_search_rankings(Date.yesterday) do |tags, count| %>
<url>
<loc><%= posts_url(tags: tags) %></loc>
<lastmod><%= Date.today %></lastmod>
</url>
<% end %>
<% @posts.each do |post| %>
<url>
<loc><%= post_url(post) %></loc>
<image:image>
<image:loc>
<%= post.file_url %>
</image:loc>
<image:caption>
<%= post.presenter.humanized_essential_tag_string %>
</image:caption>
</image:image>
<lastmod><%= post.created_at.to_date %></lastmod>
</url>
<% end %>
<% end %>
</urlset>

View File

@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Sitemap index: walks the id space of @relation in steps of @limit and emits one %>
<%# <sitemap> entry per step, linking to the sitemap-format listing for that id range. %>
<%# Steps whose id range contains no records are skipped. %>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<%# maximum(:id) is nil for an empty relation; to_i turns that into a single (empty) step. %>
<% 0.upto(@relation.maximum(:id).to_i / @limit) do |page| %>
<% lo = page * @limit %>
<% hi = (page + 1) * @limit %>
<%# maximum(:updated_at) is nil when no record falls in lo..hi, so iso8601 must be called with safe navigation. %>
<% lastmod = @relation.where(id: lo..hi).maximum(:updated_at)&.iso8601 %>
<%# Posts are addressed through a tag search; every other type through its search[id] param. %>
<% if @sitemap == "posts" %>
<% loc = posts_url(limit: @limit, format: :sitemap, tags: "id:#{lo}..#{hi}") %>
<% else %>
<% loc = polymorphic_url(@relation.klass, limit: @limit, format: :sitemap, search: { id: "#{lo}..#{hi}", **@search }) %>
<% end %>
<% if lastmod.present? %>
<sitemap>
<loc><%= loc %></loc>
<lastmod><%= lastmod %></lastmod>
</sitemap>
<% end %>
<% end %>
</sitemapindex>

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Sitemap for the tags in @tags: each entry links to the post listing for that tag name, %>
<%# with the tag's updated_at as lastmod. %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<% @tags.each do |tag| %>
<url>
<loc><%= posts_url(tags: tag.name) %></loc>
<lastmod><%= tag.updated_at.iso8601 %></lastmod>
</url>
<% end %>
</urlset>

View File

@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<%# Sitemap for the users in @users: one <url> per user, linking to user_url. No lastmod is emitted. %>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<% @users.each do |user| %>
<url>
<loc><%= user_url(user) %></loc>
</url>
<% end %>
</urlset>