reports: add ability to group reports by column.
Add ability to group reports by various columns. For example, you can see the posts by the top 10 uploaders over time, or posts grouped by rating over time.
This commit is contained in:
1
Gemfile
1
Gemfile
@@ -58,6 +58,7 @@ gem "debug"
|
||||
gem "ffaker"
|
||||
gem "composite_primary_keys"
|
||||
gem "resolv"
|
||||
gem "rover-df"
|
||||
|
||||
group :development do
|
||||
gem 'rubocop', require: false
|
||||
|
||||
@@ -315,6 +315,7 @@ GEM
|
||||
nokogiri (1.13.9)
|
||||
mini_portile2 (~> 2.8.0)
|
||||
racc (~> 1.4)
|
||||
numo-narray (0.9.2.1)
|
||||
oauth2 (2.0.9)
|
||||
faraday (>= 0.17.3, < 3.0)
|
||||
jwt (>= 1.0, < 3.0)
|
||||
@@ -407,6 +408,8 @@ GEM
|
||||
reverse_markdown (2.1.1)
|
||||
nokogiri
|
||||
rexml (3.2.5)
|
||||
rover-df (0.3.4)
|
||||
numo-narray (>= 0.9.1.9)
|
||||
rubocop (1.36.0)
|
||||
json (~> 2.3)
|
||||
parallel (~> 1.10)
|
||||
@@ -582,6 +585,7 @@ DEPENDENCIES
|
||||
redis
|
||||
resolv
|
||||
responders
|
||||
rover-df
|
||||
rubocop
|
||||
rubocop-rails
|
||||
ruby-vips
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
class TimeSeriesComponent < ApplicationComponent
|
||||
delegate :current_page_path, :search_params, to: :helpers
|
||||
|
||||
attr_reader :results, :columns, :mode
|
||||
attr_reader :dataframe, :group, :mode
|
||||
|
||||
def initialize(results, columns, mode: :table)
|
||||
@results = results
|
||||
@columns = columns
|
||||
def initialize(dataframe, group: nil, mode: :table)
|
||||
@dataframe = dataframe
|
||||
@group = group
|
||||
@mode = mode.to_sym
|
||||
end
|
||||
end
|
||||
|
||||
@@ -9,20 +9,18 @@
|
||||
<% if mode == :table %>
|
||||
<table class="striped autofit" width="100%">
|
||||
<thead>
|
||||
<th>Date</th>
|
||||
|
||||
<% columns.each do |column| %>
|
||||
<%= tag.th(column.to_s.capitalize, class: ("col-expand" if column == columns.last)) %>
|
||||
<% dataframe.types.keys.each do |column| %>
|
||||
<%= tag.th(column.to_s.titleize, class: ("col-expand" if column == dataframe.types.keys.last)) %>
|
||||
<% end %>
|
||||
</thead>
|
||||
|
||||
<tbody>
|
||||
<% results.each do |row| %>
|
||||
<% dataframe.each_row do |row| %>
|
||||
<tr>
|
||||
<td><%= row["date"].to_date %></td>
|
||||
|
||||
<% columns.each do |column| %>
|
||||
<td><%= row[column.to_s] %></td>
|
||||
<% dataframe.types.keys.each do |column| %>
|
||||
<td>
|
||||
<%= row[column] %>
|
||||
</td>
|
||||
<% end %>
|
||||
<tr>
|
||||
<% end %>
|
||||
@@ -32,8 +30,8 @@
|
||||
<div class="line-chart" style="width: 100%; height: 80vh;"></div>
|
||||
|
||||
<script type="text/javascript">
|
||||
var data = <%= raw results.to_a.to_json %>;
|
||||
var columns = <%= raw columns.to_json %>;
|
||||
var data = <%= raw dataframe.each_row.map(&:values).to_json %>;
|
||||
var columns = <%= raw dataframe.types.keys.without("date").to_json %>;
|
||||
var chart = new Danbooru.TimeSeriesComponent({
|
||||
container: $(".line-chart").get(0),
|
||||
data: data,
|
||||
|
||||
@@ -11,72 +11,88 @@ class ReportsController < ApplicationController
|
||||
def show
|
||||
@report = params[:id]
|
||||
@mode = params.dig(:search, :mode) || "chart"
|
||||
@period = params.dig(:search, :period)&.downcase || "day"
|
||||
@from = params.dig(:search, :from) || 1.month.ago
|
||||
@to = params.dig(:search, :to) || Time.zone.now
|
||||
@columns = params.dig(:search, :columns).to_s.split(/[[:space:],]/).map(&:to_sym)
|
||||
@group = params.dig(:search, :group)&.downcase&.tr(" ", "_")
|
||||
@group_limit = params.dig(:search, :group_limit) || 10
|
||||
|
||||
case @report
|
||||
when "posts"
|
||||
@model = Post
|
||||
@title = "Posts Report"
|
||||
@columns = { posts: "COUNT(*)", uploaders: "COUNT(distinct uploader_id)" }
|
||||
@available_columns = { posts: "COUNT(*)", uploaders: "COUNT(distinct uploader_id)" }
|
||||
@available_groups = %w[uploader approver rating is_deleted]
|
||||
when "post_votes"
|
||||
@model = PostVote
|
||||
@title = "Post Votes Report"
|
||||
@columns = { votes: "COUNT(*)", posts: "COUNT(distinct post_id)", voters: "COUNT(distinct user_id)" }
|
||||
@available_columns = { votes: "COUNT(*)", posts: "COUNT(distinct post_id)", voters: "COUNT(distinct user_id)" }
|
||||
@available_groups = %w[]
|
||||
when "pools"
|
||||
@model = Pool
|
||||
@title = "Pools Report"
|
||||
@columns = { series_pools: "COUNT(*) FILTER (WHERE category = 'series')", collection_pools: "COUNT(*) FILTER (WHERE category = 'collection')" }
|
||||
@available_columns = { pools: "COUNT(*)" }
|
||||
@available_groups = %w[category is_deleted]
|
||||
when "comments"
|
||||
@model = Comment
|
||||
@title = "Comments Report"
|
||||
@columns = { comments: "COUNT(*)", commenters: "COUNT(distinct creator_id)" }
|
||||
@available_columns = { comments: "COUNT(*)", commenters: "COUNT(distinct creator_id)" }
|
||||
@available_groups = %w[creator do_not_bump_post is_deleted is_sticky]
|
||||
when "comment_votes"
|
||||
@model = CommentVote
|
||||
@title = "Comment Votes Report"
|
||||
@columns = { votes: "COUNT(*)", comments: "COUNT(distinct comment_id)", voters: "COUNT(distinct user_id)" }
|
||||
@available_columns = { votes: "COUNT(*)", comments: "COUNT(distinct comment_id)", voters: "COUNT(distinct user_id)" }
|
||||
@available_groups = %w[]
|
||||
when "forum_posts"
|
||||
@model = ForumPost
|
||||
@title = "Forum Posts Report"
|
||||
@columns = { forum_posts: "COUNT(*)", posters: "COUNT(distinct creator_id)" }
|
||||
@available_columns = { forum_posts: "COUNT(*)", posters: "COUNT(distinct creator_id)" }
|
||||
@available_groups = %w[creator is_deleted]
|
||||
when "bulk_update_requests"
|
||||
@model = BulkUpdateRequest
|
||||
@title = "Bulk Update Requests Report"
|
||||
@columns = { requests: "COUNT(*)", requestors: "COUNT(distinct user_id)" }
|
||||
@available_columns = { requests: "COUNT(*)", requestors: "COUNT(distinct user_id)" }
|
||||
@available_groups = %w[user approver status]
|
||||
when "tag_aliases"
|
||||
@model = TagAlias
|
||||
@title = "Tag Aliases Report"
|
||||
@columns = { aliases: "COUNT(*)" }
|
||||
@available_columns = { aliases: "COUNT(*)" }
|
||||
@available_groups = %w[status approver]
|
||||
when "tag_implications"
|
||||
@model = TagImplication
|
||||
@title = "Tag Implications Report"
|
||||
@columns = { aliases: "COUNT(*)" }
|
||||
@available_columns = { aliases: "COUNT(*)" }
|
||||
@available_groups = %w[status approver]
|
||||
when "artist_versions"
|
||||
@model = ArtistVersion
|
||||
@title = "Artist Edits Report"
|
||||
@columns = { artist_edits: "COUNT(*)", artists: "COUNT(distinct artist_id)", editors: "COUNT(distinct updater_id)" }
|
||||
@available_columns = { artist_edits: "COUNT(*)", artists: "COUNT(distinct artist_id)", editors: "COUNT(distinct updater_id)" }
|
||||
@available_groups = %w[updater]
|
||||
when "note_versions"
|
||||
@model = NoteVersion
|
||||
@title = "Note Edits Report"
|
||||
@columns = { note_edits: "COUNT(*)", posts: "COUNT(distinct post_id)", editors: "COUNT(distinct updater_id)" }
|
||||
@available_columns = { note_edits: "COUNT(*)", posts: "COUNT(distinct post_id)", editors: "COUNT(distinct updater_id)" }
|
||||
@available_groups = %w[updater]
|
||||
when "wiki_page_versions"
|
||||
@model = WikiPageVersion
|
||||
@title = "Wiki Edits Report"
|
||||
@columns = { wiki_edits: "COUNT(*)", editors: "COUNT(distinct updater_id)" }
|
||||
@available_columns = { wiki_edits: "COUNT(*)", editors: "COUNT(distinct updater_id)" }
|
||||
@available_groups = %w[updater]
|
||||
when "users"
|
||||
@model = User
|
||||
@title = "New Users Report"
|
||||
@columns = { users: "COUNT(*)" }
|
||||
@available_columns = { users: "COUNT(*)" }
|
||||
@available_groups = %w[level]
|
||||
when "bans"
|
||||
@model = Ban
|
||||
@title = "Bans Report"
|
||||
@columns = { bans: "COUNT(*)", banners: "COUNT(DISTINCT banner_id)" }
|
||||
@available_columns = { bans: "COUNT(*)", banners: "COUNT(DISTINCT banner_id)" }
|
||||
@available_groups = %w[banner duration]
|
||||
else
|
||||
raise ActiveRecord::RecordNotFound
|
||||
end
|
||||
|
||||
@period = params.dig(:search, :period)&.downcase || "day"
|
||||
@from = params.dig(:search, :from) || 1.month.ago
|
||||
@to = params.dig(:search, :to) || Time.zone.now
|
||||
|
||||
if CurrentUser.user.is_member? && CurrentUser.user.statement_timeout < 10_000
|
||||
@statement_timeout = 10_000
|
||||
else
|
||||
@@ -84,9 +100,15 @@ class ReportsController < ApplicationController
|
||||
end
|
||||
|
||||
ApplicationRecord.set_timeout(@statement_timeout) do
|
||||
@results = @model.search(params[:search], CurrentUser.user).timeseries(period: @period, from: @from, to: @to, columns: @columns)
|
||||
@group = nil unless @group&.in?(@available_groups)
|
||||
@columns = @available_columns.slice(*@columns)
|
||||
@columns = [@available_columns.first].to_h if @columns.blank?
|
||||
@dataframe = @model.search(params[:search], CurrentUser.user).timeseries(period: @period, from: @from, to: @to, groups: [@group].compact_blank, group_limit: @group_limit, columns: @columns)
|
||||
@dataframe["date"] = @dataframe["date"].map(&:to_date)
|
||||
@dataframe[@group] = @dataframe[@group].map(&:pretty_name) if @group.in?(%w[creator updater uploader banner approver user])
|
||||
@dataframe = @dataframe.crosstab("date", @group) if @group
|
||||
end
|
||||
|
||||
respond_with(@results)
|
||||
respond_with(@dataframe)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -11,7 +11,7 @@ export default class TimeSeriesComponent {
|
||||
|
||||
this.options = {
|
||||
dataset: {
|
||||
dimensions: ["date", ...columns],
|
||||
dimensions: ["date", ...this.columns],
|
||||
source: data,
|
||||
},
|
||||
tooltip: {
|
||||
@@ -46,14 +46,15 @@ export default class TimeSeriesComponent {
|
||||
containLabel: true
|
||||
},
|
||||
legend: {
|
||||
data: columns.map(startCase),
|
||||
data: this.columns.map(startCase),
|
||||
},
|
||||
xAxis: { type: "time" },
|
||||
yAxis: columns.map(name => ({ type: "value" })),
|
||||
series: columns.map(name => ({
|
||||
yAxis: this.columns.map(name => ({ type: "value" })),
|
||||
series: this.columns.map(name => ({
|
||||
name: startCase(name),
|
||||
type: "line",
|
||||
areaStyle: {},
|
||||
stack: "all",
|
||||
emphasis: {
|
||||
focus: "series"
|
||||
},
|
||||
|
||||
@@ -3,60 +3,88 @@
|
||||
module Aggregatable
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
def timeseries(period: "day", date_column: :created_at, from: first[date_column], to: Time.now.utc, columns: { count: "COUNT(*)" })
|
||||
def timeseries(period: "day", date_column: :created_at, from: first[date_column], to: Time.now.utc, groups: [], group_limit: 10, columns: { count: "COUNT(*)" })
|
||||
raise ArgumentError, "invalid period: #{period}" if !period.in?(%w[second minute hour day week month quarter year])
|
||||
raise ArgumentError if all.group_values.present?
|
||||
|
||||
from = from.to_date
|
||||
to = to.to_date
|
||||
|
||||
# SELECT
|
||||
# date_trunc('day', posts.created_at) AS date
|
||||
# COUNT(*) AS count
|
||||
# FROM posts
|
||||
# WHERE posts.created_at BETWEEN from AND to
|
||||
# GROUP BY date
|
||||
group_associations = groups.map { |name| reflections[name.to_s] }.compact_blank
|
||||
group_fields = groups.map { |name| reflections[name.to_s]&.foreign_key || name }
|
||||
|
||||
# SELECT date_trunc('day', posts.created_at) AS date FROM posts WHERE created_at BETWEEN from AND to GROUP BY date
|
||||
subquery = select(date_trunc(period, date_column).as("date")).where(date_column => (from..to)).group("date").reorder(nil)
|
||||
|
||||
group_fields.each do |name|
|
||||
# SELECT date_trunc('day', posts.created_at) AS date, uploader_id FROM posts WHERE created_at BETWEEN from AND to GROUP BY date, uploader_id
|
||||
subquery = subquery.select(name).group(name)
|
||||
end
|
||||
|
||||
columns.each do |name, sql|
|
||||
# SELECT COUNT(*) AS count
|
||||
# SELECT date_trunc('day', posts.created_at) AS date, uploader_id, COUNT(*) AS count FROM posts WHERE created_at BETWEEN from AND to GROUP BY date, uploader_id
|
||||
subquery = subquery.select(Arel.sql(sql).as(name.to_s).to_sql)
|
||||
end
|
||||
|
||||
# SELECT date_trunc('day', dates) AS date FROM generate_series('2022-01-01', '2022-02-15', '1 day'::interval) AS dates
|
||||
# SELECT date_trunc('day', dates) AS date FROM generate_series(from, to, '1 day'::interval) AS dates
|
||||
dates = "SELECT #{date_trunc(period, Arel.sql("dates")).to_sql} AS date FROM #{generate_timeseries(from, to, period).to_sql} AS dates"
|
||||
|
||||
# SELECT
|
||||
# date_trunc('day', dates.date) AS date,
|
||||
# COALESCE(subquery.count, 0) AS count
|
||||
# FROM (
|
||||
# SELECT date_trunc('day', dates) AS date
|
||||
# FROM generate_series(from, to, '1 day'::interval) AS dates
|
||||
# ) AS dates
|
||||
# LEFT OUTER JOIN (
|
||||
# SELECT
|
||||
# date_trunc('day', posts.created_at) AS date,
|
||||
# COUNT(*) AS count
|
||||
# FROM posts
|
||||
# WHERE posts.created_at BETWEEN from AND to
|
||||
# GROUP BY date
|
||||
# ) AS subquery
|
||||
# ORDER BY date DESC
|
||||
query =
|
||||
unscoped.
|
||||
select(date_trunc(period, Arel.sql("dates.date")).as("date")).
|
||||
from("(#{dates}) AS dates").
|
||||
joins("LEFT OUTER JOIN (#{subquery.to_sql}) AS subquery ON subquery.date = dates.date").
|
||||
order("date DESC")
|
||||
# SELECT dates.date FROM (SELECT date_trunc('day', dates) AS date FROM generate_series(from, to, '1 day'::interval) AS dates) AS dates
|
||||
query = unscoped.select("dates.date").from("(#{dates}) AS dates")
|
||||
|
||||
group_fields.each do |field|
|
||||
# CROSS JOIN (SELECT uploader_id FROM posts WHERE created_at BETWEEN from AND to AND uploader_id IS NOT NULL GROUP BY uploader_id ORDER BY COUNT(*) DESC LIMIT 10) AS uploader_ids.uploader_id
|
||||
join = select(field).where(date_column => (from..to)).where.not(field => nil).group(field).reorder(Arel.sql("COUNT(*) DESC")).limit(group_limit)
|
||||
|
||||
# SELECT dates.date, uploader_ids.uploader_id
|
||||
# FROM (SELECT date_trunc('day', dates) AS date FROM generate_series('2022-01-01', '2022-02-15', '1 day'::interval) AS dates) AS dates
|
||||
# CROSS JOIN (SELECT uploader_id FROM posts WHERE created_at BETWEEN from AND to GROUP BY uploader_ids ORDER BY COUNT(*) DESC LIMIT 10) AS uploader_ids.uploader_id
|
||||
query = query.select("#{connection.quote_table_name(field.to_s.pluralize)}.#{connection.quote_column_name(field)}")
|
||||
query = query.joins("CROSS JOIN (#{join.to_sql}) AS #{connection.quote_column_name(field.to_s.pluralize)}")
|
||||
end
|
||||
|
||||
# on_clause = "subquery.date = dates.date AND subquery.uploader_id = uploader_ids.uploader_id"
|
||||
on_clause = ["date", *group_fields].map { |group| "subquery.#{connection.quote_column_name(group)} = #{connection.quote_table_name(group.to_s.pluralize)}.#{connection.quote_column_name(group)}" }.join(" AND ")
|
||||
query = query.joins("LEFT OUTER JOIN (#{subquery.to_sql}) AS subquery ON #{on_clause}")
|
||||
query = query.reorder("date DESC")
|
||||
|
||||
columns.each do |name, sql|
|
||||
# SELECT COALESCE(subquery.count, 0) AS count
|
||||
# SELECT dates.date, uploader_ids.uploader_id, COALESCE(subquery.count, 0) AS count FROM ...
|
||||
query = query.select(coalesce(Arel.sql("subquery.#{connection.quote_column_name(name)}"), 0).as(name.to_s))
|
||||
end
|
||||
|
||||
query.select_all
|
||||
# query =
|
||||
# SELECT
|
||||
# dates.date,
|
||||
# uploader_ids.uploader_id,
|
||||
# COALESCE(subquery.count, 0) AS count
|
||||
# FROM (
|
||||
# SELECT date_trunc('day', dates) AS date FROM generate_series(from, to, '1 day'::interval) AS dates
|
||||
# ) AS dates
|
||||
# CROSS JOIN (
|
||||
# SELECT uploader_id FROM posts WHERE created_at BETWEEN from AND to AND uploader_id IS NOT NULL GROUP BY uploader_id ORDER BY COUNT(*) DESC LIMIT 10
|
||||
# ) AS uploader_ids.uploader_id
|
||||
# LEFT OUTER JOIN (
|
||||
# SELECT
|
||||
# date_trunc('day', posts.created_at) AS date,
|
||||
# uploader_id,
|
||||
# COUNT(*) AS count
|
||||
# FROM posts
|
||||
# WHERE created_at BETWEEN from AND to
|
||||
# GROUP BY date, uploader_id
|
||||
# ) subquery ON subquery.date = dates.date AND subquery.uploader_id = uploader_ids.uploader_id
|
||||
# ORDER BY date DESC
|
||||
|
||||
results = query.select_all
|
||||
types = results.columns.map { |column| [column, :object] }.to_h
|
||||
|
||||
dataframe = Danbooru::DataFrame.new(results.to_a, types: types)
|
||||
dataframe = dataframe.preload_associations(group_associations)
|
||||
dataframe
|
||||
end
|
||||
|
||||
def group_by_period(period = "day", column = :created_at)
|
||||
select(date_trunc(period, column).as("date")).group("date").order(Arel.sql("date DESC"))
|
||||
select(date_trunc(period, column).as("date")).group("date").reorder(Arel.sql("date DESC"))
|
||||
end
|
||||
|
||||
def select_all
|
||||
|
||||
47
app/logical/danbooru/data_frame.rb
Normal file
47
app/logical/danbooru/data_frame.rb
Normal file
@@ -0,0 +1,47 @@
|
||||
# A wrapper around Rover::DataFrame that adds some extra utility methods.
|
||||
#
|
||||
# @see https://github.com/ankane/rover
|
||||
module Danbooru
|
||||
class DataFrame
|
||||
attr_reader :df
|
||||
delegate :head, :shape, :types, :rename, :each_row, :[], :[]=, to: :df
|
||||
|
||||
def initialize(...)
|
||||
@df = Rover::DataFrame.new(...)
|
||||
end
|
||||
|
||||
# Replace ID columns with the actual object. For example, replace the `user_id` column with a `user` column containing User objects.
|
||||
def preload_associations(associations)
|
||||
associations.reduce(dup) do |table, association|
|
||||
primary_key = association.association_primary_key
|
||||
foreign_key = association.foreign_key
|
||||
name = association.name.to_s
|
||||
|
||||
ids = table[foreign_key].to_a.uniq.compact_blank
|
||||
records = association.klass.where(primary_key => ids).index_by(&primary_key.to_sym)
|
||||
|
||||
table.rename({ foreign_key => name })
|
||||
table[name] = table[name].map { |id| records[id] }
|
||||
table
|
||||
end
|
||||
end
|
||||
|
||||
def crosstab(index, pivot)
|
||||
new_df = DataFrame.new(index => df[index].uniq)
|
||||
|
||||
df[pivot].uniq.to_a.each do |value|
|
||||
columns = df.types.keys.without(index, pivot)
|
||||
columns.each do |column|
|
||||
name = columns.one? ? value.to_s : "#{value}_#{column}"
|
||||
new_df[name] = df[df[pivot] == value][column]
|
||||
end
|
||||
end
|
||||
|
||||
new_df
|
||||
end
|
||||
|
||||
def as_json(*options)
|
||||
df.to_a
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -13,10 +13,11 @@
|
||||
<%= f.input :from, as: :date, html5: true, input_html: { value: params[:search][:from] || 1.month.ago.to_date } %>
|
||||
<%= f.input :to, as: :date, html5: true, input_html: { value: params[:search][:to] || Time.zone.now.to_date } %>
|
||||
<%= f.input :period, collection: %w[Day Week Month Year], selected: params[:search][:period] %>
|
||||
<%= f.input :group, label: "Group By", collection: @available_groups.map { |group| [group.titleize, group] }, include_blank: true, selected: params[:search][:group] if @available_groups.present? %>
|
||||
<%= f.input :mode, as: :hidden, input_html: { value: params[:search][:mode] } %>
|
||||
<%= f.submit "Search" %>
|
||||
<% end %>
|
||||
|
||||
<%= render TimeSeriesComponent.new(@results, @columns.keys, mode: @mode) %>
|
||||
<%= render TimeSeriesComponent.new(@dataframe, mode: @mode) %>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user