Files
danbooru/app/logical/concerns/aggregatable.rb
evazion f73d2e3956 reports: add ability to group reports by column.
Add ability to group reports by various columns. For example, you can see
the posts by the top 10 uploaders over time, or posts grouped by rating
over time.
2022-10-22 04:05:10 -05:00

110 lines
5.2 KiB
Ruby

# frozen_string_literal: true
module Aggregatable
extend ActiveSupport::Concern
def timeseries(period: "day", date_column: :created_at, from: first[date_column], to: Time.now.utc, groups: [], group_limit: 10, columns: { count: "COUNT(*)" })
raise ArgumentError, "invalid period: #{period}" if !period.in?(%w[second minute hour day week month quarter year])
raise ArgumentError if all.group_values.present?
from = from.to_date
to = to.to_date
group_associations = groups.map { |name| reflections[name.to_s] }.compact_blank
group_fields = groups.map { |name| reflections[name.to_s]&.foreign_key || name }
# SELECT date_trunc('day', posts.created_at) AS date FROM posts WHERE created_at BETWEEN from AND to GROUP BY date
subquery = select(date_trunc(period, date_column).as("date")).where(date_column => (from..to)).group("date").reorder(nil)
group_fields.each do |name|
# SELECT date_trunc('day', posts.created_at) AS date, uploader_id FROM posts WHERE created_at BETWEEN from AND to GROUP BY date, uploader_id
subquery = subquery.select(name).group(name)
end
columns.each do |name, sql|
# SELECT date_trunc('day', posts.created_at) AS date, uploader_id, COUNT(*) AS count FROM posts WHERE created_at BETWEEN from AND to GROUP BY date, uploader_id
subquery = subquery.select(Arel.sql(sql).as(name.to_s).to_sql)
end
# SELECT date_trunc('day', dates) AS date FROM generate_series(from, to, '1 day'::interval) AS dates
dates = "SELECT #{date_trunc(period, Arel.sql("dates")).to_sql} AS date FROM #{generate_timeseries(from, to, period).to_sql} AS dates"
# SELECT dates.date FROM (SELECT date_trunc('day', dates) AS date FROM generate_series(from, to, '1 day'::interval) AS dates) AS dates
query = unscoped.select("dates.date").from("(#{dates}) AS dates")
group_fields.each do |field|
# CROSS JOIN (SELECT uploader_id FROM posts WHERE created_at BETWEEN from AND to AND uploader_id IS NOT NULL GROUP BY uploader_id ORDER BY COUNT(*) DESC LIMIT 10) AS uploader_ids.uploader_id
join = select(field).where(date_column => (from..to)).where.not(field => nil).group(field).reorder(Arel.sql("COUNT(*) DESC")).limit(group_limit)
# SELECT dates.date, uploader_ids.uploader_id
# FROM (SELECT date_trunc('day', dates) AS date FROM generate_series('2022-01-01', '2022-02-15', '1 day'::interval) AS dates) AS dates
# CROSS JOIN (SELECT uploader_id FROM posts WHERE created_at BETWEEN from AND to GROUP BY uploader_ids ORDER BY COUNT(*) DESC LIMIT 10) AS uploader_ids.uploader_id
query = query.select("#{connection.quote_table_name(field.to_s.pluralize)}.#{connection.quote_column_name(field)}")
query = query.joins("CROSS JOIN (#{join.to_sql}) AS #{connection.quote_column_name(field.to_s.pluralize)}")
end
# on_clause = "subquery.date = dates.date AND subquery.uploader_id = uploader_ids.uploader_id"
on_clause = ["date", *group_fields].map { |group| "subquery.#{connection.quote_column_name(group)} = #{connection.quote_table_name(group.to_s.pluralize)}.#{connection.quote_column_name(group)}" }.join(" AND ")
query = query.joins("LEFT OUTER JOIN (#{subquery.to_sql}) AS subquery ON #{on_clause}")
query = query.reorder("date DESC")
columns.each do |name, sql|
# SELECT dates.date, uploader_ids.uploader_id, COALESCE(subquery.count, 0) AS count FROM ...
query = query.select(coalesce(Arel.sql("subquery.#{connection.quote_column_name(name)}"), 0).as(name.to_s))
end
# query =
# SELECT
# dates.date,
# uploader_ids.uploader_id,
# COALESCE(subquery.count, 0) AS count
# FROM (
# SELECT date_trunc('day', dates) AS date FROM generate_series(from, to, '1 day'::interval) AS dates
# ) AS dates
# CROSS JOIN (
# SELECT uploader_id FROM posts WHERE created_at BETWEEN from AND to AND uploader_id IS NOT NULL GROUP BY uploader_id ORDER BY COUNT(*) DESC LIMIT 10
# ) AS uploader_ids.uploader_id
# LEFT OUTER JOIN (
# SELECT
# date_trunc('day', posts.created_at) AS date,
# uploader_id,
# COUNT(*) AS count
# FROM posts
# WHERE created_at BETWEEN from AND to
# GROUP BY date, uploader_id
# ) subquery ON subquery.date = dates.date AND subquery.uploader_id = uploader_ids.uploader_id
# ORDER BY date DESC
results = query.select_all
types = results.columns.map { |column| [column, :object] }.to_h
dataframe = Danbooru::DataFrame.new(results.to_a, types: types)
dataframe = dataframe.preload_associations(group_associations)
dataframe
end
def group_by_period(period = "day", column = :created_at)
select(date_trunc(period, column).as("date")).group("date").reorder(Arel.sql("date DESC"))
end
def select_all
connection.select_all(all)
end
def date_trunc(field, column)
sql_function(:date_trunc, field, column)
end
def coalesce(column, value)
sql_function(:coalesce, column, value)
end
def generate_series(from, to, interval)
sql_function(:generate_series, from, to, interval)
end
def generate_timeseries(from, to, interval)
generate_series(from, to, Arel.sql("#{connection.quote("1 #{interval}")}::interval"))
end
end