Files
danbooru/app/logical/concerns/aggregatable.rb
evazion 203067b5ed reports: add non-timeseries charts.
Add bar charts for non-timeseries data. For example, a bar chart of the
top 10 uploaders overall in the last month, rather than a timeseries
chart of the number of uploads per day for the last month.
2022-10-23 04:42:51 -05:00

132 lines
5.9 KiB
Ruby

# frozen_string_literal: true
module Aggregatable
extend ActiveSupport::Concern
def timeseries(period: "day", date_column: :created_at, from: first[date_column], to: Time.now.utc, groups: [], group_limit: 10, columns: { count: "COUNT(*)" })
raise ArgumentError, "invalid period: #{period}" if !period.in?(%w[second minute hour day week month quarter year])
raise ArgumentError if all.group_values.present?
from = from.to_date
to = to.to_date
group_fields = groups.map { |name| reflections[name.to_s]&.foreign_key || name }
# SELECT date_trunc('day', posts.created_at) AS date FROM posts WHERE created_at BETWEEN from AND to GROUP BY date
subquery = select(date_trunc(period, date_column).as("date")).where(date_column => (from..to)).group("date").reorder(nil)
group_fields.each do |name|
# SELECT date_trunc('day', posts.created_at) AS date, uploader_id FROM posts WHERE created_at BETWEEN from AND to GROUP BY date, uploader_id
subquery = subquery.select(name).group(name)
end
columns.each do |name, sql|
# SELECT date_trunc('day', posts.created_at) AS date, uploader_id, COUNT(*) AS count FROM posts WHERE created_at BETWEEN from AND to GROUP BY date, uploader_id
subquery = subquery.select(Arel.sql(sql).as(name.to_s).to_sql)
end
# SELECT date_trunc('day', dates) AS date FROM generate_series(from, to, '1 day'::interval) AS dates
dates = "SELECT #{date_trunc(period, Arel.sql("dates")).to_sql} AS date FROM #{generate_timeseries(from, to, period).to_sql} AS dates"
# SELECT dates.date FROM (SELECT date_trunc('day', dates) AS date FROM generate_series(from, to, '1 day'::interval) AS dates) AS dates
query = unscoped.select("dates.date").from("(#{dates}) AS dates")
group_fields.each do |field|
# CROSS JOIN (SELECT uploader_id FROM posts WHERE created_at BETWEEN from AND to AND uploader_id IS NOT NULL GROUP BY uploader_id ORDER BY COUNT(*) DESC LIMIT 10) AS uploader_ids.uploader_id
join = select(field).where(date_column => (from..to)).where.not(field => nil).group(field).reorder(Arel.sql("COUNT(*) DESC")).limit(group_limit)
# SELECT dates.date, uploader_ids.uploader_id
# FROM (SELECT date_trunc('day', dates) AS date FROM generate_series('2022-01-01', '2022-02-15', '1 day'::interval) AS dates) AS dates
# CROSS JOIN (SELECT uploader_id FROM posts WHERE created_at BETWEEN from AND to GROUP BY uploader_ids ORDER BY COUNT(*) DESC LIMIT 10) AS uploader_ids.uploader_id
query = query.select("#{connection.quote_table_name(field.to_s.pluralize)}.#{connection.quote_column_name(field)}")
query = query.joins("CROSS JOIN (#{join.to_sql}) AS #{connection.quote_column_name(field.to_s.pluralize)}")
end
# on_clause = "subquery.date = dates.date AND subquery.uploader_id = uploader_ids.uploader_id"
on_clause = ["date", *group_fields].map { |group| "subquery.#{connection.quote_column_name(group)} = #{connection.quote_table_name(group.to_s.pluralize)}.#{connection.quote_column_name(group)}" }.join(" AND ")
query = query.joins("LEFT OUTER JOIN (#{subquery.to_sql}) AS subquery ON #{on_clause}")
query = query.reorder("date DESC")
columns.each do |name, sql|
# SELECT dates.date, uploader_ids.uploader_id, COALESCE(subquery.count, 0) AS count FROM ...
query = query.select(coalesce(Arel.sql("subquery.#{connection.quote_column_name(name)}"), 0).as(name.to_s))
end
# query =
# SELECT
# dates.date,
# uploader_ids.uploader_id,
# COALESCE(subquery.count, 0) AS count
# FROM (
# SELECT date_trunc('day', dates) AS date FROM generate_series(from, to, '1 day'::interval) AS dates
# ) AS dates
# CROSS JOIN (
# SELECT uploader_id FROM posts WHERE created_at BETWEEN from AND to AND uploader_id IS NOT NULL GROUP BY uploader_id ORDER BY COUNT(*) DESC LIMIT 10
# ) AS uploader_ids.uploader_id
# LEFT OUTER JOIN (
# SELECT
# date_trunc('day', posts.created_at) AS date,
# uploader_id,
# COUNT(*) AS count
# FROM posts
# WHERE created_at BETWEEN from AND to
# GROUP BY date, uploader_id
# ) subquery ON subquery.date = dates.date AND subquery.uploader_id = uploader_ids.uploader_id
# ORDER BY date DESC
build_dataframe(query, groups)
end
def aggregate(date_column: :created_at, from: first[date_column], to: Time.now.utc, groups: [], limit: 50, columns: { count: "COUNT(*)" }, order: Arel.sql("#{columns.first.second} DESC"))
group_fields = groups.map { |name| reflections[name.to_s]&.foreign_key || name }
query = where(date_column => (from..to)).reorder(order).limit(limit)
group_fields.each do |name|
query = query.select(name).group(name).where.not(name => nil)
end
columns.each do |name, sql|
query = query.select(Arel.sql(sql).as(name.to_s).to_sql)
end
build_dataframe(query, groups)
end
def group_by_period(period = "day", column = :created_at)
select(date_trunc(period, column).as("date")).group("date").reorder(Arel.sql("date DESC"))
end
def select_all
connection.select_all(all)
end
def date_trunc(field, column)
sql_function(:date_trunc, field, column)
end
def coalesce(column, value)
sql_function(:coalesce, column, value)
end
def generate_series(from, to, interval)
sql_function(:generate_series, from, to, interval)
end
def generate_timeseries(from, to, interval)
generate_series(from, to, Arel.sql("#{connection.quote("1 #{interval}")}::interval"))
end
private
def build_dataframe(query, groups)
results = query.select_all
types = results.columns.map { |column| [column, :object] }.to_h
associations = groups.map { |name| reflections[name.to_s] }.compact_blank
dataframe = Danbooru::DataFrame.new(results.to_a, types: types)
dataframe = dataframe.preload_associations(associations)
dataframe
end
end