Add bar charts for non-timeseries data. For example, a bar chart of the top 10 uploaders overall in the last month, rather than a timeseries chart of the number of uploads per day for the last month.
132 lines
5.9 KiB
Ruby
132 lines
5.9 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module Aggregatable
|
|
extend ActiveSupport::Concern
|
|
|
|
def timeseries(period: "day", date_column: :created_at, from: first[date_column], to: Time.now.utc, groups: [], group_limit: 10, columns: { count: "COUNT(*)" })
|
|
raise ArgumentError, "invalid period: #{period}" if !period.in?(%w[second minute hour day week month quarter year])
|
|
raise ArgumentError if all.group_values.present?
|
|
|
|
from = from.to_date
|
|
to = to.to_date
|
|
|
|
group_fields = groups.map { |name| reflections[name.to_s]&.foreign_key || name }
|
|
|
|
# SELECT date_trunc('day', posts.created_at) AS date FROM posts WHERE created_at BETWEEN from AND to GROUP BY date
|
|
subquery = select(date_trunc(period, date_column).as("date")).where(date_column => (from..to)).group("date").reorder(nil)
|
|
|
|
group_fields.each do |name|
|
|
# SELECT date_trunc('day', posts.created_at) AS date, uploader_id FROM posts WHERE created_at BETWEEN from AND to GROUP BY date, uploader_id
|
|
subquery = subquery.select(name).group(name)
|
|
end
|
|
|
|
columns.each do |name, sql|
|
|
# SELECT date_trunc('day', posts.created_at) AS date, uploader_id, COUNT(*) AS count FROM posts WHERE created_at BETWEEN from AND to GROUP BY date, uploader_id
|
|
subquery = subquery.select(Arel.sql(sql).as(name.to_s).to_sql)
|
|
end
|
|
|
|
# SELECT date_trunc('day', dates) AS date FROM generate_series(from, to, '1 day'::interval) AS dates
|
|
dates = "SELECT #{date_trunc(period, Arel.sql("dates")).to_sql} AS date FROM #{generate_timeseries(from, to, period).to_sql} AS dates"
|
|
|
|
# SELECT dates.date FROM (SELECT date_trunc('day', dates) AS date FROM generate_series(from, to, '1 day'::interval) AS dates) AS dates
|
|
query = unscoped.select("dates.date").from("(#{dates}) AS dates")
|
|
|
|
group_fields.each do |field|
|
|
# CROSS JOIN (SELECT uploader_id FROM posts WHERE created_at BETWEEN from AND to AND uploader_id IS NOT NULL GROUP BY uploader_id ORDER BY COUNT(*) DESC LIMIT 10) AS uploader_ids.uploader_id
|
|
join = select(field).where(date_column => (from..to)).where.not(field => nil).group(field).reorder(Arel.sql("COUNT(*) DESC")).limit(group_limit)
|
|
|
|
# SELECT dates.date, uploader_ids.uploader_id
|
|
# FROM (SELECT date_trunc('day', dates) AS date FROM generate_series('2022-01-01', '2022-02-15', '1 day'::interval) AS dates) AS dates
|
|
# CROSS JOIN (SELECT uploader_id FROM posts WHERE created_at BETWEEN from AND to GROUP BY uploader_ids ORDER BY COUNT(*) DESC LIMIT 10) AS uploader_ids.uploader_id
|
|
query = query.select("#{connection.quote_table_name(field.to_s.pluralize)}.#{connection.quote_column_name(field)}")
|
|
query = query.joins("CROSS JOIN (#{join.to_sql}) AS #{connection.quote_column_name(field.to_s.pluralize)}")
|
|
end
|
|
|
|
# on_clause = "subquery.date = dates.date AND subquery.uploader_id = uploader_ids.uploader_id"
|
|
on_clause = ["date", *group_fields].map { |group| "subquery.#{connection.quote_column_name(group)} = #{connection.quote_table_name(group.to_s.pluralize)}.#{connection.quote_column_name(group)}" }.join(" AND ")
|
|
query = query.joins("LEFT OUTER JOIN (#{subquery.to_sql}) AS subquery ON #{on_clause}")
|
|
query = query.reorder("date DESC")
|
|
|
|
columns.each do |name, sql|
|
|
# SELECT dates.date, uploader_ids.uploader_id, COALESCE(subquery.count, 0) AS count FROM ...
|
|
query = query.select(coalesce(Arel.sql("subquery.#{connection.quote_column_name(name)}"), 0).as(name.to_s))
|
|
end
|
|
|
|
# query =
|
|
# SELECT
|
|
# dates.date,
|
|
# uploader_ids.uploader_id,
|
|
# COALESCE(subquery.count, 0) AS count
|
|
# FROM (
|
|
# SELECT date_trunc('day', dates) AS date FROM generate_series(from, to, '1 day'::interval) AS dates
|
|
# ) AS dates
|
|
# CROSS JOIN (
|
|
# SELECT uploader_id FROM posts WHERE created_at BETWEEN from AND to AND uploader_id IS NOT NULL GROUP BY uploader_id ORDER BY COUNT(*) DESC LIMIT 10
|
|
# ) AS uploader_ids.uploader_id
|
|
# LEFT OUTER JOIN (
|
|
# SELECT
|
|
# date_trunc('day', posts.created_at) AS date,
|
|
# uploader_id,
|
|
# COUNT(*) AS count
|
|
# FROM posts
|
|
# WHERE created_at BETWEEN from AND to
|
|
# GROUP BY date, uploader_id
|
|
# ) subquery ON subquery.date = dates.date AND subquery.uploader_id = uploader_ids.uploader_id
|
|
# ORDER BY date DESC
|
|
|
|
build_dataframe(query, groups)
|
|
end
|
|
|
|
def aggregate(date_column: :created_at, from: first[date_column], to: Time.now.utc, groups: [], limit: 50, columns: { count: "COUNT(*)" }, order: Arel.sql("#{columns.first.second} DESC"))
|
|
group_fields = groups.map { |name| reflections[name.to_s]&.foreign_key || name }
|
|
|
|
query = where(date_column => (from..to)).reorder(order).limit(limit)
|
|
|
|
group_fields.each do |name|
|
|
query = query.select(name).group(name).where.not(name => nil)
|
|
end
|
|
|
|
columns.each do |name, sql|
|
|
query = query.select(Arel.sql(sql).as(name.to_s).to_sql)
|
|
end
|
|
|
|
build_dataframe(query, groups)
|
|
end
|
|
|
|
def group_by_period(period = "day", column = :created_at)
|
|
select(date_trunc(period, column).as("date")).group("date").reorder(Arel.sql("date DESC"))
|
|
end
|
|
|
|
def select_all
|
|
connection.select_all(all)
|
|
end
|
|
|
|
def date_trunc(field, column)
|
|
sql_function(:date_trunc, field, column)
|
|
end
|
|
|
|
def coalesce(column, value)
|
|
sql_function(:coalesce, column, value)
|
|
end
|
|
|
|
def generate_series(from, to, interval)
|
|
sql_function(:generate_series, from, to, interval)
|
|
end
|
|
|
|
def generate_timeseries(from, to, interval)
|
|
generate_series(from, to, Arel.sql("#{connection.quote("1 #{interval}")}::interval"))
|
|
end
|
|
|
|
private
|
|
|
|
def build_dataframe(query, groups)
|
|
results = query.select_all
|
|
types = results.columns.map { |column| [column, :object] }.to_h
|
|
associations = groups.map { |name| reflections[name.to_s] }.compact_blank
|
|
|
|
dataframe = Danbooru::DataFrame.new(results.to_a, types: types)
|
|
dataframe = dataframe.preload_associations(associations)
|
|
dataframe
|
|
end
|
|
end
|