docs: add documentation for various classes in app/logical.
@@ -1,22 +1,40 @@
# Export all public data in a model to BigQuery and to Google Cloud Storage.
# Perform a daily database dump to BigQuery and to Google Cloud Storage. This
# contains all data visible to anonymous users.
#
# The database dumps are publicly accessible. The BigQuery data is at
# `danbooru1.danbooru_public.{table}`. The Google Cloud Storage data is at
# `gs://danbooru_public/data/{table}.json`. The storage bucket contains the data
# in newline-delimited JSON format.
#
# @see DanbooruMaintenance#daily
# @see https://console.cloud.google.com/storage/browser/danbooru_public
# @see https://console.cloud.google.com/bigquery?d=danbooru_public&p=danbooru1&t=posts&page=table
# @see https://cloud.google.com/bigquery/docs
# @see https://cloud.google.com/storage/docs
# @see https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON
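#
# A rough, hypothetical sketch of reading the public data with the
# google-cloud-bigquery gem (the `tags` table and `name` column are assumed
# here purely for illustration):
#
#   bigquery = Google::Cloud::Bigquery.new
#   rows = bigquery.query("SELECT name FROM `danbooru1.danbooru_public.tags` LIMIT 10")
#   rows.each { |row| puts row[:name] }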
class BigqueryExportService
  extend Memoist

  attr_reader :model, :dataset_name, :credentials

  # Prepare to dump a table. Call {#export!} to dump it.
  # @param model [ApplicationRecord] the database table to dump
  # @param dataset_name [String] the BigQuery dataset name
  # @param credentials [String] the Google Cloud credentials (in JSON format)
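  #
  # @example Hypothetical usage (Post is assumed to be one of the exported models)
  #   BigqueryExportService.new(Post).export!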
  def initialize(model = nil, dataset_name: "danbooru_public", credentials: default_credentials)
    @model = model
    @dataset_name = dataset_name
    @credentials = credentials
  end

  # Start a background job for each table to export it to BigQuery.
  def self.async_export_all!(**options)
    models.each do |model|
      BigqueryExportJob.perform_later(model: model, **options)
    end
  end
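
  # For example, a daily maintenance task could kick off every export at once
  # (a hypothetical call site; see DanbooruMaintenance#daily above):
  #
  #   BigqueryExportService.async_export_all!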

  # The list of database tables to dump.
  def self.models
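    # Eager-loading presumably ensures every model class is defined before the
    # (elided) body below collects the exportable tables.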
    Rails.application.eager_load!
@@ -29,6 +47,7 @@ class BigqueryExportService
    credentials.present?
  end

  # Dump the table to Cloud Storage and BigQuery.
  def export!
    return unless enabled? && records.any?
@@ -36,7 +55,7 @@ class BigqueryExportService
    upload_to_bigquery!(file)
  end

  # Dump the model records to a gzipped, newline-delimited JSON tempfile.
  # Dump the table's records to a gzipped, newline-delimited JSON tempfile.
  def dump_records!
    file = Tempfile.new("danbooru-export-dump-", binmode: true)
    file = Zlib::GzipWriter.new(file)
@@ -51,8 +70,7 @@ class BigqueryExportService
    file
  end
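
  # A generic sketch of the newline-delimited JSON pattern this method relies on,
  # assuming an ActiveRecord scope named `records` (an illustration, not the
  # elided body above):
  #
  #   records.find_each do |record|
  #     file.puts(record.to_json)  # one JSON object per line
  #   end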

  # GCS: gs://danbooru_public/data/{model}.json
  # BQ: danbooru1.danbooru_public.{model}
  # Upload the JSON dump to Cloud Storage, then load it into BigQuery.
  def upload_to_bigquery!(file)
    table_name = model.model_name.collection
    gsfilename = "data/#{table_name}.json"
@@ -64,24 +82,27 @@ class BigqueryExportService
    job
  end
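
  # The middle of this method is elided in this hunk. A hypothetical sketch of
  # the general upload-then-load pattern with the google-cloud gems (`gsfile`
  # and the options are assumptions, not the actual code):
  #
  #   gsfile = bucket.create_file(file.path, gsfilename)
  #   load_job = dataset.load_job(table_name, gsfile, format: "json", autodetect: true)
  #   load_job.wait_until_done!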

  # private

  # The list of records to dump.
  def records
    model.visible(User.anonymous)
  end

  # Find or create the BigQuery dataset.
  def dataset
    bigquery.dataset(dataset_name) || bigquery.create_dataset(dataset_name)
  end

  # Find or create the Google Storage bucket.
  def bucket
    storage.bucket(dataset_name) || storage.create_bucket(dataset_name, acl: "public", default_acl: "public", storage_class: "standard", location: "us-east1")
  end

  # The BigQuery API client.
  def bigquery
    Google::Cloud::Bigquery.new(credentials: credentials)
  end

  # The Cloud Storage API client.
  def storage
    Google::Cloud::Storage.new(credentials: credentials)
  end