Export public database dumps to BigQuery.
* Export daily public database dumps to BigQuery and Google Cloud Storage. * Only data visible to anonymous users is exported. Some tables have null or missing fields because of this. * The bans table is excluded because some bans have an expires_at timestamp set beyond year 9999, which BigQuery doesn't support. * The favorites table is excluded because it's too slow to dump (it doesn't have an id index, which is needed by find_each). * Version tables are excluded because dumping them every day is inefficient, streaming insertions should be used instead. Links: * https://console.cloud.google.com/bigquery?project=danbooru1 * https://console.cloud.google.com/storage/browser/danbooru_public * https://storage.googleapis.com/danbooru_public/data/posts.json
This commit is contained in:
63
Gemfile.lock
63
Gemfile.lock
@@ -161,12 +161,16 @@ GEM
|
||||
daemons (1.3.1)
|
||||
dante (0.2.0)
|
||||
debug_inspector (1.0.0)
|
||||
declarative (0.0.20)
|
||||
declarative-option (0.1.0)
|
||||
delayed_job (4.1.9)
|
||||
activesupport (>= 3.0, < 6.2)
|
||||
delayed_job_active_record (4.1.5)
|
||||
activerecord (>= 3.0, < 6.2)
|
||||
delayed_job (>= 3.0, < 5)
|
||||
diff-lcs (1.4.4)
|
||||
digest-crc (0.6.3)
|
||||
rake (>= 12.0.0, < 14.0.0)
|
||||
docile (1.3.5)
|
||||
domain_name (0.5.20190701)
|
||||
unf (>= 0.0.5, < 1.0.0)
|
||||
@@ -192,6 +196,49 @@ GEM
|
||||
ffi (~> 1.0)
|
||||
globalid (0.4.2)
|
||||
activesupport (>= 4.2.0)
|
||||
google-apis-bigquery_v2 (0.6.0)
|
||||
google-apis-core (~> 0.1)
|
||||
google-apis-core (0.3.0)
|
||||
addressable (~> 2.5, >= 2.5.1)
|
||||
googleauth (~> 0.14)
|
||||
httpclient (>= 2.8.1, < 3.0)
|
||||
mini_mime (~> 1.0)
|
||||
representable (~> 3.0)
|
||||
retriable (>= 2.0, < 4.0)
|
||||
rexml
|
||||
signet (~> 0.14)
|
||||
webrick
|
||||
google-apis-iamcredentials_v1 (0.2.0)
|
||||
google-apis-core (~> 0.1)
|
||||
google-apis-storage_v1 (0.3.0)
|
||||
google-apis-core (~> 0.1)
|
||||
google-cloud-bigquery (1.28.0)
|
||||
concurrent-ruby (~> 1.0)
|
||||
google-apis-bigquery_v2 (~> 0.1)
|
||||
google-cloud-core (~> 1.2)
|
||||
googleauth (~> 0.9)
|
||||
mini_mime (~> 1.0)
|
||||
google-cloud-core (1.5.0)
|
||||
google-cloud-env (~> 1.0)
|
||||
google-cloud-errors (~> 1.0)
|
||||
google-cloud-env (1.5.0)
|
||||
faraday (>= 0.17.3, < 2.0)
|
||||
google-cloud-errors (1.0.1)
|
||||
google-cloud-storage (1.30.0)
|
||||
addressable (~> 2.5)
|
||||
digest-crc (~> 0.4)
|
||||
google-apis-iamcredentials_v1 (~> 0.1)
|
||||
google-apis-storage_v1 (~> 0.1)
|
||||
google-cloud-core (~> 1.2)
|
||||
googleauth (~> 0.9)
|
||||
mini_mime (~> 1.0)
|
||||
googleauth (0.16.0)
|
||||
faraday (>= 0.17.3, < 2.0)
|
||||
jwt (>= 1.4, < 3.0)
|
||||
memoist (~> 0.16)
|
||||
multi_json (~> 1.11)
|
||||
os (>= 0.9, < 2.0)
|
||||
signet (~> 0.14)
|
||||
hsluv (1.0.1)
|
||||
http (4.4.1)
|
||||
addressable (~> 2.3)
|
||||
@@ -201,6 +248,7 @@ GEM
|
||||
http-form_data (2.3.0)
|
||||
http-parser (1.2.3)
|
||||
ffi-compiler (>= 1.0, < 2.0)
|
||||
httpclient (2.8.3)
|
||||
i18n (1.8.9)
|
||||
concurrent-ruby (~> 1.0)
|
||||
ipaddress_2 (0.13.0)
|
||||
@@ -257,6 +305,7 @@ GEM
|
||||
multi_json (~> 1.3)
|
||||
multi_xml (~> 0.5)
|
||||
rack (>= 1.2, < 3)
|
||||
os (1.1.1)
|
||||
parallel (1.20.1)
|
||||
parser (3.0.0.0)
|
||||
ast (~> 2.4.1)
|
||||
@@ -321,9 +370,14 @@ GEM
|
||||
json
|
||||
redis (4.2.5)
|
||||
regexp_parser (2.1.1)
|
||||
representable (3.0.4)
|
||||
declarative (< 0.1.0)
|
||||
declarative-option (< 0.2.0)
|
||||
uber (< 0.2.0)
|
||||
responders (3.0.1)
|
||||
actionpack (>= 5.0)
|
||||
railties (>= 5.0)
|
||||
retriable (3.1.2)
|
||||
rexml (3.2.4)
|
||||
rubocop (1.11.0)
|
||||
parallel (~> 1.10)
|
||||
@@ -359,6 +413,11 @@ GEM
|
||||
shoulda-context (2.0.0)
|
||||
shoulda-matchers (4.5.1)
|
||||
activesupport (>= 4.2.0)
|
||||
signet (0.15.0)
|
||||
addressable (~> 2.3)
|
||||
faraday (>= 0.17.3, < 2.0)
|
||||
jwt (>= 1.5, < 3.0)
|
||||
multi_json (~> 1.10)
|
||||
simple_form (5.1.0)
|
||||
actionpack (>= 5.2)
|
||||
activemodel (>= 5.2)
|
||||
@@ -391,6 +450,7 @@ GEM
|
||||
concurrent-ruby (~> 1.0)
|
||||
tzinfo-data (1.2021.1)
|
||||
tzinfo (>= 1.0.0)
|
||||
uber (0.1.0)
|
||||
unf (0.1.4)
|
||||
unf_ext
|
||||
unf_ext (0.0.7.7)
|
||||
@@ -408,6 +468,7 @@ GEM
|
||||
rack-proxy (>= 0.6.1)
|
||||
railties (>= 5.2)
|
||||
semantic_range (>= 2.3.0)
|
||||
webrick (1.7.0)
|
||||
websocket-driver (0.7.3)
|
||||
websocket-extensions (>= 0.1.0)
|
||||
websocket-extensions (0.1.5)
|
||||
@@ -447,6 +508,8 @@ DEPENDENCIES
|
||||
factory_bot
|
||||
ffaker
|
||||
flamegraph
|
||||
google-cloud-bigquery
|
||||
google-cloud-storage
|
||||
hsluv
|
||||
http
|
||||
http-cookie!
|
||||
|
||||
Reference in New Issue
Block a user