google-cloud-bigquery
Comparing versions v0.2.9...v0.2.10
@@ -5,2 +5,12 @@ # Change Log
+<a name="0.2.10"></a>
+## [0.2.10](https://github.com/nicolasdao/google-cloud-bigquery/compare/v0.2.9...v0.2.10) (2018-12-10)
+### Features
+* Add support for safeMode when inserting huge amount of rows ([579c959](https://github.com/nicolasdao/google-cloud-bigquery/commit/579c959))
 <a name="0.2.9"></a>
@@ -7,0 +17,0 @@ ## [0.2.9](https://github.com/nicolasdao/google-cloud-bigquery/compare/v0.2.8...v0.2.9) (2018-12-09)
index.js
@@ -12,3 +12,3 @@ /**
 const { fitToSchema, fieldsToSchema } = require('./src/format')
-const { obj, promise: { retry } } = require('./utils')
+const { obj, promise: { retry }, collection } = require('./utils')
@@ -47,7 +47,15 @@ const _getToken = auth => new Promise((onSuccess, onFailure) => auth.getToken((err, token) => err ? onFailure(err) : onSuccess(token)))
-const _retryInsert = (...args) => retry(
-    () => bigQuery.table.insert(...args),
-    () => true,
-    { ignoreFailure: true, retryInterval: [200, 800], retryAttempts: 10 }
-)
+const _retryInsert = (projectId, db, table, data, token, options={}) => {
+    if (options.safeMode && data && data.length > 500) {
+        return collection.batch(data, 500)
+            .reduce((job, dataBatch) =>
+                job.then(() => _retryInsert(projectId, db, table, dataBatch, token, obj.merge(options, { safeMode: false }))),
+                Promise.resolve(null))
+    } else
+        return retry(
+            () => bigQuery.table.insert(projectId, db, table, data, token, options),
+            () => true,
+            { ignoreFailure: true, retryInterval: [200, 800], retryAttempts: 10 }
+        )
+}
@@ -78,7 +86,7 @@ return {
 fromStorage: ({ sources=[] }) => __getToken().then(token => bigQuery.table.loadData(projectId, db, table, sources, token)),
-values: ({ data, templateSuffix, skipInvalidRows=false, forcedSchema, insert }) => __getToken().then(token => {
+values: ({ data, templateSuffix, skipInvalidRows=false, forcedSchema, insert, safeMode=false }) => __getToken().then(token => {
     const d = Array.isArray(data) ? data : [data]
     const dd = forcedSchema ? d.map(x => fitToSchema(x,forcedSchema)) : d
     const _insert = insert || _retryInsert
-    return _insert(projectId, db, table, dd, token, { templateSuffix, skipInvalidRows }).then(res => {
+    return _insert(projectId, db, table, dd, token, { templateSuffix, skipInvalidRows, safeMode }).then(res => {
         res = res || {}
@@ -85,0 +93,0 @@ res.payload = dd
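The safe-mode branch of the new `_retryInsert` depends on a `collection.batch` helper exported from `./utils`, which this diff does not show. Below is a minimal sketch of what such a chunking helper and the sequential `reduce` chain might look like; `batch` and `insertSequentially` are hypothetical names used for illustration, not the package's actual implementation.

```js
// Hypothetical stand-in for `collection.batch` from ./utils (not shown in this diff).
// Assumed behaviour: split an array into chunks of at most `size` items.
const batch = (arr = [], size = 500) => {
    const chunks = []
    for (let i = 0; i < arr.length; i += size)
        chunks.push(arr.slice(i, i + size))
    return chunks
}

// The new _retryInsert reduces over those chunks so that each 500-row insert
// only starts after the previous one has resolved.
const insertSequentially = (rows, insertBatch) =>
    batch(rows, 500).reduce(
        (job, dataBatch) => job.then(() => insertBatch(dataBatch)),
        Promise.resolve(null))
```

Seeding the `reduce` with an already-resolved promise is what makes the batches run one at a time rather than in parallel, which keeps every streaming-insert request at or under the 500-row guideline described in the README changes below.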
 {
   "name": "google-cloud-bigquery",
-  "version": "0.2.9",
+  "version": "0.2.10",
   "description": "Node.js package to create BigQuery table from Google Cloud Storage or load data into Google Cloud BigQuery tables including automatically updating the tables' schema.",
@@ -5,0 +5,0 @@ "main": "index.js",
@@ -26,3 +26,12 @@ # Google Cloud BigQuery
-2. Have both a BigQuery DB and a Bucket in the same region (the bucket is only needed if you wish to maintain the BigQuery schema using data stored in Google Cloud Storage).
+2. Have both a BigQuery DB and a Bucket in the same region (the bucket is only needed if you wish to maintain the BigQuery schema using data stored in Google Cloud Storage). As of December 2018, BigQuery is only supported in the following locations:
+    - asia-northeast1 (Tokyo)
+    - asia-east1 (Taiwan)
+    - asia-southeast1 (Singapore)
+    - australia-southeast1 (Sydney)
+    - europe-north1 (Finland)
+    - europe-west2 (London)
+    - us-east4 (Northern Virginia)
+    - eu (multi-region in the EU)
+    - us (multi-region in the US)
@@ -106,2 +115,16 @@ 3. Have a Service Account set up with the following 2 roles:
+#### IMPORTANT NOTE ABOUT QUOTAS AND LIMITS
+Notice that the `data` input accepts either a single object or an array of objects. Though BigQuery can ingest up to 10,000 rows per request and 100,000 rows per second, it is recommended to keep each request to 500 rows or fewer. You can read more about the quotas and limits at [https://cloud.google.com/bigquery/quotas#streaming_inserts](https://cloud.google.com/bigquery/quotas#streaming_inserts).
+To avoid inserting more than 500 rows per request, you can either batch the data yourself (a sketch of that approach follows this README excerpt), or rely on the built-in implementation via the `safeMode` flag as follows:
+```js
+userTbl.insert.values({ data: lotsOfUsers, safeMode: true })
+    .then(() => console.log(`All users inserted`))
+```
+The `safeMode` flag checks whether the _lotsOfUsers_ array contains more than 500 items. If it does, the array is broken down into batches of 500 items, which are then inserted sequentially. That means that if you're inserting 5,000 users, there will be 10 sequential requests of 500 users each.
 ### Getting Data
@@ -108,0 +131,0 @@
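For the batch-it-yourself alternative mentioned in the README excerpt above, here is a minimal sketch of manual batching. It assumes the same `userTbl` table object and `lotsOfUsers` array as the README example; `insertInBatches` is a hypothetical helper, not part of the package's API.

```js
// Manual batching sketch (hypothetical helper, not part of the package):
// split the rows into groups of at most 500 and insert each group sequentially,
// without relying on the safeMode flag.
// `userTbl` and `lotsOfUsers` are assumed to exist as in the README example above.
const insertInBatches = (rows, size = 500) => {
    const batches = []
    for (let i = 0; i < rows.length; i += size)
        batches.push(rows.slice(i, i + size))
    // Chain the inserts so each request only starts once the previous one resolves.
    return batches.reduce(
        (job, b) => job.then(() => userTbl.insert.values({ data: b })),
        Promise.resolve(null))
}

insertInBatches(lotsOfUsers).then(() => console.log('All users inserted'))
```

Functionally this is what the new `safeMode` flag automates inside `_retryInsert`, so in most cases passing `safeMode: true` is the simpler option.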
Supply chain risk note: automated analysis flags this package as a possible typosquat of a more popular package, i.e. it may be intentionally mimicking another package's name, description, or other metadata.