canvas-data-cli
Comparing version 0.1.0 to 0.2.0
 module.exports = {
   Api: require('./lib/Api'),
   Sync: require('./lib/Sync'),
-  cli: require('./lib/cli')
+  cli: require('./lib/cli'),
+  Fetch: require('./lib/Fetch'),
+  Unpack: require('./lib/Unpack')
 }
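The entry point now exports the two new classes alongside the existing ones. A quick way to see what 0.2.0 exposes programmatically — a trivial sketch, grounded only in the export list above:

```js
// Lists the public exports of canvas-data-cli 0.2.0
var canvasDataCli = require('canvas-data-cli');
console.log(Object.keys(canvasDataCli)); // ['Api', 'Sync', 'cli', 'Fetch', 'Unpack']
```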
@@ -10,2 +10,3 @@ 'use strict';
 var Unpack = require('./Unpack');
+var Fetch = require('./Fetch');
@@ -37,2 +38,14 @@ var cli = yargs.usage('npm <command>').demand(1, 'must provide a valid command').option('level', {
-}).help('help');
+}).command('fetch', 'fetch a single table', function (yargs) {
+  yargs.options('config', {
+    alias: 'c',
+    demand: true,
+    describe: 'the configuration file to use',
+    type: 'string'
+  }).option('table', {
+    alias: 't',
+    describe: 'the table to fetch',
+    demand: true,
+    type: 'string'
+  });
+}).help('help').alias('v', 'version').version(function () {
@@ -45,3 +58,4 @@ return require('../package').version;
   sampleConfig: { 'class': Config },
-  unpack: { requireConfig: true, 'class': Unpack }
+  unpack: { requireConfig: true, 'class': Unpack },
+  fetch: { requireConfig: true, 'class': Fetch }
 };
@@ -48,0 +62,0 @@ module.exports = {
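Since both `--config` and `--table` are demanded, the new subcommand is invoked as in the README examples further down:

```Shell
canvasDataCli fetch -c path/to/config.js -t user_dim
```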
@@ -7,10 +7,16 @@ 'use strict';
-var MAX_ATTEMPTS = 5;
 var fs = require('fs');
 var request = require('request');
 var pump = require('pump');
+var Re = require('re');
+var reOpts = {
+  retries: 5,
+  strategy: {
+    "type": Re.STRATEGIES.EXPONENTIAL,
+    "initial": 100,
+    "base": 2
+  }
+};
+var re = new Re(reOpts);
-// ghetto backoff solution
-var backoff = [100, 200, 500, 1500, 3000];
 var FileDownloader = (function () {
@@ -26,29 +32,25 @@ function FileDownloader(logger) {
     value: function downloadToFile(downloadLink, artifact, dest, cb) {
-      this._downloadRetry(downloadLink, artifact, dest, 0, cb);
-    }
-  }, {
-    key: '_downloadRetry',
-    value: function _downloadRetry(downloadLink, artifact, dest, attempt, cb) {
       var _this = this;
-      if (attempt > MAX_ATTEMPTS) return cb(new Error('max number of retries reached for ' + fileUrl + ', aborting'));
-      this.logger.debug('downlading ' + downloadLink.filename + ' for artifact ' + artifact.tableName + ' from dump ' + artifact.sequence + ', attempt ' + attempt);
-      var r = request({ method: 'GET', url: downloadLink.url });
-      var badStatusCode = false;
-      r.on('response', function (resp) {
-        if (resp.statusCode !== 200) {
-          _this.logger.debug('got non 200 status code (actual ' + resp.statusCode + ') from ' + downloadLink.url);
-          badStatusCode = true;
-        }
-      });
+      re['try'](function (retryCount, done) {
+        _this.logger.debug('downloading ' + downloadLink.filename + ' for artifact ' + artifact.tableName + ' from dump ' + artifact.sequence + ', attempt ' + (retryCount + 1));
+        var r = request({ method: 'GET', url: downloadLink.url });
+        var badStatusCode = false;
+        r.on('response', function (resp) {
+          if (resp.statusCode !== 200) {
+            _this.logger.debug('got non 200 status code (actual ' + resp.statusCode + ') from ' + downloadLink.url);
+            badStatusCode = true;
+          }
+        });
+        pump(r, fs.createWriteStream(dest), function (err) {
+          if (err || badStatusCode) {
+            _this.logger.debug('failed attempt ' + (retryCount + 1) + ' for ' + downloadLink.filename + ', err: ' + (err || badStatusCode));
+            return done(new Error("Failed Attempt."), retryCount);
+          }
+          _this.logger.debug('finished downlading ' + downloadLink.filename + ' for artifact ' + artifact.tableName + ' from dump ' + artifact.sequence);
+          done(null, retryCount);
+        });
+      }, function (err, retryCount) {
+        cb(err ? new Error('max number of retries reached for ' + downloadLink.filename + ', aborting') : null);
+      });
-      pump(r, fs.createWriteStream(dest), function (err) {
-        if (err || badStatusCode) {
-          _this.logger.debug('failed attempt ' + attempt + ' for ' + downloadLink.filename + ', err: ' + err);
-          return setTimeout(function () {
-            return _this._downloadRetry(downloadLink, artifact, dest, attempt + 1, cb);
-          }, backoff[attempt]);
-        }
-        _this.logger.debug('finished downlading ' + downloadLink.filename + ' for artifact ' + artifact.tableName + ' from dump ' + artifact.sequence);
-        cb();
-      });
-    }
@@ -55,0 +57,0 @@ }]);
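Two things change here: the hand-rolled retry (a fixed `backoff` array driven by recursive `setTimeout` calls) is replaced by the `re` library's exponential strategy, and the rewrite incidentally fixes a 0.1.0 bug where the max-retries error referenced an undefined `fileUrl` variable. Assuming `re`'s EXPONENTIAL strategy computes roughly `initial * base^retryCount` (check the `re` docs for the exact formula and any jitter), the new delay schedule works out like this sketch:

```js
// Approximate delay schedule implied by reOpts above.
// Assumption: delay = initial * Math.pow(base, retryCount); verify against the re docs.
var initial = 100, base = 2, retries = 5;
for (var retry = 0; retry < retries; retry++) {
  console.log('retry ' + (retry + 1) + ': ~' + initial * Math.pow(base, retry) + 'ms');
}
// ~100ms, ~200ms, ~400ms, ~800ms, ~1600ms — close to the old [100, 200, 500, 1500, 3000] table
```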
@@ -34,2 +34,7 @@ 'use strict';
   _createClass(Sync, [{
+    key: 'getNewCollector',
+    value: function getNewCollector() {
+      return { partialTables: {}, groups: {}, artifactCount: 0 };
+    }
+  }, {
     key: 'run',
@@ -44,27 +49,21 @@ value: function run(cb) {
       _this.logger.info('starting from sequence ' + lastSequence);
-      _this.getLatestDumps(lastSequence, function (err, dumps) {
+      _this.getToDownload(lastSequence, function (err, res) {
         if (err) return cb(err);
-        if (!dumps.length) {
+        var toDownload = res.toDownload;
+        var artifactCount = res.artifactCount;
+        var schemaVersion = res.schemaVersion;
+        var newestSequence = res.newestSequence;
+        if (toDownload.length === 0) {
           _this.logger.info('no new dumps to process');
           return cb();
         }
-        _this.logger.info('will process ' + dumps.length + ' dumps');
-        dumps = dumps.map(function (dump, index) {
-          return { dump: dump, index: index };
-        });
-        dumps[0].latestDump = true;
-        var latestDump = dumps[0].dump;
-        var newestSequence = dumps[0].dump.sequence;
-        async.reduce(dumps, { partialTables: {}, groups: {}, artifactCount: 0 }, _this.processDump.bind(_this), function (err, results) {
+        _this.logger.info('downloading ' + artifactCount + ' artifacts');
+        async.eachLimit(toDownload, CONCURRENCY_LIMIT, _this.downloadArtifactGroup.bind(_this), function (err) {
           if (err) return cb(err);
-          var toDownload = _.values(results.groups);
-          _this.logger.info('downloading ' + results.artifactCount + ' artifacts');
-          async.eachLimit(toDownload, CONCURRENCY_LIMIT, _this.downloadArtifactGroup.bind(_this), function (err) {
+          state.sequence = newestSequence;
+          _this.downloadSchema(schemaVersion, function (err) {
             if (err) return cb(err);
-            state.sequence = newestSequence;
-            _this.downloadSchema(latestDump, function (err) {
-              if (err) return cb(err);
-              _this.logger.info('finished, saving out state, newest sequence: ' + newestSequence);
-              _this.stateStore.save(state, cb);
-            });
+            _this.logger.info('finished, saving out state, newest sequence: ' + newestSequence);
+            _this.stateStore.save(state, cb);
           });
@@ -76,18 +75,41 @@ });
     }, {
-      key: 'downloadSchema',
-      value: function downloadSchema(dump, cb) {
+      key: 'getToDownload',
+      value: function getToDownload(lastSequence, cb) {
         var _this2 = this;
-        this.api.getSchemaVersion(dump.schemaVersion, function (err, schema) {
+        this.getLatestDumps(lastSequence, function (err, dumps) {
           if (err) return cb(err);
-          fs.writeFile(path.join(_this2.saveLocation, 'schema.json'), JSON.stringify(schema, 0, 2), cb);
+          if (dumps.length === 0) return cb();
+          _this2.logger.info('will process ' + dumps.length + ' dumps');
+          dumps = dumps.map(function (dump, index) {
+            return { dump: dump, index: index };
+          });
+          dumps[0].latestDump = true;
+          var latestDump = dumps[0].dump;
+          var newestSequence = dumps[0].dump.sequence;
+          async.reduce(dumps, _this2.getNewCollector(), _this2.processDump.bind(_this2), function (err, results) {
+            if (err) return cb(err);
+            var toDownload = _.values(results.groups);
+            cb(null, { toDownload: toDownload, newestSequence: newestSequence, artifactCount: results.artifactCount, schemaVersion: latestDump.schemaVersion });
+          });
         });
       }
+    }, {
+      key: 'downloadSchema',
+      value: function downloadSchema(schemaVersion, cb) {
+        var _this3 = this;
+        this.api.getSchemaVersion(schemaVersion, function (err, schema) {
+          if (err) return cb(err);
+          fs.writeFile(path.join(_this3.saveLocation, 'schema.json'), JSON.stringify(schema, 0, 2), cb);
+        });
+      }
     }, {
       key: 'processDump',
       value: function processDump(collector, dumpInfo, cb) {
-        var _this3 = this;
+        var _this4 = this;
         this.api.getFilesForDump(dumpInfo.dump.dumpId, function (err, dumpFiles) {
           if (err) return cb(err);
           for (var tableName in dumpFiles.artifactsByTable) {
@@ -97,14 +119,16 @@ var artifact = dumpFiles.artifactsByTable[tableName];
             var willDownload = false;
             if (dumpInfo.latestDump) {
-              _this3.logger.debug('will download artifact ' + tableName + ' from dump ' + artifactInfo.sequence + ' as latestDump');
+              _this4.logger.debug('will download artifact ' + tableName + ' from dump ' + artifactInfo.sequence + ' as latestDump');
               willDownload = true;
             } else if (artifact.partial && collector.partialTables[tableName] !== 'foundFull') {
-              _this3.logger.debug('will download artifact ' + tableName + ' from dump ' + artifactInfo.sequence + ' as partial');
+              _this4.logger.debug('will download artifact ' + tableName + ' from dump ' + artifactInfo.sequence + ' as partial');
               willDownload = true;
               collector.partialTables[tableName] = 'partial';
             } else if (collector.partialTables[tableName] === 'partial' && artifact.partial === false) {
-              _this3.logger.debug('will download artifact ' + tableName + ' from dump ' + artifactInfo.sequence + ' as first in partial');
+              _this4.logger.debug('will download artifact ' + tableName + ' from dump ' + artifactInfo.sequence + ' as first in partial');
               willDownload = true;
               collector.partialTables[tableName] = 'foundFull';
             }
             if (willDownload) {
@@ -116,2 +140,3 @@ collector.artifactCount++;
             }
             cb(null, collector);
@@ -128,16 +153,16 @@ });
       value: function downloadArtifact(artifact, cb) {
-        var _this4 = this;
+        var _this5 = this;
         this.removeOldArtifact(artifact, function (err) {
           if (err) return cb(err);
-          mkdirp(path.join(_this4.saveLocation, artifact.tableName), function (err) {
+          mkdirp(path.join(_this5.saveLocation, artifact.tableName), function (err) {
             if (err) return cb(err);
-            _this4.logger.info('artifact ' + artifact.tableName + ' from dump ' + artifact.sequence + ' has ' + artifact.artifact.files.length + ' to download');
+            _this5.logger.info('artifact ' + artifact.tableName + ' from dump ' + artifact.sequence + ' has ' + artifact.artifact.files.length + ' to download');
             async.eachLimit(artifact.artifact.files, CONCURRENCY_LIMIT, function (downloadLink, cb) {
-              var fileName = path.join(_this4.saveLocation, artifact.tableName, artifact.sequence + '_' + downloadLink.filename);
-              _this4.fileDownloader.downloadToFile(downloadLink, artifact, fileName, cb);
+              var fileName = path.join(_this5.saveLocation, artifact.tableName, artifact.sequence + '_' + downloadLink.filename);
+              _this5.fileDownloader.downloadToFile(downloadLink, artifact, fileName, cb);
             }, function (err) {
               if (err) return cb(err);
-              _this4.logger.info('artifact ' + artifact.tableName + ' from dump ' + artifact.sequence + ' finished');
+              _this5.logger.info('artifact ' + artifact.tableName + ' from dump ' + artifact.sequence + ' finished');
               cb();
@@ -158,3 +183,3 @@ });
       value: function getLatestDumps(lastSequence, collector, cb) {
-        var _this5 = this;
+        var _this6 = this;
@@ -169,6 +194,6 @@ if (typeof collector === 'function') {
           if (err) return cb(err);
-          (_collector = collector).push.apply(_collector, _toConsumableArray(dumps));
+          (_collector = collector).unshift.apply(_collector, _toConsumableArray(dumps));
           if (dumps.length < DEFAULT_LIMIT) return cb(null, collector);
           var newestSeq = dumps[0].sequence;
-          _this5.getLatestDumps(newestSeq, collector, cb);
+          _this6.getLatestDumps(newestSeq, collector, cb);
         });
@@ -175,0 +200,0 @@ }
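Net effect of the Sync refactor: the dump-walking logic moves out of `run()` into `getToDownload`, `downloadSchema` now takes a bare `schemaVersion` instead of a whole dump object, and `getLatestDumps` switches from `push` to `unshift` so the newest dump stays at index 0 across paginated API calls. The collector threaded through `processDump` now comes from `getNewCollector()`; a sketch of its shape, inferred from how the code above reads and writes it:

```js
// Collector shape (inferred from getNewCollector/processDump/getToDownload above)
var collector = {
  partialTables: {}, // tableName -> 'partial' | 'foundFull'; tracks completion of partial tables
  groups: {},        // artifacts selected for download; _.values(groups) becomes toDownload
  artifactCount: 0   // incremented per queued artifact; drives the 'downloading N artifacts' log
};
```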
 {
   "name": "canvas-data-cli",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "A CLI tool for interacting with the Canvas Data API",
   "main": "index.js",
   "scripts": {
-    "test": "mocha test/*Test.js",
+    "test": "mocha --compilers js:babel/register test/*Test.js",
     "prepublish": "babel src --out-dir lib/"
@@ -31,2 +31,3 @@ },
     "pump": "^1.0.1",
+    "re": "^0.1.4",
     "request": "^2.65.0",
@@ -39,4 +40,9 @@ "rimraf": "^2.4.3",
     "babel": "^5.8.29",
-    "mocha": "^2.3.3"
+    "chai": "^3.5.0",
+    "chai-fs": "^0.1.0",
+    "mocha": "^2.3.3",
+    "mocha-sinon": "^1.1.5",
+    "sinon": "^1.17.3",
+    "touch": "^1.0.0"
   }
 }
@@ -22,4 +22,13 @@ # Canvas Data CLI
 ## Usage
 ### Syncing
-`canvasDataCli sync -c path/to/config.js` will start the sync process. On the first sync, it will look through all the data exports and download only the latest version of any tables that are not
+If you want to simply download all the data from Canvas Data, the `sync` command can be used to keep an up-to-date copy locally.
+```Shell
+canvasDataCli sync -c path/to/config.js
+```
+This will start the sync process. On the first sync, it will look through all the data exports and download only the latest version of any tables that are not
 marked as `partial` and will download any files from older exports to complete a partial table.
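A note on the `-c` flag used throughout: it points at a JS config file, and the CLI's `sampleConfig` command (registered alongside `sync`, `unpack`, and the new `fetch` above) generates a starter one. The sketch below is only a guess at its shape — `saveLocation` is the one property visible in the Sync code; treat every other field as a placeholder and use the generated sample for the real names:

```js
// config.js — illustrative sketch only; generate the real file with the sampleConfig command
module.exports = {
  saveLocation: './dataFiles' // where sync/fetch write table files (read by Sync)
  // plus your Canvas Data API credentials, whatever fields sampleConfig emits
};
```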
@@ -29,3 +38,51 @@
-If you run this daily, you should keep all of your data from Canvas Data up to date.
+Currently, for plus and pro customers, Canvas Data is updated daily, so running this in a daily cron job should keep your files up to date.
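For instance, a crontab entry along these lines (illustrative — adjust the binary path and config location to your install) would sync every night at 3am:

```Shell
0 3 * * * canvasDataCli sync -c /path/to/config.js >> /var/log/canvas-data-sync.log 2>&1
```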
+### Fetch
+Fetches the most up-to-date data for a single table from the API. This ignores any previously downloaded files and will redownload all the files associated with that table.
+```Shell
+canvasDataCli fetch -c path/to/config.js -t user_dim
+```
+This will start the fetch process and download what is needed to get the most recent data for that table (in this case, `user_dim`).
+On subsequent executions, this will redownload all the data for that table, ignoring any previous days' data.
+### Unpack
+*NOTE*: This only works after properly running a `sync` command.
+This command will unpack the gzipped files, concatenate any partitioned files, and add a header to the output file.
+```Shell
+canvasDataCli unpack -c path/to/config.js -f user_dim,account_dim
+```
+This command will unpack the `user_dim` and `account_dim` tables to a directory. Currently, you have to explicitly list the tables you want to unpack,
+as this has the potential to create very large files.
+## Developing
+Process:
+1. Write some code
+2. Write tests
+3. Open a pull request
+### Running tests
+#### In Docker
+If you use Docker, you can run the tests inside a Docker container:
+```Shell
+./build.sh
+```
+#### Native
+```Shell
+npm install .
+npm test
+```
Supply chain risk note: this module accesses the network (found 1 instance in 1 package).