mongodb-schema - npm Package Compare versions

Comparing version 8.2.5 to 9.0.0

.github/workflows/unit-tests.yml


lib/index.js
var stream = require('./stream');
var es = require('event-stream');
var _ = require('lodash');

@@ -17,41 +16,53 @@ // var debug = require('debug')('mongodb-schema:wrapper');

*
* @param {Function} fn Callback which will be passed `(err, schema)`
* @param {Function} callback Callback which will be passed `(err, schema)`
* @return {Promise} You can await promise, or use callback if provided.
*/
module.exports = function(docs, options, fn) {
// shift parameters if no options are specified
if (_.isUndefined(options) || _.isFunction(options) && _.isUndefined(fn)) {
fn = options;
options = {};
}
module.exports = function(docs, options, callback) {
const promise = new Promise((resolve, reject) => {
// shift parameters if no options are specified
if (typeof options === 'undefined' || (typeof options === 'function' && typeof callback === 'undefined')) {
callback = options;
options = {};
}
var src;
// MongoDB Cursors
if (docs.stream && typeof docs.stream === 'function') {
src = docs.stream();
// Streams
} else if (docs.pipe && typeof docs.pipe === 'function') {
src = docs;
// Arrays
} else if (_.isArray(docs)) {
src = es.readArray(docs);
} else {
fn(new Error('Unknown input type for `docs`. Must be an array, '
+ 'stream or MongoDB Cursor.'));
return;
var src;
// MongoDB Cursors
if (docs.stream && typeof docs.stream === 'function') {
src = docs.stream();
// Streams
} else if (docs.pipe && typeof docs.pipe === 'function') {
src = docs;
// Arrays
} else if (Array.isArray(docs)) {
src = es.readArray(docs);
} else {
reject(new Error(
'Unknown input type for `docs`. Must be an array, ' +
'stream or MongoDB Cursor.'
));
return;
}
var result;
src
.pipe(stream(options))
.on('data', function(data) {
result = data;
})
.on('error', function(err) {
reject(err);
})
.on('end', function() {
resolve(result);
});
});
if (callback && typeof callback === 'function') {
promise.then(callback.bind(null, null), callback);
}
var result;
src.pipe(stream(options))
.on('data', function(data) {
result = data;
})
.on('error', function(err) {
fn(err);
})
.on('end', function() {
fn(null, result);
});
return promise;
};
module.exports.stream = stream;
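
For orientation, a minimal usage sketch of the refactored entry point above (not part of the diff; it assumes the package's main export is the function from lib/index.js, and the sample documents are made up):

// Minimal sketch, assuming require('mongodb-schema') resolves to lib/index.js above.
const parseSchema = require('mongodb-schema');

const docs = [
  { _id: 1, name: 'Ada' },
  { _id: 2, name: 'Grace', email: 'grace@example.com' }
];

// New in 9.0.0: the call returns a Promise, so it can be awaited or chained...
parseSchema(docs, { storeValues: false })
  .then(schema => console.log(schema.count))       // number of documents analysed
  .catch(err => console.error(err));

// ...while the 8.x (err, schema) callback style keeps working.
parseSchema(docs, (err, schema) => {
  if (err) throw err;
  console.log(schema.fields.map(f => f.name));     // names of the inferred fields
});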

@@ -1,2 +0,1 @@

var _ = require('lodash');
// var debug = require('debug')('mongodb-schema:stats');

@@ -11,13 +10,17 @@

width += schema.fields.length;
width += _.sum(schema.fields.map(function(field) {
var doc = _.find(field.types, 'name', 'Document');
width += schema.fields.map(field => {
var doc = field.types.find(v => v.name === 'Document');
return widthRecursive(doc);
}));
width += _.sum(schema.fields.map(function(field) {
var arr = _.find(field.types, 'name', 'Array');
}).reduce((p, c) => p + c || 0, 0);
width += schema.fields.map(field => {
var arr = field.types.find(v => v.name === 'Array');
if (arr) {
var doc = _.find(arr.types, 'name', 'Document');
var doc = arr.types.find(v => v.name === 'Document');
return widthRecursive(doc);
}
}));
})
.reduce((p, c) => p + c || 0, 0);
}

@@ -34,14 +37,15 @@ return width;

maxChildDepth = 1 + Math.max(
_.max(schema.fields.map(function(field) {
var doc = _.find(field.types, 'name', 'Document');
Math.max(...schema.fields.map(field => {
var doc = field.types.find(v => v.name === 'Document');
return depthRecursive(doc);
})),
_.max(schema.fields.map(function(field) {
var arr = _.find(field.types, 'name', 'Array');
Math.max(...schema.fields.map(field => {
var arr = field.types.find(v => v.name === 'Array');
if (arr) {
var doc = _.find(arr.types, 'name', 'Document');
var doc = arr.types.find(v => v.name === 'Document');
return depthRecursive(doc);
}
return 0;
})));
}))
);
}

@@ -60,10 +64,10 @@ return maxChildDepth;

res = schema.fields.map(function(field) {
var doc = _.find(field.types, 'name', 'Document');
var doc = field.types.find(v => v.name === 'Document');
return branchingFactors(doc);
});
branchArray.push.apply(branchArray, _.flatten(res, true));
branchArray.push(...res.flat(Infinity));
res = schema.fields.map(function(field) {
var arr = _.find(field.types, 'name', 'Array');
var arr = field.types.find(v => v.name === 'Array');
if (arr) {
var doc = _.find(arr.types, 'name', 'Document');
var doc = arr.types.find(v => v.name === 'Document');
return branchingFactors(doc);

@@ -73,5 +77,5 @@ }

});
branchArray.push.apply(branchArray, _.flatten(res, true));
branchArray.push(...res.flat(Infinity));
}
return _.sortBy(branchArray).reverse();
return branchArray.sort().reverse();
};

@@ -78,0 +82,0 @@
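
As an illustrative aside (not part of the diff): the lodash 3 call _.find(field.types, 'name', 'Document') used above relies on the matchesProperty shorthand that lodash 4 dropped, which is why the stats helpers now use the native equivalent:

// Both forms pick the first element whose name property equals 'Document'.
var types = [{ name: 'String' }, { name: 'Document', fields: [] }];
var doc = types.find(v => v.name === 'Document');  // native replacement used in 9.0.0
console.log(doc.name); // 'Document'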

var es = require('event-stream');
var Reservoir = require('reservoir');
var _ = require('lodash');
var Reservoir = require('reservoir');

@@ -16,10 +16,10 @@ // var debug = require('debug')('mongodb-schema:stream');

if (value && value._bsontype) {
if (_.includes([ 'Decimal128', 'Long' ], value._bsontype)) {
if (['Decimal128', 'Long'].includes(value._bsontype)) {
return value.toString();
}
if (_.includes([ 'Double', 'Int32' ], value._bsontype)) {
if ([ 'Double', 'Int32' ].includes(value._bsontype)) {
return String(value.value);
}
}
if (_.isString(value)) {
if (typeof value === 'string') {
return value;

@@ -72,3 +72,3 @@ }

if (tag === 'fields') {
_.each(schema, function(field) {
Object.values(schema).forEach((field) => {
// create `Undefined` pseudo-type

@@ -84,7 +84,9 @@ var missing = parent.count - field.count;

}
field.total_count = _.sum(field.types, 'count');
field.total_count = Object.values(field.types)
.map(v => v.count)
.reduce((p, c) => p + c, 0);
// recursively finalize types
finalizeSchema(field.types, field, 'types');
field.type = _.pluck(field.types, 'name');
field.type = field.types.map(v => v.name);
if (field.type.length === 1) {

@@ -94,3 +96,3 @@ field.type = field.type[0];

// a field has duplicates when any of its types have duplicates
field.has_duplicates = _.any(field.types, 'has_duplicates');
field.has_duplicates = !!field.types.find(v => v.has_duplicates);
// compute probability

@@ -100,7 +102,7 @@ field.probability = field.count / parent.count;

// turn object into array
parent.fields = _.values(parent.fields).sort(fieldComparator);
parent.fields = Object.values(parent.fields).sort(fieldComparator);
}
if (tag === 'types') {
_.each(schema, function(type) {
type.total_count = _.sum(type.lengths);
Object.values(schema).forEach(type => {
type.total_count = (type.lengths || []).reduce((p, c) => p + c || 0, 0);
// debug('recursively calling schema.fields');

@@ -118,3 +120,3 @@ finalizeSchema(type.fields, type, 'fields');

} else if (type.values) {
type.unique = _.uniq(type.values, false, extractStringValueFromBSON).length;
type.unique = new Set(type.values.map(extractStringValueFromBSON)).size;
type.has_duplicates = type.unique !== type.values.length;

@@ -128,3 +130,3 @@ }

});
parent.types = _.sortByOrder(_.values(parent.types), 'probability', 'desc');
parent.types = Object.values(parent.types).sort((a, b) => b.probability - a.probability);
}
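
Putting the finalization steps above together, one entry of parent.fields ends up shaped roughly like this sketch (property names taken from the code above; the values are purely illustrative):

// Rough shape of a finalized field; the concrete values are made up.
{
  name: 'email',
  path: 'email',
  count: 2,                    // documents in which the field was present
  total_count: 2,              // summed from the per-type counts
  probability: 0.5,            // field.count / parent.count
  has_duplicates: false,       // true if any of its types reports duplicates
  type: 'String',              // a single name, or an array of names if several types were seen
  types: [ /* type objects, sorted by probability in descending order */ ]
}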

@@ -156,27 +158,21 @@ return schema;

// set default options
options = _.defaults({}, options, {
semanticTypes: false,
storeValues: true
});
options = { semanticTypes: false, storeValues: true, ...options};
var semanticTypes = require('./semantic-types');
if (_.isObject(options.semanticTypes)) {
if (typeof options.semanticTypes === 'object') {
// enable existing types that evaluate to true
var enabledTypes = _(options.semanticTypes)
.pick(function(val) {
return _.isBoolean(val) && val;
})
.keys()
.map(function(val) {
return val.toLowerCase();
})
.value();
semanticTypes = _.pick(semanticTypes, function(val, key) {
return _.includes(enabledTypes, key.toLowerCase());
});
// merge with custom types that are functions
semanticTypes = _.assign(semanticTypes,
_.pick(options.semanticTypes, _.isFunction)
);
var enabledTypes = Object.entries(options.semanticTypes)
.filter(([, v]) => typeof v === 'boolean' && v)
.map(([k]) => k.toLowerCase());
semanticTypes = {...
Object.entries(semanticTypes)
.filter(([k]) => enabledTypes.includes(k.toLowerCase()))
.reduce((p, [k, v]) => ({...p, [k]: v}), {}),
};
Object.entries(options.semanticTypes)
.filter(([, v]) => typeof v === 'function')
.forEach(([k, v]) => {semanticTypes[k] = v;});
}
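
A hedged sketch of what callers can pass as options.semanticTypes, based on the handling above (the key names are illustrative, not a list of the built-in detectors):

// Boolean values enable built-in detectors by case-insensitive name;
// function values are registered as custom detectors and called as fn(value, path).
var options = {
  semanticTypes: {
    email: true,                              // hypothetical built-in detector name
    phone: function(value, path) {            // custom detector: return truthy to use this type
      return /phone/i.test(path);
    }
  }
};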

@@ -216,5 +212,9 @@

// pass value to semantic type detectors, return first match or undefined
return _.findKey(semanticTypes, function(fn) {
return fn(value, path);
});
const returnValue = Object.entries(semanticTypes)
.filter(([, v]) => {
return v(value, path);
})
.map(([k]) => k)[0];
return returnValue;
};

@@ -248,2 +248,3 @@

var addToType = function(path, value, schema) {

@@ -254,4 +255,3 @@ var bsonType = getBSONType(value);

// it is always the bson type.
var typeName = (options.semanticTypes) ?
getSemanticType(value, path) || bsonType : bsonType;
var typeName = (options.semanticTypes) ? getSemanticType(value, path) || bsonType : bsonType;
var type = schema[typeName] = _.get(schema, typeName, {

@@ -266,8 +266,6 @@ name: typeName,

if (typeName === 'Array') {
type.types = _.get(type, 'types', {});
type.lengths = _.get(type, 'lengths', []);
type.types = type.types || {};
type.lengths = type.lengths || [];
type.lengths.push(value.length);
_.each(value, function(v) {
addToType(path, v, type.types);
});
value.forEach(v => addToType(path, v, type.types));

@@ -277,10 +275,10 @@ // recurse into nested documents by calling `addToField` for all sub-fields

type.fields = _.get(type, 'fields', {});
_.forOwn(value, function(v, k) {
addToField(path + '.' + k, v, type.fields);
});
Object.entries(value).forEach(([k, v]) => addToField(path + '.' + k, v, type.fields));
// if the `storeValues` option is enabled, store some example values
} else if (options.storeValues) {
type.values = _.get(type, 'values', bsonType === 'String' ?
new Reservoir(100) : new Reservoir(10000));
var defaultValue = bsonType === 'String' ?
new Reservoir(100) : new Reservoir(10000);
type.values = type.values || defaultValue;
addToValue(type, value);

@@ -300,4 +298,5 @@ }

var pathSplitOnDot = path.split('.');
defaults[path] = {
name: _.last(path.split('.')),
name: pathSplitOnDot[pathSplitOnDot.length - 1],
path: path,

@@ -323,5 +322,3 @@ count: 0,

var parser = es.through(function write(obj) {
_.each(_.keys(obj), function(key) {
addToField(key, obj[key], rootSchema.fields);
});
Object.keys(obj).forEach(key => addToField(key, obj[key], rootSchema.fields));
rootSchema.count += 1;

@@ -328,0 +325,0 @@ this.emit('progress', obj);
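
The through-stream wired up here is also exposed directly (module.exports.stream in lib/index.js above). A hedged usage sketch, assuming the package main resolves to that file; index.js keeps only the last 'data' payload and resolves with it on 'end':

var schemaStream = require('mongodb-schema').stream;
var es = require('event-stream');

var result;
es.readArray([{ a: 1 }, { a: 'x' }])
  .pipe(schemaStream())
  .on('progress', function(doc) { /* emitted once per analysed document */ })
  .on('data', function(schema) { result = schema; })
  .on('error', function(err) { console.error(err); })
  .on('end', function() { console.log(result); });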

{
"name": "mongodb-schema",
"description": "Infer the probabilistic schema for a MongoDB collection.",
"version": "8.2.5",
"version": "9.0.0",
"author": "Thomas Rueckstiess <thomas@rueckstiess.net>",

@@ -19,16 +19,8 @@ "license": "Apache-2.0",

"scripts": {
"start": "zuul --local 3001 --open -- test/*.test.js",
"test": "mocha",
"ci": "./node_modules/istanbul/lib/cli.js cover _mocha -- -R spec ./test/*.test.js",
"check": "mongodb-js-precommit"
"test": "nyc mocha",
"test:watch": "mocha --watch",
"coverage": "nyc report --reporter=text-lcov | coveralls",
"check": "mongodb-js-precommit './lib/**/*.js' './test/**/*.js'",
"lint": "eslint lib test examples --fix"
},
"precommit": [
"check"
],
"check": {
"ignore": [
"coverage/**/*",
"examples/**/*"
]
},
"keywords": [

@@ -45,6 +37,6 @@ "mongodb",

"dependencies": {
"async": "^1.5.2",
"async": "^3.2.0",
"event-stream": "^4.0.1",
"lodash": "^3.8.0",
"progress": "^1.1.8",
"lodash": "^4.17.20",
"progress": "^2.0.3",
"reservoir": "^0.1.2"

@@ -54,25 +46,23 @@ },

"benchmark": "^2.0.0",
"bson": "^0.5.6",
"coveralls": "^2.11.2",
"debug": "^2.2.0",
"eslint-config-mongodb-js": "^3.0.1",
"istanbul": "^0.3.15",
"mocha": "^3.1.2",
"mongodb-js-precommit": "^2.0.0",
"ms": "^0.7.1",
"pre-commit": "^1.0.10",
"yargs": "^3.32.0",
"zuul": "^3.0.0"
"bson": "^4.2.2",
"coveralls": "^3.1.0",
"debug": "^4.1.1",
"eslint-config-mongodb-js": "^5.0.3",
"istanbul": "^0.4.5",
"mocha": "^8.3.0",
"mongodb-js-precommit": "^2.2.1",
"ms": "^2.1.3",
"nyc": "^15.1.0",
"yargs": "^16.2.0"
},
"optionalDependencies": {
"stats-lite": "^2.0.0",
"cli-table": "^0.3.1",
"js-yaml": "^3.5.2",
"mongodb": "^3.1.4",
"cli-table": "^0.3.4",
"js-yaml": "^4.0.0",
"mongodb": "^3.6.4",
"mongodb-collection-sample": "^4.4.2",
"mongodb-extended-json": "^1.6.2",
"mongodb-ns": "^2.0.0",
"numeral": "^1.5.3",
"yargs": "^3.32.0"
"numeral": "^2.0.6",
"stats-lite": "^2.0.0"
}
}
