mongodb-schema
Advanced tools
Comparing version 8.2.5 to 9.0.0
var stream = require('./stream'); | ||
var es = require('event-stream'); | ||
var _ = require('lodash'); | ||
@@ -17,41 +16,53 @@ // var debug = require('debug')('mongodb-schema:wrapper'); | ||
* | ||
* @param {Function} fn Callback which will be passed `(err, schema)` | ||
* @param {Function} callback Callback which will be passed `(err, schema)` | ||
* @return {Promise} You can await promise, or use callback if provided. | ||
*/ | ||
module.exports = function(docs, options, fn) { | ||
// shift parameters if no options are specified | ||
if (_.isUndefined(options) || _.isFunction(options) && _.isUndefined(fn)) { | ||
fn = options; | ||
options = {}; | ||
} | ||
module.exports = function(docs, options, callback) { | ||
const promise = new Promise((resolve, reject) => { | ||
// shift parameters if no options are specified | ||
if (typeof options === 'undefined' || (typeof options === 'function' && typeof callback === 'undefined')) { | ||
callback = options; | ||
options = {}; | ||
} | ||
var src; | ||
// MongoDB Cursors | ||
if (docs.stream && typeof docs.stream === 'function') { | ||
src = docs.stream(); | ||
// Streams | ||
} else if (docs.pipe && typeof docs.pipe === 'function') { | ||
src = docs; | ||
// Arrays | ||
} else if (_.isArray(docs)) { | ||
src = es.readArray(docs); | ||
} else { | ||
fn(new Error('Unknown input type for `docs`. Must be an array, ' | ||
+ 'stream or MongoDB Cursor.')); | ||
return; | ||
var src; | ||
// MongoDB Cursors | ||
if (docs.stream && typeof docs.stream === 'function') { | ||
src = docs.stream(); | ||
// Streams | ||
} else if (docs.pipe && typeof docs.pipe === 'function') { | ||
src = docs; | ||
// Arrays | ||
} else if (Array.isArray(docs)) { | ||
src = es.readArray(docs); | ||
} else { | ||
reject(new Error( | ||
'Unknown input type for `docs`. Must be an array, ' + | ||
'stream or MongoDB Cursor.' | ||
)); | ||
return; | ||
} | ||
var result; | ||
src | ||
.pipe(stream(options)) | ||
.on('data', function(data) { | ||
result = data; | ||
}) | ||
.on('error', function(err) { | ||
reject(err); | ||
}) | ||
.on('end', function() { | ||
resolve(result); | ||
}); | ||
}); | ||
if (callback && typeof callback === 'function') { | ||
promise.then(callback.bind(null, null), callback); | ||
} | ||
var result; | ||
src.pipe(stream(options)) | ||
.on('data', function(data) { | ||
result = data; | ||
}) | ||
.on('error', function(err) { | ||
fn(err); | ||
}) | ||
.on('end', function() { | ||
fn(null, result); | ||
}); | ||
return promise; | ||
}; | ||
module.exports.stream = stream; |
@@ -1,2 +0,1 @@ | ||
var _ = require('lodash'); | ||
// var debug = require('debug')('mongodb-schema:stats'); | ||
@@ -11,13 +10,17 @@ | ||
width += schema.fields.length; | ||
width += _.sum(schema.fields.map(function(field) { | ||
var doc = _.find(field.types, 'name', 'Document'); | ||
width += schema.fields.map(field => { | ||
var doc = field.types.find(v => v.name === 'Document'); | ||
return widthRecursive(doc); | ||
})); | ||
width += _.sum(schema.fields.map(function(field) { | ||
var arr = _.find(field.types, 'name', 'Array'); | ||
}).reduce((p, c) => p + c || 0, 0); | ||
width += schema.fields.map(field => { | ||
var arr = field.types.find(v => v.name === 'Array'); | ||
if (arr) { | ||
var doc = _.find(arr.types, 'name', 'Document'); | ||
var doc = arr.types.find(v => v.name === 'Document'); | ||
return widthRecursive(doc); | ||
} | ||
})); | ||
}) | ||
.reduce((p, c) => p + c || 0, 0); | ||
} | ||
@@ -34,14 +37,15 @@ return width; | ||
maxChildDepth = 1 + Math.max( | ||
_.max(schema.fields.map(function(field) { | ||
var doc = _.find(field.types, 'name', 'Document'); | ||
Math.max(...schema.fields.map(field => { | ||
var doc = field.types.find(v => v.name === 'Document'); | ||
return depthRecursive(doc); | ||
})), | ||
_.max(schema.fields.map(function(field) { | ||
var arr = _.find(field.types, 'name', 'Array'); | ||
Math.max(...schema.fields.map(field => { | ||
var arr = field.types.find(v => v.name === 'Array'); | ||
if (arr) { | ||
var doc = _.find(arr.types, 'name', 'Document'); | ||
var doc = arr.types.find(v => v.name === 'Document'); | ||
return depthRecursive(doc); | ||
} | ||
return 0; | ||
}))); | ||
})) | ||
); | ||
} | ||
@@ -60,10 +64,10 @@ return maxChildDepth; | ||
res = schema.fields.map(function(field) { | ||
var doc = _.find(field.types, 'name', 'Document'); | ||
var doc = field.types.find(v => v.name === 'Document'); | ||
return branchingFactors(doc); | ||
}); | ||
branchArray.push.apply(branchArray, _.flatten(res, true)); | ||
branchArray.push(...res.flat(Infinity)); | ||
res = schema.fields.map(function(field) { | ||
var arr = _.find(field.types, 'name', 'Array'); | ||
var arr = field.types.find(v => v.name === 'Array'); | ||
if (arr) { | ||
var doc = _.find(arr.types, 'name', 'Document'); | ||
var doc = arr.types.find(v => v.name === 'Document'); | ||
return branchingFactors(doc); | ||
@@ -73,5 +77,5 @@ } | ||
}); | ||
branchArray.push.apply(branchArray, _.flatten(res, true)); | ||
branchArray.push(...res.flat(Infinity)); | ||
} | ||
return _.sortBy(branchArray).reverse(); | ||
return branchArray.sort().reverse(); | ||
}; | ||
@@ -78,0 +82,0 @@ |
var es = require('event-stream'); | ||
var Reservoir = require('reservoir'); | ||
var _ = require('lodash'); | ||
var Reservoir = require('reservoir'); | ||
@@ -16,10 +16,10 @@ // var debug = require('debug')('mongodb-schema:stream'); | ||
if (value && value._bsontype) { | ||
if (_.includes([ 'Decimal128', 'Long' ], value._bsontype)) { | ||
if (['Decimal128', 'Long'].includes(value._bsontype)) { | ||
return value.toString(); | ||
} | ||
if (_.includes([ 'Double', 'Int32' ], value._bsontype)) { | ||
if ([ 'Double', 'Int32' ].includes(value._bsontype)) { | ||
return String(value.value); | ||
} | ||
} | ||
if (_.isString(value)) { | ||
if (typeof value === 'string') { | ||
return value; | ||
@@ -72,3 +72,3 @@ } | ||
if (tag === 'fields') { | ||
_.each(schema, function(field) { | ||
Object.values(schema).forEach((field) => { | ||
// create `Undefined` pseudo-type | ||
@@ -84,7 +84,9 @@ var missing = parent.count - field.count; | ||
} | ||
field.total_count = _.sum(field.types, 'count'); | ||
field.total_count = Object.values(field.types) | ||
.map(v => v.count) | ||
.reduce((p, c) => p + c, 0); | ||
// recursively finalize types | ||
finalizeSchema(field.types, field, 'types'); | ||
field.type = _.pluck(field.types, 'name'); | ||
field.type = field.types.map(v => v.name); | ||
if (field.type.length === 1) { | ||
@@ -94,3 +96,3 @@ field.type = field.type[0]; | ||
// a field has duplicates when any of its types have duplicates | ||
field.has_duplicates = _.any(field.types, 'has_duplicates'); | ||
field.has_duplicates = !!field.types.find(v => v.has_duplicates); | ||
// compute probability | ||
@@ -100,7 +102,7 @@ field.probability = field.count / parent.count; | ||
// turn object into array | ||
parent.fields = _.values(parent.fields).sort(fieldComparator); | ||
parent.fields = Object.values(parent.fields).sort(fieldComparator); | ||
} | ||
if (tag === 'types') { | ||
_.each(schema, function(type) { | ||
type.total_count = _.sum(type.lengths); | ||
Object.values(schema).forEach(type => { | ||
type.total_count = (type.lengths || []).reduce((p, c) => p + c || 0, 0); | ||
// debug('recursively calling schema.fields'); | ||
@@ -118,3 +120,3 @@ finalizeSchema(type.fields, type, 'fields'); | ||
} else if (type.values) { | ||
type.unique = _.uniq(type.values, false, extractStringValueFromBSON).length; | ||
type.unique = new Set(type.values.map(extractStringValueFromBSON)).size; | ||
type.has_duplicates = type.unique !== type.values.length; | ||
@@ -128,3 +130,3 @@ } | ||
}); | ||
parent.types = _.sortByOrder(_.values(parent.types), 'probability', 'desc'); | ||
parent.types = Object.values(parent.types).sort((a, b) => b.probability - a.probability); | ||
} | ||
@@ -156,27 +158,21 @@ return schema; | ||
// set default options | ||
options = _.defaults({}, options, { | ||
semanticTypes: false, | ||
storeValues: true | ||
}); | ||
options = { semanticTypes: false, storeValues: true, ...options}; | ||
var semanticTypes = require('./semantic-types'); | ||
if (_.isObject(options.semanticTypes)) { | ||
if (typeof options.semanticTypes === 'object') { | ||
// enable existing types that evaluate to true | ||
var enabledTypes = _(options.semanticTypes) | ||
.pick(function(val) { | ||
return _.isBoolean(val) && val; | ||
}) | ||
.keys() | ||
.map(function(val) { | ||
return val.toLowerCase(); | ||
}) | ||
.value(); | ||
semanticTypes = _.pick(semanticTypes, function(val, key) { | ||
return _.includes(enabledTypes, key.toLowerCase()); | ||
}); | ||
// merge with custom types that are functions | ||
semanticTypes = _.assign(semanticTypes, | ||
_.pick(options.semanticTypes, _.isFunction) | ||
); | ||
var enabledTypes = Object.entries(options.semanticTypes) | ||
.filter(([, v]) => typeof v === 'boolean' && v) | ||
.map(([k]) => k.toLowerCase()); | ||
semanticTypes = {... | ||
Object.entries(semanticTypes) | ||
.filter(([k]) => enabledTypes.includes(k.toLowerCase())) | ||
.reduce((p, [k, v]) => ({...p, [k]: v}), {}), | ||
}; | ||
Object.entries(options.semanticTypes) | ||
.filter(([, v]) => typeof v === 'function') | ||
.forEach(([k, v]) => {semanticTypes[k] = v;}); | ||
} | ||
@@ -216,5 +212,9 @@ | ||
// pass value to semantic type detectors, return first match or undefined | ||
return _.findKey(semanticTypes, function(fn) { | ||
return fn(value, path); | ||
}); | ||
const returnValue = Object.entries(semanticTypes) | ||
.filter(([, v]) => { | ||
return v(value, path); | ||
}) | ||
.map(([k]) => k)[0]; | ||
return returnValue; | ||
}; | ||
@@ -248,2 +248,3 @@ | ||
var addToType = function(path, value, schema) { | ||
@@ -254,4 +255,3 @@ var bsonType = getBSONType(value); | ||
// it is always the bson type. | ||
var typeName = (options.semanticTypes) ? | ||
getSemanticType(value, path) || bsonType : bsonType; | ||
var typeName = (options.semanticTypes) ? getSemanticType(value, path) || bsonType : bsonType; | ||
var type = schema[typeName] = _.get(schema, typeName, { | ||
@@ -266,8 +266,6 @@ name: typeName, | ||
if (typeName === 'Array') { | ||
type.types = _.get(type, 'types', {}); | ||
type.lengths = _.get(type, 'lengths', []); | ||
type.types = type.types || {}; | ||
type.lengths = type.lengths || []; | ||
type.lengths.push(value.length); | ||
_.each(value, function(v) { | ||
addToType(path, v, type.types); | ||
}); | ||
value.forEach(v => addToType(path, v, type.types)); | ||
@@ -277,10 +275,10 @@ // recurse into nested documents by calling `addToField` for all sub-fields | ||
type.fields = _.get(type, 'fields', {}); | ||
_.forOwn(value, function(v, k) { | ||
addToField(path + '.' + k, v, type.fields); | ||
}); | ||
Object.entries(value).forEach(([k, v]) => addToField(path + '.' + k, v, type.fields)); | ||
// if the `storeValues` option is enabled, store some example values | ||
} else if (options.storeValues) { | ||
type.values = _.get(type, 'values', bsonType === 'String' ? | ||
new Reservoir(100) : new Reservoir(10000)); | ||
var defaultValue = bsonType === 'String' ? | ||
new Reservoir(100) : new Reservoir(10000); | ||
type.values = type.values || defaultValue; | ||
addToValue(type, value); | ||
@@ -300,4 +298,5 @@ } | ||
var pathSplitOnDot = path.split('.'); | ||
defaults[path] = { | ||
name: _.last(path.split('.')), | ||
name: pathSplitOnDot[pathSplitOnDot.length - 1], | ||
path: path, | ||
@@ -323,5 +322,3 @@ count: 0, | ||
var parser = es.through(function write(obj) { | ||
_.each(_.keys(obj), function(key) { | ||
addToField(key, obj[key], rootSchema.fields); | ||
}); | ||
Object.keys(obj).forEach(key => addToField(key, obj[key], rootSchema.fields)); | ||
rootSchema.count += 1; | ||
@@ -328,0 +325,0 @@ this.emit('progress', obj); |
{ | ||
"name": "mongodb-schema", | ||
"description": "Infer the probabilistic schema for a MongoDB collection.", | ||
"version": "8.2.5", | ||
"version": "9.0.0", | ||
"author": "Thomas Rueckstiess <thomas@rueckstiess.net>", | ||
@@ -19,16 +19,8 @@ "license": "Apache-2.0", | ||
"scripts": { | ||
"start": "zuul --local 3001 --open -- test/*.test.js", | ||
"test": "mocha", | ||
"ci": "./node_modules/istanbul/lib/cli.js cover _mocha -- -R spec ./test/*.test.js", | ||
"check": "mongodb-js-precommit" | ||
"test": "nyc mocha", | ||
"test:watch": "mocha --watch", | ||
"coverage": "nyc report --reporter=text-lcov | coveralls", | ||
"check": "mongodb-js-precommit './lib/**/*.js' './test/**/*.js'", | ||
"lint": "eslint lib test examples --fix" | ||
}, | ||
"precommit": [ | ||
"check" | ||
], | ||
"check": { | ||
"ignore": [ | ||
"coverage/**/*", | ||
"examples/**/*" | ||
] | ||
}, | ||
"keywords": [ | ||
@@ -45,6 +37,6 @@ "mongodb", | ||
"dependencies": { | ||
"async": "^1.5.2", | ||
"async": "^3.2.0", | ||
"event-stream": "^4.0.1", | ||
"lodash": "^3.8.0", | ||
"progress": "^1.1.8", | ||
"lodash": "^4.17.20", | ||
"progress": "^2.0.3", | ||
"reservoir": "^0.1.2" | ||
@@ -54,25 +46,23 @@ }, | ||
"benchmark": "^2.0.0", | ||
"bson": "^0.5.6", | ||
"coveralls": "^2.11.2", | ||
"debug": "^2.2.0", | ||
"eslint-config-mongodb-js": "^3.0.1", | ||
"istanbul": "^0.3.15", | ||
"mocha": "^3.1.2", | ||
"mongodb-js-precommit": "^2.0.0", | ||
"ms": "^0.7.1", | ||
"pre-commit": "^1.0.10", | ||
"yargs": "^3.32.0", | ||
"zuul": "^3.0.0" | ||
"bson": "^4.2.2", | ||
"coveralls": "^3.1.0", | ||
"debug": "^4.1.1", | ||
"eslint-config-mongodb-js": "^5.0.3", | ||
"istanbul": "^0.4.5", | ||
"mocha": "^8.3.0", | ||
"mongodb-js-precommit": "^2.2.1", | ||
"ms": "^2.1.3", | ||
"nyc": "^15.1.0", | ||
"yargs": "^16.2.0" | ||
}, | ||
"optionalDependencies": { | ||
"stats-lite": "^2.0.0", | ||
"cli-table": "^0.3.1", | ||
"js-yaml": "^3.5.2", | ||
"mongodb": "^3.1.4", | ||
"cli-table": "^0.3.4", | ||
"js-yaml": "^4.0.0", | ||
"mongodb": "^3.6.4", | ||
"mongodb-collection-sample": "^4.4.2", | ||
"mongodb-extended-json": "^1.6.2", | ||
"mongodb-ns": "^2.0.0", | ||
"numeral": "^1.5.3", | ||
"yargs": "^3.32.0" | ||
"numeral": "^2.0.6", | ||
"stats-lite": "^2.0.0" | ||
} | ||
} |
Sorry, the diff of this file is not supported yet
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but they do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
45201 (2.15%)
13 (-7.14%)
11 (-8.33%)
11 (10%)
434 (0.46%)
1 (Infinity%)
+ Added
+ Added
+ Added
+ Added
+ Added
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
Updated
Updated
Updated