🚀 Big News: Socket Acquires Coana to Bring Reachability Analysis to Every Appsec Team.Learn more
Socket
DemoInstallSign in
Socket

mongodb-schema

Package Overview
Dependencies
Maintainers
3
Versions
77
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

mongodb-schema - npm Package Compare versions

Comparing version

to
2.2.0

.coveralls.yml

2

index.js

@@ -1,1 +0,1 @@

var schema = module.exports = require('./lib');
module.exports = require('./lib');
var Schema = require('./schema');
var es = require('event-stream');
var assert = require('assert');
/**
* Convenience shortcut for parsing schemas.
* @param {String} ns The namespace of the collection being parsed.
* @param {Cursor|Array} docs An array of documents or a Cursor returned by `.find()`
* @param {Function} fn Callback which will be passed `(err, schema)`
* @returns {Schema}
*/
module.exports = function(ns, docs, fn) {
assert(Array.isArray(docs), 'docs must be an array');
var schema = new Schema({
ns: ns
});
var src;
es.readArray(docs).pipe(schema.stream()).on('end', fn);
if(docs.stream){
src = docs.stream();
}
else{
src = es.readArray(docs);
}
src.pipe(schema.stream()).on('end', function(){
fn.call(null, null, schema);
});
return schema;
};
// Public API: expose the Schema class, its field classes and the type helpers
// as properties of the exported convenience function.
var api = module.exports;

api.extend = Schema.extend.bind(Schema);
api.Schema = Schema;
api.getType = require('./type').getNameFromValue;

// Field classes re-exported from Schema.
api.FieldCollection = Schema.FieldCollection;
api.BasicField = Schema.BasicField;
api.EmbeddedArrayField = Schema.EmbeddedArrayField;
api.EmbeddedDocumentField = Schema.EmbeddedDocumentField;

api.TypeCollection = require('./type-collection');
var es = require('event-stream');
var _ = require('lodash');
var raf = require('raf');
var State = require('ampersand-state');
var parser = require('./parser');
var FieldCollection = require('./field-collection');
var Collection = require('./collection');
var State = require('./state');
var Type = require('./type');
var TypeCollection = require('./type-collection');
var ValueCollection = require('./value-collection');
var debug = require('debug')('mongodb-schema');
// Collection of fields. The class used for each member is chosen by the
// `model` factory assigned to `FieldCollection.prototype` later in this file.
var FieldCollection = Collection.extend({});
/**
 * State for a single field (key) seen while sampling documents: how often it
 * appeared, which types and values it held, and any nested sub-fields.
 */
var Field = State.extend({
  props: {
    /**
     * The key in the `parent`.
     */
    _id: {
      type: 'string',
      required: true
    },
    /**
     * Number of times this field has been seen in a sample of documents.
     */
    count: {
      type: 'number',
      default: 0
    },
    /**
     * `count` divided by the parent's `count`; recalculated in `commit()`.
     */
    probability: {
      type: 'number',
      default: 0
    },
    /**
     * Sum of the `unique` counts of all `types`; recalculated in `commit()`.
     */
    unique: {
      type: 'number',
      default: 0
    },
    /**
     * Title, description and default from JSON Schema:
     * http://spacetelescope.github.io/understanding-json-schema/reference/generic.html#metadata
     */
    /**
     * If using shortened keys to save space, it is expected this be the "real"
     * name of the field that could be input by the user. For example,
     * if `u` is the field's `_id`, `username` is the field's title
     * and is much friendlier for humans.
     */
    title: {
      type: 'string',
      default: function() {
        return this._id;
      }
    },
    default: 'any',
    description: 'string',
  },
  session: {
    parent: 'state'
  },
  derived: {
    /**
     * The most common type seen for this field.
     *
     * http://spacetelescope.github.io/understanding-json-schema/reference/type.html
     */
    type: {
      deps: ['types.length'],
      fn: function() {
        if (this.types.length === 0) {
          return undefined;
        }
        if (this.types.length === 1) {
          return this.types.at(0)._id;
        }
        return this.types.pluck('_id');
      }
    },
    /**
     * The number of samples this field is measured against: `count` itself
     * when the field is always present, the sum of all type counts when the
     * owning collection's parent tracks `lengths`, otherwise
     * `count / probability`.
     */
    total: {
      deps: ['count', 'probability'],
      fn: function() {
        if (this.probability === 1) return this.count;
        var parentIsArray = this.collection.parent.lengths !== undefined;
        if (parentIsArray) {
          return _.sum(this.types.pluck('count'));
        }
        return (this.count / this.probability);
      }
    },
    /**
     * True when fewer unique values than occurrences have been recorded.
     */
    has_duplicates: {
      deps: ['unique', 'count'],
      fn: function() {
        return this.unique < this.count;
      }
    }
  },
  collections: {
    types: TypeCollection,
    /**
     * A sample of values seen for this field.
     */
    values: ValueCollection,
    fields: FieldCollection
  },
  initialize: function() {
    this.listenTo(this.types, 'add', this.onTypeAdded);
    this.listenTo(this.types, 'remove', this.onTypeRemoved);
    this.listenTo(this.types, 'reset refresh', this.onTypeReset);
  },
  /**
   * When new types are added, trigger a change event to recalculate `this.type`
   * and add listeners so any operations on `type.values` are reflected on
   * `this.values`.
   *
   * @param {Type} type that's being added.
   * @param {TypeCollection} collection the type was added to.
   * @param {Object} options
   */
  onTypeAdded: function(type) {
    /**
     * Currently have to manually trigger events on collections so
     * derived properties are recalculated at the right time.
     * In this case, triggering `change:types.length` will cause
     * the `type` property to be recalculated correctly.
     */
    this.trigger('change:types.length');
    this.listenTo(type.values, 'add', this.onValueAdded);
    this.listenTo(type.values, 'remove', this.onValueRemoved);
    this.listenTo(type.values, 'reset', this.onValueReset);
  },
  /**
   * @see Field#onTypeAdded
   *
   * @param {Type} type being removed.
   * @param {TypeCollection} collection it was removed from.
   * @param {Object} options
   */
  onTypeRemoved: function(type) {
    this.trigger('change:types.length');
    this.stopListening(type.values, 'add', this.onValueAdded);
    this.stopListening(type.values, 'remove', this.onValueRemoved);
    this.stopListening(type.values, 'reset', this.onValueReset);
  },
  /**
   * The set of types was replaced wholesale; force `type` to be recalculated.
   */
  onTypeReset: function() {
    this.trigger('change:types.length');
  },
  /**
   * Mirror a value added to one of the types onto `this.values`.
   *
   * @param {Value} value being added.
   * @param {ValueCollection} collection the value was added to.
   * @param {Object} options
   */
  onValueAdded: function(value) {
    this.values.add(value);
  },
  /**
   * Mirror a value removed from one of the types onto `this.values`.
   *
   * @param {Value} value being removed.
   * @param {ValueCollection} collection the value was removed from.
   * @param {Object} options
   */
  onValueRemoved: function(value) {
    this.values.remove(value);
  },
  /**
   * A type's values were reset; clear `this.values` as well.
   */
  onValueReset: function() {
    this.values.reset();
  },
  /**
   * We've finished parsing a new document! Finalize all of the probabilities
   * and make sure all of our child collections are nicely sorted.
   * If we have any subfields, call `commit()` on each of those as well.
   */
  commit: function() {
    var newprob = this.count / this.parent.count;
    if (newprob !== this.probability) {
      this.probability = newprob;
    }
    var undef = this.types.get('Undefined');
    // Number of samples in which this field was absent.
    var missing = this.total - this.count;
    if (missing <= 0) {
      // Nothing is missing: an `Undefined` type must not exist. Only remove
      // one that is present; never add a zero-count placeholder that the
      // next commit would have to remove again.
      if (undef) {
        debug('removing extraneous Undefined for `%s`', this.getId());
        this.types.remove({
          _id: 'Undefined'
        });
      }
    } else {
      if (!undef) {
        debug('adding Undefined for `%s`', this.getId());
        undef = this.types.add({
          _id: 'Undefined',
          unique: 1
        });
      }
      undef.count = missing;
      // A probability is a ratio of the total, not a difference of counts.
      undef.probability = missing / this.total;
    }
    this.types.map(function(type) {
      type.probability = type.count / this.total;
      type.unique = _.unique(type.values.pluck('value')).length;
    }.bind(this));
    this.unique = _.sum(this.types.pluck('unique'));
    this.types.sort();
    if (this.fields.length > 0) {
      this.fields.map(function(field) {
        field.commit();
      });
    }
  },
  /**
   * Plain-object representation of this field. Fields with sub-fields
   * serialize those; leaf fields serialize their values and types instead.
   */
  serialize: function() {
    var res = this.getAttributes({
      props: true,
      derived: true
    }, true);
    if (this.fields.length > 0) {
      res.fields = this.fields.serialize();
    } else {
      res.values = this.values.serialize();
      res.types = this.types.serialize();
    }
    return res;
  },
});
/**
* A basic field has no descendant fields, such as `String`, `ObjectID`,
* `Boolean`, or `Date`.
* @class
*/
var BasicField = Field.extend({});
/**
 * A field whose sampled values are arrays. In addition to the regular field
 * state it keeps the length of every array pushed onto `lengths`.
 */
var EmbeddedArrayField = Field.extend({
  props: {
    type: {
      type: 'string',
      default: 'Array'
    },
    /**
     * One entry per sampled array: its length.
     */
    lengths: {
      type: 'array',
      default: function() {
        return [];
      }
    }
  },
  derived: {
    /**
     * Mean of `lengths`, or 0 while no lengths have been recorded
     * (avoids the `NaN` produced by dividing by zero).
     */
    average_length: {
      deps: ['lengths'],
      fn: function() {
        if (this.lengths.length === 0) {
          return 0;
        }
        return _.sum(this.lengths) / this.lengths.length;
      }
    }
  }
});
/**
 * A field whose `type` prop defaults to `'Object'`.
 */
var EmbeddedDocumentField = Field.extend({
  props: {
    type: { type: 'string', default: 'Object' }
  }
});
/**
 * Model factory: instantiates whichever field class the caller passed in
 * `attrs.klass`.
 *
 * @param {Object} attrs Attributes for the new field; must carry `klass`.
 * @param {Object} options Passed through to the constructor.
 * @returns {Object} A new instance of `attrs.klass`.
 */
FieldCollection.prototype.model = function(attrs, options) {
  var FieldClass = attrs.klass;
  return new FieldClass(attrs, options);
};
/**
 * Route a sampled key/value pair to the handler matching the value's type
 * name: arrays, embedded documents, or everything else.
 *
 * @param {Object} schema The schema or parent field that owns the key.
 * @param {String} _id The key that was sampled.
 * @param {*} value The value found under that key.
 */
function onFieldSampled(schema, _id, value) {
  var typeName = Type.getNameFromValue(value);
  switch (typeName) {
    case 'Array':
      onEmbeddedArray(schema, _id, typeName, value);
      break;
    case 'Object':
      onEmbeddedDocument(schema, _id, typeName, value);
      break;
    default:
      onBasicField(schema, _id, typeName, value);
  }
}
/**
 * Record one occurrence of a value on the field `_id`, creating the field
 * and the type entry the first time each is seen.
 *
 * @param {Object} schema The schema or parent field whose `fields` holds the key.
 * @param {String} _id The key that was sampled.
 * @param {String} type_id Type name of `value`.
 * @param {*} value The sampled value.
 */
function onBasicField(schema, _id, type_id, value) {
  // Look the field up, falling back to creating it.
  var basicField = schema.fields.get(_id) || schema.fields.add({
    _id: _id,
    klass: BasicField,
    parent: schema
  });
  basicField.count += 1;

  // Same lookup-or-create for the type entry on that field.
  var seenType = basicField.types.get(type_id) || basicField.types.add({
    _id: type_id
  });
  seenType.count += 1;
  seenType.values.add({
    _id: value
  });
}
/**
 * Record one occurrence of an array value on the field `_id`: note its
 * length, then sample every member. Object members contribute one basic
 * field per key; any other member is recorded under the `__basic__` key.
 *
 * @param {Object} schema The schema or parent field whose `fields` holds the key.
 * @param {String} _id The key that was sampled.
 * @param {String} type_id Type name of `value` (not used here).
 * @param {Array} value The sampled array.
 */
function onEmbeddedArray(schema, _id, type_id, value) {
  // Look the field up, falling back to creating it.
  var arrayField = schema.fields.get(_id) || schema.fields.add({
    _id: _id,
    klass: EmbeddedArrayField,
    parent: schema
  });
  arrayField.count += 1;

  // `lengths` is mutated in place, so the change event is fired by hand.
  arrayField.lengths.push(value.length);
  arrayField.trigger('change:lengths');

  _.each(value, function(member) {
    var memberType = Type.getNameFromValue(member);
    if (memberType !== 'Object') {
      onBasicField(arrayField, '__basic__', memberType, member);
      return;
    }
    _.each(member, function(subValue, subKey) {
      onBasicField(arrayField, subKey, Type.getNameFromValue(subValue), subValue);
    });
  });
}
/**
 * Record one occurrence of an embedded document on the field `_id`, then
 * sample each of the document's own keys against that field.
 *
 * @param {Object} schema The schema or parent field whose `fields` holds the key.
 * @param {String} _id The key that was sampled.
 * @param {String} type_id Type name of `value` (not used here).
 * @param {Object} value The sampled embedded document.
 */
function onEmbeddedDocument(schema, _id, type_id, value) {
  // Look the field up, falling back to creating it.
  var docField = schema.fields.get(_id) || schema.fields.add({
    _id: _id,
    klass: EmbeddedDocumentField,
    parent: schema
  });
  docField.count += 1;

  _.each(value, function(childValue, childKey) {
    onFieldSampled(docField, childKey, childValue);
  });
}
var Schema = State.extend({

@@ -358,3 +29,3 @@ idAttribute: 'ns',

_.each(doc, function(val, key) {
onFieldSampled(schema, key, val);
parser.parse(schema, key, val);
});

@@ -371,6 +42,4 @@ schema.fields.map(function(field) {

return es.map(function(doc, done) {
raf(function() {
schema.parse(doc, function(err) {
done(err, doc);
});
schema.parse(doc, function(err) {
done(err, doc);
});

@@ -382,5 +51,1 @@ });

module.exports = Schema;
module.exports.FieldCollection = FieldCollection;
module.exports.BasicField = BasicField;
module.exports.EmbeddedArrayField = EmbeddedArrayField;
module.exports.EmbeddedDocumentField = EmbeddedDocumentField;

@@ -1,12 +0,11 @@

var Collection = require('./collection');
var Collection = require('ampersand-collection');
var lodashMixin = require('ampersand-collection-lodash-mixin');
var type = require('./type');
var assert = require('assert');
module.exports = Collection.extend({
module.exports = Collection.extend(lodashMixin, {
mainIndex: 'name',
model: function(attrs, options) {
var Klass = type[attrs._id];
if (!Klass) {
throw new TypeError('No value type for ' + attrs._id);
}
var Klass = type[attrs.name];
assert(Klass, 'No value type for ' + attrs.name);
return new Klass(attrs, options);

@@ -13,0 +12,0 @@ },

@@ -1,9 +0,9 @@

var State = require('./state');
var State = require('ampersand-state');
var _ = require('lodash');
var ValueCollection = require('./value-collection');
var debug = require('debug')('mongodb-schema:type');
var Type = State.extend({
idAttribute: 'name',
props: {
_id: {
name: {
type: 'string'

@@ -26,8 +26,2 @@ },

values: ValueCollection
},
serialize: function() {
return this.getAttributes({
props: true,
derived: true
}, true);
}

@@ -48,3 +42,3 @@ });

props: {
_id: {
name: {
default: 'String'

@@ -57,3 +51,3 @@ }

props: {
_id: {
name: {
default: 'Number'

@@ -66,3 +60,3 @@ }

props: {
_id: {
name: {
default: 'Long'

@@ -75,3 +69,3 @@ }

props: {
_id: {
name: {
default: 'Null'

@@ -84,3 +78,3 @@ }

props: {
_id: {
name: {
default: 'Timestamp'

@@ -93,3 +87,3 @@ }

props: {
_id: {
name: {
default: 'Boolean'

@@ -102,3 +96,3 @@ }

props: {
_id: {
name: {
default: 'Date'

@@ -111,3 +105,3 @@ }

props: {
_id: {
name: {
default: 'ObjectID'

@@ -120,3 +114,3 @@ }

props: {
_id: {
name: {
default: 'Undefined'

@@ -129,3 +123,3 @@ }

props: {
_id: {
name: {
default: 'Binary'

@@ -138,3 +132,3 @@ }

props: {
_id: {
name: {
default: 'MaxKey'

@@ -147,3 +141,3 @@ }

props: {
_id: {
name: {
default: 'MinKey'

@@ -156,3 +150,3 @@ }

props: {
_id: {
name: {
type: 'string',

@@ -166,3 +160,3 @@ default: 'Object'

props: {
_id: {
name: {
type: 'string',

@@ -173,2 +167,1 @@ default: 'Array'

});

@@ -1,5 +0,7 @@

var Collection = require('./collection');
var Collection = require('ampersand-collection');
var lodashMixin = require('ampersand-collection-lodash-mixin');
var Value = require('./value');
module.exports = Collection.extend({
module.exports = Collection.extend(lodashMixin, {
mainIndex: 'id',
model: Value,

@@ -6,0 +8,0 @@ serialize: function() {

@@ -1,7 +0,8 @@

var State = require('./state');
var State = require('ampersand-state');
module.exports = State.extend({
idAttribute: 'id',
props: {
_id: {
type: 'any'
id: {
type: 'string'
},

@@ -13,8 +14,5 @@ value: {

initialize: function(attrs) {
this.value = attrs._id;
this._id = this.cid + '-' + attrs._id;
},
valueOf: function() {
return this.value;
this.value = attrs.value;
this.id = this.cid + '-' + attrs.value;
}
});
{
"name": "mongodb-schema",
"description": "Infer the probabilistic schema for a MongoDB collection.",
"version": "2.1.1",
"version": "2.2.0",
"author": "Thomas Rueckstiess <thomas@rueckstiess.net>",
"license": "MIT",
"license": "Apache-2.0",
"homepage": "http://github.com/mongodb-js/mongodb-schema",

@@ -17,3 +17,4 @@ "repository": {

"start": "zuul --local 3001 --open -- test/*.test.js",
"test": "mocha"
"test": "mocha",
"ci": "./node_modules/istanbul/lib/cli.js cover _mocha -- -R spec ./test/*.test.js"
},

@@ -28,9 +29,10 @@ "keywords": [

"ampersand-state": "^4.5.4",
"bson": "^0.3.1",
"bson": "^0.4.0",
"debug": "^2.1.3",
"event-stream": "^3.3.0",
"lodash": "^3.8.0",
"raf": "^3.0.0"
"lodash": "^3.8.0"
},
"devDependencies": {
"coveralls": "^2.11.2",
"istanbul": "^0.3.15",
"mocha": "^2.0.1",

@@ -37,0 +39,0 @@ "mongodb-extended-json": "^1.3.0",

# mongodb-schema
Infer a probabilistic schema of JavaScript objects or a MongoDB collection.
[![build status](https://secure.travis-ci.org/mongodb-js/mongodb-schema.png)](http://travis-ci.org/mongodb-js/mongodb-schema)
[![Coverage Status](https://coveralls.io/repos/mongodb-js/mongodb-schema/badge.svg)](https://coveralls.io/r/mongodb-js/mongodb-schema)
[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/mongodb-js/mongodb-js?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
## Todo
Infer a probabilistic schema for a MongoDB collection.
### Punted
## Example
- [ ] update bin/mongodb-schema.js to do something real
- [ ] http://spacetelescope.github.io/understanding-json-schema/reference/generic.html#enumerated-values
`mongodb-schema` doesn't do anything directly with `mongodb`, so to try the examples we'll install the Node.js driver. We'll also need some data
in a collection to derive the schema from:
1. `npm install mongodb mongodb-schema`.
2. `mongo --eval "db.test.insert([{_id: 1, a: true}, {_id: 2, a: 'true'}, {_id: 3, a: 1}, {_id: 4}])" localhost:27017/test`
3. Create a new file `parse-schema.js` and paste in the following code:
```javascript
var parseSchema = require('mongodb-schema');
var connect = require('mongodb');
connect('mongodb://localhost:27017/test', function(err, db){
if(err) return console.error(err);
parseSchema('test.test', db.collection('test').find(), function(err, schema){
if(err) return console.error(err);
console.log(JSON.stringify(schema, null, 2));
db.close();
});
});
```
4. When we run the above with `node parse-schema.js`, we'll see something
like the following:
```javascript
{
ns: 'test.test',
count: 4, // The number of documents sampled
fields: [ // A collection of Field objects @see lib/field.js
{
name: "_id",
probability: 1, // Just as we expected, all 4 documents had `_id`
unique: 4, // All 4 values for `_id` were unique
types: [
{
name: "Number", // The only type seen was a Number
probability: 1,
unique: 4
}
]
},
{
name: "a", // Unlike `_id`, `a` was present in only 3 of 4 documents
probability: 0.75,
unique: 3, // Of the 3 values seen, all 3 were unique
// As expected, Boolean, String, and Number values were seen.
// A handy instance of `Undefined` is also provided to represent missing data
types: [
{
name: "Boolean",
probability: 0.25,
unique: 1
},
{
name: "String",
probability: 0.25,
unique: 1
},
{
name: "Number",
probability: 0.25,
unique: 1
},
{
name: "Undefined",
probability: 0.25
}
]
}
]
}
```
### More Examples
`mongodb-schema` supports all [BSON types][bson-types].
Check out [the tests][tests] for more usage examples.
## Installation
```
npm install --save mongodb-schema
```
## Testing
```
npm test
```
## License
Apache 2.0
## Contributing
Under the hood, `mongodb-schema` uses [ampersand-state][ampersand-state] and
[ampersand-collection][ampersand-collection] for modeling [Schema][schema], [Field][field], and [Type][type] instances.
A high-level view of the class interactions is as follows:
![](./docs/mongodb-schema_diagram.png)
[bson-types]: http://docs.mongodb.org/manual/reference/bson-types/
[ampersand-state]: http://ampersandjs.com/docs#ampersand-state
[ampersand-collection]: http://ampersandjs.com/docs#ampersand-collection
[tests]: https://github.com/mongodb-js/mongodb-schema/tree/master/test
[schema]: https://github.com/mongodb-js/mongodb-schema/blob/master/lib/schema.js
[field]: https://github.com/mongodb-js/mongodb-schema/blob/master/lib/field.js
[type]: https://github.com/mongodb-js/mongodb-schema/blob/master/lib/type.js

Sorry, the diff of this file is not supported yet