mongodb-schema
Advanced tools
Comparing version
@@ -1,1 +0,1 @@ | ||
var schema = module.exports = require('./lib'); | ||
module.exports = require('./lib'); |
var Schema = require('./schema'); | ||
var es = require('event-stream'); | ||
var assert = require('assert'); | ||
/** | ||
* Convenience shortcut for parsing schemas. | ||
* @param {String} ns The namespace of the collection being parsed. | ||
* @param {Cursor|Array} docs An array of documents or a Cursor returned by `.find()` | ||
* @param {Function} fn Callback which will be passed `(err, schema)` | ||
* @returns {Schema} | ||
*/ | ||
module.exports = function(ns, docs, fn) { | ||
assert(Array.isArray(docs), 'docs must be an array'); | ||
var schema = new Schema({ | ||
ns: ns | ||
}); | ||
var src; | ||
es.readArray(docs).pipe(schema.stream()).on('end', fn); | ||
if(docs.stream){ | ||
src = docs.stream(); | ||
} | ||
else{ | ||
src = es.readArray(docs); | ||
} | ||
src.pipe(schema.stream()).on('end', function(){ | ||
fn.call(null, null, schema); | ||
}); | ||
return schema; | ||
}; | ||
module.exports.extend = Schema.extend.bind(Schema); | ||
module.exports.Schema = Schema; | ||
module.exports.getType = require('./type').getNameFromValue; | ||
module.exports.FieldCollection = Schema.FieldCollection; | ||
module.exports.BasicField = Schema.BasicField; | ||
module.exports.EmbeddedArrayField = Schema.EmbeddedArrayField; | ||
module.exports.EmbeddedDocumentField = Schema.EmbeddedDocumentField; | ||
module.exports.TypeCollection = require('./type-collection'); |
var es = require('event-stream'); | ||
var _ = require('lodash'); | ||
var raf = require('raf'); | ||
var State = require('ampersand-state'); | ||
var parser = require('./parser'); | ||
var FieldCollection = require('./field-collection'); | ||
var Collection = require('./collection'); | ||
var State = require('./state'); | ||
var Type = require('./type'); | ||
var TypeCollection = require('./type-collection'); | ||
var ValueCollection = require('./value-collection'); | ||
var debug = require('debug')('mongodb-schema'); | ||
var FieldCollection = Collection.extend({}); | ||
var Field = State.extend({ | ||
props: { | ||
/** | ||
* The key in the `parent`. | ||
*/ | ||
_id: { | ||
type: 'string', | ||
required: true | ||
}, | ||
/** | ||
* Number of times this field has been seen in a sample of documents. | ||
*/ | ||
count: { | ||
type: 'number', | ||
default: 0 | ||
}, | ||
probability: { | ||
type: 'number', | ||
default: 0 | ||
}, | ||
unique: { | ||
type: 'number', | ||
default: 0 | ||
}, | ||
/** | ||
* Title, description and default from JSON Schema: | ||
* http://spacetelescope.github.io/understanding-json-schema/reference/generic.html#metadata | ||
*/ | ||
/** | ||
* If using shortened keys to save space, it is expected this be the "real" | ||
* name of the field that could be input by the user. For example, | ||
* if `u` is the field's `_id`, `username` is the field's title | ||
* and is much friendlier for humans. | ||
*/ | ||
title: { | ||
type: 'string', | ||
default: function() { | ||
return this._id; | ||
} | ||
}, | ||
default: 'any', | ||
description: 'string', | ||
}, | ||
session: { | ||
parent: 'state' | ||
}, | ||
derived: { | ||
/** | ||
* The most common type seen for this field. | ||
* | ||
* http://spacetelescope.github.io/understanding-json-schema/reference/type.html | ||
*/ | ||
type: { | ||
deps: ['types.length'], | ||
fn: function() { | ||
if (this.types.length === 0) { | ||
return undefined; | ||
} | ||
if (this.types.length === 1) { | ||
return this.types.at(0)._id; | ||
} | ||
return this.types.pluck('_id'); | ||
} | ||
}, | ||
total: { | ||
deps: ['count', 'probability'], | ||
fn: function() { | ||
if (this.probability === 1) return this.count; | ||
var parentIsArray = this.collection.parent.lengths !== undefined; | ||
if (parentIsArray) { | ||
return _.sum(this.types.pluck('count')); | ||
} | ||
return (this.count / this.probability); | ||
} | ||
}, | ||
has_duplicates: { | ||
deps: ['unique', 'count'], | ||
fn: function() { | ||
return this.unique < this.count; | ||
} | ||
} | ||
}, | ||
collections: { | ||
types: TypeCollection, | ||
/** | ||
* A sample of values seen for this field. | ||
*/ | ||
values: ValueCollection, | ||
fields: FieldCollection | ||
}, | ||
initialize: function() { | ||
this.listenTo(this.types, 'add', this.onTypeAdded); | ||
this.listenTo(this.types, 'remove', this.onTypeRemoved); | ||
this.listenTo(this.types, 'reset refresh', this.onTypeReset); | ||
}, | ||
/** | ||
* When new types are added, trigger a change event to recalculate `this.type` | ||
* and add listeners so any operations on `type.values` are reflected on | ||
* `this.values`. | ||
* | ||
* @param {Type} type that's being added. | ||
* @param {TypeCollection} collection the type was added to. | ||
* @param {Object} options | ||
*/ | ||
onTypeAdded: function(type) { | ||
/** | ||
* Currently have to manually trigger events on collections so | ||
* derived properties are recalculated at the right time. | ||
* In this case, triggering `change:types.length` will cause | ||
* the `type` property to be recalculated correctly. | ||
*/ | ||
this.trigger('change:types.length'); | ||
this.listenTo(type.values, 'add', this.onValueAdded); | ||
this.listenTo(type.values, 'remove', this.onValueRemoved); | ||
this.listenTo(type.values, 'reset', this.onValueReset); | ||
}, | ||
/** | ||
* @see Schema#onTypeAdded | ||
* | ||
* @param {Type} type being removed. | ||
* @param {TypeCollection} collection it was removed from. | ||
* @param {Object} options | ||
*/ | ||
onTypeRemoved: function(type) { | ||
this.trigger('change:types.length'); | ||
this.stopListening(type.values, 'add', this.onValueAdded); | ||
this.stopListening(type.values, 'remove', this.onValueRemoved); | ||
this.stopListening(type.values, 'reset', this.onValueReset); | ||
}, | ||
onTypeReset: function() { | ||
this.trigger('change:types.length'); | ||
}, | ||
/** | ||
* @param {ValueCollection} collection the value was added to. | ||
* @param {Value} value being added. | ||
* @param {Object} options | ||
*/ | ||
onValueAdded: function(value) { | ||
this.values.add(value); | ||
}, | ||
/** | ||
* @param {ValueCollection} collection the value was removed from. | ||
* @param {Value} value being removed. | ||
* @param {Object} options | ||
*/ | ||
onValueRemoved: function(value) { | ||
this.values.remove(value); | ||
}, | ||
onValueReset: function() { | ||
this.values.reset(); | ||
}, | ||
/** | ||
* We've finished parsing a new document! Finalize all of the probabilities | ||
* and make sure all of our child collections are nicely sorted. | ||
* If we have any subfields, call `commit()` on each of those as well. | ||
*/ | ||
commit: function() { | ||
var newprob; | ||
var parentIsArray = this.collection.parent.lengths !== undefined; | ||
newprob = this.count / this.parent.count; | ||
if (newprob !== this.probability) { | ||
this.probability = newprob; | ||
} | ||
var undef = this.types.get('Undefined'); | ||
if ((this.total - this.count) <= 0 && undef) { | ||
debug('removing extraneous Undefined for `%s`', this.getId()); | ||
this.types.remove({ | ||
_id: 'Undefined' | ||
}); | ||
} else { | ||
if (!undef) { | ||
debug('adding Undefined for `%s`', this.getId()); | ||
undef = this.types.add({ | ||
_id: 'Undefined', | ||
unique: 1 | ||
}); | ||
} | ||
undef.count = (this.total - this.count); | ||
undef.probability = (undef.count - this.count); | ||
} | ||
this.types.map(function(type) { | ||
type.probability = type.count / this.total; | ||
type.unique = _.unique(type.values.pluck('value')).length; | ||
}.bind(this)); | ||
this.unique = _.sum(this.types.pluck('unique')); | ||
this.types.sort(); | ||
if (this.fields.length > 0) { | ||
this.fields.map(function(field) { | ||
field.commit(); | ||
}); | ||
} | ||
}, | ||
serialize: function() { | ||
var res = this.getAttributes({ | ||
props: true, | ||
derived: true | ||
}, true); | ||
if (this.fields.length > 0) { | ||
res.fields = this.fields.serialize(); | ||
} else { | ||
res.values = this.values.serialize(); | ||
res.types = this.types.serialize(); | ||
} | ||
return res; | ||
}, | ||
}); | ||
/** | ||
* A basic field has no descendant fields, such as `String`, `ObjectID`, | ||
* `Boolean`, or `Date`. | ||
* The top level schema state. | ||
* @class | ||
*/ | ||
var BasicField = Field.extend({}); | ||
var EmbeddedArrayField = Field.extend({ | ||
props: { | ||
type: { | ||
type: 'string', | ||
default: 'Array' | ||
}, | ||
lengths: { | ||
type: 'array', | ||
default: function() { | ||
return []; | ||
} | ||
} | ||
}, | ||
derived: { | ||
average_length: { | ||
deps: ['lengths'], | ||
fn: function() { | ||
return _.sum(this.lengths) / this.lengths.length; | ||
} | ||
} | ||
} | ||
}); | ||
var EmbeddedDocumentField = Field.extend({ | ||
props: { | ||
type: { | ||
type: 'string', | ||
default: 'Object' | ||
} | ||
} | ||
}); | ||
FieldCollection.prototype.model = function(attrs, options) { | ||
return new attrs.klass(attrs, options); | ||
}; | ||
function onFieldSampled(schema, _id, value) { | ||
var type_id = Type.getNameFromValue(value); | ||
if (type_id === 'Array') { | ||
onEmbeddedArray(schema, _id, type_id, value); | ||
} else if (type_id === 'Object') { | ||
onEmbeddedDocument(schema, _id, type_id, value); | ||
} else { | ||
onBasicField(schema, _id, type_id, value); | ||
} | ||
} | ||
function onBasicField(schema, _id, type_id, value) { | ||
var field = schema.fields.get(_id); | ||
if (!field) { | ||
field = schema.fields.add({ | ||
_id: _id, | ||
klass: BasicField, | ||
parent: schema | ||
}); | ||
} | ||
field.count += 1; | ||
var type = field.types.get(type_id); | ||
if (!type) { | ||
type = field.types.add({ | ||
_id: type_id, | ||
}); | ||
} | ||
type.count += 1; | ||
type.values.add({ | ||
_id: value | ||
}); | ||
} | ||
function onEmbeddedArray(schema, _id, type_id, value) { | ||
var field = schema.fields.get(_id); | ||
if (!field) { | ||
field = schema.fields.add({ | ||
_id: _id, | ||
klass: EmbeddedArrayField, | ||
parent: schema | ||
}); | ||
} | ||
field.count += 1; | ||
field.lengths.push(value.length); | ||
field.trigger('change:lengths'); | ||
_.each(value, function(d) { | ||
var type_id = Type.getNameFromValue(d); | ||
if (type_id === 'Object') { | ||
_.each(d, function(val, key) { | ||
onBasicField(field, key, Type.getNameFromValue(val), val); | ||
}); | ||
} else { | ||
onBasicField(field, '__basic__', type_id, d); | ||
} | ||
}); | ||
} | ||
function onEmbeddedDocument(schema, _id, type_id, value) { | ||
var field = schema.fields.get(_id); | ||
if (!field) { | ||
field = schema.fields.add({ | ||
_id: _id, | ||
klass: EmbeddedDocumentField, | ||
parent: schema | ||
}); | ||
} | ||
field.count += 1; | ||
_.each(value, function(val, key) { | ||
onFieldSampled(field, key, val); | ||
}); | ||
} | ||
var Schema = State.extend({ | ||
@@ -358,3 +29,3 @@ idAttribute: 'ns', | ||
_.each(doc, function(val, key) { | ||
onFieldSampled(schema, key, val); | ||
parser.parse(schema, key, val); | ||
}); | ||
@@ -371,6 +42,4 @@ schema.fields.map(function(field) { | ||
return es.map(function(doc, done) { | ||
raf(function() { | ||
schema.parse(doc, function(err) { | ||
done(err, doc); | ||
}); | ||
schema.parse(doc, function(err) { | ||
done(err, doc); | ||
}); | ||
@@ -382,5 +51,1 @@ }); | ||
module.exports = Schema; | ||
module.exports.FieldCollection = FieldCollection; | ||
module.exports.BasicField = BasicField; | ||
module.exports.EmbeddedArrayField = EmbeddedArrayField; | ||
module.exports.EmbeddedDocumentField = EmbeddedDocumentField; |
@@ -1,12 +0,11 @@ | ||
var Collection = require('./collection'); | ||
var Collection = require('ampersand-collection'); | ||
var lodashMixin = require('ampersand-collection-lodash-mixin'); | ||
var type = require('./type'); | ||
var assert = require('assert'); | ||
module.exports = Collection.extend({ | ||
module.exports = Collection.extend(lodashMixin, { | ||
mainIndex: 'name', | ||
model: function(attrs, options) { | ||
var Klass = type[attrs._id]; | ||
if (!Klass) { | ||
throw new TypeError('No value type for ' + attrs._id); | ||
} | ||
var Klass = type[attrs.name]; | ||
assert(Klass, 'No value type for ' + attrs.name); | ||
return new Klass(attrs, options); | ||
@@ -13,0 +12,0 @@ }, |
@@ -1,9 +0,9 @@ | ||
var State = require('./state'); | ||
var State = require('ampersand-state'); | ||
var _ = require('lodash'); | ||
var ValueCollection = require('./value-collection'); | ||
var debug = require('debug')('mongodb-schema:type'); | ||
var Type = State.extend({ | ||
idAttribute: 'name', | ||
props: { | ||
_id: { | ||
name: { | ||
type: 'string' | ||
@@ -26,8 +26,2 @@ }, | ||
values: ValueCollection | ||
}, | ||
serialize: function() { | ||
return this.getAttributes({ | ||
props: true, | ||
derived: true | ||
}, true); | ||
} | ||
@@ -48,3 +42,3 @@ }); | ||
props: { | ||
_id: { | ||
name: { | ||
default: 'String' | ||
@@ -57,3 +51,3 @@ } | ||
props: { | ||
_id: { | ||
name: { | ||
default: 'Number' | ||
@@ -66,3 +60,3 @@ } | ||
props: { | ||
_id: { | ||
name: { | ||
default: 'Long' | ||
@@ -75,3 +69,3 @@ } | ||
props: { | ||
_id: { | ||
name: { | ||
default: 'Null' | ||
@@ -84,3 +78,3 @@ } | ||
props: { | ||
_id: { | ||
name: { | ||
default: 'Timestamp' | ||
@@ -93,3 +87,3 @@ } | ||
props: { | ||
_id: { | ||
name: { | ||
default: 'Boolean' | ||
@@ -102,3 +96,3 @@ } | ||
props: { | ||
_id: { | ||
name: { | ||
default: 'Date' | ||
@@ -111,3 +105,3 @@ } | ||
props: { | ||
_id: { | ||
name: { | ||
default: 'ObjectID' | ||
@@ -120,3 +114,3 @@ } | ||
props: { | ||
_id: { | ||
name: { | ||
default: 'Undefined' | ||
@@ -129,3 +123,3 @@ } | ||
props: { | ||
_id: { | ||
name: { | ||
default: 'Binary' | ||
@@ -138,3 +132,3 @@ } | ||
props: { | ||
_id: { | ||
name: { | ||
default: 'MaxKey' | ||
@@ -147,3 +141,3 @@ } | ||
props: { | ||
_id: { | ||
name: { | ||
default: 'MinKey' | ||
@@ -156,3 +150,3 @@ } | ||
props: { | ||
_id: { | ||
name: { | ||
type: 'string', | ||
@@ -166,3 +160,3 @@ default: 'Object' | ||
props: { | ||
_id: { | ||
name: { | ||
type: 'string', | ||
@@ -173,2 +167,1 @@ default: 'Array' | ||
}); | ||
@@ -1,5 +0,7 @@ | ||
var Collection = require('./collection'); | ||
var Collection = require('ampersand-collection'); | ||
var lodashMixin = require('ampersand-collection-lodash-mixin'); | ||
var Value = require('./value'); | ||
module.exports = Collection.extend({ | ||
module.exports = Collection.extend(lodashMixin, { | ||
mainIndex: 'id', | ||
model: Value, | ||
@@ -6,0 +8,0 @@ serialize: function() { |
@@ -1,7 +0,8 @@ | ||
var State = require('./state'); | ||
var State = require('ampersand-state'); | ||
module.exports = State.extend({ | ||
idAttribute: 'id', | ||
props: { | ||
_id: { | ||
type: 'any' | ||
id: { | ||
type: 'string' | ||
}, | ||
@@ -13,8 +14,5 @@ value: { | ||
initialize: function(attrs) { | ||
this.value = attrs._id; | ||
this._id = this.cid + '-' + attrs._id; | ||
}, | ||
valueOf: function() { | ||
return this.value; | ||
this.value = attrs.value; | ||
this.id = this.cid + '-' + attrs.value; | ||
} | ||
}); |
{ | ||
"name": "mongodb-schema", | ||
"description": "Infer the probabilistic schema for a MongoDB collection.", | ||
"version": "2.1.1", | ||
"version": "2.2.0", | ||
"author": "Thomas Rueckstiess <thomas@rueckstiess.net>", | ||
"license": "MIT", | ||
"license": "Apache-2.0", | ||
"homepage": "http://github.com/mongodb-js/mongodb-schema", | ||
@@ -17,3 +17,4 @@ "repository": { | ||
"start": "zuul --local 3001 --open -- test/*.test.js", | ||
"test": "mocha" | ||
"test": "mocha", | ||
"ci": "./node_modules/istanbul/lib/cli.js cover _mocha -- -R spec ./test/*.test.js" | ||
}, | ||
@@ -28,9 +29,10 @@ "keywords": [ | ||
"ampersand-state": "^4.5.4", | ||
"bson": "^0.3.1", | ||
"bson": "^0.4.0", | ||
"debug": "^2.1.3", | ||
"event-stream": "^3.3.0", | ||
"lodash": "^3.8.0", | ||
"raf": "^3.0.0" | ||
"lodash": "^3.8.0" | ||
}, | ||
"devDependencies": { | ||
"coveralls": "^2.11.2", | ||
"istanbul": "^0.3.15", | ||
"mocha": "^2.0.1", | ||
@@ -37,0 +39,0 @@ "mongodb-extended-json": "^1.3.0", |
123
README.md
# mongodb-schema | ||
Infer probabilistic schema of javascript objects or a MongoDB collection. | ||
[](http://travis-ci.org/mongodb-js/mongodb-schema) | ||
[](https://coveralls.io/r/mongodb-js/mongodb-schema) | ||
[](https://gitter.im/mongodb-js/mongodb-js?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | ||
## Todo | ||
Infer a probabilistic schema for a MongoDB collection. | ||
### Punted | ||
## Example | ||
- [ ] update bin/mongodb-schema.js to do something real | ||
- [ ] http://spacetelescope.github.io/understanding-json-schema/reference/generic.html#enumerated-values | ||
`mongodb-schema` doesn't do anything directly with `mongodb`, so to try the examples we'll install the Node.js driver. We'll also need some data
in a collection to derive the schema from: | ||
1. `npm install mongodb mongodb-schema`. | ||
2. `mongo --eval "db.test.insert([{_id: 1, a: true}, {_id: 2, a: 'true'}, {_id: 3, a: 1}, {_id: 4}])" localhost:27017/test` | ||
3. Create a new file `parse-schema.js` and paste in the following code: | ||
```javascript | ||
var parseSchema = require('mongodb-schema'); | ||
var connect = require('mongodb'); | ||
connect('mongodb://localhost:27017/test', function(err, db){ | ||
if(err) return console.error(err); | ||
parseSchema('test.test', db.collection('test').find(), function(err, schema){ | ||
if(err) return console.error(err); | ||
console.log(JSON.stringify(schema, null, 2)); | ||
db.close(); | ||
}); | ||
}); | ||
``` | ||
4. When we run the above with `node parse-schema.js`, we'll see something | ||
like the following: | ||
```javascript | ||
{ | ||
ns: 'test.test', | ||
count: 4, // The number of documents sampled | ||
fields: [ // A collection of Field objects @see lib/field.js | ||
{ | ||
name: "_id", | ||
probability: 1, // Just as we expected, all 4 documents had `_id` | ||
unique: 4, // All 4 values for `_id` were unique | ||
types: [ | ||
{ | ||
name: "Number", // The only type seen was a Number | ||
probability: 1, | ||
unique: 4 | ||
} | ||
] | ||
}, | ||
{ | ||
name: "a", // Unlike `_id`, `a` was present in only 3 of 4 documents | ||
probability: 0.75, | ||
unique: 3, // Of the 3 values seen, all 3 were unique | ||
// As expected, Boolean, String, and Number values were seen. | ||
// A handy instance of `Undefined` is also provided to represent missing data | ||
types: [ | ||
{ | ||
name: "Boolean", | ||
probability: 0.25, | ||
unique: 1 | ||
}, | ||
{ | ||
name: "String", | ||
probability: 0.25, | ||
unique: 1 | ||
}, | ||
{ | ||
name: "Number", | ||
probability: 0.25, | ||
unique: 1 | ||
}, | ||
{ | ||
name: "Undefined", | ||
probability: 0.25 | ||
} | ||
] | ||
} | ||
] | ||
} | ||
``` | ||
### More Examples | ||
`mongodb-schema` supports all [BSON types][bson-types]. | ||
Check out [the tests][tests] for more usage examples. | ||
## Installation | ||
``` | ||
npm install --save mongodb-schema | ||
``` | ||
## Testing | ||
``` | ||
npm test | ||
``` | ||
## License | ||
Apache 2.0 | ||
## Contributing | ||
Under the hood, `mongodb-schema` uses [ampersand-state][ampersand-state] and | ||
[ampersand-collection][ampersand-collection] for modeling [Schema][schema], [Field][field], and [Type][type] objects. | ||
A high-level view of the class interactions is as follows: | ||
 | ||
[bson-types]: http://docs.mongodb.org/manual/reference/bson-types/ | ||
[ampersand-state]: http://ampersandjs.com/docs#ampersand-state | ||
[ampersand-collection]: http://ampersandjs.com/docs#ampersand-collection | ||
[tests]: https://github.com/mongodb-js/mongodb-schema/tree/master/test | ||
[schema]: https://github.com/mongodb-js/mongodb-language-model/blob/master/lib/schema.js | ||
[field]: https://github.com/mongodb-js/mongodb-language-model/blob/master/lib/field.js | ||
[type]: https://github.com/mongodb-js/mongodb-language-model/blob/master/lib/type.js |
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
181970
897.86%7
-12.5%39
143.75%1022
57.72%124
1027.27%5
66.67%1
Infinity%+ Added
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
- Removed
Updated