mongodb-schema - npm Package Compare versions

Comparing version 10.0.1 to 10.0.2

lib/schema-analyzer.d.ts (new file)

lib/index.d.ts (5 lines changed)

@@ -5,3 +5,4 @@ /// <reference types="node" />
 import stream from './stream';
-import type { SchemaParseOptions, Schema, SchemaField } from './stream';
+import { SchemaAnalyzer } from './schema-analyzer';
+import type { SchemaParseOptions, Schema, SchemaField } from './schema-analyzer';
 import * as schemaStats from './stats';
@@ -12,2 +13,2 @@ type MongoDBCursor = AggregationCursor | FindCursor;
 export type { Schema, SchemaField };
-export { stream, schemaStats };
+export { stream, SchemaAnalyzer, schemaStats };
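For context, the newly exported SchemaAnalyzer can be driven directly, without the stream wrapper. A minimal sketch (not part of this diff; it assumes only the analyzeDoc/getResult methods that the new ParseStream below calls, plus the SchemaParseOptions and Schema types this file re-exports):

import { SchemaAnalyzer } from 'mongodb-schema';
import type { Schema } from 'mongodb-schema';

const analyzer = new SchemaAnalyzer({ semanticTypes: false, storeValues: true });
// Feed documents one at a time; each call updates the running analysis.
analyzer.analyzeDoc({ _id: 1, name: 'a' });
analyzer.analyzeDoc({ _id: 2, name: 'b', deleted: true });
// getResult() returns the accumulated Schema (document count plus per-field types).
const schema: Schema = analyzer.getResult();
console.log(schema.count, schema.fields.map((f) => f.name));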

lib/index.js (4 lines changed)

@@ -29,3 +29,3 @@ "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.schemaStats = exports.stream = void 0;
+exports.schemaStats = exports.SchemaAnalyzer = exports.stream = void 0;
 const stream_1 = require("stream");
@@ -35,2 +35,4 @@ const util_1 = require("util");
 exports.stream = stream_2.default;
+const schema_analyzer_1 = require("./schema-analyzer");
+Object.defineProperty(exports, "SchemaAnalyzer", { enumerable: true, get: function () { return schema_analyzer_1.SchemaAnalyzer; } });
 const schemaStats = __importStar(require("./stats"));
@@ -37,0 +39,0 @@ exports.schemaStats = schemaStats;
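The compiled CommonJS entry point mirrors the TypeScript surface, so all three exports are reachable with a plain require — for example:

const { stream, SchemaAnalyzer, schemaStats } = require('mongodb-schema');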

lib/stats.d.ts

@@ -1,2 +1,2 @@
-import type { Schema } from './stream';
+import type { Schema } from './schema-analyzer';
 declare function widthRecursive(schema?: Schema): number;
@@ -3,0 +3,0 @@ declare function depthRecursive(schema?: Schema): number;

lib/stream.d.ts

The type definitions removed below now live in the new lib/schema-analyzer.d.ts (see the import changes in lib/index.d.ts above).

 /// <reference types="node" />
 import { Duplex } from 'stream';
-import type { ObjectId, MinKey, MaxKey, Long, Double, Int32, Decimal128, Binary, BSONRegExp, Code, BSONSymbol, Timestamp } from 'bson';
-type BaseSchemaType = {
-    path: string;
-    count: number;
-    probability: number;
-    has_duplicates: boolean;
-    unique: number;
-};
-type ConstantSchemaType = BaseSchemaType & {
-    name: 'Null' | 'Undefined';
-};
-type TypeCastMap = {
-    Array: unknown[];
-    Binary: Binary;
-    Boolean: boolean;
-    Code: Code;
-    Date: Date;
-    Decimal128: Decimal128;
-    Double: Double;
-    Int32: Int32;
-    Int64: Long;
-    MaxKey: MaxKey;
-    MinKey: MinKey;
-    Null: null;
-    Object: Record<string, unknown>;
-    ObjectId: ObjectId;
-    BSONRegExp: BSONRegExp;
-    String: string;
-    BSONSymbol: BSONSymbol;
-    Timestamp: Timestamp;
-    Undefined: undefined;
-};
-type TypeCastTypes = keyof TypeCastMap;
-type BSONValue = TypeCastMap[TypeCastTypes];
-export type PrimitiveSchemaType = BaseSchemaType & {
-    name: 'String' | 'Number' | 'Int32' | 'Boolean' | 'Decimal128' | 'Long' | 'ObjectId' | 'Date' | 'RegExp' | 'Symbol' | 'MaxKey' | 'MinKey' | 'Binary' | 'Code' | 'Timestamp' | 'DBRef';
-    values: BSONValue[];
-};
-export type ArraySchemaType = BaseSchemaType & {
-    name: 'Array';
-    lengths: number[];
-    average_length: number;
-    total_count: number;
-    types: SchemaType[];
-};
-export type DocumentSchemaType = BaseSchemaType & {
-    name: 'Document';
-    fields: SchemaField[];
-};
-export type SchemaType = ConstantSchemaType | PrimitiveSchemaType | ArraySchemaType | DocumentSchemaType;
-export type SchemaField = {
-    name: string;
-    count: number;
-    path: string;
-    type: string | string[];
-    probability: number;
-    has_duplicates: boolean;
-    types: SchemaType[];
-};
-export type Schema = {
-    count: number;
-    fields: SchemaField[];
-};
-type SemanticTypeFunction = ((value: string, path?: string) => boolean);
-type SemanticTypeMap = {
-    [typeName: string]: SemanticTypeFunction | boolean;
-};
-export type SchemaParseOptions = {
-    semanticTypes?: boolean | SemanticTypeMap;
-    storeValues?: boolean;
-};
-declare function parse(options?: SchemaParseOptions): Duplex;
-export default parse;
+import type { Document } from 'bson';
+import { SchemaAnalyzer } from './schema-analyzer';
+import type { SchemaParseOptions } from './schema-analyzer';
+export declare class ParseStream extends Duplex {
+    analyzer: SchemaAnalyzer;
+    constructor(options?: SchemaParseOptions);
+    _write(obj: Document, enc: unknown, cb: () => void): void;
+    _read(): void;
+    _final(cb: () => void): void;
+}
+export default function makeParseStream(options?: SchemaParseOptions): ParseStream;
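A usage sketch of the rewritten stream API (not part of the diff; the factory and its events are taken from the declarations above and the implementation below — it returns an object-mode Duplex that emits 'progress' per document and pushes the final Schema when the writable side ends). The factory is consumed here through the `stream` re-export shown in lib/index.d.ts:

import { stream } from 'mongodb-schema';
import type { Schema } from 'mongodb-schema';

const parseStream = stream({ storeValues: false });
parseStream.on('progress', () => { /* one document analyzed */ });
parseStream.on('data', (schema: Schema) => {
  console.log(schema.count, schema.fields.length);
});
parseStream.write({ _id: 1, title: 'a' });
parseStream.write({ _id: 2, title: 'b', tags: ['x'] });
parseStream.end();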
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const reservoir_1 = __importDefault(require("reservoir"));
exports.ParseStream = void 0;
const stream_1 = require("stream");
const lodash_1 = __importDefault(require("lodash"));
const semantic_types_1 = __importDefault(require("./semantic-types"));
function extractStringValueFromBSON(value) {
if (value && value._bsontype) {
if (['Decimal128', 'Long'].includes(value._bsontype)) {
return value.toString();
}
if (['Double', 'Int32'].includes(value._bsontype)) {
return String(value.value);
}
const schema_analyzer_1 = require("./schema-analyzer");
class ParseStream extends stream_1.Duplex {
constructor(options) {
super({ objectMode: true });
this.analyzer = new schema_analyzer_1.SchemaAnalyzer(options);
}
if (typeof value === 'string') {
return value;
_write(obj, enc, cb) {
this.analyzer.analyzeDoc(obj);
this.emit('progress', obj);
cb();
}
return String(value);
}
function fieldComparator(a, b) {
const aName = a.name;
const bName = b.name;
if (aName === '_id') {
return -1;
_read() { }
_final(cb) {
this.push(this.analyzer.getResult());
this.push(null);
cb();
}
if (bName === '_id') {
return 1;
}
return aName.toLowerCase() < bName.toLowerCase() ? -1 : 1;
}
function finalizeSchema(schema, parent, tag) {
if (schema === undefined) {
return;
}
if (tag === undefined) {
finalizeSchema(schema.fields, schema, 'fields');
}
if (tag === 'fields') {
Object.values(schema).forEach((field) => {
const missing = parent.count - field.count;
if (missing > 0) {
field.types.Undefined = {
name: 'Undefined',
type: 'Undefined',
path: field.path,
count: missing
};
}
field.total_count = Object.values(field.types)
.map((v) => v.count)
.reduce((p, c) => p + c, 0);
finalizeSchema(field.types, field, 'types');
field.type = field.types.map((v) => v.name);
if (field.type.length === 1) {
field.type = field.type[0];
}
field.has_duplicates = !!field.types.find((v) => v.has_duplicates);
field.probability = field.count / parent.count;
});
parent.fields = Object.values(parent.fields).sort(fieldComparator);
}
if (tag === 'types') {
Object.values(schema).forEach((type) => {
type.total_count = (type.lengths || []).reduce((p, c) => p + c || 0, 0);
finalizeSchema(type.fields, type, 'fields');
finalizeSchema(type.types, type, 'types');
type.probability = type.count / (parent.total_count || parent.count);
if (type.name === 'Null' || type.name === 'Undefined') {
delete type.values;
type.unique = type.count === 0 ? 0 : 1;
type.has_duplicates = type.count > 1;
}
else if (type.values) {
type.unique = new Set(type.values.map(extractStringValueFromBSON)).size;
type.has_duplicates = type.unique !== type.values.length;
}
if (type.lengths) {
type.average_length = type.total_count / type.lengths.length;
}
});
parent.types = Object.values(parent.types).sort((a, b) => b.probability - a.probability);
}
exports.ParseStream = ParseStream;
function makeParseStream(options) {
return new ParseStream(options);
}
function parse(options) {
options = { semanticTypes: false, storeValues: true, ...options };
let semanticTypes = {
...semantic_types_1.default
};
if (typeof options.semanticTypes === 'object') {
const enabledTypes = Object.entries(options.semanticTypes)
.filter(([, v]) => typeof v === 'boolean' && v)
.map(([k]) => k.toLowerCase());
semanticTypes = {
...Object.entries(semanticTypes)
.filter(([k]) => enabledTypes.includes(k.toLowerCase()))
.reduce((p, [k, v]) => ({ ...p, [k]: v }), {})
};
Object.entries(options.semanticTypes)
.filter(([, v]) => typeof v === 'function')
.forEach(([k, v]) => { semanticTypes[k] = v; });
}
const rootSchema = {
fields: {},
count: 0
};
let finalized = false;
function getBSONType(value) {
let T;
if (value && value._bsontype) {
T = value._bsontype;
}
else {
T = Object.prototype.toString.call(value).replace(/^\[object (\w+)\]$/, '$1');
}
if (T === 'Object') {
T = 'Document';
}
return T;
}
function getSemanticType(value, path) {
const returnValue = Object.entries(semanticTypes)
.filter(([, v]) => {
return v(value, path);
})
.map(([k]) => k)[0];
return returnValue;
}
function addToValue(type, value) {
if (type.name === 'String') {
if (value.length > 10000) {
value = value.charCodeAt(10000 - 1) === value.codePointAt(10000 - 1)
? value.slice(0, 10000)
: value.slice(0, 10000 - 1);
}
}
type.values.pushSome(value);
}
function addToType(path, value, schema) {
var _a;
const bsonType = getBSONType(value);
const typeName = (options === null || options === void 0 ? void 0 : options.semanticTypes) ? getSemanticType(value, path) || bsonType : bsonType;
const type = schema[typeName] = lodash_1.default.get(schema, typeName, {
name: typeName,
bsonType: bsonType,
path: path,
count: 0
});
type.count++;
if (typeName === 'Array') {
type.types = (_a = type.types) !== null && _a !== void 0 ? _a : {};
type.lengths = type.lengths || [];
type.lengths.push(value.length);
value.forEach((v) => addToType(path, v, type.types));
}
else if (typeName === 'Document') {
type.fields = lodash_1.default.get(type, 'fields', {});
Object.entries(value).forEach(([k, v]) => addToField(`${path}.${k}`, v, type.fields));
}
else if (options === null || options === void 0 ? void 0 : options.storeValues) {
const defaultValue = bsonType === 'String'
? (0, reservoir_1.default)(100) : (0, reservoir_1.default)(10000);
type.values = type.values || defaultValue;
addToValue(type, value);
}
}
function addToField(path, value, schema) {
const pathSplitOnDot = path.split('.');
const defaults = {
[path]: {
name: pathSplitOnDot[pathSplitOnDot.length - 1],
path: path,
count: 0,
types: {}
}
};
lodash_1.default.defaultsDeep(schema, defaults);
const field = schema[path];
field.count++;
addToType(path, value, field.types);
}
function cleanup() {
if (!finalized) {
finalizeSchema(rootSchema);
finalized = true;
}
}
return new stream_1.Duplex({
objectMode: true,
write(obj, enc, cb) {
for (const key of Object.keys(obj)) {
addToField(key, obj[key], rootSchema.fields);
}
rootSchema.count += 1;
this.emit('progress', obj);
cb();
},
read() { },
final(cb) {
cleanup();
this.push(rootSchema);
this.push(null);
cb();
}
});
}
exports.default = parse;
exports.default = makeParseStream;
//# sourceMappingURL=stream.js.map
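The behavior of the logic removed above is easiest to see on a tiny input. A hedged sketch (it assumes the new SchemaAnalyzer preserves the finalization semantics of finalizeSchema and fieldComparator above; the documents are made up):

import { stream } from 'mongodb-schema';

const s = stream();
s.on('data', (schema) => {
  // schema.count === 2
  // schema.fields comes back with _id first (fieldComparator above).
  // Field "b" appears in 1 of 2 documents, so it gets probability 0.5
  // and an injected Undefined type with count 1 (the `missing > 0`
  // branch of finalizeSchema above).
  console.log(JSON.stringify(schema, null, 2));
});
s.write({ _id: 1, a: 1 });
s.write({ _id: 2, a: 'x', b: 2 });
s.end();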
package.json

 {
   "name": "mongodb-schema",
   "description": "Infer the probabilistic schema for a MongoDB collection.",
-  "version": "10.0.1",
+  "version": "10.0.2",
   "author": {
@@ -36,3 +36,3 @@ "name": "MongoDB Inc",
   "scripts": {
-    "test": "nyc mocha --colors -r ts-node/register test/*.ts",
+    "test": "nyc mocha --timeout 5000 --colors -r ts-node/register test/*.ts",
     "test-example-parse-from-file": "ts-node examples/parse-from-file.ts",
@@ -39,0 +39,0 @@ "test-example-parse-schema": "ts-node examples/parse-schema.ts",

Sorry, the diffs of the remaining 3 files are not supported yet.