Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

bipf

Package Overview
Dependencies
Maintainers
5
Versions
23
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

bipf - npm Package Compare versions

Comparing version 1.6.5 to 1.7.0

64

compare.js

@@ -68,35 +68,37 @@ const varint = require('fast-varint')

module.exports = {
compareString(buffer, start, target) {
if (start === -1) return null
target = Buffer.isBuffer(target) ? target : Buffer.from(target)
const tag = varint.decode(buffer, start)
if ((tag & TAG_MASK) !== STRING) return null
const len = tag >> TAG_SIZE
const _len = Math.min(target.length, len)
return (
buffer.compare(
target,
0,
_len,
start + varint.decode.bytes,
start + varint.decode.bytes + _len
) || target.length - len
)
},
/**
 * Compare the BIPF-encoded STRING field at `start` in `buffer` against
 * `target` (a string or Buffer), with memcmp-like semantics.
 *
 * Returns null when start is -1 or the field is not a STRING; otherwise a
 * negative/zero/positive number ordering `target` relative to the field.
 */
function compareString(buffer, start, target) {
  if (start === -1) return null
  const targetBuf = Buffer.isBuffer(target) ? target : Buffer.from(target)
  const tag = varint.decode(buffer, start)
  const tagBytes = varint.decode.bytes // capture before any further decode
  if ((tag & TAG_MASK) !== STRING) return null
  const fieldLen = tag >> TAG_SIZE
  const cmpLen = Math.min(targetBuf.length, fieldLen)
  const fieldStart = start + tagBytes
  // Byte-wise compare of the shared prefix; fall back to length difference
  // when the prefixes are equal (shorter sorts first).
  const prefixOrder = buffer.compare(
    targetBuf,
    0,
    cmpLen,
    fieldStart,
    fieldStart + cmpLen
  )
  if (prefixOrder !== 0) return prefixOrder
  return targetBuf.length - fieldLen
}
compare,
createCompareAt(paths) {
const getPaths = paths.map(createSeekPath)
return function (a, b) {
for (let i = 0; i < getPaths.length; i++) {
const _a = getPaths[i](a, 0)
const _b = getPaths[i](b, 0)
const r = compare(a, _a, b, _b)
if (r) return r
}
return 0
/**
 * Build a comparator over encoded BIPF buffers that orders them by the
 * values found at `paths` (an array of key paths), in priority order.
 *
 * Each path is compiled once via createSeekPath; the returned function is
 * suitable for Array.prototype.sort over encoded buffers.
 *
 * NOTE: the scraped diff interleaved two versions here (stray `},` and a
 * misplaced `return 0`); this is the reconstructed 1.7.0 form.
 */
function createCompareAt(paths) {
  const getPaths = paths.map(createSeekPath)
  return function (a, b) {
    for (let i = 0; i < getPaths.length; i++) {
      const _a = getPaths[i](a, 0)
      const _b = getPaths[i](b, 0)
      const r = compare(a, _a, b, _b)
      // First path that differs decides the ordering.
      if (r) return r
    }
    return 0
  }
}
// Public API of compare.js: in-place string comparison against encoded
// fields, the generic value comparator, and the path-based comparator factory.
module.exports = {
  compareString,
  compare,
  createCompareAt,
}

@@ -13,2 +13,4 @@ const STRING = 0 // 000

const ALREADY_BIPF = 8
const TAG_SIZE = 3

@@ -27,2 +29,4 @@ const TAG_MASK = 7

ALREADY_BIPF,
TAG_SIZE,

@@ -29,0 +33,0 @@ TAG_MASK,

@@ -11,2 +11,3 @@ const varint = require('fast-varint')

BOOLNULL,
ALREADY_BIPF,
} = require('./constants')

@@ -83,4 +84,7 @@

if ('string' === typeof value || value instanceof Date) return STRING
else if (Buffer.isBuffer(value)) return BUFFER
else if (Number.isInteger(value) && Math.abs(value) <= 2147483647) return INT
else if (Buffer.isBuffer(value)) {
if (value._IS_BIPF_ENCODED) return ALREADY_BIPF
else return BUFFER
} else if (Number.isInteger(value) && Math.abs(value) <= 2147483647)
return INT
else if ('number' === typeof value && Number.isFinite(value))

@@ -97,2 +101,3 @@ //do not support Infinity or NaN (because JSON)

if (type === void 0) throw new Error('unknown type: ' + JSON.stringify(value))
if (type === ALREADY_BIPF) return value.length
const len = encodingLengthers[type](value)

@@ -106,2 +111,6 @@ return varint.encodingLength(len << TAG_SIZE) + len

if (type === void 0) throw new Error('unknown type: ' + JSON.stringify(value))
if (type === ALREADY_BIPF) {
value.copy(buffer, start, 0, value.length)
return value.length
}
const len = _len === undefined ? encodingLengthers[type](value) : _len

@@ -116,23 +125,39 @@ // if(!buffer)

module.exports = {
encode,
/**
 * Encode `value` into `buffer` at `start`, then tag the buffer as already
 * being BIPF so that embedding it in a later encode does not double-encode it.
 * Returns the tagged buffer.
 */
function encodeIdempotent(value, buffer, start) {
  const out = buffer
  encode(value, out, start)
  // Flag read back by getType → ALREADY_BIPF on subsequent encodes.
  out._IS_BIPF_ENCODED = true
  return out
}
getType,
/**
 * Read the payload length of the encoded value at `start` in `buffer`
 * (the tag varint with the type bits shifted away).
 */
function getEncodedLength(buffer, start) {
  const tag = varint.decode(buffer, start)
  return tag >> TAG_SIZE
}
getEncodedLength(buffer, start) {
return varint.decode(buffer, start) >> TAG_SIZE
},
/**
 * Read the type tag of the encoded value at `start` in `buffer`
 * (the low TAG_MASK bits of the tag varint).
 */
function getEncodedType(buffer, start) {
  const tag = varint.decode(buffer, start)
  return tag & TAG_MASK
}
getEncodedType(buffer, start) {
return varint.decode(buffer, start) & TAG_MASK
},
/**
 * Allocate an exactly-sized buffer and encode `value` into it.
 * Returns the newly created buffer.
 */
function allocAndEncode(value) {
  // allocUnsafe is safe here: encode fills every byte of the buffer.
  const out = Buffer.allocUnsafe(encodingLength(value))
  encode(value, out, 0)
  return out
}
/**
 * Allocate an exactly-sized buffer, encode `value` into it, and tag the
 * result as already-BIPF (see encodeIdempotent). Returns the new buffer.
 */
function allocAndEncodeIdempotent(value) {
  const size = encodingLength(value)
  const out = Buffer.allocUnsafe(size)
  encodeIdempotent(value, out, 0)
  return out
}
module.exports = {
encode,
encodeIdempotent,
getType,
getEncodedLength,
getEncodedType,
encodingLength,
allocAndEncode(value) {
const len = encodingLength(value)
const buffer = Buffer.allocUnsafe(len)
encode(value, buffer, 0)
return buffer
},
allocAndEncode,
allocAndEncodeIdempotent,
}
const varint = require('fast-varint')
const { types, TAG_SIZE, TAG_MASK, OBJECT, ARRAY } = require('./constants')
const {
types,
TAG_SIZE,
TAG_MASK,
STRING,
BUFFER,
INT,
DOUBLE,
OBJECT,
ARRAY,
BOOLNULL,
} = require('./constants')
const { decode } = require('./decode')
const {
encode,
encodeIdempotent,
encodingLength,
allocAndEncode,
allocAndEncodeIdempotent,
getEncodedLength,

@@ -61,4 +74,6 @@ getEncodedType,

encode,
encodeIdempotent,
decode,
allocAndEncode,
allocAndEncodeIdempotent,
encodingLength,

@@ -65,0 +80,0 @@ buffer: true,

{
"name": "bipf",
"description": "binary in-place format",
"version": "1.6.5",
"version": "1.7.0",
"homepage": "https://github.com/ssbc/bipf",

@@ -10,2 +10,5 @@ "repository": {

},
"files": [
"*.js"
],
"dependencies": {

@@ -24,9 +27,2 @@ "fast-varint": "^1.0.0"

},
"scripts": {
"test": "tape test/index.js | tap-arc && tape test/compare.js | tap-arc && tape test/fixtures.js | tap-arc",
"coverage": "nyc --reporter=lcov npm test",
"format-code": "prettier --write \"*.js\" \"test/*.js\"",
"format-code-staged": "pretty-quick --staged --pattern \"*.js\" --pattern \"test/*.js\"",
"benchmark": "node test/perf.js"
},
"husky": {

@@ -41,3 +37,11 @@ "hooks": {

],
"license": "MIT"
}
"license": "MIT",
"scripts": {
"test": "tape test/index.js | tap-arc && tape test/compare.js | tap-arc && tape test/fixtures.js | tap-arc",
"coverage": "nyc --reporter=lcov npm test",
"format-code": "prettier --write \"*.js\" \"test/*.js\"",
"format-code-staged": "pretty-quick --staged --pattern \"*.js\" --pattern \"test/*.js\"",
"benchmark": "node test/perf.js"
},
"readme": "# BIPF\n\nBinary In-Place Format. A binary format designed for in-place (without\nparsing) reads, with schemaless json-like semantics.\n\n## Motivation\n\n### In-place reads\n\nIn a database there are many cases where you need to read a bunch of\nrecords, filter out most of it (if one or two fields do not match) and\nthen immediately write whats left to a network socket. With json, this\nmeans parsing possibly hundreds of thousands of json objects (which is\nsuprisingly slow), and then reserializing whats left. An inplace\nformat doesn't actually require parsing as a whole at all. You only\nneed to parse the fields you actually read, and using length delimited\nfields instead of escapes, means you do not have to look at every byte\nto parse a field.\n\n### Length delimited collections\n\nUnfortunately, most binary json-like formats (such as msgpack and\ncbor) use element counts on collections (objects and arrays, in\njson-land) this means to find the end of a collection, you have to\nstep past each item in it (including the fields in any object\ncontained inside of it). However, if the collections are length\ndelimited, meaning marked by the encoded byte length of the object,\nnot the number of items inside it, then it's easy to jump right to the\nend of the object in one go. For this reason, databases (for example,\nmongodb, and couchdb) use length delimited collections.\n\n## Format\n\nThe format of BIPF is specificed in the\n[spec](https://github.com/ssbc/bipf-spec).\n\nAll values must have a correct length field. This makes it possible to\ntraverse all fields without looking at the values. Theirfor it is\npossible to quickly jump to any subvalue if you know it's path. If you\nare looking for a particular string, you can also skip any with the\nwrong length! Since object and array fields also begin with a length,\nyou can jump past them if you know they do not contain the value you\nare looking for. 
This means that seeking inside a more tree like\nobject is more efficient than seeking inside a more list like object!\n\n## Performance\n\nThis design is optimized for the performance of in-place\nreads. Encoding is expected to be slower because of the need to\ncalculate the length of collections before encoding them. If encoding\nis within half as fast as a format intended for encoding perf, that is\ngood. Of course, the intention with an in-place read system is that\nyou encode _once_ and then never decode. Just pass around the binary\nobject, reading fields out when necessary.\n\nBecause of the length encoding, the ability to update in-place is very\nlimited (not recommended actualy) but if you are building a system\naround immutable data, that is not much of a problem. Although, since\nsubobjects are fully valid as an encoded value, you can easily copy a\nsubobject into a new object, etc, without re-encoding.\n\n## Benchmark\n\nI did a simple benchmark, where I encoded and decoded this module's\npackage.json file in various ways. Please not that I am comparing the\nperformance of code written in C with code written in javascript. If\nthe javascript is within 10x the performance of the C then we are\ndoing well! 
(and a C implementation would likely close that gap)\n\nThe measurement is run 10k operations, then divide by number of ms\ntaken, higher number means more faster!\n\nBenchmark code is in `./test/perf.js`\n\n```\noperation, ops/ms\nbinary.encode 62.61740763932373\nJSON.stringify 325.7328990228013\nbinary.decode 83.40283569641367\nJSON.parse 242.13075060532688\nJSON.parse(buffer) 198.4126984126984\nJSON.stringify(JSON.parse()) 127.55102040816327\nbinary.seek(string) 500\nbinary.seek2(encoded) 1219.5121951219512\nbinary.seek(buffer) 1333.3333333333333\nbinary.seekPath(encoded) 558.659217877095\nbinary.seekPath(compiled) 1265.8227848101267\nbinary.compare() 1785.7142857142858\n```\n\nAs expected, `binary.encode` is much slower than `JSON.stringify`, but\nit's only 6 times worse. But the interesting comparison is\n`JSON.stringify(JSON.parse())` and `binary.seek(buffer)`. Often, in\nimplementing a database, you need to read something from disk, examine\none or two fields (to check if it matches a query) and then write it\nto network.\n\n(note: the `binary.seek` operation is fairly realistic, we seek to the\n\"dependencies\" object, then look up \"varint\" inside of that, then\ndecode the version range of \"varint\". So it's two comparisons and\ndecoding a string out)\n\nSo, in JSON land, that usually means reading it, parsing it, checking\nit, stringifying it again. This involves reading each byte in the\ninput and allocating memory for the parsed object. Then traversing\nthat object in memory and writing something to a string (more memory\nallocation, and all this memory allocation means the garbage collector\nneeds to handle it too)\n\nBut if we have in-place reads, we just read raw binary, seek into the\nappropiate places to check wether it's the objects we want, and then\nwrite it to the network directly. 
We don't allocate _any_ new memory\nafter reading it.\n\nFurther benchmarks and tests are necessary, but that it can be this\nfast using a _javascript implementation_ is impressive.\n\n## Cannonicisity\n\nFor a system with signatures, it's highly important that data is\n_cannonical_. There should be exactly one way to encode a given data\nstructure. There are a few edge cases here that need to be checked\nfor. (not implemented yet)\n\n* varints must not be zero padded\n* chrome and firefox preserve order of object keys, but any integer\n keys greater than zero come first, and are in increasing order.\n* the length of subfields *must* be checked to not excede their\n container's length. (This is a security issue)\n\nThese properties can all be checked by traversing the tags but without\nreading the keys or values. I will not consider this module _ready_\nuntil there are tests that cover these invalid cases, to ensure that\nimplementations throw an error.\n\n## API\n\n`encode, decode, encodingLength` follow the interface specified by\n[`abstract-encoding`](https://github.com/mafintosh/abstract-encoding)\n\n### encode(value, buffer, start) => length\n\nwrite `value` to `buffer` from start. returns the number of bytes\nused.\n\n### allocAndEncode(value) => buffer\n\nallocate a new buffer and write `value` into it. 
returns the newly\ncreated buffer.\n\n### encodeIdempotent(value, buffer, start) => length\n\nsame as `encode`, but *tags* the buffer as being a `bipf` buffer, such\nthat you can place this buffer in another encoded bipf, and it won't\nbe \"double encoded\", it will just be embedded inside the larger buffer.\n\n### allocAndEncodeIdempotent(value) => buffer\n\nsame as `allocAndEncode`, but *tags* the resulting buffer as being a\n`bipf` buffer.\n\nExample:\n\n```js\nvar obj = {address: {street: '123 Main St'}}\nvar buf1 = bipf.allocAndEncode(obj)\n\nvar innerObj = {street: '123 Main St'}\nvar innerBuf = bipf.allocAndEncodeIdempotent(innerObj)\nvar outerObj = {address: innerBuf}\nvar buf2 = bipf.allocAndEncode(outerObj)\n\ndeepEquals(buf1, buf2) // true\n```\n\nCounter-example:\n\n```js\nvar obj = {address: {street: '123 Main St'}}\nvar buf1 = bipf.allocAndEncode(obj)\n\nvar innerObj = {street: '123 Main St'}\nvar innerBuf = bipf.allocAndEncode(innerObj)\nvar outerObj = {address: innerBuf}\nvar buf2 = bipf.allocAndEncode(outerObj)\n\ndeepEquals(buf1, buf2) // false\n```\n\n### decode(buffer, start) => value\n\nread the next value from `buffer` at `start`. returns the value, and\nsets `decode.bytes` to number of bytes used.\n\n### encodingLength(value) => length\n\nreturns the length needed to encode `value`\n\n### getValueType(value) => type\n\nreturns the type tag that will be used to encode this type.\n\n### getEncodedType(buffer, start) => type\n\nget the `type` tag at `start`\n\n### types.{string,buffer,int,double,array,object,boolnull,reserved}\n\nan object containing the type tags.\n\n### iterate(buffer, start, fn) => void\n\nIf the field at `start` is an object or array, then `iterate` will\ncall the `fn` with arguments `fn(buffer, pointer, key)` for each\nsubfield. If the field at `start` is not an array or object, this\nreturns `-1`. 
You can stop/abort the iteration by making `fn` return\nany truthy value.\n\n### seekKey(buffer, start, target) => pointer\n\nSeek for a key `target` within an object. If `getEncodedType(buffer,\nstart) !== types.object` then will return `-1`. Otherwise, seekKey\nwill iterate over the encoding object and return a pointer to where it\nstarts.\n\nSince this defines a recursive encoding, a pointer to any valid\nsub-encoding is a valid start value.\n\n``` js\nvar obj = {\n foo: 1,\n bar: true,\n baz: 'hello'\n}\n//allocate a correctly sized buffer\nvar length = b.encodingLength(obj)\nvar buffer = Buffer.alloc(length)\n\n//encode object to buffer\nb.encode(obj, buffer, 0)\n\n//parse entire object and read a single value\nconsole.log(b.decode(buffer, 0).baz)\n\n//seek and decode a single value\nconsole.log(b.decode(buffer, b.seekKey(buffer, 0, 'baz')))\n```\n\nSee performance section for discussion on the performance of seek - if\nit's only needed to parse a couple of elements, it can be\nsignificantly faster than parsing.\n\n### seekKey2(buffer, start, target, target_start) => pointer\n\nSame as `seekKey`, except `target` must be an encoded value. This is\nusually done using `allocAndEncode`. This is a bit faster.\n\n### seekKeyCached(buffer, start, target) => pointer\n\nSame as `seekKey`, but uses a cache to avoid re-seeking the pointers\nif the same arguments have been provided in the past. However,\n`target` must be a string, not a buffer.\n\n### seekPath(buffer, start, array_of_buffers) => pointer\n\nThe same as `seekKey`, except for a recursive path. `path` should be\nan array of node buffers, just holding the key values, not encoded as\n`bipf`.\n\n### createSeekPath(path) => seekPath(buffer, start)\n\nCompiles a javascript function that does a seekPath. This is\nsignificantly faster than iterating over a javascript array and then\nlooking for each thing, because it will get optimized by the js\nengine's jit compiler.\n\n\n## License\n\nMIT\n\n\n"
}

@@ -146,2 +146,46 @@ # BIPF

### allocAndEncode(value) => buffer
allocate a new buffer and write `value` into it. returns the newly
created buffer.
### encodeIdempotent(value, buffer, start) => length
same as `encode`, but *tags* the buffer as being a `bipf` buffer, such
that you can place this buffer in another encoded bipf, and it won't
be "double encoded", it will just be embedded inside the larger buffer.
### allocAndEncodeIdempotent(value) => buffer
same as `allocAndEncode`, but *tags* the resulting buffer as being a
`bipf` buffer.
Example:
```js
var obj = {address: {street: '123 Main St'}}
var buf1 = bipf.allocAndEncode(obj)
var innerObj = {street: '123 Main St'}
var innerBuf = bipf.allocAndEncodeIdempotent(innerObj)
var outerObj = {address: innerBuf}
var buf2 = bipf.allocAndEncode(outerObj)
deepEquals(buf1, buf2) // true
```
Counter-example:
```js
var obj = {address: {street: '123 Main St'}}
var buf1 = bipf.allocAndEncode(obj)
var innerObj = {street: '123 Main St'}
var innerBuf = bipf.allocAndEncode(innerObj)
var outerObj = {address: innerBuf}
var buf2 = bipf.allocAndEncode(outerObj)
deepEquals(buf1, buf2) // false
```
### decode(buffer, start) => value

@@ -148,0 +192,0 @@

@@ -66,74 +66,77 @@ const varint = require('fast-varint')

module.exports = {
seekKey,
/**
 * Seek a key inside the encoded OBJECT at `start`, where the key to look
 * for is itself already BIPF-encoded (`target` at `targetStart`). Faster
 * than seekKey because the target's tag varint can be compared as a plain
 * integer before any byte comparison.
 *
 * Returns a pointer to the matching key's value, or -1 when the field at
 * `start` is not an object or the key is absent.
 *
 * NOTE: the scraped diff interleaved the 1.6.5 and 1.7.0 bodies line by
 * line here; this is the reconstructed 1.7.0 (function-declaration) form.
 */
function seekKey2(buffer, start, target, targetStart) {
  const tag = varint.decode(buffer, start)
  const type = tag & TAG_MASK
  if (type !== OBJECT) return -1
  let c = varint.decode.bytes
  const len = tag >> TAG_SIZE
  const targetTag = varint.decode(target, targetStart)
  // total encoded key size: payload length plus the tag varint's byte count
  const targetLen = (targetTag >> TAG_SIZE) + varint.decode.bytes
  for (; c < len; ) {
    const keyTag = varint.decode(buffer, start + c)
    if (
      keyTag === targetTag &&
      buffer.compare(
        target,
        targetStart,
        targetLen,
        start + c,
        start + c + targetLen
      ) === 0
    )
      return start + c + targetLen
    // skip over this key...
    c += varint.decode.bytes
    const keyLen = keyTag >> TAG_SIZE
    c += keyLen
    // ...and its value
    const valueTag = varint.decode(buffer, start + c)
    c += varint.decode.bytes
    const valueLen = valueTag >> TAG_SIZE
    c += valueLen
  }
  return -1
}
/**
 * seekKey with memoization: results are cached per (buffer, start, target)
 * in the module-level WeakMap-backed `cache1`. `target` must be a string.
 *
 * Fix: the original validated `target` only after inserting empty Maps into
 * the cache, so an invalid call polluted `cache1`; validate first.
 */
function seekKeyCached(buffer, start, target) {
  if (typeof target !== 'string') {
    throw new Error('seekKeyCached only supports string target')
  }
  let cache2 = cache1.get(buffer)
  if (!cache2) cache1.set(buffer, (cache2 = new Map()))
  let cache3 = cache2.get(start)
  if (!cache3) cache2.set(start, (cache3 = new Map()))
  if (cache3.has(target)) {
    return cache3.get(target)
  }
  // Miss: do the real seek once and remember the pointer (including -1).
  const result = seekKey(buffer, start, target)
  cache3.set(target, result)
  return result
}
seekKeyCached(buffer, start, target) {
let cache2 = cache1.get(buffer)
if (!cache2) cache1.set(buffer, (cache2 = new Map()))
let cache3 = cache2.get(start)
if (!cache3) cache2.set(start, (cache3 = new Map()))
if (typeof target !== 'string') {
throw new Error('seekKeyCached only supports string target')
}
if (cache3.has(target)) {
return cache3.get(target)
} else {
const result = seekKey(buffer, start, target)
cache3.set(target, result)
return result
}
},
/**
 * Follow a path of keys (an encoded BIPF array in `target` at `targetStart`)
 * down through nested objects in `buffer`, starting at `start`.
 *
 * Returns a pointer to the value at the end of the path, or -1 as soon as
 * any segment is missing. Throws if `target` does not decode to an array.
 *
 * Fix: replaced the `var` declared inside the loop and the manual index
 * loop with `const`/`for...of`; behavior is unchanged.
 */
function seekPath(buffer, start, target, targetStart) {
  const ary = decode(target, targetStart || 0)
  if (!Array.isArray(ary)) throw new Error('path must be encoded array')
  for (const key of ary) {
    start = seekKey(buffer, start, key)
    if (start === -1) return -1
  }
  return start
}
seekPath(buffer, start, target, targetStart) {
targetStart = targetStart || 0
const ary = decode(target, targetStart)
if (!Array.isArray(ary)) throw new Error('path must be encoded array')
for (let i = 0; i < ary.length; i++) {
var string = ary[i]
start = seekKey(buffer, start, string)
if (start === -1) return -1
}
return start
},
/**
 * Compile a specialized seek function for the given key path.
 * Deliberately uses `new Function`: the generated source inlines the path
 * so the engine's JIT can optimize it (see createSeekPathSrc); the input
 * is the caller's own path, not untrusted data.
 */
function createSeekPath(target) {
  const src = createSeekPathSrc(target)
  const factory = new Function('seekKey', src)
  return factory(seekKey)
}
module.exports = {
seekKey,
seekKey2,
seekKeyCached,
seekPath,
createSeekPathSrc,
createSeekPath(target) {
return new Function('seekKey', createSeekPathSrc(target))(seekKey)
},
createSeekPath,
}
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc