Comparing version 1.6.2 to 1.6.3
index.js
@@ -298,6 +298,7 @@ var varint = require('varint')
   var t_length = (t_tag >> TAG_SIZE) + varint.decode.bytes
-  for (; c + t_length < len; ) {
-    var b_tag = varint.decode(buffer, start + c)
+  for (; c < len; ) {
+    var key_tag = varint.decode(buffer, start + c)
     if (
-      b_tag === t_tag &&
+      key_tag === t_tag &&
       buffer.compare(
@@ -311,7 +312,12 @@ target,
     )
-      return start + c + (b_tag >> TAG_SIZE) + varint.decode.bytes
-    else {
-      c += (b_tag >> TAG_SIZE) + varint.decode.bytes //key
-      c += (varint.decode(buffer, start + c) >> TAG_SIZE) + varint.decode.bytes //value
-    }
+      return start + c + t_length
+    c += varint.decode.bytes
+    var key_len = key_tag >> TAG_SIZE
+    c += key_len
+    var value_tag = varint.decode(buffer, start + c)
+    c += varint.decode.bytes
+    var value_len = value_tag >> TAG_SIZE
+    c += value_len
   }
@@ -318,0 +324,0 @@ return -1
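The rewritten loop decodes each key tag and each value tag and skips their lengths explicitly, instead of re-deriving the skip from a partially decoded tag as the old else-branch did. A minimal self-contained sketch of that walk, assuming the tag layout from the readme (type in the low 3 bits, STRING = 0, OBJECT = 5); names mirror the diff, but this is an illustration, not the module's exact code:

```js
var varint = require('varint')
var TAG_SIZE = 3

// Sketch: scan the object at `start` for a key whose raw bytes equal
// `target` (a Buffer holding just the key string, not bipf-encoded).
function seekKeySketch(buffer, start, target) {
  var tag = varint.decode(buffer, start) // the object's own tag
  if ((tag & 7) !== 5) return -1 // 5 = OBJECT in the readme's type table
  var contentStart = start + varint.decode.bytes
  var len = tag >> TAG_SIZE // byte length of the object's contents
  var c = 0
  while (c < len) {
    var key_tag = varint.decode(buffer, contentStart + c)
    var key_start = contentStart + c + varint.decode.bytes
    if (
      (key_tag & 7) === 0 && // STRING key
      key_tag >> TAG_SIZE === target.length && // same length
      buffer.compare(target, 0, target.length, key_start, key_start + target.length) === 0
    )
      return key_start + target.length // pointer to the value's tag
    c += varint.decode.bytes + (key_tag >> TAG_SIZE) // skip key tag + key bytes
    var value_tag = varint.decode(buffer, contentStart + c)
    c += varint.decode.bytes + (value_tag >> TAG_SIZE) // skip value tag + value bytes
  }
  return -1
}
```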
package.json
 {
   "name": "bipf",
   "description": "binary in-place format",
-  "version": "1.6.2",
+  "version": "1.6.3",
   "homepage": "https://github.com/ssbc/bipf",
@@ -22,2 +22,9 @@ "repository": {
   },
+  "scripts": {
+    "test": "tape test/index.js | tap-arc && tape test/compare.js | tap-arc",
+    "coverage": "nyc --reporter=lcov npm test",
+    "format-code": "prettier --write \"*.js\" \"test/*.js\"",
+    "format-code-staged": "pretty-quick --staged --pattern \"*.js\" --pattern \"test/*.js\"",
+    "benchmark": "node test/perf.js"
+  },
   "husky": {
@@ -32,11 +39,3 @@ "hooks": {
   ],
-  "license": "MIT",
-  "scripts": {
-    "test": "tape test/index.js | tap-arc && tape test/compare.js | tap-arc",
-    "coverage": "nyc --reporter=lcov npm test",
-    "format-code": "prettier --write \"*.js\" \"test/*.js\"",
-    "format-code-staged": "pretty-quick --staged --pattern \"*.js\" --pattern \"test/*.js\"",
-    "benchmark": "node test/perf.js"
-  },
"readme": "# bipf\n\nBinary In-Place Format. A binary format designed for in-place (without parsing) reads,\nwith schemaless json-like semantics.\n\n## motivation\n\n### in-place reads\n\nIn a database there are many cases where you need to read a bunch of records,\nfilter out most of it (if one or two fields do not match) and then immediately write\nwhats left to a network socket. With json, this means parsing possibly hundreds of thousands\nof json objects (which is suprisingly slow), and then reserializing whats left.\nAn inplace format doesn't actually require parsing as a whole at all. You only need to parse\nthe fields you actually read, and using length delimited fields instead of escapes, means\nyou do not have to look at every byte to parse a field.\n\n\n### length delimited collections\n\nUnfortunately, most binary json-like formats (such as msgpack and cbor) use element counts\non collections (objects and arrays, in json-land) this means to find the end of a collection,\nyou have to step past each item in it (including the fields in any object contained inside of it).\nHowever, if the collections are length delimited, meaning marked by the encoded byte length of the object,\nnot the number of items inside it, then it's easy to jump right to the end of the object in one go.\nFor this reason, databases (for example, mongodb, and couchdb) use length delimited collections.\n\n## format\n\nEvery type of field is encoded with a type tag and a length packed into a varint.\nThis means that short types have a one byte tag, and one byte value.\nThe type is stored in the lowest 3 bits, and the length the higher bits.\nSince a varint stores values up to 128 bits in a single byte, values less than 16 bytes\nlong have a one byte tag, and values up to 8k long have a two byte tag, values up to 1048576 bytes\nhave a 3 byte tag, and so on.\n\n```\n<tag: varint(encoding_length(value) << 3 | type)><value>\n```\nthe type indicates the encoding of the value.\nvalid types are:\n\n```\nSTRING : 0 // utf8 encoded string\nBUFFER : 1 // raw binary buffer\nINT : 2 // little endian 32 bit integer\nDOUBLE : 3 // little endian 64 bit float\nARRAY : 4 // array of any other value\nOBJECT : 5 // list of string: value pairs\nBOOLNULL: 6 // a boolean, or null.\nEXTENDED: 7 // custom type. specific type should be indicated by varint at start of buffer.\n```\n\nAll values must have a correct length field. This makes it possible\nto traverse all fields without looking at the values. Theirfor it is possible\nto quickly jump to any subvalue if you know it's path. If you are looking for\na particular string, you can also skip any with the wrong length!\nSince object and array fields also begin with a length, you can jump past\nthem if you know the do not contain the value you are looking for.\nThis means that seeking inside a more tree like object is more efficient\nthan seeking into a more list like object!\n## performance\n\nThis design is optimized for the performance of in-place\nreads. Encoding is expected to be slower because\nof the need to calculate the length of collections\nbefore encoding them. If encoding is within half as fast\nas a format intended for encoding perf, that is good.\nOf course, the intention with an in-place read system\nis that you encode _once_ and then never decode. 
Just\npass around the binary object, reading fields out when\nnecessary.\n\nBecause of the length encoding, the ability to update\nin-place is very limited (not recommended actualy)\nbut if you are building a system around immutable data,\nthat is not much of a problem. Although, since subobjects\nare fully valid as an encoded value, you can easily\ncopy a subobject into a new object, etc, without re-encoding.\n\n## benchmark\n\nI did a simple benchmark, where I encoded and decoded\nthis module's package.json file in various ways. Please\nnot that I am comparing the performance of code written\nin C with code written in javascript. If the javascript\nis within 10x the performance of the C then we are doing\nwell! (and a C implementation would likely close that gap)\n\nThe measurement is run 10k operations, then divide by number of\nms taken, higher number means more faster!\n\nbenchmark code is in `./test/perf.js`\n\n```\noperation, ops/ms\nbinary.encode 62.61740763932373\nJSON.stringify 325.7328990228013\nbinary.decode 83.40283569641367\nJSON.parse 242.13075060532688\nJSON.parse(buffer) 198.4126984126984\nJSON.stringify(JSON.parse()) 127.55102040816327\nbinary.seek(string) 500\nbinary.seek2(encoded) 1219.5121951219512\nbinary.seek(buffer) 1333.3333333333333\nbinary.seekPath(encoded) 558.659217877095\nbinary.seekPath(compiled) 1265.8227848101267\nbinary.compare() 1785.7142857142858\n```\n\nAs expected, `binary.encode` is much slower than `JSON.stringify`,\nbut it's only 6 times worse.\nBut the interesting comparison is `JSON.stringify(JSON.parse())`\nand `binary.seek(buffer)`. Often, in implementing\na database, you need to read something from disk,\nexamine one or two fields (to check if it matches a query)\nand then write it to network.\n\n(note: the `binary.seek` operation is fairly realistic,\nwe seek to the \"dependencies\" object, then look up \"varint\"\ninside of that, then decode the version range of \"varint\".\nSo it's two comparisons and decoding a string out)\n\nSo, in JSON land, that usually means reading it,\nparsing it, checking it, stringifying it again.\nThis involves reading each byte in the input and\nallocating memory for the parsed object.\nThen traversing that object in memory and writing\nsomething to a string (more memory allocation,\nand all this memory allocation\nmeans the garbage collector needs to handle it too)\n\nBut if we have in-place reads, we just read raw binary,\nseek into the appropiate places to check wether it's\nthe objects we want, and then write it to the network\ndirectly. We don't allocate _any_ new memory after\nreading it.\n\nFurther benchmarks and tests are necessary, but that\nit can be this fast using a _javascript implementation_\nis impressive.\n\n## cannonicisity\n\nFor a system with signatures, it's highly important\nthat data is _cannonical_. There should be exactly\none way to encode a given data structure. There are\na few edge cases here that need to be checked for.\n(not implemented yet)\n\n* varints must not be zero padded\n* chrome and firefox preserve order of object keys,\n but any integer keys greater than zero come first,\n and are in increasing order.\n* the length of subfields *must* be checked to not excede their container's length. 
(This is a security issue)\n\nThese properties can all be checked by traversing the\ntags but without reading the keys or values.\nI will not consider this module _ready_ until there\nare tests that cover these invalid cases, to ensure\nthat implementations throw an error.\n\n## api\n\n`encode, decode, encodingLength` follow the interface\nspecified by [`abstract-encoding`](https://github.com/mafintosh/abstract-encoding)\n\n### encode (value, buffer, start) => length\n\nwrite `value` to `buffer` from start.\nreturns the number of bytes used.\n\n### decode (buffer, start) => value\n\nread the next value from `buffer` at `start`.\nreturns the value, and sets `decode.bytes` to number\nof bytes used.\n\n### encodingLength (value) => length\n\nreturns the length needed to encode `value`\n\n### getValueType (value) => type\n\nreturns the type tag that will be used to encode this type.\n\n### getEncodedType (buffer, start) => type\n\nget the `type` tag at `start`\n\n### types.{string,buffer,int,double,array,object,boolnull,reserved}\n\nan object containing the type tags.\n\n### iterate (buffer, start, fn) => void\n\nIf the field at `start` is an object or array, then `iterate` will call the `fn`\nwith arguments `fn(buffer, pointer, key)` for each subfield. If the field at\n`start` is not an array or object, this returns `-1`. You can stop/abort the\niteration by making `fn` return any truthy value.\n\n### seekKey (buffer, start, target) => pointer\n\nseek for a key `target` within an object. If\n`getEncodedType(buffer, start) !== types.object` then will return\n`-1`. Otherwise, seekKey will iterate over the encoding object\nand return a pointer to where it starts.\n\nSince this defines a recursive encoding, a pointer\nto any valid sub-encoding is a valid start value.\n\n``` js\nvar obj = {\n foo: 1,\n bar: true,\n baz: 'hello'\n}\n//allocate a correctly sized buffer\nvar length = b.encodingLength(obj)\nvar buffer = Buffer.alloc(length)\n\n//encode object to buffer\nb.encode(obj, buffer, 0)\n\n//parse entire object and read a single value\nconsole.log(b.decode(buffer, 0).baz)\n\n//seek and decode a single value\nconsole.log(b.decode(buffer, b.seekKey(buffer, 0, 'baz')))\n```\n\nsee performance section for discussion on the performance\nof seek - if it's only needed to parse a couple of elements,\nit can be significantly faster than parsing.\n\n### seekKeyCached (buffer, start, target) => pointer\n\nSame as `seekKey`, but uses a cache to avoid re-seeking the pointers if the\nsame arguments have been provided in the past. However, `target` must be a\nstring, not a buffer.\n\n### seekPath (buffer, start, array_of_buffers) => pointer\n\nThe same as `seekKey`, except for a recursive path.\n`path` should be an array of node buffers, just holding the key values,\nnot encoded as `bipf`.\n\n### createSeekPath(path) => seekPath(buffer, start)\n\ncompiles a javascript function that does a seekPath.\nthis is significantly faster than iterating over a javascript\narray and then looking for each thing, because it will get optimized\nby the js engine's jit compiler.\n\n\n## License\n\nMIT\n\n\n" | ||
-}
+  "license": "MIT"
+}
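The readme removed from package.json above (registry metadata) documents the core tag layout: `<varint(encoding_length(value) << 3 | type)><value>`. A quick sketch of what that packing looks like, using the `varint` package; the `STRING` constant is assumed from the readme's type table, not imported from bipf:

```js
var varint = require('varint')

var STRING = 0 // from the readme's type table; illustrative only

// tag = varint(byte_length_of_value << 3 | type)
var value = Buffer.from('hello')
var tag = varint.encode((value.length << 3) | STRING)

console.log(tag) // [ 40 ]: 5 << 3 | 0 fits in a single varint byte
console.log(varint.decode(Buffer.from(tag)) >> 3) // 5, the value's length
console.log(varint.decode(Buffer.from(tag)) & 7) // 0, the STRING type
```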
README.md
@@ -236,2 +236,7 @@ # bipf
+### seekKey2 (buffer, start, target, target_start) => pointer
+
+Same as `seekKey`, except `target` must be an encoded value. This is
+usually done using `allocAndEncode`. This is a bit faster.
+
 ### seekKeyCached (buffer, start, target) => pointer
@@ -238,0 +243,0 @@
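Per the documentation added above, `seekKey2` takes a bipf-encoded target, typically produced once with `allocAndEncode` and then reused. A short usage sketch based on those signatures (the object and key here are just examples):

```js
var bipf = require('bipf')

var record = bipf.allocAndEncode({ name: 'bipf', version: '1.6.3' })
var versionKey = bipf.allocAndEncode('version') // encode the key once

// ...then reuse the encoded key across many lookups:
var pointer = bipf.seekKey2(record, 0, versionKey, 0)
console.log(bipf.decode(record, pointer)) // '1.6.3'
```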
test/index.js
@@ -161,2 +161,12 @@ const tape = require('tape')
+tape('seekKey2() on an object', (t) => {
+  const objEncoded = bipf.allocAndEncode({ x: 10, y: 20 })
+  const key = bipf.allocAndEncode('y')
+  const pointer = bipf.seekKey2(objEncoded, 0, key, 0)
+  t.equals(pointer, 10)
+  const twenty = bipf.decode(objEncoded, pointer)
+  t.equals(twenty, 20)
+  t.end()
+})
+
 tape('seekKeyCached() on an object with buffer target', (t) => {
@@ -163,0 +173,0 @@ const objEncoded = bipf.allocAndEncode({ x: 10, y: 20 })
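The asserted pointer of 10 follows from the tag layout in the readme. Working through the encoding of `{ x: 10, y: 20 }` by hand (a sketch, not module output):

```
offset 0   object tag: varint(14 << 3 | 5)   1 byte (contents are 14 bytes)
offset 1   key tag:    varint(1 << 3 | 0)    1 byte
offset 2   'x'                               1 byte
offset 3   value tag:  varint(4 << 3 | 2)    1 byte (int32)
offset 4   10 as little endian int32         4 bytes
offset 8   key tag + 'y'                     2 bytes
offset 10  value tag for 20, where seekKey2 points
```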
test/perf.js
@@ -127,2 +127,12 @@ var BIPF = require('../')
 // ---
+start = Date.now()
+var dependencies = Buffer.from('dependencies')
+var varint = Buffer.from('varint')
+for (var i = 0; i < N; i++) {
+  BIPF.decode(b, BIPF.seekKey(b, BIPF.seekKey(b, 0, dependencies), varint))
+}
+console.log('BIPF.seek(buffer)', N / (Date.now() - start))
 var _varint = encode('varint'),
@@ -138,15 +148,11 @@ _dependencies = encode('dependencies')
 console.log('BIPF.seek2(encoded)', N / (Date.now() - start))
 // ---
 start = Date.now()
-var dependencies = Buffer.from('dependencies')
-var varint = Buffer.from('varint')
 for (var i = 0; i < N; i++) {
-  var c, d
   BIPF.decode(
     b,
-    (d = BIPF.seekKey(b, (c = BIPF.seekKey(b, 0, dependencies)), varint))
+    BIPF.seekKey2(b, BIPF.seekKey2(b, 0, _dependencies, 0), _varint, 0)
   )
 }
-console.log('BIPF.seek(buffer)', N / (Date.now() - start))
+console.log('BIPF.seek2(encoded) second run', N / (Date.now() - start))
 // ---
@@ -153,0 +159,0 @@
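The benchmark that was replaced duplicated the earlier `seek(buffer)` run; the new version times `seekKey2` with targets pre-encoded outside the loop, so the per-lookup cost excludes encoding the keys. All of these benchmarks share one harness convention, sketched below (the pattern, not the file's literal contents):

```js
// Time N iterations and report N / elapsed-ms: a higher number is faster.
var N = 10000
var start = Date.now()
for (var i = 0; i < N; i++) {
  // ...operation under test, e.g. a seekKey2 lookup...
}
console.log('ops/ms', N / (Date.now() - start))
```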
License Policy Violation

License: This package is not allowed per your license policy. Review the package's license to ensure compliance. Found 1 instance in 1 package.