bitsyntax
Advanced tools
Comparing version 0.0.1 to 0.0.2
module.exports.parse = require('./lib/parse').parse; | ||
module.exports.match = require('./lib/interp').match; | ||
module.exports.compile = require('./lib/compile').compile; | ||
module.exports.compile = require('./lib/compile').compile; | ||
module.exports.construct = require('./lib/constructor').construct; | ||
module.exports.write = require('./lib/constructor').write; | ||
module.exports.constructor = require('./lib/constructor').constructor; |
@@ -1,5 +0,4 @@ | ||
// Compile patterns to recognisers, and groups of patterns to dispatch | ||
// procedures. | ||
// Compile patterns to recognisers | ||
//var gen = require('uglify-js').uglify.gen_code; | ||
require('buffer-more-ints'); | ||
@@ -45,2 +44,11 @@ var parse = require('./parse').parse; | ||
// Emit recogniser source for a literal string segment.
//
// `segment.value` is the literal text. The returned source fragment
// expects `bin`, `offset`, `binsize`, `byteoffset` and `result` to be
// in scope where it is spliced in: it advances `offset` past the
// literal, returns false from the enclosing function if that runs off
// the end of the input, and otherwise leaves the decoded text in
// `result`.
function get_string(segment) {
  // A literal stands for its UTF-8 encoding, so measure it in bytes
  // rather than in UTF-16 code units.
  var strlen = Buffer.byteLength(segment.value, 'utf8');
  var strlenbits = strlen * 8;
  return "byteoffset = offset / 8;\n" +
    "offset += " + strlenbits + ";\n" +
    "if (offset > binsize) { return false; }\n" +
    // Buffer#toString takes (encoding, start, end); the encoding must
    // come first or the offsets are misinterpreted.
    "else { result = bin.toString('utf8', byteoffset, byteoffset + " + strlen + "); }\n";
}
function skip_bits(segment) { | ||
@@ -80,2 +88,5 @@ if (typeof segment.size === 'string') { | ||
break; | ||
case 'string': | ||
assign_result = get_string(segment); | ||
break; | ||
} | ||
@@ -146,4 +157,5 @@ var handle_result = "if (result === false) { return false; }\n"; | ||
module.exports.compile = function(str) { | ||
str = (arguments.length > 1) ? [].join.call(arguments, ',') : str; | ||
var p = parse(str); | ||
return compile_pattern(p); | ||
}; |
@@ -0,1 +1,2 @@ | ||
// -*- js-indent: 2 -*- | ||
// Interpreter for bit syntax AST. | ||
@@ -5,3 +6,3 @@ // Grammar: | ||
// pattern := segment ("," segment)* | ||
// segment := (value | var) (":" size)? ("/" specifier ("-" specifier)*)? | ||
// segment := (value | var) (":" size)? ("/" specifier ("-" specifier)*)? | string | ||
// var := "_" | identifier | ||
@@ -15,4 +16,2 @@ // size := integer | var | ||
// | ||
// TODO default specifiers, restrictions on specifiers, tail match, | ||
// free variables. | ||
@@ -22,3 +21,3 @@ // We'll use an object to represent each segment, and an array of | ||
// patterns; we'll just step through each to see if it works. We rely | ||
// a prior step to validate that it's a valid pattern. | ||
// on a hypothetical prior step to check that it's a valid pattern. | ||
@@ -29,3 +28,3 @@ // ? compile to intermediate instructions ? | ||
// { | ||
// type: string, | ||
// type: string, // 'string' is special case | ||
// size: integer | true, // true means 'all remaining' | ||
@@ -39,2 +38,3 @@ // name: string | null, // (may be '_') | ||
require('buffer-more-ints'); | ||
@@ -44,59 +44,15 @@ var debug = (process.env.DEBUG) ? | ||
function base_parse_int(binary, byteoffset, sizeinbits, bigendian, signed) { | ||
var size = sizeinbits / 8; | ||
var raw = 0, buffer = binary.slice(byteoffset); | ||
function parse_int(bin, off, sizeinbits, bigendian, signed) { | ||
var sizeInBytes = sizeinbits / 8; | ||
if (bigendian) { | ||
var s = size - 1; | ||
for (var i = 0; i < size; i++) { | ||
raw += (buffer[i] << ((s - i) * 8)); | ||
} | ||
return (signed) ? bin.readIntBE(sizeInBytes, off) : bin.readUIntBE(sizeInBytes, off); | ||
} | ||
else { | ||
for (var i = 0; i < size; i++) { | ||
raw += (buffer[i] << (i * 8)); | ||
} while (++i < size); | ||
return (signed) ? bin.readIntLE(sizeInBytes, off) : bin.readUIntLE(sizeInBytes, off); | ||
} | ||
if (signed) { | ||
var msb = Math.pow(2, sizeinbits - 1); | ||
if (raw > msb) { | ||
raw = -((msb * 2) - raw); | ||
} | ||
} | ||
return raw; | ||
} | ||
function node0_6_parse_int(bin, off, sizeinbits, bigendian, signed) { | ||
function parse_float(bin, off, sizeinbits, bigendian) { | ||
switch (sizeinbits) { | ||
case 8: | ||
return (signed) ? | ||
bin.readInt8(off) : | ||
bin.readUInt8(off); | ||
case 16: | ||
if (bigendian) { | ||
return (signed) ? bin.readInt16BE(off) : bin.readUInt16BE(off); | ||
} | ||
else { | ||
return (signed) ? bin.readInt16LE(off) : bin.readUInt16LE(off); | ||
} | ||
case 32: | ||
if (bigendian) { | ||
return (signed) ? bin.readInt32BE(off) : bin.readUInt32BE(off); | ||
} | ||
else { | ||
return (signed) ? bin.readInt32LE(off) : bin.readUInt32LE(off); | ||
} | ||
default: | ||
return base_parse_int(bin, off, sizeinbits, bigendian, signed); | ||
} | ||
} | ||
var parse_int = (Buffer.prototype.readInt8) ? | ||
node0_6_parse_int : base_parse_int; | ||
if (process.env.DEBUG || process.env.TEST) { | ||
module.exports.parse_int = parse_int; | ||
} | ||
function node0_6_parse_float(bin, off, sizeinbits, bigendian) { | ||
switch (sizeinbits) { | ||
case 32: | ||
return (bigendian) ? bin.readFloatBE(off) : bin.readFloatLE(off); | ||
@@ -110,43 +66,2 @@ case 64: | ||
function jspack_parse_float(bin0, off, sizeinbits, bigendian) { | ||
var bytes; | ||
var format; | ||
var bin = bin0.slice(off); | ||
switch (sizeinbits) { | ||
case 32: | ||
bytes = new Array(4); | ||
bytes[0] = bin[0]; | ||
bytes[1] = bin[1]; | ||
bytes[2] = bin[2]; | ||
bytes[3] = bin[3]; | ||
format = 'f'; | ||
break; | ||
case 64: | ||
bytes = new Array(8); | ||
bytes[0] = bin[0]; | ||
bytes[1] = bin[1]; | ||
bytes[2] = bin[2]; | ||
bytes[3] = bin[3]; | ||
bytes[4] = bin[4]; | ||
bytes[5] = bin[5]; | ||
bytes[6] = bin[6]; | ||
bytes[7] = bin[7]; | ||
format = 'd'; | ||
break; | ||
default: | ||
throw "Floats must be 32- or 64-bit"; | ||
} | ||
format = ((bigendian) ? '>' : '<') + format; | ||
return require('jspack').jspack.Unpack(format, bytes, 0); | ||
} | ||
// It probably makes little odds in speed whether we use jspack or | ||
// in-built functions; however, if I can avoid a dependency that's | ||
// good, and it may be that the in-built functions are reimplemented | ||
// in C in the future. | ||
var parse_float = (Buffer.prototype.readFloatBE) ? | ||
node0_6_parse_float : jspack_parse_float; | ||
function size_of(segment, bound) { | ||
@@ -255,2 +170,15 @@ var size = segment.size; | ||
// Consume a literal string segment from the input.
//
// Reads the enclosing `offset` (in bits), `binsize` and `binary`;
// advances `offset` past the literal. Returns false if that would run
// past `binsize`, otherwise the text decoded from the bytes consumed.
function get_string(segment) {
  debug("get_string"); debug(segment);
  // A literal stands for its UTF-8 encoding, so its extent in the
  // input is its byte length, not its length in characters.
  var len = Buffer.byteLength(segment.value, 'utf8');
  var byteoffset = offset / 8;
  offset += len * 8;
  if (offset > binsize) {
    return false;
  }
  return binary.slice(byteoffset, byteoffset + len).toString('utf8');
}
var patternlen = pattern.length; | ||
@@ -265,2 +193,5 @@ for (var i = 0; i < patternlen; i++) { | ||
switch (segment.type) { | ||
case 'string': | ||
result = get_string(segment); | ||
break; | ||
case 'integer': | ||
@@ -267,0 +198,0 @@ result = get_integer(segment); |
// Parse patterns in string form into the form we use for interpreting | ||
// (and later, for compiling). | ||
var ast = require('./pattern'); | ||
var peg = require('pegjs'), | ||
ast = require('./pattern'), | ||
path = require('path'); | ||
function compose() { | ||
var funcs = [].slice.call(arguments); | ||
return function(elem) { | ||
var result = elem; | ||
for (var i in funcs) { | ||
result = funcs[i](result); | ||
var grammar = require('fs').readFileSync( | ||
path.join(path.dirname(module.filename), 'grammar.pegjs')).toString(); | ||
var parser = peg.buildParser(grammar); | ||
function parse_pattern(string) { | ||
var segments = parser.parse(string); | ||
for (var i=0, len = segments.length; i < len; i++) { | ||
var s = segments[i]; | ||
if (s.string != undefined) { | ||
segments[i] = ast.string(s.string); | ||
} | ||
return result; | ||
else if (s.value != undefined) { | ||
segments[i] = ast.value(s.value, s.size, s.specifiers); | ||
} | ||
else if (s.name != undefined) { | ||
segments[i] = ast.variable(s.name, s.size, s.specifiers); | ||
} | ||
else { | ||
throw "Unknown segment " + s; | ||
} | ||
} | ||
return segments; | ||
} | ||
function map(array0, func) { | ||
var array = array0.slice(); | ||
for (var i in array.slice()) { | ||
array[i] = func(array[i]); | ||
} | ||
return array; | ||
} | ||
function parse_pattern(string) { | ||
return map( | ||
string.split(','), | ||
compose( | ||
function(s) { return s.replace(/\s/g, ''); }, | ||
parse_segment)); | ||
} | ||
module.exports.parse = parse_pattern; | ||
// From | ||
// http://stackoverflow.com/questions/18082/validate-numbers-in-javascript-isnumeric | ||
function isNumber(n) { | ||
return !isNaN(parseFloat(n)) && isFinite(n); | ||
} | ||
var PARTS = /^([_a-zA-Z0-9\.]*)(?:\:([a-zA-Z_0-9]+))?(?:\/([a-z0-9:-]*))?$/; | ||
function parse_segment(string) { | ||
var parts = PARTS.exec(string); | ||
var nameOrValue = parts[1]; | ||
var size = parts[2]; | ||
var specifiers = (parts[3] || '').split('-'); | ||
if (size !== undefined && isNumber(size)) { | ||
size = parseInt(size); | ||
} | ||
return ((isNumber(nameOrValue)) ? | ||
ast.value : | ||
ast.variable) (nameOrValue, size, specifiers); | ||
} | ||
module.exports.parse = function(str) { | ||
str = (arguments.length > 1) ? [].join.call(arguments, ',') : str; | ||
return parse_pattern(str); | ||
}; |
@@ -0,1 +1,2 @@ | ||
// -*- js-indent-level: 2 -*- | ||
// Constructing patterns | ||
@@ -29,2 +30,3 @@ | ||
// "206". `specifiers0` is an array. | ||
function value(val, size, specifiers0) { | ||
@@ -42,2 +44,9 @@ var specifiers = set(specifiers0); | ||
// A string can appear as a literal, but it must appear without | ||
// specifiers. | ||
function string(val) { | ||
return {value: val, type: 'string'}; | ||
} | ||
module.exports.string = string; | ||
var TYPES = {'integer': 1, 'binary': 1, 'float': 1}; | ||
@@ -94,3 +103,3 @@ function type_in(specifiers) { | ||
function size_of(segment, type, size, unit) { | ||
if (size !== undefined) { | ||
if (size !== undefined && size !== '') { | ||
return size; | ||
@@ -97,0 +106,0 @@ } |
{ | ||
"author": "Michael Bridgen <mikeb@squaremobius.net>", | ||
"author": { | ||
"name": "Michael Bridgen", | ||
"email": "<mikeb@squaremobius.net>" | ||
}, | ||
"name": "bitsyntax", | ||
"description": "Pattern-matching on byte buffers", | ||
"version": "0.0.1", | ||
"version": "0.0.2", | ||
"repository": { | ||
@@ -15,6 +18,9 @@ "type": "git", | ||
"engines": { | ||
"node": ">0.4" | ||
"node": ">=0.6" | ||
}, | ||
"dependencies": {}, | ||
"dependencies": { | ||
"pegjs": "~0.7", | ||
"buffer-more-ints": "" | ||
}, | ||
"devDependencies": {} | ||
} |
# Byte-wise matching for Node.JS | ||
Gives a compact syntax for parsing binary data, derived from [Erlang's | ||
bit syntax](http://www.erlang.org/doc/programming_examples/bit_syntax.html#id64858). | ||
Gives a compact syntax for parsing and constructing byte buffers, | ||
derived from [Erlang's bit | ||
syntax](http://www.erlang.org/doc/programming_examples/bit_syntax.html#id64858). | ||
@@ -27,2 +28,16 @@ var bitsyntax = require('bitsyntax'); | ||
Patterns can also be used to construct binaries from supplied values: | ||
var spdyDataFrame = require('bitsyntax') | ||
.constructor('streamId:32, flags:8, length:24, data/binary'); | ||
spdyDataFrame({streamId:5, flags:0, length:bin.length, data:bin}); | ||
One or more segments of a pattern may also be supplied in multiple | ||
arguments, if that is more convenient; this makes it easier to split a | ||
long pattern over lines: | ||
var p = bitsyntax.compile('size:8, payload:size/binary', | ||
'rest/binary'); | ||
## API | ||
@@ -32,4 +47,4 @@ | ||
Compiles a pattern given as a string to a function that will return | ||
either a map of bindings, or `false`, given a buffer and optionally an | ||
Compiles a pattern to a function that will return either a map of | ||
bindings, or `false`, given a buffer and optionally an | ||
environment. The environment contains values for the bound variables | ||
@@ -45,6 +60,12 @@ in the pattern (if there are any). | ||
In combination, equivalent to compile; may be useful if you want to | ||
In combination, equivalent to `compile`; may be useful if you want to | ||
examine the internal structure of patterns. | ||
var p = bitsyntax.parse('header:headerSize/binary, rest/binary'); | ||
`parse` takes strings as for `compile`, and returns the internal | ||
representation of the pattern. `match` takes this representation, a | ||
buffer, and optionally an environment, and returns the bindings or | ||
false (as with `compile`). | ||
var p = bitsyntax.parse('header:headerSize/binary', | ||
'rest/binary'); | ||
var b = bitsyntax.match(p, new Buffer([1, 2, 3, 4, 5]), | ||
@@ -55,2 +76,14 @@ {headerSize: 3}); | ||
### `constructor` | ||
Takes a pattern and returns a function that will construct a byte | ||
buffer, given values for the variables mentioned in the pattern. | ||
var cons = bitsyntax.constructor('size:8, bin/binary'); | ||
cons({size:6, bin:new Buffer('foobar')}); | ||
// => <Buffer 06 66 6f 6f 62 61 72> | ||
Patterns supplied to constructors are slightly different to patterns | ||
supplied for matching, as noted below. | ||
## Patterns | ||
@@ -83,4 +116,9 @@ | ||
In constructors, the literal value will be copied into the result | ||
binary according to the type it is given. A variable name indicates a | ||
space into which a value supplied to the constructor will be copied. | ||
The special variable name `_` discards the value matched; i.e., it | ||
simply skips over the appropriate number of bits in the input. | ||
simply skips over the appropriate number of bits in the input. `_` is | ||
not allowed in constructors. | ||
@@ -116,6 +154,11 @@ ### Size and unit | ||
In constructors, numbers will be rounded, masked or padded to fit the | ||
size and units given; for example, `'256:8'` gives the binary | ||
`Buffer<00>` because the lowest eight bits are 0; `'255:16'` gives the | ||
binary `Buffer<00 ff>`. | ||
### Type name specifier | ||
One of `integer`, `binary`, `float`. If not given, the default is | ||
`integer`. | ||
One of `integer`, `binary`, `string`, `float`. If not given, the | ||
default is `integer`. | ||
@@ -131,2 +174,4 @@ An integer is a big- or little-endian, signed or unsigned | ||
A string is a UTF8 string consisting of the given number of bytes. | ||
A float is a 32- or 64-bit IEEE754 floating-point value (this is the | ||
@@ -143,5 +188,5 @@ standard JavaScript uses, as do Java and Erlang). | ||
A specifier of `big` means the integer will be parsed as big-endian, | ||
and `little` means the integer will be parsed as little-endian. The | ||
default is big-endian. | ||
A specifier of `big` means the integer will be parsed (or written into | ||
the result) as big-endian, and `little` means the integer will be | ||
parsed or written as little-endian. The default is big-endian. | ||
@@ -154,2 +199,17 @@ ### Signedness specifier | ||
Signedness is ignored in constructors. | ||
### Literal strings | ||
A quoted string appearing in a pattern is a shorthand for the bytes in | ||
its UTF8 encoding. For example, | ||
"foobar", _/binary | ||
matches any buffer that starts with the bytes `0x66, 0x6f, 0x6f, 0x62, | ||
0x61, 0x72`. | ||
When used in a constructor, a quoted string is copied verbatim into | ||
the result. | ||
## Examples | ||
@@ -156,0 +216,0 @@ |
@@ -1,4 +0,4 @@ | ||
var match = require('../lib/interp').match; | ||
var parse = require('../lib/parse').parse; | ||
var compile = require('../lib/compile').compile; | ||
var match = require('../').match; | ||
var parse = require('../').parse; | ||
var compile = require('../').compile; | ||
var assert = require('assert'); | ||
@@ -34,5 +34,16 @@ | ||
[[[245, 23, 97, 129], -2124343307]]], | ||
['n:4/signed-little-unit:8', | ||
[[[245, 23, 97, 129], -2124343307]]] | ||
[[[245, 23, 97, 129], -2124343307]]], | ||
['n:64', | ||
[[[1,2,3,4,5,6,7,8], 72623859790382856]]], | ||
['n:64/signed', | ||
[[[255,2,3,4,5,6,7,8], -71491328285473016]]], | ||
['n:64/little', | ||
[[[1,2,3,4,5,6,7,8], 578437695752307201]]], | ||
['n:64/little-signed', | ||
[[[1,2,3,4,5,6,7,255], -70080650589044223]]], | ||
['n:8/signed-unit:8-little', | ||
[[[1,2,3,4,5,6,7,255], -70080650589044223]]], | ||
]; | ||
@@ -76,3 +87,3 @@ | ||
[[0, 0, 0, 0, 0, 0, 0, 0], 0.0]]], | ||
['n:4/float-unit:8', | ||
@@ -134,1 +145,46 @@ [[[64,73,15,219], Math.PI], | ||
}); | ||
var VAR_TESTS = [ | ||
['size, n:size', | ||
[[[8, 5], 5], | ||
[[32, 0, 0, 0, 167], 167]]], | ||
['size, n:size/binary', | ||
[[[2, 5, 6], new Buffer([5, 6])]]], | ||
['a, b:a, n:b', | ||
[[[8, 32, 0, 0, 2, 100], 612]]] | ||
]; | ||
suite("Environment", | ||
function() { | ||
VAR_TESTS.forEach(function(p) { | ||
var pattern = parse(p[0]); | ||
var cpattern = compile(p[0]); | ||
p[1].forEach(function(tc) { | ||
test(p[0], function() { | ||
assert.deepEqual(tc[1], match(pattern, new Buffer(tc[0])).n); | ||
}); | ||
test(p[0], function() { | ||
assert.deepEqual(tc[1], cpattern(new Buffer(tc[0])).n); | ||
}); | ||
}); | ||
}); | ||
}); | ||
// Literal-string cases: [pattern, input text, expected value bound to `n`].
// Declared with `var` so it is not created as an implicit global.
var STRING_TESTS = [
  ['"foobar", n:8', "foobarA", 'A'.charCodeAt(0)],
  ['n:8, "foobar", _/binary', "CfoobarGARBAGE", 'C'.charCodeAt(0)],
  ['"foo, :-bar\\"", n:8, "another"', 'foo, :-bar"Zanother', 'Z'.charCodeAt(0)]
];
suite("String", | ||
function() { | ||
STRING_TESTS.forEach(function(p) { | ||
var pattern = parse(p[0]); | ||
test(p[0], function() { | ||
var res = match(pattern, new Buffer(p[1])); | ||
assert.equal(res.n, p[2]); | ||
}); | ||
}); | ||
}); |
Bad dependency semver
QualityPackage has dependencies with an invalid semantic version. This could be a sign of beta, low quality, or unmaintained dependencies.
Found 1 instance in 1 package
Filesystem access
Supply chain riskAccesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
Environment variable access
Supply chain riskPackage accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 1 instance in 1 package
34212
13
788
245
2
2
2
+ Addedbuffer-more-ints@
+ Addedpegjs@~0.7
+ Addedpegjs@0.7.0(transitive)