kiwi-schema
Advanced tools
Comparing version 0.0.14 to 0.0.15
@@ -16,2 +16,3 @@ #!/usr/bin/env node | ||
' --cpp [PATH] Generate C++ code.', | ||
' --skew [PATH] Generate Skew code.', | ||
' --root-type [NAME] Set the root type for JSON.', | ||
@@ -28,2 +29,3 @@ ' --to-json [PATH] Convert a binary file to JSON.', | ||
'--cpp': null, | ||
'--skew': null, | ||
'--root-type': null, | ||
@@ -92,2 +94,7 @@ '--to-json': null, | ||
// Generate Skew code | ||
if (flags['--skew'] !== null) { | ||
fs.writeFileSync(flags['--skew'], kiwi.compileSchemaSkew(content)); | ||
} | ||
// Convert a binary file to JSON | ||
@@ -94,0 +101,0 @@ if (flags['--to-json'] !== null) { |
411
kiwi.js
@@ -27,3 +27,3 @@ var kiwi = exports || kiwi || {}, exports; | ||
this._index = 0; | ||
this.length = 0; | ||
this.length = data ? data.length : 0; | ||
} | ||
@@ -35,9 +35,2 @@ | ||
ByteBuffer.prototype.seekTo = function(index) { | ||
if (index > this._data.length) { | ||
throw new Error('Index out of bounds'); | ||
} | ||
this._index = index; | ||
}; | ||
ByteBuffer.prototype.readByte = function() { | ||
@@ -502,3 +495,3 @@ if (this._index + 1 > this._data.length) { | ||
function compileDecodeJS(definition, definitions) { | ||
function compileDecode(definition, definitions) { | ||
var lines = []; | ||
@@ -614,3 +607,3 @@ var indent = ' '; | ||
function compileEncodeJS(definition, definitions) { | ||
function compileEncode(definition, definitions) { | ||
var lines = []; | ||
@@ -754,5 +747,5 @@ | ||
js.push(''); | ||
js.push(name + '[' + quote('decode' + definition.name) + '] = ' + compileDecodeJS(definition, definitions) + ';'); | ||
js.push(name + '[' + quote('decode' + definition.name) + '] = ' + compileDecode(definition, definitions) + ';'); | ||
js.push(''); | ||
js.push(name + '[' + quote('encode' + definition.name) + '] = ' + compileEncodeJS(definition, definitions) + ';'); | ||
js.push(name + '[' + quote('encode' + definition.name) + '] = ' + compileEncode(definition, definitions) + ';'); | ||
break; | ||
@@ -1234,2 +1227,396 @@ } | ||
// Skew Compiler | ||
(function() { | ||
var ByteBuffer = kiwi.ByteBuffer; | ||
/**
 * Removes the last entry of `lines` when that entry is the empty string.
 * The array is modified in place; any other trailing entry (or an empty
 * array) is left untouched.
 *
 * @param {string[]} lines - Output lines accumulated so far.
 */
function popTrailingNewline(lines) {
  var lastEntry = lines[lines.length - 1];
  if (lastEntry !== '') {
    return;
  }
  lines.pop();
}
/**
 * Returns the Skew expression used to initialise the backing variable of a
 * field.
 *
 * Arrays and strings start as `null`, booleans as `false`, integer types as
 * `0` and floats as `0.0`. For user-defined types the definition is looked up
 * in `definitions`: enums default to their first member (or to a cast of 0
 * when the enum has no members); every other kind defaults to `null`.
 *
 * @param {Object} definitions - Map from definition name to definition.
 * @param {Object} field - Field descriptor (`type`, `isArray`, `name`).
 * @returns {string} Skew source text for the default value.
 * @throws {Error} If `field.type` is neither a native type nor a key of
 *     `definitions`.
 */
function skewDefaultValueForField(definitions, field) {
  if (field.isArray) {
    return 'null';
  }

  switch (field.type) {
    case 'bool': return 'false';
    case 'byte':
    case 'int':
    case 'uint': return '0';
    case 'float': return '0.0';
    case 'string': return 'null';
  }

  var def = definitions[field.type];

  // Fail with a descriptive message instead of a TypeError on `def.kind`
  // when the field refers to a type that was never defined.
  if (!def) {
    throw new Error('Invalid type ' + JSON.stringify(field.type) + ' for field ' + JSON.stringify(field.name));
  }

  if (def.kind === 'ENUM') {
    if (def.fields.length > 0) {
      return '.' + def.fields[0].name;
    }
    return '0 as ' + field.type;
  }

  return 'null';
}
/**
 * Maps a schema field to the name of the Skew type that stores it.
 *
 * Native schema types are translated through a fixed table; any other type
 * name is passed through unchanged. Array fields are wrapped in `List<...>`.
 *
 * @param {Object} field - Field descriptor (`type`, `isArray`).
 * @returns {string} The Skew type name.
 */
function skewTypeForField(field) {
  var nativeTypes = {
    bool: 'bool',
    byte: 'int',
    int: 'int',
    uint: 'int',
    float: 'double',
    string: 'string'
  };

  // Own-property check so that names such as "toString" are not mistaken
  // for native types.
  var isNative = Object.prototype.hasOwnProperty.call(nativeTypes, field.type);
  var elementType = isNative ? nativeTypes[field.type] : field.type;

  return field.isArray ? 'List<' + elementType + '>' : elementType;
}
/**
 * Generates Skew source code for a schema.
 *
 * For every enum an `enum` plus a namespace with `encode`/`decode` lookup
 * helpers is emitted. For every struct and message a class is emitted with
 * presence flags, accessors and an `encode` method, followed by a namespace
 * holding the matching `decode` functions. When the schema has a package the
 * whole output is wrapped in a namespace of that name.
 *
 * @param {string|Object} schema - Schema text (parsed with
 *     `kiwi.parseSchema`) or an already parsed schema exposing `package` and
 *     `definitions`.
 * @returns {string} The generated Skew source, terminated by a newline.
 * @throws {Error} If a field refers to a type that is neither native nor
 *     defined in the schema. Unknown definition kinds are reported through
 *     `error`.
 */
function compileSchemaSkew(schema) {
  if (typeof schema === 'string') {
    schema = kiwi.parseSchema(schema);
  }

  var definitions = {};
  var indent = '';
  var lines = [];

  if (schema.package !== null) {
    lines.push('namespace ' + schema.package + ' {');
    indent += ' ';
  }

  // Index all definitions by name first so fields can refer to types that
  // are declared later in the schema.
  for (var i = 0; i < schema.definitions.length; i++) {
    var definition = schema.definitions[i];
    definitions[definition.name] = definition;
  }

  for (var i = 0; i < schema.definitions.length; i++) {
    var definition = schema.definitions[i];

    switch (definition.kind) {
      case 'ENUM': {
        // Name -> value and value -> name tables, emitted as Skew map
        // literals by stripping the quotes from their JSON form.
        var encode = {};
        var decode = {};

        lines.push(indent + 'enum ' + definition.name + ' {');
        for (var j = 0; j < definition.fields.length; j++) {
          var field = definition.fields[j];
          encode[field.name] = field.value;
          decode[field.value] = field.name;
          lines.push(indent + ' ' + field.name);
        }
        lines.push(indent + '}');
        lines.push('');

        lines.push(indent + 'namespace ' + definition.name + ' {');
        lines.push(indent + ' const _encode = ' + JSON.stringify(encode, null, 2).replace(/"/g, '').replace(/\n/g, '\n ' + indent));
        lines.push('');
        lines.push(indent + ' const _decode = ' + JSON.stringify(decode, null, 2).replace(/"/g, '').replace(/\n/g, '\n ' + indent));
        lines.push('');
        lines.push(indent + ' def encode(value ' + definition.name + ') int {');
        lines.push(indent + ' return _encode[value]');
        lines.push(indent + ' }');
        lines.push('');
        lines.push(indent + ' def decode(value int) ' + definition.name + ' {');
        lines.push(indent + ' if !(value in _decode) {');
        lines.push(indent + ' Kiwi.DecodeError.throwInvalidEnumValue(' + quote(definition.name) + ')');
        lines.push(indent + ' }');
        lines.push(indent + ' return _decode[value]');
        lines.push(indent + ' }');
        lines.push(indent + '}');
        lines.push('');
        break;
      }

      case 'STRUCT':
      case 'MESSAGE': {
        lines.push(indent + 'class ' + definition.name + ' {');

        // One integer of presence flags per group of 32 fields.
        for (var j = 0; j < definition.fields.length; j += 32) {
          lines.push(indent + ' var _flags' + (j >> 5) + ' = 0');
        }

        // Backing storage for every field.
        for (var j = 0; j < definition.fields.length; j++) {
          var field = definition.fields[j];
          lines.push(indent + ' var _' + field.name + ' ' + skewTypeForField(field) + ' = ' + skewDefaultValueForField(definitions, field));
        }
        lines.push('');

        // Presence check, getter and setter for every field.
        for (var j = 0; j < definition.fields.length; j++) {
          var field = definition.fields[j];
          var type = skewTypeForField(field);
          var flags = '_flags' + (j >> 5);

          // The flag word is selected with j >> 5, so the bit inside that
          // word must be j modulo 32. Using any other modulus makes two
          // fields of the same word share one presence bit.
          // NOTE(review): for the 32nd field of a word this yields the
          // negative literal -2147483648 — confirm Skew accepts it.
          var mask = '' + (1 << (j & 31));

          lines.push(indent + ' def has_' + field.name + ' bool {');
          lines.push(indent + ' return (' + flags + ' & ' + mask + ') != 0');
          lines.push(indent + ' }');
          lines.push('');
          lines.push(indent + ' def ' + field.name + ' ' + type + ' {');
          lines.push(indent + ' assert(has_' + field.name + ')');
          lines.push(indent + ' return _' + field.name);
          lines.push(indent + ' }');
          lines.push('');
          lines.push(indent + ' def ' + field.name + '=(value ' + type + ') {');
          lines.push(indent + ' _' + field.name + ' = value');
          lines.push(indent + ' ' + flags + ' |= ' + mask);
          lines.push(indent + ' }');
          lines.push('');
        }

        // encode(bb): writes every field into an existing buffer.
        lines.push(indent + ' def encode(bb Kiwi.ByteBuffer) {');
        for (var j = 0; j < definition.fields.length; j++) {
          var field = definition.fields[j];
          var value = '_' + field.name;
          var code;

          // Array elements are written from the loop variable of the
          // generated `for value in ...` loop.
          if (field.isArray) {
            value = 'value';
          }

          switch (field.type) {
            case 'bool': {
              code = 'bb.writeByte(' + value + ' as int)';
              break;
            }
            case 'byte': {
              code = 'bb.writeByte(' + value + ')';
              break;
            }
            case 'int': {
              code = 'bb.writeVarInt(' + value + ')';
              break;
            }
            case 'uint': {
              code = 'bb.writeVarUint(' + value + ')';
              break;
            }
            case 'float': {
              code = 'bb.writeVarFloat(' + value + ')';
              break;
            }
            case 'string': {
              code = 'bb.writeString(' + value + ')';
              break;
            }
            default: {
              var type = definitions[field.type];
              if (!type) {
                throw new Error('Invalid type ' + quote(field.type) + ' for field ' + quote(field.name));
              } else if (type.kind === 'ENUM') {
                code = 'bb.writeVarUint(' + type.name + '.encode(' + value + '))';
              } else {
                code = value + '.encode(bb)';
              }
            }
          }

          var nestedIndent = indent + ' ';

          // Required fields must be present; optional ones are skipped
          // when they were never assigned.
          if (field.isRequired) {
            lines.push(nestedIndent + 'assert(has_' + field.name + ')');
          } else {
            lines.push(nestedIndent + 'if has_' + field.name + ' {');
            nestedIndent += ' ';
          }

          // Message fields are prefixed with their id, written as a varuint.
          if (definition.kind === 'MESSAGE') {
            lines.push(nestedIndent + 'bb.writeVarUint(' + field.value + ')');
          }

          if (field.isArray) {
            lines.push(nestedIndent + 'bb.writeVarUint(_' + field.name + '.count)');
            lines.push(nestedIndent + 'for value in _' + field.name + ' {');
            lines.push(nestedIndent + ' ' + code);
            lines.push(nestedIndent + '}');
          } else {
            lines.push(nestedIndent + code);
          }

          if (!field.isRequired) {
            lines.push(indent + ' }');
          }
          lines.push('');
        }

        // Messages end with a zero field id.
        if (definition.kind === 'MESSAGE') {
          lines.push(indent + ' bb.writeVarUint(0)');
        } else {
          popTrailingNewline(lines);
        }
        lines.push(indent + ' }');
        lines.push('');

        // encode: convenience overload returning a fresh Uint8Array.
        lines.push(indent + ' def encode Uint8Array {');
        lines.push(indent + ' var bb = Kiwi.ByteBuffer.new');
        lines.push(indent + ' encode(bb)');
        lines.push(indent + ' return bb.toUint8Array');
        lines.push(indent + ' }');
        lines.push(indent + '}');
        lines.push('');

        // Namespace with the decode functions.
        lines.push(indent + 'namespace ' + definition.name + ' {');
        lines.push(indent + ' def decode(bytes Uint8Array) ' + definition.name + ' {');
        lines.push(indent + ' return decode(Kiwi.ByteBuffer.new(bytes))');
        lines.push(indent + ' }');
        lines.push('');
        lines.push(indent + ' def decode(bb Kiwi.ByteBuffer) ' + definition.name + ' {');
        lines.push(indent + ' var self = new');

        // A shared element counter is only declared when some field is an
        // array.
        for (var j = 0; j < definition.fields.length; j++) {
          if (definition.fields[j].isArray) {
            lines.push(indent + ' var count = 0');
            break;
          }
        }

        var nestedIndent = indent + ' ';

        if (definition.kind === 'MESSAGE') {
          lines.push(indent + ' while true {');
          // The encoder writes field ids with writeVarUint, so they have to
          // be read back with readVarUint; reading a single byte would
          // mis-decode any id that needs more than one varint byte.
          lines.push(indent + ' switch bb.readVarUint {');
          lines.push(indent + ' case 0 {');
          for (var j = 0; j < definition.fields.length; j++) {
            var field = definition.fields[j];
            if (field.isRequired) {
              lines.push(indent + ' if !self.has_' + field.name + ' {');
              lines.push(indent + ' Kiwi.DecodeError.throwMissingRequiredField(' + quote(field.name) + ')');
              lines.push(indent + ' }');
            }
          }
          lines.push(indent + ' break');
          lines.push(indent + ' }');
          lines.push('');
          nestedIndent += ' ';
        }

        for (var j = 0; j < definition.fields.length; j++) {
          var field = definition.fields[j];
          var code;

          switch (field.type) {
            case 'bool': {
              code = 'bb.readByte as bool';
              break;
            }
            case 'byte': {
              code = 'bb.readByte';
              break;
            }
            case 'int': {
              code = 'bb.readVarInt';
              break;
            }
            case 'uint': {
              code = 'bb.readVarUint';
              break;
            }
            case 'float': {
              code = 'bb.readVarFloat';
              break;
            }
            case 'string': {
              code = 'bb.readString';
              break;
            }
            default: {
              var type = definitions[field.type];
              if (!type) {
                throw new Error('Invalid type ' + quote(field.type) + ' for field ' + quote(field.name));
              } else if (type.kind === 'ENUM') {
                code = type.name + '.decode(bb.readVarUint)';
              } else {
                code = type.name + '.decode(bb)';
              }
            }
          }

          if (definition.kind === 'MESSAGE') {
            lines.push(nestedIndent + 'case ' + field.value + ' {');
          }

          if (field.isArray) {
            lines.push(nestedIndent + ' count = bb.readVarUint');
            lines.push(nestedIndent + ' self.' + field.name + ' = []');
            lines.push(nestedIndent + ' for array = self._' + field.name + '; count != 0; count-- {');
            lines.push(nestedIndent + ' array.append(' + code + ')');
            lines.push(nestedIndent + ' }');
          } else {
            lines.push(nestedIndent + ' self.' + field.name + ' = ' + code);
          }

          if (definition.kind === 'MESSAGE') {
            lines.push(nestedIndent + '}');
            lines.push('');
          }
        }

        if (definition.kind === 'MESSAGE') {
          lines.push(indent + ' default {');
          lines.push(indent + ' Kiwi.DecodeError.throwInvalidMessage');
          lines.push(indent + ' }');
          lines.push(indent + ' }');
          lines.push(indent + ' }');
        }

        lines.push(indent + ' return self');
        lines.push(indent + ' }');
        lines.push(indent + '}');
        lines.push('');
        break;
      }

      default: {
        error('Invalid definition kind ' + quote(definition.kind), definition.line, definition.column);
        break;
      }
    }
  }

  if (schema.package !== null) {
    popTrailingNewline(lines);
    lines.push('}');
  }

  lines.push('');
  return lines.join('\n');
}
kiwi.compileSchemaSkew = compileSchemaSkew; | ||
}()); | ||
}()); |
{ | ||
"name": "kiwi-schema", | ||
"version": "0.0.14", | ||
"version": "0.0.15", | ||
"description": "", | ||
@@ -17,4 +17,5 @@ "main": "kiwi.js", | ||
"devDependencies": { | ||
"mocha": "2.4.5" | ||
"mocha": "2.4.5", | ||
"skew": "0.7.40" | ||
} | ||
} |
# Kiwi Message Format | ||
This is a binary encoding format inspired by Google's [Protocol Buffer](https://developers.google.com/protocol-buffers/) format. | ||
Kiwi is a schema-based binary format for efficiently encoding trees of data. | ||
It's inspired by Google's [Protocol Buffer](https://developers.google.com/protocol-buffers/) format but is simpler, has a more compact encoding, and has better support for optional fields. | ||
Goals: | ||
* **Efficient encoding of common values:** Variable-length encoding is used for numeric values where small values take up less space. | ||
* **Efficient encoding of compound objects:** The `struct` feature supports nested objects with zero encoding overhead. | ||
* **Presence of optional fields is detectable:** This is not possible with Protocol Buffers, especially for repeated fields. | ||
* **Linearly serializable:** Reading and writing are both single-scan operations so they are cache-efficient and have guaranteed time complexity. | ||
* **Backwards compatibility:** New versions of the schema can still read old data. | ||
* **Simple implementation:** The API is very minimal and the generated C++ code only depends on a single file. | ||
Non-goals: | ||
* **Forwards compatibility:** Old versions of the schema cannot read new data. | ||
* **Optimal bit-packing:** Compression can be used after encoding for more space savings if needed. | ||
## Native Types | ||
* *bool*: A value that stores either `true` or `false`. Will use 1 byte. | ||
* *byte*: An unsigned 8-bit integer value. Uses 1 byte, obviously. | ||
* *int*: A 32-bit integer value stored using a variable-length encoding optimized for storing numbers with a small magnitude. Will use at most 5 bytes. | ||
* *uint*: A 32-bit integer value stored using a variable-length encoding optimized for storing small non-negative numbers. Will use at most 5 bytes. | ||
* *float*: A 32-bit floating-point number. Normally uses 4 bytes but a value of zero uses 1 byte ([denormal numbers](https://en.wikipedia.org/wiki/Denormal_number) become zero when encoded). | ||
* *string*: A UTF-8 null-terminated string. Will use at least 1 byte. | ||
* *T[]*: Any type can be made into an array using the `[]` suffix. | ||
* **bool:** A value that stores either `true` or `false`. Will use 1 byte. | ||
* **byte:** An unsigned 8-bit integer value. Uses 1 byte, obviously. | ||
* **int:** A 32-bit integer value stored using a variable-length encoding optimized for storing numbers with a small magnitude. Will use at most 5 bytes. | ||
* **uint:** A 32-bit integer value stored using a variable-length encoding optimized for storing small non-negative numbers. Will use at most 5 bytes. | ||
* **float:** A 32-bit floating-point number. Normally uses 4 bytes but a value of zero uses 1 byte ([denormal numbers](https://en.wikipedia.org/wiki/Denormal_number) become zero when encoded). | ||
* **string:** A UTF-8 null-terminated string. Will use at least 1 byte. | ||
* **T[]:** Any type can be made into an array using the `[]` suffix. | ||
## User Types | ||
* *enum*: A `uint` with a restricted set of values that are identified by name. | ||
* *struct*: A compound value with a fixed set of fields that are always required and written out in order. | ||
* *message*: A compound value with optional fields. A field can be made required using the `required` keyword. | ||
* **enum:** A `uint` with a restricted set of values that are identified by name. New values can be added to an enum while maintaining backwards compatibility. | ||
* **struct:** A compound value with a fixed set of fields that are always required and written out in order. New fields cannot be added to a struct once that struct is in use. | ||
* **message:** A compound value with optional fields. A field can be made required using the `required` keyword. New fields can be added to any message while maintaining backwards compatibility. | ||
@@ -44,9 +59,2 @@ ## Example Schema | ||
## Differences from Protocol Buffers | ||
* Kiwi adds support for efficient compound messages using the `struct` keyword | ||
* Enums are scoped to their type instead of dumping everything into the global scope like C | ||
* It's always possible to check for field presence, even for fields that hold arrays | ||
* The generated C++ code is a lot simpler and only depends on a single file, `kiwi.h` | ||
## Live Demo | ||
@@ -102,1 +110,25 @@ | ||
``` | ||
## [Skew](http://skew-lang.org/) Usage | ||
Make sure to generate the Skew code beforehand using something like `kiwic --schema test.kiwi --skew test.sk`. | ||
``` | ||
@import | ||
var console dynamic | ||
@entry | ||
def main int { | ||
var test = Test.new | ||
test.x = 123 | ||
var buffer = test.encode | ||
var test2 = Test.decode(buffer) | ||
if test2.has_x { | ||
console.log("x is \(test2.x)") | ||
} | ||
return 0 | ||
} | ||
``` |
Sorry, the diff of this file is not supported yet
218656
16
2623
133
2