Comparing version 1.7.0 to 1.8.0
@@ -334,2 +334,17 @@ 'use strict'; | ||
}); | ||
it('diag non-utf8 and non-printable ascii', async () => { | ||
const input = '7864f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c11e756338bd93865e645f1adec9b9c99ef407fbd4fc6859e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82f9f18c3d03418e35'; | ||
let {stdout, stderr} = await execBin(`hex2diag ${ input }`); | ||
assert.strictEqual(stderr, ''); | ||
assert.strictEqual(stdout, `78 64 # string(86) | ||
f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b # "õ_øñ%\\x08¶>ò¿ì§Uzé\\x0dö1\\x1a^Ác\\x1b" | ||
4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c # "J\\x1f¨C1\\x0bÙç\\x10ê¬å¡½×*пàIw\\x1c" | ||
11e756338bd93865e645f1adec9b9c99ef407fbd4fc685 # "\\x11çV3\\x8bÙ8eæEñ\\xadì\\x9b\\x9c\\x99ï@\\x7f½OÆ\\x85" | ||
9e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82 # "\\x9ey\\x04Å\\xad}ɽ\\x10¥Ì\\x16\\x97=[(ì\\x1amÔ=\\x9f\\x82" | ||
f9f18c3d03418e35 # "ùñ\\x8c=\\x03A\\x8e5" | ||
`); | ||
({stdout, stderr} = await execBin('diag2hex', stdout)); | ||
assert.strictEqual(stderr, ''); | ||
assert.strictEqual(stdout, `${ input }\n`); | ||
}); | ||
}); |
@@ -11,18 +11,22 @@ 'use strict'; | ||
function toToken(data, pos, prefix, length) { | ||
function toToken(data, pos, prefix, length, options) { | ||
const totLength = prefix + length; | ||
common.assertEnoughData(data, pos, totLength); | ||
return new token.Token(token.Type.string, byteUtils.toString(data, pos + prefix, pos + totLength), totLength); | ||
const tok = new token.Token(token.Type.string, byteUtils.toString(data, pos + prefix, pos + totLength), totLength); | ||
if (options.retainStringBytes === true) { | ||
tok.byteValue = byteUtils.slice(data, pos + prefix, pos + totLength); | ||
} | ||
return tok; | ||
} | ||
function decodeStringCompact(data, pos, minor, _options) { | ||
return toToken(data, pos, 1, minor); | ||
function decodeStringCompact(data, pos, minor, options) { | ||
return toToken(data, pos, 1, minor, options); | ||
} | ||
function decodeString8(data, pos, _minor, options) { | ||
return toToken(data, pos, 2, _0uint.readUint8(data, pos + 1, options)); | ||
return toToken(data, pos, 2, _0uint.readUint8(data, pos + 1, options), options); | ||
} | ||
function decodeString16(data, pos, _minor, options) { | ||
return toToken(data, pos, 3, _0uint.readUint16(data, pos + 1, options)); | ||
return toToken(data, pos, 3, _0uint.readUint16(data, pos + 1, options), options); | ||
} | ||
function decodeString32(data, pos, _minor, options) { | ||
return toToken(data, pos, 5, _0uint.readUint32(data, pos + 1, options)); | ||
return toToken(data, pos, 5, _0uint.readUint32(data, pos + 1, options), options); | ||
} | ||
@@ -34,3 +38,3 @@ function decodeString64(data, pos, _minor, options) { | ||
} | ||
return toToken(data, pos, 9, l); | ||
return toToken(data, pos, 9, l, options); | ||
} | ||
@@ -37,0 +41,0 @@ const encodeString = _2bytes.encodeBytes; |
@@ -12,3 +12,3 @@ 'use strict'; | ||
function* tokensToDiagnostic(inp, width = 100) { | ||
const tokeniser = new decode.Tokeniser(inp); | ||
const tokeniser = new decode.Tokeniser(inp, { retainStringBytes: true }); | ||
let pos = 0; | ||
@@ -63,4 +63,11 @@ const indent = []; | ||
if (str) { | ||
let asString = token.type.name === 'string'; | ||
margin += ' '; | ||
const repr = token.type.name === 'bytes' ? token.value : utf8Encoder.encode(token.value); | ||
let repr = asString ? utf8Encoder.encode(token.value) : token.value; | ||
if (asString && token.byteValue !== undefined) { | ||
if (repr.length !== token.byteValue.length) { | ||
repr = token.byteValue; | ||
asString = false; | ||
} | ||
} | ||
const wh = (width / 2 - margin.length - 1) / 2; | ||
@@ -71,4 +78,4 @@ let snip = 0; | ||
snip += piece.length; | ||
const st = token.type.name === 'string' ? utf8Decoder.decode(piece) : piece.reduce((p, c) => { | ||
if (c < 32 || c === 127) { | ||
const st = asString ? utf8Decoder.decode(piece) : piece.reduce((p, c) => { | ||
if (c < 32 || c >= 127 && c < 161 || c === 173) { | ||
return `${ p }\\x${ c.toString(16).padStart(2, '0') }`; | ||
@@ -75,0 +82,0 @@ } |
@@ -38,2 +38,3 @@ 'use strict'; | ||
this.encodedBytes = undefined; | ||
this.byteValue = undefined; | ||
} | ||
@@ -40,0 +41,0 @@ toString() { |
@@ -334,2 +334,17 @@ 'use strict'; | ||
}); | ||
it('diag non-utf8 and non-printable ascii', async () => { | ||
const input = '7864f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c11e756338bd93865e645f1adec9b9c99ef407fbd4fc6859e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82f9f18c3d03418e35'; | ||
let {stdout, stderr} = await execBin(`hex2diag ${ input }`); | ||
assert.strictEqual(stderr, ''); | ||
assert.strictEqual(stdout, `78 64 # string(86) | ||
f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b # "õ_øñ%\\x08¶>ò¿ì§Uzé\\x0dö1\\x1a^Ác\\x1b" | ||
4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c # "J\\x1f¨C1\\x0bÙç\\x10ê¬å¡½×*пàIw\\x1c" | ||
11e756338bd93865e645f1adec9b9c99ef407fbd4fc685 # "\\x11çV3\\x8bÙ8eæEñ\\xadì\\x9b\\x9c\\x99ï@\\x7f½OÆ\\x85" | ||
9e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82 # "\\x9ey\\x04Å\\xad}ɽ\\x10¥Ì\\x16\\x97=[(ì\\x1amÔ=\\x9f\\x82" | ||
f9f18c3d03418e35 # "ùñ\\x8c=\\x03A\\x8e5" | ||
`); | ||
({stdout, stderr} = await execBin('diag2hex', stdout)); | ||
assert.strictEqual(stderr, ''); | ||
assert.strictEqual(stdout, `${ input }\n`); | ||
}); | ||
}); |
@@ -325,2 +325,17 @@ import chai from 'chai'; | ||
}); | ||
it('diag non-utf8 and non-printable ascii', async () => { | ||
const input = '7864f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c11e756338bd93865e645f1adec9b9c99ef407fbd4fc6859e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82f9f18c3d03418e35'; | ||
let {stdout, stderr} = await execBin(`hex2diag ${ input }`); | ||
assert.strictEqual(stderr, ''); | ||
assert.strictEqual(stdout, `78 64 # string(86) | ||
f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b # "õ_øñ%\\x08¶>ò¿ì§Uzé\\x0dö1\\x1a^Ác\\x1b" | ||
4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c # "J\\x1f¨C1\\x0bÙç\\x10ê¬å¡½×*пàIw\\x1c" | ||
11e756338bd93865e645f1adec9b9c99ef407fbd4fc685 # "\\x11çV3\\x8bÙ8eæEñ\\xadì\\x9b\\x9c\\x99ï@\\x7f½OÆ\\x85" | ||
9e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82 # "\\x9ey\\x04Å\\xad}ɽ\\x10¥Ì\\x16\\x97=[(ì\\x1amÔ=\\x9f\\x82" | ||
f9f18c3d03418e35 # "ùñ\\x8c=\\x03A\\x8e5" | ||
`); | ||
({stdout, stderr} = await execBin('diag2hex', stdout)); | ||
assert.strictEqual(stderr, ''); | ||
assert.strictEqual(stdout, `${ input }\n`); | ||
}); | ||
}); |
@@ -11,19 +11,26 @@ import { | ||
import { encodeBytes } from './2bytes.js'; | ||
import { toString } from './byte-utils.js'; | ||
function toToken(data, pos, prefix, length) { | ||
import { | ||
toString, | ||
slice | ||
} from './byte-utils.js'; | ||
function toToken(data, pos, prefix, length, options) { | ||
const totLength = prefix + length; | ||
assertEnoughData(data, pos, totLength); | ||
return new Token(Type.string, toString(data, pos + prefix, pos + totLength), totLength); | ||
const tok = new Token(Type.string, toString(data, pos + prefix, pos + totLength), totLength); | ||
if (options.retainStringBytes === true) { | ||
tok.byteValue = slice(data, pos + prefix, pos + totLength); | ||
} | ||
return tok; | ||
} | ||
export function decodeStringCompact(data, pos, minor, _options) { | ||
return toToken(data, pos, 1, minor); | ||
export function decodeStringCompact(data, pos, minor, options) { | ||
return toToken(data, pos, 1, minor, options); | ||
} | ||
export function decodeString8(data, pos, _minor, options) { | ||
return toToken(data, pos, 2, uint.readUint8(data, pos + 1, options)); | ||
return toToken(data, pos, 2, uint.readUint8(data, pos + 1, options), options); | ||
} | ||
export function decodeString16(data, pos, _minor, options) { | ||
return toToken(data, pos, 3, uint.readUint16(data, pos + 1, options)); | ||
return toToken(data, pos, 3, uint.readUint16(data, pos + 1, options), options); | ||
} | ||
export function decodeString32(data, pos, _minor, options) { | ||
return toToken(data, pos, 5, uint.readUint32(data, pos + 1, options)); | ||
return toToken(data, pos, 5, uint.readUint32(data, pos + 1, options), options); | ||
} | ||
@@ -35,4 +42,4 @@ export function decodeString64(data, pos, _minor, options) { | ||
} | ||
return toToken(data, pos, 9, l); | ||
return toToken(data, pos, 9, l, options); | ||
} | ||
export const encodeString = encodeBytes; |
@@ -10,3 +10,3 @@ import { Tokeniser } from './decode.js'; | ||
function* tokensToDiagnostic(inp, width = 100) { | ||
const tokeniser = new Tokeniser(inp); | ||
const tokeniser = new Tokeniser(inp, { retainStringBytes: true }); | ||
let pos = 0; | ||
@@ -61,4 +61,11 @@ const indent = []; | ||
if (str) { | ||
let asString = token.type.name === 'string'; | ||
margin += ' '; | ||
const repr = token.type.name === 'bytes' ? token.value : utf8Encoder.encode(token.value); | ||
let repr = asString ? utf8Encoder.encode(token.value) : token.value; | ||
if (asString && token.byteValue !== undefined) { | ||
if (repr.length !== token.byteValue.length) { | ||
repr = token.byteValue; | ||
asString = false; | ||
} | ||
} | ||
const wh = (width / 2 - margin.length - 1) / 2; | ||
@@ -69,4 +76,4 @@ let snip = 0; | ||
snip += piece.length; | ||
const st = token.type.name === 'string' ? utf8Decoder.decode(piece) : piece.reduce((p, c) => { | ||
if (c < 32 || c === 127) { | ||
const st = asString ? utf8Decoder.decode(piece) : piece.reduce((p, c) => { | ||
if (c < 32 || c >= 127 && c < 161 || c === 173) { | ||
return `${ p }\\x${ c.toString(16).padStart(2, '0') }`; | ||
@@ -73,0 +80,0 @@ } |
@@ -34,2 +34,3 @@ class Type { | ||
this.encodedBytes = undefined; | ||
this.byteValue = undefined; | ||
} | ||
@@ -36,0 +37,0 @@ toString() { |
@@ -325,2 +325,17 @@ import chai from 'chai'; | ||
}); | ||
it('diag non-utf8 and non-printable ascii', async () => { | ||
const input = '7864f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c11e756338bd93865e645f1adec9b9c99ef407fbd4fc6859e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82f9f18c3d03418e35'; | ||
let {stdout, stderr} = await execBin(`hex2diag ${ input }`); | ||
assert.strictEqual(stderr, ''); | ||
assert.strictEqual(stdout, `78 64 # string(86) | ||
f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b # "õ_øñ%\\x08¶>ò¿ì§Uzé\\x0dö1\\x1a^Ác\\x1b" | ||
4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c # "J\\x1f¨C1\\x0bÙç\\x10ê¬å¡½×*пàIw\\x1c" | ||
11e756338bd93865e645f1adec9b9c99ef407fbd4fc685 # "\\x11çV3\\x8bÙ8eæEñ\\xadì\\x9b\\x9c\\x99ï@\\x7f½OÆ\\x85" | ||
9e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82 # "\\x9ey\\x04Å\\xad}ɽ\\x10¥Ì\\x16\\x97=[(ì\\x1amÔ=\\x9f\\x82" | ||
f9f18c3d03418e35 # "ùñ\\x8c=\\x03A\\x8e5" | ||
`); | ||
({stdout, stderr} = await execBin('diag2hex', stdout)); | ||
assert.strictEqual(stderr, ''); | ||
assert.strictEqual(stdout, `${ input }\n`); | ||
}); | ||
}); |
@@ -42,2 +42,3 @@ import { Token } from './lib/token' | ||
useMaps?: boolean | ||
retainStringBytes?: boolean | ||
tags?: TagDecoder[], | ||
@@ -44,0 +45,0 @@ tokenizer?: DecodeTokenizer |
@@ -5,3 +5,3 @@ import { Token, Type } from './token.js' | ||
import { encodeBytes } from './2bytes.js' | ||
import { toString } from './byte-utils.js' | ||
import { toString, slice } from './byte-utils.js' | ||
@@ -18,8 +18,13 @@ /** | ||
* @param {number} length | ||
* @param {DecodeOptions} options | ||
* @returns {Token} | ||
*/ | ||
function toToken (data, pos, prefix, length) { | ||
function toToken (data, pos, prefix, length, options) { | ||
const totLength = prefix + length | ||
assertEnoughData(data, pos, totLength) | ||
return new Token(Type.string, toString(data, pos + prefix, pos + totLength), totLength) | ||
const tok = new Token(Type.string, toString(data, pos + prefix, pos + totLength), totLength) | ||
if (options.retainStringBytes === true) { | ||
tok.byteValue = slice(data, pos + prefix, pos + totLength) | ||
} | ||
return tok | ||
} | ||
@@ -31,7 +36,7 @@ | ||
* @param {number} minor | ||
* @param {DecodeOptions} _options | ||
* @param {DecodeOptions} options | ||
* @returns {Token} | ||
*/ | ||
export function decodeStringCompact (data, pos, minor, _options) { | ||
return toToken(data, pos, 1, minor) | ||
export function decodeStringCompact (data, pos, minor, options) { | ||
return toToken(data, pos, 1, minor, options) | ||
} | ||
@@ -47,3 +52,3 @@ | ||
export function decodeString8 (data, pos, _minor, options) { | ||
return toToken(data, pos, 2, uint.readUint8(data, pos + 1, options)) | ||
return toToken(data, pos, 2, uint.readUint8(data, pos + 1, options), options) | ||
} | ||
@@ -59,3 +64,3 @@ | ||
export function decodeString16 (data, pos, _minor, options) { | ||
return toToken(data, pos, 3, uint.readUint16(data, pos + 1, options)) | ||
return toToken(data, pos, 3, uint.readUint16(data, pos + 1, options), options) | ||
} | ||
@@ -71,3 +76,3 @@ | ||
export function decodeString32 (data, pos, _minor, options) { | ||
return toToken(data, pos, 5, uint.readUint32(data, pos + 1, options)) | ||
return toToken(data, pos, 5, uint.readUint32(data, pos + 1, options), options) | ||
} | ||
@@ -88,5 +93,5 @@ | ||
} | ||
return toToken(data, pos, 9, l) | ||
return toToken(data, pos, 9, l, options) | ||
} | ||
export const encodeString = encodeBytes |
@@ -13,3 +13,3 @@ import { Tokeniser } from './decode.js' | ||
function * tokensToDiagnostic (inp, width = 100) { | ||
const tokeniser = new Tokeniser(inp) | ||
const tokeniser = new Tokeniser(inp, { retainStringBytes: true }) | ||
let pos = 0 | ||
@@ -81,4 +81,13 @@ const indent = [] | ||
if (str) { | ||
let asString = token.type.name === 'string' | ||
margin += ' ' | ||
const repr = token.type.name === 'bytes' ? token.value : utf8Encoder.encode(token.value) | ||
let repr = asString ? utf8Encoder.encode(token.value) : token.value | ||
if (asString && token.byteValue !== undefined) { | ||
if (repr.length !== token.byteValue.length) { | ||
// bail on printing this as a string, it's probably not utf8, so treat it as bytes | ||
// (you can probably blame a Go programmer for this) | ||
repr = token.byteValue | ||
asString = false | ||
} | ||
} | ||
const wh = ((width / 2) - margin.length - 1) / 2 | ||
@@ -89,8 +98,6 @@ let snip = 0 | ||
snip += piece.length | ||
// the assumption that we can utf8 a byte-sliced version is a stretch, | ||
// we could be slicing in the middle of a multi-byte character | ||
const st = token.type.name === 'string' | ||
const st = asString | ||
? utf8Decoder.decode(piece) | ||
: piece.reduce((/** @type {string} */ p, /** @type {number} */ c) => { | ||
if (c < 0x20 || c === 0x7f) { | ||
if (c < 0x20 || (c >= 0x7f && c < 0xa1) || c === 0xad) { | ||
return `${p}\\x${c.toString(16).padStart(2, '0')}` | ||
@@ -97,0 +104,0 @@ } |
@@ -57,2 +57,4 @@ class Type { | ||
this.encodedBytes = undefined | ||
/** @type {Uint8Array|undefined} */ | ||
this.byteValue = undefined | ||
} | ||
@@ -59,0 +61,0 @@ |
{ | ||
"name": "cborg", | ||
"version": "1.7.0", | ||
"version": "1.8.0", | ||
"description": "Fast CBOR with a focus on strictness", | ||
@@ -5,0 +5,0 @@ "main": "./cjs/cborg.js", |
@@ -249,2 +249,3 @@ # cborg - fast CBOR with a focus on strictness | ||
* `useMaps` (boolean, default `false`): when decoding major 5 (map) entries, use a `Map` rather than a plain `Object`. This will nest for any encountered map. During encode, a `Map` will be interpreted as an `Object` and will round-trip as such unless `useMaps` is supplied, in which case, all `Map`s and `Object`s will round-trip as `Map`s. There is no way to retain the distinction during round-trip without using a custom tag. | ||
* `retainStringBytes` (boolean, default `false`): when decoding strings, retain the original bytes on the `Token` object as `byteValue`. Since it is possible to encode byte sequences that are not valid UTF-8 in CBOR strings, and JavaScript doesn't properly handle non-UTF-8 in its conversion from bytes (`TextDecoder` or `Buffer`), this can result in a loss of data (and an inability to round-trip). Where this is important, a token stream should be consumed instead of a plain `decode()` and the `byteValue` property on string tokens can be inspected (see [lib/diagnostic.js](lib/diagnostic.js) for an example of its use). | ||
* `tags` (array): a mapping of tag number to tag decoder function. By default no tags are supported. See [Tag decoders](#tag-decoders). | ||
@@ -251,0 +252,0 @@ * `tokenizer` (object): an object with two methods, `next()` which returns a `Token` and `done()` which returns a `boolean`. Can be used to implement custom input decoding. See the source code for examples. |
@@ -385,2 +385,21 @@ /* eslint-env mocha */ | ||
}) | ||
it('diag non-utf8 and non-printable ascii', async () => { | ||
const input = '7864f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c11e756338bd93865e645f1adec9b9c99ef407fbd4fc6859e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82f9f18c3d03418e35' | ||
let { stdout, stderr } = await execBin(`hex2diag ${input}`) | ||
assert.strictEqual(stderr, '') | ||
assert.strictEqual(stdout, | ||
`78 64 # string(86) | ||
f55ff8f12508b63ef2bfeca7557ae90df6311a5ec1631b # "õ_øñ%\\x08¶>ò¿ì§Uzé\\x0dö1\\x1a^Ác\\x1b" | ||
4a1fa843310bd9c3a710eaace5a1bdd72ad0bfe049771c # "J\\x1f¨C1\\x0bÙç\\x10ê¬å¡½×*пàIw\\x1c" | ||
11e756338bd93865e645f1adec9b9c99ef407fbd4fc685 # "\\x11çV3\\x8bÙ8eæEñ\\xadì\\x9b\\x9c\\x99ï@\\x7f½OÆ\\x85" | ||
9e7904c5ad7dc9bd10a5cc16973d5b28ec1a6dd43d9f82 # "\\x9ey\\x04Å\\xad}ɽ\\x10¥Ì\\x16\\x97=[(ì\\x1amÔ=\\x9f\\x82" | ||
f9f18c3d03418e35 # "ùñ\\x8c=\\x03A\\x8e5" | ||
`) | ||
// round-trip | ||
;({ stdout, stderr } = await execBin('diag2hex', stdout)) | ||
assert.strictEqual(stderr, '') | ||
assert.strictEqual(stdout, `${input}\n`) | ||
}) | ||
}) |
@@ -32,2 +32,3 @@ import { Token } from './lib/token'; | ||
useMaps?: boolean; | ||
retainStringBytes?: boolean; | ||
tags?: TagDecoder[]; | ||
@@ -34,0 +35,0 @@ tokenizer?: DecodeTokenizer; |
@@ -5,6 +5,6 @@ /** | ||
* @param {number} minor | ||
* @param {DecodeOptions} _options | ||
* @param {DecodeOptions} options | ||
* @returns {Token} | ||
*/ | ||
export function decodeStringCompact(data: Uint8Array, pos: number, minor: number, _options: DecodeOptions): Token; | ||
export function decodeStringCompact(data: Uint8Array, pos: number, minor: number, options: DecodeOptions): Token; | ||
/** | ||
@@ -11,0 +11,0 @@ * @param {Uint8Array} data |
@@ -50,4 +50,6 @@ export class Type { | ||
encodedBytes: Uint8Array | undefined; | ||
/** @type {Uint8Array|undefined} */ | ||
byteValue: Uint8Array | undefined; | ||
toString(): string; | ||
} | ||
//# sourceMappingURL=token.d.ts.map |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
844209
26000
429