@exodus/bytes
Advanced tools
| import { decodeAscii, encodeLatin1 } from './latin1.js' | ||
| import { decode2string } from './_utils.js' | ||
// Error raised for any percentEncodeSet that is not a strictly increasing run of printable ASCII
const ERR = 'percentEncodeSet must be a string of unique increasing codepoints in range 0x20 - 0x7e'

// Memoized encoders, keyed by the set string followed by '0' / '1' for spaceAsPlus
const percentMap = new Map()

// Lazily built lookup tables shared by all encoders:
//   hex[b]  - '%XX' form of byte b
//   base[b] - default output for byte b (literal char for 0x20..0x7e, '%XX' otherwise)
let hex, base

/**
 * Returns a (cached) function that turns bytes into a percent-encoded string.
 *
 * Bytes below 0x20 and above 0x7e are always escaped; `set` lists the extra
 * printable ASCII chars that must be escaped as well.
 *
 * @param set - string of unique, increasing codepoints in range 0x20 - 0x7e
 * @param spaceAsPlus - when true, byte 0x20 is written as '+' regardless of `set`
 * @returns `(u8, start = 0, end = u8.length) => string`; its input is not typechecked
 * @throws TypeError when `set` or `spaceAsPlus` are malformed
 */
export function percentEncoder(set, spaceAsPlus = false) {
  const isPrintableAscii = typeof set === 'string' && !/[^\x20-\x7E]/.test(set)
  if (!isPrintableAscii) throw new TypeError(ERR)
  if (typeof spaceAsPlus !== 'boolean') throw new TypeError('spaceAsPlus must be boolean')

  const key = set + +spaceAsPlus
  const known = percentMap.get(key)
  if (known) return known

  // Safe to treat as latin1/ascii: the regex above rejected everything else.
  // If sorting changes the string, or it has duplicates, the set was not strictly increasing.
  const codes = encodeLatin1(set).sort()
  const isSorted = decodeAscii(codes) === set
  if (!isSorted || new Set(codes).size !== codes.length) throw new TypeError(ERR)

  if (!base) {
    hex = Array.from({ length: 256 }, (_, byte) => {
      const digits = byte.toString(16).padStart(2, '0').toUpperCase()
      return `%${digits}`
    })
    base = hex.map((escaped, byte) => {
      if (byte < 0x20 || byte > 0x7e) return escaped
      return String.fromCharCode(byte)
    })
  }

  const table = base.slice() // per-encoder copy, the shared base stays untouched
  for (let k = 0; k < codes.length; k++) {
    const byte = codes[k]
    table[byte] = hex[byte]
  }

  // Applied last on purpose: wins over whatever percentEncodeSet says about space
  if (spaceAsPlus) table[0x20] = '+'

  // Internal use only, arguments are not validated here
  const percentEncode = (u8, start = 0, end = u8.length) => {
    return decode2string(u8, start, end, table)
  }

  percentMap.set(key, percentEncode)
  return percentEncode
}
+48
| /** | ||
| * WHATWG helpers | ||
| * | ||
| * ```js | ||
| * import '@exodus/bytes/encoding.js' // For full legacy multi-byte encodings support | ||
| * import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js' | ||
| * ``` | ||
| * | ||
| * @module @exodus/bytes/whatwg.js | ||
| */ | ||
| /** | ||
| * Implements [percent-encode after encoding](https://url.spec.whatwg.org/#string-percent-encode-after-encoding) | ||
| * per WHATWG URL specification. | ||
| * | ||
| * > [!IMPORTANT] | ||
| * > You must import `@exodus/bytes/encoding.js` for this API to accept legacy multi-byte encodings. | ||
| * | ||
| * Encodings `utf-16le`, `utf-16be`, and `replacement` are not accepted. | ||
| * | ||
| * [C0 control percent-encode set](https://url.spec.whatwg.org/#c0-control-percent-encode-set) is | ||
| * always percent-encoded. | ||
| * | ||
| * `percentEncodeSet` is an addition to that, and must be a string of unique increasing codepoints | ||
| * in range 0x20 - 0x7e, e.g. `' "#<>'`. | ||
| * | ||
| * This method accepts [DOMStrings](https://webidl.spec.whatwg.org/#idl-DOMString) and converts them | ||
| * to [USVStrings](https://webidl.spec.whatwg.org/#idl-USVString). | ||
| * This is different from e.g. `encodeURI` and `encodeURIComponent` which throw on surrogates: | ||
| * ```js | ||
| * > percentEncodeAfterEncoding('utf8', '\ud800', ' "#$%&+,/:;<=>?@[\\]^`{|}') // component | ||
| * '%EF%BF%BD' | ||
| * > encodeURIComponent('\ud800') | ||
| * Uncaught URIError: URI malformed | ||
| * ``` | ||
| * | ||
| * @param encoding - The encoding label per WHATWG Encoding spec | ||
| * @param input - Input scalar-value string to encode | ||
| * @param percentEncodeSet - A string of ASCII chars to escape in addition to C0 control percent-encode set | ||
| * @param spaceAsPlus - Whether to encode space as `'+'` instead of `'%20'` or `' '` (default: false) | ||
| * @returns The percent-encoded string | ||
| * @throws RangeError if `encoding` is not a recognized label or is one of the encodings not accepted above | ||
| * @throws TypeError if `percentEncodeSet` is not a string of unique increasing codepoints in range | ||
| * 0x20 - 0x7e, or if `spaceAsPlus` is not a boolean | ||
| */ | ||
| export function percentEncodeAfterEncoding( | ||
| encoding: string, | ||
| input: string, | ||
| percentEncodeSet: string, | ||
| spaceAsPlus?: boolean | ||
| ): string; |
+76
| import { utf8fromStringLoose } from '@exodus/bytes/utf8.js' | ||
| import { createSinglebyteEncoder } from '@exodus/bytes/single-byte.js' | ||
| import { isMultibyte, getMultibyteEncoder } from './fallback/encoding.js' | ||
| import { normalizeEncoding, E_ENCODING } from './fallback/encoding.api.js' | ||
| import { percentEncoder } from './fallback/percent.js' | ||
| import { encodeMap } from './fallback/single-byte.js' | ||
| import { E_STRING } from './fallback/_utils.js' | ||
| // https://url.spec.whatwg.org/#string-percent-encode-after-encoding | ||
| // Codepoints below 0x20, 0x7F specifically, and above 0x7F (non-ASCII) are always encoded | ||
| // > A C0 control is a code point in the range U+0000 NULL to U+001F INFORMATION SEPARATOR ONE, inclusive. | ||
| // > The C0 control percent-encode set are the C0 controls and all code points greater than U+007E (~). | ||
/**
 * Encodes `input` with the given encoding and percent-encodes the resulting bytes.
 * Characters the encoding cannot represent are emitted as a percent-encoded `&#cp;` reference.
 *
 * @param encoding - encoding label, resolved through normalizeEncoding
 * @param input - string to encode
 * @param percentEncodeSet - extra ASCII chars to escape, validated by percentEncoder
 * @param spaceAsPlus - whether a space is written as '+'
 * @returns the percent-encoded string
 * @throws RangeError for unknown encodings and for replacement / utf-16le / utf-16be
 */
export function percentEncodeAfterEncoding(encoding, input, percentEncodeSet, spaceAsPlus = false) {
  const name = normalizeEncoding(encoding)
  // Ref: https://encoding.spec.whatwg.org/#get-an-encoder
  const rejected = !name || name === 'replacement' || name === 'utf-16le' || name === 'utf-16be'
  if (rejected) throw new RangeError(E_ENCODING)

  const percent = percentEncoder(percentEncodeSet, spaceAsPlus)
  if (name === 'utf-8') return percent(utf8fromStringLoose(input))

  const multi = isMultibyte(name)
  const makeEncoder = multi ? getMultibyteEncoder() : createSinglebyteEncoder
  const strict = makeEncoder(name)

  // Fast path: the whole input is representable. Any failure falls through to the escaping paths below.
  try {
    return percent(strict(input))
  } catch {}

  let out = ''
  let done = 0 // everything before this offset has already been appended to `out`

  if (multi) {
    // Replacement for lone surrogates; only gb18030 can encode U+FFFD itself
    const lone = name === 'gb18030' ? percent(strict('\uFFFD')) : `%26%23${0xff_fd}%3B`

    // Invoked by the encoder for every codepoint it can not map; flushes pending bytes, then the escape
    const onUnmappable = (cp, bytes, pos) => {
      const isSurrogate = cp >= 0xd8_00 && cp < 0xe0_00
      out += percent(bytes, done, pos)
      out += isSurrogate ? lone : `%26%23${cp}%3B` // &#cp;
      done = pos
      return 0 // no bytes emitted
    }

    const escaping = makeEncoder(name, onUnmappable)
    const tail = escaping(input) // appends to `out` through onUnmappable
    out += percent(tail, done)
    return out
  }

  // All other paths have their own validation
  if (typeof input !== 'string') throw new TypeError(E_STRING)

  const table = encodeMap(name)
  const size = input.length
  const bytes = new Uint8Array(size)

  let i = 0
  while (i < size) {
    const unit = input.charCodeAt(i)
    const mapped = table[unit]

    // A zero / missing table entry means "unmappable", except for U+0000 itself
    if (mapped || !unit) {
      bytes[i] = mapped
      i++
      continue
    }

    const from = i
    let cp = unit
    if (unit >= 0xd8_00 && unit < 0xe0_00) {
      cp = 0xff_fd // lone surrogate unless a valid pair is found below
      const hasNext = unit < 0xdc_00 && i + 1 < size
      const next = hasNext ? input.charCodeAt(i + 1) : -1
      if (next >= 0xdc_00 && next < 0xe0_00) {
        cp = 0x1_00_00 + ((next & 0x3_ff) | ((unit & 0x3_ff) << 10))
        i++ // consume the trail surrogate too
      }
    }

    out += `${percent(bytes, done, from)}%26%23${cp}%3B` // &#cp;
    done = i + 1 // skip current
    i++
  }

  out += percent(bytes, done)
  return out
}
+2
-1
@@ -53,3 +53,4 @@ /** | ||
| * | ||
| * Important: does not copy data, returns a view on the same underlying buffer | ||
| * > [!IMPORTANT] | ||
| * > Does not copy data, returns a view on the same underlying buffer | ||
| * | ||
@@ -56,0 +57,0 @@ * @param arr - The input TypedArray |
@@ -20,5 +20,5 @@ const { Buffer, TextEncoder, TextDecoder } = globalThis | ||
| // in 2025 due to a regression, so we call it Latin1 as it's usable only for that | ||
| const getNativeLain1 = () => { | ||
| const getNativeLatin1 = () => { | ||
| // Not all barebone engines with TextDecoder support something except utf-8, detect | ||
| if (!nativeDecoder) { | ||
| if (nativeDecoder) { | ||
| try { | ||
@@ -32,3 +32,3 @@ return new TextDecoder('latin1', { ignoreBOM: true }) | ||
| export const nativeDecoderLatin1 = /* @__PURE__ */ getNativeLain1() | ||
| export const nativeDecoderLatin1 = /* @__PURE__ */ getNativeLatin1() | ||
| export const canDecoders = !!nativeDecoderLatin1 | ||
@@ -35,0 +35,0 @@ |
@@ -23,11 +23,7 @@ // Get a number of last bytes in an Uint8Array `u` ending at `len` that don't | ||
| // 0-3 | ||
| let p = 0 | ||
| if (len % 2 !== 0) p++ // uneven bytes | ||
| const p = len % 2 // uneven byte length adds 1 | ||
| if (len < 2) return p | ||
| const l = len - p - 1 | ||
| if (len - p >= 2) { | ||
| const last = enc === 'utf-16le' ? (u[l] << 8) ^ u[l - 1] : (u[l - 1] << 8) ^ u[l] | ||
| if (last >= 0xd8_00 && last < 0xdc_00) p += 2 // lone lead | ||
| } | ||
| return p | ||
| const last = enc === 'utf-16le' ? (u[l] << 8) ^ u[l - 1] : (u[l - 1] << 8) ^ u[l] | ||
| return last >= 0xd8_00 && last < 0xdc_00 ? p + 2 : p // lone lead adds 2 | ||
| } | ||
@@ -34,0 +30,0 @@ } |
+7
-0
@@ -13,2 +13,9 @@ /** | ||
| * | ||
| * > [!WARNING] | ||
| * > This is a lower-level API for legacy multi-byte encodings. | ||
| * > | ||
| * > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it). | ||
| * > | ||
| * > Be sure to know what you are doing and check documentation when directly using encodings from this file. | ||
| * | ||
| * Supports all legacy multi-byte encodings listed in the WHATWG Encoding standard: | ||
@@ -15,0 +22,0 @@ * `gbk`, `gb18030`, `big5`, `euc-jp`, `iso-2022-jp`, `shift_jis`, `euc-kr`. |
+12
-2
| { | ||
| "name": "@exodus/bytes", | ||
| "version": "1.10.0", | ||
| "version": "1.11.0", | ||
| "description": "Various operations on Uint8Array data", | ||
@@ -23,2 +23,3 @@ "keywords": [ | ||
| "lint": "eslint .", | ||
| "typedoc": "typedoc && mkdir -p doc/assets && cp -r theme/styles doc/assets/", | ||
| "test:javascriptcore": "npm run test:jsc --", | ||
@@ -30,3 +31,3 @@ "test:v8": "exodus-test --engine=v8:bundle", | ||
| "test:quickjs": "exodus-test --engine=quickjs:bundle", | ||
| "test:xs": "exodus-test --engine=xs:bundle", | ||
| "test:xs": "EXODUS_TEST_IGNORE='tests/whatwg.browser.test.js' exodus-test --engine=xs:bundle", | ||
| "test:engine262": "exodus-test --engine=engine262:bundle", | ||
@@ -75,2 +76,3 @@ "test:deno": "exodus-test --engine=deno:pure", | ||
| "/fallback/latin1.js", | ||
| "/fallback/percent.js", | ||
| "/fallback/multi-byte.encodings.cjs", | ||
@@ -125,2 +127,4 @@ "/fallback/multi-byte.encodings.json", | ||
| "/utf8.node.js", | ||
| "/whatwg.js", | ||
| "/whatwg.d.ts", | ||
| "/wif.js", | ||
@@ -206,2 +210,6 @@ "/wif.d.ts" | ||
| }, | ||
| "./whatwg.js": { | ||
| "types": "./whatwg.d.ts", | ||
| "default": "./whatwg.js" | ||
| }, | ||
| "./wif.js": { | ||
@@ -229,2 +237,3 @@ "types": "./wif.d.ts", | ||
| "@exodus/test": "^1.0.0-rc.109", | ||
| "@hexagon/base64": "^2.0.4", | ||
| "@noble/hashes": "^2.0.1", | ||
@@ -260,2 +269,3 @@ "@oslojs/encoding": "^1.1.0", | ||
| "text-encoding": "^0.7.0", | ||
| "typedoc": "^0.28.16", | ||
| "typescript": "^5.9.3", | ||
@@ -262,0 +272,0 @@ "uint8array-tools": "^0.0.9", |
+78
-3
@@ -13,2 +13,4 @@ # `@exodus/bytes` | ||
| See [documentation](https://exodusoss.github.io/bytes). | ||
| ## Strict | ||
@@ -122,2 +124,9 @@ | ||
| This is similar to the following snippet (but works on all engines): | ||
| ```js | ||
| // Strict encode, requiring Unicode codepoints to be valid | ||
| if (typeof string !== 'string' || !string.isWellFormed()) throw new TypeError() | ||
| return new TextEncoder().encode(string) | ||
| ``` | ||
| #### `utf8fromStringLoose(string, format = 'uint8')` | ||
@@ -133,2 +142,9 @@ | ||
| This is similar to the following snippet (but works on all engines): | ||
| ```js | ||
| // Loose encode, replacing invalid Unicode codepoints with U+FFFD | ||
| if (typeof string !== 'string') throw new TypeError() | ||
| return new TextEncoder().encode(string) | ||
| ``` | ||
| #### `utf8toString(arr)` | ||
@@ -140,2 +156,5 @@ | ||
| This is similar to `new TextDecoder('utf-8', { fatal: true, ignoreBOM: true }).decode(arr)`, | ||
| but works on all engines. | ||
| #### `utf8toStringLoose(arr)` | ||
@@ -151,2 +170,5 @@ | ||
| This is similar to `new TextDecoder('utf-8', { ignoreBOM: true }).decode(arr)`, | ||
| but works on all engines. | ||
| ### `@exodus/bytes/utf16.js` | ||
@@ -216,2 +238,11 @@ | ||
| > [!WARNING] | ||
| > This is a lower-level API for single-byte encodings. | ||
| > It might not match what you expect, as it supports both WHATWG and unicode.org encodings under | ||
| > different names, with the main intended use case for the latter being either non-web or legacy contexts. | ||
| > | ||
| > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it). | ||
| > | ||
| > Be sure to know what you are doing and check documentation when directly using encodings from this file. | ||
| Supports all single-byte encodings listed in the WHATWG Encoding standard: | ||
@@ -278,4 +309,5 @@ `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`, | ||
| Note: this is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as | ||
| those alias to `new TextDecoder('windows-1252')`. | ||
| > [!NOTE] | ||
| > This is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as those | ||
| > alias to `new TextDecoder('windows-1252')`. | ||
@@ -328,2 +360,9 @@ #### `latin1fromString(string)` | ||
| > [!WARNING] | ||
| > This is a lower-level API for legacy multi-byte encodings. | ||
| > | ||
| > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it). | ||
| > | ||
| > Be sure to know what you are doing and check documentation when directly using encodings from this file. | ||
| Supports all legacy multi-byte encodings listed in the WHATWG Encoding standard: | ||
@@ -619,3 +658,4 @@ `gbk`, `gb18030`, `big5`, `euc-jp`, `iso-2022-jp`, `shift_jis`, `euc-kr`. | ||
| Important: does not copy data, returns a view on the same underlying buffer | ||
| > [!IMPORTANT] | ||
| > Does not copy data, returns a view on the same underlying buffer | ||
@@ -813,2 +853,37 @@ ### `@exodus/bytes/encoding.js` | ||
| ### `@exodus/bytes/whatwg.js` | ||
| WHATWG helpers | ||
| ```js | ||
| import '@exodus/bytes/encoding.js' // For full legacy multi-byte encodings support | ||
| import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js' | ||
| ``` | ||
| #### `percentEncodeAfterEncoding(encoding, input, percentEncodeSet, spaceAsPlus = false)` | ||
| Implements [percent-encode after encoding](https://url.spec.whatwg.org/#string-percent-encode-after-encoding) | ||
| per WHATWG URL specification. | ||
| > [!IMPORTANT] | ||
| > You must import `@exodus/bytes/encoding.js` for this API to accept legacy multi-byte encodings. | ||
| Encodings `utf-16le`, `utf-16be`, and `replacement` are not accepted. | ||
| [C0 control percent-encode set](https://url.spec.whatwg.org/#c0-control-percent-encode-set) is | ||
| always percent-encoded. | ||
| `percentEncodeSet` is an addition to that, and must be a string of unique increasing codepoints | ||
| in range 0x20 - 0x7e, e.g. `' "#<>'`. | ||
| This method accepts [DOMStrings](https://webidl.spec.whatwg.org/#idl-DOMString) and converts them | ||
| to [USVStrings](https://webidl.spec.whatwg.org/#idl-USVString). | ||
| This is different from e.g. `encodeURI` and `encodeURIComponent` which throw on surrogates: | ||
| ```js | ||
| > percentEncodeAfterEncoding('utf8', '\ud800', ' "#$%&+,/:;<=>?@[\\]^`{|}') // component | ||
| '%EF%BF%BD' | ||
| > encodeURIComponent('\ud800') | ||
| Uncaught URIError: URI malformed | ||
| ``` | ||
| ## Changelog | ||
@@ -815,0 +890,0 @@ |
+12
-2
@@ -14,2 +14,11 @@ /** | ||
| * | ||
| * > [!WARNING] | ||
| * > This is a lower-level API for single-byte encodings. | ||
| * > It might not match what you expect, as it supports both WHATWG and unicode.org encodings under | ||
| * > different names, with the main intended use case for the latter being either non-web or legacy contexts. | ||
| * > | ||
| * > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it). | ||
| * > | ||
| * > Be sure to know what you are doing and check documentation when directly using encodings from this file. | ||
| * | ||
| * Supports all single-byte encodings listed in the WHATWG Encoding standard: | ||
@@ -99,4 +108,5 @@ * `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`, | ||
| * | ||
| * Note: this is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as | ||
| * those alias to `new TextDecoder('windows-1252')`. | ||
| * > [!NOTE] | ||
| * > This is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as those | ||
| * > alias to `new TextDecoder('windows-1252')`. | ||
| * | ||
@@ -103,0 +113,0 @@ * @param arr - The bytes to decode |
+1
-1
@@ -11,3 +11,3 @@ import * as js from './fallback/utf16.js' | ||
| const decoderFatal16 = isLE ? decoderFatalLE : decoderFatalBE | ||
| const decoderLoose16 = isLE ? decoderLooseLE : decoderFatalBE | ||
| const decoderLoose16 = isLE ? decoderLooseLE : decoderLooseBE | ||
| const { isWellFormed, toWellFormed } = String.prototype | ||
@@ -14,0 +14,0 @@ |
+20
-0
@@ -26,2 +26,9 @@ /** | ||
| * | ||
| * This is similar to the following snippet (but works on all engines): | ||
| * ```js | ||
| * // Strict encode, requiring Unicode codepoints to be valid | ||
| * if (typeof string !== 'string' || !string.isWellFormed()) throw new TypeError() | ||
| * return new TextEncoder().encode(string) | ||
| * ``` | ||
| * | ||
| * @param string - The string to encode | ||
@@ -44,2 +51,9 @@ * @param format - Output format (default: 'uint8') | ||
| * | ||
| * This is similar to the following snippet (but works on all engines): | ||
| * ```js | ||
| * // Loose encode, replacing invalid Unicode codepoints with U+FFFD | ||
| * if (typeof string !== 'string') throw new TypeError() | ||
| * return new TextEncoder().encode(string) | ||
| * ``` | ||
| * | ||
| * @param string - The string to encode | ||
@@ -61,2 +75,5 @@ * @param format - Output format (default: 'uint8') | ||
| * | ||
| * This is similar to `new TextDecoder('utf-8', { fatal: true, ignoreBOM: true }).decode(arr)`, | ||
| * but works on all engines. | ||
| * | ||
| * @param arr - The bytes to decode | ||
@@ -76,2 +93,5 @@ * @returns The decoded string | ||
| * | ||
| * This is similar to `new TextDecoder('utf-8', { ignoreBOM: true }).decode(arr)`, | ||
| * but works on all engines. | ||
| * | ||
| * @param arr - The bytes to decode | ||
@@ -78,0 +98,0 @@ * @returns The decoded string |
+4
-4
@@ -30,7 +30,7 @@ import { assertUint8 } from './assert.js' | ||
| let start = 0 | ||
| const last = res.length - 2 | ||
| // Search for EFBFBD | ||
| while (start < last) { | ||
| const last = res.length - 3 | ||
| // Search for EFBFBD (3-byte sequence) | ||
| while (start <= last) { | ||
| const pos = res.indexOf(0xef, start) | ||
| if (pos === -1) break | ||
| if (pos === -1 || pos > last) break | ||
| start = pos + 1 | ||
@@ -37,0 +37,0 @@ if (res[pos + 1] === 0xbf && res[pos + 2] === 0xbd) { |
+1
-2
@@ -9,2 +9,3 @@ import { toBase58checkSync, fromBase58checkSync } from '@exodus/bytes/base58check.js' | ||
| assertUint8(arr) | ||
| if (arr.length !== 33 && arr.length !== 34) throw new Error('Invalid WIF length') | ||
| const version = arr[0] | ||
@@ -18,3 +19,2 @@ if (expectedVersion !== undefined && version !== expectedVersion) { | ||
| if (arr.length === 33) return { version, privateKey, compressed: false } | ||
| if (arr.length !== 34) throw new Error('Invalid WIF length') | ||
| if (arr[33] !== 1) throw new Error('Invalid compression flag') | ||
@@ -27,3 +27,2 @@ return { version, privateKey, compressed: true } | ||
| assertUint8(privateKey, { length: 32, name: 'privateKey' }) | ||
| if (privateKey.length !== 32) throw new TypeError('Invalid privateKey length') | ||
| const out = new Uint8Array(compressed ? 34 : 33) | ||
@@ -30,0 +29,0 @@ out[0] = v |
Sorry, the diff of this file is too big to display
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
419087
2.64%67
4.69%6369
2.81%886
9.25%42
5%