@exodus/bytes
Advanced tools
| import { | ||
| fromSource, | ||
| getBOMEncoding, | ||
| normalizeEncoding, | ||
| E_ENCODING, | ||
| } from './fallback/encoding.api.js' | ||
| import labels from './fallback/encoding.labels.js' | ||
// Lightweight version which re-exports existing implementations on browsers,
// while still being aliased to the full impl in RN and Node.js
// WARNING: Note that browsers have bugs (which hopefully will get fixed soon)
// Pull the platform-native classes off globalThis so browser bundles don't ship the fallback impl
const { TextDecoder, TextEncoder, TextDecoderStream, TextEncoderStream } = globalThis
// Label/BOM helpers are pure JS and are always taken from the fallback module
export { normalizeEncoding, getBOMEncoding, labelToName } from './fallback/encoding.api.js'
export { TextDecoder, TextEncoder, TextDecoderStream, TextEncoderStream }
// https://encoding.spec.whatwg.org/#decode
// Decodes `input` (buffer/view) per the WHATWG legacy "decode" hook: a BOM, when
// present, is stripped and overrides `fallbackEncoding`. Throws RangeError on
// unknown encodings.
export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
  let bytes = fromSource(input)
  const sniffed = getBOMEncoding(bytes)
  // "the byte order mark is more authoritative than anything else"
  if (sniffed) bytes = bytes.subarray(sniffed === 'utf-8' ? 3 : 2)
  const encoding = sniffed || normalizeEncoding(fallbackEncoding)
  if (encoding === 'utf-8') return new TextDecoder('utf-8', { ignoreBOM: true }).decode(bytes) // fast path
  // The "replacement" encoding maps any non-empty input to a single U+FFFD
  if (encoding === 'replacement') return bytes.byteLength === 0 ? '' : '\uFFFD'
  if (!Object.hasOwn(labels, encoding)) throw new RangeError(E_ENCODING)
  return new TextDecoder(encoding, { ignoreBOM: true }).decode(bytes)
}
| export * from './encoding.js' |
| export * from './encoding.js' |
| export * from './encoding.js' |
| import labels from './encoding.labels.js' | ||
let labelsMap
export const E_ENCODING = 'Unknown encoding'
// Warning: unlike whatwg-encoding, returns lowercased labels
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
// https://encoding.spec.whatwg.org/#names-and-labels
export function normalizeEncoding(label) {
  // fast path: exact matches for the overwhelmingly common labels, before any string work
  switch (label) {
    case 'utf-8':
    case 'utf8':
    case 'UTF-8':
    case 'UTF8':
      return 'utf-8'
    case 'windows-1252':
    case 'ascii':
    case 'latin1':
      return 'windows-1252'
  }
  // full map lookup; labels must be ASCII (with ASCII whitespace), reject everything else up front
  if (/[^\w\t\n\f\r .:-]/i.test(label)) return null
  const normalized = `${label}`.trim().toLowerCase()
  if (Object.hasOwn(labels, normalized)) return normalized
  if (labelsMap === undefined) {
    // lazily invert the labels table (canonical name -> aliases) into alias -> canonical name
    labelsMap = new Map()
    for (const [name, aliases] of Object.entries(labels)) {
      for (const alias of aliases) labelsMap.set(alias, name)
    }
  }
  return labelsMap.get(normalized) ?? null
}
// TODO: make this more strict against Symbol.toStringTag
// Is not very significant though, anything faking Symbol.toStringTag could as well override
// prototypes, which is not something we protect against
// Returns true for ArrayBuffer / SharedArrayBuffer instances, including cross-realm ones.
function isAnyArrayBuffer(x) {
  // Same-realm instances: cheap instanceof checks cover the common case
  if (x instanceof ArrayBuffer) return true
  const SAB = globalThis.SharedArrayBuffer // may be absent (e.g. non-cross-origin-isolated contexts)
  if (SAB && x instanceof SAB) return true
  // Cross-realm objects: duck-check byteLength, then rely on the toString brand
  if (!x || typeof x.byteLength !== 'number') return false
  switch (Object.prototype.toString.call(x)) {
    case '[object ArrayBuffer]':
    case '[object SharedArrayBuffer]':
      return true
    default:
      return false
  }
}
// Normalizes any byte source (Uint8Array, other ArrayBufferView, ArrayBuffer or
// SharedArrayBuffer) into a Uint8Array over the same bytes, without copying.
// Detached buffers yield an empty Uint8Array; anything else throws TypeError.
export function fromSource(x) {
  if (x instanceof Uint8Array) return x
  // Any other view (DataView / typed arrays): reinterpret exactly the viewed byte range
  if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
  if (!isAnyArrayBuffer(x)) {
    throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
  }
  // Modern engines expose .detached (ES2024); detached buffers can't be wrapped
  if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
  // Old engines without .detached, try-catch
  try {
    return new Uint8Array(x)
  } catch {
    return new Uint8Array()
  }
}
// Warning: unlike whatwg-encoding, returns lowercased labels
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
// Sniffs a byte order mark: 'utf-8' | 'utf-16le' | 'utf-16be', or null when absent.
export function getBOMEncoding(input) {
  const bytes = fromSource(input) // asserts
  const [b0, b1, b2] = bytes
  if (bytes.length >= 3 && b0 === 0xef && b1 === 0xbb && b2 === 0xbf) return 'utf-8'
  if (bytes.length >= 2) {
    if (b0 === 0xff && b1 === 0xfe) return 'utf-16le'
    if (b0 === 0xfe && b1 === 0xff) return 'utf-16be'
  }
  return null
}
const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
// Unlike normalizeEncoding, case-sensitive
// https://encoding.spec.whatwg.org/#names-and-labels
// Maps any label to the spec's canonical (mixed-case) encoding name, or null if unknown.
export function labelToName(label) {
  const name = normalizeEncoding(label)
  if (name === 'utf-8') return 'UTF-8' // fast path
  if (!name) return name // unknown label: pass the null through
  // Families whose canonical names are fully uppercased (UTF-*, ISO-*, KOI8-*, EUC-*, IBM866, GBK)
  if (uppercasePrefixes.has(name.slice(0, 3))) return name.toUpperCase()
  switch (name) {
    case 'big5':
      return 'Big5'
    case 'shift_jis':
      return 'Shift_JIS'
    default:
      return name
  }
}
+43
| /** | ||
| * ### The `@exodus/bytes` package consists of submodules, there is no single export. | ||
| * Import specific submodules instead. | ||
| * | ||
| * See [README](https://github.com/ExodusOSS/bytes/blob/main/README.md). | ||
| * | ||
| * Example: | ||
| * ```js | ||
| * import { fromHex, toHex } from '@exodus/bytes/hex.js' | ||
| * import { fromBase64, toBase64, fromBase64url, toBase64url, fromBase64any } from '@exodus/bytes/base64.js' | ||
| * import { fromBase32, toBase32, fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js' | ||
| * import { fromBase58, toBase58, fromBase58xrp, toBase58xrp } from '@exodus/bytes/base58.js' | ||
| * import { fromBech32, toBech32, fromBech32m, toBech32m, getPrefix } from '@exodus/bytes/bech32.js' | ||
| * import { fromBigInt, toBigInt } from '@exodus/bytes/bigint.js' | ||
| * | ||
| * import { utf8fromString, utf8toString, utf8fromStringLoose, utf8toStringLoose } from '@exodus/bytes/utf8.js' | ||
| * import { utf16fromString, utf16toString, utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js' | ||
| * import { | ||
| * createSinglebyteDecoder, createSinglebyteEncoder, | ||
| * windows1252toString, windows1252fromString, | ||
| * latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js' | ||
| * import { createMultibyteDecoder, createMultibyteEncoder } from '@exodus/bytes/multi-byte.js' | ||
| * | ||
| * import { | ||
| * fromBase58check, toBase58check, | ||
| * fromBase58checkSync, toBase58checkSync, | ||
| * makeBase58check } from '@exodus/bytes/base58check.js' | ||
| * import { fromWifString, toWifString, fromWifStringSync, toWifStringSync } from '@exodus/bytes/wif.js' | ||
| * | ||
| * // All encodings from the WHATWG Encoding spec | ||
| * import { TextDecoder, TextEncoder, TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding.js' | ||
| * import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding.js' | ||
| * | ||
| * // Omits legacy multi-byte decoders to save bundle size | ||
| * import { TextDecoder, TextEncoder, TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-lite.js' | ||
| * import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-lite.js' | ||
| * | ||
| * // In browser bundles, uses built-in TextDecoder / TextEncoder to save bundle size | ||
| * import { TextDecoder, TextEncoder, TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-browser.js' | ||
| * import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-browser.js' | ||
| * ``` | ||
| */ | ||
| declare module "@exodus/bytes" {} |
+5
// Deliberate top-level throw: the package root has no exports by design, so importing
// it must fail loudly at load time instead of silently yielding an empty module.
throw new Error(
  `This package consists of submodules, there is no single export. Import specific submodules instead.
See README: https://github.com/ExodusOSS/bytes/blob/main/README.md
`
)
+0
-1
@@ -24,2 +24,1 @@ /// <reference types="node" /> | ||
| export function typedView(arr: ArrayBufferView, format: OutputFormat): Uint8Array | Buffer; | ||
+1
-1
@@ -210,3 +210,3 @@ import { typedView } from './array.js' | ||
| at = k + 1 | ||
| if (c !== 0 || at < zeros) throw new Error('Unexpected') // unreachable | ||
| if (c !== 0 || at < zeros) /* c8 ignore next */ throw new Error('Unexpected') // unreachable | ||
| } | ||
@@ -213,0 +213,0 @@ } |
+1
-2
@@ -1,2 +0,2 @@ | ||
| import { hashSync } from '@exodus/crypto/hash' // eslint-disable-line @exodus/import/no-deprecated | ||
| import { sha256 } from '@noble/hashes/sha2.js' | ||
| import { makeBase58check } from './fallback/base58check.js' | ||
@@ -8,3 +8,2 @@ | ||
| // eslint-disable-next-line @exodus/import/no-deprecated | ||
| const sha256 = (x) => hashSync('sha256', x, 'uint8') | ||
| const hash256sync = (x) => sha256(sha256(x)) | ||
@@ -11,0 +10,0 @@ const hash256 = hash256sync // See note at the top |
+0
-1
@@ -76,2 +76,1 @@ /// <reference types="node" /> | ||
| export function fromBase64any(str: string, options: FromBase64Options & { format: 'buffer' }): Buffer; | ||
@@ -50,2 +50,3 @@ const { Buffer, TextEncoder, TextDecoder } = globalThis | ||
| /* c8 ignore next */ | ||
| return false // eslint-disable-line no-unreachable | ||
@@ -52,0 +53,0 @@ } |
+6
-82
@@ -8,10 +8,11 @@ // We can't return native TextDecoder if it's present, as Node.js one is broken on windows-1252 and we fix that | ||
| import labels from './encoding.labels.js' | ||
| import { fromSource, getBOMEncoding, normalizeEncoding, E_ENCODING } from './encoding.api.js' | ||
| import { unfinishedBytes } from './encoding.util.js' | ||
| export { labelToName, getBOMEncoding, normalizeEncoding } from './encoding.api.js' | ||
| const E_OPTIONS = 'The "options" argument must be of type object' | ||
| const E_ENCODING = 'Unknown encoding' | ||
| const replacementChar = '\uFFFD' | ||
| const E_MULTI = | ||
| 'Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support' | ||
| const replacementChar = '\uFFFD' | ||
| const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore | ||
@@ -24,41 +25,4 @@ let createMultibyteDecoder | ||
| let labelsMap | ||
| // Warning: unlike whatwg-encoding, returns lowercased labels | ||
| // Those are case-insensitive and that's how TextDecoder encoding getter normalizes them | ||
| // https://encoding.spec.whatwg.org/#names-and-labels | ||
| export function normalizeEncoding(label) { | ||
| // fast path | ||
| if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8' | ||
| if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252' | ||
| // full map | ||
| if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace) | ||
| const low = `${label}`.trim().toLowerCase() | ||
| if (Object.hasOwn(labels, low)) return low | ||
| if (!labelsMap) { | ||
| labelsMap = new Map() | ||
| for (const [label, aliases] of Object.entries(labels)) { | ||
| for (const alias of aliases) labelsMap.set(alias, label) | ||
| } | ||
| } | ||
| const mapped = labelsMap.get(low) | ||
| if (mapped) return mapped | ||
| return null | ||
| } | ||
| const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false }) | ||
| // TODO: make this more strict against Symbol.toStringTag | ||
| // Is not very significant though, anything faking Symbol.toStringTag could as well override | ||
| // prototypes, which is not something we protect against | ||
| function isAnyArrayBuffer(x) { | ||
| if (x instanceof ArrayBuffer) return true | ||
| if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true | ||
| if (!x || typeof x.byteLength !== 'number') return false | ||
| const s = Object.prototype.toString.call(x) | ||
| return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]' | ||
| } | ||
| function isAnyUint8Array(x) { | ||
@@ -70,18 +34,2 @@ if (x instanceof Uint8Array) return true | ||
| const fromSource = (x) => { | ||
| if (x instanceof Uint8Array) return x | ||
| if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength) | ||
| if (isAnyArrayBuffer(x)) { | ||
| if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x) | ||
| // Old engines without .detached, try-catch | ||
| try { | ||
| return new Uint8Array(x) | ||
| } catch { | ||
| return new Uint8Array() | ||
| } | ||
| } | ||
| throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView') | ||
| } | ||
| function unicodeDecoder(encoding, loose) { | ||
@@ -221,2 +169,3 @@ if (encoding === 'utf-8') return loose ? utf8toStringLoose : utf8toString // likely | ||
| /* c8 ignore next */ | ||
| throw new Error('Unreachable') | ||
@@ -348,13 +297,2 @@ } | ||
| // Warning: unlike whatwg-encoding, returns lowercased labels | ||
| // Those are case-insensitive and that's how TextDecoder encoding getter normalizes them | ||
| export function getBOMEncoding(input) { | ||
| const u8 = fromSource(input) // asserts | ||
| if (u8.length >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8' | ||
| if (u8.length < 2) return null | ||
| if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le' | ||
| if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be' | ||
| return null | ||
| } | ||
| // https://encoding.spec.whatwg.org/#decode | ||
@@ -376,3 +314,3 @@ // Warning: encoding sniffed from BOM takes preference over the supplied one | ||
| suffix = replacementChar | ||
| u8 = u8.subarray(0, -1) | ||
| u8 = u8.subarray(0, -unfinishedBytes(u8, u8.byteLength, enc)) | ||
| } | ||
@@ -396,15 +334,1 @@ | ||
| } | ||
| const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk']) | ||
| // Unlike normalizeEncoding, case-sensitive | ||
| // https://encoding.spec.whatwg.org/#names-and-labels | ||
| export function labelToName(label) { | ||
| const enc = normalizeEncoding(label) | ||
| if (enc === 'utf-8') return 'UTF-8' // fast path | ||
| if (!enc) return enc | ||
| if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase() | ||
| if (enc === 'big5') return 'Big5' | ||
| if (enc === 'shift_jis') return 'Shift_JIS' | ||
| return enc | ||
| } |
@@ -40,2 +40,3 @@ import { | ||
| const d = u32[i + 3] | ||
| // "(a | b | c | d) & mask" is slower on Hermes though faster on v8 | ||
| if (a & 0x80_80_80_80 || b & 0x80_80_80_80 || c & 0x80_80_80_80 || d & 0x80_80_80_80) break | ||
@@ -42,0 +43,0 @@ } |
+456
-71
@@ -1,2 +0,3 @@ | ||
| import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2 } from './latin1.js' | ||
| import { E_STRING } from './_utils.js' | ||
| import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2, encodeAscii } from './latin1.js' | ||
| import { getTable } from './multi-byte.table.js' | ||
@@ -6,3 +7,3 @@ | ||
| // TODO: optimize | ||
| /* Decoders */ | ||
@@ -12,65 +13,77 @@ // If the decoder is not cleared properly, state can be preserved between non-streaming calls! | ||
| // Common between euc-kr and big5 | ||
| function bigDecoder(err, pair) { | ||
| let lead = 0 | ||
| let oi = 0 | ||
| let o16 | ||
| // All except iso-2022-jp are ASCII supersets | ||
| // When adding something that is not an ASCII superset, adjust the ASCII fast path | ||
| const mappers = { | ||
| // https://encoding.spec.whatwg.org/#euc-kr-decoder | ||
| 'euc-kr': (err) => { | ||
| const euc = getTable('euc-kr') | ||
| let lead = 0 | ||
| let oi = 0 | ||
| let o16 | ||
| const decodeLead = (b) => { | ||
| const p = pair(lead, b) | ||
| lead = 0 | ||
| if (typeof p === 'number') { | ||
| o16[oi++] = p | ||
| } else if (p) { | ||
| // This is still faster than string concatenation. Can we optimize strings though? | ||
| for (let i = 0; i < p.length; i++) o16[oi++] = p.charCodeAt(i) | ||
| } else { | ||
| o16[oi++] = err() | ||
| if (b < 128) o16[oi++] = b | ||
| const decodeLead = (b) => { | ||
| if (b < 0x41 || b > 0xfe) { | ||
| lead = 0 | ||
| o16[oi++] = err() | ||
| if (b < 128) o16[oi++] = b | ||
| } else { | ||
| const p = euc[(lead - 0x81) * 190 + b - 0x41] | ||
| lead = 0 | ||
| if (p) { | ||
| o16[oi++] = p | ||
| } else { | ||
| o16[oi++] = err() | ||
| if (b < 128) o16[oi++] = b | ||
| } | ||
| } | ||
| } | ||
| } | ||
| const decode = (arr, start, end, stream) => { | ||
| let i = start | ||
| o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte | ||
| oi = 0 | ||
| const decode = (arr, start, end, stream) => { | ||
| let i = start | ||
| o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte | ||
| oi = 0 | ||
| if (lead && i < end) decodeLead(arr[i++]) | ||
| while (i < end) { | ||
| const b = arr[i++] | ||
| if (b < 128) { | ||
| o16[oi++] = b | ||
| } else if (b === 0x80 || b === 0xff) { | ||
| // Fast path | ||
| if (!lead) { | ||
| for (const last1 = end - 1; i < last1; ) { | ||
| const l = arr[i] | ||
| if (l < 128) { | ||
| o16[oi++] = l | ||
| i++ | ||
| } else { | ||
| if (l === 0x80 || l === 0xff) break | ||
| const b = arr[i + 1] | ||
| if (b < 0x41 || b === 0xff) break | ||
| const p = euc[(l - 0x81) * 190 + b - 0x41] | ||
| if (!p) break | ||
| o16[oi++] = p | ||
| i += 2 | ||
| } | ||
| } | ||
| } | ||
| if (lead && i < end) decodeLead(arr[i++]) | ||
| while (i < end) { | ||
| const b = arr[i++] | ||
| if (b < 128) { | ||
| o16[oi++] = b | ||
| } else if (b === 0x80 || b === 0xff) { | ||
| o16[oi++] = err() | ||
| } else { | ||
| lead = b | ||
| if (i < end) decodeLead(arr[i++]) | ||
| } | ||
| } | ||
| if (lead && !stream) { | ||
| lead = 0 | ||
| o16[oi++] = err() | ||
| } else { | ||
| lead = b | ||
| if (i < end) decodeLead(arr[i++]) | ||
| } | ||
| } | ||
| if (lead && !stream) { | ||
| lead = 0 | ||
| o16[oi++] = err() | ||
| const res = decodeUCS2(o16, oi) | ||
| o16 = null | ||
| return res | ||
| } | ||
| const res = decodeUCS2(o16, oi) | ||
| o16 = null | ||
| return res | ||
| } | ||
| return { decode, isAscii: () => lead === 0 } | ||
| } | ||
| // All except iso-2022-jp are ASCII supersets | ||
| // When adding something that is not an ASCII superset, adjust the ASCII fast path | ||
| const REP = 0xff_fd | ||
| const mappers = { | ||
| // https://encoding.spec.whatwg.org/#euc-kr-decoder | ||
| 'euc-kr': (err) => { | ||
| const euc = getTable('euc-kr') | ||
| return bigDecoder(err, (l, b) => { | ||
| if (b < 0x41 || b > 0xfe) return | ||
| const cp = euc[(l - 0x81) * 190 + b - 0x41] | ||
| return cp !== undefined && cp !== REP ? cp : undefined | ||
| }) | ||
| return { decode, isAscii: () => lead === 0 } | ||
| }, | ||
@@ -101,3 +114,3 @@ // https://encoding.spec.whatwg.org/#euc-jp-decoder | ||
| j12 = false | ||
| if (cp !== undefined && cp !== REP) { | ||
| if (cp) { | ||
| o16[oi++] = cp | ||
@@ -116,2 +129,26 @@ } else { | ||
| // Fast path, non-j12 | ||
| // lead = 0 means j12 = 0 | ||
| if (!lead) { | ||
| for (const last1 = end - 1; i < last1; ) { | ||
| const l = arr[i] | ||
| if (l < 128) { | ||
| o16[oi++] = l | ||
| i++ | ||
| } else { | ||
| const b = arr[i + 1] | ||
| if (l === 0x8e && b >= 0xa1 && b <= 0xdf) { | ||
| o16[oi++] = 0xfe_c0 + b | ||
| i += 2 | ||
| } else { | ||
| if (l < 0xa1 || l === 0xff || b < 0xa1 || b === 0xff) break | ||
| const cp = jis0208[(l - 0xa1) * 94 + b - 0xa1] | ||
| if (!cp) break | ||
| o16[oi++] = cp | ||
| i += 2 | ||
| } | ||
| } | ||
| } | ||
| } | ||
| if (lead && i < end) decodeLead(arr[i++]) | ||
@@ -194,3 +231,3 @@ if (lead && i < end) decodeLead(arr[i++]) // could be two leads, but no more | ||
| const cp = jis0208[(lead - 0x21) * 94 + b - 0x21] | ||
| if (cp !== undefined && cp !== REP) return cp | ||
| if (cp) return cp | ||
| } | ||
@@ -322,3 +359,3 @@ | ||
| const cp = jis0208[p] | ||
| if (cp !== undefined && cp !== REP) { | ||
| if (cp) { | ||
| o16[oi++] = cp | ||
@@ -338,2 +375,30 @@ return | ||
| // Fast path | ||
| if (!lead) { | ||
| for (const last1 = end - 1; i < last1; ) { | ||
| const l = arr[i] | ||
| if (l <= 0x80) { | ||
| o16[oi++] = l | ||
| i++ | ||
| } else if (l >= 0xa1 && l <= 0xdf) { | ||
| o16[oi++] = 0xfe_c0 + l | ||
| i++ | ||
| } else { | ||
| if (l === 0xa0 || l > 0xfc) break | ||
| const b = arr[i + 1] | ||
| if (b < 0x40 || b > 0xfc || b === 0x7f) break | ||
| const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41) | ||
| if (p >= 8836 && p <= 10_715) { | ||
| o16[oi++] = 0xe0_00 - 8836 + p | ||
| i += 2 | ||
| } else { | ||
| const cp = jis0208[p] | ||
| if (!cp) break | ||
| o16[oi++] = cp | ||
| i += 2 | ||
| } | ||
| } | ||
| } | ||
| } | ||
| if (lead && i < end) decodeLead(arr[i++]) | ||
@@ -396,2 +461,32 @@ while (i < end) { | ||
| // Fast path for 2-byte only | ||
| // pushback is always empty at start, and g1 = 0 means g2 = g3 = 0 | ||
| if (g1 === 0) { | ||
| for (const last1 = end - 1; i < last1; ) { | ||
| const b = arr[i] | ||
| if (b < 128) { | ||
| o16[oi++] = b | ||
| i++ | ||
| } else if (b === 0x80) { | ||
| o16[oi++] = 0x20_ac | ||
| i++ | ||
| } else { | ||
| if (b === 0xff) break | ||
| const n = arr[i + 1] | ||
| let cp | ||
| if (n < 0x7f) { | ||
| if (n < 0x40) break | ||
| cp = gb18030[(b - 0x81) * 190 + n - 0x40] | ||
| } else { | ||
| if (n === 0xff || n === 0x7f) break | ||
| cp = gb18030[(b - 0x81) * 190 + n - 0x41] | ||
| } | ||
| if (!cp) break | ||
| o16[oi++] = cp // 16-bit | ||
| i += 2 | ||
| } | ||
| } | ||
| } | ||
| // First, dump everything until EOF | ||
@@ -406,7 +501,3 @@ // Same as the full loop, but without EOF handling | ||
| if (g3) { | ||
| if (b < 0x30 || b > 0x39) { | ||
| pushback.push(b, g3, g2) | ||
| g1 = g2 = g3 = 0 | ||
| o16[oi++] = err() | ||
| } else { | ||
| if (b <= 0x39 && b >= 0x30) { | ||
| const p = index( | ||
@@ -425,2 +516,6 @@ (g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30 | ||
| } | ||
| } else { | ||
| pushback.push(b, g3, g2) | ||
| g1 = g2 = g3 = 0 | ||
| o16[oi++] = err() | ||
| } | ||
@@ -434,3 +529,3 @@ } else if (b >= 0x81 && b <= 0xfe) { | ||
| } | ||
| } else if (b >= 0x30 && b <= 0x39) { | ||
| } else if (b <= 0x39 && b >= 0x30) { | ||
| g2 = b | ||
@@ -444,3 +539,3 @@ } else { | ||
| g1 = 0 | ||
| if (cp !== undefined && cp !== REP) { | ||
| if (cp) { | ||
| o16[oi++] = cp // 16-bit | ||
@@ -479,6 +574,81 @@ } else { | ||
| const big5 = getTable('big5') | ||
| return bigDecoder(err, (l, b) => { | ||
| if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) return | ||
| return big5[(l - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)] // strings | ||
| }) | ||
| let lead = 0 | ||
| let oi = 0 | ||
| let o16 | ||
| const decodeLead = (b) => { | ||
| if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) { | ||
| lead = 0 | ||
| o16[oi++] = err() | ||
| if (b < 128) o16[oi++] = b | ||
| } else { | ||
| const p = big5[(lead - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)] | ||
| lead = 0 | ||
| if (p > 0x1_00_00) { | ||
| o16[oi++] = p >> 16 | ||
| o16[oi++] = p & 0xff_ff | ||
| } else if (p) { | ||
| o16[oi++] = p | ||
| } else { | ||
| o16[oi++] = err() | ||
| if (b < 128) o16[oi++] = b | ||
| } | ||
| } | ||
| } | ||
| // eslint-disable-next-line sonarjs/no-identical-functions | ||
| const decode = (arr, start, end, stream) => { | ||
| let i = start | ||
| o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte | ||
| oi = 0 | ||
| // Fast path | ||
| if (!lead) { | ||
| for (const last1 = end - 1; i < last1; ) { | ||
| const l = arr[i] | ||
| if (l < 128) { | ||
| o16[oi++] = l | ||
| i++ | ||
| } else { | ||
| if (l === 0x80 || l === 0xff) break | ||
| const b = arr[i + 1] | ||
| if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) break | ||
| const p = big5[(l - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)] | ||
| if (p > 0x1_00_00) { | ||
| o16[oi++] = p >> 16 | ||
| o16[oi++] = p & 0xff_ff | ||
| } else { | ||
| if (!p) break | ||
| o16[oi++] = p | ||
| } | ||
| i += 2 | ||
| } | ||
| } | ||
| } | ||
| if (lead && i < end) decodeLead(arr[i++]) | ||
| while (i < end) { | ||
| const b = arr[i++] | ||
| if (b < 128) { | ||
| o16[oi++] = b | ||
| } else if (b === 0x80 || b === 0xff) { | ||
| o16[oi++] = err() | ||
| } else { | ||
| lead = b | ||
| if (i < end) decodeLead(arr[i++]) | ||
| } | ||
| } | ||
| if (lead && !stream) { | ||
| lead = 0 | ||
| o16[oi++] = err() | ||
| } | ||
| const res = decodeUCS2(o16, oi) | ||
| o16 = null | ||
| return res | ||
| } | ||
| return { decode, isAscii: () => lead === 0 } | ||
| }, | ||
@@ -498,3 +668,3 @@ } | ||
| const onErr = loose | ||
| ? () => REP | ||
| ? () => 0xff_fd | ||
| : () => { | ||
@@ -521,1 +691,216 @@ // The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal | ||
| } | ||
/* Encoders */
// Cache of lazily-built encode maps, keyed by encoding id (see getMap below)
const maps = new Map()
// gb18030 exception pairs (low byte of U+E7xx / U+E8xx codepoint -> low byte of the
// mapped two-byte code); applied at the end of getMap for the gb18030/gbk family
const e7 = [[148, 236], [149, 237], [150, 243]] // prettier-ignore
const e8 = [[30, 89], [38, 97], [43, 102], [44, 103], [50, 109], [67, 126], [84, 144], [100, 160]] // prettier-ignore
// Pointer -> packed two-byte code ((lead << 8) | trail) for each encoding; the lead/trail
// arithmetic mirrors the per-encoding encoder steps of the WHATWG Encoding spec
// (e.g. big5: lead = pointer / 157 + 0x81, trail offset 0x40 or 0x62)
const preencoders = {
  __proto__: null, // dictionary object: no prototype, safe for arbitrary keys
  big5: (p) => ((((p / 157) | 0) + 0x81) << 8) | ((p % 157 < 0x3f ? 0x40 : 0x62) + (p % 157)),
  shift_jis: (p) => {
    const l = (p / 188) | 0
    const t = p % 188
    // lead skips the 0xA1-0xDF halfwidth-katakana range; trail skips 0x7F
    return ((l + (l < 0x1f ? 0x81 : 0xc1)) << 8) | ((t < 0x3f ? 0x40 : 0x41) + t)
  },
  'euc-jp': (p) => ((((p / 94) | 0) + 0xa1) << 8) | ((p % 94) + 0xa1),
  'euc-kr': (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190) + 0x41),
  gb18030: (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190 < 0x3f ? 0x40 : 0x41) + (p % 190)),
}
// gbk shares gb18030's two-byte pointer layout
preencoders.gbk = preencoders.gb18030
// We accept that encoders use non-trivial amount of mem, for perf
// most are 128 KiB mem, big5 is 380 KiB, lazy-loaded at first use
// Builds (and caches) the encode map for encoding `id`: a Uint16Array of length `size`
// indexed by codepoint, holding the packed byte pair from preencoders (or pointer + 1
// when no preencoder exists); 0 means "not encodable via this map".
function getMap(id, size) {
  const cached = maps.get(id)
  if (cached) return cached
  // Pick the decode table to invert: gbk reuses gb18030's table,
  // euc-jp and shift_jis both encode through the shared jis0208 table
  let tname = id
  const sjis = id === 'shift_jis'
  if (id === 'gbk') tname = 'gb18030'
  if (id === 'euc-jp' || sjis) tname = 'jis0208'
  const table = getTable(tname)
  const map = new Uint16Array(size)
  const enc = preencoders[id] || ((p) => p + 1)
  // Invert the decode table: for each pointer i with codepoint c, record c -> encoded(i)
  for (let i = 0; i < table.length; i++) {
    const c = table[i]
    if (!c) continue
    if (id === 'big5') {
      // Big5 encoder only uses pointers >= (0xA1 - 0x81) * 157 = 5024
      if (i < 5024) continue // this also skips multi-codepoint strings
      // In big5, all return first entries except for these
      if (
        map[c] &&
        c !== 0x25_50 &&
        c !== 0x25_5e &&
        c !== 0x25_61 &&
        c !== 0x25_6a &&
        c !== 0x53_41 &&
        c !== 0x53_45
      ) {
        continue
      }
    } else {
      // NOTE(review): presumably excludes a duplicated jis0208 pointer region so that
      // earlier (canonical) pointers win for Shift_JIS — confirm against the spec's
      // "index shift_jis pointer" definition
      if (sjis && i >= 8272 && i <= 8835) continue
      if (map[c]) continue // first pointer wins
    }
    if (c > 0xff_ff) {
      // always a single codepoint here
      const s = String.fromCharCode(c >> 16, c & 0xff_ff)
      map[s.codePointAt(0)] = enc(i)
    } else {
      map[c] = enc(i)
    }
  }
  // ASCII always encodes to itself
  for (let i = 0; i < 0x80; i++) map[i] = i
  if (sjis || id === 'euc-jp') {
    if (sjis) map[0x80] = 0x80
    // Halfwidth katakana U+FF61..U+FF9F: single byte 0xA1..0xDF for shift_jis,
    // 0x8EA1..0x8EDF (two bytes) for euc-jp
    const d = sjis ? 0xfe_c0 : 0x70_c0
    for (let i = 0xff_61; i <= 0xff_9f; i++) map[i] = i - d
    map[0x22_12] = map[0xff_0d] // U+2212 MINUS SIGN encodes like U+FF0D FULLWIDTH HYPHEN-MINUS
    map[0xa5] = 0x5c // U+00A5 YEN SIGN -> backslash position
    map[0x20_3e] = 0x7e // U+203E OVERLINE -> tilde position
  } else if (tname === 'gb18030') {
    if (id === 'gbk') map[0x20_ac] = 0x80 // gbk maps U+20AC EURO SIGN to single byte 0x80
    // Private-use-area exceptions for the gb18030 family
    for (let i = 0xe7_8d; i <= 0xe7_93; i++) map[i] = i - 0x40_b4
    for (const [a, b] of e7) map[0xe7_00 | a] = 0xa6_00 | b
    for (const [a, b] of e8) map[0xe8_00 | a] = 0xfe_00 | b
  }
  maps.set(id, map)
  return map
}
const encoders = new Set(['big5', 'euc-kr', 'euc-jp', 'shift_jis', 'gbk', 'gb18030'])
// Matches any char outside Latin-1; used to gate the pure-ASCII fast path below
const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
let gb18030r // lazily-loaded gb18030 ranges table ([pointer, codepoint] pairs)
/**
 * Creates an encode function (string -> Uint8Array) for one of the supported legacy
 * multi-byte encodings.
 *
 * @param {string} enc - one of big5 / euc-kr / euc-jp / shift_jis / gbk / gb18030
 * @param {Function} [onError] - called as onError(code, u8, i) on unencodable codepoints;
 *   presumably writes replacement bytes into u8 at offset i and returns the byte count
 *   written (its return is added to i) — TODO confirm against callers. When absent,
 *   unencodable input throws TypeError.
 * @throws {RangeError} for unsupported encodings
 */
// NOTE(review): E_STRICT is used below but is not among the imports visible in this
// view — confirm it is imported/defined in the full file
export function multibyteEncoder(enc, onError) {
  if (!encoders.has(enc)) throw new RangeError('Unsupported encoding')
  const size = enc === 'big5' ? 0x2_f8_a7 : 0x1_00_00 // for big5, max codepoint in table + 1
  const width = enc === 'gb18030' ? 4 : 2 // worst-case bytes per UTF-16 code unit
  const map = getMap(enc, size)
  if (enc === 'gb18030' && !gb18030r) gb18030r = getTable('gb18030-ranges')
  return (str) => {
    if (typeof str !== 'string') throw new TypeError(E_STRING)
    // Fast path: Latin-1-only strings are attempted as pure ASCII first
    if (!NON_LATIN.test(str)) {
      try {
        return encodeAscii(str, E_STRICT)
      } catch {}
    }
    const length = str.length
    const u8 = new Uint8Array(length * width) // worst-case output size, trimmed at the end
    let i = 0 // output byte offset
    // Consume the ASCII prefix one byte per char
    while (i < length) {
      const x = str.charCodeAt(i)
      if (x >= 128) break
      u8[i++] = x
    }
    // eslint-disable-next-line unicorn/consistent-function-scoping
    const err = (code) => {
      if (onError) return onError(code, u8, i)
      throw new TypeError(E_STRICT)
    }
    if (!map || map.length < size) /* c8 ignore next */ throw new Error('Unreachable') // Important for perf
    if (enc === 'gb18030') {
      // Deduping this branch hurts other encoders perf
      // Encodes one codepoint as a 4-byte gb18030 sequence via the ranges table
      const encode = (cp) => {
        let a = 0, b = 0 // prettier-ignore
        // Linear scan for the last range entry at or below cp
        for (const [c, d] of gb18030r) {
          if (d > cp) break
          a = c
          b = d
        }
        // U+E7C7 is the spec's single pointer exception (pointer 7457)
        let rp = cp === 0xe7_c7 ? 7457 : a + cp - b
        u8[i++] = 0x81 + ((rp / 12_600) | 0)
        rp %= 12_600
        u8[i++] = 0x30 + ((rp / 1260) | 0)
        rp %= 1260
        u8[i++] = 0x81 + ((rp / 10) | 0)
        u8[i++] = 0x30 + (rp % 10)
      }
      for (let j = i; j < length; j++) {
        const x = str.charCodeAt(j)
        if (x >= 0xd8_00 && x < 0xe0_00) {
          // Surrogate handling: only a well-formed high+low pair becomes a codepoint
          if (x >= 0xdc_00 || j + 1 === length) {
            i += err(x) // lone
          } else {
            const x1 = str.charCodeAt(j + 1)
            if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
              i += err(x) // lone
            } else {
              j++ // consume x1
              encode(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
            }
          }
        } else {
          const e = map[x]
          if (e & 0xff_00) {
            // Two-byte code packed as (lead << 8) | trail
            u8[i++] = e >> 8
            u8[i++] = e & 0xff
          } else if (e || x === 0) {
            u8[i++] = e // single byte (e may be 0 only for NUL)
          } else if (x === 0xe5_e5) {
            i += err(x) // U+E5E5 is excluded from the gb18030 ranges per spec
          } else {
            encode(x) // anything else falls through to the 4-byte form
          }
        }
      }
    } else {
      // Astral-codepoint handler: big5's map covers codepoints above U+FFFF,
      // the other two-byte encodings can only report an error
      const long =
        enc === 'big5'
          ? (x) => {
              const e = map[x]
              if (e & 0xff_00) {
                u8[i++] = e >> 8
                u8[i++] = e & 0xff
              } else if (e || x === 0) {
                u8[i++] = e
              } else {
                i += err(x)
              }
            }
          : (x) => {
              i += err(x)
            }
      for (let j = i; j < length; j++) {
        const x = str.charCodeAt(j)
        if (x >= 0xd8_00 && x < 0xe0_00) {
          // Surrogate handling, same shape as the gb18030 branch above
          if (x >= 0xdc_00 || j + 1 === length) {
            i += err(x) // lone
          } else {
            const x1 = str.charCodeAt(j + 1)
            if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
              i += err(x) // lone
            } else {
              j++ // consume x1
              long(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
            }
          }
        } else {
          const e = map[x]
          if (e & 0xff_00) {
            u8[i++] = e >> 8
            u8[i++] = e & 0xff
          } else if (e || x === 0) {
            u8[i++] = e
          } else {
            i += err(x)
          }
        }
      }
    }
    // Trim to the bytes actually written
    return i === u8.length ? u8 : u8.subarray(0, i)
  }
}
@@ -43,3 +43,3 @@ import { fromBase64url } from '@exodus/bytes/base64.js' | ||
| function unwrap(res, t, pos, stringMode = false) { | ||
| function unwrap(res, t, pos, highMode = false) { | ||
| let code = 0 | ||
@@ -59,5 +59,10 @@ for (let i = 0; i < t.length; i++) { | ||
| if (stringMode) { | ||
| if (highMode) { | ||
| for (let k = 0; k < x; k++, pos++, code++) { | ||
| res[pos] = code <= 0xff_ff ? code : String.fromCodePoint(code) | ||
| if (code <= 0xff_ff) { | ||
| res[pos] = code | ||
| } else { | ||
| const c = String.fromCodePoint(code) | ||
| res[pos] = (c.charCodeAt(0) << 16) | c.charCodeAt(1) | ||
| } | ||
| } | ||
@@ -69,12 +74,12 @@ } else { | ||
| } else if (x[0] === '$' && Object.hasOwn(indices, x)) { | ||
| pos = unwrap(res, indices[x], pos, stringMode) // self-reference using shared chunks | ||
| } else if (stringMode) { | ||
| pos = unwrap(res, indices[x], pos, highMode) // self-reference using shared chunks | ||
| } else if (highMode) { | ||
| const s = [...utf16toString(loadBase64(x), 'uint8-le')] // splits by codepoints | ||
| let char | ||
| let c | ||
| for (let i = 0; i < s.length; ) { | ||
| char = s[i++] | ||
| res[pos++] = char.length === 1 ? char.charCodeAt(0) : char // strings only for high codepoints | ||
| c = s[i++] | ||
| res[pos++] = c.length === 1 ? c.charCodeAt(0) : (c.charCodeAt(0) << 16) | c.charCodeAt(1) | ||
| } | ||
| code = char.codePointAt(0) + 1 | ||
| code = c.codePointAt(0) + 1 | ||
| } else { | ||
@@ -107,13 +112,13 @@ const u16 = to16input(loadBase64(x), true) // data is little-endian | ||
| if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding') | ||
| res = new Array(sizes[id]) // array of strings or undefined | ||
| res = new Uint32Array(sizes[id]) // array of strings or undefined | ||
| unwrap(res, indices[id], 0, true) | ||
| // Pointer code updates are embedded into the table | ||
| res[1133] = '\xCA\u0304' | ||
| res[1135] = '\xCA\u030C' | ||
| res[1164] = '\xEA\u0304' | ||
| res[1166] = '\xEA\u030C' | ||
| // These are skipped in encoder as encoder uses only pointers >= (0xA1 - 0x81) * 157 | ||
| res[1133] = 0xca_03_04 | ||
| res[1135] = 0xca_03_0c | ||
| res[1164] = 0xea_03_04 | ||
| res[1166] = 0xea_03_0c | ||
| } else { | ||
| if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding') | ||
| res = new Uint16Array(sizes[id]) | ||
| res.fill(0xff_fd) | ||
| unwrap(res, indices[id], 0, false) | ||
@@ -120,0 +125,0 @@ } |
@@ -16,3 +16,3 @@ import { asciiPrefix, decodeAscii, decodeLatin1 } from './latin1.js' | ||
| function getEncoding(encoding) { | ||
| export function getEncoding(encoding) { | ||
| assertEncoding(encoding) | ||
@@ -19,0 +19,0 @@ if (encoding === xUserDefined) return Array.from({ length: 128 }, (_, i) => 0xf7_80 + i) |
+45
-26
@@ -15,5 +15,21 @@ import { decodeUCS2, encodeCharcodes } from './latin1.js' | ||
| if (le === isLE) return to16(u8.byteOffset % 2 === 0 ? u8 : Uint8Array.from(u8)) | ||
| return to16(swap16(Uint8Array.from(u8))) | ||
| } | ||
| const res = new Uint8Array(u8.length) | ||
| export const decode = (u16, loose = false, checked = false) => { | ||
| if (checked || isWellFormed(u16)) return decodeUCS2(u16) | ||
| if (!loose) throw new TypeError(E_STRICT) | ||
| return decodeUCS2(toWellFormed(Uint16Array.from(u16))) // cloned for replacement | ||
| } | ||
| export function encode(str, loose = false, checked = false, swapped = false) { | ||
| const arr = new Uint16Array(str.length) | ||
| if (checked) return swapped ? encodeCheckedSwapped(str, arr) : encodeChecked(str, arr) | ||
| return swapped ? encodeUncheckedSwapped(str, arr, loose) : encodeUnchecked(str, arr, loose) | ||
| } | ||
| /* eslint-disable @exodus/mutable/no-param-reassign-prop-only */ | ||
| // Assumes checked length % 2 === 0, otherwise does not swap tail | ||
| function swap16(u8) { | ||
| let i = 0 | ||
@@ -25,6 +41,6 @@ for (const last3 = u8.length - 3; i < last3; i += 4) { | ||
| const x3 = u8[i + 3] | ||
| res[i] = x1 | ||
| res[i + 1] = x0 | ||
| res[i + 2] = x3 | ||
| res[i + 3] = x2 | ||
| u8[i] = x1 | ||
| u8[i + 1] = x0 | ||
| u8[i + 2] = x3 | ||
| u8[i + 3] = x2 | ||
| } | ||
@@ -35,23 +51,10 @@ | ||
| const x1 = u8[i + 1] | ||
| res[i] = x1 | ||
| res[i + 1] = x0 | ||
| u8[i] = x1 | ||
| u8[i + 1] = x0 | ||
| } | ||
| return to16(res) | ||
| return u8 | ||
| } | ||
| export const decode = (u16, loose = false, checked = false) => { | ||
| if (checked || isWellFormed(u16)) return decodeUCS2(u16) | ||
| if (!loose) throw new TypeError(E_STRICT) | ||
| return decodeUCS2(toWellFormed(Uint16Array.from(u16))) // cloned for replacement | ||
| } | ||
| export function encode(str, loose = false, checked = false, swapped = false) { | ||
| const arr = new Uint16Array(str.length) | ||
| if (checked) return swapped ? encodeCheckedSwapped(str, arr) : encodeChecked(str, arr) | ||
| return swapped ? encodeUncheckedSwapped(str, arr, loose) : encodeUnchecked(str, arr, loose) | ||
| } | ||
| // Splitting paths into small functions helps (at least on SpiderMonkey) | ||
| /* eslint-disable @exodus/mutable/no-param-reassign-prop-only */ | ||
@@ -127,2 +130,3 @@ const encodeChecked = (str, arr) => encodeCharcodes(str, arr) // Same as encodeLatin1, but with Uint16Array | ||
| // Only needed on Hermes, everything else has native impl | ||
| export function toWellFormed(u16) { | ||
@@ -150,2 +154,3 @@ const length = u16.length | ||
| // Only needed on Hermes, everything else has native impl | ||
| export function isWellFormed(u16) { | ||
@@ -155,2 +160,6 @@ const length = u16.length | ||
| const m = 0x80_00_80_00 | ||
| const l = 0xd8_00 | ||
| const h = 0xe0_00 | ||
| // Speedup with u32, by skipping to the first surrogate | ||
@@ -167,17 +176,27 @@ // Only implemented for aligned input for now, but almost all input is aligned (pooled Buffer or 0 offset) | ||
| const d = u32[i + 3] | ||
| if (a & 0x80_00_80_00 || b & 0x80_00_80_00 || c & 0x80_00_80_00 || d & 0x80_00_80_00) break | ||
| if (a & m || b & m || c & m || d & m) break // bitwise OR does not make this faster on Hermes | ||
| } | ||
| for (; i < u32length; i++) if (u32[i] & 0x80_00_80_00) break | ||
| for (; i < u32length; i++) if (u32[i] & m) break | ||
| i *= 2 | ||
| } | ||
| // An extra loop gives ~30-40% speedup e.g. on English text without surrogates but with other symbols above 0x80_00 | ||
| for (const last3 = length - 3; ; i += 4) { | ||
| if (i >= last3) break | ||
| const a = u16[i] | ||
| const b = u16[i + 1] | ||
| const c = u16[i + 2] | ||
| const d = u16[i + 3] | ||
| if ((a >= l && a < h) || (b >= l && b < h) || (c >= l && c < h) || (d >= l && d < h)) break | ||
| } | ||
| for (; i < length; i++) { | ||
| const code = u16[i] | ||
| if (code >= 0xd8_00 && code < 0xe0_00) { | ||
| if (code >= l && code < h) { | ||
| // An unexpected trail or a lead at the very end of input | ||
| if (code > 0xdb_ff || i + 1 >= length) return false | ||
| if (code >= 0xdc_00 || i + 1 >= length) return false | ||
| i++ // consume next | ||
| const next = u16[i] // Process valid pairs immediately | ||
| if (next < 0xdc_00 || next >= 0xe0_00) return false | ||
| if (next < 0xdc_00 || next >= h) return false | ||
| } | ||
@@ -184,0 +203,0 @@ } |
+1
-1
@@ -194,3 +194,3 @@ import { encodeAsciiPrefix } from './latin1.js' | ||
| // TODO: use resizable array buffers? will have to return a non-resizeable one | ||
| if (p !== i) throw new Error('Unreachable') // Here, p === i (only when small is still true) | ||
| if (p !== i) /* c8 ignore next */ throw new Error('Unreachable') // Here, p === i (only when small is still true) | ||
| const bytesNew = new Uint8Array(p + (length - i) * 3) // maximium can be 3x of the string length in charcodes | ||
@@ -197,0 +197,0 @@ bytesNew.set(bytes) |
+0
-1
@@ -22,2 +22,1 @@ /// <reference types="node" /> | ||
| export function fromHex(str: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer; | ||
+7
-1
| import { assertUint8 } from './assert.js' | ||
| import { multibyteDecoder } from './fallback/multi-byte.js' | ||
| import { multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js' | ||
@@ -14,1 +14,7 @@ export function createMultibyteDecoder(encoding, loose = false) { | ||
| } | ||
| export function createMultibyteEncoder(encoding, { mode = 'fatal' } = {}) { | ||
| // TODO: replacement, truncate (replacement will need varying length) | ||
| if (mode !== 'fatal') throw new Error('Unsupported mode') | ||
| return multibyteEncoder(encoding) // asserts | ||
| } |
| import { assertUint8 } from './assert.js' | ||
| import { isDeno, toBuf } from './fallback/_utils.js' | ||
| import { isAsciiSuperset, multibyteDecoder } from './fallback/multi-byte.js' | ||
| import { isAsciiSuperset, multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js' | ||
| import { isAscii } from 'node:buffer' | ||
@@ -24,1 +24,7 @@ | ||
| } | ||
| export function createMultibyteEncoder(encoding, { mode = 'fatal' } = {}) { | ||
| // TODO: replacement, truncate (replacement will need varying length) | ||
| if (mode !== 'fatal') throw new Error('Unsupported mode') | ||
| return multibyteEncoder(encoding) // asserts | ||
| } |
+32
-3
| { | ||
| "name": "@exodus/bytes", | ||
| "version": "1.8.0", | ||
| "version": "1.9.0", | ||
| "description": "Various operations on Uint8Array data", | ||
@@ -26,2 +26,3 @@ "scripts": { | ||
| "test": "exodus-test", | ||
| "size": "esbuild --minify --bundle", | ||
| "jsvu": "jsvu", | ||
@@ -52,2 +53,3 @@ "playwright": "exodus-test --playwright", | ||
| "/fallback/encoding.js", | ||
| "/fallback/encoding.api.js", | ||
| "/fallback/encoding.labels.js", | ||
@@ -76,2 +78,6 @@ "/fallback/encoding.util.js", | ||
| "/bigint.js", | ||
| "/encoding-browser.js", | ||
| "/encoding-browser.browser.js", | ||
| "/encoding-browser.native.js", | ||
| "/encoding-browser.d.ts", | ||
| "/encoding.js", | ||
@@ -84,2 +90,4 @@ "/encoding.d.ts", | ||
| "/hex.node.js", | ||
| "/index.js", | ||
| "/index.d.ts", | ||
| "/multi-byte.js", | ||
@@ -96,3 +104,10 @@ "/multi-byte.node.js", | ||
| ], | ||
| "main": "index.js", | ||
| "module": "index.js", | ||
| "types": "index.d.ts", | ||
| "exports": { | ||
| ".": { | ||
| "types": "./index.d.ts", | ||
| "default": "./index.js" | ||
| }, | ||
| "./array.js": { | ||
@@ -135,2 +150,9 @@ "types": "./array.d.ts", | ||
| }, | ||
| "./encoding-browser.js": { | ||
| "types": "./encoding-browser.d.ts", | ||
| "node": "./encoding-browser.js", | ||
| "react-native": "./encoding-browser.native.js", | ||
| "browser": "./encoding-browser.browser.js", | ||
| "default": "./encoding-browser.js" | ||
| }, | ||
| "./utf16.js": { | ||
@@ -147,7 +169,10 @@ "node": "./utf16.node.js", | ||
| }, | ||
| "react-native": { | ||
| "./encoding-browser.js": "./encoding-browser.native.js" | ||
| }, | ||
| "peerDependencies": { | ||
| "@exodus/crypto": "^1.0.0-rc.4" | ||
| "@noble/hashes": "^1.8.0 || ^2.0.0" | ||
| }, | ||
| "peerDependenciesMeta": { | ||
| "@exodus/crypto": { | ||
| "@noble/hashes": { | ||
| "optional": true | ||
@@ -163,2 +188,3 @@ } | ||
| "@noble/hashes": "^2.0.1", | ||
| "@oslojs/encoding": "^1.1.0", | ||
| "@petamoriken/float16": "^3.9.3", | ||
@@ -178,5 +204,7 @@ "@scure/base": "^1.2.6", | ||
| "buffer": "^6.0.3", | ||
| "c8": "^10.1.3", | ||
| "decode-utf8": "^1.0.1", | ||
| "electron": "36.5.0", | ||
| "encode-utf8": "^2.0.0", | ||
| "esbuild": "^0.27.2", | ||
| "eslint": "^8.44.0", | ||
@@ -189,2 +217,3 @@ "fast-base64-decode": "^2.0.0", | ||
| "jsvu": "^3.0.3", | ||
| "punycode": "^2.3.1", | ||
| "text-encoding": "^0.7.0", | ||
@@ -191,0 +220,0 @@ "typescript": "^5.9.3", |
+33
-5
| # `@exodus/bytes` | ||
| [](https://npmjs.org/package/@exodus/bytes) | ||
|  | ||
| [](https://github.com/ExodusOSS/bytes/releases) | ||
| [](https://www.npmcharts.com/compare/@exodus/bytes?minimal=true) | ||
| [](https://github.com/ExodusOSS/bytes/blob/HEAD/LICENSE) | ||
@@ -132,2 +133,3 @@ | ||
| import { windows1252toString, windows1252fromString } from '@exodus/bytes/single-byte.js' | ||
| import { latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js' | ||
| ``` | ||
@@ -171,2 +173,7 @@ | ||
| All WHATWG Encoding spec [`windows-*` encodings](https://encoding.spec.whatwg.org/#windows-874) are supersets of | ||
| corresponding [unicode.org encodings](https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/), meaning that | ||
| they encode/decode all the old valid (non-replacement) strings / byte sequences identically, but can also support | ||
| a wider range of inputs. | ||
| ##### `createSinglebyteDecoder(encoding, loose = false)` | ||
@@ -313,4 +320,4 @@ | ||
| import { fromBech32, toBech32 } from '@exodus/bytes/bech32.js' | ||
| import { fromBech32m, toBech32m } from '@exodus/bytes/base32.js' | ||
| import { getPrefix } from '@exodus/bytes/base32.js' | ||
| import { fromBech32m, toBech32m } from '@exodus/bytes/bech32.js' | ||
| import { getPrefix } from '@exodus/bytes/bech32.js' | ||
| ``` | ||
@@ -347,3 +354,3 @@ | ||
| On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/package/@exodus/crypto) to be installed. | ||
| On non-Node.js, requires peer dependency [@noble/hashes](https://www.npmjs.com/package/@noble/hashes) to be installed. | ||
@@ -363,3 +370,3 @@ ##### `async fromBase58check(str, format = 'uint8')` | ||
| On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/package/@exodus/crypto) to be installed. | ||
| On non-Node.js, requires peer dependency [@noble/hashes](https://www.npmjs.com/package/@noble/hashes) to be installed. | ||
@@ -533,4 +540,25 @@ ##### `async fromWifString(string, version)` | ||
| ### `@exodus/bytes/encoding-browser.js` | ||
| ```js | ||
| import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-browser.js' | ||
| import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-browser.js' // Requires Streams | ||
| // Hooks for standards | ||
| import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-browser.js' | ||
| ``` | ||
| Same as `@exodus/bytes/encoding.js`, but in browsers instead of polyfilling just uses whatever the | ||
| browser provides, drastically reducing the bundle size (to less than 2 KiB gzipped). | ||
| Under non-browser engines (Node.js, React Native, etc.) a full polyfill is used as those platforms | ||
| do not provide sufficiently complete / non-buggy `TextDecoder` APIs. | ||
| > [!NOTE] | ||
| > Implementations in browsers [have bugs](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit), | ||
| > but they are fixing them and the expected update window is short.\ | ||
| > If you want to circumvent browser bugs, use full `@exodus/bytes/encoding.js` import. | ||
| ## License | ||
| [MIT](./LICENSE) |
+9
-11
| import { assertUint8 } from './assert.js' | ||
| import { canDecoders, nativeEncoder, isHermes, skipWeb, E_STRING } from './fallback/_utils.js' | ||
| import { canDecoders, nativeEncoder, skipWeb, E_STRING } from './fallback/_utils.js' | ||
| import { encodeAscii, encodeAsciiPrefix, encodeLatin1 } from './fallback/latin1.js' | ||
@@ -69,13 +69,11 @@ import { assertEncoding, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js' | ||
| if (!isHermes) { | ||
| for (const len3 = len - 3; i < len3; i += 4) { | ||
| const x0 = s.charCodeAt(i), x1 = s.charCodeAt(i + 1), x2 = s.charCodeAt(i + 2), x3 = s.charCodeAt(i + 3) // prettier-ignore | ||
| const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore | ||
| if ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3)) throw new TypeError(E_STRICT) | ||
| for (const len3 = len - 3; i < len3; i += 4) { | ||
| const x0 = s.charCodeAt(i), x1 = s.charCodeAt(i + 1), x2 = s.charCodeAt(i + 2), x3 = s.charCodeAt(i + 3) // prettier-ignore | ||
| const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore | ||
| if ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3)) return null | ||
| x[i] = c0 | ||
| x[i + 1] = c1 | ||
| x[i + 2] = c2 | ||
| x[i + 3] = c3 | ||
| } | ||
| x[i] = c0 | ||
| x[i + 1] = c1 | ||
| x[i + 2] = c2 | ||
| x[i + 3] = c3 | ||
| } | ||
@@ -82,0 +80,0 @@ |
+29
-26
@@ -64,2 +64,28 @@ import { assertUint8 } from './assert.js' | ||
| function encode(s, m) { | ||
| const len = s.length | ||
| let i = 0 | ||
| const b = Buffer.from(s, 'utf-16le') // aligned | ||
| if (!isLE) b.swap16() | ||
| const x = new Uint16Array(b.buffer, b.byteOffset, b.byteLength / 2) | ||
| for (const len3 = len - 3; i < len3; i += 4) { | ||
| const x0 = x[i], x1 = x[i + 1], x2 = x[i + 2], x3 = x[i + 3] // prettier-ignore | ||
| const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore | ||
| if (!(c0 && c1 && c2 && c3) && ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3))) return null // prettier-ignore | ||
| x[i] = c0 | ||
| x[i + 1] = c1 | ||
| x[i + 2] = c2 | ||
| x[i + 3] = c3 | ||
| } | ||
| for (; i < len; i++) { | ||
| const x0 = x[i] | ||
| const c0 = m[x0] | ||
| if (!c0 && x0) return null | ||
| x[i] = c0 | ||
| } | ||
| return new Uint8Array(x) | ||
| } | ||
| export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) { | ||
@@ -86,28 +112,5 @@ // TODO: replacement, truncate (replacement will need varying length) | ||
| const len = s.length | ||
| let i = 0 | ||
| const b = Buffer.from(s, 'utf-16le') // aligned | ||
| if (!isLE) b.swap16() | ||
| const x = new Uint16Array(b.buffer, b.byteOffset, b.byteLength / 2) | ||
| for (const len3 = len - 3; i < len3; i += 4) { | ||
| const x0 = x[i], x1 = x[i + 1], x2 = x[i + 2], x3 = x[i + 3] // prettier-ignore | ||
| const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore | ||
| if (!(c0 && c1 && c2 && c3) && ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3))) { | ||
| throw new TypeError(E_STRICT) | ||
| } | ||
| x[i] = c0 | ||
| x[i + 1] = c1 | ||
| x[i + 2] = c2 | ||
| x[i + 3] = c3 | ||
| } | ||
| for (; i < len; i++) { | ||
| const x0 = x[i] | ||
| const c0 = m[x0] | ||
| if (!c0 && x0) throw new TypeError(E_STRICT) | ||
| x[i] = c0 | ||
| } | ||
| return new Uint8Array(x) | ||
| const res = encode(s, m) | ||
| if (!res) throw new TypeError(E_STRICT) | ||
| return res | ||
| } | ||
@@ -114,0 +117,0 @@ } |
+1
-0
@@ -37,2 +37,3 @@ import * as js from './fallback/utf16.js' | ||
| if (format === 'uint16') return u16 | ||
| /* c8 ignore next */ | ||
| throw new Error('Unreachable') | ||
@@ -39,0 +40,0 @@ } |
+6
-2
@@ -33,7 +33,11 @@ import { isDeno, isLE, E_STRING } from './fallback/_utils.js' | ||
| /* c8 ignore next */ | ||
| throw new Error('Unreachable') | ||
| } | ||
| const swapped = (x, swap) => | ||
| swap ? Buffer.from(x).swap16() : Buffer.from(x.buffer, x.byteOffset, x.byteLength) | ||
| // Convert to Buffer view or a swapped Buffer copy | ||
| const swapped = (x, swap) => { | ||
| const b = Buffer.from(x.buffer, x.byteOffset, x.byteLength) | ||
| return swap ? Buffer.from(b).swap16() : b | ||
| } | ||
@@ -40,0 +44,0 @@ // We skip TextDecoder on Node.js, as it's is somewhy significantly slower than Buffer for utf16 |
Long strings
Supply chain riskContains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
Long strings
Supply chain riskContains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
359246
5.65%55
14.58%5122
10.17%559
5.27%40
11.11%