@ethersproject/strings
Advanced tools
Comparing version 5.0.0-beta.135 to 5.0.0-beta.136
@@ -1,1 +0,1 @@ | ||
export declare const version = "strings/5.0.0-beta.135"; | ||
export declare const version = "strings/5.0.0-beta.136"; |
@@ -1,1 +0,1 @@ | ||
export const version = "strings/5.0.0-beta.135"; | ||
export const version = "strings/5.0.0-beta.136"; |
import { formatBytes32String, parseBytes32String } from "./bytes32"; | ||
import { nameprep } from "./idna"; | ||
import { _toEscapedUtf8String, toUtf8Bytes, toUtf8CodePoints, toUtf8String, UnicodeNormalizationForm } from "./utf8"; | ||
export { _toEscapedUtf8String, toUtf8Bytes, toUtf8CodePoints, toUtf8String, UnicodeNormalizationForm, formatBytes32String, parseBytes32String, nameprep }; | ||
import { _toEscapedUtf8String, toUtf8Bytes, toUtf8CodePoints, toUtf8String, UnicodeNormalizationForm, Utf8ErrorFunc, Utf8ErrorFuncs, Utf8ErrorReason } from "./utf8"; | ||
export { _toEscapedUtf8String, toUtf8Bytes, toUtf8CodePoints, toUtf8String, Utf8ErrorFunc, Utf8ErrorFuncs, Utf8ErrorReason, UnicodeNormalizationForm, formatBytes32String, parseBytes32String, nameprep }; |
"use strict"; | ||
import { formatBytes32String, parseBytes32String } from "./bytes32"; | ||
import { nameprep } from "./idna"; | ||
import { _toEscapedUtf8String, toUtf8Bytes, toUtf8CodePoints, toUtf8String, UnicodeNormalizationForm } from "./utf8"; | ||
export { _toEscapedUtf8String, toUtf8Bytes, toUtf8CodePoints, toUtf8String, UnicodeNormalizationForm, formatBytes32String, parseBytes32String, nameprep }; | ||
import { _toEscapedUtf8String, toUtf8Bytes, toUtf8CodePoints, toUtf8String, UnicodeNormalizationForm, Utf8ErrorFuncs, Utf8ErrorReason } from "./utf8"; | ||
export { _toEscapedUtf8String, toUtf8Bytes, toUtf8CodePoints, toUtf8String, Utf8ErrorFuncs, Utf8ErrorReason, UnicodeNormalizationForm, formatBytes32String, parseBytes32String, nameprep }; |
@@ -9,6 +9,19 @@ import { BytesLike } from "@ethersproject/bytes"; | ||
} | ||
export declare enum Utf8ErrorReason { | ||
UNEXPECTED_CONTINUE = "unexpected continuation byte", | ||
BAD_PREFIX = "bad codepoint prefix", | ||
OVERRUN = "string overrun", | ||
MISSING_CONTINUE = "missing continuation byte", | ||
OUT_OF_RANGE = "out of UTF-8 range", | ||
UTF16_SURROGATE = "UTF-16 surrogate", | ||
OVERLONG = "overlong representation" | ||
} | ||
// Callback invoked by the decoder each time it meets invalid UTF-8. | ||
// - reason = which kind of failure was detected | ||
// - offset = index into bytes associated with the failure | ||
// - bytes = the input being decoded | ||
// - output = the codepoints decoded so far; a handler may push onto it | ||
// - badCodepoint = the decoded value, supplied only for range/surrogate/overlong failures | ||
// The returned number is added to the decoder's read index, i.e. it is the | ||
// count of additional bytes to skip before decoding resumes. | ||
export declare type Utf8ErrorFunc = (reason: Utf8ErrorReason, offset: number, bytes: ArrayLike<number>, output: Array<number>, badCodepoint?: number) => number; | ||
export declare const Utf8ErrorFuncs: { | ||
[name: string]: Utf8ErrorFunc; | ||
}; | ||
export declare function toUtf8Bytes(str: string, form?: UnicodeNormalizationForm): Uint8Array; | ||
export declare function _toEscapedUtf8String(bytes: BytesLike, ignoreErrors?: boolean): string; | ||
export declare function _toEscapedUtf8String(bytes: BytesLike, onError?: Utf8ErrorFunc): string; | ||
export declare function _toUtf8String(codePoints: Array<number>): string; | ||
export declare function toUtf8String(bytes: BytesLike, ignoreErrors?: boolean): string; | ||
export declare function toUtf8String(bytes: BytesLike, onError?: Utf8ErrorFunc): string; | ||
export declare function toUtf8CodePoints(str: string, form?: UnicodeNormalizationForm): Array<number>; |
@@ -16,4 +16,75 @@ "use strict"; | ||
; | ||
// Reasons reported to a UTF-8 error handler. Each member's value is the | ||
// human-readable text for that failure. | ||
export var Utf8ErrorReason; | ||
(function (Utf8ErrorReason) { | ||
// A continuation byte was present where there was nothing to continue | ||
// - offset = the index the codepoint began in | ||
Utf8ErrorReason["UNEXPECTED_CONTINUE"] = "unexpected continuation byte"; | ||
// An invalid (non-continuation) byte to start a UTF-8 codepoint was found | ||
// - offset = the index the codepoint began in | ||
Utf8ErrorReason["BAD_PREFIX"] = "bad codepoint prefix"; | ||
// The string is too short to process the expected codepoint | ||
// - offset = the index the codepoint began in | ||
Utf8ErrorReason["OVERRUN"] = "string overrun"; | ||
// A continuation byte was expected but not found | ||
// - offset = the index the continuation byte was expected at | ||
Utf8ErrorReason["MISSING_CONTINUE"] = "missing continuation byte"; | ||
// The computed code point is outside the range for UTF-8 | ||
// - offset = start of this codepoint | ||
// - badCodepoint = the computed codepoint; outside the UTF-8 range | ||
Utf8ErrorReason["OUT_OF_RANGE"] = "out of UTF-8 range"; | ||
// UTF-8 strings may not contain UTF-16 surrogate pairs | ||
// - offset = start of this codepoint | ||
// - badCodepoint = the computed codepoint; inside the UTF-16 surrogate range | ||
Utf8ErrorReason["UTF16_SURROGATE"] = "UTF-16 surrogate"; | ||
// The string is an overlong representation | ||
// - offset = start of this codepoint | ||
// - badCodepoint = the computed codepoint; already bounds checked | ||
Utf8ErrorReason["OVERLONG"] = "overlong representation"; | ||
})(Utf8ErrorReason || (Utf8ErrorReason = {})); | ||
; | ||
// "error" strategy: reject the input by reporting an argument error that names | ||
// the offset and the reason, attaching the offending bytes as the "bytes" argument. | ||
// NOTE(review): presumably logger.throwArgumentError always throws, so no skip | ||
// count is ever actually returned — confirm against the logger implementation. | ||
function errorFunc(reason, offset, bytes, output, badCodepoint) { | ||
return logger.throwArgumentError(`invalid codepoint at offset ${offset}; ${reason}`, "bytes", bytes); | ||
} | ||
// "ignore" strategy: emit nothing for the invalid sequence and return how many | ||
// additional bytes should be skipped. | ||
// - reason = the Utf8ErrorReason being handled | ||
// - offset = the index reported for this error | ||
// - bytes = the input being decoded | ||
// - output, badCodepoint = not used by this strategy | ||
function ignoreFunc(reason, offset, bytes, output, badCodepoint) { | ||
// If there is an invalid prefix (including stray continuation), skip any additional continuation bytes | ||
if (reason === Utf8ErrorReason.BAD_PREFIX || reason === Utf8ErrorReason.UNEXPECTED_CONTINUE) { | ||
let i = 0; | ||
// Count the run of bytes after offset whose value shifted right by 6 equals 0x02 | ||
// (for byte values: top two bits are 10, i.e. continuation bytes) | ||
for (let o = offset + 1; o < bytes.length; o++) { | ||
if (bytes[o] >> 6 !== 0x02) { | ||
break; | ||
} | ||
i++; | ||
} | ||
return i; | ||
} | ||
// This byte runs us past the end of the string, so just jump to the end | ||
// (but the first byte was already read and therefore skipped) | ||
if (reason === Utf8ErrorReason.OVERRUN) { | ||
return bytes.length - offset - 1; | ||
} | ||
// Nothing to skip | ||
return 0; | ||
} | ||
// "replace" strategy: push a substitute codepoint onto output for the invalid | ||
// sequence and return how many additional bytes should be skipped. | ||
// - overlong sequences contribute the codepoint they decoded to (badCodepoint) | ||
// - every other failure contributes 0xfffd and then skips exactly as ignoreFunc would | ||
function replaceFunc(reason, offset, bytes, output, badCodepoint) { | ||
// Overlong representations are otherwise "valid" code points; just not the distinguished (shortest) encoding | ||
if (reason === Utf8ErrorReason.OVERLONG) { | ||
output.push(badCodepoint); | ||
return 0; | ||
} | ||
// Put the replacement character into the output | ||
output.push(0xfffd); | ||
// Otherwise, process as if ignoring errors | ||
return ignoreFunc(reason, offset, bytes, output, badCodepoint); | ||
} | ||
// Common error handling strategies, keyed by name; the object is frozen so | ||
// its entries cannot be reassigned. | ||
export const Utf8ErrorFuncs = Object.freeze({ | ||
error: errorFunc, | ||
ignore: ignoreFunc, | ||
replace: replaceFunc | ||
}); | ||
// http://stackoverflow.com/questions/13356493/decode-utf-8-with-javascript#13691499 | ||
function getUtf8CodePoints(bytes, ignoreErrors) { | ||
function getUtf8CodePoints(bytes, onError) { | ||
if (onError == null) { | ||
onError = Utf8ErrorFuncs.error; | ||
} | ||
bytes = arrayify(bytes); | ||
@@ -49,21 +120,13 @@ const result = []; | ||
else { | ||
if (!ignoreErrors) { | ||
if ((c & 0xc0) === 0x80) { | ||
throw new Error("invalid utf8 byte sequence; unexpected continuation byte"); | ||
} | ||
throw new Error("invalid utf8 byte sequence; invalid prefix"); | ||
if ((c & 0xc0) === 0x80) { | ||
i += onError(Utf8ErrorReason.UNEXPECTED_CONTINUE, i - 1, bytes, result); | ||
} | ||
else { | ||
i += onError(Utf8ErrorReason.BAD_PREFIX, i - 1, bytes, result); | ||
} | ||
continue; | ||
} | ||
// Do we have enough bytes in our data? | ||
if (i + extraLength > bytes.length) { | ||
if (!ignoreErrors) { | ||
throw new Error("invalid utf8 byte sequence; too short"); | ||
} | ||
// If there is an invalid unprocessed byte, skip continuation bytes | ||
for (; i < bytes.length; i++) { | ||
if (bytes[i] >> 6 !== 0x02) { | ||
break; | ||
} | ||
} | ||
if (i - 1 + extraLength >= bytes.length) { | ||
i += onError(Utf8ErrorReason.OVERRUN, i - 1, bytes, result); | ||
continue; | ||
@@ -77,2 +140,3 @@ } | ||
if ((nextChar & 0xc0) != 0x80) { | ||
i += onError(Utf8ErrorReason.MISSING_CONTINUE, i, bytes, result); | ||
res = null; | ||
@@ -85,20 +149,9 @@ break; | ||
} | ||
// See above loop for invalid contimuation byte | ||
if (res === null) { | ||
if (!ignoreErrors) { | ||
throw new Error("invalid utf8 byte sequence; invalid continuation byte"); | ||
} | ||
continue; | ||
} | ||
// Check for overlong seuences (more bytes than needed) | ||
if (res <= overlongMask) { | ||
if (!ignoreErrors) { | ||
throw new Error("invalid utf8 byte sequence; overlong"); | ||
} | ||
continue; | ||
} | ||
// Maximum code point | ||
if (res > 0x10ffff) { | ||
if (!ignoreErrors) { | ||
throw new Error("invalid utf8 byte sequence; out-of-range"); | ||
} | ||
i += onError(Utf8ErrorReason.OUT_OF_RANGE, i - 1 - extraLength, bytes, result, res); | ||
continue; | ||
@@ -108,7 +161,10 @@ } | ||
if (res >= 0xd800 && res <= 0xdfff) { | ||
if (!ignoreErrors) { | ||
throw new Error("invalid utf8 byte sequence; utf-16 surrogate"); | ||
} | ||
i += onError(Utf8ErrorReason.UTF16_SURROGATE, i - 1 - extraLength, bytes, result, res); | ||
continue; | ||
} | ||
// Check for overlong sequences (more bytes than needed) | ||
if (res <= overlongMask) { | ||
i += onError(Utf8ErrorReason.OVERLONG, i - 1 - extraLength, bytes, result, res); | ||
continue; | ||
} | ||
result.push(res); | ||
@@ -160,4 +216,4 @@ } | ||
} | ||
export function _toEscapedUtf8String(bytes, ignoreErrors) { | ||
return '"' + getUtf8CodePoints(bytes, ignoreErrors).map((codePoint) => { | ||
export function _toEscapedUtf8String(bytes, onError) { | ||
return '"' + getUtf8CodePoints(bytes, onError).map((codePoint) => { | ||
if (codePoint < 256) { | ||
@@ -192,4 +248,4 @@ switch (codePoint) { | ||
} | ||
export function toUtf8String(bytes, ignoreErrors) { | ||
return _toUtf8String(getUtf8CodePoints(bytes, ignoreErrors)); | ||
export function toUtf8String(bytes, onError) { | ||
return _toUtf8String(getUtf8CodePoints(bytes, onError)); | ||
} | ||
@@ -196,0 +252,0 @@ export function toUtf8CodePoints(str, form = UnicodeNormalizationForm.current) { |
@@ -1,1 +0,1 @@ | ||
export declare const version = "strings/5.0.0-beta.135"; | ||
export declare const version = "strings/5.0.0-beta.136"; |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.version = "strings/5.0.0-beta.135"; | ||
exports.version = "strings/5.0.0-beta.136"; |
import { formatBytes32String, parseBytes32String } from "./bytes32"; | ||
import { nameprep } from "./idna"; | ||
import { _toEscapedUtf8String, toUtf8Bytes, toUtf8CodePoints, toUtf8String, UnicodeNormalizationForm } from "./utf8"; | ||
export { _toEscapedUtf8String, toUtf8Bytes, toUtf8CodePoints, toUtf8String, UnicodeNormalizationForm, formatBytes32String, parseBytes32String, nameprep }; | ||
import { _toEscapedUtf8String, toUtf8Bytes, toUtf8CodePoints, toUtf8String, UnicodeNormalizationForm, Utf8ErrorFunc, Utf8ErrorFuncs, Utf8ErrorReason } from "./utf8"; | ||
export { _toEscapedUtf8String, toUtf8Bytes, toUtf8CodePoints, toUtf8String, Utf8ErrorFunc, Utf8ErrorFuncs, Utf8ErrorReason, UnicodeNormalizationForm, formatBytes32String, parseBytes32String, nameprep }; |
@@ -14,1 +14,3 @@ "use strict"; | ||
exports.UnicodeNormalizationForm = utf8_1.UnicodeNormalizationForm; | ||
exports.Utf8ErrorFuncs = utf8_1.Utf8ErrorFuncs; | ||
exports.Utf8ErrorReason = utf8_1.Utf8ErrorReason; |
@@ -9,6 +9,19 @@ import { BytesLike } from "@ethersproject/bytes"; | ||
} | ||
export declare enum Utf8ErrorReason { | ||
UNEXPECTED_CONTINUE = "unexpected continuation byte", | ||
BAD_PREFIX = "bad codepoint prefix", | ||
OVERRUN = "string overrun", | ||
MISSING_CONTINUE = "missing continuation byte", | ||
OUT_OF_RANGE = "out of UTF-8 range", | ||
UTF16_SURROGATE = "UTF-16 surrogate", | ||
OVERLONG = "overlong representation" | ||
} | ||
// Callback invoked by the decoder each time it meets invalid UTF-8. | ||
// - reason = which kind of failure was detected | ||
// - offset = index into bytes associated with the failure | ||
// - bytes = the input being decoded | ||
// - output = the codepoints decoded so far; a handler may push onto it | ||
// - badCodepoint = the decoded value, supplied only for range/surrogate/overlong failures | ||
// The returned number is added to the decoder's read index, i.e. it is the | ||
// count of additional bytes to skip before decoding resumes. | ||
export declare type Utf8ErrorFunc = (reason: Utf8ErrorReason, offset: number, bytes: ArrayLike<number>, output: Array<number>, badCodepoint?: number) => number; | ||
export declare const Utf8ErrorFuncs: { | ||
[name: string]: Utf8ErrorFunc; | ||
}; | ||
export declare function toUtf8Bytes(str: string, form?: UnicodeNormalizationForm): Uint8Array; | ||
export declare function _toEscapedUtf8String(bytes: BytesLike, ignoreErrors?: boolean): string; | ||
export declare function _toEscapedUtf8String(bytes: BytesLike, onError?: Utf8ErrorFunc): string; | ||
export declare function _toUtf8String(codePoints: Array<number>): string; | ||
export declare function toUtf8String(bytes: BytesLike, ignoreErrors?: boolean): string; | ||
export declare function toUtf8String(bytes: BytesLike, onError?: Utf8ErrorFunc): string; | ||
export declare function toUtf8CodePoints(str: string, form?: UnicodeNormalizationForm): Array<number>; |
128
lib/utf8.js
@@ -17,4 +17,75 @@ "use strict"; | ||
; | ||
// Reasons reported to a UTF-8 error handler. Each member's value is the | ||
// human-readable text for that failure. | ||
var Utf8ErrorReason; | ||
(function (Utf8ErrorReason) { | ||
// A continuation byte was present where there was nothing to continue | ||
// - offset = the index the codepoint began in | ||
Utf8ErrorReason["UNEXPECTED_CONTINUE"] = "unexpected continuation byte"; | ||
// An invalid (non-continuation) byte to start a UTF-8 codepoint was found | ||
// - offset = the index the codepoint began in | ||
Utf8ErrorReason["BAD_PREFIX"] = "bad codepoint prefix"; | ||
// The string is too short to process the expected codepoint | ||
// - offset = the index the codepoint began in | ||
Utf8ErrorReason["OVERRUN"] = "string overrun"; | ||
// A continuation byte was expected but not found | ||
// - offset = the index the continuation byte was expected at | ||
Utf8ErrorReason["MISSING_CONTINUE"] = "missing continuation byte"; | ||
// The computed code point is outside the range for UTF-8 | ||
// - offset = start of this codepoint | ||
// - badCodepoint = the computed codepoint; outside the UTF-8 range | ||
Utf8ErrorReason["OUT_OF_RANGE"] = "out of UTF-8 range"; | ||
// UTF-8 strings may not contain UTF-16 surrogate pairs | ||
// - offset = start of this codepoint | ||
// - badCodepoint = the computed codepoint; inside the UTF-16 surrogate range | ||
Utf8ErrorReason["UTF16_SURROGATE"] = "UTF-16 surrogate"; | ||
// The string is an overlong representation | ||
// - offset = start of this codepoint | ||
// - badCodepoint = the computed codepoint; already bounds checked | ||
Utf8ErrorReason["OVERLONG"] = "overlong representation"; | ||
})(Utf8ErrorReason = exports.Utf8ErrorReason || (exports.Utf8ErrorReason = {})); | ||
; | ||
// "error" strategy: reject the input by reporting an argument error that names | ||
// the offset and the reason, attaching the offending bytes as the "bytes" argument. | ||
// NOTE(review): presumably logger.throwArgumentError always throws, so no skip | ||
// count is ever actually returned — confirm against the logger implementation. | ||
function errorFunc(reason, offset, bytes, output, badCodepoint) { | ||
return logger.throwArgumentError("invalid codepoint at offset " + offset + "; " + reason, "bytes", bytes); | ||
} | ||
// "ignore" strategy: emit nothing for the invalid sequence and return how many | ||
// additional bytes should be skipped. | ||
// - reason = the Utf8ErrorReason being handled | ||
// - offset = the index reported for this error | ||
// - bytes = the input being decoded | ||
// - output, badCodepoint = not used by this strategy | ||
function ignoreFunc(reason, offset, bytes, output, badCodepoint) { | ||
// If there is an invalid prefix (including stray continuation), skip any additional continuation bytes | ||
if (reason === Utf8ErrorReason.BAD_PREFIX || reason === Utf8ErrorReason.UNEXPECTED_CONTINUE) { | ||
var i = 0; | ||
// Count the run of bytes after offset whose value shifted right by 6 equals 0x02 | ||
// (for byte values: top two bits are 10, i.e. continuation bytes) | ||
for (var o = offset + 1; o < bytes.length; o++) { | ||
if (bytes[o] >> 6 !== 0x02) { | ||
break; | ||
} | ||
i++; | ||
} | ||
return i; | ||
} | ||
// This byte runs us past the end of the string, so just jump to the end | ||
// (but the first byte was already read and therefore skipped) | ||
if (reason === Utf8ErrorReason.OVERRUN) { | ||
return bytes.length - offset - 1; | ||
} | ||
// Nothing to skip | ||
return 0; | ||
} | ||
// "replace" strategy: push a substitute codepoint onto output for the invalid | ||
// sequence and return how many additional bytes should be skipped. | ||
// - overlong sequences contribute the codepoint they decoded to (badCodepoint) | ||
// - every other failure contributes 0xfffd and then skips exactly as ignoreFunc would | ||
function replaceFunc(reason, offset, bytes, output, badCodepoint) { | ||
// Overlong representations are otherwise "valid" code points; just not the distinguished (shortest) encoding | ||
if (reason === Utf8ErrorReason.OVERLONG) { | ||
output.push(badCodepoint); | ||
return 0; | ||
} | ||
// Put the replacement character into the output | ||
output.push(0xfffd); | ||
// Otherwise, process as if ignoring errors | ||
return ignoreFunc(reason, offset, bytes, output, badCodepoint); | ||
} | ||
// Common error handling strategies, keyed by name; the object is frozen so | ||
// its entries cannot be reassigned. | ||
exports.Utf8ErrorFuncs = Object.freeze({ | ||
error: errorFunc, | ||
ignore: ignoreFunc, | ||
replace: replaceFunc | ||
}); | ||
// http://stackoverflow.com/questions/13356493/decode-utf-8-with-javascript#13691499 | ||
function getUtf8CodePoints(bytes, ignoreErrors) { | ||
function getUtf8CodePoints(bytes, onError) { | ||
if (onError == null) { | ||
onError = exports.Utf8ErrorFuncs.error; | ||
} | ||
bytes = bytes_1.arrayify(bytes); | ||
@@ -50,21 +121,13 @@ var result = []; | ||
else { | ||
if (!ignoreErrors) { | ||
if ((c & 0xc0) === 0x80) { | ||
throw new Error("invalid utf8 byte sequence; unexpected continuation byte"); | ||
} | ||
throw new Error("invalid utf8 byte sequence; invalid prefix"); | ||
if ((c & 0xc0) === 0x80) { | ||
i += onError(Utf8ErrorReason.UNEXPECTED_CONTINUE, i - 1, bytes, result); | ||
} | ||
else { | ||
i += onError(Utf8ErrorReason.BAD_PREFIX, i - 1, bytes, result); | ||
} | ||
continue; | ||
} | ||
// Do we have enough bytes in our data? | ||
if (i + extraLength > bytes.length) { | ||
if (!ignoreErrors) { | ||
throw new Error("invalid utf8 byte sequence; too short"); | ||
} | ||
// If there is an invalid unprocessed byte, skip continuation bytes | ||
for (; i < bytes.length; i++) { | ||
if (bytes[i] >> 6 !== 0x02) { | ||
break; | ||
} | ||
} | ||
if (i - 1 + extraLength >= bytes.length) { | ||
i += onError(Utf8ErrorReason.OVERRUN, i - 1, bytes, result); | ||
continue; | ||
@@ -78,2 +141,3 @@ } | ||
if ((nextChar & 0xc0) != 0x80) { | ||
i += onError(Utf8ErrorReason.MISSING_CONTINUE, i, bytes, result); | ||
res = null; | ||
@@ -86,20 +150,9 @@ break; | ||
} | ||
// See above loop for invalid contimuation byte | ||
if (res === null) { | ||
if (!ignoreErrors) { | ||
throw new Error("invalid utf8 byte sequence; invalid continuation byte"); | ||
} | ||
continue; | ||
} | ||
// Check for overlong seuences (more bytes than needed) | ||
if (res <= overlongMask) { | ||
if (!ignoreErrors) { | ||
throw new Error("invalid utf8 byte sequence; overlong"); | ||
} | ||
continue; | ||
} | ||
// Maximum code point | ||
if (res > 0x10ffff) { | ||
if (!ignoreErrors) { | ||
throw new Error("invalid utf8 byte sequence; out-of-range"); | ||
} | ||
i += onError(Utf8ErrorReason.OUT_OF_RANGE, i - 1 - extraLength, bytes, result, res); | ||
continue; | ||
@@ -109,7 +162,10 @@ } | ||
if (res >= 0xd800 && res <= 0xdfff) { | ||
if (!ignoreErrors) { | ||
throw new Error("invalid utf8 byte sequence; utf-16 surrogate"); | ||
} | ||
i += onError(Utf8ErrorReason.UTF16_SURROGATE, i - 1 - extraLength, bytes, result, res); | ||
continue; | ||
} | ||
// Check for overlong sequences (more bytes than needed) | ||
if (res <= overlongMask) { | ||
i += onError(Utf8ErrorReason.OVERLONG, i - 1 - extraLength, bytes, result, res); | ||
continue; | ||
} | ||
result.push(res); | ||
@@ -163,4 +219,4 @@ } | ||
} | ||
function _toEscapedUtf8String(bytes, ignoreErrors) { | ||
return '"' + getUtf8CodePoints(bytes, ignoreErrors).map(function (codePoint) { | ||
function _toEscapedUtf8String(bytes, onError) { | ||
return '"' + getUtf8CodePoints(bytes, onError).map(function (codePoint) { | ||
if (codePoint < 256) { | ||
@@ -197,4 +253,4 @@ switch (codePoint) { | ||
exports._toUtf8String = _toUtf8String; | ||
function toUtf8String(bytes, ignoreErrors) { | ||
return _toUtf8String(getUtf8CodePoints(bytes, ignoreErrors)); | ||
function toUtf8String(bytes, onError) { | ||
return _toUtf8String(getUtf8CodePoints(bytes, onError)); | ||
} | ||
@@ -201,0 +257,0 @@ exports.toUtf8String = toUtf8String; |
@@ -30,5 +30,5 @@ { | ||
}, | ||
"tarballHash": "0xe3b6ca6f029aaab94bbde1ad0be3203bfd6ea9be004cb8809d8c43c54086c787", | ||
"tarballHash": "0xfe5085858f0eefb043baba37c2618e003a0cb6c08c866a861709c8e4df4a4f1c", | ||
"types": "./lib/index.d.ts", | ||
"version": "5.0.0-beta.135" | ||
"version": "5.0.0-beta.136" | ||
} |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
51164
1076