@stablelib/utf8
Advanced tools
Comparing version 0.7.3 to 0.8.1
@@ -85,2 +85,3 @@ "use strict"; | ||
if (b & 0x80) { | ||
var min = void 0; | ||
if (b < 0xe0) { | ||
@@ -92,3 +93,7 @@ // Need 1 more byte. | ||
var n1 = arr[++i]; | ||
if ((n1 & 0xc0) !== 0x80) { | ||
throw new Error(INVALID_UTF8); | ||
} | ||
b = (b & 0x1f) << 6 | (n1 & 0x3f); | ||
min = 0x80; | ||
} | ||
@@ -102,5 +107,9 @@ else if (b < 0xf0) { | ||
var n2 = arr[++i]; | ||
if ((n1 & 0xc0) !== 0x80 || (n2 & 0xc0) !== 0x80) { | ||
throw new Error(INVALID_UTF8); | ||
} | ||
b = (b & 0x0f) << 12 | (n1 & 0x3f) << 6 | (n2 & 0x3f); | ||
min = 0x800; | ||
} | ||
else { | ||
else if (b < 0xf8) { | ||
// Need 3 more bytes. | ||
@@ -113,7 +122,14 @@ if (i >= arr.length - 2) { | ||
var n3 = arr[++i]; | ||
if ((n1 & 0xc0) !== 0x80 || (n2 & 0xc0) !== 0x80 || (n3 & 0xc0) !== 0x80) { | ||
throw new Error(INVALID_UTF8); | ||
} | ||
b = (b & 0x0f) << 18 | (n1 & 0x3f) << 12 | (n2 & 0x3f) << 6 | (n3 & 0x3f); | ||
min = 0x10000; | ||
} | ||
if (b >= 0xd800 && b <= 0xdfff) { | ||
else { | ||
throw new Error(INVALID_UTF8); | ||
} | ||
if (b < min || (b >= 0xd800 && b <= 0xdfff)) { | ||
throw new Error(INVALID_UTF8); | ||
} | ||
if (b >= 0x10000) { | ||
@@ -120,0 +136,0 @@ // Surrogate pair. |
@@ -5,2 +5,3 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var hex = require("@stablelib/hex"); | ||
var utf8_1 = require("./utf8"); | ||
@@ -22,3 +23,90 @@ describe("utf8", function () { | ||
}); | ||
it("should not decode malformed bytes", function () { | ||
// Source: https://hsivonen.fi/broken-utf-8/test.html | ||
var tests = [ | ||
// Non-shortest forms for lowest single-byte (U+0000) | ||
"C0 80", | ||
"E0 80 80", | ||
"F0 80 80 80", | ||
"F8 80 80 80 80", | ||
"FC 80 80 80 80 80", | ||
// Non-shortest forms for highest single-byte (U+007F) | ||
"C1 BF", | ||
"E0 81 BF", | ||
"F0 80 81 BF", | ||
"F8 80 80 81 BF", | ||
"FC 80 80 80 81 BF", | ||
// Non-shortest forms for lowest two-byte (U+0080) | ||
"E0 82 80", | ||
"F0 80 82 80", | ||
"F8 80 80 82 80", | ||
"FC 80 80 80 82 80", | ||
// Non-shortest forms for highest two-byte (U+07FF) | ||
"E0 9F BF", | ||
"F0 80 9F BF", | ||
"F8 80 80 9F BF", | ||
"FC 80 80 80 9F BF", | ||
// Non-shortest forms for lowest three-byte (U+0800) | ||
"F0 80 A0 80", | ||
"F8 80 80 A0 80", | ||
"FC 80 80 80 A0 80", | ||
// Non-shortest forms for highest three-byte (U+FFFF) | ||
"F0 8F BF BF", | ||
"F8 80 8F BF BF", | ||
"FC 80 80 8F BF BF", | ||
// Non-shortest forms for lowest four-byte (U+10000) | ||
"F8 80 90 80 80", | ||
"FC 80 80 90 80 80", | ||
// Non-shortest forms for last Unicode (U+10FFFF) | ||
"F8 84 8F BF BF", | ||
"FC 80 84 8F BF BF", | ||
// Out of range | ||
"F4 90 80 80", | ||
"FB BF BF BF BF", | ||
"FD BF BF BF BF BF", | ||
"ED A0 80", | ||
"ED BF BF", | ||
"ED A0 BD ED B2 A9", | ||
// Out of range and non-shortest | ||
"F8 84 90 80 80", | ||
"FC 80 84 90 80 80", | ||
"F0 8D A0 80", | ||
"F0 8D BF BF", | ||
"F0 8D A0 BD F0 8D B2 A9", | ||
// Lone trails | ||
"80", | ||
"80 80", | ||
"80 80 80", | ||
"80 80 80 80", | ||
"80 80 80 80 80", | ||
"80 80 80 80 80 80", | ||
"80 80 80 80 80 80 80", | ||
"C2 B6 80", | ||
"E2 98 83 80", | ||
"F0 9F 92 A9 80", | ||
"FB BF BF BF BF 80", | ||
"FD BF BF BF BF BF 80", | ||
// Truncated sequences | ||
"C2", | ||
"E2", | ||
"E2 98", | ||
"F0", | ||
"F0 9F", | ||
"F0 9F 92", | ||
// Leftovers | ||
"FE", | ||
"FE 80", | ||
"FF", | ||
"FF 80" | ||
]; | ||
tests.forEach(function (s, i) { | ||
var b = hex.decode(s.replace(/ /g, "")); | ||
expect(function () { | ||
var x = utf8_1.decode(b); | ||
// The following will only run in case of unsuccessful test: | ||
console.log(i, "should not have decoded", s, "to", x); | ||
}).toThrowError(/invalid/); | ||
}); | ||
}); | ||
}); | ||
//# sourceMappingURL=utf8.test.js.map |
{ | ||
"name": "@stablelib/utf8", | ||
"version": "0.7.3", | ||
"version": "0.8.1", | ||
"description": "UTF-8 encoder and decoder", | ||
@@ -18,4 +18,5 @@ "main": "./lib/utf8.js", | ||
"devDependencies": { | ||
"@stablelib/benchmark": "^0.5.0" | ||
"@stablelib/benchmark": "^0.5.0", | ||
"@stablelib/hex": "^0.5.0" | ||
} | ||
} |
// Copyright (C) 2016 Dmitry Chestnykh | ||
// MIT License. See LICENSE file for details. | ||
import * as hex from "@stablelib/hex"; | ||
import { encode, decode } from "./utf8"; | ||
@@ -21,2 +22,90 @@ | ||
}); | ||
it("should not decode malformed bytes", () => { | ||
// Source: https://hsivonen.fi/broken-utf-8/test.html | ||
const tests = [ | ||
// Non-shortest forms for lowest single-byte (U+0000) | ||
"C0 80", | ||
"E0 80 80", | ||
"F0 80 80 80", | ||
"F8 80 80 80 80", | ||
"FC 80 80 80 80 80", | ||
// Non-shortest forms for highest single-byte (U+007F) | ||
"C1 BF", | ||
"E0 81 BF", | ||
"F0 80 81 BF", | ||
"F8 80 80 81 BF", | ||
"FC 80 80 80 81 BF", | ||
// Non-shortest forms for lowest two-byte (U+0080) | ||
"E0 82 80", | ||
"F0 80 82 80", | ||
"F8 80 80 82 80", | ||
"FC 80 80 80 82 80", | ||
// Non-shortest forms for highest two-byte (U+07FF) | ||
"E0 9F BF", | ||
"F0 80 9F BF", | ||
"F8 80 80 9F BF", | ||
"FC 80 80 80 9F BF", | ||
// Non-shortest forms for lowest three-byte (U+0800) | ||
"F0 80 A0 80", | ||
"F8 80 80 A0 80", | ||
"FC 80 80 80 A0 80", | ||
// Non-shortest forms for highest three-byte (U+FFFF) | ||
"F0 8F BF BF", | ||
"F8 80 8F BF BF", | ||
"FC 80 80 8F BF BF", | ||
// Non-shortest forms for lowest four-byte (U+10000) | ||
"F8 80 90 80 80", | ||
"FC 80 80 90 80 80", | ||
// Non-shortest forms for last Unicode (U+10FFFF) | ||
"F8 84 8F BF BF", | ||
"FC 80 84 8F BF BF", | ||
// Out of range | ||
"F4 90 80 80", | ||
"FB BF BF BF BF", | ||
"FD BF BF BF BF BF", | ||
"ED A0 80", | ||
"ED BF BF", | ||
"ED A0 BD ED B2 A9", | ||
// Out of range and non-shortest | ||
"F8 84 90 80 80", | ||
"FC 80 84 90 80 80", | ||
"F0 8D A0 80", | ||
"F0 8D BF BF", | ||
"F0 8D A0 BD F0 8D B2 A9", | ||
// Lone trails | ||
"80", | ||
"80 80", | ||
"80 80 80", | ||
"80 80 80 80", | ||
"80 80 80 80 80", | ||
"80 80 80 80 80 80", | ||
"80 80 80 80 80 80 80", | ||
"C2 B6 80", | ||
"E2 98 83 80", | ||
"F0 9F 92 A9 80", | ||
"FB BF BF BF BF 80", | ||
"FD BF BF BF BF BF 80", | ||
// Truncated sequences | ||
"C2", | ||
"E2", | ||
"E2 98", | ||
"F0", | ||
"F0 9F", | ||
"F0 9F 92", | ||
// Leftovers | ||
"FE", | ||
"FE 80", | ||
"FF", | ||
"FF 80" | ||
]; | ||
tests.forEach((s, i) => { | ||
const b = hex.decode(s.replace(/ /g, "")); | ||
expect(() => { | ||
const x = decode(b); | ||
// The following will only run in case of unsuccessful test: | ||
console.log(i, "should not have decoded", s, "to", x); | ||
}).toThrowError(/invalid/); | ||
}); | ||
}); | ||
}); |
19
utf8.ts
@@ -81,2 +81,3 @@ // Copyright (C) 2016 Dmitry Chestnykh | ||
if (b & 0x80) { | ||
let min; | ||
if (b < 0xe0) { | ||
@@ -88,3 +89,7 @@ // Need 1 more byte. | ||
const n1 = arr[++i]; | ||
if ((n1 & 0xc0) !== 0x80) { | ||
throw new Error(INVALID_UTF8); | ||
} | ||
b = (b & 0x1f) << 6 | (n1 & 0x3f); | ||
min = 0x80; | ||
} else if (b < 0xf0) { | ||
@@ -97,4 +102,8 @@ // Need 2 more bytes. | ||
const n2 = arr[++i]; | ||
if ((n1 & 0xc0) !== 0x80 || (n2 & 0xc0) !== 0x80) { | ||
throw new Error(INVALID_UTF8); | ||
} | ||
b = (b & 0x0f) << 12 | (n1 & 0x3f) << 6 | (n2 & 0x3f); | ||
} else { | ||
min = 0x800; | ||
} else if (b < 0xf8) { | ||
// Need 3 more bytes. | ||
@@ -107,6 +116,12 @@ if (i >= arr.length - 2) { | ||
const n3 = arr[++i]; | ||
if ((n1 & 0xc0) !== 0x80 || (n2 & 0xc0) !== 0x80 || (n3 & 0xc0) !== 0x80) { | ||
throw new Error(INVALID_UTF8); | ||
} | ||
b = (b & 0x0f) << 18 | (n1 & 0x3f) << 12 | (n2 & 0x3f) << 6 | (n3 & 0x3f); | ||
min = 0x10000; | ||
} else { | ||
throw new Error(INVALID_UTF8); | ||
} | ||
if (b >= 0xd800 && b <= 0xdfff) { | ||
if (b < min || (b >= 0xd800 && b <= 0xdfff)) { | ||
throw new Error(INVALID_UTF8); | ||
@@ -113,0 +128,0 @@ } |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
36919
602
2