@@ -67,2 +67,4 @@ export declare enum DecodingMode { | ||
| private decodeMode; | ||
| /** The number of characters that have been consumed in the current run. */ | ||
| private runConsumed; | ||
| /** Resets the instance to make it reusable. */ | ||
@@ -69,0 +71,0 @@ startEntity(decodeMode: DecodingMode): void; |
+29
-29
@@ -108,2 +108,4 @@ "use strict"; | ||
| this.decodeMode = DecodingMode.Strict; | ||
| /** The number of characters that have been consumed in the current run. */ | ||
| this.runConsumed = 0; | ||
| } | ||
@@ -118,2 +120,3 @@ /** Resets the instance to make it reusable. */ | ||
| this.consumed = 1; | ||
| this.runConsumed = 0; | ||
| } | ||
@@ -281,21 +284,26 @@ /** | ||
| const runLength = (current & bin_trie_flags_js_1.BinTrieFlags.BRANCH_LENGTH) >> 7; /* 2..63 */ | ||
| const firstChar = current & bin_trie_flags_js_1.BinTrieFlags.JUMP_TABLE; | ||
| // Fast-fail if we don't have enough remaining input for the full run (incomplete entity) | ||
| if (offset + runLength > input.length) | ||
| return -1; | ||
| // Verify first char | ||
| if (input.charCodeAt(offset) !== firstChar) { | ||
| return this.result === 0 | ||
| ? 0 | ||
| : this.emitNotTerminatedNamedEntity(); | ||
| // If we are starting a run, check the first char. | ||
| if (this.runConsumed === 0) { | ||
| const firstChar = current & bin_trie_flags_js_1.BinTrieFlags.JUMP_TABLE; | ||
| if (input.charCodeAt(offset) !== firstChar) { | ||
| return this.result === 0 | ||
| ? 0 | ||
| : this.emitNotTerminatedNamedEntity(); | ||
| } | ||
| offset++; | ||
| this.excess++; | ||
| this.runConsumed++; | ||
| } | ||
| offset++; | ||
| this.excess++; | ||
| // Remaining characters after the first | ||
| const remaining = runLength - 1; | ||
| // Iterate over packed 2-char words | ||
| for (let runPos = 1; runPos < runLength; runPos += 2) { | ||
| const packedWord = decodeTree[this.treeIndex + 1 + ((runPos - 1) >> 1)]; | ||
| const low = packedWord & 0xff; | ||
| if (input.charCodeAt(offset) !== low) { | ||
| // Check remaining characters in the run. | ||
| while (this.runConsumed < runLength) { | ||
| if (offset >= input.length) { | ||
| return -1; | ||
| } | ||
| const charIndexInPacked = this.runConsumed - 1; | ||
| const packedWord = decodeTree[this.treeIndex + 1 + (charIndexInPacked >> 1)]; | ||
| const expectedChar = charIndexInPacked % 2 === 0 | ||
| ? packedWord & 0xff | ||
| : (packedWord >> 8) & 0xff; | ||
| if (input.charCodeAt(offset) !== expectedChar) { | ||
| this.runConsumed = 0; | ||
| return this.result === 0 | ||
@@ -307,14 +315,6 @@ ? 0 | ||
| this.excess++; | ||
| const high = (packedWord >> 8) & 0xff; | ||
| if (runPos + 1 < runLength) { | ||
| if (input.charCodeAt(offset) !== high) { | ||
| return this.result === 0 | ||
| ? 0 | ||
| : this.emitNotTerminatedNamedEntity(); | ||
| } | ||
| offset++; | ||
| this.excess++; | ||
| } | ||
| this.runConsumed++; | ||
| } | ||
| this.treeIndex += 1 + ((remaining + 1) >> 1); | ||
| this.runConsumed = 0; | ||
| this.treeIndex += 1 + (runLength >> 1); | ||
| current = decodeTree[this.treeIndex]; | ||
@@ -321,0 +321,0 @@ valueLength = (current & bin_trie_flags_js_1.BinTrieFlags.VALUE_LENGTH) >> 14; |
@@ -67,2 +67,4 @@ export declare enum DecodingMode { | ||
| private decodeMode; | ||
| /** The number of characters that have been consumed in the current run. */ | ||
| private runConsumed; | ||
| /** Resets the instance to make it reusable. */ | ||
@@ -69,0 +71,0 @@ startEntity(decodeMode: DecodingMode): void; |
+29
-29
@@ -100,2 +100,4 @@ import { fromCodePoint, replaceCodePoint } from "./decode-codepoint.js"; | ||
| this.decodeMode = DecodingMode.Strict; | ||
| /** The number of characters that have been consumed in the current run. */ | ||
| this.runConsumed = 0; | ||
| } | ||
@@ -110,2 +112,3 @@ /** Resets the instance to make it reusable. */ | ||
| this.consumed = 1; | ||
| this.runConsumed = 0; | ||
| } | ||
@@ -273,21 +276,26 @@ /** | ||
| const runLength = (current & BinTrieFlags.BRANCH_LENGTH) >> 7; /* 2..63 */ | ||
| const firstChar = current & BinTrieFlags.JUMP_TABLE; | ||
| // Fast-fail if we don't have enough remaining input for the full run (incomplete entity) | ||
| if (offset + runLength > input.length) | ||
| return -1; | ||
| // Verify first char | ||
| if (input.charCodeAt(offset) !== firstChar) { | ||
| return this.result === 0 | ||
| ? 0 | ||
| : this.emitNotTerminatedNamedEntity(); | ||
| // If we are starting a run, check the first char. | ||
| if (this.runConsumed === 0) { | ||
| const firstChar = current & BinTrieFlags.JUMP_TABLE; | ||
| if (input.charCodeAt(offset) !== firstChar) { | ||
| return this.result === 0 | ||
| ? 0 | ||
| : this.emitNotTerminatedNamedEntity(); | ||
| } | ||
| offset++; | ||
| this.excess++; | ||
| this.runConsumed++; | ||
| } | ||
| offset++; | ||
| this.excess++; | ||
| // Remaining characters after the first | ||
| const remaining = runLength - 1; | ||
| // Iterate over packed 2-char words | ||
| for (let runPos = 1; runPos < runLength; runPos += 2) { | ||
| const packedWord = decodeTree[this.treeIndex + 1 + ((runPos - 1) >> 1)]; | ||
| const low = packedWord & 0xff; | ||
| if (input.charCodeAt(offset) !== low) { | ||
| // Check remaining characters in the run. | ||
| while (this.runConsumed < runLength) { | ||
| if (offset >= input.length) { | ||
| return -1; | ||
| } | ||
| const charIndexInPacked = this.runConsumed - 1; | ||
| const packedWord = decodeTree[this.treeIndex + 1 + (charIndexInPacked >> 1)]; | ||
| const expectedChar = charIndexInPacked % 2 === 0 | ||
| ? packedWord & 0xff | ||
| : (packedWord >> 8) & 0xff; | ||
| if (input.charCodeAt(offset) !== expectedChar) { | ||
| this.runConsumed = 0; | ||
| return this.result === 0 | ||
@@ -299,14 +307,6 @@ ? 0 | ||
| this.excess++; | ||
| const high = (packedWord >> 8) & 0xff; | ||
| if (runPos + 1 < runLength) { | ||
| if (input.charCodeAt(offset) !== high) { | ||
| return this.result === 0 | ||
| ? 0 | ||
| : this.emitNotTerminatedNamedEntity(); | ||
| } | ||
| offset++; | ||
| this.excess++; | ||
| } | ||
| this.runConsumed++; | ||
| } | ||
| this.treeIndex += 1 + ((remaining + 1) >> 1); | ||
| this.runConsumed = 0; | ||
| this.treeIndex += 1 + (runLength >> 1); | ||
| current = decodeTree[this.treeIndex]; | ||
@@ -313,0 +313,0 @@ valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14; |
+18
-11
| { | ||
| "name": "entities", | ||
| "version": "7.0.0", | ||
| "version": "7.0.1", | ||
| "description": "Encode & decode XML and HTML entities with ease & speed", | ||
@@ -64,3 +64,4 @@ "keywords": [ | ||
| "dist", | ||
| "src" | ||
| "src", | ||
| "!**/*.spec.ts" | ||
| ], | ||
@@ -71,2 +72,3 @@ "scripts": { | ||
| "build:trie": "node --import=tsx scripts/write-decode-map.ts", | ||
| "benchmark": "node --import=tsx scripts/benchmark.ts", | ||
| "format": "npm run format:es && npm run format:biome", | ||
@@ -84,15 +86,20 @@ "format:es": "npm run lint:es -- --fix", | ||
| "devDependencies": { | ||
| "@biomejs/biome": "^2.2.3", | ||
| "@types/node": "^24.3.1", | ||
| "@typescript-eslint/eslint-plugin": "^8.42.0", | ||
| "@typescript-eslint/parser": "^8.33.1", | ||
| "@biomejs/biome": "^2.3.11", | ||
| "@types/node": "^25.0.9", | ||
| "@typescript-eslint/eslint-plugin": "^8.53.1", | ||
| "@typescript-eslint/parser": "^8.53.1", | ||
| "@vitest/coverage-v8": "^3.2.4", | ||
| "@types/he": "^1.2.3", | ||
| "eslint": "^8.57.1", | ||
| "eslint-config-biome": "^2.1.3", | ||
| "eslint-plugin-n": "^17.21.3", | ||
| "eslint-plugin-n": "^17.23.2", | ||
| "eslint-plugin-unicorn": "^56.0.1", | ||
| "tshy": "^3.0.2", | ||
| "tsx": "^4.20.5", | ||
| "typedoc": "^0.28.12", | ||
| "typescript": "^5.9.2", | ||
| "he": "^1.2.0", | ||
| "html-entities": "^2.6.0", | ||
| "parse-entities": "^4.0.2", | ||
| "tinybench": "^5.1.0", | ||
| "tshy": "^3.1.0", | ||
| "tsx": "^4.21.0", | ||
| "typedoc": "^0.28.16", | ||
| "typescript": "^5.9.3", | ||
| "vitest": "^3.2.4" | ||
@@ -99,0 +106,0 @@ }, |
+32
-11
@@ -13,3 +13,3 @@ # entities [](https://npmjs.org/package/entities) [](https://npmjs.org/package/entities) [](https://github.com/fb55/entities/actions/workflows/nodejs-test.yml) | ||
| - ⚡️ Fast: `entities` is the fastest library for decoding HTML entities (as of | ||
| April 2022); see [performance](#performance). | ||
| September 2025); see [performance](#performance). | ||
| - 🎛 Configurable: Get an output tailored for your needs. You are fine with | ||
@@ -42,12 +42,33 @@ UTF8? That'll save you some bytes. Prefer to only have ASCII characters? We | ||
| This is how `entities` compares to other libraries on a very basic benchmark | ||
| (see `scripts/benchmark.ts`, for 10,000,000 iterations; **lower is better**): | ||
| Benchmarked in September 2025 with Node v24.6.0 on Apple M2 using `tinybench`. | ||
| Higher ops/s is better; `avg (ns)` is the mean time per operation. | ||
| See `scripts/benchmark.ts` to reproduce. | ||
| | Library | Version | `decode` perf | `encode` perf | `escape` perf | | ||
| | -------------- | ------- | ------------- | ------------- | ------------- | | ||
| | entities | `3.0.1` | 1.418s | 6.786s | 2.196s | | ||
| | html-entities | `2.3.2` | 2.530s | 6.829s | 2.415s | | ||
| | he | `1.2.0` | 5.800s | 24.237s | 3.624s | | ||
| | parse-entities | `3.0.0` | 9.660s | N/A | N/A | | ||
| ### Decoding | ||
| | Library | Version | ops/s | avg (ns) | ±% | slower | | ||
| | -------------- | ------- | --------- | -------- | ---- | ------ | | ||
| | entities | 7.0.0 | 5,838,416 | 175.57 | 0.06 | — | | ||
| | html-entities | 2.6.0 | 2,919,637 | 347.77 | 0.33 | 50.0% | | ||
| | he | 1.2.0 | 2,318,438 | 446.48 | 0.70 | 60.3% | | ||
| | parse-entities | 4.0.2 | 852,855 | 1,199.51 | 0.36 | 85.4% | | ||
| ### Encoding | ||
| | Library | Version | ops/s | avg (ns) | ±% | slower | | ||
| | -------------- | ------- | --------- | -------- | ---- | ------ | | ||
| | entities | 7.0.0 | 2,770,115 | 368.09 | 0.11 | — | | ||
| | html-entities | 2.6.0 | 1,491,963 | 679.96 | 0.58 | 46.2% | | ||
| | he | 1.2.0 | 481,278 | 2,118.25 | 0.61 | 82.6% | | ||
| ### Escaping | ||
| | Library | Version | ops/s | avg (ns) | ±% | slower | | ||
| | -------------- | ------- | --------- | -------- | ---- | ------ | | ||
| | entities | 7.0.0 | 4,616,468 | 223.84 | 0.17 | — | | ||
| | he | 1.2.0 | 3,659,301 | 280.76 | 0.58 | 20.7% | | ||
| | html-entities | 2.6.0 | 3,555,301 | 296.63 | 0.84 | 23.0% | | ||
| Note: Micro-benchmarks may vary across machines and Node versions. | ||
| --- | ||
@@ -73,4 +94,4 @@ | ||
| As of April 2022, `entities` is a bit faster than other modules. Still, this is | ||
| not a very differentiated space and other modules can catch up. | ||
| As of September 2025, `entities` is faster than other modules. Still, this is | ||
| not a differentiated space and other modules can catch up. | ||
@@ -77,0 +98,0 @@ **More importantly**, you might already have `entities` in your dependency graph |
+37
-28
@@ -121,2 +121,4 @@ import { fromCodePoint, replaceCodePoint } from "./decode-codepoint.js"; | ||
| private decodeMode = DecodingMode.Strict; | ||
| /** The number of characters that have been consumed in the current run. */ | ||
| private runConsumed = 0; | ||
@@ -131,2 +133,3 @@ /** Resets the instance to make it reusable. */ | ||
| this.consumed = 1; | ||
| this.runConsumed = 0; | ||
| } | ||
@@ -312,21 +315,34 @@ | ||
| (current & BinTrieFlags.BRANCH_LENGTH) >> 7; /* 2..63 */ | ||
| const firstChar = current & BinTrieFlags.JUMP_TABLE; | ||
| // Fast-fail if we don't have enough remaining input for the full run (incomplete entity) | ||
| if (offset + runLength > input.length) return -1; | ||
| // Verify first char | ||
| if (input.charCodeAt(offset) !== firstChar) { | ||
| return this.result === 0 | ||
| ? 0 | ||
| : this.emitNotTerminatedNamedEntity(); | ||
| // If we are starting a run, check the first char. | ||
| if (this.runConsumed === 0) { | ||
| const firstChar = current & BinTrieFlags.JUMP_TABLE; | ||
| if (input.charCodeAt(offset) !== firstChar) { | ||
| return this.result === 0 | ||
| ? 0 | ||
| : this.emitNotTerminatedNamedEntity(); | ||
| } | ||
| offset++; | ||
| this.excess++; | ||
| this.runConsumed++; | ||
| } | ||
| offset++; | ||
| this.excess++; | ||
| // Remaining characters after the first | ||
| const remaining = runLength - 1; | ||
| // Iterate over packed 2-char words | ||
| for (let runPos = 1; runPos < runLength; runPos += 2) { | ||
| // Check remaining characters in the run. | ||
| while (this.runConsumed < runLength) { | ||
| if (offset >= input.length) { | ||
| return -1; | ||
| } | ||
| const charIndexInPacked = this.runConsumed - 1; | ||
| const packedWord = | ||
| decodeTree[this.treeIndex + 1 + ((runPos - 1) >> 1)]; | ||
| const low = packedWord & 0xff; | ||
| if (input.charCodeAt(offset) !== low) { | ||
| decodeTree[ | ||
| this.treeIndex + 1 + (charIndexInPacked >> 1) | ||
| ]; | ||
| const expectedChar = | ||
| charIndexInPacked % 2 === 0 | ||
| ? packedWord & 0xff | ||
| : (packedWord >> 8) & 0xff; | ||
| if (input.charCodeAt(offset) !== expectedChar) { | ||
| this.runConsumed = 0; | ||
| return this.result === 0 | ||
@@ -338,14 +354,7 @@ ? 0 | ||
| this.excess++; | ||
| const high = (packedWord >> 8) & 0xff; | ||
| if (runPos + 1 < runLength) { | ||
| if (input.charCodeAt(offset) !== high) { | ||
| return this.result === 0 | ||
| ? 0 | ||
| : this.emitNotTerminatedNamedEntity(); | ||
| } | ||
| offset++; | ||
| this.excess++; | ||
| } | ||
| this.runConsumed++; | ||
| } | ||
| this.treeIndex += 1 + ((remaining + 1) >> 1); | ||
| this.runConsumed = 0; | ||
| this.treeIndex += 1 + (runLength >> 1); | ||
| current = decodeTree[this.treeIndex]; | ||
@@ -352,0 +361,0 @@ valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14; |
| import { describe, expect, it, vitest } from "vitest"; | ||
| import * as entities from "./decode.js"; | ||
/*
 * String-level decoder tests (`decodeXML`, `decodeHTML`, `decodeHTMLStrict`,
 * `decodeHTMLAttribute`).
 *
 * NOTE(review): the literals below are written as entity *source text*
 * (`&amp;amp;`, `&timesbar`, ...). The rendered copy this was recovered from had
 * HTML-decoded them (e.g. `decodeHTMLStrict("7")`), which made the cases
 * vacuous. Confirm the restored inputs against the repository copy.
 */
describe("Decode test", () => {
    // Each input carries exactly one more level of encoding than its output,
    // so a single decode pass must yield `output` and must not decode twice.
    const testcases = [
        { input: "&amp;amp;", output: "&amp;" },
        { input: "&amp;#38;", output: "&#38;" },
        { input: "&amp;#x26;", output: "&#x26;" },
        { input: "&amp;#X26;", output: "&#X26;" },
        { input: "&#38;#38;", output: "&#38;" },
        { input: "&#x26;#38;", output: "&#38;" },
        { input: "&#X26;#38;", output: "&#38;" },
        { input: "&#x3a;", output: ":" },
        { input: "&#x3A;", output: ":" },
        { input: "&#X3a;", output: ":" },
        { input: "&#X3A;", output: ":" },
        // Incomplete or non-entity sequences pass through untouched.
        { input: "&#", output: "&#" },
        { input: "&>", output: "&>" },
        { input: "id=770&#anchor", output: "id=770&#anchor" },
    ];

    for (const { input, output } of testcases) {
        it(`should XML decode ${input}`, () =>
            expect(entities.decodeXML(input)).toBe(output));
        it(`should HTML decode ${input}`, () =>
            expect(entities.decodeHTML(input)).toBe(output));
    }

    it("should HTML decode partial legacy entity", () => {
        // Strict mode requires the semicolon, so nothing is decoded.
        expect(entities.decodeHTMLStrict("&timesbar")).toBe("&timesbar");
        // Legacy mode decodes the `&times` prefix and keeps the rest.
        expect(entities.decodeHTML("&timesbar")).toBe("×bar");
    });

    it("should HTML decode legacy entities according to spec", () =>
        expect(entities.decodeHTML("?&image_uri=1&ℑ=2&image=3")).toBe(
            "?&image_uri=1&ℑ=2&image=3",
        ));

    it("should back out of legacy entities", () =>
        expect(entities.decodeHTML("&ampa")).toBe("&a"));

    it("should not parse numeric entities in strict mode", () =>
        expect(entities.decodeHTMLStrict("&#55")).toBe("&#55"));

    it("should parse &nbsp followed by < (#852)", () =>
        expect(entities.decodeHTML("&nbsp<")).toBe("\u00A0<"));

    it("should decode trailing legacy entities", () => {
        expect(entities.decodeHTML("&timesbar;&timesbar")).toBe("⨱×bar");
    });

    it("should decode multi-byte entities", () => {
        expect(entities.decodeHTML("&NotGreaterFullEqual;")).toBe("≧̸");
    });

    it("should not decode legacy entities followed by text in attribute mode", () => {
        expect(
            entities.decodeHTML("&not", entities.DecodingMode.Attribute),
        ).toBe("¬");

        // In attribute mode a legacy entity followed by an alphanumeric
        // character or `=` must be left as written.
        expect(
            entities.decodeHTML("&noti", entities.DecodingMode.Attribute),
        ).toBe("&noti");

        expect(
            entities.decodeHTML("&not=", entities.DecodingMode.Attribute),
        ).toBe("&not=");

        expect(entities.decodeHTMLAttribute("&notp")).toBe("&notp");
        expect(entities.decodeHTMLAttribute("&notP")).toBe("&notP");
        expect(entities.decodeHTMLAttribute("&not3")).toBe("&not3");
    });
});
/*
 * Tests for the streaming `EntityDecoder`.
 *
 * `write(input, offset)` is asserted to return the number of consumed
 * characters once an entity is resolved, `0` when the input cannot be an
 * entity, and `-1` when more input is needed. `end()` flushes a pending entity.
 *
 * NOTE(review): several `write()` arguments were restored from an
 * HTML-entity-decoded rendering (e.g. `write(":", 1)` expecting 6). The
 * restored source text matches the asserted consumed lengths and code points;
 * confirm against the repository copy.
 */
describe("EntityDecoder", () => {
    /** Builds a fresh spy for every optional error hook of the decoder. */
    const createErrorHandlers = () => ({
        missingSemicolonAfterCharacterReference: vitest.fn(),
        absenceOfDigitsInNumericCharacterReference: vitest.fn(),
        validateNumericCharacterReference: vitest.fn(),
    });

    /** Creates a decoder over the HTML trie together with its emit spy. */
    const createDecoder = (
        errorHandlers?: ReturnType<typeof createErrorHandlers>,
    ) => {
        const callback = vitest.fn();
        const decoder = new entities.EntityDecoder(
            entities.htmlDecodeTree,
            callback,
            errorHandlers,
        );
        return { callback, decoder };
    };

    it("should decode decimal entities", () => {
        const { callback, decoder } = createDecoder();

        // The first chunk stops mid-entity, so more input is requested.
        expect(decoder.write("&#5", 1)).toBe(-1);
        expect(decoder.write("8;", 0)).toBe(5);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 5);
    });

    it("should decode hex entities", () => {
        const { callback, decoder } = createDecoder();

        expect(decoder.write("&#x3a;", 1)).toBe(6);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 6);
    });

    it("should decode named entities", () => {
        const { callback, decoder } = createDecoder();

        expect(decoder.write("&amp;", 1)).toBe(5);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
    });

    it("should decode legacy entities", () => {
        const { callback, decoder } = createDecoder();

        decoder.startEntity(entities.DecodingMode.Legacy);

        // Without a semicolon the entity is only emitted on `end()`.
        expect(decoder.write("&amp", 1)).toBe(-1);

        expect(callback).toHaveBeenCalledTimes(0);

        expect(decoder.end()).toBe(4);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 4);
    });

    it("should decode named entity written character by character", () => {
        const { callback, decoder } = createDecoder();

        for (const c of "amp") {
            expect(decoder.write(c, 0)).toBe(-1);
        }
        expect(decoder.write(";", 0)).toBe(5);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
    });

    it("should decode numeric entity written character by character", () => {
        const { callback, decoder } = createDecoder();

        for (const c of "#x3a") {
            expect(decoder.write(c, 0)).toBe(-1);
        }
        expect(decoder.write(";", 0)).toBe(6);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 6);
    });

    it("should decode hex entities across several chunks", () => {
        const { callback, decoder } = createDecoder();

        for (const chunk of ["#x", "cf", "ff", "d"]) {
            expect(decoder.write(chunk, 0)).toBe(-1);
        }

        expect(decoder.write(";", 0)).toBe(9);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith(0xc_ff_fd, 9);
    });

    it("should not fail if nothing is written", () => {
        const { callback, decoder } = createDecoder();

        expect(decoder.end()).toBe(0);
        expect(callback).toHaveBeenCalledTimes(0);
    });

    /*
     * Focused tests exercising early exit paths inside a compact run in the real trie.
     * Discovered prefix: "zi" followed by compact run "grarr"; mismatching inside this run should
     * return 0 with no emission (result still 0).
     */
    describe("compact run mismatches", () => {
        it("first run character mismatch returns 0", () => {
            const { callback, decoder } = createDecoder();
            decoder.startEntity(entities.DecodingMode.Strict);
            // After '&': correct prefix 'zi', wrong first run char 'X' (expected 'g').
            expect(decoder.write("ziXgrar", 0)).toBe(0);
            expect(callback).not.toHaveBeenCalled();
        });

        it("mismatch after one correct run char returns 0", () => {
            const { callback, decoder } = createDecoder();
            decoder.startEntity(entities.DecodingMode.Strict);
            // 'zig' matches prefix + first run char; next char 'X' mismatches expected 'r'.
            expect(decoder.write("zigXarr", 0)).toBe(0);
            expect(callback).not.toHaveBeenCalled();
        });

        it("mismatch after two correct run chars returns 0", () => {
            const { callback, decoder } = createDecoder();
            decoder.startEntity(entities.DecodingMode.Strict);
            // 'zigr' matches prefix + first two run chars; next char 'X' mismatches expected 'a'.
            expect(decoder.write("zigrXrr", 0)).toBe(0);
            expect(callback).not.toHaveBeenCalled();
        });
    });

    describe("errors", () => {
        it("should produce an error for a named entity without a semicolon", () => {
            const errorHandlers = createErrorHandlers();
            const { callback, decoder } = createDecoder(errorHandlers);

            // Terminated entity: decoded immediately, no error reported.
            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&amp;", 1)).toBe(5);
            expect(callback).toHaveBeenCalledTimes(1);
            expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(0);

            // Unterminated entity: emitted on `end()`, with one error.
            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&amp", 1)).toBe(-1);
            expect(decoder.end()).toBe(4);

            expect(callback).toHaveBeenCalledTimes(2);
            expect(callback).toHaveBeenLastCalledWith("&".charCodeAt(0), 4);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(1);
        });

        it("should produce an error for a numeric entity without a semicolon", () => {
            const errorHandlers = createErrorHandlers();
            const { callback, decoder } = createDecoder(errorHandlers);

            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&#x3a", 1)).toBe(-1);
            expect(decoder.end()).toBe(5);

            expect(callback).toHaveBeenCalledTimes(1);
            expect(callback).toHaveBeenCalledWith(0x3a, 5);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(1);
            expect(
                errorHandlers.absenceOfDigitsInNumericCharacterReference,
            ).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.validateNumericCharacterReference,
            ).toHaveBeenCalledTimes(1);
            expect(
                errorHandlers.validateNumericCharacterReference,
            ).toHaveBeenCalledWith(0x3a);
        });

        it("should produce an error for numeric entities without digits", () => {
            const errorHandlers = createErrorHandlers();
            const { callback, decoder } = createDecoder(errorHandlers);

            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&#", 1)).toBe(-1);
            expect(decoder.end()).toBe(0);

            expect(callback).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.absenceOfDigitsInNumericCharacterReference,
            ).toHaveBeenCalledTimes(1);
            expect(
                errorHandlers.absenceOfDigitsInNumericCharacterReference,
            ).toHaveBeenCalledWith(2);
            expect(
                errorHandlers.validateNumericCharacterReference,
            ).toHaveBeenCalledTimes(0);
        });

        it("should produce an error for hex entities without digits", () => {
            const errorHandlers = createErrorHandlers();
            const { callback, decoder } = createDecoder(errorHandlers);

            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&#x", 1)).toBe(-1);
            expect(decoder.end()).toBe(0);

            expect(callback).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.absenceOfDigitsInNumericCharacterReference,
            ).toHaveBeenCalledTimes(1);
            expect(
                errorHandlers.validateNumericCharacterReference,
            ).toHaveBeenCalledTimes(0);
        });
    });
});
| import { describe, expect, it } from "vitest"; | ||
| import * as entities from "./index.js"; | ||
/*
 * Round-trip tests: each raw `input` must encode to the listed XML / HTML5
 * form and decode back to itself.
 *
 * NOTE(review): the expected encodings were restored from an
 * HTML-entity-decoded rendering in which `xml`/`html` had collapsed back into
 * raw characters; confirm against the repository copy.
 */
describe("Encode->decode test", () => {
    const testcases = [
        {
            input: "asdf & ÿ ü '",
            xml: "asdf &amp; &#xff; &#xfc; &apos;",
            html: "asdf &amp; &yuml; &uuml; &apos;",
        },
        {
            // Text that itself looks like an entity must be escaped, not kept.
            input: "&#38;",
            xml: "&amp;#38;",
            html: "&amp;&num;38&semi;",
        },
    ];

    for (const { input, xml, html } of testcases) {
        const encodedXML = entities.encodeXML(input);
        it(`should XML encode ${input}`, () => expect(encodedXML).toBe(xml));
        it(`should default to XML encode ${input}`, () =>
            expect(entities.encode(input)).toBe(xml));
        it(`should XML decode ${encodedXML}`, () =>
            expect(entities.decodeXML(encodedXML)).toBe(input));
        // These two exercise the default *decoders*; the titles used to say "encode".
        it(`should default to XML decode ${encodedXML}`, () =>
            expect(entities.decode(encodedXML)).toBe(input));
        it(`should default strict to XML decode ${encodedXML}`, () =>
            expect(entities.decodeStrict(encodedXML)).toBe(input));

        const encodedHTML5 = entities.encodeHTML5(input);
        it(`should HTML5 encode ${input}`, () =>
            expect(encodedHTML5).toBe(html));
        it(`should HTML5 decode ${encodedHTML5}`, () =>
            expect(entities.decodeHTML(encodedHTML5)).toBe(input));
    }

    // Independent of `testcases`, so it is registered once rather than once
    // per loop iteration (which produced duplicate test names).
    it("should encode emojis", () =>
        expect(entities.encodeHTML5("😄🍾🥳💥😇")).toBe(
            "&#x1f604;&#x1f37e;&#x1f973;&#x1f4a5;&#x1f607;",
        ));

    it("should encode data URIs (issue #16)", () => {
        const data =
            "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAALAAABAAEAAAIBRAA7";
        expect(entities.decode(entities.encode(data))).toBe(data);
    });

    it("should HTML encode all ASCII characters", () => {
        for (let index = 0; index < 128; index++) {
            const char = String.fromCharCode(index);
            const encoded = entities.encodeHTML(char);
            const decoded = entities.decodeHTML(encoded);
            expect(decoded).toBe(char);
        }
    });

    it("should encode trailing parts of entities", () =>
        expect(entities.encodeHTML("\uD835")).toBe("&#xd835;"));

    it("should encode surrogate pair with first surrogate equivalent of entity, without corresponding entity", () =>
        expect(entities.encodeHTML("\u{1D4A4}")).toBe("&#x1d4a4;"));
});
/*
 * `encodeNonAsciiHTML` tests: markup-significant and non-ASCII characters are
 * replaced by entities while plain ASCII text (`#123!`) is kept.
 *
 * NOTE(review): expected values restored from an HTML-entity-decoded
 * rendering (they had collapsed into the raw characters); confirm against the
 * repository copy.
 */
describe("encodeNonAsciiHTML", () => {
    it("should encode all non-ASCII characters", () =>
        expect(entities.encodeNonAsciiHTML("<test> #123! übermaßen")).toBe(
            "&lt;test&gt; #123! &uuml;berma&szlig;en",
        ));

    it("should encode emojis", () =>
        expect(entities.encodeNonAsciiHTML("😄🍾🥳💥😇")).toBe(
            "&#x1f604;&#x1f37e;&#x1f973;&#x1f4a5;&#x1f607;",
        ));

    it("should encode chars above surrogates", () =>
        // Zodiac signs, each followed by U+FE0F (variation selector 16).
        // Written with escapes so the invisible selector cannot get lost.
        expect(
            entities.encodeNonAsciiHTML(
                "\u2652\uFE0F\u2653\uFE0F\u2648\uFE0F\u2649\uFE0F\u264A\uFE0F\u264B\uFE0F\u264C\uFE0F\u264D\uFE0F\u264E\uFE0F\u264F\uFE0F\u2650\uFE0F\u2651\uFE0F",
            ),
        ).toBe(
            "&#x2652;&#xfe0f;&#x2653;&#xfe0f;&#x2648;&#xfe0f;&#x2649;&#xfe0f;&#x264a;&#xfe0f;&#x264b;&#xfe0f;&#x264c;&#xfe0f;&#x264d;&#xfe0f;&#x264e;&#xfe0f;&#x264f;&#xfe0f;&#x2650;&#xfe0f;&#x2651;&#xfe0f;",
        ));
});
| import { describe, expect, it } from "vitest"; | ||
| import * as entities from "./index.js"; | ||
/*
 * Tests for the minimal HTML escapers.
 *
 * NOTE(review): the expected strings were restored from an
 * HTML-entity-decoded rendering; as rendered, the attribute case was not even
 * valid syntax (a bare `"` inside a double-quoted literal). Confirm against
 * the repository copy.
 */
describe("escape HTML", () => {
    it("should escape HTML attribute values", () =>
        // Attribute context: `"`, `&` and U+00A0 are escaped; `<` and `>` are kept.
        expect(entities.escapeAttribute('<a " attr > & value \u00A0!')).toBe(
            "<a &quot; attr > &amp; value &nbsp;!",
        ));

    it("should escape HTML text", () =>
        // Text context: `<`, `>`, `&` and U+00A0 are escaped; `"` is kept.
        expect(entities.escapeText('<a " text > & value \u00A0!')).toBe(
            '&lt;a " text &gt; &amp; value &nbsp;!',
        ));
});
| import { readFileSync } from "node:fs"; | ||
| import { describe, expect, it } from "vitest"; | ||
| import legacy from "../maps/legacy.json" with { type: "json" }; | ||
| import * as entities from "./index.js"; | ||
// Entity map names; the array index doubles as the numeric level passed to
// `entities.decode` / `entities.encode` below.
const levels = ["xml", "entities"];

/*
 * Data-driven tests: every entity listed in `../maps/<level>.json` must decode
 * (and round-trip through encode) at its own level and at every higher one.
 */
describe("Documents", () => {
    // Pairs of [level index, parsed entity map].
    const levelDocuments = levels
        .map((name) => new URL(`../maps/${name}.json`, import.meta.url))
        .map((url) => JSON.parse(readFileSync(url, "utf8")))
        .map((document, index) => [index, document]);

    for (const [level, document] of levelDocuments) {
        describe("Decode", () => {
            it(levels[level], () => {
                for (const entity of Object.keys(document)) {
                    for (let l = level; l < levels.length; l++) {
                        // Both call signatures: numeric level and options object.
                        expect(entities.decode(`&${entity};`, l)).toBe(
                            document[entity],
                        );
                        expect(
                            entities.decode(`&${entity};`, { level: l }),
                        ).toBe(document[entity]);
                    }
                }
            });
        });

        describe("Decode strict", () => {
            it(levels[level], () => {
                for (const entity of Object.keys(document)) {
                    for (let l = level; l < levels.length; l++) {
                        expect(entities.decodeStrict(`&${entity};`, l)).toBe(
                            document[entity],
                        );
                        expect(
                            entities.decode(`&${entity};`, {
                                level: l,
                                mode: entities.DecodingMode.Strict,
                            }),
                        ).toBe(document[entity]);
                    }
                }
            });
        });

        describe("Encode", () => {
            it(levels[level], () => {
                for (const entity of Object.keys(document)) {
                    for (let l = level; l < levels.length; l++) {
                        const encoded = entities.encode(document[entity], l);
                        const decoded = entities.decode(encoded, l);
                        expect(decoded).toBe(document[entity]);
                    }
                }
            });

            // NOTE(review): expected value restored from an HTML-entity-decoded
            // rendering, where it had collapsed to the raw input and so
            // asserted that nothing is encoded; confirm against the repository.
            it("should only encode non-ASCII values if asked", () =>
                expect(
                    entities.encode("Great #'s of 🎁", {
                        level,
                        mode: entities.EncodingMode.ASCII,
                    }),
                ).toBe("Great #&apos;s of &#x1f381;"));
        });
    }

    describe("Legacy", () => {
        const legacyMap: Record<string, string> = legacy;
        it("should decode", () => {
            for (const entity of Object.keys(legacyMap)) {
                // Legacy entities are written without the trailing semicolon.
                expect(entities.decodeHTML(`&${entity}`)).toBe(
                    legacyMap[entity],
                );
                expect(
                    entities.decodeStrict(`&${entity}`, {
                        level: entities.EntityLevel.HTML,
                        mode: entities.DecodingMode.Legacy,
                    }),
                ).toBe(legacyMap[entity]);
            }
        });
    });
});
// [hex code point, expected string] pairs for characters outside the BMP.
const astral = [
    ["1d306", "\uD834\uDF06"],
    ["1d11e", "\uD834\uDD1E"],
];

// [hex code point, expected string] pairs whose decoded value differs from
// the code point written in the reference.
const astralSpecial = [
    ["80", "\u20AC"],
    ["110000", "\uFFFD"],
];

/* Numeric character references for astral and specially-mapped code points. */
describe("Astral entities", () => {
    for (const [hex, text] of astral) {
        it(`should decode ${text}`, () => {
            expect(entities.decode(`&#x${hex};`)).toBe(text);
        });

        it(`should encode ${text}`, () => {
            expect(entities.encode(text)).toBe(`&#x${hex};`);
        });

        it(`should escape ${text}`, () => {
            expect(entities.escape(text)).toBe(`&#x${hex};`);
        });
    }

    for (const [hex, text] of astralSpecial) {
        it(`should decode special \\u${hex}`, () => {
            expect(entities.decode(`&#x${hex};`)).toBe(text);
        });
    }
});
/* Tests for `escape` / `escapeUTF8`. */
describe("Escape", () => {
    it("should always decode ASCII chars", () => {
        // ASCII is 0x00..0x7F inclusive; the previous bound (`< 0x7f`)
        // silently skipped DEL.
        for (let index = 0; index <= 0x7f; index++) {
            const c = String.fromCharCode(index);
            expect(entities.decodeXML(entities.escape(c))).toBe(c);
        }
    });

    // NOTE(review): expected value restored from an HTML-entity-decoded
    // rendering (it had collapsed to the raw input); confirm against the
    // repository copy.
    it("should keep UTF8 characters", () =>
        expect(entities.escapeUTF8('ß < "ü"')).toBe(`ß &lt; &quot;ü&quot;`));
});
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
144
17.07%392254
-4.96%19
35.71%109
-3.54%4948
-9.03%19
11.76%