Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

html-encoding-sniffer

Package Overview
Dependencies
Maintainers
6
Versions
7
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

html-encoding-sniffer - npm Package Compare versions

Comparing version 2.0.1 to 3.0.0

56

lib/html-encoding-sniffer.js

@@ -5,4 +5,4 @@ "use strict";

// https://html.spec.whatwg.org/#encoding-sniffing-algorithm
module.exports = (buffer, { transportLayerEncodingLabel, defaultEncoding = "windows-1252" } = {}) => {
let encoding = whatwgEncoding.getBOMEncoding(buffer); // see https://github.com/whatwg/html/issues/1910
module.exports = (uint8Array, { transportLayerEncodingLabel, defaultEncoding = "windows-1252" } = {}) => {
let encoding = whatwgEncoding.getBOMEncoding(uint8Array);

@@ -14,3 +14,3 @@ if (encoding === null && transportLayerEncodingLabel !== undefined) {

if (encoding === null) {
encoding = prescanMetaCharset(buffer);
encoding = prescanMetaCharset(uint8Array);
}

@@ -26,13 +26,13 @@

// https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding
function prescanMetaCharset(buffer) {
const l = Math.min(buffer.length, 1024);
function prescanMetaCharset(uint8Array) {
const l = Math.min(uint8Array.byteLength, 1024);
for (let i = 0; i < l; i++) {
let c = buffer[i];
let c = uint8Array[i];
if (c === 0x3C) {
// "<"
const c1 = buffer[i + 1];
const c2 = buffer[i + 2];
const c3 = buffer[i + 3];
const c4 = buffer[i + 4];
const c5 = buffer[i + 5];
const c1 = uint8Array[i + 1];
const c2 = uint8Array[i + 2];
const c3 = uint8Array[i + 3];
const c4 = uint8Array[i + 4];
const c5 = uint8Array[i + 5];
// !-- (comment start)

@@ -42,5 +42,5 @@ if (c1 === 0x21 && c2 === 0x2D && c3 === 0x2D) {

for (; i < l; i++) {
c = buffer[i];
const cMinus1 = buffer[i - 1];
const cMinus2 = buffer[i - 2];
c = uint8Array[i];
const cMinus1 = uint8Array[i - 1];
const cMinus2 = uint8Array[i - 2];
// --> (comment end)

@@ -65,3 +65,3 @@ if (c === 0x3E && cMinus1 === 0x2D && cMinus2 === 0x2D) {

do {
attrRes = getAttribute(buffer, i, l);
attrRes = getAttribute(uint8Array, i, l);
if (attrRes.attr && !attributeList.has(attrRes.attr.name)) {

@@ -105,3 +105,3 @@ attributeList.add(attrRes.attr.name);

for (i += 2; i < l; i++) {
c = buffer[i];
c = uint8Array[i];
// space or >

@@ -114,3 +114,3 @@ if (isSpaceCharacter(c) || c === 0x3E) {

do {
attrRes = getAttribute(buffer, i, l);
attrRes = getAttribute(uint8Array, i, l);
i = attrRes.i;

@@ -121,3 +121,3 @@ } while (attrRes.attr);

for (i += 2; i < l; i++) {
c = buffer[i];
c = uint8Array[i];
// >

@@ -135,5 +135,5 @@ if (c === 0x3E) {

// https://html.spec.whatwg.org/multipage/syntax.html#concept-get-attributes-when-sniffing
function getAttribute(buffer, i, l) {
function getAttribute(uint8Array, i, l) {
for (; i < l; i++) {
let c = buffer[i];
let c = uint8Array[i];
// space or /

@@ -150,3 +150,3 @@ if (isSpaceCharacter(c) || c === 0x2F) {

nameLoop:for (; i < l; i++) {
c = buffer[i];
c = uint8Array[i];
// "="

@@ -160,3 +160,3 @@ if (c === 0x3D && name !== "") {

for (i++; i < l; i++) {
c = buffer[i];
c = uint8Array[i];
// space

@@ -187,7 +187,7 @@ if (isSpaceCharacter(c)) {

}
c = buffer[i];
c = uint8Array[i];
// space
if (isSpaceCharacter(c)) {
for (i++; i < l; i++) {
c = buffer[i];
c = uint8Array[i];
// space

@@ -205,3 +205,3 @@ if (isSpaceCharacter(c)) {

for (i++; i < l; i++) {
c = buffer[i];
c = uint8Array[i];

@@ -235,3 +235,3 @@ if (c === quote) {

for (i++; i < l; i++) {
c = buffer[i];
c = uint8Array[i];

@@ -258,3 +258,3 @@ // space or >

while (true) {
const indexOfCharset = string.substring(position).search(/charset/i);
const indexOfCharset = string.substring(position).search(/charset/ui);

@@ -300,3 +300,3 @@ if (indexOfCharset === -1) {

const indexOfASCIIWhitespaceOrSemicolon = string.substring(position + 1).search(/\x09|\x0A|\x0C|\x0D|\x20|;/);
const indexOfASCIIWhitespaceOrSemicolon = string.substring(position + 1).search(/\x09|\x0A|\x0C|\x0D|\x20|;/u);
const end = indexOfASCIIWhitespaceOrSemicolon === -1 ?

@@ -303,0 +303,0 @@ string.length :

@@ -1,2 +0,2 @@

Copyright © 2016–2020 Domenic Denicola <d@domenic.me>
Copyright © Domenic Denicola <d@domenic.me>

@@ -3,0 +3,0 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

@@ -8,3 +8,3 @@ {

],
"version": "2.0.1",
"version": "3.0.0",
"author": "Domenic Denicola <d@domenic.me> (https://domenic.me/)",

@@ -22,11 +22,12 @@ "license": "MIT",

"dependencies": {
"whatwg-encoding": "^1.0.5"
"whatwg-encoding": "^2.0.0"
},
"devDependencies": {
"eslint": "^6.8.0",
"mocha": "^7.0.0"
"@domenic/eslint-config": "^1.4.0",
"eslint": "^7.32.0",
"mocha": "^9.1.1"
},
"engines": {
"node": ">=10"
"node": ">=12"
}
}

@@ -9,6 +9,8 @@ # Determine the Encoding of a HTML Byte Stream

const htmlBuffer = fs.readFileSync("./html-page.html");
const sniffedEncoding = htmlEncodingSniffer(htmlBuffer);
const htmlBytes = fs.readFileSync("./html-page.html");
const sniffedEncoding = htmlEncodingSniffer(htmlBytes);
```
The passed bytes are given as a `Uint8Array`; the Node.js `Buffer` subclass of `Uint8Array` will also work, as shown above.
The returned value will be a canonical [encoding name](https://encoding.spec.whatwg.org/#names-and-labels) (not a label). You might then combine this with the [whatwg-encoding](https://github.com/jsdom/whatwg-encoding) package to decode the result:

@@ -18,3 +20,3 @@

const whatwgEncoding = require("whatwg-encoding");
const htmlString = whatwgEncoding.decode(htmlBuffer, sniffedEncoding);
const htmlString = whatwgEncoding.decode(htmlBytes, sniffedEncoding);
```

@@ -27,3 +29,3 @@

```js
const sniffedEncoding = htmlEncodingSniffer(htmlBuffer, {
const sniffedEncoding = htmlEncodingSniffer(htmlBytes, {
transportLayerEncodingLabel,

@@ -30,0 +32,0 @@ defaultEncoding

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc