Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

istextorbinary

Package Overview
Dependencies
Maintainers
4
Versions
106
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

istextorbinary - npm Package Compare versions

Comparing version 5.15.0-next.1627595674.345b3aa16bba43ba618239c19c8def5cf66eda10 to 5.15.0-next.1627747584.40036cea671e4dee4fd2c4a154d3f69017ec7422

94

edition-browsers/index.js

@@ -91,3 +91,10 @@ /* eslint no-use-before-define:0 */

// Extract
const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);
chunkBegin = getChunkBegin(buffer, chunkBegin);
if (chunkBegin === -1) {
return binaryEncoding;
}
const chunkEnd = getChunkEnd(buffer, Math.min(buffer.length, chunkBegin + chunkLength));
if (chunkEnd > buffer.length) {
return binaryEncoding;
}
const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd);

@@ -108,1 +115,86 @@ // Detect encoding

}
// The functions below are created to handle multibyte utf8 characters.
// To understand how the encoding works,
// check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
/**
 * Move a chunk's start index back onto the lead byte of a UTF-8 character
 * when the requested start lands on a continuation byte.
 *
 * @param buf bytes being inspected
 * @param chunkBegin requested start index of the chunk
 * @returns `0` when `chunkBegin` is 0; `chunkBegin` unchanged when the byte at
 *   that index is not a continuation byte; otherwise the index (one to three
 *   bytes earlier) of a lead byte whose sequence is long enough to reach
 *   `chunkBegin`; `-1` when no such lead byte is found.
 */
function getChunkBegin(buf, chunkBegin) {
    // Nothing precedes the very first byte.
    if (chunkBegin === 0) {
        return 0;
    }
    // Only a continuation byte (10xxxxxx) means we are inside a character.
    if (!isLaterByteOfUtf8(buf[chunkBegin])) {
        return chunkBegin;
    }
    // Look back three, then two, then one byte. The further back the lead byte
    // sits, the longer its sequence must be to still cover chunkBegin.
    for (let distance = 3; distance >= 1; distance--) {
        const candidate = chunkBegin - distance;
        if (candidate >= 0) {
            const lead = buf[candidate];
            const coversChunkBegin = isFirstByteOf4ByteChar(lead) ||
                (distance <= 2 && isFirstByteOf3ByteChar(lead)) ||
                (distance === 1 && isFirstByteOf2ByteChar(lead));
            if (coversChunkBegin) {
                return candidate;
            }
        }
    }
    // A continuation byte with no matching lead byte in range.
    return -1;
}
/**
 * Push a chunk's end index forward so that a multibyte UTF-8 character
 * started within the last three bytes before it is not cut in half.
 *
 * @param buf bytes being inspected
 * @param chunkEnd requested end index of the chunk
 * @returns `chunkEnd` unchanged when it equals `buf.length` or when none of the
 *   three preceding bytes is a lead byte that needs more bytes; otherwise
 *   `chunkEnd` plus the number of missing bytes (1 to 3). The result is not
 *   clamped, so it can exceed `buf.length`.
 */
function getChunkEnd(buf, chunkEnd) {
    // The chunk already runs to the end of the buffer.
    if (chunkEnd === buf.length) {
        return chunkEnd;
    }
    // Inspect the bytes three, two and one position before the end. A lead
    // byte found `back` positions before the end has `back` bytes inside the
    // chunk, so a sequence of length N is missing N - back bytes.
    for (let back = 3; back >= 1; back--) {
        const index = chunkEnd - back;
        if (index >= 0) {
            const lead = buf[index];
            if (isFirstByteOf4ByteChar(lead)) {
                return chunkEnd + (4 - back);
            }
            if (back <= 2 && isFirstByteOf3ByteChar(lead)) {
                return chunkEnd + (3 - back);
            }
            if (back === 1 && isFirstByteOf2ByteChar(lead)) {
                return chunkEnd + 1;
            }
        }
    }
    return chunkEnd;
}
/**
 * Tell whether a byte is the lead byte of a 4-byte UTF-8 sequence,
 * i.e. its top five bits are 11110.
 */
function isFirstByteOf4ByteChar(byte) {
    // eslint-disable-next-line no-bitwise
    const topFiveBits = byte >> 3;
    return topFiveBits === 0b11110;
}
/**
 * Tell whether a byte is the lead byte of a 3-byte UTF-8 sequence,
 * i.e. its top four bits are 1110.
 */
function isFirstByteOf3ByteChar(byte) {
    // eslint-disable-next-line no-bitwise
    const topFourBits = byte >> 4;
    return topFourBits === 0b1110;
}
/**
 * Tell whether a byte is the lead byte of a 2-byte UTF-8 sequence,
 * i.e. its top three bits are 110.
 */
function isFirstByteOf2ByteChar(byte) {
    // eslint-disable-next-line no-bitwise
    const topThreeBits = byte >> 5;
    return topThreeBits === 0b110;
}
/**
 * Tell whether a byte is a UTF-8 continuation byte,
 * i.e. its top two bits are 10.
 */
function isLaterByteOfUtf8(byte) {
    // eslint-disable-next-line no-bitwise
    const topTwoBits = byte >> 6;
    return topTwoBits === 0b10;
}

@@ -92,3 +92,10 @@ /* eslint no-use-before-define:0 */

// Extract
const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);
chunkBegin = getChunkBegin(buffer, chunkBegin);
if (chunkBegin === -1) {
return binaryEncoding;
}
const chunkEnd = getChunkEnd(buffer, Math.min(buffer.length, chunkBegin + chunkLength));
if (chunkEnd > buffer.length) {
return binaryEncoding;
}
const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd);

@@ -109,1 +116,86 @@ // Detect encoding

}
// The functions below are created to handle multibyte utf8 characters.
// To understand how the encoding works,
// check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
/**
 * Move a chunk's start index back onto the lead byte of a UTF-8 character
 * when the requested start lands on a continuation byte.
 *
 * @param buf bytes being inspected
 * @param chunkBegin requested start index of the chunk
 * @returns `0` when `chunkBegin` is 0; `chunkBegin` unchanged when the byte at
 *   that index is not a continuation byte; otherwise the index (one to three
 *   bytes earlier) of a lead byte whose sequence is long enough to reach
 *   `chunkBegin`; `-1` when no such lead byte is found.
 */
function getChunkBegin(buf, chunkBegin) {
    // Nothing precedes the very first byte.
    if (chunkBegin === 0) {
        return 0;
    }
    // Only a continuation byte (10xxxxxx) means we are inside a character.
    if (!isLaterByteOfUtf8(buf[chunkBegin])) {
        return chunkBegin;
    }
    // Look back three, then two, then one byte. The further back the lead byte
    // sits, the longer its sequence must be to still cover chunkBegin.
    for (let distance = 3; distance >= 1; distance--) {
        const candidate = chunkBegin - distance;
        if (candidate >= 0) {
            const lead = buf[candidate];
            const coversChunkBegin = isFirstByteOf4ByteChar(lead) ||
                (distance <= 2 && isFirstByteOf3ByteChar(lead)) ||
                (distance === 1 && isFirstByteOf2ByteChar(lead));
            if (coversChunkBegin) {
                return candidate;
            }
        }
    }
    // A continuation byte with no matching lead byte in range.
    return -1;
}
/**
 * Push a chunk's end index forward so that a multibyte UTF-8 character
 * started within the last three bytes before it is not cut in half.
 *
 * @param buf bytes being inspected
 * @param chunkEnd requested end index of the chunk
 * @returns `chunkEnd` unchanged when it equals `buf.length` or when none of the
 *   three preceding bytes is a lead byte that needs more bytes; otherwise
 *   `chunkEnd` plus the number of missing bytes (1 to 3). The result is not
 *   clamped, so it can exceed `buf.length`.
 */
function getChunkEnd(buf, chunkEnd) {
    // The chunk already runs to the end of the buffer.
    if (chunkEnd === buf.length) {
        return chunkEnd;
    }
    // Inspect the bytes three, two and one position before the end. A lead
    // byte found `back` positions before the end has `back` bytes inside the
    // chunk, so a sequence of length N is missing N - back bytes.
    for (let back = 3; back >= 1; back--) {
        const index = chunkEnd - back;
        if (index >= 0) {
            const lead = buf[index];
            if (isFirstByteOf4ByteChar(lead)) {
                return chunkEnd + (4 - back);
            }
            if (back <= 2 && isFirstByteOf3ByteChar(lead)) {
                return chunkEnd + (3 - back);
            }
            if (back === 1 && isFirstByteOf2ByteChar(lead)) {
                return chunkEnd + 1;
            }
        }
    }
    return chunkEnd;
}
/**
 * Tell whether a byte is the lead byte of a 4-byte UTF-8 sequence,
 * i.e. its top five bits are 11110.
 */
function isFirstByteOf4ByteChar(byte) {
    // eslint-disable-next-line no-bitwise
    const topFiveBits = byte >> 3;
    return topFiveBits === 0b11110;
}
/**
 * Tell whether a byte is the lead byte of a 3-byte UTF-8 sequence,
 * i.e. its top four bits are 1110.
 */
function isFirstByteOf3ByteChar(byte) {
    // eslint-disable-next-line no-bitwise
    const topFourBits = byte >> 4;
    return topFourBits === 0b1110;
}
/**
 * Tell whether a byte is the lead byte of a 2-byte UTF-8 sequence,
 * i.e. its top three bits are 110.
 */
function isFirstByteOf2ByteChar(byte) {
    // eslint-disable-next-line no-bitwise
    const topThreeBits = byte >> 5;
    return topThreeBits === 0b110;
}
/**
 * Tell whether a byte is a UTF-8 continuation byte,
 * i.e. its top two bits are 10.
 */
function isLaterByteOfUtf8(byte) {
    // eslint-disable-next-line no-bitwise
    const topTwoBits = byte >> 6;
    return topTwoBits === 0b10;
}

@@ -119,3 +119,10 @@ "use strict";

// Extract
const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);
chunkBegin = getChunkBegin(buffer, chunkBegin);
if (chunkBegin === -1) {
return binaryEncoding;
}
const chunkEnd = getChunkEnd(buffer, Math.min(buffer.length, chunkBegin + chunkLength));
if (chunkEnd > buffer.length) {
return binaryEncoding;
}
const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd);

@@ -137,1 +144,86 @@ // Detect encoding

exports.getEncoding = getEncoding;
// The functions below are created to handle multibyte utf8 characters.
// To understand how the encoding works,
// check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
/**
 * Move a chunk's start index back onto the lead byte of a UTF-8 character
 * when the requested start lands on a continuation byte.
 *
 * @param buf bytes being inspected
 * @param chunkBegin requested start index of the chunk
 * @returns `0` when `chunkBegin` is 0; `chunkBegin` unchanged when the byte at
 *   that index is not a continuation byte; otherwise the index (one to three
 *   bytes earlier) of a lead byte whose sequence is long enough to reach
 *   `chunkBegin`; `-1` when no such lead byte is found.
 */
function getChunkBegin(buf, chunkBegin) {
    // Nothing precedes the very first byte.
    if (chunkBegin === 0) {
        return 0;
    }
    // Only a continuation byte (10xxxxxx) means we are inside a character.
    if (!isLaterByteOfUtf8(buf[chunkBegin])) {
        return chunkBegin;
    }
    // Look back three, then two, then one byte. The further back the lead byte
    // sits, the longer its sequence must be to still cover chunkBegin.
    for (let distance = 3; distance >= 1; distance--) {
        const candidate = chunkBegin - distance;
        if (candidate >= 0) {
            const lead = buf[candidate];
            const coversChunkBegin = isFirstByteOf4ByteChar(lead) ||
                (distance <= 2 && isFirstByteOf3ByteChar(lead)) ||
                (distance === 1 && isFirstByteOf2ByteChar(lead));
            if (coversChunkBegin) {
                return candidate;
            }
        }
    }
    // A continuation byte with no matching lead byte in range.
    return -1;
}
/**
 * Push a chunk's end index forward so that a multibyte UTF-8 character
 * started within the last three bytes before it is not cut in half.
 *
 * @param buf bytes being inspected
 * @param chunkEnd requested end index of the chunk
 * @returns `chunkEnd` unchanged when it equals `buf.length` or when none of the
 *   three preceding bytes is a lead byte that needs more bytes; otherwise
 *   `chunkEnd` plus the number of missing bytes (1 to 3). The result is not
 *   clamped, so it can exceed `buf.length`.
 */
function getChunkEnd(buf, chunkEnd) {
    // The chunk already runs to the end of the buffer.
    if (chunkEnd === buf.length) {
        return chunkEnd;
    }
    // Inspect the bytes three, two and one position before the end. A lead
    // byte found `back` positions before the end has `back` bytes inside the
    // chunk, so a sequence of length N is missing N - back bytes.
    for (let back = 3; back >= 1; back--) {
        const index = chunkEnd - back;
        if (index >= 0) {
            const lead = buf[index];
            if (isFirstByteOf4ByteChar(lead)) {
                return chunkEnd + (4 - back);
            }
            if (back <= 2 && isFirstByteOf3ByteChar(lead)) {
                return chunkEnd + (3 - back);
            }
            if (back === 1 && isFirstByteOf2ByteChar(lead)) {
                return chunkEnd + 1;
            }
        }
    }
    return chunkEnd;
}
/**
 * Tell whether a byte is the lead byte of a 4-byte UTF-8 sequence,
 * i.e. its top five bits are 11110.
 */
function isFirstByteOf4ByteChar(byte) {
    // eslint-disable-next-line no-bitwise
    const topFiveBits = byte >> 3;
    return topFiveBits === 0b11110;
}
/**
 * Tell whether a byte is the lead byte of a 3-byte UTF-8 sequence,
 * i.e. its top four bits are 1110.
 */
function isFirstByteOf3ByteChar(byte) {
    // eslint-disable-next-line no-bitwise
    const topFourBits = byte >> 4;
    return topFourBits === 0b1110;
}
/**
 * Tell whether a byte is the lead byte of a 2-byte UTF-8 sequence,
 * i.e. its top three bits are 110.
 */
function isFirstByteOf2ByteChar(byte) {
    // eslint-disable-next-line no-bitwise
    const topThreeBits = byte >> 5;
    return topThreeBits === 0b110;
}
/**
 * Tell whether a byte is a UTF-8 continuation byte,
 * i.e. its top two bits are 10.
 */
function isLaterByteOfUtf8(byte) {
    // eslint-disable-next-line no-bitwise
    const topTwoBits = byte >> 6;
    return topTwoBits === 0b10;
}

2

package.json
{
"title": "Is Text or Binary?",
"name": "istextorbinary",
"version": "5.15.0-next.1627595674.345b3aa16bba43ba618239c19c8def5cf66eda10",
"version": "5.15.0-next.1627747584.40036cea671e4dee4fd2c4a154d3f69017ec7422",
"description": "Determine if a filename and/or buffer is text or binary. Smarter detection than the other solutions.",

@@ -6,0 +6,0 @@ "homepage": "https://github.com/bevry/istextorbinary",

@@ -113,3 +113,16 @@ /* eslint no-use-before-define:0 */

// Extract
const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength)
chunkBegin = getChunkBegin(buffer, chunkBegin)
if (chunkBegin === -1) {
return binaryEncoding
}
const chunkEnd = getChunkEnd(
buffer,
Math.min(buffer.length, chunkBegin + chunkLength)
)
if (chunkEnd > buffer.length) {
return binaryEncoding
}
const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd)

@@ -132,1 +145,115 @@

}
// The functions below are created to handle multibyte utf8 characters.
// To understand how the encoding works,
// check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
/**
 * Move a chunk's start index back onto the lead byte of a UTF-8 character
 * when the requested start lands on a continuation byte.
 *
 * @param buf bytes being inspected
 * @param chunkBegin requested start index of the chunk
 * @returns `0` when `chunkBegin` is 0; `chunkBegin` unchanged when the byte at
 *   that index is not a continuation byte; otherwise the index (one to three
 *   bytes earlier) of a lead byte whose sequence is long enough to reach
 *   `chunkBegin`; `-1` when no such lead byte is found.
 */
function getChunkBegin(buf: Buffer, chunkBegin: number) {
	// Nothing precedes the very first byte.
	if (chunkBegin === 0) {
		return 0
	}
	// Only a continuation byte (10xxxxxx) means we are inside a character.
	if (!isLaterByteOfUtf8(buf[chunkBegin])) {
		return chunkBegin
	}
	// Look back three, then two, then one byte. The further back the lead byte
	// sits, the longer its sequence must be to still cover chunkBegin.
	for (let distance = 3; distance >= 1; distance--) {
		const candidate = chunkBegin - distance
		if (candidate >= 0) {
			const lead = buf[candidate]
			const coversChunkBegin =
				isFirstByteOf4ByteChar(lead) ||
				(distance <= 2 && isFirstByteOf3ByteChar(lead)) ||
				(distance === 1 && isFirstByteOf2ByteChar(lead))
			if (coversChunkBegin) {
				return candidate
			}
		}
	}
	// A continuation byte with no matching lead byte in range.
	return -1
}
/**
 * Push a chunk's end index forward so that a multibyte UTF-8 character
 * started within the last three bytes before it is not cut in half.
 *
 * @param buf bytes being inspected
 * @param chunkEnd requested end index of the chunk
 * @returns `chunkEnd` unchanged when it equals `buf.length` or when none of the
 *   three preceding bytes is a lead byte that needs more bytes; otherwise
 *   `chunkEnd` plus the number of missing bytes (1 to 3). The result is not
 *   clamped, so it can exceed `buf.length`.
 */
function getChunkEnd(buf: Buffer, chunkEnd: number) {
	// The chunk already runs to the end of the buffer.
	if (chunkEnd === buf.length) {
		return chunkEnd
	}
	// Inspect the bytes three, two and one position before the end. A lead
	// byte found `back` positions before the end has `back` bytes inside the
	// chunk, so a sequence of length N is missing N - back bytes.
	for (let back = 3; back >= 1; back--) {
		const index = chunkEnd - back
		if (index >= 0) {
			const lead = buf[index]
			if (isFirstByteOf4ByteChar(lead)) {
				return chunkEnd + (4 - back)
			}
			if (back <= 2 && isFirstByteOf3ByteChar(lead)) {
				return chunkEnd + (3 - back)
			}
			if (back === 1 && isFirstByteOf2ByteChar(lead)) {
				return chunkEnd + 1
			}
		}
	}
	return chunkEnd
}
/**
 * Tell whether a byte is the lead byte of a 4-byte UTF-8 sequence,
 * i.e. its top five bits are 11110.
 */
function isFirstByteOf4ByteChar(byte: number) {
	// eslint-disable-next-line no-bitwise
	const topFiveBits = byte >> 3
	return topFiveBits === 0b11110
}
/**
 * Tell whether a byte is the lead byte of a 3-byte UTF-8 sequence,
 * i.e. its top four bits are 1110.
 */
function isFirstByteOf3ByteChar(byte: number) {
	// eslint-disable-next-line no-bitwise
	const topFourBits = byte >> 4
	return topFourBits === 0b1110
}
/**
 * Tell whether a byte is the lead byte of a 2-byte UTF-8 sequence,
 * i.e. its top three bits are 110.
 */
function isFirstByteOf2ByteChar(byte: number) {
	// eslint-disable-next-line no-bitwise
	const topThreeBits = byte >> 5
	return topThreeBits === 0b110
}
/**
 * Tell whether a byte is a UTF-8 continuation byte,
 * i.e. its top two bits are 10.
 */
function isLaterByteOfUtf8(byte: number) {
	// eslint-disable-next-line no-bitwise
	const topTwoBits = byte >> 6
	return topTwoBits === 0b10
}

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc