Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

istextorbinary

Package Overview
Dependencies
Maintainers
4
Versions
106
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

istextorbinary - npm Package Compare versions

Comparing version 5.15.0 to 6.0.0-next.1627748439.0ee4d89a92f81b1d8280e8d0dd12cf2fb9976ed0

95

edition-browsers/index.js

@@ -91,3 +91,10 @@ /* eslint no-use-before-define:0 */

// Extract
const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);
chunkBegin = getChunkBegin(buffer, chunkBegin);
if (chunkBegin === -1) {
return binaryEncoding;
}
const chunkEnd = getChunkEnd(buffer, Math.min(buffer.length, chunkBegin + chunkLength));
if (chunkEnd > buffer.length) {
return binaryEncoding;
}
const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd);

@@ -108,1 +115,87 @@ // Detect encoding

}
// ====================================
// The functions below are created to handle multibyte utf8 characters.
// To understand how the encoding works, check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
// @todo add documentation for these
/**
 * Snap a chunk's start offset back onto the lead byte of the UTF-8 character
 * it falls inside, so the chunk does not begin in the middle of a character.
 *
 * @param buf The bytes being inspected.
 * @param chunkBegin The proposed start offset into `buf`.
 * @returns `0` when `chunkBegin` is `0`; `chunkBegin` unchanged when the byte
 * at that offset is not a continuation byte (`10xxxxxx`); otherwise the offset
 * (one to three bytes earlier) of a lead byte whose sequence is long enough to
 * reach `chunkBegin`; or `-1` when no such lead byte exists.
 */
function getChunkBegin(buf, chunkBegin) {
  // Nothing precedes offset zero, so there is nowhere to back up to.
  if (chunkBegin === 0) {
    return 0;
  }
  // Only a continuation byte means we landed inside a multibyte character.
  if (!isLaterByteOfUtf8(buf[chunkBegin])) {
    return chunkBegin;
  }
  // Probe the farthest candidate first (3 bytes back), then move closer.
  // A lead byte `distance` bytes back only counts if its character is longer
  // than `distance` bytes, i.e. it actually covers `chunkBegin`.
  for (let distance = 3; distance >= 1; distance--) {
    const candidate = chunkBegin - distance;
    if (candidate < 0) {
      continue;
    }
    const lead = buf[candidate];
    const coversChunkBegin =
      isFirstByteOf4ByteChar(lead) ||
      (distance <= 2 && isFirstByteOf3ByteChar(lead)) ||
      (distance === 1 && isFirstByteOf2ByteChar(lead));
    if (coversChunkBegin) {
      return candidate;
    }
  }
  // A continuation byte with no matching lead byte in front of it.
  return -1;
}
/**
 * Push a chunk's end offset forward so that a multibyte UTF-8 character
 * started inside the chunk is not cut off. The offset is treated as
 * exclusive: the byte at `chunkEnd - 1` is the last one inside the chunk.
 *
 * @param buf The bytes being inspected.
 * @param chunkEnd The proposed end offset into `buf`.
 * @returns `chunkEnd` unchanged when it equals `buf.length` or when none of
 * the last three bytes before it starts a character that crosses the
 * boundary; otherwise `chunkEnd` plus the number of bytes that character
 * still needs. The result is not clamped and may exceed `buf.length`.
 */
function getChunkEnd(buf, chunkEnd) {
  // Already at the end of the buffer: nothing further to include.
  if (chunkEnd === buf.length) {
    return chunkEnd;
  }
  // Inspect the bytes 3, 2, then 1 positions before the boundary. A lead byte
  // for an N-byte character found `distance` bytes back is missing
  // `N - distance` bytes whenever N is greater than `distance`.
  for (let distance = 3; distance >= 1; distance--) {
    const position = chunkEnd - distance;
    if (position < 0) {
      continue;
    }
    const lead = buf[position];
    if (isFirstByteOf4ByteChar(lead)) {
      return chunkEnd + (4 - distance);
    }
    if (distance <= 2 && isFirstByteOf3ByteChar(lead)) {
      return chunkEnd + (3 - distance);
    }
    if (distance === 1 && isFirstByteOf2ByteChar(lead)) {
      return chunkEnd + (2 - distance);
    }
  }
  return chunkEnd;
}
/**
 * Test whether a byte has the form `11110xxx`, the lead byte of a four-byte
 * UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by three bits equals `0b11110`.
 */
function isFirstByteOf4ByteChar(byte) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 3;
  return prefix === 0b11110;
}
/**
 * Test whether a byte has the form `1110xxxx`, the lead byte of a three-byte
 * UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by four bits equals `0b1110`.
 */
function isFirstByteOf3ByteChar(byte) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 4;
  return prefix === 0b1110;
}
/**
 * Test whether a byte has the form `110xxxxx`, the lead byte of a two-byte
 * UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by five bits equals `0b110`.
 */
function isFirstByteOf2ByteChar(byte) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 5;
  return prefix === 0b110;
}
/**
 * Test whether a byte has the form `10xxxxxx`, a continuation (non-leading)
 * byte of a multibyte UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by six bits equals `0b10`.
 */
function isLaterByteOfUtf8(byte) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 6;
  return prefix === 0b10;
}

@@ -92,3 +92,10 @@ /* eslint no-use-before-define:0 */

// Extract
const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);
chunkBegin = getChunkBegin(buffer, chunkBegin);
if (chunkBegin === -1) {
return binaryEncoding;
}
const chunkEnd = getChunkEnd(buffer, Math.min(buffer.length, chunkBegin + chunkLength));
if (chunkEnd > buffer.length) {
return binaryEncoding;
}
const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd);

@@ -109,1 +116,87 @@ // Detect encoding

}
// ====================================
// The functions below are created to handle multibyte utf8 characters.
// To understand how the encoding works, check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
// @todo add documentation for these
/**
 * Snap a chunk's start offset back onto the lead byte of the UTF-8 character
 * it falls inside, so the chunk does not begin in the middle of a character.
 *
 * @param buf The bytes being inspected.
 * @param chunkBegin The proposed start offset into `buf`.
 * @returns `0` when `chunkBegin` is `0`; `chunkBegin` unchanged when the byte
 * at that offset is not a continuation byte (`10xxxxxx`); otherwise the offset
 * (one to three bytes earlier) of a lead byte whose sequence is long enough to
 * reach `chunkBegin`; or `-1` when no such lead byte exists.
 */
function getChunkBegin(buf, chunkBegin) {
  // Nothing precedes offset zero, so there is nowhere to back up to.
  if (chunkBegin === 0) {
    return 0;
  }
  // Only a continuation byte means we landed inside a multibyte character.
  if (!isLaterByteOfUtf8(buf[chunkBegin])) {
    return chunkBegin;
  }
  // Probe the farthest candidate first (3 bytes back), then move closer.
  // A lead byte `distance` bytes back only counts if its character is longer
  // than `distance` bytes, i.e. it actually covers `chunkBegin`.
  for (let distance = 3; distance >= 1; distance--) {
    const candidate = chunkBegin - distance;
    if (candidate < 0) {
      continue;
    }
    const lead = buf[candidate];
    const coversChunkBegin =
      isFirstByteOf4ByteChar(lead) ||
      (distance <= 2 && isFirstByteOf3ByteChar(lead)) ||
      (distance === 1 && isFirstByteOf2ByteChar(lead));
    if (coversChunkBegin) {
      return candidate;
    }
  }
  // A continuation byte with no matching lead byte in front of it.
  return -1;
}
/**
 * Push a chunk's end offset forward so that a multibyte UTF-8 character
 * started inside the chunk is not cut off. The offset is treated as
 * exclusive: the byte at `chunkEnd - 1` is the last one inside the chunk.
 *
 * @param buf The bytes being inspected.
 * @param chunkEnd The proposed end offset into `buf`.
 * @returns `chunkEnd` unchanged when it equals `buf.length` or when none of
 * the last three bytes before it starts a character that crosses the
 * boundary; otherwise `chunkEnd` plus the number of bytes that character
 * still needs. The result is not clamped and may exceed `buf.length`.
 */
function getChunkEnd(buf, chunkEnd) {
  // Already at the end of the buffer: nothing further to include.
  if (chunkEnd === buf.length) {
    return chunkEnd;
  }
  // Inspect the bytes 3, 2, then 1 positions before the boundary. A lead byte
  // for an N-byte character found `distance` bytes back is missing
  // `N - distance` bytes whenever N is greater than `distance`.
  for (let distance = 3; distance >= 1; distance--) {
    const position = chunkEnd - distance;
    if (position < 0) {
      continue;
    }
    const lead = buf[position];
    if (isFirstByteOf4ByteChar(lead)) {
      return chunkEnd + (4 - distance);
    }
    if (distance <= 2 && isFirstByteOf3ByteChar(lead)) {
      return chunkEnd + (3 - distance);
    }
    if (distance === 1 && isFirstByteOf2ByteChar(lead)) {
      return chunkEnd + (2 - distance);
    }
  }
  return chunkEnd;
}
/**
 * Test whether a byte has the form `11110xxx`, the lead byte of a four-byte
 * UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by three bits equals `0b11110`.
 */
function isFirstByteOf4ByteChar(byte) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 3;
  return prefix === 0b11110;
}
/**
 * Test whether a byte has the form `1110xxxx`, the lead byte of a three-byte
 * UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by four bits equals `0b1110`.
 */
function isFirstByteOf3ByteChar(byte) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 4;
  return prefix === 0b1110;
}
/**
 * Test whether a byte has the form `110xxxxx`, the lead byte of a two-byte
 * UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by five bits equals `0b110`.
 */
function isFirstByteOf2ByteChar(byte) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 5;
  return prefix === 0b110;
}
/**
 * Test whether a byte has the form `10xxxxxx`, a continuation (non-leading)
 * byte of a multibyte UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by six bits equals `0b10`.
 */
function isLaterByteOfUtf8(byte) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 6;
  return prefix === 0b10;
}

@@ -119,3 +119,10 @@ "use strict";

// Extract
const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);
chunkBegin = getChunkBegin(buffer, chunkBegin);
if (chunkBegin === -1) {
return binaryEncoding;
}
const chunkEnd = getChunkEnd(buffer, Math.min(buffer.length, chunkBegin + chunkLength));
if (chunkEnd > buffer.length) {
return binaryEncoding;
}
const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd);

@@ -137,1 +144,87 @@ // Detect encoding

exports.getEncoding = getEncoding;
// ====================================
// The functions below are created to handle multibyte utf8 characters.
// To understand how the encoding works, check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
// @todo add documentation for these
/**
 * Snap a chunk's start offset back onto the lead byte of the UTF-8 character
 * it falls inside, so the chunk does not begin in the middle of a character.
 *
 * @param buf The bytes being inspected.
 * @param chunkBegin The proposed start offset into `buf`.
 * @returns `0` when `chunkBegin` is `0`; `chunkBegin` unchanged when the byte
 * at that offset is not a continuation byte (`10xxxxxx`); otherwise the offset
 * (one to three bytes earlier) of a lead byte whose sequence is long enough to
 * reach `chunkBegin`; or `-1` when no such lead byte exists.
 */
function getChunkBegin(buf, chunkBegin) {
  // Nothing precedes offset zero, so there is nowhere to back up to.
  if (chunkBegin === 0) {
    return 0;
  }
  // Only a continuation byte means we landed inside a multibyte character.
  if (!isLaterByteOfUtf8(buf[chunkBegin])) {
    return chunkBegin;
  }
  // Probe the farthest candidate first (3 bytes back), then move closer.
  // A lead byte `distance` bytes back only counts if its character is longer
  // than `distance` bytes, i.e. it actually covers `chunkBegin`.
  for (let distance = 3; distance >= 1; distance--) {
    const candidate = chunkBegin - distance;
    if (candidate < 0) {
      continue;
    }
    const lead = buf[candidate];
    const coversChunkBegin =
      isFirstByteOf4ByteChar(lead) ||
      (distance <= 2 && isFirstByteOf3ByteChar(lead)) ||
      (distance === 1 && isFirstByteOf2ByteChar(lead));
    if (coversChunkBegin) {
      return candidate;
    }
  }
  // A continuation byte with no matching lead byte in front of it.
  return -1;
}
/**
 * Push a chunk's end offset forward so that a multibyte UTF-8 character
 * started inside the chunk is not cut off. The offset is treated as
 * exclusive: the byte at `chunkEnd - 1` is the last one inside the chunk.
 *
 * @param buf The bytes being inspected.
 * @param chunkEnd The proposed end offset into `buf`.
 * @returns `chunkEnd` unchanged when it equals `buf.length` or when none of
 * the last three bytes before it starts a character that crosses the
 * boundary; otherwise `chunkEnd` plus the number of bytes that character
 * still needs. The result is not clamped and may exceed `buf.length`.
 */
function getChunkEnd(buf, chunkEnd) {
  // Already at the end of the buffer: nothing further to include.
  if (chunkEnd === buf.length) {
    return chunkEnd;
  }
  // Inspect the bytes 3, 2, then 1 positions before the boundary. A lead byte
  // for an N-byte character found `distance` bytes back is missing
  // `N - distance` bytes whenever N is greater than `distance`.
  for (let distance = 3; distance >= 1; distance--) {
    const position = chunkEnd - distance;
    if (position < 0) {
      continue;
    }
    const lead = buf[position];
    if (isFirstByteOf4ByteChar(lead)) {
      return chunkEnd + (4 - distance);
    }
    if (distance <= 2 && isFirstByteOf3ByteChar(lead)) {
      return chunkEnd + (3 - distance);
    }
    if (distance === 1 && isFirstByteOf2ByteChar(lead)) {
      return chunkEnd + (2 - distance);
    }
  }
  return chunkEnd;
}
/**
 * Test whether a byte has the form `11110xxx`, the lead byte of a four-byte
 * UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by three bits equals `0b11110`.
 */
function isFirstByteOf4ByteChar(byte) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 3;
  return prefix === 0b11110;
}
/**
 * Test whether a byte has the form `1110xxxx`, the lead byte of a three-byte
 * UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by four bits equals `0b1110`.
 */
function isFirstByteOf3ByteChar(byte) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 4;
  return prefix === 0b1110;
}
/**
 * Test whether a byte has the form `110xxxxx`, the lead byte of a two-byte
 * UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by five bits equals `0b110`.
 */
function isFirstByteOf2ByteChar(byte) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 5;
  return prefix === 0b110;
}
/**
 * Test whether a byte has the form `10xxxxxx`, a continuation (non-leading)
 * byte of a multibyte UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by six bits equals `0b10`.
 */
function isLaterByteOfUtf8(byte) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 6;
  return prefix === 0b10;
}
# History
## v6.0.0 2021 August 1
- Thanks to [Kukhyeon Heo](https://github.com/sainthkh) for [pull request #214](https://github.com/bevry/istextorbinary/pull/214), `istextorbinary` now understands UTF-8 multibyte characters, so Cyrillic, CJK, emoji, etc. are no longer detected as binary. This is a big win.
- Closes [issue #13](https://github.com/bevry/istextorbinary/issues/13) reported by [dlsgusrn7577](https://github.com/dlsgusrn7577)
- Updated dependencies, [base files](https://github.com/bevry/base), and [editions](https://editions.bevry.me) using [boundation](https://github.com/bevry/boundation)
## v5.15.0 2021 July 30

@@ -4,0 +10,0 @@

19

package.json
{
"title": "Is Text or Binary?",
"name": "istextorbinary",
"version": "5.15.0",
"version": "6.0.0-next.1627748439.0ee4d89a92f81b1d8280e8d0dd12cf2fb9976ed0",
"description": "Determine if a filename and/or buffer is text or binary. Smarter detection than the other solutions.",

@@ -84,2 +84,3 @@ "homepage": "https://github.com/bevry/istextorbinary",

"Ian Sibner <sibnerian@gmail.com> (https://github.com/sibnerian)",
"Kukhyeon Heo <sainthkh@gmail.com> (https://github.com/sainthkh)",
"Michael Mooring <mike@mdm.cc> (https://github.com/mikeumus)",

@@ -173,10 +174,10 @@ "Rob Loach <robloach@gmail.com> (https://github.com/robloach)",

"binaryextensions": "^4.18.0",
"textextensions": "^5.13.0"
"textextensions": "^5.14.0"
},
"devDependencies": {
"@bevry/update-contributors": "^1.19.0",
"@bevry/update-contributors": "^1.20.0",
"@typescript-eslint/eslint-plugin": "^4.28.5",
"@typescript-eslint/parser": "^4.28.5",
"assert-helpers": "^8.4.0",
"eslint": "^7.31.0",
"eslint": "^7.32.0",
"eslint-config-bevry": "^3.27.0",

@@ -186,11 +187,11 @@ "eslint-config-prettier": "^8.3.0",

"filedirname": "^2.7.0",
"kava": "^5.14.0",
"kava": "^5.15.0",
"make-deno-edition": "^1.3.0",
"prettier": "^2.3.2",
"projectz": "^2.21.0",
"projectz": "^2.22.0",
"surge": "^0.23.0",
"typedoc": "^0.21.4",
"typescript": "4.3.5",
"valid-directory": "^3.7.0",
"valid-module": "^1.16.0"
"valid-directory": "^3.9.0",
"valid-module": "^1.17.0"
},

@@ -236,2 +237,2 @@ "scripts": {

}
}
}

@@ -96,3 +96,3 @@ <!-- TITLE/ -->

<script type="module">
import * as pkg from '//cdn.skypack.dev/istextorbinary@^5.15.0'
import * as pkg from '//cdn.skypack.dev/istextorbinary@^6.0.0'
</script>

@@ -105,3 +105,3 @@ ```

<script type="module">
import * as pkg from '//unpkg.com/istextorbinary@^5.15.0'
import * as pkg from '//unpkg.com/istextorbinary@^6.0.0'
</script>

@@ -114,3 +114,3 @@ ```

<script type="module">
import * as pkg from '//dev.jspm.io/istextorbinary@5.15.0'
import * as pkg from '//dev.jspm.io/istextorbinary@6.0.0'
</script>

@@ -182,2 +182,3 @@ ```

<li><a href="https://github.com/sibnerian">Ian Sibner</a> — <a href="https://github.com/bevry/istextorbinary/commits?author=sibnerian" title="View the GitHub contributions of Ian Sibner on repository bevry/istextorbinary">view contributions</a></li>
<li><a href="https://github.com/sainthkh">Kukhyeon Heo</a> — <a href="https://github.com/bevry/istextorbinary/commits?author=sainthkh" title="View the GitHub contributions of Kukhyeon Heo on repository bevry/istextorbinary">view contributions</a></li>
<li><a href="https://github.com/mikeumus">Michael Mooring</a> — <a href="https://github.com/bevry/istextorbinary/commits?author=mikeumus" title="View the GitHub contributions of Michael Mooring on repository bevry/istextorbinary">view contributions</a></li>

@@ -184,0 +185,0 @@ <li><a href="https://github.com/robloach">Rob Loach</a> — <a href="https://github.com/bevry/istextorbinary/commits?author=robloach" title="View the GitHub contributions of Rob Loach on repository bevry/istextorbinary">view contributions</a></li>

@@ -12,2 +12,3 @@ /* eslint no-use-before-define:0 */

chunkLength?: number
/** If not provided, will check the start, middle, and end */

@@ -114,3 +115,16 @@ chunkBegin?: number

// Extract
const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength)
chunkBegin = getChunkBegin(buffer, chunkBegin)
if (chunkBegin === -1) {
return binaryEncoding
}
const chunkEnd = getChunkEnd(
buffer,
Math.min(buffer.length, chunkBegin + chunkLength)
)
if (chunkEnd > buffer.length) {
return binaryEncoding
}
const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd)

@@ -133,1 +147,116 @@

}
// ====================================
// The functions below are created to handle multibyte utf8 characters.
// To understand how the encoding works, check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
// @todo add documentation for these
/**
 * Snap a chunk's start offset back onto the lead byte of the UTF-8 character
 * it falls inside, so the chunk does not begin in the middle of a character.
 *
 * @param buf The bytes being inspected.
 * @param chunkBegin The proposed start offset into `buf`.
 * @returns `0` when `chunkBegin` is `0`; `chunkBegin` unchanged when the byte
 * at that offset is not a continuation byte (`10xxxxxx`); otherwise the offset
 * (one to three bytes earlier) of a lead byte whose sequence is long enough to
 * reach `chunkBegin`; or `-1` when no such lead byte exists.
 */
function getChunkBegin(buf: Buffer, chunkBegin: number) {
  // Nothing precedes offset zero, so there is nowhere to back up to.
  if (chunkBegin === 0) {
    return 0
  }
  // Only a continuation byte means we landed inside a multibyte character.
  if (!isLaterByteOfUtf8(buf[chunkBegin])) {
    return chunkBegin
  }
  // Probe the farthest candidate first (3 bytes back), then move closer.
  // A lead byte `distance` bytes back only counts if its character is longer
  // than `distance` bytes, i.e. it actually covers `chunkBegin`.
  for (let distance = 3; distance >= 1; distance--) {
    const candidate = chunkBegin - distance
    if (candidate < 0) {
      continue
    }
    const lead = buf[candidate]
    const coversChunkBegin =
      isFirstByteOf4ByteChar(lead) ||
      (distance <= 2 && isFirstByteOf3ByteChar(lead)) ||
      (distance === 1 && isFirstByteOf2ByteChar(lead))
    if (coversChunkBegin) {
      return candidate
    }
  }
  // A continuation byte with no matching lead byte in front of it.
  return -1
}
/**
 * Push a chunk's end offset forward so that a multibyte UTF-8 character
 * started inside the chunk is not cut off. The offset is treated as
 * exclusive: the byte at `chunkEnd - 1` is the last one inside the chunk.
 *
 * @param buf The bytes being inspected.
 * @param chunkEnd The proposed end offset into `buf`.
 * @returns `chunkEnd` unchanged when it equals `buf.length` or when none of
 * the last three bytes before it starts a character that crosses the
 * boundary; otherwise `chunkEnd` plus the number of bytes that character
 * still needs. The result is not clamped and may exceed `buf.length`.
 */
function getChunkEnd(buf: Buffer, chunkEnd: number) {
  // Already at the end of the buffer: nothing further to include.
  if (chunkEnd === buf.length) {
    return chunkEnd
  }
  // Inspect the bytes 3, 2, then 1 positions before the boundary. A lead byte
  // for an N-byte character found `distance` bytes back is missing
  // `N - distance` bytes whenever N is greater than `distance`.
  for (let distance = 3; distance >= 1; distance--) {
    const position = chunkEnd - distance
    if (position < 0) {
      continue
    }
    const lead = buf[position]
    if (isFirstByteOf4ByteChar(lead)) {
      return chunkEnd + (4 - distance)
    }
    if (distance <= 2 && isFirstByteOf3ByteChar(lead)) {
      return chunkEnd + (3 - distance)
    }
    if (distance === 1 && isFirstByteOf2ByteChar(lead)) {
      return chunkEnd + (2 - distance)
    }
  }
  return chunkEnd
}
/**
 * Test whether a byte has the form `11110xxx`, the lead byte of a four-byte
 * UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by three bits equals `0b11110`.
 */
function isFirstByteOf4ByteChar(byte: number) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 3
  return prefix === 0b11110
}
/**
 * Test whether a byte has the form `1110xxxx`, the lead byte of a three-byte
 * UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by four bits equals `0b1110`.
 */
function isFirstByteOf3ByteChar(byte: number) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 4
  return prefix === 0b1110
}
/**
 * Test whether a byte has the form `110xxxxx`, the lead byte of a two-byte
 * UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by five bits equals `0b110`.
 */
function isFirstByteOf2ByteChar(byte: number) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 5
  return prefix === 0b110
}
/**
 * Test whether a byte has the form `10xxxxxx`, a continuation (non-leading)
 * byte of a multibyte UTF-8 sequence.
 *
 * @param byte The byte value to inspect.
 * @returns `true` when `byte` shifted right by six bits equals `0b10`.
 */
function isLaterByteOfUtf8(byte: number) {
  // eslint-disable-next-line no-bitwise
  const prefix = byte >> 6
  return prefix === 0b10
}

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc