istextorbinary - npm Package Compare versions

94

edition-browsers/index.js

		@@ -91,3 +91,10 @@ /* eslint no-use-before-define:0 */
		// Extract
		const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);
		chunkBegin = getChunkBegin(buffer, chunkBegin);
		if (chunkBegin === -1) {
		return binaryEncoding;
		}
		const chunkEnd = getChunkEnd(buffer, Math.min(buffer.length, chunkBegin + chunkLength));
		if (chunkEnd > buffer.length) {
		return binaryEncoding;
		}
		const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd);
		@@ -108,1 +115,86 @@ // Detect encoding
		}
		// The functions below are created to handle multibyte utf8 characters.
		// To understand how the encoding works,
		// check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
		/**
		 * Align a chunk's start index to the first byte of the UTF-8 character it lands in.
		 *
		 * When the byte at `chunkBegin` is a continuation byte (10xxxxxx), the up-to-three
		 * preceding bytes are searched for a lead byte whose sequence is long enough to
		 * include `chunkBegin`.
		 *
		 * @param {Buffer} buf bytes being inspected
		 * @param {number} chunkBegin proposed start index of the chunk
		 * @returns {number} `chunkBegin` itself when it is 0 or not a continuation byte,
		 * the index of the covering lead byte when one is found, otherwise -1
		 */
		function getChunkBegin(buf, chunkBegin) {
			// Index 0 cannot be in the middle of a character.
			if (chunkBegin === 0) {
				return 0;
			}
			// Only a continuation byte needs the start moved back.
			if (!isLaterByteOfUtf8(buf[chunkBegin])) {
				return chunkBegin;
			}
			// Three bytes back: only a 4-byte sequence still reaches chunkBegin.
			let begin = chunkBegin - 3;
			if (begin >= 0 && isFirstByteOf4ByteChar(buf[begin])) {
				return begin;
			}
			// Two bytes back: a 4-byte or 3-byte sequence reaches chunkBegin.
			begin = chunkBegin - 2;
			if (
				begin >= 0 &&
				(isFirstByteOf4ByteChar(buf[begin]) ||
					isFirstByteOf3ByteChar(buf[begin]))
			) {
				return begin;
			}
			// One byte back: any multibyte lead (4-, 3- or 2-byte) reaches chunkBegin.
			begin = chunkBegin - 1;
			if (
				begin >= 0 &&
				(isFirstByteOf4ByteChar(buf[begin]) ||
					isFirstByteOf3ByteChar(buf[begin]) ||
					isFirstByteOf2ByteChar(buf[begin]))
			) {
				return begin;
			}
			// No covering lead byte within three bytes.
			return -1;
		}
		/**
		 * Push a chunk's exclusive end index forward so it does not cut a UTF-8 character.
		 *
		 * The three bytes before `chunkEnd` are examined, farthest first. If one is a lead
		 * byte whose sequence extends beyond `chunkEnd`, the end is advanced by the number
		 * of bytes still missing. The result is not clamped and may exceed `buf.length`.
		 *
		 * @param {Buffer} buf bytes being inspected
		 * @param {number} chunkEnd proposed exclusive end index of the chunk
		 * @returns {number} the adjusted end index, or `chunkEnd` unchanged
		 */
		function getChunkEnd(buf, chunkEnd) {
			// Nothing follows the end of the buffer, so there is nothing to extend into.
			if (chunkEnd === buf.length) {
				return chunkEnd;
			}
			for (let back = 3; back >= 1; back -= 1) {
				const index = chunkEnd - back;
				if (index >= 0) {
					const lead = buf[index];
					// Total length of the sequence started by this byte, 0 if it is not a multibyte lead.
					let size = 0;
					if (isFirstByteOf4ByteChar(lead)) {
						size = 4;
					} else if (isFirstByteOf3ByteChar(lead)) {
						size = 3;
					} else if (isFirstByteOf2ByteChar(lead)) {
						size = 2;
					}
					// The sequence spills past chunkEnd only when it is longer than the distance back.
					if (size > back) {
						return chunkEnd + (size - back);
					}
				}
			}
			return chunkEnd;
		}
		/**
		 * Check whether a byte matches the bit pattern 11110xxx.
		 * @param {number} byte value to test
		 * @returns {boolean} true when the top five bits are 11110
		 */
		function isFirstByteOf4ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const topFiveBits = byte >> 3;
			return topFiveBits === 0b11110;
		}
		/**
		 * Check whether a byte matches the bit pattern 1110xxxx.
		 * @param {number} byte value to test
		 * @returns {boolean} true when the top four bits are 1110
		 */
		function isFirstByteOf3ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const topFourBits = byte >> 4;
			return topFourBits === 0b1110;
		}
		/**
		 * Check whether a byte matches the bit pattern 110xxxxx.
		 * @param {number} byte value to test
		 * @returns {boolean} true when the top three bits are 110
		 */
		function isFirstByteOf2ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const topThreeBits = byte >> 5;
			return topThreeBits === 0b110;
		}
		/**
		 * Check whether a byte matches the bit pattern 10xxxxxx.
		 * @param {number} byte value to test
		 * @returns {boolean} true when the top two bits are 10
		 */
		function isLaterByteOfUtf8(byte) {
			// eslint-disable-next-line no-bitwise
			const topTwoBits = byte >> 6;
			return topTwoBits === 0b10;
		}

94

edition-es2019-esm/index.js

		@@ -92,3 +92,10 @@ /* eslint no-use-before-define:0 */
		// Extract
		const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);
		chunkBegin = getChunkBegin(buffer, chunkBegin);
		if (chunkBegin === -1) {
		return binaryEncoding;
		}
		const chunkEnd = getChunkEnd(buffer, Math.min(buffer.length, chunkBegin + chunkLength));
		if (chunkEnd > buffer.length) {
		return binaryEncoding;
		}
		const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd);
		@@ -109,1 +116,86 @@ // Detect encoding
		}
		// The functions below are created to handle multibyte utf8 characters.
		// To understand how the encoding works,
		// check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
		/**
		 * Align a chunk's start index to the first byte of the UTF-8 character it lands in.
		 *
		 * When the byte at `chunkBegin` is a continuation byte (10xxxxxx), the up-to-three
		 * preceding bytes are searched for a lead byte whose sequence is long enough to
		 * include `chunkBegin`.
		 *
		 * @param {Buffer} buf bytes being inspected
		 * @param {number} chunkBegin proposed start index of the chunk
		 * @returns {number} `chunkBegin` itself when it is 0 or not a continuation byte,
		 * the index of the covering lead byte when one is found, otherwise -1
		 */
		function getChunkBegin(buf, chunkBegin) {
			// Index 0 cannot be in the middle of a character.
			if (chunkBegin === 0) {
				return 0;
			}
			// Only a continuation byte needs the start moved back.
			if (!isLaterByteOfUtf8(buf[chunkBegin])) {
				return chunkBegin;
			}
			// Three bytes back: only a 4-byte sequence still reaches chunkBegin.
			let begin = chunkBegin - 3;
			if (begin >= 0 && isFirstByteOf4ByteChar(buf[begin])) {
				return begin;
			}
			// Two bytes back: a 4-byte or 3-byte sequence reaches chunkBegin.
			begin = chunkBegin - 2;
			if (
				begin >= 0 &&
				(isFirstByteOf4ByteChar(buf[begin]) ||
					isFirstByteOf3ByteChar(buf[begin]))
			) {
				return begin;
			}
			// One byte back: any multibyte lead (4-, 3- or 2-byte) reaches chunkBegin.
			begin = chunkBegin - 1;
			if (
				begin >= 0 &&
				(isFirstByteOf4ByteChar(buf[begin]) ||
					isFirstByteOf3ByteChar(buf[begin]) ||
					isFirstByteOf2ByteChar(buf[begin]))
			) {
				return begin;
			}
			// No covering lead byte within three bytes.
			return -1;
		}
		/**
		 * Push a chunk's exclusive end index forward so it does not cut a UTF-8 character.
		 *
		 * The three bytes before `chunkEnd` are examined, farthest first. If one is a lead
		 * byte whose sequence extends beyond `chunkEnd`, the end is advanced by the number
		 * of bytes still missing. The result is not clamped and may exceed `buf.length`.
		 *
		 * @param {Buffer} buf bytes being inspected
		 * @param {number} chunkEnd proposed exclusive end index of the chunk
		 * @returns {number} the adjusted end index, or `chunkEnd` unchanged
		 */
		function getChunkEnd(buf, chunkEnd) {
			// Nothing follows the end of the buffer, so there is nothing to extend into.
			if (chunkEnd === buf.length) {
				return chunkEnd;
			}
			for (let back = 3; back >= 1; back -= 1) {
				const index = chunkEnd - back;
				if (index >= 0) {
					const lead = buf[index];
					// Total length of the sequence started by this byte, 0 if it is not a multibyte lead.
					let size = 0;
					if (isFirstByteOf4ByteChar(lead)) {
						size = 4;
					} else if (isFirstByteOf3ByteChar(lead)) {
						size = 3;
					} else if (isFirstByteOf2ByteChar(lead)) {
						size = 2;
					}
					// The sequence spills past chunkEnd only when it is longer than the distance back.
					if (size > back) {
						return chunkEnd + (size - back);
					}
				}
			}
			return chunkEnd;
		}
		/**
		 * Check whether a byte matches the bit pattern 11110xxx.
		 * @param {number} byte value to test
		 * @returns {boolean} true when the top five bits are 11110
		 */
		function isFirstByteOf4ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const topFiveBits = byte >> 3;
			return topFiveBits === 0b11110;
		}
		/**
		 * Check whether a byte matches the bit pattern 1110xxxx.
		 * @param {number} byte value to test
		 * @returns {boolean} true when the top four bits are 1110
		 */
		function isFirstByteOf3ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const topFourBits = byte >> 4;
			return topFourBits === 0b1110;
		}
		/**
		 * Check whether a byte matches the bit pattern 110xxxxx.
		 * @param {number} byte value to test
		 * @returns {boolean} true when the top three bits are 110
		 */
		function isFirstByteOf2ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const topThreeBits = byte >> 5;
			return topThreeBits === 0b110;
		}
		/**
		 * Check whether a byte matches the bit pattern 10xxxxxx.
		 * @param {number} byte value to test
		 * @returns {boolean} true when the top two bits are 10
		 */
		function isLaterByteOfUtf8(byte) {
			// eslint-disable-next-line no-bitwise
			const topTwoBits = byte >> 6;
			return topTwoBits === 0b10;
		}

94

edition-es2019/index.js

		@@ -119,3 +119,10 @@ "use strict";
		// Extract
		const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);
		chunkBegin = getChunkBegin(buffer, chunkBegin);
		if (chunkBegin === -1) {
		return binaryEncoding;
		}
		const chunkEnd = getChunkEnd(buffer, Math.min(buffer.length, chunkBegin + chunkLength));
		if (chunkEnd > buffer.length) {
		return binaryEncoding;
		}
		const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd);
		@@ -137,1 +144,86 @@ // Detect encoding
		exports.getEncoding = getEncoding;
		// The functions below are created to handle multibyte utf8 characters.
		// To understand how the encoding works,
		// check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
		/**
		 * Align a chunk's start index to the first byte of the UTF-8 character it lands in.
		 *
		 * When the byte at `chunkBegin` is a continuation byte (10xxxxxx), the up-to-three
		 * preceding bytes are searched for a lead byte whose sequence is long enough to
		 * include `chunkBegin`.
		 *
		 * @param {Buffer} buf bytes being inspected
		 * @param {number} chunkBegin proposed start index of the chunk
		 * @returns {number} `chunkBegin` itself when it is 0 or not a continuation byte,
		 * the index of the covering lead byte when one is found, otherwise -1
		 */
		function getChunkBegin(buf, chunkBegin) {
			// Index 0 cannot be in the middle of a character.
			if (chunkBegin === 0) {
				return 0;
			}
			// Only a continuation byte needs the start moved back.
			if (!isLaterByteOfUtf8(buf[chunkBegin])) {
				return chunkBegin;
			}
			// Three bytes back: only a 4-byte sequence still reaches chunkBegin.
			let begin = chunkBegin - 3;
			if (begin >= 0 && isFirstByteOf4ByteChar(buf[begin])) {
				return begin;
			}
			// Two bytes back: a 4-byte or 3-byte sequence reaches chunkBegin.
			begin = chunkBegin - 2;
			if (
				begin >= 0 &&
				(isFirstByteOf4ByteChar(buf[begin]) ||
					isFirstByteOf3ByteChar(buf[begin]))
			) {
				return begin;
			}
			// One byte back: any multibyte lead (4-, 3- or 2-byte) reaches chunkBegin.
			begin = chunkBegin - 1;
			if (
				begin >= 0 &&
				(isFirstByteOf4ByteChar(buf[begin]) ||
					isFirstByteOf3ByteChar(buf[begin]) ||
					isFirstByteOf2ByteChar(buf[begin]))
			) {
				return begin;
			}
			// No covering lead byte within three bytes.
			return -1;
		}
		/**
		 * Push a chunk's exclusive end index forward so it does not cut a UTF-8 character.
		 *
		 * The three bytes before `chunkEnd` are examined, farthest first. If one is a lead
		 * byte whose sequence extends beyond `chunkEnd`, the end is advanced by the number
		 * of bytes still missing. The result is not clamped and may exceed `buf.length`.
		 *
		 * @param {Buffer} buf bytes being inspected
		 * @param {number} chunkEnd proposed exclusive end index of the chunk
		 * @returns {number} the adjusted end index, or `chunkEnd` unchanged
		 */
		function getChunkEnd(buf, chunkEnd) {
			// Nothing follows the end of the buffer, so there is nothing to extend into.
			if (chunkEnd === buf.length) {
				return chunkEnd;
			}
			for (let back = 3; back >= 1; back -= 1) {
				const index = chunkEnd - back;
				if (index >= 0) {
					const lead = buf[index];
					// Total length of the sequence started by this byte, 0 if it is not a multibyte lead.
					let size = 0;
					if (isFirstByteOf4ByteChar(lead)) {
						size = 4;
					} else if (isFirstByteOf3ByteChar(lead)) {
						size = 3;
					} else if (isFirstByteOf2ByteChar(lead)) {
						size = 2;
					}
					// The sequence spills past chunkEnd only when it is longer than the distance back.
					if (size > back) {
						return chunkEnd + (size - back);
					}
				}
			}
			return chunkEnd;
		}
		/**
		 * Check whether a byte matches the bit pattern 11110xxx.
		 * @param {number} byte value to test
		 * @returns {boolean} true when the top five bits are 11110
		 */
		function isFirstByteOf4ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const topFiveBits = byte >> 3;
			return topFiveBits === 0b11110;
		}
		/**
		 * Check whether a byte matches the bit pattern 1110xxxx.
		 * @param {number} byte value to test
		 * @returns {boolean} true when the top four bits are 1110
		 */
		function isFirstByteOf3ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const topFourBits = byte >> 4;
			return topFourBits === 0b1110;
		}
		/**
		 * Check whether a byte matches the bit pattern 110xxxxx.
		 * @param {number} byte value to test
		 * @returns {boolean} true when the top three bits are 110
		 */
		function isFirstByteOf2ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const topThreeBits = byte >> 5;
			return topThreeBits === 0b110;
		}
		/**
		 * Check whether a byte matches the bit pattern 10xxxxxx.
		 * @param {number} byte value to test
		 * @returns {boolean} true when the top two bits are 10
		 */
		function isLaterByteOfUtf8(byte) {
			// eslint-disable-next-line no-bitwise
			const topTwoBits = byte >> 6;
			return topTwoBits === 0b10;
		}

2

package.json

		{
		"title": "Is Text or Binary?",
		"name": "istextorbinary",
		"version": "5.15.0-next.1627595674.345b3aa16bba43ba618239c19c8def5cf66eda10",
		"version": "5.15.0-next.1627747584.40036cea671e4dee4fd2c4a154d3f69017ec7422",
		"description": "Determine if a filename and/or buffer is text or binary. Smarter detection than the other solutions.",
		@@ -6,0 +6,0 @@ "homepage": "https://github.com/bevry/istextorbinary",

129

source/index.ts

		@@ -113,3 +113,16 @@ /* eslint no-use-before-define:0 */
		// Extract
		const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength)
		chunkBegin = getChunkBegin(buffer, chunkBegin)
		if (chunkBegin === -1) {
		return binaryEncoding
		}

		const chunkEnd = getChunkEnd(
		buffer,
		Math.min(buffer.length, chunkBegin + chunkLength)
		)

		if (chunkEnd > buffer.length) {
		return binaryEncoding
		}

		const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd)
		@@ -132,1 +145,115 @@
		}

		// The functions below are created to handle multibyte utf8 characters.
		// To understand how the encoding works,
		// check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding

		/**
		 * Align a chunk's start index to the first byte of the UTF-8 character it lands in.
		 *
		 * When the byte at `chunkBegin` is a continuation byte (10xxxxxx), the up-to-three
		 * preceding bytes are searched for a lead byte whose sequence is long enough to
		 * include `chunkBegin`.
		 *
		 * @param buf bytes being inspected
		 * @param chunkBegin proposed start index of the chunk
		 * @returns `chunkBegin` itself when it is 0 or not a continuation byte,
		 * the index of the covering lead byte when one is found, otherwise -1
		 */
		function getChunkBegin(buf: Buffer, chunkBegin: number) {
			// Index 0 cannot be in the middle of a character.
			if (chunkBegin === 0) {
				return 0
			}

			// Only a continuation byte needs the start moved back.
			if (!isLaterByteOfUtf8(buf[chunkBegin])) {
				return chunkBegin
			}

			// Three bytes back: only a 4-byte sequence still reaches chunkBegin.
			let begin = chunkBegin - 3
			if (begin >= 0 && isFirstByteOf4ByteChar(buf[begin])) {
				return begin
			}

			// Two bytes back: a 4-byte or 3-byte sequence reaches chunkBegin.
			begin = chunkBegin - 2
			if (
				begin >= 0 &&
				(isFirstByteOf4ByteChar(buf[begin]) ||
					isFirstByteOf3ByteChar(buf[begin]))
			) {
				return begin
			}

			// One byte back: any multibyte lead (4-, 3- or 2-byte) reaches chunkBegin.
			begin = chunkBegin - 1
			if (
				begin >= 0 &&
				(isFirstByteOf4ByteChar(buf[begin]) ||
					isFirstByteOf3ByteChar(buf[begin]) ||
					isFirstByteOf2ByteChar(buf[begin]))
			) {
				return begin
			}

			// No covering lead byte within three bytes.
			return -1
		}

		/**
		 * Push a chunk's exclusive end index forward so it does not cut a UTF-8 character.
		 *
		 * The three bytes before `chunkEnd` are examined, farthest first. If one is a lead
		 * byte whose sequence extends beyond `chunkEnd`, the end is advanced by the number
		 * of bytes still missing. The result is not clamped and may exceed `buf.length`.
		 *
		 * @param buf bytes being inspected
		 * @param chunkEnd proposed exclusive end index of the chunk
		 * @returns the adjusted end index, or `chunkEnd` unchanged
		 */
		function getChunkEnd(buf: Buffer, chunkEnd: number) {
			// Nothing follows the end of the buffer, so there is nothing to extend into.
			if (chunkEnd === buf.length) {
				return chunkEnd
			}

			for (let back = 3; back >= 1; back -= 1) {
				const index = chunkEnd - back

				if (index >= 0) {
					const lead = buf[index]

					// Total length of the sequence started by this byte, 0 if it is not a multibyte lead.
					let size = 0
					if (isFirstByteOf4ByteChar(lead)) {
						size = 4
					} else if (isFirstByteOf3ByteChar(lead)) {
						size = 3
					} else if (isFirstByteOf2ByteChar(lead)) {
						size = 2
					}

					// The sequence spills past chunkEnd only when it is longer than the distance back.
					if (size > back) {
						return chunkEnd + (size - back)
					}
				}
			}

			return chunkEnd
		}

		/**
		 * Check whether a byte matches the bit pattern 11110xxx.
		 * @param byte value to test
		 * @returns true when the top five bits are 11110
		 */
		function isFirstByteOf4ByteChar(byte: number) {
			// eslint-disable-next-line no-bitwise
			const topFiveBits = byte >> 3
			return topFiveBits === 0b11110
		}

		/**
		 * Check whether a byte matches the bit pattern 1110xxxx.
		 * @param byte value to test
		 * @returns true when the top four bits are 1110
		 */
		function isFirstByteOf3ByteChar(byte: number) {
			// eslint-disable-next-line no-bitwise
			const topFourBits = byte >> 4
			return topFourBits === 0b1110
		}

		/**
		 * Check whether a byte matches the bit pattern 110xxxxx.
		 * @param byte value to test
		 * @returns true when the top three bits are 110
		 */
		function isFirstByteOf2ByteChar(byte: number) {
			// eslint-disable-next-line no-bitwise
			const topThreeBits = byte >> 5
			return topThreeBits === 0b110
		}

		/**
		 * Check whether a byte matches the bit pattern 10xxxxxx.
		 * @param byte value to test
		 * @returns true when the top two bits are 10
		 */
		function isLaterByteOfUtf8(byte: number) {
			// eslint-disable-next-line no-bitwise
			const topTwoBits = byte >> 6
			return topTwoBits === 0b10
		}

compiled-types/index.d.ts.map

Sorry, the diff of this file is not supported yet

istextorbinary - npm Package Compare versions

Improved metrics