istextorbinary - npm Package Compare versions

95

edition-browsers/index.js

		@@ -91,3 +91,10 @@ /* eslint no-use-before-define:0 */
		// Extract
		const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);
		chunkBegin = getChunkBegin(buffer, chunkBegin);
		if (chunkBegin === -1) {
		return binaryEncoding;
		}
		const chunkEnd = getChunkEnd(buffer, Math.min(buffer.length, chunkBegin + chunkLength));
		if (chunkEnd > buffer.length) {
		return binaryEncoding;
		}
		const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd);
		@@ -108,1 +115,87 @@ // Detect encoding
		}
		// ====================================
		// The functions below are created to handle multibyte utf8 characters.
		// To understand how the encoding works, check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
		// @todo add documentation for these
		/**
		 * Move a chunk's start offset back onto the lead byte of the UTF-8
		 * character that the offset falls inside.
		 *
		 * @param {Buffer} buf - bytes being inspected
		 * @param {number} chunkBegin - proposed start offset of the chunk
		 * @returns {number} `chunkBegin` unchanged when it is 0 or does not point at
		 *   a continuation byte (10xxxxxx); otherwise the offset of a lead byte found
		 *   one to three bytes earlier; `-1` when no such lead byte exists.
		 */
		function getChunkBegin(buf, chunkBegin) {
			// If it's the beginning, just return.
			if (chunkBegin === 0) {
				return 0;
			}
			// Anything that is not a continuation byte is accepted as a boundary.
			if (!isLaterByteOfUtf8(buf[chunkBegin])) {
				return chunkBegin;
			}
			// Look back 3, then 2, then 1 byte(s). A lead byte `distance` bytes back
			// can only own the byte at chunkBegin when its sequence is longer than
			// `distance`: 4-byte leads at any distance, 3-byte leads at distance <= 2,
			// 2-byte leads only at distance 1.
			for (let distance = 3; distance >= 1; distance--) {
				const begin = chunkBegin - distance;
				if (begin < 0) {
					continue;
				}
				const lead = buf[begin];
				if (
					isFirstByteOf4ByteChar(lead) ||
					(distance <= 2 && isFirstByteOf3ByteChar(lead)) ||
					(distance === 1 && isFirstByteOf2ByteChar(lead))
				) {
					return begin;
				}
			}
			// A continuation byte with no lead byte in range.
			return -1;
		}
		/**
		 * Push a chunk's end offset forward so that a multibyte UTF-8 character
		 * starting within the last three bytes of the chunk is not cut in half.
		 *
		 * @param {Buffer} buf - bytes being inspected
		 * @param {number} chunkEnd - proposed end offset of the chunk
		 * @returns {number} `chunkEnd` unchanged when it equals `buf.length` or no
		 *   lead byte within the last three bytes needs more room; otherwise
		 *   `chunkEnd` plus the number of bytes that character still needs. The
		 *   result is not clamped and may exceed `buf.length`.
		 */
		function getChunkEnd(buf, chunkEnd) {
			// If it's the end, just return.
			if (chunkEnd === buf.length) {
				return chunkEnd;
			}
			// Inspect the bytes 3, 2 and 1 position(s) before the end, in that order.
			for (let distance = 3; distance >= 1; distance--) {
				const position = chunkEnd - distance;
				if (position < 0) {
					continue;
				}
				const lead = buf[position];
				// Sequence length announced by the lead byte (0 when it is not one).
				let width = 0;
				if (isFirstByteOf4ByteChar(lead)) {
					width = 4;
				} else if (isFirstByteOf3ByteChar(lead)) {
					width = 3;
				} else if (isFirstByteOf2ByteChar(lead)) {
					width = 2;
				}
				// Only extend when the sequence reaches past the current end.
				if (width > distance) {
					return chunkEnd + (width - distance);
				}
			}
			return chunkEnd;
		}
		/**
		 * Test whether a byte matches the pattern `11110xxx`, the lead byte of a
		 * four-byte UTF-8 sequence.
		 *
		 * @param {number} byte - byte value to inspect
		 * @returns {boolean} true when the bits above the low three equal `11110`
		 */
		function isFirstByteOf4ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 3;
			return highBits === 0b11110; // 11110xxx?
		}
		/**
		 * Test whether a byte matches the pattern `1110xxxx`, the lead byte of a
		 * three-byte UTF-8 sequence.
		 *
		 * @param {number} byte - byte value to inspect
		 * @returns {boolean} true when the bits above the low four equal `1110`
		 */
		function isFirstByteOf3ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 4;
			return highBits === 0b1110; // 1110xxxx?
		}
		/**
		 * Test whether a byte matches the pattern `110xxxxx`, the lead byte of a
		 * two-byte UTF-8 sequence.
		 *
		 * @param {number} byte - byte value to inspect
		 * @returns {boolean} true when the bits above the low five equal `110`
		 */
		function isFirstByteOf2ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 5;
			return highBits === 0b110; // 110xxxxx?
		}
		/**
		 * Test whether a byte matches the pattern `10xxxxxx`, a continuation
		 * (non-leading) byte of a multibyte UTF-8 sequence.
		 *
		 * @param {number} byte - byte value to inspect
		 * @returns {boolean} true when the bits above the low six equal `10`
		 */
		function isLaterByteOfUtf8(byte) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 6;
			return highBits === 0b10; // 10xxxxxx?
		}

95

edition-es2019-esm/index.js

		@@ -92,3 +92,10 @@ /* eslint no-use-before-define:0 */
		// Extract
		const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);
		chunkBegin = getChunkBegin(buffer, chunkBegin);
		if (chunkBegin === -1) {
		return binaryEncoding;
		}
		const chunkEnd = getChunkEnd(buffer, Math.min(buffer.length, chunkBegin + chunkLength));
		if (chunkEnd > buffer.length) {
		return binaryEncoding;
		}
		const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd);
		@@ -109,1 +116,87 @@ // Detect encoding
		}
		// ====================================
		// The functions below are created to handle multibyte utf8 characters.
		// To understand how the encoding works, check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
		// @todo add documentation for these
		/**
		 * Move a chunk's start offset back onto the lead byte of the UTF-8
		 * character that the offset falls inside.
		 *
		 * @param {Buffer} buf - bytes being inspected
		 * @param {number} chunkBegin - proposed start offset of the chunk
		 * @returns {number} `chunkBegin` unchanged when it is 0 or does not point at
		 *   a continuation byte (10xxxxxx); otherwise the offset of a lead byte found
		 *   one to three bytes earlier; `-1` when no such lead byte exists.
		 */
		function getChunkBegin(buf, chunkBegin) {
			// If it's the beginning, just return.
			if (chunkBegin === 0) {
				return 0;
			}
			// Anything that is not a continuation byte is accepted as a boundary.
			if (!isLaterByteOfUtf8(buf[chunkBegin])) {
				return chunkBegin;
			}
			// Look back 3, then 2, then 1 byte(s). A lead byte `distance` bytes back
			// can only own the byte at chunkBegin when its sequence is longer than
			// `distance`: 4-byte leads at any distance, 3-byte leads at distance <= 2,
			// 2-byte leads only at distance 1.
			for (let distance = 3; distance >= 1; distance--) {
				const begin = chunkBegin - distance;
				if (begin < 0) {
					continue;
				}
				const lead = buf[begin];
				if (
					isFirstByteOf4ByteChar(lead) ||
					(distance <= 2 && isFirstByteOf3ByteChar(lead)) ||
					(distance === 1 && isFirstByteOf2ByteChar(lead))
				) {
					return begin;
				}
			}
			// A continuation byte with no lead byte in range.
			return -1;
		}
		/**
		 * Push a chunk's end offset forward so that a multibyte UTF-8 character
		 * starting within the last three bytes of the chunk is not cut in half.
		 *
		 * @param {Buffer} buf - bytes being inspected
		 * @param {number} chunkEnd - proposed end offset of the chunk
		 * @returns {number} `chunkEnd` unchanged when it equals `buf.length` or no
		 *   lead byte within the last three bytes needs more room; otherwise
		 *   `chunkEnd` plus the number of bytes that character still needs. The
		 *   result is not clamped and may exceed `buf.length`.
		 */
		function getChunkEnd(buf, chunkEnd) {
			// If it's the end, just return.
			if (chunkEnd === buf.length) {
				return chunkEnd;
			}
			// Inspect the bytes 3, 2 and 1 position(s) before the end, in that order.
			for (let distance = 3; distance >= 1; distance--) {
				const position = chunkEnd - distance;
				if (position < 0) {
					continue;
				}
				const lead = buf[position];
				// Sequence length announced by the lead byte (0 when it is not one).
				let width = 0;
				if (isFirstByteOf4ByteChar(lead)) {
					width = 4;
				} else if (isFirstByteOf3ByteChar(lead)) {
					width = 3;
				} else if (isFirstByteOf2ByteChar(lead)) {
					width = 2;
				}
				// Only extend when the sequence reaches past the current end.
				if (width > distance) {
					return chunkEnd + (width - distance);
				}
			}
			return chunkEnd;
		}
		/**
		 * Test whether a byte matches the pattern `11110xxx`, the lead byte of a
		 * four-byte UTF-8 sequence.
		 *
		 * @param {number} byte - byte value to inspect
		 * @returns {boolean} true when the bits above the low three equal `11110`
		 */
		function isFirstByteOf4ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 3;
			return highBits === 0b11110; // 11110xxx?
		}
		/**
		 * Test whether a byte matches the pattern `1110xxxx`, the lead byte of a
		 * three-byte UTF-8 sequence.
		 *
		 * @param {number} byte - byte value to inspect
		 * @returns {boolean} true when the bits above the low four equal `1110`
		 */
		function isFirstByteOf3ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 4;
			return highBits === 0b1110; // 1110xxxx?
		}
		/**
		 * Test whether a byte matches the pattern `110xxxxx`, the lead byte of a
		 * two-byte UTF-8 sequence.
		 *
		 * @param {number} byte - byte value to inspect
		 * @returns {boolean} true when the bits above the low five equal `110`
		 */
		function isFirstByteOf2ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 5;
			return highBits === 0b110; // 110xxxxx?
		}
		/**
		 * Test whether a byte matches the pattern `10xxxxxx`, a continuation
		 * (non-leading) byte of a multibyte UTF-8 sequence.
		 *
		 * @param {number} byte - byte value to inspect
		 * @returns {boolean} true when the bits above the low six equal `10`
		 */
		function isLaterByteOfUtf8(byte) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 6;
			return highBits === 0b10; // 10xxxxxx?
		}

95

edition-es2019/index.js

		@@ -119,3 +119,10 @@ "use strict";
		// Extract
		const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength);
		chunkBegin = getChunkBegin(buffer, chunkBegin);
		if (chunkBegin === -1) {
		return binaryEncoding;
		}
		const chunkEnd = getChunkEnd(buffer, Math.min(buffer.length, chunkBegin + chunkLength));
		if (chunkEnd > buffer.length) {
		return binaryEncoding;
		}
		const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd);
		@@ -137,1 +144,87 @@ // Detect encoding
		exports.getEncoding = getEncoding;
		// ====================================
		// The functions below are created to handle multibyte utf8 characters.
		// To understand how the encoding works, check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
		// @todo add documentation for these
		/**
		 * Move a chunk's start offset back onto the lead byte of the UTF-8
		 * character that the offset falls inside.
		 *
		 * @param {Buffer} buf - bytes being inspected
		 * @param {number} chunkBegin - proposed start offset of the chunk
		 * @returns {number} `chunkBegin` unchanged when it is 0 or does not point at
		 *   a continuation byte (10xxxxxx); otherwise the offset of a lead byte found
		 *   one to three bytes earlier; `-1` when no such lead byte exists.
		 */
		function getChunkBegin(buf, chunkBegin) {
			// If it's the beginning, just return.
			if (chunkBegin === 0) {
				return 0;
			}
			// Anything that is not a continuation byte is accepted as a boundary.
			if (!isLaterByteOfUtf8(buf[chunkBegin])) {
				return chunkBegin;
			}
			// Look back 3, then 2, then 1 byte(s). A lead byte `distance` bytes back
			// can only own the byte at chunkBegin when its sequence is longer than
			// `distance`: 4-byte leads at any distance, 3-byte leads at distance <= 2,
			// 2-byte leads only at distance 1.
			for (let distance = 3; distance >= 1; distance--) {
				const begin = chunkBegin - distance;
				if (begin < 0) {
					continue;
				}
				const lead = buf[begin];
				if (
					isFirstByteOf4ByteChar(lead) ||
					(distance <= 2 && isFirstByteOf3ByteChar(lead)) ||
					(distance === 1 && isFirstByteOf2ByteChar(lead))
				) {
					return begin;
				}
			}
			// A continuation byte with no lead byte in range.
			return -1;
		}
		/**
		 * Push a chunk's end offset forward so that a multibyte UTF-8 character
		 * starting within the last three bytes of the chunk is not cut in half.
		 *
		 * @param {Buffer} buf - bytes being inspected
		 * @param {number} chunkEnd - proposed end offset of the chunk
		 * @returns {number} `chunkEnd` unchanged when it equals `buf.length` or no
		 *   lead byte within the last three bytes needs more room; otherwise
		 *   `chunkEnd` plus the number of bytes that character still needs. The
		 *   result is not clamped and may exceed `buf.length`.
		 */
		function getChunkEnd(buf, chunkEnd) {
			// If it's the end, just return.
			if (chunkEnd === buf.length) {
				return chunkEnd;
			}
			// Inspect the bytes 3, 2 and 1 position(s) before the end, in that order.
			for (let distance = 3; distance >= 1; distance--) {
				const position = chunkEnd - distance;
				if (position < 0) {
					continue;
				}
				const lead = buf[position];
				// Sequence length announced by the lead byte (0 when it is not one).
				let width = 0;
				if (isFirstByteOf4ByteChar(lead)) {
					width = 4;
				} else if (isFirstByteOf3ByteChar(lead)) {
					width = 3;
				} else if (isFirstByteOf2ByteChar(lead)) {
					width = 2;
				}
				// Only extend when the sequence reaches past the current end.
				if (width > distance) {
					return chunkEnd + (width - distance);
				}
			}
			return chunkEnd;
		}
		/**
		 * Test whether a byte matches the pattern `11110xxx`, the lead byte of a
		 * four-byte UTF-8 sequence.
		 *
		 * @param {number} byte - byte value to inspect
		 * @returns {boolean} true when the bits above the low three equal `11110`
		 */
		function isFirstByteOf4ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 3;
			return highBits === 0b11110; // 11110xxx?
		}
		/**
		 * Test whether a byte matches the pattern `1110xxxx`, the lead byte of a
		 * three-byte UTF-8 sequence.
		 *
		 * @param {number} byte - byte value to inspect
		 * @returns {boolean} true when the bits above the low four equal `1110`
		 */
		function isFirstByteOf3ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 4;
			return highBits === 0b1110; // 1110xxxx?
		}
		/**
		 * Test whether a byte matches the pattern `110xxxxx`, the lead byte of a
		 * two-byte UTF-8 sequence.
		 *
		 * @param {number} byte - byte value to inspect
		 * @returns {boolean} true when the bits above the low five equal `110`
		 */
		function isFirstByteOf2ByteChar(byte) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 5;
			return highBits === 0b110; // 110xxxxx?
		}
		/**
		 * Test whether a byte matches the pattern `10xxxxxx`, a continuation
		 * (non-leading) byte of a multibyte UTF-8 sequence.
		 *
		 * @param {number} byte - byte value to inspect
		 * @returns {boolean} true when the bits above the low six equal `10`
		 */
		function isLaterByteOfUtf8(byte) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 6;
			return highBits === 0b10; // 10xxxxxx?
		}

6

HISTORY.md

		# History

		## v6.0.0 2021 August 1

		- Thanks to [Kukhyeon Heo](https://github.com/sainthkh) for [pull request #214](https://github.com/bevry/istextorbinary/pull/214), `istextorbinary` can now handle UTF-8 multibyte characters and understands that Cyrillic, CJK, Emoji, etc. are not binary. This is a big win.
		- Closes [issue #13](https://github.com/bevry/istextorbinary/issues/13) reported by [dlsgusrn7577](https://github.com/dlsgusrn7577)
		- Updated dependencies, [base files](https://github.com/bevry/base), and [editions](https://editions.bevry.me) using [boundation](https://github.com/bevry/boundation)

		## v5.15.0 2021 July 30
		@@ -4,0 +10,0 @@

19

package.json

		{
		"title": "Is Text or Binary?",
		"name": "istextorbinary",
		"version": "5.15.0",
		"version": "6.0.0-next.1627748439.0ee4d89a92f81b1d8280e8d0dd12cf2fb9976ed0",
		"description": "Determine if a filename and/or buffer is text or binary. Smarter detection than the other solutions.",
		@@ -84,2 +84,3 @@ "homepage": "https://github.com/bevry/istextorbinary",
		"Ian Sibner <sibnerian@gmail.com> (https://github.com/sibnerian)",
		"Kukhyeon Heo <sainthkh@gmail.com> (https://github.com/sainthkh)",
		"Michael Mooring <mike@mdm.cc> (https://github.com/mikeumus)",
		@@ -173,10 +174,10 @@ "Rob Loach <robloach@gmail.com> (https://github.com/robloach)",
		"binaryextensions": "^4.18.0",
		"textextensions": "^5.13.0"
		"textextensions": "^5.14.0"
		},
		"devDependencies": {
		"@bevry/update-contributors": "^1.19.0",
		"@bevry/update-contributors": "^1.20.0",
		"@typescript-eslint/eslint-plugin": "^4.28.5",
		"@typescript-eslint/parser": "^4.28.5",
		"assert-helpers": "^8.4.0",
		"eslint": "^7.31.0",
		"eslint": "^7.32.0",
		"eslint-config-bevry": "^3.27.0",
		@@ -186,11 +187,11 @@ "eslint-config-prettier": "^8.3.0",
		"filedirname": "^2.7.0",
		"kava": "^5.14.0",
		"kava": "^5.15.0",
		"make-deno-edition": "^1.3.0",
		"prettier": "^2.3.2",
		"projectz": "^2.21.0",
		"projectz": "^2.22.0",
		"surge": "^0.23.0",
		"typedoc": "^0.21.4",
		"typescript": "4.3.5",
		"valid-directory": "^3.7.0",
		"valid-module": "^1.16.0"
		"valid-directory": "^3.9.0",
		"valid-module": "^1.17.0"
		},
		@@ -236,2 +237,2 @@ "scripts": {
		}
		}
		}

7

README.md

		@@ -96,3 +96,3 @@ <!-- TITLE/ -->
		<script type="module">
		import * as pkg from '//cdn.skypack.dev/istextorbinary@^5.15.0'
		import * as pkg from '//cdn.skypack.dev/istextorbinary@^6.0.0'
		</script>
		@@ -105,3 +105,3 @@ ```
		<script type="module">
		import * as pkg from '//unpkg.com/istextorbinary@^5.15.0'
		import * as pkg from '//unpkg.com/istextorbinary@^6.0.0'
		</script>
		@@ -114,3 +114,3 @@ ```
		<script type="module">
		import * as pkg from '//dev.jspm.io/istextorbinary@5.15.0'
		import * as pkg from '//dev.jspm.io/istextorbinary@6.0.0'
		</script>
		@@ -182,2 +182,3 @@ ```
		<li><a href="https://github.com/sibnerian">Ian Sibner</a> — <a href="https://github.com/bevry/istextorbinary/commits?author=sibnerian" title="View the GitHub contributions of Ian Sibner on repository bevry/istextorbinary">view contributions</a></li>
		<li><a href="https://github.com/sainthkh">Kukhyeon Heo</a> — <a href="https://github.com/bevry/istextorbinary/commits?author=sainthkh" title="View the GitHub contributions of Kukhyeon Heo on repository bevry/istextorbinary">view contributions</a></li>
		<li><a href="https://github.com/mikeumus">Michael Mooring</a> — <a href="https://github.com/bevry/istextorbinary/commits?author=mikeumus" title="View the GitHub contributions of Michael Mooring on repository bevry/istextorbinary">view contributions</a></li>
		@@ -184,0 +185,0 @@ <li><a href="https://github.com/robloach">Rob Loach</a> — <a href="https://github.com/bevry/istextorbinary/commits?author=robloach" title="View the GitHub contributions of Rob Loach on repository bevry/istextorbinary">view contributions</a></li>

131

source/index.ts

		@@ -12,2 +12,3 @@ /* eslint no-use-before-define:0 */
		chunkLength?: number

		/** If not provided, will check the start, beginning, and end */
		@@ -114,3 +115,16 @@ chunkBegin?: number
		// Extract
		const chunkEnd = Math.min(buffer.length, chunkBegin + chunkLength)
		chunkBegin = getChunkBegin(buffer, chunkBegin)
		if (chunkBegin === -1) {
		return binaryEncoding
		}

		const chunkEnd = getChunkEnd(
		buffer,
		Math.min(buffer.length, chunkBegin + chunkLength)
		)

		if (chunkEnd > buffer.length) {
		return binaryEncoding
		}

		const contentChunkUTF8 = buffer.toString(textEncoding, chunkBegin, chunkEnd)
		@@ -133,1 +147,116 @@
		}

		// ====================================
		// The functions below are created to handle multibyte utf8 characters.
		// To understand how the encoding works, check this article: https://en.wikipedia.org/wiki/UTF-8#Encoding
		// @todo add documentation for these

		/**
		 * Move a chunk's start offset back onto the lead byte of the UTF-8
		 * character that the offset falls inside.
		 *
		 * @param buf bytes being inspected
		 * @param chunkBegin proposed start offset of the chunk
		 * @returns `chunkBegin` unchanged when it is 0 or does not point at a
		 *   continuation byte (10xxxxxx); otherwise the offset of a lead byte found
		 *   one to three bytes earlier; `-1` when no such lead byte exists.
		 */
		function getChunkBegin(buf: Buffer, chunkBegin: number) {
			// If it's the beginning, just return.
			if (chunkBegin === 0) {
				return 0
			}

			// Anything that is not a continuation byte is accepted as a boundary.
			if (!isLaterByteOfUtf8(buf[chunkBegin])) {
				return chunkBegin
			}

			// Look back 3, then 2, then 1 byte(s). A lead byte `distance` bytes back
			// can only own the byte at chunkBegin when its sequence is longer than
			// `distance`: 4-byte leads at any distance, 3-byte leads at distance <= 2,
			// 2-byte leads only at distance 1.
			for (let distance = 3; distance >= 1; distance--) {
				const begin = chunkBegin - distance
				if (begin < 0) {
					continue
				}

				const lead = buf[begin]
				if (
					isFirstByteOf4ByteChar(lead) ||
					(distance <= 2 && isFirstByteOf3ByteChar(lead)) ||
					(distance === 1 && isFirstByteOf2ByteChar(lead))
				) {
					return begin
				}
			}

			// A continuation byte with no lead byte in range.
			return -1
		}

		/**
		 * Push a chunk's end offset forward so that a multibyte UTF-8 character
		 * starting within the last three bytes of the chunk is not cut in half.
		 *
		 * @param buf bytes being inspected
		 * @param chunkEnd proposed end offset of the chunk
		 * @returns `chunkEnd` unchanged when it equals `buf.length` or no lead byte
		 *   within the last three bytes needs more room; otherwise `chunkEnd` plus
		 *   the number of bytes that character still needs. The result is not
		 *   clamped and may exceed `buf.length`.
		 */
		function getChunkEnd(buf: Buffer, chunkEnd: number) {
			// If it's the end, just return.
			if (chunkEnd === buf.length) {
				return chunkEnd
			}

			// Inspect the bytes 3, 2 and 1 position(s) before the end, in that order.
			for (let distance = 3; distance >= 1; distance--) {
				const position = chunkEnd - distance
				if (position < 0) {
					continue
				}

				const lead = buf[position]

				// Sequence length announced by the lead byte (0 when it is not one).
				let width = 0
				if (isFirstByteOf4ByteChar(lead)) {
					width = 4
				} else if (isFirstByteOf3ByteChar(lead)) {
					width = 3
				} else if (isFirstByteOf2ByteChar(lead)) {
					width = 2
				}

				// Only extend when the sequence reaches past the current end.
				if (width > distance) {
					return chunkEnd + (width - distance)
				}
			}

			return chunkEnd
		}

		/**
		 * Test whether a byte matches the pattern `11110xxx`, the lead byte of a
		 * four-byte UTF-8 sequence.
		 *
		 * @param byte byte value to inspect
		 * @returns true when the bits above the low three equal `11110`
		 */
		function isFirstByteOf4ByteChar(byte: number) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 3
			return highBits === 0b11110 // 11110xxx?
		}

		/**
		 * Test whether a byte matches the pattern `1110xxxx`, the lead byte of a
		 * three-byte UTF-8 sequence.
		 *
		 * @param byte byte value to inspect
		 * @returns true when the bits above the low four equal `1110`
		 */
		function isFirstByteOf3ByteChar(byte: number) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 4
			return highBits === 0b1110 // 1110xxxx?
		}

		/**
		 * Test whether a byte matches the pattern `110xxxxx`, the lead byte of a
		 * two-byte UTF-8 sequence.
		 *
		 * @param byte byte value to inspect
		 * @returns true when the bits above the low five equal `110`
		 */
		function isFirstByteOf2ByteChar(byte: number) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 5
			return highBits === 0b110 // 110xxxxx?
		}

		/**
		 * Test whether a byte matches the pattern `10xxxxxx`, a continuation
		 * (non-leading) byte of a multibyte UTF-8 sequence.
		 *
		 * @param byte byte value to inspect
		 * @returns true when the bits above the low six equal `10`
		 */
		function isLaterByteOfUtf8(byte: number) {
			// eslint-disable-next-line no-bitwise
			const highBits = byte >> 6
			return highBits === 0b10 // 10xxxxxx?
		}

compiled-types/index.d.ts.map

Sorry, the diff of this file is not supported yet

istextorbinary - npm Package Compare versions

New alerts

Improved metrics

Worsened metrics

Dependency changes