utfstring - npm Package Compare versions

Comparing version 2.0.0 to 2.0.1

CHANGELOG.md

		@@ -0,12 +1,16 @@
		# 2.0.1
		* Fix bug causing utfstring to incorrectly calculate logical character indices.
		- For example, `utfstring.charCodeAt("\u0001\u{1F1E6}", 1)` returned 56806 when it should have returned 127462.

		# 2.0.0
		- Abstract grapheme cluster identification in order to separate visual graphemes from individual code points.
		* Abstract grapheme cluster identification in order to separate visual graphemes from individual code points.
		- The classic example of this is regional indicators, which are separate code points but combined by display systems into one visible character. Automatically treating them as a single character can be confusing when using utfstring in other Unicode-aware libraries. Since a number of other programming languages (e.g. Ruby, Elixir) don't combine regional indicators when determining length, substrings, etc, I've decided to move regional indicator combination support from the existing utfstring functions to a separate implementation available in `UtfString.visual`, which supports regional indicators but otherwise behaves identically.

		# 1.3.1
		- Fix bug causing incorrect character index calculations for strings containing newlines.
		* Fix bug causing incorrect character index calculations for strings containing newlines.

		# 1.3.0
		- Added `findByteIndex` and `findCharIndex` functions for converting between JavaScript string indices and UTF character boundaries.
		* Added `findByteIndex` and `findCharIndex` functions for converting between JavaScript string indices and UTF character boundaries.

		# 1.2.0
		- Changed module behavior such that `var UtfString = require('utfstring')` works instead of having to do `var UtfString = require('utfstring/utfstring.js').UtfString`.
		* Changed module behavior such that `var UtfString = require('utfstring')` works instead of having to do `var UtfString = require('utfstring/utfstring.js').UtfString`.

package.json

		{
		"name": "utfstring",
		"version": "2.0.0",
		"version": "2.0.1",
		"description": "UTF-safe string operations",
		@@ -5,0 +5,0 @@ "repository": {

spec/char_code_at_spec.js

		@@ -10,2 +10,3 @@ var UtfString = require('../utfstring.js');
		expect(UtfString.charCodeAt(str, 2)).toEqual(99);
		expect(UtfString.charCodeAt(str, 3)).toBeNaN();
		});
		@@ -20,6 +21,7 @@
		expect(UtfString.charCodeAt(str, 4)).toEqual(12358); // う "u"
		expect(UtfString.charCodeAt(str, 5)).toBeNaN();
		});

		it('works with astral plane unicode characters', function() {
		str = '𤔣';
		var str = "\u{24523}";
		expect(UtfString.charCodeAt(str, 0)).toEqual(148771);
		@@ -29,2 +31,10 @@ expect(UtfString.charCodeAt(str, 1)).toBeNaN();

		it('works with mixed astral and non-astral plane characters', function() {
		var str = "\u0001\u{1F1E6}\u0002";
		expect(UtfString.charCodeAt(str, 0)).toEqual(1);
		expect(UtfString.charCodeAt(str, 1)).toEqual(127462);
		expect(UtfString.charCodeAt(str, 2)).toEqual(2);
		expect(UtfString.charCodeAt(str, 3)).toBeNaN();
		});

		it('works with regional indicators', function() {
		@@ -31,0 +41,0 @@ var str = '🇫🇷';

spec/string_to_code_points_spec.js

		@@ -24,2 +24,9 @@ var UtfString = require('../utfstring.js');

		it('works with mixed astral and non-astral plane characters', function() {
		var str = "\u0001\u{1F1E6}\u0002";
		expect(UtfString.stringToCodePoints(str)).toEqual(
		[1, 127462, 2]
		);
		});

		it('works with regional indicators', function() {
		@@ -26,0 +33,0 @@ var str = '🇫🇷';

utfstring.js

		@@ -258,21 +258,38 @@ (function() {
		var byteIndex = 0;
		var charCount = 0;
		var curCharIndex = 0;

		do {
		while (true) {
		var match = scanner.exec(string);
		var nextIdx;

		if (match === null) {
		break;
		if (match) {
		nextIdx = match.index;
		} else {
		nextIdx = string.length;
		}

		if (charCount < charIndex) {
		byteIndex += match[0].length;
		charCount ++;
		} else {
		while (curCharIndex < charIndex) {
		if (byteIndex == nextIdx) {
		if (curCharIndex < charIndex) {
		curCharIndex ++;

		if (match) {
		byteIndex += match[0].length;
		} else {
		byteIndex ++;
		}
		}

		break;
		}

		byteIndex ++;
		curCharIndex ++;
		}

		if (curCharIndex == charIndex) {
		break;
		} else if (byteIndex >= string.length || !match) {
		return -1;
		}
		} while (match !== null);

		if (byteIndex >= string.length) {
		return -1;
		}
		@@ -279,0 +296,0 @@

tester.js

utfstring - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics