Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

utfstring

Package Overview
Dependencies
Maintainers
1
Versions
15
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

utfstring - npm Package Compare versions

Comparing version 1.2.0 to 1.3.0

spec/find_byte_index_spec.js

3

CHANGELOG.md

@@ -0,2 +1,5 @@

# 1.3.0
- Added `findByteIndex` and `findCharIndex` functions for converting between JavaScript string indices and UTF character boundaries.
# 1.2.0
- Changed module behavior such that `var UtfString = require('utfstring')` works instead of having to do `var UtfString = require('utfstring/utfstring.js').UtfString`.

2

package.json
{
"name": "utfstring",
"version": "1.2.0",
"version": "1.3.0",
"description": "UTF-safe string operations",

@@ -5,0 +5,0 @@ "repository": {

@@ -78,2 +78,6 @@ utfstring

* `findByteIndex(String str, Integer charIndex)` - Finds the byte index for the given character index. Note: a "byte index" is really a "JavaScript string index", not a true byte offset. Use this function to convert a UTF character boundary to a JavaScript string index.
* `findCharIndex(String str, Integer byteIndex)` - Finds the character index for the given byte index. Note: a "byte index" is really a "JavaScript string index", not a true byte offset. Use this function to convert a JavaScript string index to (the closest) UTF character boundary.
## Running Tests

@@ -80,0 +84,0 @@

@@ -14,4 +14,37 @@ (function() {

/**
 * Finds the character index that corresponds to a JavaScript string index.
 *
 * @param {String} string - the string to inspect.
 * @param {Number} byteIndex - a JavaScript string index (not a true byte
 *   offset) into `string`.
 * @returns {Number} the number of whole characters that end at or before
 *   `byteIndex`, or -1 when `byteIndex` is at or past `string.length`.
 */
UtfString.findCharIndex = function(string, byteIndex) {
  if (byteIndex >= string.length) {
    return -1;
  }

  // optimization: don't iterate unless necessary
  if (!containsUnsupportedCharacters(string)) {
    return byteIndex;
  }

  // Match either one of the sequences described by unsupportedPairs (defined
  // elsewhere in this file) or any single code unit. `[\s\S]` is used rather
  // than `.` because `.` does not match line terminators; with `.` a newline
  // would be skipped by exec() and never counted as a character.
  var regStr = unsupportedPairs.source + '|[\\s\\S]';
  var scanner = new RegExp(regStr, 'g');
  var charCount = 0;

  while (scanner.exec(string) !== null) {
    // lastIndex is the end of the match just consumed; once it passes
    // byteIndex, that match is the character containing byteIndex.
    if (scanner.lastIndex > byteIndex) {
      break;
    }

    charCount++;
  }

  return charCount;
};
/**
 * Finds the JavaScript string index for the given character index.
 *
 * @param {String} string - the string to inspect.
 * @param {Number} charIndex - the character index to look up.
 * @returns {Number} the result of scanning `string` for `charIndex`, or -1
 *   when `charIndex` is at or past the character length of `string`.
 */
UtfString.findByteIndex = function(string, charIndex) {
  var isPastEnd = charIndex >= this.length(string);

  return isPastEnd ? -1 : scan(string, createScanner(), charIndex);
};
UtfString.charAt = function(string, index) {
var byteIndex = findCharacterByteIndex(string, index);
var byteIndex = this.findByteIndex(string, index);

@@ -30,3 +63,3 @@ if ((byteIndex < 0) || (byteIndex >= string.length)) {

}
}
};

@@ -49,3 +82,3 @@ UtfString.charCodeAt = function(string, index) {

return code;
}
};

@@ -62,3 +95,3 @@ UtfString.fromCharCode = function(charCode) {

}
}
};

@@ -70,3 +103,3 @@ UtfString.indexOf = function(string, searchValue, start) {

var startByteIndex = findCharacterByteIndex(string, start);
var startByteIndex = this.findByteIndex(string, start);
var index = string.indexOf(searchValue, startByteIndex);

@@ -77,5 +110,5 @@

} else {
return findCharIndex(string, index);
return this.findCharIndex(string, index);
}
}
};

@@ -88,3 +121,3 @@ UtfString.lastIndexOf = function(string, searchValue, start) {

} else {
var startByteIndex = findCharacterByteIndex(string, start);
var startByteIndex = this.findByteIndex(string, start);
index = string.lastIndexOf(searchValue, startByteIndex);

@@ -96,8 +129,8 @@ }

} else {
return findCharIndex(string, index);
return this.findCharIndex(string, index);
}
}
};
UtfString.slice = function(string, start, finish) {
var startByteIndex = findCharacterByteIndex(string, start);
var startByteIndex = this.findByteIndex(string, start);
var finishByteIndex;

@@ -112,3 +145,3 @@

} else {
finishByteIndex = findCharacterByteIndex(string, finish);
finishByteIndex = this.findByteIndex(string, finish);

@@ -121,3 +154,3 @@ if (finishByteIndex < 0) {

return string.slice(startByteIndex, finishByteIndex);
}
};

@@ -134,10 +167,11 @@ UtfString.substr = function(string, start, length) {

}
}
};
// they do the same thing
UtfString.substring = UtfString.slice
UtfString.substring = UtfString.slice;
UtfString.length = function(string) {
return findCharIndex(string, string.length);
}
// findCharIndex will return -1 if string is empty, so add 1
return this.findCharIndex(string, string.length - 1) + 1;
};

@@ -158,3 +192,3 @@ UtfString.stringToCodePoints = function(string) {

return result;
}
};

@@ -169,3 +203,3 @@ UtfString.codePointsToString = function(arr) {

return chars.join('');
}
};

@@ -194,3 +228,3 @@ UtfString.stringToBytes = function(string) {

return result;
}
};

@@ -208,3 +242,3 @@ UtfString.bytesToString = function(arr) {

return result.join('');
}
};

@@ -227,29 +261,4 @@ UtfString.stringToCharArray = function(string) {

return result;
}
};
/**
 * Counts the whole characters of `string` that end at or before `byteIndex`.
 *
 * @param {String} string - the string to inspect.
 * @param {Number} byteIndex - a JavaScript string index into `string`.
 * @returns {Number} `byteIndex` itself when the string contains no
 *   unsupported characters, otherwise the number of matches that end at or
 *   before `byteIndex`.
 */
function findCharIndex(string, byteIndex) {
  // optimization: don't iterate unless necessary
  if (!containsUnsupportedCharacters(string)) {
    return byteIndex;
  }

  // `[\s\S]` instead of `.`: `.` does not match line terminators, so a
  // newline would be skipped by exec() and left out of the count.
  var regStr = unsupportedPairs.source + '|[\\s\\S]';
  var scanner = new RegExp(regStr, 'g');
  var charCount = 0;

  while (scanner.exec(string) !== null) {
    // Stop at the first match that extends past byteIndex.
    if (scanner.lastIndex > byteIndex) {
      break;
    }

    charCount++;
  }

  return charCount;
}
/**
 * Scans `string` with a fresh scanner to find the JavaScript string index
 * for `charIndex`.
 *
 * @param {String} string - the string to inspect.
 * @param {Number} charIndex - the character index to look up.
 * @returns {Number} whatever `scan` returns for the given character index.
 */
function findCharacterByteIndex(string, charIndex) {
  var scanner = createScanner();

  return scan(string, scanner, charIndex);
}
function findSurrogateByteIndex(string, charIndex) {

@@ -256,0 +265,0 @@ return scan(string, new RegExp(surrogatePairs.source, 'g'), charIndex);

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc