Comparing version 1.2.0 to 1.3.0
@@ -0,2 +1,5 @@ | ||
# 1.3.0 | ||
- Added `findByteIndex` and `findCharIndex` functions for converting between JavaScript string indices and UTF character boundaries. | ||
# 1.2.0 | ||
- Changed module behavior such that `var UtfString = require('utfstring')` works instead of having to do `var UtfString = require('utfstring/utfstring.js').UtfString`. |
{ | ||
"name": "utfstring", | ||
"version": "1.2.0", | ||
"version": "1.3.0", | ||
"description": "UTF-safe string operations", | ||
@@ -5,0 +5,0 @@ "repository": { |
@@ -78,2 +78,6 @@ utfstring | ||
* `findByteIndex(String str, Integer charIndex)` - Finds the byte index for the given character index. Note: a "byte index" is really a "JavaScript string index", not a true byte offset. Use this function to convert a UTF character boundary to a JavaScript string index. | ||
* `findCharIndex(String str, Integer byteIndex)` - Finds the character index for the given byte index. Note: a "byte index" is really a "JavaScript string index", not a true byte offset. Use this function to convert a JavaScript string index to (the closest) UTF character boundary. | ||
## Running Tests | ||
@@ -80,0 +84,0 @@ |
103
utfstring.js
@@ -14,4 +14,37 @@ (function() { | ||
/**
 * Converts a JavaScript string index into a character index.
 *
 * "Byte index" here means an index into the JavaScript string, not a true
 * byte offset.
 *
 * @param {String} string - The string to inspect.
 * @param {Number} byteIndex - JavaScript string index to convert.
 * @returns {Number} The number of whole characters that end at or before
 *   `byteIndex`, or -1 when `byteIndex` is at or beyond `string.length`.
 */
UtfString.findCharIndex = function(string, byteIndex) {
  if (byteIndex >= string.length) {
    return -1;
  }

  // optimization: don't iterate unless necessary
  if (!containsUnsupportedCharacters(string)) {
    return byteIndex;
  }

  // Match either one multi-unit sequence (unsupportedPairs is defined
  // elsewhere; presumably surrogate pairs and similar) or any single code
  // unit. [\s\S] is used instead of '.' because '.' never matches line
  // terminators: the global scanner would silently skip over "\n" and the
  // result would disagree with the fast path above, which counts newlines.
  var scanner = new RegExp(unsupportedPairs.source + '|[\\s\\S]', 'g');
  var charCount = 0;

  while (scanner.exec(string) !== null) {
    // lastIndex is the string index just past the current match; once it
    // passes byteIndex, the current match is the character containing it.
    if (scanner.lastIndex > byteIndex) {
      break;
    }

    charCount++;
  }

  return charCount;
};
/**
 * Converts a character index into a JavaScript string index.
 *
 * "Byte index" here means an index into the JavaScript string, not a true
 * byte offset.
 *
 * @param {String} string - The string to inspect.
 * @param {Number} charIndex - Character index to convert.
 * @returns {Number} -1 when `charIndex` is at or beyond the character length
 *   reported by `this.length(string)`; otherwise whatever `scan` yields for
 *   that character position.
 */
UtfString.findByteIndex = function(string, charIndex) {
  return charIndex >= this.length(string)
    ? -1
    : scan(string, createScanner(), charIndex);
};
UtfString.charAt = function(string, index) { | ||
var byteIndex = findCharacterByteIndex(string, index); | ||
var byteIndex = this.findByteIndex(string, index); | ||
@@ -30,3 +63,3 @@ if ((byteIndex < 0) || (byteIndex >= string.length)) { | ||
} | ||
} | ||
}; | ||
@@ -49,3 +82,3 @@ UtfString.charCodeAt = function(string, index) { | ||
return code; | ||
} | ||
}; | ||
@@ -62,3 +95,3 @@ UtfString.fromCharCode = function(charCode) { | ||
} | ||
} | ||
}; | ||
@@ -70,3 +103,3 @@ UtfString.indexOf = function(string, searchValue, start) { | ||
var startByteIndex = findCharacterByteIndex(string, start); | ||
var startByteIndex = this.findByteIndex(string, start); | ||
var index = string.indexOf(searchValue, startByteIndex); | ||
@@ -77,5 +110,5 @@ | ||
} else { | ||
return findCharIndex(string, index); | ||
return this.findCharIndex(string, index); | ||
} | ||
} | ||
}; | ||
@@ -88,3 +121,3 @@ UtfString.lastIndexOf = function(string, searchValue, start) { | ||
} else { | ||
var startByteIndex = findCharacterByteIndex(string, start); | ||
var startByteIndex = this.findByteIndex(string, start); | ||
index = string.lastIndexOf(searchValue, startByteIndex); | ||
@@ -96,8 +129,8 @@ } | ||
} else { | ||
return findCharIndex(string, index); | ||
return this.findCharIndex(string, index); | ||
} | ||
} | ||
}; | ||
UtfString.slice = function(string, start, finish) { | ||
var startByteIndex = findCharacterByteIndex(string, start); | ||
var startByteIndex = this.findByteIndex(string, start); | ||
var finishByteIndex; | ||
@@ -112,3 +145,3 @@ | ||
} else { | ||
finishByteIndex = findCharacterByteIndex(string, finish); | ||
finishByteIndex = this.findByteIndex(string, finish); | ||
@@ -121,3 +154,3 @@ if (finishByteIndex < 0) { | ||
return string.slice(startByteIndex, finishByteIndex); | ||
} | ||
}; | ||
@@ -134,10 +167,11 @@ UtfString.substr = function(string, start, length) { | ||
} | ||
} | ||
}; | ||
// they do the same thing | ||
UtfString.substring = UtfString.slice | ||
UtfString.substring = UtfString.slice; | ||
UtfString.length = function(string) { | ||
return findCharIndex(string, string.length); | ||
} | ||
// findCharIndex will return -1 if string is empty, so add 1 | ||
return this.findCharIndex(string, string.length - 1) + 1; | ||
}; | ||
@@ -158,3 +192,3 @@ UtfString.stringToCodePoints = function(string) { | ||
return result; | ||
} | ||
}; | ||
@@ -169,3 +203,3 @@ UtfString.codePointsToString = function(arr) { | ||
return chars.join(''); | ||
} | ||
}; | ||
@@ -194,3 +228,3 @@ UtfString.stringToBytes = function(string) { | ||
return result; | ||
} | ||
}; | ||
@@ -208,3 +242,3 @@ UtfString.bytesToString = function(arr) { | ||
return result.join(''); | ||
} | ||
}; | ||
@@ -227,29 +261,4 @@ UtfString.stringToCharArray = function(string) { | ||
return result; | ||
} | ||
}; | ||
/**
 * Converts a JavaScript string index into a character index.
 *
 * @param {String} string - The string to inspect.
 * @param {Number} byteIndex - JavaScript string index to convert.
 * @returns {Number} The number of whole characters that end at or before
 *   `byteIndex`. When the string needs no special handling, `byteIndex` is
 *   returned unchanged.
 */
function findCharIndex(string, byteIndex) {
  // optimization: don't iterate unless necessary
  if (!containsUnsupportedCharacters(string)) {
    return byteIndex;
  }

  // Match either one multi-unit sequence (unsupportedPairs is defined
  // elsewhere; presumably surrogate pairs and similar) or any single code
  // unit. [\s\S] is used instead of '.' because '.' never matches line
  // terminators: the global scanner would silently skip over "\n" and the
  // result would disagree with the fast path above, which counts newlines.
  var scanner = new RegExp(unsupportedPairs.source + '|[\\s\\S]', 'g');
  var charCount = 0;

  while (scanner.exec(string) !== null) {
    // lastIndex is the string index just past the current match.
    if (scanner.lastIndex > byteIndex) {
      break;
    }

    charCount++;
  }

  return charCount;
}
/**
 * Looks up the JavaScript string index for a character index by running
 * `scan` with a freshly created default scanner.
 *
 * @param {String} string - The string to inspect.
 * @param {Number} charIndex - Character index to convert.
 * @returns {Number} Whatever `scan` returns for the given position
 *   (presumably the matching string index; confirm against `scan`).
 */
function findCharacterByteIndex(string, charIndex) {
  var scanner = createScanner();
  return scan(string, scanner, charIndex);
}
function findSurrogateByteIndex(string, charIndex) { | ||
@@ -256,0 +265,0 @@ return scan(string, new RegExp(surrogatePairs.source, 'g'), charIndex); |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
96
43045
20
872