cldr-segmentation
Advanced tools
Comparing version 2.0.1 to 2.0.2
@@ -1,5 +0,9 @@ | ||
### 2.0.1 | ||
## 2.0.2 | ||
* Ensure suppressions are preceded by a space, i.e. are whole words. | ||
- Fixes bug where "Philipp. Is my name." would contain no breaks because 'pp.' is an English suppression. | ||
## 2.0.1 | ||
* Fix dependency issue causing problems installing via npm. | ||
### 2.0.0 | ||
## 2.0.0 | ||
* Massive performance improvements. | ||
@@ -10,3 +14,3 @@ - Switched from regex-based approach to state machine engine borrowed from ICU4J. | ||
### 1.0.0 | ||
## 1.0.0 | ||
* Birthday! |
{ | ||
"name": "cldr-segmentation", | ||
"version": "2.0.1", | ||
"version": "2.0.2", | ||
"description": "CLDR text segmentation for JavaScript", | ||
@@ -5,0 +5,0 @@ "main": "dist/cldr-segmentation.js", |
@@ -9,4 +9,5 @@ ( () => { | ||
let cldrSegmentation = require('cldr-segmentation'); | ||
let englishSuppressions = cldrSegmentation.suppressions.en; | ||
const cldrSegmentation = require('cldr-segmentation'); | ||
const englishSuppressions = cldrSegmentation.suppressions.en; | ||
const germanSuppressions = cldrSegmentation.suppressions.de; | ||
@@ -27,3 +28,17 @@ describe('#sentenceSplit', () => { | ||
}); | ||
it('splits correctly when an uli exception occurs just before a potential break', () => { | ||
let str = 'Hi, my name is Philipp. Just because I can.'; | ||
let result = cldrSegmentation.sentenceSplit(str, englishSuppressions); | ||
expect(result).toEqual(['Hi, my name is Philipp. ', 'Just because I can.']); | ||
}); | ||
it('splits correctly when a German uli exception occurs just before a potential break', () => { | ||
let str = "Dies ist ein test Satz. Und hier ein Zweiter."; | ||
let result = cldrSegmentation.sentenceSplit(str, germanSuppressions); | ||
expect(result).toEqual(["Dies ist ein test Satz. ", "Und hier ein Zweiter."]); | ||
}); | ||
}); | ||
})(); |
@@ -61,2 +61,6 @@ export const suppressions = {}; | ||
if (idx != 0 && cursor.getCodePoint(idx - 1) != 32) { | ||
return true; | ||
} | ||
if (!found) { | ||
@@ -63,0 +67,0 @@ return true; |
@@ -1,24 +0,8 @@ | ||
var fs = require('fs'); | ||
// var hp = fs.readFileSync('/Users/cameron/Desktop/Harry Potter and the Sorcerer.txt', {encoding: 'utf-8'}); | ||
var cldrSegmentation = require('./dist/cldr-segmentation.js'); | ||
// var str = "I live in the U.S.A. Where do you live?"; | ||
var str = "Dies ist ein test Satz. Und hier ein Zweiter."; | ||
// var bi = new cldrSegmentation.BreakIterator(cldrSegmentation.suppressions.en); | ||
// bi.eachSentence(hp, function(str, _, _) { | ||
// console.log(str); | ||
// }); | ||
// for (var i = 0; i < str.length; i ++) { | ||
// console.log(str.charCodeAt(i) + ": " + str[i]); | ||
// } | ||
var str = "I really like Mrs. Patterson. She's nice."; | ||
var iterations = 0; | ||
var elapsedTime = 0; | ||
while (elapsedTime < 5000) { | ||
var start = Date.now(); | ||
cldrSegmentation.sentenceSplit(str); | ||
iterations ++; | ||
elapsedTime += Date.now() - start; | ||
} | ||
console.log((iterations / (elapsedTime / 1000)).toString() + " i/s"); | ||
console.log(cldrSegmentation.sentenceSplit(str, cldrSegmentation.suppressions.de)); |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
1856253
2
16076