Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

cldr-segmentation

Package Overview
Dependencies
Maintainers
1
Versions
11
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

cldr-segmentation - npm Package Compare versions

Comparing version 2.0.3 to 2.1.0

.tool-versions

5

CHANGELOG.md

@@ -0,1 +1,6 @@

## 2.1.0
* Add ability to copy and merge suppression objects.
- Enables adding custom suppression strings.
* Add custom English suppression for "Dr.".
## 2.0.3

@@ -2,0 +7,0 @@ * Somehow the require of 'utfstring' was changed to 'UtfString', which worked on Mac OS's case-insensitive filesystem but errored in Linux environments.

1

Gruntfile.js

@@ -38,2 +38,3 @@ let fs = require('fs');

'src/suppressions.js',
'src/customSuppressions.js',
...suppressions,

@@ -40,0 +41,0 @@ 'src/suppressions/all.js'

4

package.json
{
"name": "cldr-segmentation",
"version": "2.0.3",
"version": "2.1.0",
"description": "CLDR text segmentation for JavaScript",

@@ -36,3 +36,3 @@ "main": "dist/cldr-segmentation.js",

"cldrSegmentation": "./",
"grunt": "~1.0",
"grunt": "~1.3",
"grunt-babel": "~7.0",

@@ -39,0 +39,0 @@ "grunt-contrib-concat": "~1.0",

@@ -0,1 +1,3 @@

[![Build Status](https://travis-ci.org/camertron/cldr-segmentation.js.svg?branch=master)](https://travis-ci.org/camertron/cldr-segmentation.js)
cldr-segmentation

@@ -80,5 +82,15 @@ ===

Suppressions are just objects with a single `shouldBreak` function that returns a boolean. The function is passed a cursor object positioned at the index of the proposed break. Cursors deal exclusively with Unicode codepoints, meaning your custom suppression logic will need to be implemented in those terms. For example, let's create a custom suppression function that doesn't allow breaks after sentences that end with the letter 't'.
Suppressions are just strings after which a break should not occur. This library comes with a set of common suppressions for a variety of languages, but you may want to add your own. Suppression objects can be merged. For example, here's how to add "Dr." to the set of English suppressions:
```javascript
var customSupps = cldrSegmentation.Suppressions.create(['Dr.']);
var supps = cldrSegmentation.suppressions.en.merge(customSupps);
cldrSegmentation.sentenceSplit("We love Dr. Strange. He's cool.", supps);
```
## Custom Suppression Objects
Suppression objects are just plain ol' JavaScript objects with a single `shouldBreak` function that returns a boolean. The function is passed a cursor object positioned at the index of the proposed break. Cursors deal exclusively with Unicode codepoints, meaning your custom suppression logic will need to be implemented in those terms. For example, let's create a custom suppression function that doesn't allow breaks after sentences that end with the letter 't'.
```javascript
class TeeSuppression {

@@ -85,0 +97,0 @@ shouldBreak(cursor) {

@@ -86,4 +86,4 @@ ( () => {

describe('without ULI exceptions', () => {
it('splits on certain abbreviations like Mr. and Mrs. (use ULI rules to avoid this behavior)', () => {
describe('without suppressions', () => {
it('splits on certain abbreviations like Mr. and Mrs. (use suppressions to avoid this behavior)', () => {
let str = "I really like Mrs. Patterson. She's nice.";

@@ -90,0 +90,0 @@

@@ -10,3 +10,3 @@ ( () => {

let cldrSegmentation = require('cldr-segmentation');
let utfstring = require('UtfString');
let utfstring = require('utfstring');
let fs = require('fs');

@@ -13,0 +13,0 @@

@@ -21,3 +21,3 @@ ( () => {

it('splits correctly using uli exceptions', () => {
it('splits correctly using suppressions', () => {
let str = "I like Mrs. Murphy. She's nice.";

@@ -29,3 +29,10 @@ let result = cldrSegmentation.sentenceSplit(str, englishSuppressions);

it('splits correctly when an uli exception occurs just before a potential break', () => {
// The expected output keeps "I like Dr. Murphy. " as a single segment, i.e. no
// break is made after "Dr." when splitting with englishSuppressions.
it('splits correctly using custom suppressions', () => {
let str = "I like Dr. Murphy. She's nice.";
let result = cldrSegmentation.sentenceSplit(str, englishSuppressions);
expect(result).toEqual(["I like Dr. Murphy. ", "She's nice."]);
});
it('splits correctly when a suppression occurs just before a potential break', () => {
let str = 'Hi, my name is Philipp. Just because I can.';

@@ -37,3 +44,3 @@ let result = cldrSegmentation.sentenceSplit(str, englishSuppressions);

it('splits correctly when a German uli exception occurs just before a potential break', () => {
it('splits correctly when a German suppression occurs just before a potential break', () => {
let str = "Dies ist ein test Satz. Und hier ein Zweiter.";

@@ -40,0 +47,0 @@ let result = cldrSegmentation.sentenceSplit(str, germanSuppressions);

@@ -28,2 +28,10 @@ export const suppressions = {};

merge(otherSupp) {
return new Suppressions(
this.forwardTrie.merge(otherSupp.forwardTrie),
this.backwardTrie.merge(otherSupp.backwardTrie),
[...this.list, ...otherSupp.list]
)
}
shouldBreak(cursor) {

@@ -30,0 +38,0 @@ var idx = cursor.logicalPosition;

@@ -222,4 +222,8 @@ suppressions['de'] = ( () => {

if (customSuppressions['de']) {
supp = supp.merge(customSuppressions['de']);
}
supp.lock();
return supp;
})();

@@ -131,4 +131,8 @@ suppressions['en'] = ( () => {

if (customSuppressions['en']) {
supp = supp.merge(customSuppressions['en']);
}
supp.lock();
return supp;
})();

@@ -165,4 +165,8 @@ suppressions['es'] = ( () => {

if (customSuppressions['es']) {
supp = supp.merge(customSuppressions['es']);
}
supp.lock();
return supp;
})();

@@ -83,4 +83,8 @@ suppressions['fr'] = ( () => {

if (customSuppressions['fr']) {
supp = supp.merge(customSuppressions['fr']);
}
supp.lock();
return supp;
})();

@@ -48,4 +48,8 @@ suppressions['it'] = ( () => {

if (customSuppressions['it']) {
supp = supp.merge(customSuppressions['it']);
}
supp.lock();
return supp;
})();

@@ -174,4 +174,8 @@ suppressions['pt'] = ( () => {

if (customSuppressions['pt']) {
supp = supp.merge(customSuppressions['pt']);
}
supp.lock();
return supp;
})();

@@ -23,4 +23,8 @@ suppressions['ru'] = ( () => {

if (customSuppressions['ru']) {
supp = supp.merge(customSuppressions['ru']);
}
supp.lock();
return supp;
})();

@@ -14,2 +14,26 @@ class Node {

}
copy() {
let childrenCopy = {};
for (const key in this.children) {
childrenCopy[key] = this.children[key].copy();
}
return new Node(this.value, childrenCopy);
}
forEach(callback) {
this._forEach(callback, []);
}
_forEach(callback, path) {
if (this.value) {
callback(path, this.value);
}
for (const key in this.children) {
this.children[key]._forEach(callback, [...path, key]);
}
}
}

@@ -48,2 +72,20 @@

}
copy() {
return new Trie(this.root.copy());
}
forEach(callback) {
this.root.forEach(callback);
}
merge(otherTrie) {
let result = this.copy();
otherTrie.forEach((key, value) => {
result.add(key, value);
});
return result;
}
}

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc