Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

graphemesplit

Package Overview
Dependencies
Maintainers
1
Versions
15
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

graphemesplit - npm Package Compare versions

Comparing version 2.0.1 to 2.0.3

benchmark.js

120

generate.js

@@ -1,9 +0,9 @@

const fs = require('fs')
const https = require('https')
const stream = require('stream')
const fs = require("fs");
const https = require("https");
const stream = require("stream");
const linesStream = require('@orisano/lines-stream')
const UnicodeTrieBuilder = require('unicode-trie/builder')
const linesStream = require("@orisano/lines-stream");
const UnicodeTrieBuilder = require("unicode-trie/builder");
const types = require('./types')
const types = require("./types");

@@ -16,63 +16,73 @@ function parseLine() {

transform(line, encoding, callback) {
const body = line.split('#')[0]
const body = line.split("#")[0];
if (body.trim().length === 0) {
callback()
return
callback();
return;
}
const [rawRange, type] = body.split(';').map(x => x.trim())
const range = rawRange.split('..').map(x => parseInt(x, 16))
const [rawRange, type] = body.split(";").map(x => x.trim());
const range = rawRange.split("..").map(x => parseInt(x, 16));
if (range.length > 1) {
this.push({start: range[0], end: range[1], type})
this.push({ start: range[0], end: range[1], type });
} else {
this.push({start: range[0], end: range[0], type})
this.push({ start: range[0], end: range[0], type });
}
callback()
},
})
callback();
}
});
}
https.get('https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt', res => {
const {statusCode} = res
if (statusCode !== 200) {
console.error(`failed to request: ${statusCode}`)
res.resume()
return
https.get(
"https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
res => {
const { statusCode } = res;
if (statusCode !== 200) {
console.error(`failed to request: ${statusCode}`);
res.resume();
return;
}
const trie = new UnicodeTrieBuilder(types.Other);
res
.setEncoding("utf8")
.pipe(linesStream())
.pipe(parseLine())
.on("data", ({ start, end, type }) => {
trie.setRange(start, end, types[type]);
})
.on("end", () => {
fs.writeFileSync(
"./typeTrie.json",
JSON.stringify({
data: trie.toBuffer().toString("base64")
})
);
});
}
);
const trie = new UnicodeTrieBuilder(types.Other)
res
.setEncoding('utf8')
.pipe(linesStream())
.pipe(parseLine())
.on('data', ({start, end, type}) => {
trie.setRange(start, end, types[type])
})
.on('end', () => {
fs.writeFileSync('./typeTrie.json', JSON.stringify({
data: trie.toBuffer().toString('base64'),
}))
})
})
https.get('https://unicode.org/Public/emoji/11.0/emoji-data.txt', res => {
const {statusCode} = res
https.get("https://unicode.org/Public/emoji/11.0/emoji-data.txt", res => {
const { statusCode } = res;
if (statusCode !== 200) {
console.error(`failed to request: ${statusCode}`)
res.resume()
return
console.error(`failed to request: ${statusCode}`);
res.resume();
return;
}
const trie = new UnicodeTrieBuilder()
const trie = new UnicodeTrieBuilder();
res
.setEncoding('utf8')
.pipe(linesStream())
.pipe(parseLine())
.on('data', ({start, end, type}) => {
if (type === 'Extended_Pictographic') trie.setRange(start, end, types.Extended_Pictographic)
})
.on('end', () => {
fs.writeFileSync('./extPict.json', JSON.stringify({
data: trie.toBuffer().toString('base64'),
}))
})
})
.setEncoding("utf8")
.pipe(linesStream())
.pipe(parseLine())
.on("data", ({ start, end, type }) => {
if (type === "Extended_Pictographic")
trie.setRange(start, end, types.Extended_Pictographic);
})
.on("end", () => {
fs.writeFileSync(
"./extPict.json",
JSON.stringify({
data: trie.toBuffer().toString("base64")
})
);
});
});

@@ -1,12 +0,12 @@

const types = require('./types')
const typeTrieData = require('./typeTrie').data
const extPictData = require('./extPict').data
const types = require("./types");
const typeTrieData = require("./typeTrie").data;
const extPictData = require("./extPict").data;
const UnicodeTrie = require('unicode-trie')
const UnicodeTrie = require("unicode-trie");
const typeTrie = new UnicodeTrie(Buffer.from(typeTrieData, 'base64'))
const extPict = new UnicodeTrie(Buffer.from(extPictData, 'base64'))
const typeTrie = new UnicodeTrie(Buffer.from(typeTrieData, "base64"));
const extPict = new UnicodeTrie(Buffer.from(extPictData, "base64"));
function is(type, bit) {
return (type & bit) !== 0
return (type & bit) !== 0;
}

@@ -17,9 +17,9 @@

ExtendOrZWJ: 1,
NotBoundary: 2,
}
NotBoundary: 2
};
function nextGraphemeClusterSize(ts, start) {
const L = ts.length
const L = ts.length;
let ri = 0
let ri = 0;
let gb11State = GB11State.Initial;

@@ -29,8 +29,8 @@

for (let i = start; i + 1 < L; i++) {
const curr = ts[i + 0]
const next = ts[i + 1]
const curr = ts[i + 0];
const next = ts[i + 1];
// for GB12, GB13
if (!is(curr, types.Regional_Indicator)) {
ri = 0
ri = 0;
}

@@ -43,16 +43,19 @@

if (is(curr, types.Extended_Pictographic)) {
gb11State = GB11State.ExtendOrZWJ
gb11State = GB11State.ExtendOrZWJ;
} else {
gb11State = GB11State.Initial
gb11State = GB11State.Initial;
}
break
break;
case GB11State.ExtendOrZWJ:
if (is(curr, types.Extend)) {
gb11State = GB11State.ExtendOrZWJ
} else if (is(curr, types.ZWJ) && is(next, types.Extended_Pictographic)) {
gb11State = GB11State.NotBoundary
gb11State = GB11State.ExtendOrZWJ;
} else if (
is(curr, types.ZWJ) &&
is(next, types.Extended_Pictographic)
) {
gb11State = GB11State.NotBoundary;
} else {
gb11State = GB11State.Initial
gb11State = GB11State.Initial;
}
break
break;
}

@@ -62,65 +65,81 @@

if (is(curr, types.CR) && is(next, types.LF)) {
continue
continue;
}
// GB4: (Control | CR | LF) ÷
if (is(curr, types.Control | types.CR | types.LF)) {
return (i + 1) - start
return i + 1 - start;
}
// GB5: ÷ (Control | CR | LF)
if (is(next, types.Control | types.CR | types.LF)) {
return (i + 1) - start
return i + 1 - start;
}
// GB6: L x (L | V | LV | LVT)
if (is(curr, types.L) && is(next, types.L | types.V | types.LV | types.LVT)) {
continue
if (
is(curr, types.L) &&
is(next, types.L | types.V | types.LV | types.LVT)
) {
continue;
}
// GB7: (LV | V) x (V | T)
if (is(curr, types.LV | types.V) && is(next, types.V | types.T)) {
continue
continue;
}
// GB8: (LVT | T) x T
if (is(curr, types.LVT | types.T) && is(next, types.T)) {
continue
continue;
}
// GB9: x (Extend | ZWJ)
if (is(next, types.Extend | types.ZWJ)) {
continue
continue;
}
// GB9a: x SpacingMark
if (is(next, types.SpacingMark)) {
continue
continue;
}
// GB9b: Prepend x
if (is(curr, types.Prepend)) {
continue
continue;
}
// GB11: \p{Extended_Pictographic} Extend* ZWJ x \p{Extended_Pictographic}
if (gb11State === GB11State.NotBoundary) {
continue
continue;
}
// GB12: sot (RI RI)* RI x RI
// GB13: [^RI] (RI RI)* RI x RI
if (is(curr, types.Regional_Indicator) && is(next, types.Regional_Indicator) && ri % 2 === 0) {
ri++
continue
if (
is(curr, types.Regional_Indicator) &&
is(next, types.Regional_Indicator) &&
ri % 2 === 0
) {
ri++;
continue;
}
// GB999: Any ÷ Any
return (i + 1) - start
return i + 1 - start;
}
// GB2: Any ÷ eot
return L - start
return L - start;
}
module.exports = function split(str) {
const graphemeClusters = []
const graphemeClusters = [];
const codePoints = [...str].map(x => x.codePointAt(0))
const ts = codePoints.map(c => typeTrie.get(c) | extPict.get(c))
for (let offset = 0; offset < codePoints.length;) {
const size = nextGraphemeClusterSize(ts, offset)
const graphemeCluster = codePoints.slice(offset, offset + size)
graphemeClusters.push(String.fromCodePoint(...graphemeCluster))
offset += size
const map = [0];
const ts = [];
for (let i = 0; i < str.length; ) {
const code = str.codePointAt(i);
ts.push(typeTrie.get(code) | extPict.get(code));
i += code > 65535 ? 2 : 1;
map.push(i);
}
return graphemeClusters
}
for (let offset = 0; offset < ts.length; ) {
const size = nextGraphemeClusterSize(ts, offset);
const start = map[offset];
const end = map[offset + size];
graphemeClusters.push(str.slice(start, end));
offset += size;
}
return graphemeClusters;
};
{
"name": "graphemesplit",
"version": "2.0.1",
"version": "2.0.3",
"main": "index.js",

@@ -8,4 +8,8 @@ "author": "Nao YONASHIRO",

"repository": "github:orisano/graphemesplit",
"scripts": {
"fmt": "prettier --write **/*.js"
},
"devDependencies": {
"@orisano/lines-stream": "^0.1.2"
"@orisano/lines-stream": "^0.1.2",
"prettier": "^1.16.4"
},

@@ -12,0 +16,0 @@ "dependencies": {

@@ -1,44 +0,54 @@

const assert = require('assert')
const https = require('https')
const stream = require('stream')
const assert = require("assert");
const https = require("https");
const stream = require("stream");
const linesStream = require('@orisano/lines-stream')
const linesStream = require("@orisano/lines-stream");
const graphemesplit = require('./')
const graphemesplit = require("./");
https.get('https://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakTest.txt', res => {
const {statusCode} = res
if (statusCode !== 200) {
console.error(`failed to http request: ${statusCode}`)
res.resume()
return
}
res
.pipe(linesStream())
.pipe(new stream.Transform({
decodeStrings: false,
readableObjectMode: true,
https.get(
"https://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakTest.txt",
res => {
const { statusCode } = res;
if (statusCode !== 200) {
console.error(`failed to http request: ${statusCode}`);
res.resume();
return;
}
res
.pipe(linesStream())
.pipe(
new stream.Transform({
decodeStrings: false,
readableObjectMode: true,
transform(line, encoding, callback) {
callback()
transform(line, encoding, callback) {
callback();
if (line.trim().length === 0) return
const [body, description] = line.split('#')
const test = body.trim()
if (test.length === 0) return
const graphemeClusters = test
.split('÷')
.filter(x => x.length > 0)
.map(x => {
const codePoints = x.split('×')
.map(y => parseInt(y.trim(), 16))
return String.fromCodePoint(...codePoints)
})
this.push({expected: graphemeClusters, description})
}
}))
.on('data', ({expected, description}) => {
const got = graphemesplit(expected.join(''))
assert.deepStrictEqual(got, expected, `unexpected grapheme clusters. expected: ${expected}, but got: ${got} # ${description}`)
})
})
if (line.trim().length === 0) return;
const [body, description] = line.split("#");
const test = body.trim();
if (test.length === 0) return;
const graphemeClusters = test
.split("÷")
.filter(x => x.length > 0)
.map(x => {
const codePoints = x
.split("×")
.map(y => parseInt(y.trim(), 16));
return String.fromCodePoint(...codePoints);
});
this.push({ expected: graphemeClusters, description });
}
})
)
.on("data", ({ expected, description }) => {
const got = graphemesplit(expected.join(""));
assert.deepStrictEqual(
got,
expected,
`unexpected grapheme clusters. expected: ${expected}, but got: ${got} # ${description}`
);
});
}
);

@@ -16,3 +16,3 @@ module.exports = {

LVT: 1 << 12,
Extended_Pictographic: 1 << 13,
}
Extended_Pictographic: 1 << 13
};
Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc