Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

@ephox/polaris

Package Overview
Dependencies
Maintainers
2
Versions
74
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@ephox/polaris - npm Package Compare versions

Comparing version 6.0.9 to 6.1.0-alpha.0

3

CHANGELOG.md

@@ -9,2 +9,5 @@ # Changelog

### Added
- New `Words.getWordsWithIndices` API to extract words and their start and end indices when provided an array of characters. #TINY-9654
## 6.0.2 - 2022-06-29

@@ -11,0 +14,0 @@

8

lib/main/ts/ephox/polaris/api/Words.d.ts

@@ -1,5 +0,7 @@

import * as WordOptions from '../words/Words';
declare type GetWordsApi = <T>(chars: T[], extract: (char: T) => string, options?: WordOptions.WordOptions) => T[][];
import * as Words from '../words/Words';
declare type GetWordsApi = <T>(chars: T[], extract: (char: T) => string, options?: Words.WordOptions) => Words.Word<T>[];
declare const getWords: GetWordsApi;
export { getWords };
declare type GetWordsAndIndicesApi = <T>(chars: T[], extract: (char: T) => string, options?: Words.WordOptions) => Words.WordsWithIndices<T>;
declare const getWordsWithIndices: GetWordsAndIndicesApi;
export { getWords, getWordsWithIndices };
//# sourceMappingURL=Words.d.ts.map

@@ -1,4 +0,5 @@

import * as WordOptions from '../words/Words';
const getWords = WordOptions.getWords;
export { getWords };
import * as Words from '../words/Words';
const getWords = Words.getWords;
const getWordsWithIndices = Words.getWordsWithIndices;
export { getWords, getWordsWithIndices };
//# sourceMappingURL=Words.js.map

@@ -1,2 +0,2 @@

declare const punctuationStr = "[!-#%-*,-\\/:;?@\\[-\\]_{}\u00A1\u00AB\u00B7\u00BB\u00BF;\u00B7\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1361-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u3008\u3009\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30\u2E31\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]";
declare const punctuationStr = "[~^\u2116|!-*+-\\/:;?@\\[-\\]_{}\u00A1\u00AB\u00B7\u00BB\u00BF;\u00B7\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1361-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u3008\u3009\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30\u2E31\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]";
declare const characterIndices: {

@@ -3,0 +3,0 @@ ALETTER: number;

/* eslint-disable max-len */
const punctuationStr = '[!-#%-*,-\\/:;?@\\[-\\]_{}\u00A1\u00AB\u00B7\u00BB\u00BF;\u00B7\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1361-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u3008\u3009\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30\u2E31\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]';
const punctuationStr = `[~^№|!-*+-\\/:;?@\\[-\\]_{}\u00A1\u00AB\u00B7\u00BB\u00BF;\u00B7\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1361-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u3008\u3009\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30\u2E31\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]`;
const regExps = {

@@ -4,0 +4,0 @@ aletter: '[A-Za-z\u00AA\u00B5\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0370-\u0374\u0376\u0377\u037A-\u037D\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03F5\u03F7-\u0481\u048A-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05D0-\u05EA\u05F0-\u05F3\u0620-\u064A\u066E\u066F\u0671-\u06D3\u06D5\u06E5\u06E6\u06EE\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u07F4\u07F5\u07FA\u0800-\u0815\u081A\u0824\u0828\u0840-\u0858\u0904-\u0939\u093D\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097F\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC\u09DD\u09DF-\u09E1\u09F0\u09F1\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0\u0AE1\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B3D\u0B5C\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3D\u0C58\u0C59\u0C60\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDE\u0CE0\u0CE1\u0CF1\u0CF2\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D60\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u10A0-\u10C5\u10D0-\u10FA\u10FC\u1100-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u13A0-\u13F4\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16EE-\u16F0\u1700-\u170C\u170E-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1820-\u1877\u1880-\u18A8\u18AA\u18B0-\u18F5\u1900-\u191C\u1A00-\u1A16\u1B05-\u1B33\u1B45-\u1
B4B\u1B83-\u1BA0\u1BAE\u1BAF\u1BC0-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C7D\u1CE9-\u1CEC\u1CEE-\u1CF1\u1D00-\u1DBF\u1E00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2071\u207F\u2090-\u209C\u2102\u2107\u210A-\u2113\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u212F-\u2139\u213C-\u213F\u2145-\u2149\u214E\u2160-\u2188\u24B6-\u24E9\u2C00-\u2C2E\u2C30-\u2C5E\u2C60-\u2CE4\u2CEB-\u2CEE\u2D00-\u2D25\u2D30-\u2D65\u2D6F\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u2E2F\u3005\u303B\u303C\u3105-\u312D\u3131-\u318E\u31A0-\u31BA\uA000-\uA48C\uA4D0-\uA4FD\uA500-\uA60C\uA610-\uA61F\uA62A\uA62B\uA640-\uA66E\uA67F-\uA697\uA6A0-\uA6EF\uA717-\uA71F\uA722-\uA788\uA78B-\uA78E\uA790\uA791\uA7A0-\uA7A9\uA7FA-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9CF\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uABC0-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF21-\uFF3A\uFF41-\uFF5A\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]',

@@ -47,5 +47,12 @@ import { characterIndices as ci } from './UnicodeData';

// WB4. Ignore format and extend characters.
if (type === ci.EXTEND || type === ci.FORMAT ||
prevType === ci.EXTEND || prevType === ci.FORMAT ||
nextType === ci.EXTEND || nextType === ci.FORMAT) {
if ((type === ci.EXTEND || type === ci.FORMAT) &&
(nextType === ci.ALETTER || nextType === ci.NUMERIC || nextType === ci.KATAKANA ||
nextType === ci.EXTEND || nextType === ci.FORMAT)
||
(nextType === ci.EXTEND ||
// TINY-9654: Only ignore format characters if they do not precede a word boundary. Since some format characters overlap with whitespace characters (ex: \ufeff) and
// our word extraction logic excludes whitespace characters, if a whitespace-overlapping format character that precedes a word boundary is not split on, whichever word
// it is a part of will not be added to the list of extracted words, causing inaccuracies.
nextType === ci.FORMAT && (nextNextType === ci.ALETTER || nextNextType === ci.NUMERIC || nextNextType === ci.KATAKANA || nextNextType === ci.EXTEND || nextNextType === ci.FORMAT))
&& (type === ci.ALETTER || type === ci.NUMERIC || type === ci.KATAKANA || type === ci.EXTEND || type === ci.FORMAT)) {
return false;

@@ -52,0 +59,0 @@ }

@@ -0,1 +1,10 @@

export declare type Word<T> = T[];
interface WordIndex {
readonly start: number;
readonly end: number;
}
export interface WordsWithIndices<T> {
readonly words: Word<T>[];
readonly indices: WordIndex[];
}
export interface WordOptions {

@@ -5,4 +14,5 @@ includeWhitespace?: boolean;

}
declare const getWords: <T>(chars: T[], extract: (char: T) => string, options?: WordOptions) => T[][];
export { getWords };
declare const getWordsWithIndices: <T>(chars: Word<T>, extract: (char: T) => string, options?: WordOptions) => WordsWithIndices<T>;
declare const getWords: <T>(chars: Word<T>, extract: (char: T) => string, options?: WordOptions) => Word<T>[];
export { getWords, getWordsWithIndices };
//# sourceMappingURL=Words.d.ts.map

@@ -1,2 +0,2 @@

import { Unicode } from '@ephox/katamari';
import { Arr } from '@ephox/katamari';
import { classify } from './StringMapper';

@@ -23,4 +23,5 @@ import * as UnicodeData from './UnicodeData';

};
const findWords = (chars, sChars, characterMap, options) => {
const findWordsWithIndices = (chars, sChars, characterMap, options) => {
const words = [];
const indices = [];
let word = [];

@@ -48,2 +49,6 @@ // Loop through each character in the classification map and determine whether

words.push(word);
indices.push({
start: startOfWord,
end: endOfWord
});
}

@@ -53,3 +58,3 @@ word = [];

}
return words;
return { words, indices };
};

@@ -60,3 +65,3 @@ const getDefaultOptions = () => ({

});
const getWords = (chars, extract, options) => {
const getWordsWithIndices = (chars, extract, options) => {
options = {

@@ -66,16 +71,8 @@ ...getDefaultOptions(),

};
const filteredChars = [];
const extractedChars = [];
// tslint:disable-next-line:prefer-for-of
for (let i = 0; i < chars.length; i++) {
const ch = extract(chars[i]);
if (ch !== Unicode.zeroWidth) {
filteredChars.push(chars[i]);
extractedChars.push(ch);
}
}
const extractedChars = Arr.map(chars, extract);
const characterMap = classify(extractedChars);
return findWords(filteredChars, extractedChars, characterMap, options);
return findWordsWithIndices(chars, extractedChars, characterMap, options);
};
export { getWords };
const getWords = (chars, extract, options) => getWordsWithIndices(chars, extract, options).words;
export { getWords, getWordsWithIndices };
//# sourceMappingURL=Words.js.map

@@ -44,4 +44,30 @@ import { Assert, UnitTest } from '@ephox/bedrock-client';

assertWords(['42.6±4.2'], '42.6±4.2');
assertWords(['ab'], 'a\ufeffb');
// TINY-9654: Does not split on extend characters (ex: \u0300)
assertWords(['a\u0300b'], 'a\u0300b');
assertWords(['a\u0300bc'], 'a\u0300bc');
assertWords(['ab\u0300c'], 'ab\u0300c');
assertWords(['a\u0300b', 'c'], 'a\u0300b c');
assertWords(['\u0300b'], '\u0300b');
assertWords(['a', '\u0300b'], 'a \u0300b');
assertWords(['\u0300'], '\u0300');
assertWords(['\u0300'], '\u0300 ');
assertWords(['a\u0300'], 'a\u0300');
assertWords(['a\u0300'], 'a\u0300 ');
// TINY-9654: Does not split on format characters (ex: \ufeff) if they do not precede a word boundary
// TINY-9654: Does not strip \ufeff characters (obsolete TINY-1166 fix removed)
assertWords(['a\ufeffb'], 'a\ufeffb');
assertWords(['a\ufeffbc'], 'a\ufeffbc');
assertWords(['ab\ufeffc'], 'ab\ufeffc');
assertWords(['a\ufeffb', 'c'], 'a\ufeffb c');
assertWords(['\ufeffb'], '\ufeffb');
assertWords(['a', '\ufeffb'], 'a \ufeffb');
// TINY-9654: Split on format characters if they precede a word boundary. Some format characters overlap with whitespace
// characters (ex: \ufeff). Since whitespace characters are not extracted, if a whitespace-overlapping format character that
// precedes a word boundary is not split on, whichever word it is a part of will not be added to the list of extracted words,
// causing inaccuracies.
assertWords([], '\ufeff');
assertWords([], '\ufeff ');
assertWords(['a'], 'a\ufeff');
assertWords(['a'], 'a\ufeff ');
});
//# sourceMappingURL=WordsTest.js.map
{
"name": "@ephox/polaris",
"description": "This project does data manipulation on arrays and strings.",
"version": "6.0.9",
"version": "6.1.0-alpha.0",
"repository": {

@@ -28,3 +28,2 @@ "type": "git",

"scripts": {
"prepublishOnly": "tsc -b",
"test": "bedrock-auto -b chrome-headless -d src/test/ts",

@@ -38,4 +37,3 @@ "test-manual": "bedrock -d src/test/ts",

"module": "./lib/main/ts/ephox/polaris/api/Main.js",
"types": "./lib/main/ts/ephox/polaris/api/Main.d.ts",
"gitHead": "d0a354c4921b1ce815cfb36cc72ea3a309932fee"
"types": "./lib/main/ts/ephox/polaris/api/Main.d.ts"
}

@@ -1,8 +0,12 @@

import * as WordOptions from '../words/Words';
import * as Words from '../words/Words';
type GetWordsApi = <T>(chars: T[], extract: (char: T) => string, options?: WordOptions.WordOptions) => T[][];
const getWords: GetWordsApi = WordOptions.getWords;
type GetWordsApi = <T>(chars: T[], extract: (char: T) => string, options?: Words.WordOptions) => Words.Word<T>[];
const getWords: GetWordsApi = Words.getWords;
type GetWordsAndIndicesApi = <T>(chars: T[], extract: (char: T) => string, options?: Words.WordOptions) => Words.WordsWithIndices<T>;
const getWordsWithIndices: GetWordsAndIndicesApi = Words.getWordsWithIndices;
export {
getWords
getWords,
getWordsWithIndices
};
/* eslint-disable max-len */
const punctuationStr = '[!-#%-*,-\\/:;?@\\[-\\]_{}\u00A1\u00AB\u00B7\u00BB\u00BF;\u00B7\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1361-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u3008\u3009\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30\u2E31\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]';
const punctuationStr = `[~^№|!-*+-\\/:;?@\\[-\\]_{}\u00A1\u00AB\u00B7\u00BB\u00BF;\u00B7\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1361-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u3008\u3009\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30\u2E31\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]`;

@@ -5,0 +5,0 @@ const regExps = {

@@ -59,5 +59,12 @@ import { CharacterMap } from './StringMapper';

// WB4. Ignore format and extend characters.
if (type === ci.EXTEND || type === ci.FORMAT ||
prevType === ci.EXTEND || prevType === ci.FORMAT ||
nextType === ci.EXTEND || nextType === ci.FORMAT) {
if ((type === ci.EXTEND || type === ci.FORMAT) &&
(nextType === ci.ALETTER || nextType === ci.NUMERIC || nextType === ci.KATAKANA ||
nextType === ci.EXTEND || nextType === ci.FORMAT)
||
(nextType === ci.EXTEND ||
// TINY-9654: Only ignore format characters if they do not precede a word boundary. Since some format characters overlap with whitespace characters (ex: \ufeff) and
// our word extraction logic excludes whitespace characters, if a whitespace-overlapping format character that precedes a word boundary is not split on, whichever word
// it is a part of will not be added to the list of extracted words, causing inaccuracies.
nextType === ci.FORMAT && (nextNextType === ci.ALETTER || nextNextType === ci.NUMERIC || nextNextType === ci.KATAKANA || nextNextType === ci.EXTEND || nextNextType === ci.FORMAT))
&& (type === ci.ALETTER || type === ci.NUMERIC || type === ci.KATAKANA || type === ci.EXTEND || type === ci.FORMAT)) {
return false;

@@ -64,0 +71,0 @@ }

@@ -1,2 +0,2 @@

import { Unicode } from '@ephox/katamari';
import { Arr } from '@ephox/katamari';

@@ -29,6 +29,19 @@ import { CharacterMap, classify } from './StringMapper';

const findWords = <T>(chars: T[], sChars: string[], characterMap: CharacterMap, options: WordOptions): T[][] => {
const words: T[][] = [];
let word: T[] = [];
/** A single word, represented as the array of input items (of caller-chosen type `T`) that make it up. */
export type Word<T> = T[];
/**
 * Numeric start/end position recorded for one extracted word.
 * NOTE(review): presumably these are positions in the `chars` array given to `getWordsWithIndices`;
 * whether `end` is inclusive or exclusive is not visible here — confirm against `findWordsWithIndices`.
 */
interface WordIndex {
readonly start: number;
readonly end: number;
}
/**
 * Result of word extraction that also reports where each word was found.
 * `findWordsWithIndices` pushes one `indices` entry immediately after each `words` entry,
 * so the two arrays are expected to line up position-for-position.
 */
export interface WordsWithIndices<T> {
readonly words: Word<T>[];
readonly indices: WordIndex[];
}
const findWordsWithIndices = <T>(chars: Word<T>, sChars: string[], characterMap: CharacterMap, options: WordOptions): WordsWithIndices<T> => {
const words: Word<T>[] = [];
const indices: WordIndex[] = [];
let word: Word<T> = [];
// Loop through each character in the classification map and determine whether

@@ -61,2 +74,6 @@ // it precedes a word boundary, building an array of distinct words as we go.

words.push(word);
indices.push({
start: startOfWord,
end: endOfWord
});
}

@@ -68,3 +85,3 @@

return words;
return { words, indices };
};

@@ -82,3 +99,3 @@

const getWords = <T>(chars: T[], extract: (char: T) => string, options?: WordOptions): T[][] => {
const getWordsWithIndices = <T>(chars: Word<T>, extract: (char: T) => string, options?: WordOptions): WordsWithIndices<T> => {
options = {

@@ -88,21 +105,13 @@ ...getDefaultOptions(),

};
const filteredChars: T[] = [];
const extractedChars: string[] = [];
// tslint:disable-next-line:prefer-for-of
for (let i = 0; i < chars.length; i++) {
const ch = extract(chars[i]);
if (ch !== Unicode.zeroWidth) {
filteredChars.push(chars[i]);
extractedChars.push(ch);
}
}
const extractedChars: string[] = Arr.map(chars, extract);
const characterMap: CharacterMap = classify(extractedChars);
return findWords(filteredChars, extractedChars, characterMap, options);
return findWordsWithIndices(chars, extractedChars, characterMap, options);
};
/**
 * Extracts only the words from `chars`, dropping the index information.
 *
 * @param chars - The items to split into words.
 * @param extract - Maps an item to the string used to classify it.
 * @param options - Optional word-extraction settings, passed through unchanged.
 * @returns The `words` array produced by `getWordsWithIndices` for the same arguments.
 */
const getWords = <T>(chars: Word<T>, extract: (char: T) => string, options?: WordOptions): Word<T>[] => {
  const { words } = getWordsWithIndices(chars, extract, options);
  return words;
};
export {
getWords
getWords,
getWordsWithIndices
};

@@ -55,3 +55,32 @@ import { Assert, UnitTest } from '@ephox/bedrock-client';

assertWords([ '42.6±4.2' ], '42.6±4.2');
assertWords([ 'ab' ], 'a\ufeffb');
// TINY-9654: Does not split on extend characters (ex: \u0300)
assertWords([ 'a\u0300b' ], 'a\u0300b');
assertWords([ 'a\u0300bc' ], 'a\u0300bc');
assertWords([ 'ab\u0300c' ], 'ab\u0300c');
assertWords([ 'a\u0300b', 'c' ], 'a\u0300b c');
assertWords([ '\u0300b' ], '\u0300b');
assertWords([ 'a', '\u0300b' ], 'a \u0300b');
assertWords([ '\u0300' ], '\u0300');
assertWords([ '\u0300' ], '\u0300 ');
assertWords([ 'a\u0300' ], 'a\u0300');
assertWords([ 'a\u0300' ], 'a\u0300 ');
// TINY-9654: Does not split on format characters (ex: \ufeff) if they do not precede a word boundary
// TINY-9654: Does not strip \ufeff characters (obsolete TINY-1166 fix removed)
assertWords([ 'a\ufeffb' ], 'a\ufeffb');
assertWords([ 'a\ufeffbc' ], 'a\ufeffbc');
assertWords([ 'ab\ufeffc' ], 'ab\ufeffc');
assertWords([ 'a\ufeffb', 'c' ], 'a\ufeffb c');
assertWords([ '\ufeffb' ], '\ufeffb');
assertWords([ 'a', '\ufeffb' ], 'a \ufeffb');
// TINY-9654: Split on format characters if they precede a word boundary. Some format characters overlap with whitespace
// characters (ex: \ufeff). Since whitespace characters are not extracted, if a whitespace-overlapping format character that
// precedes a word boundary is not split on, whichever word it is a part of will not be added to the list of extracted words,
// causing inaccuracies.
assertWords([ ], '\ufeff');
assertWords([ ], '\ufeff ');
assertWords([ 'a' ], 'a\ufeff');
assertWords([ 'a' ], 'a\ufeff ');
});

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc