@@ -13,13 +13,11 @@ /! @orchidjs/unicode-variants \| https://github.com/orchidjs/unicode-variants \| Apache License (v2) /
		*/
		/**
		 * Convert a list of pattern strings into one regex fragment that
		 * matches any of them. Falsy entries are dropped first.
		 *
		 * - no entries: returns ''
		 * - one entry: returns it unchanged
		 * - every entry one code point long: returns a character class `[...]`
		 * - otherwise: returns a non-capturing alternation `(?:a|b|...)`
		 *
		 * @param {string[]} chars
		 * @return {string}
		 */
		const arrayToPattern = chars => {
			const present = chars.filter(Boolean);

			if (present.length < 2) {
				return present[0] || '';
			}

			return maxValueLength(present) === 1
				? '[' + present.join('') + ']'
				: '(?:' + present.join('|') + ')';
		};

		/**
		@@ -29,38 +27,30 @@ * @param {string[]} array
		*/
		/**
		 * Join pattern fragments into one pattern, collapsing runs of
		 * identical adjacent fragments into a `{n}` quantifier.
		 * When the array has no duplicates at all it is simply joined.
		 *
		 * @param {string[]} array
		 * @return {string}
		 */
		const sequencePattern = array => {
			if (!hasDuplicates(array)) {
				return array.join('');
			}

			let pattern = '';
			let prev_char_count = 0;

			// Append a quantifier for the run that has just ended, if it repeated.
			const prev_pattern = () => {
				if (prev_char_count > 1) {
					pattern += '{' + prev_char_count + '}';
				}
			};

			array.forEach((char, i) => {
				if (char === array[i - 1]) {
					prev_char_count++;
					return;
				}

				prev_pattern();
				pattern += char;
				prev_char_count = 1;
			});

			prev_pattern();
			return pattern;
		};



		/**
		@@ -73,9 +63,7 @@ * Convert array of strings to a regular expression
		*/
		/**
		 * Convert an iterable of pattern strings (for example a Set) into a
		 * regex fragment by copying it to an array and delegating to
		 * arrayToPattern.
		 *
		 * @param {Set<string>} chars
		 * @return {string}
		 */
		const setToPattern = chars => {
			const array = toArray(chars);
			return arrayToPattern(array);
		};



		/**
		@@ -86,7 +74,6 @@ *
		*/
		/**
		 * Return true when at least one value occurs more than once in the array
		 * (compared with Set semantics).
		 *
		 * @param {any[]} array
		 * @return {boolean}
		 */
		const hasDuplicates = array => {
			return new Set(array).size !== array.length;
		};


		/**
		@@ -97,6 +84,6 @@ * https://stackoverflow.com/questions/63006601/why-does-u-throw-an-invalid-escape-error
		*/
		/**
		 * Escape regular expression syntax characters in a value so it can be
		 * embedded literally in a pattern compiled with the `u` flag.
		 * The value is coerced to a string first.
		 *
		 * Parentheses are escaped too; left unescaped, a literal such as "(1)"
		 * would be compiled as a capture group instead of matching the text.
		 *
		 * @param {any} str
		 * @return {string}
		 */
		const escape_regex = str => {
			return (str + '').replace(/([\$\(\)\*\+\.\?\[\]\^\{\|\}\\])/gu, '\\$1');
		};

		/**
		@@ -107,14 +94,13 @@ * Return the max length of array values
		*/
		/**
		 * Return the largest unicodeLength() among the array values,
		 * or 0 for an empty array.
		 *
		 * @param {string[]} array
		 * @return {number}
		 */
		const maxValueLength = array => {
			return array.reduce((longest, value) => Math.max(longest, unicodeLength(value)), 0);
		};


		/**
		* @param {string} str
		*/
		/**
		 * Length of a string as counted by iterating it (via toArray),
		 * rather than by its `.length` property.
		 *
		 * @param {string} str
		 * @return {number}
		 */
		const unicodeLength = str => {
			return toArray(str).length;
		};

		/**
		@@ -124,4 +110,5 @@ * @param {any} p
		*/
		/**
		 * Copy any iterable or array-like value into a new array (Array.from).
		 *
		 * @param {any} p
		 * @return {any[]}
		 */
		const toArray = p => Array.from(p);

		/**
		@@ -133,51 +120,49 @@ * Get all possible combinations of substrings that add up to the given string
		*/
		/**
		 * Get every way of splitting the input into consecutive, non-empty
		 * substrings that concatenate back to the input.
		 * e.g. 'abc' => [['abc'], ['a','bc'], ['ab','c'], ['a','b','c']]
		 *
		 * An empty input yields [['']] instead of recursing without end.
		 *
		 * @param {string} input
		 * @return {string[][]}
		 */
		const allSubstrings = input => {
			if (input.length <= 1) return [[input]];

			const first = input.charAt(0);

			/** @type {string[][]} */
			const result = [];

			allSubstrings(input.substring(1)).forEach(subresult => {
				// first character glued onto the leading substring
				const merged = subresult.slice(0);
				merged[0] = first + merged[0];
				result.push(merged);

				// first character kept as its own substring
				const split = subresult.slice(0);
				split.unshift(first);
				result.push(split);
			});

			return result;
		};

		/**
		* @typedef {{[key:string]:string}} TUnicodeMap
		* @typedef {{[key:string]:Set<string>}} TUnicodeSets
		* @typedef {[[number,number]]} TCodePoints
		* @typedef {{folded:string,composed:string,code_point:number}} TCodePointObj
		* @typedef {{start:number,end:number,length:number,substr:string}} TSequencePart
		*/
		/**
		 * Default code point ranges; used by initialize() when none are passed.
		 * @type {TCodePoints}
		 */
		const code_points = [[0, 65535]];

		// Character class of marks that asciifold() removes (via convert_pat).
		const accent_pat = '[\u0300-\u036F\u{b7}\u{2be}]'; // \u{2bc}

		/**
		 * Folded string => pattern of its variants; filled in by initialize().
		 * @type {TUnicodeMap}
		 */
		exports.unicode_map = void 0;

		/**
		 * Anchored regex of multi-character folded keys; assigned by generateMap().
		 * @type {RegExp}
		 */
		let multi_char_reg;

		// generator() skips characters whose folded form is longer than this.
		const max_char_length = 3;

		/**
		 * Explicit replacements applied by asciifold().
		 * @type {TUnicodeMap}
		 */
		const latin_convert = {
			'æ': 'ae',
			'ⱥ': 'a',
			'ø': 'o',
			'⁄': '/',
			'∕': '/'
		};

		// Matches every latin_convert key as well as the accent marks.
		const convert_pat = new RegExp(Object.keys(latin_convert).join('|') + '|' + accent_pat, 'gu');
		/**
		@@ -188,8 +173,7 @@ * Initialize the unicode_map from the give code point ranges
		*/
		const initialize = (_code_points) => {
		if( exports.unicode_map !== undefined ) return;
		exports.unicode_map = generateMap(_code_points \|\| code_points );

		const initialize = _code_points => {
		if (exports.unicode_map !== undefined) return;
		exports.unicode_map = generateMap(_code_points \|\| code_points);
		};


		/**
		@@ -201,5 +185,4 @@ * Helper method for normalize a string
		*/
		/**
		 * Thin wrapper around String.prototype.normalize that defaults to
		 * compatibility decomposition (NFKD).
		 *
		 * @param {string} str
		 * @param {string} form
		 * @return {string}
		 */
		const normalize = (str, form = 'NFKD') => str.normalize(form);
		/**
		@@ -210,23 +193,17 @@ * Compatibility Decomposition without reordering string
		*/
		/**
		 * Compatibility decomposition without reordering the string.
		 * When the string contains a character in U+0F71..U+0F81 each
		 * character is normalized on its own and the results concatenated;
		 * otherwise the whole string is normalized in one call.
		 *
		 * @param {string} str
		 * @return {string}
		 */
		const decompose = str => {
			if (str.match(/[\u0f71-\u0f81]/)) {
				return toArray(str).reduce(
					/**
					 * @param {string} result
					 * @param {string} char
					 */
					(result, char) => result + normalize(char),
					''
				);
			}

			return normalize(str);
		};




		/**
		@@ -238,15 +215,10 @@ * Remove accents
		*/
		/**
		 * Fold a string: decompose it, lowercase it, then replace every
		 * convert_pat match with its latin_convert entry. Matches with no
		 * entry (the accent marks) are removed.
		 *
		 * @param {string} str
		 * @return {string}
		 */
		const asciifold = str => {
			return decompose(str)
				.toLowerCase()
				.replace(convert_pat, (/** @type {string} */ char) => {
					return latin_convert[char] || '';
				});
		};






		/**
		@@ -257,42 +229,41 @@ * Generate a list of unicode variants from the list of code points
		*/
		/**
		 * Walk every code point in the given inclusive ranges and yield those
		 * whose folded form is a usable variant mapping.
		 *
		 * Characters are built with String.fromCodePoint so that ranges above
		 * U+FFFF produce the intended character instead of a truncated one.
		 *
		 * @param {TCodePoints} code_points
		 * @yield {TCodePointObj}
		 */
		function* generator(code_points) {
			for (const [code_point_min, code_point_max] of code_points) {
				for (let i = code_point_min; i <= code_point_max; i++) {
					const composed = String.fromCodePoint(i);
					const folded = asciifold(composed);

					// folding changed nothing beyond case
					if (folded == composed.toLowerCase()) {
						continue;
					}

					// skip when folded is a string longer than 3 characters long
					// bc the resulting regex patterns will be long
					// eg:
					// folded صلى الله عليه وسلم length 18 code point 65018
					// folded جل جلاله length 8 code point 65019
					if (folded.length > max_char_length) {
						continue;
					}

					if (folded.length == 0) {
						continue;
					}

					const decomposed = normalize(composed);
					const recomposed = normalize(decomposed, 'NFC');

					if (recomposed === composed && folded === decomposed) {
						continue;
					}

					yield {
						folded: folded,
						composed: composed,
						code_point: i
					};
				}
			}
		}


		/**
		@@ -303,35 +274,31 @@ * Generate a unicode map from the list of code points
		*/
		/**
		 * Group the variants produced by generator() by their folded form.
		 * Each set holds regex-escaped strings; a candidate is skipped when
		 * the set's current pattern already matches it (case-insensitively).
		 *
		 * @param {TCodePoints} code_points
		 * @return {{[key:string]:Set<string>}}
		 */
		const generateSets = code_points => {
			/** @type {{[key:string]:Set<string>}} */
			const unicode_sets = {};

			/**
			 * @param {string} folded
			 * @param {string} to_add
			 */
			const addMatching = (folded, to_add) => {
				/** @type {Set<string>} */
				const folded_set = unicode_sets[folded] || new Set();
				const patt = new RegExp('^' + setToPattern(folded_set) + '$', 'iu');

				if (to_add.match(patt)) {
					return;
				}

				folded_set.add(escape_regex(to_add));
				unicode_sets[folded] = folded_set;
			};

			for (const value of generator(code_points)) {
				addMatching(value.folded, value.folded);
				addMatching(value.folded, value.composed);
			}

			return unicode_sets;
		};

		/**
		@@ -344,33 +311,30 @@ * Generate a unicode map from the list of code points
		*/
		/**
		 * Build the folded => pattern map from the given code point ranges.
		 * As a side effect, (re)assigns multi_char_reg to an anchored regex
		 * over all folded keys longer than one character, longest first.
		 *
		 * @param {TCodePoints} code_points
		 * @return {TUnicodeMap}
		 */
		const generateMap = code_points => {
			/** @type {TUnicodeSets} */
			const unicode_sets = generateSets(code_points);

			/** @type {TUnicodeMap} */
			const unicode_map = {};

			/** @type {string[]} */
			const multi_char = [];

			for (const folded in unicode_sets) {
				const set = unicode_sets[folded];

				if (set) {
					unicode_map[folded] = setToPattern(set);
				}

				if (folded.length > 1) {
					multi_char.push(escape_regex(folded));
				}
			}

			// longest alternatives first so they win in the alternation
			multi_char.sort((a, b) => b.length - a.length);
			const multi_char_patt = arrayToPattern(multi_char);
			multi_char_reg = new RegExp('^' + multi_char_patt, 'u');
			return unicode_map;
		};


		/**
		@@ -382,20 +346,19 @@ * Map each element of an array from it's folded value to all possible unicode matches
		*/
		/**
		 * Map each element of an array from its folded value to the pattern
		 * of its unicode variants (exports.unicode_map), leaving elements
		 * without an entry unchanged. Returns '' unless the mapped elements
		 * account for at least min_replacement characters.
		 *
		 * @param {string[]} strings
		 * @param {number} min_replacement
		 * @return {string}
		 */
		const mapSequence = (strings, min_replacement = 1) => {
			let chars_replaced = 0;

			const mapped = strings.map(str => {
				if (exports.unicode_map[str]) {
					chars_replaced += str.length;
				}

				return exports.unicode_map[str] || str;
			});

			if (chars_replaced >= min_replacement) {
				return sequencePattern(mapped);
			}

			return '';
		};

		/**
		@@ -414,13 +377,9 @@ * Convert a short string and split it into all possible patterns
		*/
		/**
		 * Convert a short string into a pattern covering every way of
		 * splitting it into substrings (allSubstrings), each mapped through
		 * mapSequence. The replacement threshold is raised to at least
		 * str.length - 1.
		 *
		 * @param {string} str
		 * @param {number} min_replacement
		 * @return {string}
		 */
		const substringsToPattern = (str, min_replacement = 1) => {
			const threshold = Math.max(min_replacement, str.length - 1);
			return arrayToPattern(allSubstrings(str).map(sub_pat => {
				return mapSequence(sub_pat, threshold);
			}));
		};

		/**
		@@ -433,18 +392,16 @@ * Convert an array of sequences into a pattern
		*/
		/**
		 * Convert an array of sequences into a single pattern. Each sequence
		 * contributes the joined patterns of its substrings; with all=false
		 * the final substring of every sequence is left out.
		 *
		 * @param {Sequence[]} sequences
		 * @param {boolean} all
		 * @return {string}
		 */
		const sequencesToPattern = (sequences, all = true) => {
			const min_replacement = sequences.length > 1 ? 1 : 0;

			return arrayToPattern(sequences.map(sequence => {
				const seq = [];
				const len = all ? sequence.length() : sequence.length() - 1;

				for (let j = 0; j < len; j++) {
					seq.push(substringsToPattern(sequence.substrs[j] || '', min_replacement));
				}

				return sequencePattern(seq);
			}));
		};

		/**
		@@ -455,118 +412,116 @@ * Return true if the sequence is already in the sequences
		*/
		/**
		 * Return true if an equivalent of needle_seq is already in sequences.
		 * A candidate is equivalent when it has the same start/end, the same
		 * concatenated substrings, and none of its parts overlaps a needle
		 * part at a different boundary.
		 *
		 * @param {Sequence} needle_seq
		 * @param {Sequence[]} sequences
		 * @return {boolean}
		 */
		const inSequences = (needle_seq, sequences) => {
			for (const seq of sequences) {
				if (seq.start != needle_seq.start || seq.end != needle_seq.end) {
					continue;
				}

				if (seq.substrs.join('') !== needle_seq.substrs.join('')) {
					continue;
				}

				const needle_parts = needle_seq.parts;

				/**
				 * True when the part conflicts with one of the needle parts.
				 * @param {TSequencePart} part
				 */
				const filter = part => {
					for (const needle_part of needle_parts) {
						if (needle_part.start === part.start && needle_part.substr === part.substr) {
							return false;
						}

						if (part.length == 1 || needle_part.length == 1) {
							continue;
						}

						// check for overlapping parts
						// a = ['::=','==']
						// b = ['::','===']
						// a = ['r','sm']
						// b = ['rs','m']
						if (part.start < needle_part.start && part.end > needle_part.start) {
							return true;
						}

						if (needle_part.start < part.start && needle_part.end > part.start) {
							return true;
						}
					}

					return false;
				};

				const filtered = seq.parts.filter(filter);

				if (filtered.length > 0) {
					continue;
				}

				return true;
			}

			return false;
		};

		/**
		 * An ordered list of string parts together with the parallel list of
		 * their substrings and the overall start/end offsets seen so far.
		 */
		class Sequence {
			constructor() {
				/** @type {TSequencePart[]} */
				this.parts = [];

				/** @type {string[]} */
				this.substrs = [];
				this.start = 0;
				this.end = 0;
			}

			/**
			 * Append a part and widen start/end to include it.
			 * A missing part is ignored.
			 * @param {TSequencePart|undefined} part
			 */
			add(part) {
				if (part) {
					this.parts.push(part);
					this.substrs.push(part.substr);
					this.start = Math.min(part.start, this.start);
					this.end = Math.max(part.end, this.end);
				}
			}

			/**
			 * @return {TSequencePart|undefined} the most recently added part
			 */
			last() {
				return this.parts[this.parts.length - 1];
			}

			/**
			 * @return {number} the number of parts
			 */
			length() {
				return this.parts.length;
			}

			/**
			 * Copy this sequence, truncating its final part so that it ends
			 * at `position`. The parts are deep-copied, so the clone shares
			 * no part objects with the original. Requires at least one part.
			 *
			 * @param {number} position
			 * @param {TSequencePart} last_piece
			 * @return {Sequence}
			 */
			clone(position, last_piece) {
				const clone = new Sequence();
				const parts = JSON.parse(JSON.stringify(this.parts));
				const last_part = parts.pop();

				for (const part of parts) {
					clone.add(part);
				}

				const last_substr = last_piece.substr.substring(0, position - last_part.start);
				const clone_last_len = last_substr.length;
				clone.add({
					start: last_part.start,
					end: last_part.start + clone_last_len,
					length: clone_last_len,
					substr: last_substr
				});
				return clone;
			}
		}

		/**
		@@ -587,98 +542,98 @@ * Expand a regular expression pattern to include unicode variants
		*/
		/**
		 * Expand a string into a regular expression pattern that also matches
		 * its unicode variants. The string is folded with asciifold() and
		 * then scanned position by position, tracking alternative sequences
		 * wherever a multi-character folded key (multi_char_reg) matches.
		 *
		 * @param {string} str
		 * @return {string}
		 */
		const getPattern = str => {
			initialize();
			str = asciifold(str);
			let pattern = '';
			let sequences = [new Sequence()];

			for (let i = 0; i < str.length; i++) {
				const substr = str.substring(i);
				const match = substr.match(multi_char_reg);
				const char = str.substring(i, i + 1);
				const match_str = match ? match[0] : null;

				// loop through sequences
				// add either the char or multi_match
				let overlapping = [];
				const added_types = new Set();

				for (const sequence of sequences) {
					const last_piece = sequence.last();

					if (!last_piece || last_piece.length == 1 || last_piece.end <= i) {
						// if we have a multi match
						if (match_str) {
							const len = match_str.length;
							sequence.add({
								start: i,
								end: i + len,
								length: len,
								substr: match_str
							});
							added_types.add('1');
						} else {
							sequence.add({
								start: i,
								end: i + 1,
								length: 1,
								substr: char
							});
							added_types.add('2');
						}
					} else if (match_str) {
						const clone = sequence.clone(i, last_piece);
						const len = match_str.length;
						clone.add({
							start: i,
							end: i + len,
							length: len,
							substr: match_str
						});
						overlapping.push(clone);
					} else {
						// don't add char
						// adding would create invalid patterns: 234 => [2,34,4]
						added_types.add('3');
					}
				}

				// if we have overlapping
				if (overlapping.length > 0) {
					// ['ii','iii'] before ['i','i','iii']
					overlapping = overlapping.sort((a, b) => {
						return a.length() - b.length();
					});

					for (const clone of overlapping) {
						// don't add if we already have an equivalent sequence
						if (inSequences(clone, sequences)) {
							continue;
						}

						sequences.push(clone);
					}

					continue;
				}

				// if we haven't done anything unique
				// clean up the patterns
				// helps keep patterns smaller
				// if str = 'r₨㎧aarss', pattern will be 446 instead of 655
				if (i > 0 && added_types.size == 1 && !added_types.has('3')) {
					pattern += sequencesToPattern(sequences, false);
					const new_seq = new Sequence();
					const old_seq = sequences[0];

					if (old_seq) {
						new_seq.add(old_seq.last());
					}

					sequences = [new_seq];
				}
			}

			pattern += sequencesToPattern(sequences, true);
			return pattern;
		};
		@@ -685,0 +640,0 @@

783

dist/umd/index.js

		@@ -15,13 +15,11 @@ /! @orchidjs/unicode-variants \| https://github.com/orchidjs/unicode-variants \| Apache License (v2) /
		*/
		/**
		 * Convert a list of pattern strings into one regex fragment that
		 * matches any of them. Falsy entries are dropped first.
		 *
		 * - no entries: returns ''
		 * - one entry: returns it unchanged
		 * - every entry one code point long: returns a character class `[...]`
		 * - otherwise: returns a non-capturing alternation `(?:a|b|...)`
		 *
		 * @param {string[]} chars
		 * @return {string}
		 */
		const arrayToPattern = chars => {
			const present = chars.filter(Boolean);

			if (present.length < 2) {
				return present[0] || '';
			}

			return maxValueLength(present) === 1
				? '[' + present.join('') + ']'
				: '(?:' + present.join('|') + ')';
		};

		/**
		@@ -31,38 +29,30 @@ * @param {string[]} array
		*/
		/**
		 * Join pattern fragments into one pattern, collapsing runs of
		 * identical adjacent fragments into a `{n}` quantifier.
		 * When the array has no duplicates at all it is simply joined.
		 *
		 * @param {string[]} array
		 * @return {string}
		 */
		const sequencePattern = array => {
			if (!hasDuplicates(array)) {
				return array.join('');
			}

			let pattern = '';
			let prev_char_count = 0;

			// Append a quantifier for the run that has just ended, if it repeated.
			const prev_pattern = () => {
				if (prev_char_count > 1) {
					pattern += '{' + prev_char_count + '}';
				}
			};

			array.forEach((char, i) => {
				if (char === array[i - 1]) {
					prev_char_count++;
					return;
				}

				prev_pattern();
				pattern += char;
				prev_char_count = 1;
			});

			prev_pattern();
			return pattern;
		};



		/**
		@@ -75,9 +65,7 @@ * Convert array of strings to a regular expression
		*/
		/**
		 * Convert an iterable of pattern strings (for example a Set) into a
		 * regex fragment by copying it to an array and delegating to
		 * arrayToPattern.
		 *
		 * @param {Set<string>} chars
		 * @return {string}
		 */
		const setToPattern = chars => {
			const array = toArray(chars);
			return arrayToPattern(array);
		};



		/**
		@@ -88,7 +76,6 @@ *
		*/
		/**
		 * Return true when at least one value occurs more than once in the array
		 * (compared with Set semantics).
		 *
		 * @param {any[]} array
		 * @return {boolean}
		 */
		const hasDuplicates = array => {
			return new Set(array).size !== array.length;
		};


		/**
		@@ -99,6 +86,6 @@ * https://stackoverflow.com/questions/63006601/why-does-u-throw-an-invalid-escape-error
		*/
		/**
		 * Escape regular expression syntax characters in a value so it can be
		 * embedded literally in a pattern compiled with the `u` flag.
		 * The value is coerced to a string first.
		 *
		 * Parentheses are escaped too; left unescaped, a literal such as "(1)"
		 * would be compiled as a capture group instead of matching the text.
		 *
		 * @param {any} str
		 * @return {string}
		 */
		const escape_regex = str => {
			return (str + '').replace(/([\$\(\)\*\+\.\?\[\]\^\{\|\}\\])/gu, '\\$1');
		};

		/**
		@@ -109,14 +96,13 @@ * Return the max length of array values
		*/
		/**
		 * Return the largest unicodeLength() among the array values,
		 * or 0 for an empty array.
		 *
		 * @param {string[]} array
		 * @return {number}
		 */
		const maxValueLength = array => {
			return array.reduce((longest, value) => Math.max(longest, unicodeLength(value)), 0);
		};


		/**
		* @param {string} str
		*/
		/**
		 * Length of a string as counted by iterating it (via toArray),
		 * rather than by its `.length` property.
		 *
		 * @param {string} str
		 * @return {number}
		 */
		const unicodeLength = str => {
			return toArray(str).length;
		};

		/**
		@@ -126,4 +112,5 @@ * @param {any} p
		*/
		/**
		 * Copy any iterable or array-like value into a new array (Array.from).
		 *
		 * @param {any} p
		 * @return {any[]}
		 */
		const toArray = p => Array.from(p);

		/**
		@@ -135,51 +122,49 @@ * Get all possible combinations of substrings that add up to the given string
		*/
		/**
		 * Get every way of splitting the input into consecutive, non-empty
		 * substrings that concatenate back to the input.
		 * e.g. 'abc' => [['abc'], ['a','bc'], ['ab','c'], ['a','b','c']]
		 *
		 * An empty input yields [['']] instead of recursing without end.
		 *
		 * @param {string} input
		 * @return {string[][]}
		 */
		const allSubstrings = input => {
			if (input.length <= 1) return [[input]];

			const first = input.charAt(0);

			/** @type {string[][]} */
			const result = [];

			allSubstrings(input.substring(1)).forEach(subresult => {
				// first character glued onto the leading substring
				const merged = subresult.slice(0);
				merged[0] = first + merged[0];
				result.push(merged);

				// first character kept as its own substring
				const split = subresult.slice(0);
				split.unshift(first);
				result.push(split);
			});

			return result;
		};

		/**
		* @typedef {{[key:string]:string}} TUnicodeMap
		* @typedef {{[key:string]:Set<string>}} TUnicodeSets
		* @typedef {[[number,number]]} TCodePoints
		* @typedef {{folded:string,composed:string,code_point:number}} TCodePointObj
		* @typedef {{start:number,end:number,length:number,substr:string}} TSequencePart
		*/
		/**
		 * Default code point ranges; used by initialize() when none are passed.
		 * @type {TCodePoints}
		 */
		const code_points = [[0, 65535]];

		// Character class of marks that asciifold() removes (via convert_pat).
		const accent_pat = '[\u0300-\u036F\u{b7}\u{2be}]'; // \u{2bc}

		/**
		 * Folded string => pattern of its variants; filled in by initialize().
		 * @type {TUnicodeMap}
		 */
		exports.unicode_map = void 0;

		/**
		 * Anchored regex of multi-character folded keys; assigned by generateMap().
		 * @type {RegExp}
		 */
		let multi_char_reg;

		// generator() skips characters whose folded form is longer than this.
		const max_char_length = 3;

		/**
		 * Explicit replacements applied by asciifold().
		 * @type {TUnicodeMap}
		 */
		const latin_convert = {
			'æ': 'ae',
			'ⱥ': 'a',
			'ø': 'o',
			'⁄': '/',
			'∕': '/'
		};

		// Matches every latin_convert key as well as the accent marks.
		const convert_pat = new RegExp(Object.keys(latin_convert).join('|') + '|' + accent_pat, 'gu');
		/**
		@@ -190,8 +175,7 @@ * Initialize the unicode_map from the give code point ranges
		*/
		const initialize = (_code_points) => {
		if( exports.unicode_map !== undefined ) return;
		exports.unicode_map = generateMap(_code_points \|\| code_points );

		const initialize = _code_points => {
		if (exports.unicode_map !== undefined) return;
		exports.unicode_map = generateMap(_code_points \|\| code_points);
		};


		/**
		@@ -203,5 +187,4 @@ * Helper method for normalize a string
		*/
		/**
		 * Thin wrapper around String.prototype.normalize that defaults to
		 * compatibility decomposition (NFKD).
		 *
		 * @param {string} str
		 * @param {string} form
		 * @return {string}
		 */
		const normalize = (str, form = 'NFKD') => str.normalize(form);
		/**
		@@ -212,23 +195,17 @@ * Compatibility Decomposition without reordering string
		*/
		/**
		 * Compatibility decomposition without reordering the string.
		 * When the string contains a character in U+0F71..U+0F81 each
		 * character is normalized on its own and the results concatenated;
		 * otherwise the whole string is normalized in one call.
		 *
		 * @param {string} str
		 * @return {string}
		 */
		const decompose = str => {
			if (str.match(/[\u0f71-\u0f81]/)) {
				return toArray(str).reduce(
					/**
					 * @param {string} result
					 * @param {string} char
					 */
					(result, char) => result + normalize(char),
					''
				);
			}

			return normalize(str);
		};




		/**
		@@ -240,15 +217,10 @@ * Remove accents
		*/
		/**
		 * Fold a string: decompose it, lowercase it, then replace every
		 * convert_pat match with its latin_convert entry. Matches with no
		 * entry (the accent marks) are removed.
		 *
		 * @param {string} str
		 * @return {string}
		 */
		const asciifold = str => {
			return decompose(str)
				.toLowerCase()
				.replace(convert_pat, (/** @type {string} */ char) => {
					return latin_convert[char] || '';
				});
		};






		/**
		@@ -259,42 +231,41 @@ * Generate a list of unicode variants from the list of code points
		*/
		/**
		 * Walk every code point in the given inclusive ranges and yield those
		 * whose folded form is a usable variant mapping.
		 *
		 * Characters are built with String.fromCodePoint so that ranges above
		 * U+FFFF produce the intended character instead of a truncated one.
		 *
		 * @param {TCodePoints} code_points
		 * @yield {TCodePointObj}
		 */
		function* generator(code_points) {
			for (const [code_point_min, code_point_max] of code_points) {
				for (let i = code_point_min; i <= code_point_max; i++) {
					const composed = String.fromCodePoint(i);
					const folded = asciifold(composed);

					// folding changed nothing beyond case
					if (folded == composed.toLowerCase()) {
						continue;
					}

					// skip when folded is a string longer than 3 characters long
					// bc the resulting regex patterns will be long
					// eg:
					// folded صلى الله عليه وسلم length 18 code point 65018
					// folded جل جلاله length 8 code point 65019
					if (folded.length > max_char_length) {
						continue;
					}

					if (folded.length == 0) {
						continue;
					}

					const decomposed = normalize(composed);
					const recomposed = normalize(decomposed, 'NFC');

					if (recomposed === composed && folded === decomposed) {
						continue;
					}

					yield {
						folded: folded,
						composed: composed,
						code_point: i
					};
				}
			}
		}


		/**
		@@ -305,35 +276,31 @@ * Generate a unicode map from the list of code points
		*/
		/**
		 * Group the variants produced by generator() by their folded form.
		 * Each set holds regex-escaped strings; a candidate is skipped when
		 * the set's current pattern already matches it (case-insensitively).
		 *
		 * @param {TCodePoints} code_points
		 * @return {{[key:string]:Set<string>}}
		 */
		const generateSets = code_points => {
			/** @type {{[key:string]:Set<string>}} */
			const unicode_sets = {};

			/**
			 * @param {string} folded
			 * @param {string} to_add
			 */
			const addMatching = (folded, to_add) => {
				/** @type {Set<string>} */
				const folded_set = unicode_sets[folded] || new Set();
				const patt = new RegExp('^' + setToPattern(folded_set) + '$', 'iu');

				if (to_add.match(patt)) {
					return;
				}

				folded_set.add(escape_regex(to_add));
				unicode_sets[folded] = folded_set;
			};

			for (const value of generator(code_points)) {
				addMatching(value.folded, value.folded);
				addMatching(value.folded, value.composed);
			}

			return unicode_sets;
		};

		/**
		@@ -346,33 +313,30 @@ * Generate a unicode map from the list of code points
		*/
		/**
		 * Turn the sets from `generateSets(code_points)` into a map of
		 * folded string -> regex pattern, and rebuild `multi_char_reg`.
		 *
		 * Side effect: assigns the outer `multi_char_reg` binding (declared
		 * outside this block) a regex that matches any multi-character folded
		 * key at the start of a string.
		 *
		 * @param {*} code_points forwarded unchanged to `generateSets`
		 * @return {TUnicodeMap}
		 */
		const generateMap = code_points => {
			/** @type {TUnicodeSets} */
			const unicode_sets = generateSets(code_points);

			/** @type {TUnicodeMap} */
			const unicode_map = {};

			/** @type {string[]} */
			const multi_char = [];

			for (const folded in unicode_sets) {
				const set = unicode_sets[folded];

				if (set) {
					unicode_map[folded] = setToPattern(set);
				}

				if (folded.length > 1) {
					multi_char.push(escape_regex(folded));
				}
			}

			// Longest alternatives first, so the alternation tries longer keys
			// before shorter ones.
			multi_char.sort((a, b) => b.length - a.length);
			const multi_char_patt = arrayToPattern(multi_char);
			multi_char_reg = new RegExp('^' + multi_char_patt, 'u');
			return unicode_map;
		};


		/**
		@@ -384,20 +348,19 @@ * Map each element of an array from its folded value to all possible unicode matches
		*/
		/**
		 * Replace each string that has an entry in `exports.unicode_map` with
		 * that entry, then join the result with `sequencePattern`.
		 *
		 * @param {string[]} strings
		 * @param {number} min_replacement minimum total length of replaced
		 *   strings required to produce a pattern
		 * @return {string} the combined pattern, or '' when fewer than
		 *   `min_replacement` characters were replaced
		 */
		const mapSequence = (strings, min_replacement = 1) => {
			let chars_replaced = 0;

			const mapped = strings.map(str => {
				const variant = exports.unicode_map[str];

				if (variant) {
					chars_replaced += str.length;
				}

				// Strings without a (truthy) map entry pass through unchanged.
				return variant || str;
			});

			if (chars_replaced >= min_replacement) {
				return sequencePattern(mapped);
			}

			return '';
		};

		/**
		@@ -416,13 +379,9 @@ * Convert a short string and split it into all possible patterns
		*/
		/**
		 * Build one pattern from every split returned by `allSubstrings(str)`,
		 * mapping each split through `mapSequence`.
		 *
		 * @param {string} str
		 * @param {number} min_replacement lower bound passed to `mapSequence`;
		 *   raised to at least `str.length - 1`
		 * @return {string}
		 */
		const substringsToPattern = (str, min_replacement = 1) => {
			const required = Math.max(min_replacement, str.length - 1);
			return arrayToPattern(allSubstrings(str).map(sub_pat => {
				return mapSequence(sub_pat, required);
			}));
		};

		/**
		@@ -435,18 +394,16 @@ * Convert an array of sequences into a pattern
		*/
		/**
		 * Convert an array of sequences into a single pattern.
		 *
		 * @param {Sequence[]} sequences
		 * @param {boolean} all when false, the last part of every sequence is
		 *   left out of the pattern
		 * @return {string}
		 */
		const sequencesToPattern = (sequences, all = true) => {
			// With a single sequence no replacement is required; with several,
			// at least one character must have been replaced.
			const min_replacement = sequences.length > 1 ? 1 : 0;

			return arrayToPattern(sequences.map(sequence => {
				const seq = [];
				const len = all ? sequence.length() : sequence.length() - 1;

				for (let j = 0; j < len; j++) {
					seq.push(substringsToPattern(sequence.substrs[j] || '', min_replacement));
				}

				return sequencePattern(seq);
			}));
		};

		/**
		@@ -457,118 +414,116 @@ * Return true if the sequence is already in the sequences
		*/
		/**
		 * Return true if an equivalent of `needle_seq` is already present in
		 * `sequences`.
		 *
		 * A candidate is equivalent when it has the same start, end and joined
		 * substrings, and none of its parts conflicts with the needle's parts
		 * (see `conflicts` below).
		 *
		 * @param {Sequence} needle_seq
		 * @param {Sequence[]} sequences
		 * @return {boolean}
		 */
		const inSequences = (needle_seq, sequences) => {
			const needle_parts = needle_seq.parts;

			/**
			 * True when `part` overlaps a multi-character needle part without
			 * being identical to any needle part checked before it.
			 *
			 * @param {TSequencePart} part
			 */
			const conflicts = part => {
				for (const needle_part of needle_parts) {
					// An identical part settles it: no conflict.
					if (needle_part.start === part.start && needle_part.substr === part.substr) {
						return false;
					}

					// Single-character parts are never treated as overlapping.
					if (part.length === 1 || needle_part.length === 1) {
						continue;
					}

					// check for overlapping parts
					// a = ['::=','==']
					// b = ['::','===']
					// a = ['r','sm']
					// b = ['rs','m']
					if (part.start < needle_part.start && part.end > needle_part.start) {
						return true;
					}

					if (needle_part.start < part.start && needle_part.end > part.start) {
						return true;
					}
				}

				return false;
			};

			for (const seq of sequences) {
				if (seq.start !== needle_seq.start || seq.end !== needle_seq.end) {
					continue;
				}

				if (seq.substrs.join('') !== needle_seq.substrs.join('')) {
					continue;
				}

				if (seq.parts.some(conflicts)) {
					continue;
				}

				return true;
			}

			return false;
		};

		/**
		 * An ordered list of parts (`{start, end, length, substr}` records)
		 * together with the substrings of those parts and the overall
		 * start/end bounds.
		 */
		class Sequence {
			constructor() {
				/** @type {TSequencePart[]} */
				this.parts = [];

				/** @type {string[]} */
				this.substrs = [];

				// NOTE(review): start begins at 0 and is only ever lowered via
				// Math.min, so it stays 0 for non-negative part starts.
				this.start = 0;
				this.end = 0;
			}

			/**
			 * Append a part and widen the bounds; a falsy part is ignored.
			 *
			 * @param {TSequencePart|undefined} part
			 */
			add(part) {
				if (part) {
					this.parts.push(part);
					this.substrs.push(part.substr);
					this.start = Math.min(part.start, this.start);
					this.end = Math.max(part.end, this.end);
				}
			}

			/**
			 * @return {TSequencePart|undefined} the most recently added part
			 */
			last() {
				return this.parts[this.parts.length - 1];
			}

			/**
			 * @return {number} number of parts
			 */
			length() {
				return this.parts.length;
			}

			/**
			 * Copy this sequence, truncating its last part so that it ends at
			 * `position`.
			 *
			 * All parts but the last are copied as-is. The last part keeps its
			 * start, and its substring becomes the prefix of
			 * `last_piece.substr` of length `position - start`.
			 *
			 * @param {number} position
			 * @param {TSequencePart} last_piece
			 * @return {Sequence}
			 */
			clone(position, last_piece) {
				const clone = new Sequence();
				const last_part = this.last();

				// Parts are flat records, so a shallow copy keeps the clone
				// independent of this sequence.
				for (const part of this.parts.slice(0, -1)) {
					clone.add(Object.assign({}, part));
				}

				const last_substr = last_piece.substr.substring(0, position - last_part.start);
				const clone_last_len = last_substr.length;
				clone.add({
					start: last_part.start,
					end: last_part.start + clone_last_len,
					length: clone_last_len,
					substr: last_substr
				});
				return clone;
			}
		}

		/**
		@@ -589,98 +544,98 @@ * Expand a regular expression pattern to include unicode variants
		*/
		/**
		 * Expand a string into a regular expression pattern that includes its
		 * unicode variants.
		 *
		 * The string is folded with `asciifold`, then walked one index at a
		 * time. At each index the remaining text is tested against
		 * `multi_char_reg`, and every tracked sequence receives either the
		 * multi-character match or the single character. When a match starts
		 * inside the last part of a sequence, a truncated clone is tracked as
		 * an alternative instead.
		 *
		 * @param {string} str
		 * @return {string}
		 */
		const getPattern = str => {
			initialize();
			str = asciifold(str);
			let pattern = '';
			let sequences = [new Sequence()];

			for (let i = 0; i < str.length; i++) {
				const substr = str.substring(i);
				const match = substr.match(multi_char_reg);
				const char = str.substring(i, i + 1);
				const match_str = match ? match[0] : null;

				// loop through sequences
				// add either the char or multi_match
				let overlapping = [];
				const added_types = new Set();

				for (const sequence of sequences) {
					const last_piece = sequence.last();

					if (!last_piece || last_piece.length === 1 || last_piece.end <= i) {
						// if we have a multi match
						if (match_str) {
							const len = match_str.length;
							sequence.add({
								start: i,
								end: i + len,
								length: len,
								substr: match_str
							});
							added_types.add('1');
						} else {
							sequence.add({
								start: i,
								end: i + 1,
								length: 1,
								substr: char
							});
							added_types.add('2');
						}
					} else if (match_str) {
						// The match starts inside the last multi-char part:
						// branch off a clone truncated at i and extend that.
						const clone = sequence.clone(i, last_piece);
						const len = match_str.length;
						clone.add({
							start: i,
							end: i + len,
							length: len,
							substr: match_str
						});
						overlapping.push(clone);
					} else {
						// don't add char
						// adding would create invalid patterns: 234 => [2,34,4]
						added_types.add('3');
					}
				}

				// if we have overlapping
				if (overlapping.length > 0) {
					// ['ii','iii'] before ['i','i','iii']
					overlapping = overlapping.sort((a, b) => {
						return a.length() - b.length();
					});

					for (const clone of overlapping) {
						// don't add if we already have an equivalent sequence
						if (inSequences(clone, sequences)) {
							continue;
						}

						sequences.push(clone);
					}

					continue;
				}

				// if we haven't done anything unique
				// clean up the patterns
				// helps keep patterns smaller
				// if str = 'r₨㎧aarss', pattern will be 446 instead of 655
				if (i > 0 && added_types.size === 1 && !added_types.has('3')) {
					pattern += sequencesToPattern(sequences, false);
					const new_seq = new Sequence();
					const old_seq = sequences[0];

					if (old_seq) {
						// Carry the part just added into the fresh sequence;
						// it was left out of the pattern flushed above.
						new_seq.add(old_seq.last());
					}

					sequences = [new_seq];
				}
			}

			pattern += sequencesToPattern(sequences, true);
			return pattern;
		};
		@@ -687,0 +642,0 @@

package.json

		{
		"name": "@orchidjs/unicode-variants",
		"version": "1.0.1",
		"version": "1.0.2",
		"description": "Unicode variant string matching",
		@@ -5,0 +5,0 @@ "main": "dist/umd/index.js",

dist/esm/index.mjs.js

dist/esm/index.mjs.js.map

dist/esm/regex.mjs.js

dist/esm/regex.mjs.js.map

dist/esm/strings.mjs.js

dist/esm/strings.mjs.js.map

dist/cjs/index.js.map

Sorry, the diff of this file is not supported yet

dist/umd/index.js.map

Sorry, the diff of this file is not supported yet

dist/umd/index.min.js.map

Sorry, the diff of this file is not supported yet

@orchidjs/unicode-variants - npm Package Compare versions

Improved metrics