char-regex
Advanced tools
Comparing version 1.0.1 to 1.0.2
@@ -7,8 +7,8 @@ /** | ||
* | ||
* "❤️👊🏽".split(charRegex()) | ||
* "❤️👊🏽".match(charRegex()); | ||
* //=> ["❤️", "👊🏽"] | ||
* ``` | ||
*/ | ||
declare function charRegex(): RegExp; | ||
declare function charRegex(): RegExp | ||
export = charRegex; | ||
export = charRegex |
59
index.js
"use strict" | ||
// From: https://github.com/lodash/lodash/blob/master/.internal/unicodeSize.js | ||
// Based on: https://github.com/lodash/lodash/blob/6018350ac10d5ce6a5b7db625140b82aeab804df/.internal/unicodeSize.js | ||
module.exports = () => { | ||
/** Used to compose unicode character classes. */ | ||
const rsAstralRange = "\\ud800-\\udfff" | ||
const rsComboMarksRange = "\\u0300-\\u036f" | ||
const reComboHalfMarksRange = "\\ufe20-\\ufe2f" | ||
const rsComboSymbolsRange = "\\u20d0-\\u20ff" | ||
const rsComboMarksExtendedRange = "\\u1ab0-\\u1aff" | ||
const rsComboMarksSupplementRange = "\\u1dc0-\\u1dff" | ||
const rsComboRange = rsComboMarksRange + reComboHalfMarksRange + rsComboSymbolsRange + rsComboMarksExtendedRange + rsComboMarksSupplementRange | ||
const rsVarRange = "\\ufe0e\\ufe0f" | ||
// Used to compose unicode character classes. | ||
const astralRange = "\\ud800-\\udfff" | ||
const comboMarksRange = "\\u0300-\\u036f" | ||
const comboHalfMarksRange = "\\ufe20-\\ufe2f" | ||
const comboSymbolsRange = "\\u20d0-\\u20ff" | ||
const comboMarksExtendedRange = "\\u1ab0-\\u1aff" | ||
const comboMarksSupplementRange = "\\u1dc0-\\u1dff" | ||
const comboRange = comboMarksRange + comboHalfMarksRange + comboSymbolsRange + comboMarksExtendedRange + comboMarksSupplementRange | ||
const varRange = "\\ufe0e\\ufe0f" | ||
const familyRange = "\\uD83D\\uDC69\\uD83C\\uDFFB\\u200D\\uD83C\\uDF93" | ||
/** Used to compose unicode capture groups. */ | ||
const rsAstral = `[${rsAstralRange}]` | ||
const rsCombo = `[${rsComboRange}]` | ||
const rsFitz = "\\ud83c[\\udffb-\\udfff]" | ||
const rsModifier = `(?:${rsCombo}|${rsFitz})` | ||
const rsNonAstral = `[^${rsAstralRange}]` | ||
const rsRegional = "(?:\\ud83c[\\udde6-\\uddff]){2}" | ||
const rsSurrPair = "[\\ud800-\\udbff][\\udc00-\\udfff]" | ||
const rsZWJ = "\\u200d" | ||
// Used to compose unicode capture groups. | ||
const astral = `[${astralRange}]` | ||
const combo = `[${comboRange}]` | ||
const fitz = "\\ud83c[\\udffb-\\udfff]" | ||
const modifier = `(?:${combo}|${fitz})` | ||
const nonAstral = `[^${astralRange}]` | ||
const regional = "(?:\\uD83C[\\uDDE6-\\uDDFF]){2}" | ||
const surrogatePair = "[\\ud800-\\udbff][\\udc00-\\udfff]" | ||
const zwj = "\\u200d" | ||
const blackFlag = "(?:\\ud83c\\udff4\\udb40\\udc67\\udb40\\udc62\\udb40(?:\\udc65|\\udc73|\\udc77)\\udb40(?:\\udc6e|\\udc63|\\udc6c)\\udb40(?:\\udc67|\\udc74|\\udc73)\\udb40\\udc7f)" | ||
const family = `[${familyRange}]` | ||
/** Used to compose unicode regexes. */ | ||
const reOptMod = `${rsModifier}?` | ||
const rsOptVar = `[${rsVarRange}]?` | ||
const rsOptJoin = `(?:${rsZWJ}(?:${[rsNonAstral, rsRegional, rsSurrPair].join("|")})${rsOptVar + reOptMod})*` | ||
const rsSeq = rsOptVar + reOptMod + rsOptJoin | ||
const rsNonAstralCombo = `${rsNonAstral}${rsCombo}?` | ||
const rsSymbol = `(?:${[rsNonAstralCombo, rsCombo, rsRegional, rsSurrPair, rsAstral].join("|")})` | ||
// Used to compose unicode regexes. | ||
const optModifier = `${modifier}?` | ||
const optVar = `[${varRange}]?` | ||
const optJoin = `(?:${zwj}(?:${[nonAstral, regional, surrogatePair].join("|")})${optVar + optModifier})*` | ||
const seq = optVar + optModifier + optJoin | ||
const nonAstralCombo = `${nonAstral}${combo}?` | ||
const symbol = `(?:${[nonAstralCombo, combo, regional, surrogatePair, astral, family].join("|")})` | ||
/** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */ | ||
return new RegExp(`${rsFitz}(?=${rsFitz})|${rsSymbol + rsSeq}`, "g") | ||
// Used to match [String symbols](https://mathiasbynens.be/notes/javascript-unicode). | ||
return new RegExp(`${blackFlag}|${fitz}(?=${fitz})|${symbol + seq}`, "g") | ||
} |
{ | ||
"name": "char-regex", | ||
"version": "1.0.1", | ||
"version": "1.0.2", | ||
"description": "A regex to match any full character, considering weird character ranges.", | ||
@@ -29,4 +29,9 @@ "repository": "https://github.com/Richienb/char-regex.git", | ||
"devDependencies": { | ||
"@babel/core": "^7.8.4", | ||
"@babel/plugin-proposal-unicode-property-regex": "^7.8.3", | ||
"array-uniq": "^2.1.0", | ||
"ava": "^3.0.0", | ||
"emoji.json": "^12.1.1", | ||
"eslint-config-richienb": "^0.3.0", | ||
"unicode-chars": "^1.0.1", | ||
"xo": "^0.25.3" | ||
@@ -33,0 +38,0 @@ }, |
# Char Regex [![Travis CI Build Status](https://img.shields.io/travis/com/Richienb/char-regex/master.svg?style=for-the-badge)](https://travis-ci.com/Richienb/char-regex) | ||
A regex to match any full character, considering weird character ranges. Extracted from https://github.com/lodash/lodash/blob/master/.internal/unicodeSize.js. | ||
A regex to match any full character, considering weird character ranges. Tested on every single emoji and unicode character. Based on the Lodash implementation. | ||
@@ -18,6 +18,6 @@ [![NPM Badge](https://nodei.co/npm/char-regex.png)](https://npmjs.com/package/char-regex) | ||
"❤️👊🏽".match(/./) | ||
"❤️👊🏽".match(/./); | ||
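//=> ["\u2764", "\ufe0f", "\ud83d", "\udc4a", "\ud83c", "\udffd"] (one entry per UTF-16 code unit when matched with the g flag; the lone surrogates and the variation selector have no visible glyph)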
"❤️👊🏽".match(charRegex()) | ||
"❤️👊🏽".match(charRegex()); | ||
//=> ["❤️", "👊🏽"] | ||
@@ -24,0 +24,0 @@ ``` |
4959
46
8