@6degrees/arabic-strings
Advanced tools
Comparing version 0.3.0 to 1.0.0
# @6degrees/arabic-strings | ||
## 1.0.0 | ||
### Major Changes | ||
- Flat structure | ||
## 0.3.0 | ||
@@ -4,0 +10,0 @@ |
@@ -0,2 +1,17 @@ | ||
/** Arabic diacritic and sign characters, one character per array entry. */
declare const TASHKEEL: Array<string>; | ||
/** Global regex matching the alif variants U+0623, U+0625 and U+0622. */
declare const ALIF_REGEX: RegExp; | ||
/** Global regex matching U+0649 (alef maksura) and U+064A (ya). */
declare const YA_REGEX: RegExp; | ||
/** Global regex matching U+0629 (ta marbuta). */
declare const TA_REGEX: RegExp; | ||
/** Global regex built from the entries of TASHKEEL; used by removeTashkel. */
declare const TASHKEEL_REGEX: RegExp; | ||
/** Global regex matching U+0640 (tatweel). */
declare const TATWEEL_REGEX: RegExp; | ||
/**
 * Ratio of characters in U+0621-U+0652 to all characters of `str`,
 * computed after characters in U+0021-U+0040 and whitespace are stripped.
 */
declare function howArabic(str: string): number; | ||
/**
 * Ratio of characters outside U+0621-U+0652 to all characters of `str`,
 * computed after characters in U+0021-U+0040 and whitespace are stripped.
 */
declare function howNotArabic(str: string): number; | ||
/**
 * True when howArabic(str) is greater than or equal to `threshold`.
 * The implementation defaults `threshold` to 0.79.
 */
declare function isArabic(str: string, threshold?: number): boolean; | ||
/** True when `str` contains at least one character in U+0621-U+064A. */
declare function hasArabic(str: string): boolean; | ||
/** Returns `str` with every TASHKEEL_REGEX match removed. */
declare function removeTashkel(str: string): string; | ||
/**
 * Returns `str` with every character outside U+0621-U+0652 removed, except
 * the characters in `excludeChars`. `excludeChars` is interpolated unescaped
 * into a regex character class.
 */
declare function removeNonArabic(str: string, excludeChars?: string): string; | ||
/** Returns `str` with every character in U+0621-U+0652 removed. */
declare function removeArabic(str: string): string; | ||
/** Returns `str` with every TATWEEL_REGEX match (U+0640) removed. */
declare function removeTatwel(str: string): string; | ||
/**
 * Replaces ALIF_REGEX matches with U+0627, YA_REGEX matches with U+064A and
 * TA_REGEX matches with U+0647, then removes tatweel and tashkeel.
 */
declare function sanitize(str: string): string; | ||
export { } | ||
export { ALIF_REGEX, TASHKEEL, TASHKEEL_REGEX, TATWEEL_REGEX, TA_REGEX, YA_REGEX, hasArabic, howArabic, howNotArabic, isArabic, removeArabic, removeNonArabic, removeTashkel, removeTatwel, sanitize }; |
"use strict"; | ||
// Cached references to the Object built-ins used by the interop helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Registers every enumerable key of `all` on `target` as an enumerable
// accessor whose getter is the function stored under that key.
var __export = (target, all) => {
  for (var exportName in all) {
    const getter = all[exportName];
    __defProp(target, exportName, { get: getter, enumerable: true });
  }
};

// Copies the own properties of `from` onto `to` as live getters, skipping
// keys `to` already owns and the single key named by `except`.
// Enumerability mirrors the source property descriptor.
var __copyProps = (to, from, except, desc) => {
  const isObject = from && typeof from === "object";
  const isFunction = typeof from === "function";
  if (!(isObject || isFunction)) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

// Builds a fresh object flagged with a non-enumerable `__esModule: true`
// and copies the properties of `mod` onto it.
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
// src/index.ts | ||
// Static-only helper class for measuring and cleaning Arabic text.
// The regex/array statics it reads (TASHKEEL_REGEX, TATWEEL_REGEX, ALIF_REGEX,
// YA_REGEX, TA_REGEX) are assigned on the class after this definition.
var _arabicStrings = class {
  /**
   * Ratio of characters in U+0621-U+0652 to all characters of `str`, after
   * characters in U+0021-U+0040 and whitespace are stripped.
   * Returns 0 (instead of NaN from 0/0) when nothing is left after stripping.
   * @param {string} str
   * @returns {number}
   */
  static howArabic(str) {
    str = str.replace(/[\u0021-\u0040\s]/gm, "");
    if (str.length === 0) {
      return 0;
    }
    const match = str.match(/[\u0621-\u0652]/gm) || [];
    return match.length / str.length;
  }

  /**
   * Ratio of characters outside U+0621-U+0652 to all characters of `str`,
   * after characters in U+0021-U+0040 and whitespace are stripped.
   * Returns 0 (instead of NaN from 0/0) when nothing is left after stripping.
   * @param {string} str
   * @returns {number}
   */
  static howNotArabic(str) {
    str = str.replace(/[\u0021-\u0040\s]/gm, "");
    if (str.length === 0) {
      return 0;
    }
    const match = str.match(/[^\u0621-\u0652]/gm) || [];
    return match.length / str.length;
  }

  /**
   * True when howArabic(str) reaches `threshold`.
   * @param {string} str
   * @param {number} [threshold=0.79]
   * @returns {boolean}
   */
  static isArabic(str, threshold = 0.79) {
    return _arabicStrings.howArabic(str) >= threshold;
  }

  /**
   * True when `str` contains at least one character in U+0621-U+064A.
   * @param {string} str
   * @returns {boolean}
   */
  static hasArabic(str) {
    return /[\u0621-\u064A]/.test(str);
  }

  /**
   * Removes every TASHKEEL_REGEX match from `str`.
   * @param {string} str
   * @returns {string}
   */
  static removeTashkel(str) {
    return str.replace(_arabicStrings.TASHKEEL_REGEX, "");
  }

  /**
   * Removes every character outside U+0621-U+0652, except those listed in
   * `excludeChars`. `excludeChars` is interpolated unescaped into a regex
   * character class, so class syntax such as ranges is honoured.
   * @param {string} str
   * @param {string} [excludeChars=""]
   * @returns {string}
   */
  static removeNonArabic(str, excludeChars = "") {
    // The previously built `exclusionRegex` was never used and is dropped.
    return str.replace(new RegExp(`[^\\u0621-\\u0652${excludeChars}]`, "gm"), "");
  }

  /**
   * Removes every character in U+0621-U+0652 from `str`.
   * @param {string} str
   * @returns {string}
   */
  static removeArabic(str) {
    return str.replace(/[\u0621-\u0652]/gm, "");
  }

  /**
   * Removes every TATWEEL_REGEX match from `str`.
   * @param {string} str
   * @returns {string}
   */
  static removeTatwel(str) {
    return str.replace(_arabicStrings.TATWEEL_REGEX, "");
  }

  /**
   * Replaces ALIF_REGEX matches with U+0627, YA_REGEX matches with U+064A and
   * TA_REGEX matches with U+0647, then removes tatweel and tashkeel.
   * @param {string} str
   * @returns {string}
   */
  static sanitize(str) {
    str = str.replace(_arabicStrings.ALIF_REGEX, "\u0627").replace(_arabicStrings.YA_REGEX, "\u064A").replace(_arabicStrings.TA_REGEX, "\u0647");
    str = _arabicStrings.removeTatwel(str);
    str = _arabicStrings.removeTashkel(str);
    return str;
  }
};
var arabicStrings = _arabicStrings; | ||
arabicStrings.TASHKEEL = [ | ||
// Export table for the CommonJS build. __export registers each key below on
// src_exports as an enumerable getter, so the bindings (declared further down
// in the file) are only read when a consumer accesses the property.
var src_exports = {}; | ||
__export(src_exports, { | ||
ALIF_REGEX: () => ALIF_REGEX, | ||
TASHKEEL: () => TASHKEEL, | ||
TASHKEEL_REGEX: () => TASHKEEL_REGEX, | ||
TATWEEL_REGEX: () => TATWEEL_REGEX, | ||
TA_REGEX: () => TA_REGEX, | ||
YA_REGEX: () => YA_REGEX, | ||
hasArabic: () => hasArabic, | ||
howArabic: () => howArabic, | ||
howNotArabic: () => howNotArabic, | ||
isArabic: () => isArabic, | ||
removeArabic: () => removeArabic, | ||
removeNonArabic: () => removeNonArabic, | ||
removeTashkel: () => removeTashkel, | ||
removeTatwel: () => removeTatwel, | ||
sanitize: () => sanitize | ||
}); | ||
// Publish a copy of the table, flagged with __esModule, as the module's exports.
module.exports = __toCommonJS(src_exports); | ||
var TASHKEEL = [ | ||
"\u0610", | ||
@@ -107,7 +102,61 @@ // ( ؐ) arabic sign sallallahou alayhe wassallam | ||
]; | ||
// Alif variants: U+0623, U+0625, U+0622.
arabicStrings.ALIF_REGEX = new RegExp("[\u0623\u0625\u0622]", "g");
// U+0649 (alef maksura) and U+064A (ya).
arabicStrings.YA_REGEX = new RegExp("[\u0649\u064A]", "g");
// U+0629 (ta marbuta).
arabicStrings.TA_REGEX = new RegExp("[\u0629]", "g");
// Wrap the joined marks in a character class so ANY single mark matches.
// Without the brackets the pattern only matches all the marks appearing
// back-to-back in list order, so removeTashkel effectively removed nothing.
arabicStrings.TASHKEEL_REGEX = new RegExp(`[${_arabicStrings.TASHKEEL.join("")}]`, "g");
// U+0640 (tatweel).
arabicStrings.TATWEEL_REGEX = new RegExp("\u0640", "g");
module.exports = arabicStrings; | ||
// Alif variants: U+0623, U+0625, U+0622.
var ALIF_REGEX = new RegExp("[\u0623\u0625\u0622]", "g");
// U+0649 (alef maksura) and U+064A (ya).
var YA_REGEX = new RegExp("[\u0649\u064A]", "g");
// U+0629 (ta marbuta).
var TA_REGEX = new RegExp("[\u0629]", "g");
// Wrap the joined marks in a character class so ANY single mark matches.
// Without the brackets the pattern only matches all the marks appearing
// back-to-back in list order, so removeTashkel effectively removed nothing.
var TASHKEEL_REGEX = new RegExp(`[${TASHKEEL.join("")}]`, "g");
// U+0640 (tatweel).
var TATWEEL_REGEX = new RegExp("\u0640", "g");
/**
 * Ratio of characters in U+0621-U+0652 to all characters of `str`, after
 * characters in U+0021-U+0040 and whitespace are stripped.
 *
 * Returns 0 when nothing is left after stripping (empty string, or only
 * digits/punctuation/whitespace); the unguarded division produced NaN there.
 *
 * @param {string} str Text to measure.
 * @returns {number} A value between 0 and 1.
 */
function howArabic(str) {
  const stripped = str.replace(/[\u0021-\u0040\s]/gm, "");
  if (stripped.length === 0) {
    return 0;
  }
  const match = stripped.match(/[\u0621-\u0652]/gm) || [];
  return match.length / stripped.length;
}
/**
 * Ratio of characters outside U+0621-U+0652 to all characters of `str`,
 * after characters in U+0021-U+0040 and whitespace are stripped.
 *
 * Returns 0 when nothing is left after stripping (empty string, or only
 * digits/punctuation/whitespace); the unguarded division produced NaN there.
 *
 * @param {string} str Text to measure.
 * @returns {number} A value between 0 and 1.
 */
function howNotArabic(str) {
  const stripped = str.replace(/[\u0021-\u0040\s]/gm, "");
  if (stripped.length === 0) {
    return 0;
  }
  const match = stripped.match(/[^\u0621-\u0652]/gm) || [];
  return match.length / stripped.length;
}
/**
 * Decides whether `str` counts as Arabic: true when the ratio reported by
 * howArabic(str) is greater than or equal to `threshold`.
 *
 * @param {string} str Text to classify.
 * @param {number} [threshold=0.79] Minimum ratio required.
 * @returns {boolean}
 */
function isArabic(str, threshold = 0.79) {
  const ratio = howArabic(str);
  return ratio >= threshold;
}
/**
 * Reports whether `str` contains at least one character in U+0621-U+064A.
 *
 * @param {string} str Text to inspect.
 * @returns {boolean}
 */
function hasArabic(str) {
  const arabicLetter = /[\u0621-\u064A]/;
  return arabicLetter.test(str);
}
/**
 * Returns `str` with every TASHKEEL_REGEX match replaced by the empty string.
 *
 * @param {string} str Text to clean.
 * @returns {string}
 */
function removeTashkel(str) {
  const withoutTashkeel = str.replace(TASHKEEL_REGEX, "");
  return withoutTashkeel;
}
/**
 * Returns `str` with every character outside U+0621-U+0652 removed, except
 * the characters listed in `excludeChars`.
 *
 * `excludeChars` is interpolated unescaped into a regex character class, so
 * class syntax such as ranges ("0-9") is honoured and an invalid class makes
 * the RegExp constructor throw.
 *
 * @param {string} str Text to filter.
 * @param {string} [excludeChars=""] Extra characters to keep.
 * @returns {string}
 */
function removeNonArabic(str, excludeChars = "") {
  // The previously built `exclusionRegex` was never used and is dropped;
  // it only cost an extra RegExp compilation per call.
  const nonArabic = new RegExp(`[^\\u0621-\\u0652${excludeChars}]`, "gm");
  return str.replace(nonArabic, "");
}
/**
 * Returns `str` with every character in U+0621-U+0652 removed.
 *
 * @param {string} str Text to filter.
 * @returns {string}
 */
function removeArabic(str) {
  const arabicChars = /[\u0621-\u0652]/gm;
  return str.replace(arabicChars, "");
}
/**
 * Returns `str` with every TATWEEL_REGEX match replaced by the empty string.
 *
 * @param {string} str Text to clean.
 * @returns {string}
 */
function removeTatwel(str) {
  const withoutTatweel = str.replace(TATWEEL_REGEX, "");
  return withoutTatweel;
}
/**
 * Normalises `str` in three steps:
 *   1. ALIF_REGEX matches become U+0627, YA_REGEX matches become U+064A and
 *      TA_REGEX matches become U+0647 (applied in that order);
 *   2. tatweel is removed via removeTatwel;
 *   3. tashkeel is removed via removeTashkel.
 *
 * @param {string} str Text to normalise.
 * @returns {string}
 */
function sanitize(str) {
  const unifiedLetters = str
    .replace(ALIF_REGEX, "\u0627")
    .replace(YA_REGEX, "\u064A")
    .replace(TA_REGEX, "\u0647");
  const withoutTatweel = removeTatwel(unifiedLetters);
  return removeTashkel(withoutTatweel);
}
// Annotate the CommonJS export names for ESM import in node.
// `0 && (...)` short-circuits, so this assignment never executes; the object
// literal exists only so the export names appear literally in the file.
0 && (module.exports = { | ||
ALIF_REGEX, | ||
TASHKEEL, | ||
TASHKEEL_REGEX, | ||
TATWEEL_REGEX, | ||
TA_REGEX, | ||
YA_REGEX, | ||
hasArabic, | ||
howArabic, | ||
howNotArabic, | ||
isArabic, | ||
removeArabic, | ||
removeNonArabic, | ||
removeTashkel, | ||
removeTatwel, | ||
sanitize | ||
}); |
{ | ||
"name": "@6degrees/arabic-strings", | ||
"license": "MIT", | ||
"version": "0.3.0", | ||
"version": "1.0.0", | ||
"main": "dist/index.js", | ||
@@ -6,0 +6,0 @@ "module": "dist/index.mjs", |
180
src/index.ts
@@ -1,101 +0,115 @@ | ||
class arabicStrings { | ||
static TASHKEEL = [ | ||
"\u0610", // ( ؐ) arabic sign sallallahou alayhe wassallam | ||
"\u0611", // ( ؑ) arabic sign alayhe assallam | ||
"\u0612", // ( ؒ) arabic sign rahmatullah alayhe | ||
"\u0613", // ( ؓ) arabic sign radi allahou anhu | ||
"\u0614", // ( ؔ) arabic sign takhallus | ||
"\u0615", // ( ؕ) arabic small high tah | ||
"\u0616", // ( ؖ) arabic small high ligature alef with lam with yeh | ||
"\u0617", // ( ؗ) arabic small high zain | ||
"\u0618", // ( ؘ) arabic small fatha | ||
"\u0619", // ( ؙ) arabic small damma | ||
"\u061a", // ( ؚ) arabic small kasra | ||
"\u064b", // ( ً) arabic fathatan | ||
"\u064c", // ( ٌ) arabic dammatan | ||
"\u064d", // ( ٍ) arabic kasratan | ||
"\u064e", // ( َ) arabic fatha | ||
"\u064f", // ( ُ) arabic damma | ||
"\u0650", // ( ِ) arabic kasra | ||
"\u0651", // ( ّ) arabic shadda | ||
"\u0652", // ( ْ) arabic sukun | ||
"\u0653", // ( ٓ) arabic maddah above | ||
"\u0654", // ( ٔ) arabic hamza above | ||
"\u0655", // ( ٕ) arabic hamza below | ||
"\u0656", // ( ٖ) arabic subscript alef | ||
"\u0657", // ( ٗ) arabic inverted damma | ||
"\u0658", // ( ٘) arabic mark noon ghunna | ||
"\u065a", // ( ٚ) arabic vowel sign small v above | ||
"\u065b", // ( ٛ) arabic vowel sign inverted small v above | ||
"\u065c", // ( ٜ) arabic vowel sign dot below | ||
"\u065d", // ( ٝ) arabic reversed damma | ||
"\u065e", // ( ٞ) arabic fatha with two dots | ||
] | ||
static ALIF_REGEX = new RegExp("[أإآ]", "g"); | ||
static YA_REGEX = new RegExp("[ىي]", "g"); | ||
const TASHKEEL: Array<string> = [ | ||
"\u0610", // ( ؐ) arabic sign sallallahou alayhe wassallam | ||
"\u0611", // ( ؑ) arabic sign alayhe assallam | ||
"\u0612", // ( ؒ) arabic sign rahmatullah alayhe | ||
"\u0613", // ( ؓ) arabic sign radi allahou anhu | ||
"\u0614", // ( ؔ) arabic sign takhallus | ||
"\u0615", // ( ؕ) arabic small high tah | ||
"\u0616", // ( ؖ) arabic small high ligature alef with lam with yeh | ||
"\u0617", // ( ؗ) arabic small high zain | ||
"\u0618", // ( ؘ) arabic small fatha | ||
"\u0619", // ( ؙ) arabic small damma | ||
"\u061a", // ( ؚ) arabic small kasra | ||
"\u064b", // ( ً) arabic fathatan | ||
"\u064c", // ( ٌ) arabic dammatan | ||
"\u064d", // ( ٍ) arabic kasratan | ||
"\u064e", // ( َ) arabic fatha | ||
"\u064f", // ( ُ) arabic damma | ||
"\u0650", // ( ِ) arabic kasra | ||
"\u0651", // ( ّ) arabic shadda | ||
"\u0652", // ( ْ) arabic sukun | ||
"\u0653", // ( ٓ) arabic maddah above | ||
"\u0654", // ( ٔ) arabic hamza above | ||
"\u0655", // ( ٕ) arabic hamza below | ||
"\u0656", // ( ٖ) arabic subscript alef | ||
"\u0657", // ( ٗ) arabic inverted damma | ||
"\u0658", // ( ٘) arabic mark noon ghunna | ||
"\u065a", // ( ٚ) arabic vowel sign small v above | ||
"\u065b", // ( ٛ) arabic vowel sign inverted small v above | ||
"\u065c", // ( ٜ) arabic vowel sign dot below | ||
"\u065d", // ( ٝ) arabic reversed damma | ||
"\u065e", // ( ٞ) arabic fatha with two dots | ||
] | ||
static TA_REGEX = new RegExp("[ة]", "g"); | ||
const ALIF_REGEX = new RegExp("[أإآ]", "g"); | ||
static TASHKEEL_REGEX = new RegExp(arabicStrings.TASHKEEL.join(""), "g"); | ||
const YA_REGEX = new RegExp("[ىي]", "g"); | ||
static TATWEEL_REGEX = new RegExp("\u0640", "g"); // /ـ/g; // /\u0640/g | ||
static howArabic(str: string): number { | ||
// strip punctuation, digits, and spaces | ||
str = str.replace(/[\u0021-\u0040\s]/gm, ""); | ||
const TA_REGEX = new RegExp("[ة]", "g"); | ||
const match = str.match(/[\u0621-\u0652]/gm) || []; | ||
const result = match.length / str.length; | ||
const TASHKEEL_REGEX = new RegExp(TASHKEEL.join(""), "g"); | ||
return result; | ||
} | ||
const TATWEEL_REGEX = new RegExp("\u0640", "g"); // /ـ/g; // /\u0640/g | ||
static howNotArabic(str: string): number { | ||
// strip punctuation, digits, and spaces | ||
str = str.replace(/[\u0021-\u0040\s]/gm, ""); | ||
function howArabic(str: string): number { | ||
// strip punctuation, digits, and spaces | ||
str = str.replace(/[\u0021-\u0040\s]/gm, ""); | ||
const match = str.match(/[^\u0621-\u0652]/gm) || []; | ||
const result = match.length / str.length; | ||
const match = str.match(/[\u0621-\u0652]/gm) || []; | ||
const result = match.length / str.length; | ||
return result; | ||
} | ||
return result; | ||
} | ||
static isArabic(str: string, threshold: number = 0.79): boolean { | ||
return arabicStrings.howArabic(str) >= threshold; | ||
} | ||
function howNotArabic(str: string): number { | ||
// strip punctuation, digits, and spaces | ||
str = str.replace(/[\u0021-\u0040\s]/gm, ""); | ||
static hasArabic(str: string): boolean { | ||
return /[\u0621-\u064A]/.test(str); | ||
} | ||
const match = str.match(/[^\u0621-\u0652]/gm) || []; | ||
const result = match.length / str.length; | ||
static removeTashkel(str: string): string { | ||
return str.replace(arabicStrings.TASHKEEL_REGEX, ''); | ||
} | ||
return result; | ||
} | ||
static removeNonArabic(str: string, excludeChars: string = ""): string { | ||
const exclusionRegex = new RegExp(`[${excludeChars}]`, "gm"); | ||
return str.replace(new RegExp(`[^\\u0621-\\u0652${excludeChars}]`, "gm"), ""); | ||
} | ||
static removeArabic(str: string): string { | ||
return str.replace(/[\u0621-\u0652]/gm, ""); | ||
} | ||
function isArabic(str: string, threshold: number = 0.79): boolean { | ||
return howArabic(str) >= threshold; | ||
} | ||
static removeTatwel (str: string): string { | ||
return str.replace(arabicStrings.TATWEEL_REGEX, ''); | ||
function hasArabic(str: string): boolean { | ||
return /[\u0621-\u064A]/.test(str); | ||
} | ||
} | ||
function removeTashkel(str: string): string { | ||
return str.replace(TASHKEEL_REGEX, ''); | ||
} | ||
static sanitize(str: string): string { | ||
str = str.replace(arabicStrings.ALIF_REGEX, 'ا').replace(arabicStrings.YA_REGEX, 'ي').replace(arabicStrings.TA_REGEX, 'ه'); | ||
str = arabicStrings.removeTatwel(str) | ||
str = arabicStrings.removeTashkel(str) | ||
return str; | ||
} | ||
function removeNonArabic(str: string, excludeChars: string = ""): string { | ||
const exclusionRegex = new RegExp(`[${excludeChars}]`, "gm"); | ||
return str.replace(new RegExp(`[^\\u0621-\\u0652${excludeChars}]`, "gm"), ""); | ||
} | ||
function removeArabic(str: string): string { | ||
return str.replace(/[\u0621-\u0652]/gm, ""); | ||
} | ||
module.exports = arabicStrings; | ||
function removeTatwel (str: string): string { | ||
return str.replace(TATWEEL_REGEX, ''); | ||
} | ||
function sanitize(str: string): string { | ||
str = str.replace(ALIF_REGEX, 'ا').replace(YA_REGEX, 'ي').replace(TA_REGEX, 'ه'); | ||
str = removeTatwel(str) | ||
str = removeTashkel(str) | ||
return str; | ||
} | ||
export { | ||
TASHKEEL, | ||
ALIF_REGEX, | ||
TASHKEEL_REGEX, | ||
TA_REGEX, | ||
YA_REGEX, | ||
TATWEEL_REGEX, | ||
howArabic, | ||
howNotArabic, | ||
isArabic, | ||
hasArabic, | ||
removeTashkel, | ||
removeNonArabic, | ||
removeArabic, | ||
removeTatwel, | ||
sanitize, | ||
}; |
# Todos | ||
- [ ] Consider borrowing intelligence from [pyarabic](https://github.com/linuxscout/pyarabic/blob/master/doc/features.md) | ||
- [ ] Consider borrowing intelligence from [pyarabic](https://github.com/linuxscout/pyarabic/blob/master/doc/features.md) |
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No v1
QualityPackage is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
19770
414
0