Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

@6degrees/arabic-strings

Package Overview
Dependencies
Maintainers
1
Versions
9
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@6degrees/arabic-strings - npm Package Compare versions

Comparing version 0.3.0 to 1.0.0

6

CHANGELOG.md
# @6degrees/arabic-strings
## 1.0.0
### Major Changes
- Flat structure
## 0.3.0

@@ -4,0 +10,0 @@

17

dist/index.d.ts

@@ -0,2 +1,17 @@

/** Code points treated as tashkeel marks; joined together to build TASHKEEL_REGEX. */
declare const TASHKEEL: Array<string>;
/** Global regex matching U+0623, U+0625 and U+0622; sanitize() replaces matches with U+0627. */
declare const ALIF_REGEX: RegExp;
/** Global regex matching U+0649 and U+064A; sanitize() replaces matches with U+064A. */
declare const YA_REGEX: RegExp;
/** Global regex matching U+0629; sanitize() replaces matches with U+0647. */
declare const TA_REGEX: RegExp;
/** Global regex built from the TASHKEEL entries; used by removeTashkel(). */
declare const TASHKEEL_REGEX: RegExp;
/** Global regex matching U+0640; used by removeTatwel(). */
declare const TATWEEL_REGEX: RegExp;
/**
 * Ratio of characters in U+0621–U+0652 to all characters of `str`, computed
 * after characters in U+0021–U+0040 and whitespace have been removed.
 */
declare function howArabic(str: string): number;
/**
 * Ratio of characters outside U+0621–U+0652 to all characters of `str`,
 * computed after characters in U+0021–U+0040 and whitespace have been removed.
 */
declare function howNotArabic(str: string): number;
/** True when howArabic(str) is at least `threshold` (the implementation defaults it to 0.79). */
declare function isArabic(str: string, threshold?: number): boolean;
/** True when `str` contains at least one character in U+0621–U+064A. */
declare function hasArabic(str: string): boolean;
/** Returns `str` with every TASHKEEL_REGEX match removed. */
declare function removeTashkel(str: string): string;
/**
 * Returns `str` with every character outside U+0621–U+0652 removed, except
 * characters covered by `excludeChars`, which is interpolated verbatim into a
 * regex character class.
 */
declare function removeNonArabic(str: string, excludeChars?: string): string;
/** Returns `str` with every character in U+0621–U+0652 removed. */
declare function removeArabic(str: string): string;
/** Returns `str` with every TATWEEL_REGEX match (U+0640) removed. */
declare function removeTatwel(str: string): string;
/**
 * Applies the ALIF_REGEX, YA_REGEX and TA_REGEX replacements, then
 * removeTatwel() and removeTashkel(), and returns the result.
 */
declare function sanitize(str: string): string;
export { }
export { ALIF_REGEX, TASHKEEL, TASHKEEL_REGEX, TATWEEL_REGEX, TA_REGEX, YA_REGEX, hasArabic, howArabic, howNotArabic, isArabic, removeArabic, removeNonArabic, removeTashkel, removeTatwel, sanitize };

145

dist/index.js
"use strict";
// Short aliases for the Object reflection functions used by the helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines every key of `all` on `target` as an enumerable accessor property
// whose getter is the function stored at `all[name]`.
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
// Copies the own property names of `from` onto `to` as getters that read
// through to `from`. Keys that `to` already owns, and the key equal to
// `except`, are skipped. Returns `to`.
var __copyProps = (to, from, except, desc) => {
// Only objects and functions are copied from; anything else leaves `to` untouched.
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
// Enumerability mirrors the source descriptor; enumerable when no descriptor is found.
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
// Builds a fresh object carrying `__esModule: true` and copies `mod`'s properties onto it.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// src/index.ts
/**
 * Static helpers for measuring and normalising Arabic text.
 *
 * The regex members used below (ALIF_REGEX, YA_REGEX, TA_REGEX,
 * TASHKEEL_REGEX, TATWEEL_REGEX) are attached to the class after its
 * definition.
 */
var _arabicStrings = class {
  /**
   * Ratio of characters in U+0621–U+0652 to all remaining characters.
   *
   * Characters in U+0021–U+0040 and whitespace are removed before counting.
   * Returns 0 (instead of NaN from 0/0) when nothing is left to count.
   */
  static howArabic(str) {
    const significant = str.replace(/[\u0021-\u0040\s]/gm, "");
    if (significant.length === 0) {
      return 0;
    }
    const match = significant.match(/[\u0621-\u0652]/gm) || [];
    return match.length / significant.length;
  }
  /**
   * Ratio of characters outside U+0621–U+0652 to all remaining characters.
   *
   * Characters in U+0021–U+0040 and whitespace are removed before counting.
   * Returns 0 (instead of NaN from 0/0) when nothing is left to count.
   */
  static howNotArabic(str) {
    const significant = str.replace(/[\u0021-\u0040\s]/gm, "");
    if (significant.length === 0) {
      return 0;
    }
    const match = significant.match(/[^\u0621-\u0652]/gm) || [];
    return match.length / significant.length;
  }
  /** True when howArabic(str) is at least `threshold`. */
  static isArabic(str, threshold = 0.79) {
    return _arabicStrings.howArabic(str) >= threshold;
  }
  /** True when `str` contains at least one character in U+0621–U+064A. */
  static hasArabic(str) {
    return /[\u0621-\u064A]/.test(str);
  }
  /** Removes every TASHKEEL_REGEX match from `str`. */
  static removeTashkel(str) {
    return str.replace(_arabicStrings.TASHKEEL_REGEX, "");
  }
  /**
   * Removes every character outside U+0621–U+0652, except those covered by
   * `excludeChars`.
   *
   * `excludeChars` is interpolated verbatim into a negated character class,
   * so regex metacharacters in it (`-`, `]`, `\`) keep their regex meaning.
   */
  static removeNonArabic(str, excludeChars = "") {
    return str.replace(new RegExp(`[^\\u0621-\\u0652${excludeChars}]`, "gm"), "");
  }
  /** Removes every character in U+0621–U+0652 from `str`. */
  static removeArabic(str) {
    return str.replace(/[\u0621-\u0652]/gm, "");
  }
  /** Removes every TATWEEL_REGEX match from `str`. */
  static removeTatwel(str) {
    return str.replace(_arabicStrings.TATWEEL_REGEX, "");
  }
  /**
   * Replaces ALIF_REGEX matches with U+0627, YA_REGEX matches with U+064A and
   * TA_REGEX matches with U+0647, then removes tatweel and tashkeel matches.
   */
  static sanitize(str) {
    str = str.replace(_arabicStrings.ALIF_REGEX, "\u0627").replace(_arabicStrings.YA_REGEX, "\u064A").replace(_arabicStrings.TA_REGEX, "\u0647");
    str = _arabicStrings.removeTatwel(str);
    str = _arabicStrings.removeTashkel(str);
    return str;
  }
};
var arabicStrings = _arabicStrings;
arabicStrings.TASHKEEL = [
// Export table for the module: each entry maps a public name to a thunk that
// returns the binding of the same name; __export installs these as getters.
var src_exports = {};
__export(src_exports, {
ALIF_REGEX: () => ALIF_REGEX,
TASHKEEL: () => TASHKEEL,
TASHKEEL_REGEX: () => TASHKEEL_REGEX,
TATWEEL_REGEX: () => TATWEEL_REGEX,
TA_REGEX: () => TA_REGEX,
YA_REGEX: () => YA_REGEX,
hasArabic: () => hasArabic,
howArabic: () => howArabic,
howNotArabic: () => howNotArabic,
isArabic: () => isArabic,
removeArabic: () => removeArabic,
removeNonArabic: () => removeNonArabic,
removeTashkel: () => removeTashkel,
removeTatwel: () => removeTatwel,
sanitize: () => sanitize
});
// Publish the export table as the CommonJS module value (via __toCommonJS).
module.exports = __toCommonJS(src_exports);
var TASHKEEL = [
"\u0610",

@@ -107,7 +102,61 @@ // ( ؐ) arabic sign sallallahou alayhe wassallam

];
// Character-class regexes used by sanitize(), removeTashkel() and removeTatwel().
arabicStrings.ALIF_REGEX = new RegExp("[\u0623\u0625\u0622]", "g");
arabicStrings.YA_REGEX = new RegExp("[\u0649\u064A]", "g");
arabicStrings.TA_REGEX = new RegExp("[\u0629]", "g");
// The marks must be wrapped in a character class: a bare join() yields a
// pattern that only matches all of the marks in sequence, so single
// diacritics were never removed.
arabicStrings.TASHKEEL_REGEX = new RegExp("[" + _arabicStrings.TASHKEEL.join("") + "]", "g");
arabicStrings.TATWEEL_REGEX = new RegExp("\u0640", "g");
module.exports = arabicStrings;
// Character-class regexes used by sanitize(), removeTashkel() and removeTatwel().
var ALIF_REGEX = new RegExp("[\u0623\u0625\u0622]", "g");
var YA_REGEX = new RegExp("[\u0649\u064A]", "g");
var TA_REGEX = new RegExp("[\u0629]", "g");
// The marks must be wrapped in a character class: a bare join() yields a
// pattern that only matches all of the marks in sequence, so single
// diacritics were never removed.
var TASHKEEL_REGEX = new RegExp("[" + TASHKEEL.join("") + "]", "g");
var TATWEEL_REGEX = new RegExp("\u0640", "g");
/**
 * Ratio of characters in U+0621–U+0652 to all remaining characters of `str`.
 *
 * Characters in U+0021–U+0040 and whitespace are removed before counting.
 *
 * @param str Text to measure.
 * @returns A value between 0 and 1; 0 when nothing is left to count
 *          (previously NaN from 0/0).
 */
function howArabic(str) {
  const significant = str.replace(/[\u0021-\u0040\s]/gm, "");
  if (significant.length === 0) {
    return 0;
  }
  const match = significant.match(/[\u0621-\u0652]/gm) || [];
  return match.length / significant.length;
}
/**
 * Ratio of characters outside U+0621–U+0652 to all remaining characters of `str`.
 *
 * Characters in U+0021–U+0040 and whitespace are removed before counting.
 *
 * @param str Text to measure.
 * @returns A value between 0 and 1; 0 when nothing is left to count
 *          (previously NaN from 0/0).
 */
function howNotArabic(str) {
  const significant = str.replace(/[\u0021-\u0040\s]/gm, "");
  if (significant.length === 0) {
    return 0;
  }
  const match = significant.match(/[^\u0621-\u0652]/gm) || [];
  return match.length / significant.length;
}
/**
 * Decides whether `str` counts as Arabic text.
 *
 * @param str Text to classify.
 * @param threshold Minimum howArabic() ratio required; defaults to 0.79.
 * @returns True when the ratio reaches the threshold.
 */
function isArabic(str, threshold = 0.79) {
  const arabicShare = howArabic(str);
  return arabicShare >= threshold;
}
/**
 * Reports whether `str` contains at least one character in U+0621–U+064A.
 *
 * @param str Text to inspect.
 * @returns True on the first matching character, false otherwise.
 */
function hasArabic(str) {
  const arabicLetter = /[\u0621-\u064A]/;
  return arabicLetter.exec(str) !== null;
}
/**
 * Strips every TASHKEEL_REGEX match from `str`.
 *
 * @param str Text to clean.
 * @returns The text with the matches replaced by the empty string.
 */
function removeTashkel(str) {
  const withoutMarks = str.replace(TASHKEEL_REGEX, "");
  return withoutMarks;
}
/**
 * Removes every character outside U+0621–U+0652 from `str`, except those
 * covered by `excludeChars`.
 *
 * `excludeChars` is interpolated verbatim into a negated character class, so
 * regex metacharacters in it (`-`, `]`, `\`) keep their regex meaning.
 *
 * @param str Text to filter.
 * @param excludeChars Extra characters to keep; defaults to none.
 * @returns The filtered text.
 */
function removeNonArabic(str, excludeChars = "") {
  // The previous unused `exclusionRegex` local was dropped: it compiled a
  // second RegExp on every call without ever being read.
  const keepOnly = new RegExp(`[^\\u0621-\\u0652${excludeChars}]`, "gm");
  return str.replace(keepOnly, "");
}
/**
 * Removes every character in U+0621–U+0652 from `str`.
 *
 * @param str Text to filter.
 * @returns The text without those characters.
 */
function removeArabic(str) {
  // Splitting on each matching character and re-joining drops the matches.
  const pieces = str.split(/[\u0621-\u0652]/gm);
  return pieces.join("");
}
/**
 * Strips every TATWEEL_REGEX match from `str`.
 *
 * @param str Text to clean.
 * @returns The text with the matches replaced by the empty string.
 */
function removeTatwel(str) {
  const withoutTatweel = str.replace(TATWEEL_REGEX, "");
  return withoutTatweel;
}
/**
 * Normalises `str` in a fixed order: ALIF_REGEX matches become U+0627,
 * YA_REGEX matches become U+064A, TA_REGEX matches become U+0647, then
 * tatweel matches and finally tashkeel matches are removed.
 *
 * @param str Text to normalise.
 * @returns The normalised text.
 */
function sanitize(str) {
  const unifiedAlif = str.replace(ALIF_REGEX, "\u0627");
  const unifiedYa = unifiedAlif.replace(YA_REGEX, "\u064A");
  const unifiedTa = unifiedYa.replace(TA_REGEX, "\u0647");
  return removeTashkel(removeTatwel(unifiedTa));
}
// Annotate the CommonJS export names for ESM import in node:
// The leading `0 &&` short-circuits, so this assignment never executes; the
// object literal only spells out the export names in the file's text.
0 && (module.exports = {
ALIF_REGEX,
TASHKEEL,
TASHKEEL_REGEX,
TATWEEL_REGEX,
TA_REGEX,
YA_REGEX,
hasArabic,
howArabic,
howNotArabic,
isArabic,
removeArabic,
removeNonArabic,
removeTashkel,
removeTatwel,
sanitize
});
{
"name": "@6degrees/arabic-strings",
"license": "MIT",
"version": "0.3.0",
"version": "1.0.0",
"main": "dist/index.js",

@@ -6,0 +6,0 @@ "module": "dist/index.mjs",

@@ -1,101 +0,115 @@

class arabicStrings {
static TASHKEEL = [
"\u0610", // ( ؐ) arabic sign sallallahou alayhe wassallam
"\u0611", // ( ؑ) arabic sign alayhe assallam
"\u0612", // ( ؒ) arabic sign rahmatullah alayhe
"\u0613", // ( ؓ) arabic sign radi allahou anhu
"\u0614", // ( ؔ) arabic sign takhallus
"\u0615", // ( ؕ) arabic small high tah
"\u0616", // ( ؖ) arabic small high ligature alef with lam with yeh
"\u0617", // ( ؗ) arabic small high zain
"\u0618", // ( ؘ) arabic small fatha
"\u0619", // ( ؙ) arabic small damma
"\u061a", // ( ؚ) arabic small kasra
"\u064b", // ( ً) arabic fathatan
"\u064c", // ( ٌ) arabic dammatan
"\u064d", // ( ٍ) arabic kasratan
"\u064e", // ( َ) arabic fatha
"\u064f", // ( ُ) arabic damma
"\u0650", // ( ِ) arabic kasra
"\u0651", // ( ّ) arabic shadda
"\u0652", // ( ْ) arabic sukun
"\u0653", // ( ٓ) arabic maddah above
"\u0654", // ( ٔ) arabic hamza above
"\u0655", // ( ٕ) arabic hamza below
"\u0656", // ( ٖ) arabic subscript alef
"\u0657", // ( ٗ) arabic inverted damma
"\u0658", // ( ٘) arabic mark noon ghunna
"\u065a", // ( ٚ) arabic vowel sign small v above
"\u065b", // ( ٛ) arabic vowel sign inverted small v above
"\u065c", // ( ٜ) arabic vowel sign dot below
"\u065d", // ( ٝ) arabic reversed damma
"\u065e", // ( ٞ) arabic fatha with two dots
]
static ALIF_REGEX = new RegExp("[أإآ]", "g");
static YA_REGEX = new RegExp("[ىي]", "g");
const TASHKEEL: Array<string> = [
"\u0610", // ( ؐ) arabic sign sallallahou alayhe wassallam
"\u0611", // ( ؑ) arabic sign alayhe assallam
"\u0612", // ( ؒ) arabic sign rahmatullah alayhe
"\u0613", // ( ؓ) arabic sign radi allahou anhu
"\u0614", // ( ؔ) arabic sign takhallus
"\u0615", // ( ؕ) arabic small high tah
"\u0616", // ( ؖ) arabic small high ligature alef with lam with yeh
"\u0617", // ( ؗ) arabic small high zain
"\u0618", // ( ؘ) arabic small fatha
"\u0619", // ( ؙ) arabic small damma
"\u061a", // ( ؚ) arabic small kasra
"\u064b", // ( ً) arabic fathatan
"\u064c", // ( ٌ) arabic dammatan
"\u064d", // ( ٍ) arabic kasratan
"\u064e", // ( َ) arabic fatha
"\u064f", // ( ُ) arabic damma
"\u0650", // ( ِ) arabic kasra
"\u0651", // ( ّ) arabic shadda
"\u0652", // ( ْ) arabic sukun
"\u0653", // ( ٓ) arabic maddah above
"\u0654", // ( ٔ) arabic hamza above
"\u0655", // ( ٕ) arabic hamza below
"\u0656", // ( ٖ) arabic subscript alef
"\u0657", // ( ٗ) arabic inverted damma
"\u0658", // ( ٘) arabic mark noon ghunna
"\u065a", // ( ٚ) arabic vowel sign small v above
"\u065b", // ( ٛ) arabic vowel sign inverted small v above
"\u065c", // ( ٜ) arabic vowel sign dot below
"\u065d", // ( ٝ) arabic reversed damma
"\u065e", // ( ٞ) arabic fatha with two dots
]
static TA_REGEX = new RegExp("[ة]", "g");
const ALIF_REGEX = new RegExp("[أإآ]", "g");
// Wrapped in a character class so each mark matches on its own; a bare join() only matches all of the marks in sequence.
static TASHKEEL_REGEX = new RegExp(`[${arabicStrings.TASHKEEL.join("")}]`, "g");
const YA_REGEX = new RegExp("[ىي]", "g");
static TATWEEL_REGEX = new RegExp("\u0640", "g"); // /ـ/g; // /\u0640/g
static howArabic(str: string): number {
// strip punctuation, digits, and spaces
str = str.replace(/[\u0021-\u0040\s]/gm, "");
const TA_REGEX = new RegExp("[ة]", "g");
const match = str.match(/[\u0621-\u0652]/gm) || [];
const result = match.length / str.length;
// Wrapped in a character class so each mark matches on its own; a bare join() only matches all of the marks in sequence.
const TASHKEEL_REGEX = new RegExp(`[${TASHKEEL.join("")}]`, "g");
return result;
}
const TATWEEL_REGEX = new RegExp("\u0640", "g"); // /ـ/g; // /\u0640/g
static howNotArabic(str: string): number {
// strip punctuation, digits, and spaces
str = str.replace(/[\u0021-\u0040\s]/gm, "");
function howArabic(str: string): number {
// strip punctuation, digits, and spaces
str = str.replace(/[\u0021-\u0040\s]/gm, "");
const match = str.match(/[^\u0621-\u0652]/gm) || [];
const result = match.length / str.length;
const match = str.match(/[\u0621-\u0652]/gm) || [];
const result = match.length / str.length;
return result;
}
return result;
}
/** True when arabicStrings.howArabic(str) is at least `threshold` (default 0.79). */
static isArabic(str: string, threshold: number = 0.79): boolean {
  const arabicShare = arabicStrings.howArabic(str);
  return arabicShare >= threshold;
}
function howNotArabic(str: string): number {
// strip punctuation, digits, and spaces
str = str.replace(/[\u0021-\u0040\s]/gm, "");
/** True when `str` contains at least one character in U+0621–U+064A. */
static hasArabic(str: string): boolean {
  const arabicLetter = /[\u0621-\u064A]/;
  return arabicLetter.exec(str) !== null;
}
const match = str.match(/[^\u0621-\u0652]/gm) || [];
const result = match.length / str.length;
/** Returns `str` with every arabicStrings.TASHKEEL_REGEX match removed. */
static removeTashkel(str: string): string {
  const withoutMarks = str.replace(arabicStrings.TASHKEEL_REGEX, '');
  return withoutMarks;
}
return result;
}
/**
 * Removes every character outside U+0621–U+0652 from `str`, except those
 * covered by `excludeChars`.
 *
 * `excludeChars` is interpolated verbatim into a negated character class, so
 * regex metacharacters in it (`-`, `]`, `\`) keep their regex meaning.
 */
static removeNonArabic(str: string, excludeChars: string = ""): string {
  // The previous unused `exclusionRegex` local was dropped: it compiled a
  // second RegExp on every call without ever being read.
  const keepOnly = new RegExp(`[^\\u0621-\\u0652${excludeChars}]`, "gm");
  return str.replace(keepOnly, "");
}
/** Returns `str` with every character in U+0621–U+0652 removed. */
static removeArabic(str: string): string {
  // Splitting on each matching character and re-joining drops the matches.
  const pieces = str.split(/[\u0621-\u0652]/gm);
  return pieces.join("");
}
/**
 * Decides whether `str` counts as Arabic text.
 *
 * @param str Text to classify.
 * @param threshold Minimum howArabic() ratio required; defaults to 0.79.
 * @returns True when the ratio reaches the threshold.
 */
function isArabic(str: string, threshold: number = 0.79): boolean {
  const arabicShare = howArabic(str);
  return arabicShare >= threshold;
}
static removeTatwel (str: string): string {
return str.replace(arabicStrings.TATWEEL_REGEX, '');
/**
 * Reports whether `str` contains at least one character in U+0621–U+064A.
 *
 * @param str Text to inspect.
 * @returns True on the first matching character, false otherwise.
 */
function hasArabic(str: string): boolean {
  const arabicLetter = /[\u0621-\u064A]/;
  return arabicLetter.exec(str) !== null;
}
}
/**
 * Strips every TASHKEEL_REGEX match from `str`.
 *
 * @param str Text to clean.
 * @returns The text with the matches replaced by the empty string.
 */
function removeTashkel(str: string): string {
  const withoutMarks = str.replace(TASHKEEL_REGEX, '');
  return withoutMarks;
}
/**
 * Normalises `str` in a fixed order: ALIF_REGEX, YA_REGEX and TA_REGEX
 * matches are each replaced by a single letter, then tatweel matches and
 * finally tashkeel matches are removed.
 */
static sanitize(str: string): string {
  const unifiedAlif = str.replace(arabicStrings.ALIF_REGEX, 'ا');
  const unifiedYa = unifiedAlif.replace(arabicStrings.YA_REGEX, 'ي');
  const unifiedTa = unifiedYa.replace(arabicStrings.TA_REGEX, 'ه');
  return arabicStrings.removeTashkel(arabicStrings.removeTatwel(unifiedTa));
}
/**
 * Removes every character outside U+0621–U+0652 from `str`, except those
 * covered by `excludeChars`.
 *
 * `excludeChars` is interpolated verbatim into a negated character class, so
 * regex metacharacters in it (`-`, `]`, `\`) keep their regex meaning.
 *
 * @param str Text to filter.
 * @param excludeChars Extra characters to keep; defaults to none.
 * @returns The filtered text.
 */
function removeNonArabic(str: string, excludeChars: string = ""): string {
  // The previous unused `exclusionRegex` local was dropped: it compiled a
  // second RegExp on every call without ever being read.
  const keepOnly = new RegExp(`[^\\u0621-\\u0652${excludeChars}]`, "gm");
  return str.replace(keepOnly, "");
}
/**
 * Removes every character in U+0621–U+0652 from `str`.
 *
 * @param str Text to filter.
 * @returns The text without those characters.
 */
function removeArabic(str: string): string {
  // Splitting on each matching character and re-joining drops the matches.
  const pieces = str.split(/[\u0621-\u0652]/gm);
  return pieces.join("");
}
module.exports = arabicStrings;
/**
 * Strips every TATWEEL_REGEX match from `str`.
 *
 * @param str Text to clean.
 * @returns The text with the matches replaced by the empty string.
 */
function removeTatwel(str: string): string {
  const withoutTatweel = str.replace(TATWEEL_REGEX, '');
  return withoutTatweel;
}
/**
 * Normalises `str` in a fixed order: ALIF_REGEX, YA_REGEX and TA_REGEX
 * matches are each replaced by a single letter, then tatweel matches and
 * finally tashkeel matches are removed.
 *
 * @param str Text to normalise.
 * @returns The normalised text.
 */
function sanitize(str: string): string {
  const unifiedAlif = str.replace(ALIF_REGEX, 'ا');
  const unifiedYa = unifiedAlif.replace(YA_REGEX, 'ي');
  const unifiedTa = unifiedYa.replace(TA_REGEX, 'ه');
  return removeTashkel(removeTatwel(unifiedTa));
}
export {
TASHKEEL,
ALIF_REGEX,
TASHKEEL_REGEX,
TA_REGEX,
YA_REGEX,
TATWEEL_REGEX,
howArabic,
howNotArabic,
isArabic,
hasArabic,
removeTashkel,
removeNonArabic,
removeArabic,
removeTatwel,
sanitize,
};
# Todos
- [ ] Consider borrowing intelligence from [pyarabic](https://github.com/linuxscout/pyarabic/blob/master/doc/features.md)
- [ ] Consider borrowing intelligence from [pyarabic](https://github.com/linuxscout/pyarabic/blob/master/doc/features.md)

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc