New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

text-categorizer

Package Overview
Dependencies
Maintainers
0
Versions
6
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

text-categorizer - npm Package Compare versions

Comparing version 1.0.3 to 1.0.4

.github/workflows/github-actions-demo.yml

2

dist/constants.d.ts

@@ -14,2 +14,4 @@ export declare const PATTERNS: {

FILE_PATH: RegExp;
MEASUREMENT: RegExp;
SQL: RegExp;
};

6

dist/constants.js

@@ -8,3 +8,3 @@ export const PATTERNS = {

DATE: /\b(\d{1,4}[-/.]\d{1,2}[-/.]\d{1,4}|\d{1,2}\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,4})\b/gi,
EQUATION: /[\d\s]*[+\-*/()=]+[\d\s]*/,
EQUATION: /(?:\d+\s*|\(\s*)[+\-*/()=]+\s*(?:\d+\s*|\)) /,
PHONE: /(?:\+\d{1,3}\s?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g,

@@ -14,3 +14,5 @@ ADDRESS: /\d+\s+([A-Za-z]+(\.?\s|\.)){1,}\s*,?\s*([A-Za-z]+\s*,\s*)?[A-Z]{2}\s*\d{5}(-\d{4})?/g,

PRODUCT_CODE: /^[A-Z0-9]{3,}-[A-Z0-9]{3,}$|^[A-Z]{2,4}\d{4,}$/,
FILE_PATH: /^(?:[a-zA-Z]:\\|\/|\.\/|\.\.\/)(?:[^\\\/:*?"<>|\r\n]+\\)*[^\\\/:*?"<>|\r\n]*$/
FILE_PATH: /^(?:[a-zA-Z]:\\|\/|\.\/|\.\.\/)(?:[^\\\/:*?"<>|\r\n]+\\)*[^\\\/:*?"<>|\r\n]*$/,
MEASUREMENT: /(\d+(\.\d+)?\s*(cm|mm|in|ft|yd|mi|km|g|kg|lb|oz|ml|l|tsp|tbsp|cup|pt|qt|gal|fl oz|in²|ft²|yd²|mi²|km²|ac|ha|sq mi|sq km|°F|°C|K|°|'|"))/gi,
SQL: /\b(SELECT|INSERT|UPDATE|DELETE|FROM|WHERE|JOIN|GROUP BY|ORDER BY|HAVING|CREATE|ALTER|DROP|TABLE|INDEX)\b/i
};

@@ -15,3 +15,3 @@ import type { ContentCategory } from "./types";

private static extractSocialElements;
private static isSearchQuery;
private static isSearchParams;
private static extractDates;

@@ -26,2 +26,3 @@ private static isPhoneNumber;

private static isProductCode;
private static isMeasurement;
private static parseCurrency;

@@ -28,0 +29,0 @@ private static parseCsv;

@@ -7,3 +7,3 @@ import { PATTERNS } from "./constants";

static containsLinks(text) {
return text.match(PATTERNS.URL) || [];
return text.match(PATTERNS.URL) || undefined;
}

@@ -91,9 +91,5 @@ static isList(text) {

}
static isSearchQuery(text) {
const searchPatterns = [
/^(what|how|who|where|when|why)\s.+\??$/i,
/^["'].+["']\s*(site:|filetype:|OR|AND)/i,
/^[^.!?]+\??$/
];
return searchPatterns.some((pattern) => pattern.test(text.trim()));
static isSearchParams(text) {
const searchPattern = /(\?|\&)?[a-zA-Z0-9_]+=[^&]*/;
return searchPattern.test(text.trim());
}

@@ -129,4 +125,3 @@ static extractDates(text) {

static isSql(text) {
const sqlKeywords = /\b(SELECT|INSERT|UPDATE|DELETE|FROM|WHERE|JOIN|GROUP BY|ORDER BY|HAVING|CREATE|ALTER|DROP|TABLE|INDEX)\b/i;
return sqlKeywords.test(text) && text.includes(";");
return PATTERNS.SQL.test(text) && text.includes(";");
}

@@ -142,2 +137,5 @@ static isFilePath(text) {

}
static isMeasurement(text) {
return PATTERNS.MEASUREMENT.test(text);
}
static parseCurrency(text) {

@@ -249,9 +247,2 @@ const match = text.match(PATTERNS.CURRENCY);

}
if (this.isSearchQuery(content)) {
return {
type: "query",
content,
metadata: { confidence: 0.8 }
};
}
const dates = this.extractDates(content);

@@ -289,2 +280,26 @@ if (dates.length > 0) {

}
if (this.isEquation(content)) {
return {
type: "equation",
content,
metadata: { format: "math" }
};
}
if (this.isSearchParams(content)) {
return {
type: "search",
content,
metadata: { links: this.containsLinks(content) }
};
}
if (this.isMeasurement(content)) {
return {
type: "measurement",
content,
metadata: {
amount: parseFloat(content.match(PATTERNS.MEASUREMENT)?.[0] || ""),
unit: content.match(/[a-zA-Z]+/)?.[0] || ""
}
};
}
return { type: "text", content };

@@ -291,0 +306,0 @@ }

export type ContentCategory = {
type: "link" | "code" | "list" | "text" | "email" | "json" | "markdown" | "equation" | "date" | "social" | "query" | "phone" | "address" | "csv" | "xml" | "sql" | "filepath" | "currency" | "productCode";
type: "link" | "code" | "list" | "text" | "email" | "json" | "markdown" | "equation" | "date" | "social" | "search" | "phone" | "address" | "csv" | "xml" | "sql" | "filepath" | "currency" | "productCode" | "measurement";
content: string;

@@ -18,3 +18,4 @@ metadata?: {

structured?: any;
unit?: string;
};
};
{
"name": "text-categorizer",
"version": "1.0.3",
"version": "1.0.4",
"description": "Intelligent text content type detection and classification",

@@ -45,6 +45,9 @@ "main": "dist/index.js",

"devDependencies": {
"@types/jest": "^29.5.14",
"@types/node": "^18.0.0",
"typescript": "^4.9.0",
"eslint": "^8.0.0"
"eslint": "^8.0.0",
"jest": "^29.7.0",
"ts-jest": "^29.2.5",
"typescript": "^4.9.0"
}
}

@@ -183,3 +183,8 @@ # Text Categorizer

- Product codes
- Measurement
## TODO
- Measurements
## Contributing

@@ -186,0 +191,0 @@

@@ -8,3 +8,3 @@ export const PATTERNS = {

DATE: /\b(\d{1,4}[-/.]\d{1,2}[-/.]\d{1,4}|\d{1,2}\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,4})\b/gi,
EQUATION: /[\d\s]*[+\-*/()=]+[\d\s]*/,
EQUATION: /(?:\d+\s*|\(\s*)[+\-*/()=]+\s*(?:\d+\s*|\)) /, ///[\d\s]*[+\-*/()=]+[\d\s]*/,
PHONE: /(?:\+\d{1,3}\s?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g,

@@ -16,3 +16,6 @@ ADDRESS:

FILE_PATH:
/^(?:[a-zA-Z]:\\|\/|\.\/|\.\.\/)(?:[^\\\/:*?"<>|\r\n]+\\)*[^\\\/:*?"<>|\r\n]*$/
/^(?:[a-zA-Z]:\\|\/|\.\/|\.\.\/)(?:[^\\\/:*?"<>|\r\n]+\\)*[^\\\/:*?"<>|\r\n]*$/,
MEASUREMENT:
/(\d+(\.\d+)?\s*(cm|mm|in|ft|yd|mi|km|g|kg|lb|oz|ml|l|tsp|tbsp|cup|pt|qt|gal|fl oz|in²|ft²|yd²|mi²|km²|ac|ha|sq mi|sq km|°F|°C|K|°|'|"))/gi,
SQL: /\b(SELECT|INSERT|UPDATE|DELETE|FROM|WHERE|JOIN|GROUP BY|ORDER BY|HAVING|CREATE|ALTER|DROP|TABLE|INDEX)\b/i
};

@@ -11,4 +11,4 @@ import { PATTERNS } from "./constants";

private static containsLinks(text: string): string[] {
return text.match(PATTERNS.URL) || [];
private static containsLinks(text: string): string[] | undefined {
return text.match(PATTERNS.URL) || undefined;
}

@@ -116,10 +116,6 @@

private static isSearchQuery(text: string): boolean {
const searchPatterns = [
/^(what|how|who|where|when|why)\s.+\??$/i,
/^["'].+["']\s*(site:|filetype:|OR|AND)/i,
/^[^.!?]+\??$/
];
private static isSearchParams(text: string): boolean {
const searchPattern = /(\?|\&)?[a-zA-Z0-9_]+=[^&]*/;
return searchPatterns.some((pattern) => pattern.test(text.trim()));
return searchPattern.test(text.trim());
}

@@ -167,5 +163,3 @@

private static isSql(text: string): boolean {
const sqlKeywords =
/\b(SELECT|INSERT|UPDATE|DELETE|FROM|WHERE|JOIN|GROUP BY|ORDER BY|HAVING|CREATE|ALTER|DROP|TABLE|INDEX)\b/i;
return sqlKeywords.test(text) && text.includes(";");
return PATTERNS.SQL.test(text) && text.includes(";");
}

@@ -185,2 +179,6 @@

/**
 * Reports whether `text` contains a measurement, i.e. a number followed by
 * one of the units listed in PATTERNS.MEASUREMENT.
 *
 * PATTERNS.MEASUREMENT carries the `g` flag, so `RegExp.prototype.test`
 * would resume from the regex's `lastIndex`; a call made right after a
 * successful match could then wrongly return false. `String.prototype.search`
 * always scans from index 0 and leaves `lastIndex` unchanged, which makes
 * the check independent of earlier calls.
 *
 * @param text - Content to inspect.
 * @returns True when the pattern matches anywhere in `text`.
 */
private static isMeasurement(text: string): boolean {
  return text.search(PATTERNS.MEASUREMENT) !== -1;
}
private static parseCurrency(text: string): {

@@ -313,10 +311,2 @@ amount: number;

if (this.isSearchQuery(content)) {
return {
type: "query",
content,
metadata: { confidence: 0.8 }
};
}
const dates = this.extractDates(content);

@@ -358,2 +348,29 @@ if (dates.length > 0) {

if (this.isEquation(content)) {
return {
type: "equation",
content,
metadata: { format: "math" }
};
}
if (this.isSearchParams(content)) {
return {
type: "search",
content,
metadata: { links: this.containsLinks(content) }
};
}
if (this.isMeasurement(content)) {
return {
type: "measurement",
content,
metadata: {
amount: parseFloat(content.match(PATTERNS.MEASUREMENT)?.[0] || ""),
unit: content.match(/[a-zA-Z]+/)?.[0] || ""
}
};
}
return { type: "text", content };

@@ -360,0 +377,0 @@ }

@@ -13,3 +13,3 @@ export type ContentCategory = {

| "social"
| "query"
| "search"
| "phone"

@@ -22,3 +22,4 @@ | "address"

| "currency"
| "productCode";
| "productCode"
| "measurement";
content: string;

@@ -39,3 +40,4 @@ metadata?: {

structured?: any;
unit?: string;
};
};
Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc