🚀 Socket Launch Week Day 5: Introducing Repository Access Permissions and Custom Roles. Learn more
Sign In

predict-data-types

Package Overview
Dependencies
Maintainers
1
Versions
10
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

predict-data-types - npm Package Compare versions

Comparing version
1.7.0
to
1.7.1
+11
-3
index.d.ts

@@ -20,5 +20,9 @@ // Type definitions for predict-data-types

| 'ip'
| 'macaddress'
| 'color'
| 'percentage'
| 'currency';
| 'currency'
| 'mention'
| 'cron'
| 'hashtag';

@@ -41,5 +45,9 @@ /**

readonly IP: 'ip';
readonly MACADDRESS: 'macaddress';
readonly COLOR: 'color';
readonly PERCENTAGE: 'percentage';
readonly CURRENCY: 'currency';
readonly MENTION: 'mention';
readonly CRON: 'cron';
readonly HASHTAG: 'hashtag';
};

@@ -179,3 +187,3 @@

*/
const DataTypes: typeof import('.').DataTypes;
export { DataTypes };

@@ -185,5 +193,5 @@ /**

*/
const Formats: typeof import('.').Formats;
export { Formats };
}
export = predictDataTypes;
+199
-13

@@ -18,5 +18,9 @@ /**

IP: 'ip',
MACADDRESS: 'macaddress',
COLOR: 'color',
PERCENTAGE: 'percentage',
CURRENCY: 'currency'
CURRENCY: 'currency',
MENTION: 'mention',
CRON: 'cron',
HASHTAG: 'hashtag'
};

@@ -44,3 +48,6 @@

PERCENTAGE: /^-?\d+(?:\.\d+)?%$/,
CURRENCY: /^[$€£¥₹][\d,]+(?:\.\d{1,2})?$|^[\d,]+(?:\.\d{1,2})?[$€£¥₹]$/
CURRENCY: /^[$€£¥₹][\d,]+(?:\.\d{1,2})?$|^[\d,]+(?:\.\d{1,2})?[$€£¥₹]$/,
MENTION: /^@[A-Za-z0-9][A-Za-z0-9_-]*$/,
MAC_ADDRESS: /^(?:[0-9a-fA-F]{2}[:-]){5}[0-9a-fA-F]{2}$/,
HASHTAG: /^#[A-Za-z][A-Za-z0-9_]*$/
};

@@ -328,2 +335,13 @@

/**
 * Tests whether a value has the shape of a social media mention (e.g. `@username`).
 *
 * The decision is delegated entirely to `PATTERNS.MENTION`: an `@`, then one
 * ASCII letter or digit, then any number of ASCII letters, digits,
 * underscores or hyphens.
 *
 * @param {string} value - The value to check
 * @returns {boolean} True when the whole value matches the mention pattern
 */
function isMention(value) {
    const looksLikeMention = PATTERNS.MENTION.test(value);
    return looksLikeMention;
}
/**
* Checks if a given value is a valid IP address (IPv4 or IPv6)

@@ -338,2 +356,11 @@ * @param {string} value - The value to check

/**
 * Tests whether a value has the shape of a MAC address.
 *
 * The decision is delegated entirely to `PATTERNS.MAC_ADDRESS`: six pairs of
 * hexadecimal digits, with each of the first five pairs followed by `:` or `-`.
 *
 * @param {string} value - The value to check
 * @returns {boolean} True when the whole value matches the MAC address pattern
 */
function isMACAddress(value) {
    const looksLikeMac = PATTERNS.MAC_ADDRESS.test(value);
    return looksLikeMac;
}
/**
* Checks if a given value is a valid hex color code

@@ -364,4 +391,138 @@ * @param {string} value - The value to check

}
/**
 * Checks if a given value is a hashtag (e.g. `#OpenSource`).
 *
 * A hashtag must match `PATTERNS.HASHTAG`, but values that look like hex
 * colors are excluded so that the two types do not overlap:
 *   - anything accepted by `isHexColor` is rejected;
 *   - at hex-color lengths (`#` + 3 or `#` + 6 characters), a body made only of
 *     hex digits, or only of uppercase letters, is treated as a failed hex
 *     color attempt and rejected.
 *
 * @param {string} value - The value to check
 * @param {Object} [options={}] - Detection options
 * @param {boolean} [options.preferHashtagOver3CharHex=false] - When truthy, a
 *   4-character value that matches the hashtag pattern is accepted before any
 *   hex-color check is made
 * @returns {boolean} True if the value is considered a hashtag, false otherwise
 */
function isHashtag(value, options = {}) {
    if (!value.startsWith('#')) {
        return false;
    }

    const isShortForm = value.length === 4;
    const hasHexColorLength = isShortForm || value.length === 7;

    // Opt-in: let short tags win over 3-digit hex colors.
    if (options.preferHashtagOver3CharHex && isShortForm && PATTERNS.HASHTAG.test(value)) {
        return true;
    }

    // Real hex colors are never hashtags.
    if (isHexColor(value)) {
        return false;
    }

    if (hasHexColorLength) {
        const body = value.slice(1);
        // Defensive: hex-digit-only bodies are expected to be caught by isHexColor above.
        const hexDigitsOnly = /^[A-Fa-f0-9]+$/.test(body);
        // All-caps letters at hex length (e.g. #GGG, #GGGGGG) read as a failed hex attempt.
        const upperLettersOnly = /^[A-Z]+$/.test(body);
        if (hexDigitsOnly || upperLettersOnly) {
            return false;
        }
    }

    return PATTERNS.HASHTAG.test(value);
}
/**
 * Strictly parses an unsigned decimal integer token from a cron field.
 *
 * `parseInt` alone stops at the first non-digit, so tokens such as "5x",
 * "1.5" or "0x10" would silently parse as numbers. Requiring the whole token
 * to be digits keeps arbitrary text from being accepted as a cron value.
 *
 * @param {string} token - The token to parse
 * @returns {number} The parsed integer, or NaN when the token is not all digits
 */
function parseCronNumber(token) {
    return /^\d+$/.test(token) ? parseInt(token, 10) : NaN;
}
/**
 * Checks if a given value is a valid 5-field cron expression
 * (minute, hour, day of month, month, day of week).
 *
 * Surrounding whitespace is ignored and fields may be separated by any run
 * of whitespace. Only numeric values, `*`, lists (`,`), ranges (`-`) and
 * steps (`/`) are recognised.
 *
 * @param {string} value - The value to check
 * @returns {boolean} True if the value is a valid cron expression, false otherwise
 */
function isCron(value) {
    const fields = value.trim().split(/\s+/);
    // Must have exactly 5 fields
    if (fields.length !== 5) {
        return false;
    }
    // Allowed numeric range for each field, in positional order
    const ranges = [
        { min: 0, max: 59 }, // minute
        { min: 0, max: 23 }, // hour
        { min: 1, max: 31 }, // day
        { min: 1, max: 12 }, // month
        { min: 0, max: 7 } // weekday (0 and 7 are Sunday)
    ];
    return fields.every((field, index) => isValidCronField(field, ranges[index]));
}
/**
 * Validates a single cron field: an optional `/step` suffix applied to a
 * comma-separated list of parts.
 *
 * @param {string} field - The field to validate
 * @param {Object} range - The valid range {min, max}
 * @returns {boolean} True if valid, false otherwise
 */
function isValidCronField(field, range) {
    if (field === '*') {
        return true;
    }
    // Handle step values like */5 or 1-5/2; more than one '/' is malformed
    const stepParts = field.split('/');
    if (stepParts.length > 2) {
        return false;
    }
    const [baseField, step] = stepParts;
    // A step, when present, must be a positive integer made only of digits
    if (step !== undefined) {
        const stepNum = parseCronNumber(step);
        if (Number.isNaN(stepNum) || stepNum < 1) {
            return false;
        }
    }
    // Every list entry must be valid on its own
    return baseField.split(',').every(part => isValidCronPart(part, range));
}
/**
 * Validates a single part of a cron field (one entry of the comma-separated
 * list): `*`, a single number, or a `start-end` range.
 *
 * @param {string} part - The part to validate
 * @param {Object} range - The valid range {min, max}
 * @returns {boolean} True if valid, false otherwise
 */
function isValidCronPart(part, range) {
    if (part === '*') {
        return true;
    }
    const bounds = part.split('-');
    if (bounds.length === 1) {
        // Single number
        const num = parseCronNumber(part);
        return !Number.isNaN(num) && num >= range.min && num <= range.max;
    }
    if (bounds.length === 2) {
        // Range like 1-5: both ends numeric, ascending, and inside the allowed range
        const start = parseCronNumber(bounds[0]);
        const end = parseCronNumber(bounds[1]);
        return !Number.isNaN(start) && !Number.isNaN(end) &&
            start >= range.min && end <= range.max && start <= end;
    }
    // More than one '-' is malformed
    return false;
}
/**
* Tokenizes a string by splitting on commas while respecting quoted strings and nested objects/arrays

@@ -462,5 +623,7 @@ * Optimized version with improved performance for large inputs

* @param {string} value - The value to analyze
* @param {Object} [options={}] - Detection options
* @param {boolean} [options.preferHashtagOver3CharHex=false] - Prefer hashtags over 3-char hex colors for ambiguous values like #bad, #ace
* @returns {string} The detected data type
*/
function detectFieldType(value) {
function detectFieldType(value, options = {}) {
const trimmedValue = value.trim();

@@ -485,2 +648,4 @@

return 'ip';
} else if (isMACAddress(trimmedValue)) {
return 'macaddress';
} else if (isPhoneNumber(trimmedValue)) {

@@ -490,4 +655,13 @@ return 'phone';

return 'email';
} else if (isMention(trimmedValue)) {
return 'mention';
} else if (options.preferHashtagOver3CharHex && trimmedValue.length === 4 && isHashtag(trimmedValue, options)) {
// When preferring hashtags, check 3-char values as hashtags first
return 'hashtag';
} else if (isHexColor(trimmedValue)) {
return 'color';
} else if (isHashtag(trimmedValue, options)) {
return 'hashtag';
} else if (isCron(trimmedValue)) {
return 'cron';
} else if (trimmedValue.startsWith('[') && trimmedValue.endsWith(']')) {

@@ -599,2 +773,3 @@ return 'array';

'currency': 'string',
'hashtag': 'string',
'array': 'array',

@@ -636,2 +811,6 @@ 'object': 'object'

properties[fieldName].pattern = '^[$€£¥₹][\\d,]+(?:\\.\\d{1,2})?$|^[\\d,]+(?:\\.\\d{1,2})?[$€£¥₹]$';
} else if (dataType === 'mention') {
properties[fieldName].pattern = '^@[A-Za-z0-9][A-Za-z0-9_-]*$';
} else if (dataType === 'hashtag') {
properties[fieldName].pattern = '^#[A-Za-z][A-Za-z0-9_]*$';
}

@@ -653,2 +832,4 @@

* @param {string} [format=Formats.NONE] - Output format: Formats.NONE (default) or Formats.JSONSCHEMA
* @param {Object} [options={}] - Detection options
* @param {boolean} [options.preferHashtagOver3CharHex=false] - Prefer hashtags over 3-char hex colors for ambiguous values like #bad, #ace
* @returns {string|Object} DataType for primitives, or schema object for objects

@@ -661,4 +842,6 @@ * @example

* infer([{ name: "Alice" }, { name: "Bob" }]) // → { name: 'string' }
* infer("#bad") // → 'color' (default: 3-char hex takes priority)
* infer("#bad", Formats.NONE, { preferHashtagOver3CharHex: true }) // → 'hashtag'
*/
function infer(input, format = Formats.NONE) {
function infer(input, format = Formats.NONE, options = {}) {
if (input === null || input === undefined) {

@@ -670,3 +853,3 @@ throw new Error('Input cannot be null or undefined');

if (typeof input === 'string') {
return detectFieldType(input);
return detectFieldType(input, options);
}

@@ -684,3 +867,3 @@

// Array of objects - infer schema
const schema = inferSchemaFromObjects(input);
const schema = inferSchemaFromObjects(input, options);

@@ -696,3 +879,3 @@ // Convert to JSON Schema if requested

// Array of primitive values - find common type
const types = input.map(val => detectFieldType(String(val)));
const types = input.map(val => detectFieldType(String(val), options));
const typeCounts = {};

@@ -704,4 +887,4 @@ types.forEach(type => {

const typePriority = [
'uuid', 'email', 'phone', 'url', 'ip', 'color',
'currency', 'percentage', 'date', 'boolean',
'uuid', 'email', 'phone', 'url', 'ip', 'macaddress', 'mention', 'color', 'hashtag',
'currency', 'percentage', 'date', 'cron', 'boolean',
'number', 'array', 'object', 'string'

@@ -737,5 +920,6 @@ ];

* @param {Array<Object>} rows - Array of objects to analyze
* @param {Object} [options={}] - Detection options
* @returns {Object} Schema with field names as keys and types as values
*/
function inferSchemaFromObjects(rows) {
function inferSchemaFromObjects(rows, options = {}) {
if (!rows.every(row => row !== null && typeof row === 'object' && !Array.isArray(row))) {

@@ -768,3 +952,3 @@ throw new Error('All items must be objects');

const stringValues = values.map(val => String(val));
const types = stringValues.map(val => detectFieldType(val));
const types = stringValues.map(val => detectFieldType(val, options));
const typeCounts = {};

@@ -776,4 +960,4 @@ types.forEach(type => {

const typePriority = [
'uuid', 'email', 'phone', 'url', 'ip', 'color',
'currency', 'percentage', 'date', 'boolean',
'uuid', 'email', 'phone', 'url', 'ip', 'macaddress', 'mention', 'color', 'hashtag',
'currency', 'percentage', 'date', 'cron', 'boolean',
'number', 'array', 'object', 'string'

@@ -783,2 +967,3 @@ ];

let finalType = 'string';
for (const priorityType of typePriority) {

@@ -791,2 +976,3 @@ if (typeCounts[priorityType] === types.length) {

schema[fieldName] = finalType;

@@ -793,0 +979,0 @@ });

{
"name": "predict-data-types",
"version": "1.7.0",
"description": "A lightweight, zero-dependency npm package that predicts data types for comma-separated values, including JSON objects, and validates URLs, phone numbers, email addresses, IP addresses, colors, percentages, and currency within string values.",
"main": "index.js",
"author": "Melih Birim",
"types": "index.d.ts",
"repository": {
"type": "git",
"url": "git+https://github.com/melihbirim/predict-data-types.git"
},
"bugs": {
"url": "https://github.com/melihbirim/predict-data-types/issues"
},
"homepage": "https://github.com/melihbirim/predict-data-types#readme",
"engines": {
"node": ">=16.0.0"
},
"files": [
"index.js",
"index.d.ts",
"README.md",
"LICENSE"
],
"scripts": {
"test": "mocha test",
"test:coverage": "npx c8 mocha test",
"lint": "eslint .",
"lint:fix": "eslint . --fix"
},
"keywords": [
"predict",
"data",
"types",
"json",
"url",
"phone",
"email",
"geolocation",
"ip",
"color",
"percentage",
"currency",
"validation"
],
"license": "MIT",
"dependencies": {},
"devDependencies": {
"@types/node": "^24.10.4",
"chai": "^4.5.0",
"eslint": "^9.39.2",
"mocha": "^10.8.2",
"typescript": "^5.9.3"
}
"name": "predict-data-types",
"version": "1.7.1",
"description": "A lightweight, zero-dependency npm package that predicts data types for comma-separated values, including JSON objects, and validates URLs, phone numbers, email addresses, IP addresses, colors, percentages, and currency within string values.",
"main": "index.js",
"author": "Melih Birim",
"types": "index.d.ts",
"repository": {
"type": "git",
"url": "git+https://github.com/melihbirim/predict-data-types.git"
},
"bugs": {
"url": "https://github.com/melihbirim/predict-data-types/issues"
},
"homepage": "https://github.com/melihbirim/predict-data-types#readme",
"engines": {
"node": ">=16.0.0"
},
"files": [
"index.js",
"index.d.ts",
"README.md",
"LICENSE"
],
"scripts": {
"test": "mocha test",
"test:coverage": "npx c8 mocha test",
"lint": "eslint .",
"lint:fix": "eslint . --fix",
"prepare": "husky"
},
"lint-staged": {
"*.js": [
"eslint --fix"
]
},
"keywords": [
"predict",
"data",
"types",
"json",
"url",
"phone",
"email",
"geolocation",
"ip",
"color",
"percentage",
"currency",
"validation"
],
"license": "MIT",
"devDependencies": {
"@types/node": "^24.10.4",
"chai": "^4.5.0",
"eslint": "^9.39.2",
"husky": "^9.1.7",
"lint-staged": "^16.2.7",
"mocha": "^10.8.2",
"typescript": "^5.9.3"
}
}

@@ -47,3 +47,3 @@ # Predict Data Types

Zero-dependency package for automatic data type detection from strings, arrays, and JSON objects. Detects 14+ data types including primitives, emails, URLs, UUIDs, dates, IPs, colors, percentages, and currency.
Zero-dependency package for automatic data type detection from strings, arrays, and JSON objects. Detects 18+ data types including primitives, emails, URLs, UUIDs, dates, IPs, colors, percentages, hashtags, mentions, and currency.

@@ -55,3 +55,3 @@ > **💡 Important:** This library performs **runtime type detection** on string values, not static type checking. TypeScript is a compile-time type system for your code structure - this library analyzes actual data content at runtime. They solve completely different problems!

- **Smart Type Inference**: One `infer()` function handles strings, arrays, objects, and arrays of objects
- **14 Data Types**: Primitives plus emails, URLs, UUIDs, dates, IPs, colors, percentages, currency
- **18 Data Types**: Primitives plus emails, URLs, UUIDs, dates, IPs, colors, percentages, currency, hashtags, MAC addresses, mentions, CRON, and hashes
- **JSON Schema Generation**: Automatically generate JSON Schema from objects (compatible with Ajv, etc.)

@@ -63,3 +63,3 @@ - **Type Constants**: Use `DataTypes` for type-safe comparisons instead of string literals

- **45+ Date Formats**: Comprehensive date parsing including month names and timezones
- **Battle-Tested**: 61 comprehensive test cases
- **Battle-Tested**: 75+ comprehensive test cases

@@ -127,2 +127,4 @@ ## Installation

| `currency` | `$100`, `€50.99` |
| `hashtag` | `#hello`, `#OpenSource`, `#dev_community` |
| `mention` | `@username`, `@user_name123`, `@john-doe` |
| `array` | `[1, 2, 3]` |

@@ -162,5 +164,8 @@ | `object` | `{"name": "John"}` |

DataTypes.IP // 'ip'
DataTypes.MACADDRESS // 'macaddress'
DataTypes.COLOR // 'color'
DataTypes.PERCENTAGE // 'percentage'
DataTypes.CURRENCY // 'currency'
DataTypes.MENTION // 'mention'
DataTypes.HASHTAG // 'hashtag'
```

@@ -196,4 +201,11 @@

infer("test@example.com"); // → 'email'
infer("@username"); // → 'mention'
infer("42"); // → 'number'
infer("#OpenSource"); // → 'hashtag'
infer(["#dev", "#opensource", "#community"]); // → 'hashtag'
// Ambiguous 3-char values (can be hex color or hashtag)
infer("#bad"); // → 'color' (default: hex takes priority)
infer("#bad", "none", { preferHashtagOver3CharHex: true }); // → 'hashtag'
// Array of values → Common DataType

@@ -216,5 +228,7 @@ infer(["1", "2", "3"]); // → 'number'

]);
// → { name: 'string', age: 'number', email: 'email' }
```
### JSON Schema Format

@@ -263,2 +277,4 @@

});
```

@@ -302,4 +318,2 @@

### Complex Data
- ✅ Sample data files

@@ -330,3 +344,3 @@

### `infer(input, format?)`
### `infer(input, format?, options?)`

@@ -339,2 +353,4 @@ **The main function - handles any input type:**

- `format` (optional): Output format - `Formats.NONE` (default) or `Formats.JSONSCHEMA`
- `options` (optional): Configuration options
- `preferHashtagOver3CharHex` (boolean, default: false): When true, treats ambiguous 3-character values like `#bad`, `#ace` as hashtags instead of hex colors

@@ -370,4 +386,12 @@ **Returns:**

// → { type: 'object', properties: {...}, required: [...] }
// Hashtag field example
infer({ tag: "#OpenSource" }, Formats.JSONSCHEMA);
// {
// tag: { type: 'string', pattern: '^#[A-Za-z][A-Za-z0-9_]*$' }
// }
```
### Constants

@@ -379,3 +403,3 @@

DataTypes.PHONE, DataTypes.URL, DataTypes.UUID, DataTypes.DATE,
DataTypes.IP, DataTypes.COLOR, DataTypes.PERCENTAGE, DataTypes.CURRENCY,
DataTypes.IP, DataTypes.COLOR, DataTypes.PERCENTAGE, DataTypes.CURRENCY, DataTypes.HASHTAG,
DataTypes.ARRAY, DataTypes.OBJECT

@@ -382,0 +406,0 @@ ```