predict-data-types
Advanced tools
+11
-3
@@ -20,5 +20,9 @@ // Type definitions for predict-data-types | ||
| | 'ip' | ||
| | 'macaddress' | ||
| | 'color' | ||
| | 'percentage' | ||
| | 'currency'; | ||
| | 'currency' | ||
| | 'mention' | ||
| | 'cron' | ||
| | 'hashtag'; | ||
@@ -41,5 +45,9 @@ /** | ||
| readonly IP: 'ip'; | ||
| readonly MACADDRESS: 'macaddress'; | ||
| readonly COLOR: 'color'; | ||
| readonly PERCENTAGE: 'percentage'; | ||
| readonly CURRENCY: 'currency'; | ||
| readonly MENTION: 'mention'; | ||
| readonly CRON: 'cron'; | ||
| readonly HASHTAG: 'hashtag'; | ||
| }; | ||
@@ -179,3 +187,3 @@ | ||
| */ | ||
| const DataTypes: typeof import('.').DataTypes; | ||
| export { DataTypes }; | ||
@@ -185,5 +193,5 @@ /** | ||
| */ | ||
| const Formats: typeof import('.').Formats; | ||
| export { Formats }; | ||
| } | ||
| export = predictDataTypes; |
+199
-13
@@ -18,5 +18,9 @@ /** | ||
| IP: 'ip', | ||
| MACADDRESS: 'macaddress', | ||
| COLOR: 'color', | ||
| PERCENTAGE: 'percentage', | ||
| CURRENCY: 'currency' | ||
| CURRENCY: 'currency', | ||
| MENTION: 'mention', | ||
| CRON: 'cron', | ||
| HASHTAG: 'hashtag' | ||
| }; | ||
@@ -44,3 +48,6 @@ | ||
| PERCENTAGE: /^-?\d+(?:\.\d+)?%$/, | ||
| CURRENCY: /^[$€£¥₹][\d,]+(?:\.\d{1,2})?$|^[\d,]+(?:\.\d{1,2})?[$€£¥₹]$/ | ||
| CURRENCY: /^[$€£¥₹][\d,]+(?:\.\d{1,2})?$|^[\d,]+(?:\.\d{1,2})?[$€£¥₹]$/, | ||
| MENTION: /^@[A-Za-z0-9][A-Za-z0-9_-]*$/, | ||
| MAC_ADDRESS: /^(?:[0-9a-fA-F]{2}[:-]){5}[0-9a-fA-F]{2}$/, | ||
| HASHTAG: /^#[A-Za-z][A-Za-z0-9_]*$/ | ||
| }; | ||
@@ -328,2 +335,13 @@ | ||
/**
 * Tells whether a value looks like a social media mention such as "@username".
 * The decision is delegated entirely to PATTERNS.MENTION: an "@" followed by a
 * letter or digit, then any mix of letters, digits, underscores and hyphens.
 * @param {string} value - The candidate text
 * @returns {boolean} True when the value matches the mention pattern
 */
function isMention(value) {
    const { MENTION: mentionPattern } = PATTERNS;
    return mentionPattern.test(value);
}
| /** | ||
| * Checks if a given value is a valid IP address (IPv4 or IPv6) | ||
@@ -338,2 +356,11 @@ * @param {string} value - The value to check | ||
/**
 * Tells whether a value is formatted as a MAC address.
 * The decision is delegated entirely to PATTERNS.MAC_ADDRESS: six groups of two
 * hex digits, each group separated by ":" or "-".
 * @param {string} value - The candidate text
 * @returns {boolean} True when the value matches the MAC address pattern
 */
function isMACAddress(value) {
    const { MAC_ADDRESS: macPattern } = PATTERNS;
    return macPattern.test(value);
}
| /** | ||
| * Checks if a given value is a valid hex color code | ||
@@ -364,4 +391,138 @@ * @param {string} value - The value to check | ||
| } | ||
/**
 * Tells whether a value should be classified as a hashtag (e.g. "#OpenSource").
 *
 * Decision order:
 *   1. Anything not starting with "#" is rejected.
 *   2. With options.preferHashtagOver3CharHex set, a 4-character value that
 *      matches PATTERNS.HASHTAG is accepted before any hex check runs.
 *   3. Values recognised by isHexColor are rejected.
 *   4. Values of hex-colour length (4 or 7 characters) whose body is made only
 *      of hex digits, or only of upper-case letters, are rejected as
 *      hex-looking input.
 *   5. Everything else is decided by PATTERNS.HASHTAG.
 *
 * @param {string} value - The candidate text
 * @param {Object} [options={}] - Detection options
 * @param {boolean} [options.preferHashtagOver3CharHex] - Accept 4-character
 *   values that match the hashtag pattern before checking for hex colours
 * @returns {boolean} True when the value is treated as a hashtag
 */
function isHashtag(value, options = {}) {
    if (!value.startsWith('#')) {
        return false;
    }

    const matchesHashtagPattern = () => PATTERNS.HASHTAG.test(value);

    // Opt-in shortcut: short values go to the hashtag pattern before the hex check
    if (options.preferHashtagOver3CharHex && value.length === 4 && matchesHashtagPattern()) {
        return true;
    }

    // Valid hex colours are never hashtags
    if (isHexColor(value)) {
        return false;
    }

    // At hex-colour lengths, refuse bodies that look like a (possibly failed) hex attempt
    const hasHexColorLength = value.length === 4 || value.length === 7;
    if (hasHexColorLength) {
        const body = value.slice(1);
        const onlyHexDigits = /^[A-Fa-f0-9]+$/.test(body);
        const onlyUpperCaseLetters = /^[A-Z]+$/.test(body);
        if (onlyHexDigits || onlyUpperCaseLetters) {
            return false;
        }
    }

    return matchesHashtagPattern();
}
/**
 * Tells whether a value is a five-field cron expression.
 * The value is trimmed, split on runs of whitespace, and each field is checked
 * by isValidCronField against the numeric range for its position.
 * @param {string} value - The candidate text
 * @returns {boolean} True when there are exactly five fields and all are valid
 */
function isCron(value) {
    const cronFields = value.trim().split(/\s+/);
    if (cronFields.length !== 5) {
        return false;
    }

    // Allowed numeric range for each field, in positional order
    const fieldRanges = [
        { min: 0, max: 59 }, // minute
        { min: 0, max: 23 }, // hour
        { min: 1, max: 31 }, // day of month
        { min: 1, max: 12 }, // month
        { min: 0, max: 7 } // weekday (0 and 7 are Sunday)
    ];

    return cronFields.every((cronField, position) => isValidCronField(cronField, fieldRanges[position]));
}
/**
 * Validates a single cron field (one of the whitespace-separated fields).
 *
 * Accepted shapes: "*", or a comma-separated list of parts (each checked by
 * isValidCronPart), optionally followed by one "/step" suffix, e.g. "*\/5" or
 * "1-5/2" (written here with an escaped slash only because of the comment).
 *
 * The step must consist of digits only and be at least 1. A plain parseInt
 * check is not enough because parseInt stops at the first invalid character,
 * so inputs such as "5abc" or "2.5" would otherwise be accepted as steps.
 *
 * @param {string} field - The field to validate
 * @param {Object} range - The valid range {min, max}
 * @returns {boolean} True if valid, false otherwise
 */
function isValidCronField(field, range) {
    if (field === '*') {
        return true;
    }

    // At most one "/" is allowed: "<base>" or "<base>/<step>"
    const stepParts = field.split('/');
    if (stepParts.length > 2) {
        return false;
    }
    const [baseField, step] = stepParts;

    // Validate step if present: digits only, and a positive value
    if (step !== undefined) {
        if (!/^\d+$/.test(step) || Number.parseInt(step, 10) < 1) {
            return false;
        }
    }

    // Every comma-separated part of the base must be valid on its own
    return baseField.split(',').every((part) => isValidCronPart(part, range));
}
/**
 * Validates a single part of a cron field (one item of a comma-separated list).
 *
 * Accepted shapes: "*", a single number ("5"), or an ascending range ("1-5").
 * Every number must consist of digits only and lie within the given range.
 * Operands are checked against a digits-only pattern rather than relying on
 * parseInt alone, because parseInt ignores trailing garbage and would accept
 * inputs such as "5abc", "1.5" or "1-5x".
 *
 * @param {string} part - The part to validate
 * @param {Object} range - The valid range {min, max}
 * @returns {boolean} True if valid, false otherwise
 */
function isValidCronPart(part, range) {
    if (part === '*') {
        return true;
    }

    // One operand is a single value, two operands form a "start-end" range
    const operands = part.split('-');
    if (operands.length > 2) {
        return false;
    }
    if (!operands.every((operand) => /^\d+$/.test(operand))) {
        return false;
    }

    // For a single value start and end are the same number
    const start = Number.parseInt(operands[0], 10);
    const end = Number.parseInt(operands[operands.length - 1], 10);
    return start >= range.min && end <= range.max && start <= end;
}
| /** | ||
| * Tokenizes a string by splitting on commas while respecting quoted strings and nested objects/arrays | ||
@@ -462,5 +623,7 @@ * Optimized version with improved performance for large inputs | ||
| * @param {string} value - The value to analyze | ||
| * @param {Object} [options={}] - Detection options | ||
| * @param {boolean} [options.preferHashtagOver3CharHex=false] - Prefer hashtags over 3-char hex colors for ambiguous values like #bad, #ace | ||
| * @returns {string} The detected data type | ||
| */ | ||
| function detectFieldType(value) { | ||
| function detectFieldType(value, options = {}) { | ||
| const trimmedValue = value.trim(); | ||
@@ -485,2 +648,4 @@ | ||
| return 'ip'; | ||
| } else if (isMACAddress(trimmedValue)) { | ||
| return 'macaddress'; | ||
| } else if (isPhoneNumber(trimmedValue)) { | ||
@@ -490,4 +655,13 @@ return 'phone'; | ||
| return 'email'; | ||
| } else if (isMention(trimmedValue)) { | ||
| return 'mention'; | ||
| } else if (options.preferHashtagOver3CharHex && trimmedValue.length === 4 && isHashtag(trimmedValue, options)) { | ||
| // When preferring hashtags, check 3-char values as hashtags first | ||
| return 'hashtag'; | ||
| } else if (isHexColor(trimmedValue)) { | ||
| return 'color'; | ||
| } else if (isHashtag(trimmedValue, options)) { | ||
| return 'hashtag'; | ||
| } else if (isCron(trimmedValue)) { | ||
| return 'cron'; | ||
| } else if (trimmedValue.startsWith('[') && trimmedValue.endsWith(']')) { | ||
@@ -599,2 +773,3 @@ return 'array'; | ||
| 'currency': 'string', | ||
| 'hashtag': 'string', | ||
| 'array': 'array', | ||
@@ -636,2 +811,6 @@ 'object': 'object' | ||
| properties[fieldName].pattern = '^[$€£¥₹][\\d,]+(?:\\.\\d{1,2})?$|^[\\d,]+(?:\\.\\d{1,2})?[$€£¥₹]$'; | ||
| } else if (dataType === 'mention') { | ||
| properties[fieldName].pattern = '^@[A-Za-z0-9][A-Za-z0-9_-]*$'; | ||
| } else if (dataType === 'hashtag') { | ||
| properties[fieldName].pattern = '^#[A-Za-z][A-Za-z0-9_]*$'; | ||
| } | ||
@@ -653,2 +832,4 @@ | ||
| * @param {string} [format=Formats.NONE] - Output format: Formats.NONE (default) or Formats.JSONSCHEMA | ||
| * @param {Object} [options={}] - Detection options | ||
| * @param {boolean} [options.preferHashtagOver3CharHex=false] - Prefer hashtags over 3-char hex colors for ambiguous values like #bad, #ace | ||
| * @returns {string|Object} DataType for primitives, or schema object for objects | ||
@@ -661,4 +842,6 @@ * @example | ||
| * infer([{ name: "Alice" }, { name: "Bob" }]) // → { name: 'string' } | ||
| * infer("#bad") // → 'color' (default: 3-char hex takes priority) | ||
| * infer("#bad", Formats.NONE, { preferHashtagOver3CharHex: true }) // → 'hashtag' | ||
| */ | ||
| function infer(input, format = Formats.NONE) { | ||
| function infer(input, format = Formats.NONE, options = {}) { | ||
| if (input === null || input === undefined) { | ||
@@ -670,3 +853,3 @@ throw new Error('Input cannot be null or undefined'); | ||
| if (typeof input === 'string') { | ||
| return detectFieldType(input); | ||
| return detectFieldType(input, options); | ||
| } | ||
@@ -684,3 +867,3 @@ | ||
| // Array of objects - infer schema | ||
| const schema = inferSchemaFromObjects(input); | ||
| const schema = inferSchemaFromObjects(input, options); | ||
@@ -696,3 +879,3 @@ // Convert to JSON Schema if requested | ||
| // Array of primitive values - find common type | ||
| const types = input.map(val => detectFieldType(String(val))); | ||
| const types = input.map(val => detectFieldType(String(val), options)); | ||
| const typeCounts = {}; | ||
@@ -704,4 +887,4 @@ types.forEach(type => { | ||
| const typePriority = [ | ||
| 'uuid', 'email', 'phone', 'url', 'ip', 'color', | ||
| 'currency', 'percentage', 'date', 'boolean', | ||
| 'uuid', 'email', 'phone', 'url', 'ip', 'macaddress', 'mention', 'color', 'hashtag', | ||
| 'currency', 'percentage', 'date', 'cron', 'boolean', | ||
| 'number', 'array', 'object', 'string' | ||
@@ -737,5 +920,6 @@ ]; | ||
| * @param {Array<Object>} rows - Array of objects to analyze | ||
| * @param {Object} [options={}] - Detection options | ||
| * @returns {Object} Schema with field names as keys and types as values | ||
| */ | ||
| function inferSchemaFromObjects(rows) { | ||
| function inferSchemaFromObjects(rows, options = {}) { | ||
| if (!rows.every(row => row !== null && typeof row === 'object' && !Array.isArray(row))) { | ||
@@ -768,3 +952,3 @@ throw new Error('All items must be objects'); | ||
| const stringValues = values.map(val => String(val)); | ||
| const types = stringValues.map(val => detectFieldType(val)); | ||
| const types = stringValues.map(val => detectFieldType(val, options)); | ||
| const typeCounts = {}; | ||
@@ -776,4 +960,4 @@ types.forEach(type => { | ||
| const typePriority = [ | ||
| 'uuid', 'email', 'phone', 'url', 'ip', 'color', | ||
| 'currency', 'percentage', 'date', 'boolean', | ||
| 'uuid', 'email', 'phone', 'url', 'ip', 'macaddress', 'mention', 'color', 'hashtag', | ||
| 'currency', 'percentage', 'date', 'cron', 'boolean', | ||
| 'number', 'array', 'object', 'string' | ||
@@ -783,2 +967,3 @@ ]; | ||
| let finalType = 'string'; | ||
| for (const priorityType of typePriority) { | ||
@@ -791,2 +976,3 @@ if (typeCounts[priorityType] === types.length) { | ||
| schema[fieldName] = finalType; | ||
@@ -793,0 +979,0 @@ }); |
+60
-53
| { | ||
| "name": "predict-data-types", | ||
| "version": "1.7.0", | ||
| "description": "A lightweight, zero-dependency npm package that predicts data types for comma-separated values, including JSON objects, and validates URLs, phone numbers, email addresses, IP addresses, colors, percentages, and currency within string values.", | ||
| "main": "index.js", | ||
| "author": "Melih Birim", | ||
| "types": "index.d.ts", | ||
| "repository": { | ||
| "type": "git", | ||
| "url": "git+https://github.com/melihbirim/predict-data-types.git" | ||
| }, | ||
| "bugs": { | ||
| "url": "https://github.com/melihbirim/predict-data-types/issues" | ||
| }, | ||
| "homepage": "https://github.com/melihbirim/predict-data-types#readme", | ||
| "engines": { | ||
| "node": ">=16.0.0" | ||
| }, | ||
| "files": [ | ||
| "index.js", | ||
| "index.d.ts", | ||
| "README.md", | ||
| "LICENSE" | ||
| ], | ||
| "scripts": { | ||
| "test": "mocha test", | ||
| "test:coverage": "npx c8 mocha test", | ||
| "lint": "eslint .", | ||
| "lint:fix": "eslint . --fix" | ||
| }, | ||
| "keywords": [ | ||
| "predict", | ||
| "data", | ||
| "types", | ||
| "json", | ||
| "url", | ||
| "phone", | ||
| "email", | ||
| "geolocation", | ||
| "ip", | ||
| "color", | ||
| "percentage", | ||
| "currency", | ||
| "validation" | ||
| ], | ||
| "license": "MIT", | ||
| "dependencies": {}, | ||
| "devDependencies": { | ||
| "@types/node": "^24.10.4", | ||
| "chai": "^4.5.0", | ||
| "eslint": "^9.39.2", | ||
| "mocha": "^10.8.2", | ||
| "typescript": "^5.9.3" | ||
| } | ||
| "name": "predict-data-types", | ||
| "version": "1.7.1", | ||
| "description": "A lightweight, zero-dependency npm package that predicts data types for comma-separated values, including JSON objects, and validates URLs, phone numbers, email addresses, IP addresses, colors, percentages, and currency within string values.", | ||
| "main": "index.js", | ||
| "author": "Melih Birim", | ||
| "types": "index.d.ts", | ||
| "repository": { | ||
| "type": "git", | ||
| "url": "git+https://github.com/melihbirim/predict-data-types.git" | ||
| }, | ||
| "bugs": { | ||
| "url": "https://github.com/melihbirim/predict-data-types/issues" | ||
| }, | ||
| "homepage": "https://github.com/melihbirim/predict-data-types#readme", | ||
| "engines": { | ||
| "node": ">=16.0.0" | ||
| }, | ||
| "files": [ | ||
| "index.js", | ||
| "index.d.ts", | ||
| "README.md", | ||
| "LICENSE" | ||
| ], | ||
| "scripts": { | ||
| "test": "mocha test", | ||
| "test:coverage": "npx c8 mocha test", | ||
| "lint": "eslint .", | ||
| "lint:fix": "eslint . --fix", | ||
| "prepare": "husky" | ||
| }, | ||
| "lint-staged": { | ||
| "*.js": [ | ||
| "eslint --fix" | ||
| ] | ||
| }, | ||
| "keywords": [ | ||
| "predict", | ||
| "data", | ||
| "types", | ||
| "json", | ||
| "url", | ||
| "phone", | ||
| "email", | ||
| "geolocation", | ||
| "ip", | ||
| "color", | ||
| "percentage", | ||
| "currency", | ||
| "validation" | ||
| ], | ||
| "license": "MIT", | ||
| "devDependencies": { | ||
| "@types/node": "^24.10.4", | ||
| "chai": "^4.5.0", | ||
| "eslint": "^9.39.2", | ||
| "husky": "^9.1.7", | ||
| "lint-staged": "^16.2.7", | ||
| "mocha": "^10.8.2", | ||
| "typescript": "^5.9.3" | ||
| } | ||
| } |
+31
-7
@@ -47,3 +47,3 @@ # Predict Data Types | ||
| Zero-dependency package for automatic data type detection from strings, arrays, and JSON objects. Detects 14+ data types including primitives, emails, URLs, UUIDs, dates, IPs, colors, percentages, and currency. | ||
| Zero-dependency package for automatic data type detection from strings, arrays, and JSON objects. Detects 18+ data types including primitives, emails, URLs, UUIDs, dates, IPs, colors, percentages, hashtags, mentions, and currency. | ||
@@ -55,3 +55,3 @@ > **💡 Important:** This library performs **runtime type detection** on string values, not static type checking. TypeScript is a compile-time type system for your code structure - this library analyzes actual data content at runtime. They solve completely different problems! | ||
| - **Smart Type Inference**: One `infer()` function handles strings, arrays, objects, and arrays of objects | ||
| - **14 Data Types**: Primitives plus emails, URLs, UUIDs, dates, IPs, colors, percentages, currency | ||
| - **18 Data Types**: Primitives plus emails, URLs, UUIDs, dates, IPs, colors, percentages, currency, hashtags, MAC addresses, mentions, CRON, and hashes | ||
| - **JSON Schema Generation**: Automatically generate JSON Schema from objects (compatible with Ajv, etc.) | ||
@@ -63,3 +63,3 @@ - **Type Constants**: Use `DataTypes` for type-safe comparisons instead of string literals | ||
| - **45+ Date Formats**: Comprehensive date parsing including month names and timezones | ||
| - **Battle-Tested**: 61 comprehensive test cases | ||
| - **Battle-Tested**: 75+ comprehensive test cases | ||
@@ -127,2 +127,4 @@ ## Installation | ||
| | `currency` | `$100`, `€50.99` | | ||
| | `hashtag` | `#hello`, `#OpenSource`, `#dev_community` | | ||
| | `mention` | `@username`, `@user_name123`, `@john-doe` | | ||
| | `array` | `[1, 2, 3]` | | ||
@@ -162,5 +164,8 @@ | `object` | `{"name": "John"}` | | ||
| DataTypes.IP // 'ip' | ||
| DataTypes.MACADDRESS // 'macaddress' | ||
| DataTypes.COLOR // 'color' | ||
| DataTypes.PERCENTAGE // 'percentage' | ||
| DataTypes.CURRENCY // 'currency' | ||
| DataTypes.MENTION // 'mention' | ||
| DataTypes.HASHTAG // 'hashtag' | ||
| ``` | ||
@@ -196,4 +201,11 @@ | ||
| infer("test@example.com"); // → 'email' | ||
| infer("@username"); // → 'mention' | ||
| infer("42"); // → 'number' | ||
| infer("#OpenSource"); // → 'hashtag' | ||
| infer(["#dev", "#opensource", "#community"]); // → 'hashtag' | ||
| // Ambiguous 3-char values (can be hex color or hashtag) | ||
| infer("#bad"); // → 'color' (default: hex takes priority) | ||
| infer("#bad", "none", { preferHashtagOver3CharHex: true }); // → 'hashtag' | ||
| // Array of values → Common DataType | ||
@@ -216,5 +228,7 @@ infer(["1", "2", "3"]); // → 'number' | ||
| ]); | ||
| // → { name: 'string', age: 'number', email: 'email' } | ||
| ``` | ||
| ### JSON Schema Format | ||
@@ -263,2 +277,4 @@ | ||
| }); | ||
| ``` | ||
@@ -302,4 +318,2 @@ | ||
| ### Complex Data | ||
| - ✅ Sample data files | ||
@@ -330,3 +344,3 @@ | ||
| ### `infer(input, format?)` | ||
| ### `infer(input, format?, options?)` | ||
@@ -339,2 +353,4 @@ **The main function - handles any input type:** | ||
| - `format` (optional): Output format - `Formats.NONE` (default) or `Formats.JSONSCHEMA` | ||
| - `options` (optional): Configuration options | ||
| - `preferHashtagOver3CharHex` (boolean, default: false): When true, treats ambiguous 3-character values like `#bad`, `#ace` as hashtags instead of hex colors | ||
@@ -370,4 +386,12 @@ **Returns:** | ||
| // → { type: 'object', properties: {...}, required: [...] } | ||
| // Hashtag field example | ||
| infer({ tag: "#OpenSource" }, Formats.JSONSCHEMA); | ||
| // { | ||
| // tag: { type: 'string', pattern: '^#[A-Za-z][A-Za-z0-9_]*$' } | ||
| // } | ||
| ``` | ||
| ### Constants | ||
@@ -379,3 +403,3 @@ | ||
| DataTypes.PHONE, DataTypes.URL, DataTypes.UUID, DataTypes.DATE, | ||
| DataTypes.IP, DataTypes.COLOR, DataTypes.PERCENTAGE, DataTypes.CURRENCY, | ||
| DataTypes.IP, DataTypes.COLOR, DataTypes.PERCENTAGE, DataTypes.CURRENCY, DataTypes.HASHTAG, | ||
| DataTypes.ARRAY, DataTypes.OBJECT | ||
@@ -382,0 +406,0 @@ ``` |
53945
16.5%1036
19.91%481
5.25%7
40%