predict-data-types
Advanced tools
# Continuous integration and delivery for predict-data-types:
# lint, test (Node matrix plus Windows/macOS), audit, and publish to npm.
name: CI/CD Pipeline

on:
  push:
    branches: [ main, develop, upgrades ]
  pull_request:
    branches: [ main, develop ]

jobs:
  # Static checks: ESLint and a compile-only pass over the shipped type definitions.
  lint:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Node.js 18.x
        uses: actions/setup-node@v4
        with:
          node-version: '18.x'
          cache: 'npm'

      - name: Install dependencies
        run: npm ci --prefer-offline --no-audit

      - name: Run linting
        run: npm run lint

      - name: Check TypeScript definitions
        run: npx tsc --noEmit index.d.ts

  # Unit tests on every Node.js version in the matrix (Linux).
  test:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    strategy:
      matrix:
        node-version: [16.x, 18.x, 20.x]
      # Let the remaining versions finish even if one of them fails.
      fail-fast: false
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Node.js ${{ matrix.node-version }}
        uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node-version }}
          cache: 'npm'

      - name: Install dependencies
        run: npm ci --prefer-offline --no-audit

      - name: Run tests
        run: npm test

  # Same test suite on Windows and macOS, Node.js 18.x only.
  test-cross-platform:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 8
    strategy:
      matrix:
        os: [windows-latest, macos-latest]
      fail-fast: false
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18.x'
          cache: 'npm'

      - name: Install dependencies
        run: npm ci --prefer-offline --no-audit

      - name: Run tests
        run: npm test

  # Dependency audit; runs only for pushes to main or develop.
  security-audit:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop')
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18.x'
          cache: 'npm'

      - name: Install dependencies
        run: npm ci --prefer-offline --no-audit

      # Advisory only: moderate findings are reported but do not fail the job.
      - name: Security audit
        run: npm audit --audit-level moderate
        continue-on-error: true

      # Blocking: high/critical findings in non-dev dependencies fail the job.
      # `--omit=dev` replaces the deprecated `--production` flag.
      - name: Check for high/critical vulnerabilities
        run: npm audit --audit-level high --omit=dev

  # Publishes to npm on pushes to main once lint and both test jobs have passed.
  # NOTE(review): security-audit is not listed in `needs`, so a failing audit
  # does not block publishing - confirm this is intended.
  publish:
    needs: [lint, test, test-cross-platform]
    runs-on: ubuntu-latest
    timeout-minutes: 5
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18.x'
          registry-url: 'https://registry.npmjs.org'
          cache: 'npm'

      - name: Install dependencies
        run: npm ci --prefer-offline --no-audit

      # Compares package.json's version with the one on the registry; falls back
      # to 0.0.0 when `npm show` fails (for example, the package is not published yet).
      - name: Check if version changed
        id: version-check
        run: |
          CURRENT_VERSION=$(node -p "require('./package.json').version")
          PUBLISHED_VERSION=$(npm show predict-data-types version 2>/dev/null || echo "0.0.0")
          echo "current=$CURRENT_VERSION" >> "$GITHUB_OUTPUT"
          echo "published=$PUBLISHED_VERSION" >> "$GITHUB_OUTPUT"
          if [ "$CURRENT_VERSION" != "$PUBLISHED_VERSION" ]; then
            echo "should_publish=true" >> "$GITHUB_OUTPUT"
          else
            echo "should_publish=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Publish to npm
        if: steps.version-check.outputs.should_publish == 'true'
        run: npm publish
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+278
| # Contributing to Predict Data Types | ||
| Thank you for your interest in contributing to predict-data-types! This document provides guidelines and information for contributors. | ||
| ## ๐ Getting Started | ||
| ### Prerequisites | ||
| - Node.js (v16 or higher; CI tests 16.x, 18.x and 20.x) | ||
| - npm (v6 or higher) | ||
| - Git | ||
| ### Setting up the Development Environment | ||
| 1. **Fork and clone the repository:** | ||
| ```bash | ||
| git clone https://github.com/YOUR_USERNAME/predict-data-types.git | ||
| cd predict-data-types | ||
| ``` | ||
| 2. **Install dependencies:** | ||
| ```bash | ||
| npm install | ||
| ``` | ||
| 3. **Run tests to ensure everything works:** | ||
| ```bash | ||
| npm test | ||
| npm run lint | ||
| ``` | ||
| ## 🧪 Development Workflow | ||
| ### Test-Driven Development (TDD) | ||
| We follow **Test-Driven Development** practices. Before implementing any changes: | ||
| 1. **Write tests first** for the new functionality or bug fix | ||
| 2. **Run tests** to ensure they fail initially | ||
| 3. **Implement** the minimal code to make tests pass | ||
| 4. **Refactor** while keeping tests green | ||
| 5. **Run full test suite** to ensure no regressions | ||
| ### Code Quality Standards | ||
| - **ESLint**: All code must pass ESLint checks (`npm run lint`) | ||
| - **Testing**: All new code must have corresponding tests | ||
| - **JSDoc**: Public functions must have JSDoc documentation | ||
| - **YAGNI Principle**: Don't add features that aren't needed | ||
| - **DRY Principle**: Don't repeat yourself | ||
| ### Making Changes | ||
| 1. **Create a feature branch:** | ||
| ```bash | ||
| git checkout -b feature/your-feature-name | ||
| ``` | ||
| 2. **Write tests first:** | ||
| ```bash | ||
| # Add tests to test/index.spec.js | ||
| npm test # Should fail initially | ||
| ``` | ||
| 3. **Implement your changes:** | ||
| ```bash | ||
| # Make changes to index.js or other files | ||
| npm test # Should pass now | ||
| npm run lint # Should pass | ||
| ``` | ||
| 4. **Commit your changes:** | ||
| ```bash | ||
| git add . | ||
| git commit -m "feat: add awesome new feature" | ||
| ``` | ||
| ## ๐ Commit Message Guidelines | ||
| We follow conventional commit messages: | ||
| - `feat:` new features | ||
| - `fix:` bug fixes | ||
| - `docs:` documentation changes | ||
| - `style:` formatting, missing semicolons, etc. | ||
| - `refactor:` code refactoring | ||
| - `test:` adding missing tests | ||
| - `chore:` maintenance tasks | ||
| Examples: | ||
| ``` | ||
| feat: add support for IPv4 address detection | ||
| fix: resolve UUID pattern variable name bug | ||
| docs: update README with better examples | ||
| test: add edge cases for date validation | ||
| ``` | ||
| ## 🔧 Code Style | ||
| ### ESLint Configuration | ||
| We use ESLint with the following key rules: | ||
| - 4-space indentation | ||
| - Single quotes for strings | ||
| - Semicolons required | ||
| - No `console.log` in production code (checked in review; the ESLint `no-console` rule is turned off in `eslint.config.js`) | ||
| - Prefer const over let/var | ||
| ### JSDoc Documentation | ||
| All public functions must have JSDoc comments: | ||
| ```javascript | ||
| /** | ||
| * Brief description of what the function does | ||
| * @param {string} value - Description of the parameter | ||
| * @returns {boolean} Description of what is returned | ||
| * @throws {Error} Description of when errors are thrown | ||
| * | ||
| * @example | ||
| * // Usage example | ||
| * functionName('example input'); | ||
| * // Returns: expected output | ||
| */ | ||
| function functionName(value) { | ||
| // Implementation | ||
| } | ||
| ``` | ||
| ## 🧪 Testing Guidelines | ||
| ### Test Structure | ||
| Tests are organized by functionality: | ||
| ```javascript | ||
| describe('Feature Name', () => { | ||
| it('should handle specific case', () => { | ||
| // Arrange | ||
| const input = 'test data'; | ||
| // Act | ||
| const result = predictDataTypes(input); | ||
| // Assert | ||
| expect(result).to.deep.equal(expectedOutput); | ||
| }); | ||
| }); | ||
| ``` | ||
| ### Test Categories | ||
| 1. **Happy Path**: Normal use cases | ||
| 2. **Edge Cases**: Boundary conditions, empty inputs, etc. | ||
| 3. **Error Cases**: Invalid inputs, error conditions | ||
| 4. **Integration**: Multiple features working together | ||
| ### Running Tests | ||
| ```bash | ||
| npm test # Run all tests | ||
| npm test -- --grep "UUID" # Run specific tests | ||
| npm run lint # Check code style | ||
| npm run lint:fix # Auto-fix style issues | ||
| ``` | ||
| ## ๐ Bug Reports | ||
| When reporting bugs, please include: | ||
| 1. **Description**: Clear description of the issue | ||
| 2. **Steps to Reproduce**: Minimal code example | ||
| 3. **Expected Behavior**: What should happen | ||
| 4. **Actual Behavior**: What actually happens | ||
| 5. **Environment**: Node.js version, OS, etc. | ||
| **Bug Report Template:** | ||
| ````markdown | ||
| ## Bug Description | ||
| Brief description of the bug | ||
| ## Steps to Reproduce | ||
| ```js | ||
| const predictDataTypes = require('predict-data-types'); | ||
| const result = predictDataTypes('your input here'); | ||
| console.log(result); // Shows unexpected output | ||
| ``` | ||
| ## Expected Behavior | ||
| Should return: `{ 'expected': 'result' }` | ||
| ## Actual Behavior | ||
| Actually returns: `{ 'actual': 'result' }` | ||
| ## Environment | ||
| - Node.js version: 18.x | ||
| - Package version: 1.1.0 | ||
| - OS: macOS/Linux/Windows | ||
| ```` | ||
| ## 💡 Feature Requests | ||
| Before submitting feature requests: | ||
| 1. **Check existing issues** to avoid duplicates | ||
| 2. **Consider YAGNI**: Is this feature truly needed? | ||
| 3. **Provide use cases**: Why is this feature valuable? | ||
| 4. **Consider implementation**: How might this work? | ||
| **Feature Request Template:** | ||
| ```markdown | ||
| ## Feature Description | ||
| Clear description of the proposed feature | ||
| ## Use Case | ||
| Why is this feature needed? What problem does it solve? | ||
| ## Proposed Solution | ||
| How should this feature work? | ||
| ## Additional Context | ||
| Any other relevant information | ||
| ``` | ||
| ## ๐ Code Review Process | ||
| 1. **Automated Checks**: All PRs must pass tests and linting | ||
| 2. **Manual Review**: Code will be reviewed for: | ||
| - Correctness and functionality | ||
| - Test coverage and quality | ||
| - Documentation completeness | ||
| - Code style and maintainability | ||
| - Performance implications | ||
| 3. **Feedback**: Address any review comments | ||
| 4. **Approval**: At least one maintainer approval required | ||
| ## ๐ Pull Request Checklist | ||
| Before submitting a PR, ensure: | ||
| - [ ] Tests are written and passing (`npm test`) | ||
| - [ ] Code passes linting (`npm run lint`) | ||
| - [ ] JSDoc documentation is added for new public functions | ||
| - [ ] README is updated if needed | ||
| - [ ] Commit messages follow conventional format | ||
| - [ ] No console.log statements in production code | ||
| - [ ] Changes follow YAGNI and DRY principles | ||
| ## ๐ Performance Considerations | ||
| - **Regex Efficiency**: Avoid ReDoS-vulnerable patterns | ||
| - **Memory Usage**: Be mindful of large string processing | ||
| - **Time Complexity**: Consider algorithmic efficiency | ||
| - **Dependencies**: Minimize new dependencies | ||
| ## 🏷️ Release Process | ||
| Maintainers handle releases following semantic versioning: | ||
| - **PATCH**: Bug fixes | ||
| - **MINOR**: New features (backward compatible) | ||
| - **MAJOR**: Breaking changes | ||
| ## ❓ Questions? | ||
| - **GitHub Issues**: For bugs and feature requests | ||
| - **GitHub Discussions**: For questions and general discussion | ||
| - **Documentation**: Check README.md first | ||
| ## ๐ Recognition | ||
| Contributors will be recognized in: | ||
| - GitHub contributors list | ||
| - Release notes for significant contributions | ||
| - README acknowledgments | ||
| --- | ||
| Thank you for contributing to predict-data-types! Your help makes this project better for everyone. ๐ |
| // eslint.config.js - ESLint v9 flat configuration compatible with Node.js 16+ | ||
| module.exports = [ | ||
| { | ||
| ignores: ['node_modules/**', 'coverage/**', '*.config.js'] | ||
| }, | ||
| { | ||
| files: ['*.js', 'test/**/*.js'], | ||
| languageOptions: { | ||
| ecmaVersion: 2022, | ||
| sourceType: 'commonjs', | ||
| globals: { | ||
| // Node.js globals | ||
| console: 'readonly', | ||
| process: 'readonly', | ||
| require: 'readonly', | ||
| module: 'readonly', | ||
| exports: 'readonly', | ||
| __dirname: 'readonly', | ||
| __filename: 'readonly', | ||
| Buffer: 'readonly', | ||
| global: 'readonly', | ||
| // Test globals | ||
| describe: 'readonly', | ||
| it: 'readonly', | ||
| expect: 'readonly', | ||
| beforeEach: 'readonly', | ||
| afterEach: 'readonly', | ||
| before: 'readonly', | ||
| after: 'readonly' | ||
| } | ||
| }, | ||
| rules: { | ||
| // Core ESLint recommended rules (manual selection for Node 16 compatibility) | ||
| 'no-unused-vars': ['error', { 'argsIgnorePattern': '^_' }], | ||
| 'no-undef': 'error', | ||
| 'no-unreachable': 'error', | ||
| 'no-constant-condition': 'error', | ||
| 'no-dupe-args': 'error', | ||
| 'no-dupe-keys': 'error', | ||
| 'no-duplicate-case': 'error', | ||
| 'no-empty': 'error', | ||
| 'no-ex-assign': 'error', | ||
| 'no-extra-boolean-cast': 'error', | ||
| 'no-extra-semi': 'error', | ||
| 'no-func-assign': 'error', | ||
| 'no-invalid-regexp': 'error', | ||
| 'no-irregular-whitespace': 'error', | ||
| 'no-obj-calls': 'error', | ||
| 'no-regex-spaces': 'error', | ||
| 'no-sparse-arrays': 'error', | ||
| 'use-isnan': 'error', | ||
| 'valid-typeof': 'error', | ||
| // Code style rules | ||
| 'indent': ['error', 4], | ||
| 'quotes': ['error', 'single'], | ||
| 'semi': ['error', 'always'], | ||
| 'no-console': 'off', // Allow console in this project | ||
| 'no-trailing-spaces': 'error', | ||
| 'eol-last': ['error', 'always'], | ||
| 'prefer-const': 'error', | ||
| 'no-var': 'error', | ||
| 'comma-dangle': ['error', 'never'], | ||
| 'object-curly-spacing': ['error', 'always'], | ||
| 'array-bracket-spacing': ['error', 'never'], | ||
| 'space-before-function-paren': ['error', 'never'] | ||
| } | ||
| } | ||
| ]; |
+48
// Type definitions for predict-data-types
// Project: predict-data-types
// Definitions by: Melih Birim

/**
 * Predicts data types for comma-separated values or structured data
 *
 * @param str - The input string to analyze
 * @param firstRowIsHeader - Whether to treat the first row as column headers (default: false)
 * @returns Object mapping field names/values to their predicted data types
 * @throws Error when input is null, undefined, or not a string
 *
 * @example
 * ```typescript
 * const predictDataTypes = require('predict-data-types');
 *
 * // Basic usage
 * const result = predictDataTypes('John, 30, true, 2023-01-01');
 * // Result: { 'John': 'string', '30': 'number', 'true': 'boolean', '2023-01-01': 'date' }
 *
 * // With headers
 * const csvResult = predictDataTypes('name,age,active\nJohn,30,true', true);
 * // Result: { 'name': 'string', 'age': 'number', 'active': 'boolean' }
 * ```
 */
declare function predictDataTypes(str: string, firstRowIsHeader?: boolean): predictDataTypes.PredictionResult;

// The helper types live in a namespace merged with the function. Because this
// file uses `export =`, top-level type aliases would be module-private and
// consumers could not name them; as namespace members they are reachable as
// `predictDataTypes.DataType` and `predictDataTypes.PredictionResult`.
declare namespace predictDataTypes {
    /**
     * Supported data types that can be predicted
     */
    type DataType =
        | 'string'
        | 'number'
        | 'boolean'
        | 'email'
        | 'phone'
        | 'url'
        | 'uuid'
        | 'date'
        | 'array'
        | 'object';

    /**
     * Result object mapping field names/values to their predicted data types
     */
    type PredictionResult = Record<string, DataType>;
}

export = predictDataTypes;
+220
| # 🗺️ Predict Data Types - Roadmap | ||
| This roadmap outlines the planned features and improvements for the predict-data-types package. Items are organized by priority and implementation complexity. | ||
| ## 🎯 **Current Status (v1.1.0)** | ||
| ✅ **Completed Features:** | ||
| - Automatic type detection for 10+ data types | ||
| - CSV support with optional headers | ||
| - Robust input validation and error handling | ||
| - TypeScript definitions | ||
| - Comprehensive test coverage (31+ tests) | ||
| - Performance optimizations (cached regex patterns) | ||
| - ESLint configuration and code quality | ||
| - JSDoc documentation | ||
| ## ๐ **Roadmap** | ||
| ### **Phase 1: Enhanced Type Detection** (v1.2.0) | ||
| **High Impact, Easy Implementation** | ||
| - [ ] **Geographic Coordinates** | ||
| ```javascript | ||
| '40.7128,-74.0060' -> 'coordinates' | ||
| 'lat:40.7128,lng:-74.0060' -> 'coordinates' | ||
| ``` | ||
| - [ ] **IP Address Detection** | ||
| ```javascript | ||
| '192.168.1.1' -> 'ipv4' | ||
| '2001:db8::1' -> 'ipv6' | ||
| ``` | ||
| - [ ] **Color Code Detection** | ||
| ```javascript | ||
| '#FF5733' -> 'color_hex' | ||
| 'rgb(255, 87, 51)' -> 'color_rgb' | ||
| 'rgba(255, 87, 51, 0.8)' -> 'color_rgba' | ||
| 'hsl(120, 100%, 50%)' -> 'color_hsl' | ||
| ``` | ||
| - [ ] **File & MIME Type Detection** | ||
| ```javascript | ||
| 'document.pdf' -> 'file' | ||
| 'image/jpeg' -> 'mime_type' | ||
| 'application/json' -> 'mime_type' | ||
| ``` | ||
| ### **Phase 2: Configuration & Customization** (v1.3.0) | ||
| **Medium Impact, Medium Implementation** | ||
| - [ ] **Configuration Options** | ||
| ```javascript | ||
| const options = { | ||
| strictMode: true, // More conservative type detection | ||
| customPatterns: { // User-defined regex patterns | ||
| 'product_id': /^PRD-\d{6}$/, | ||
| 'order_code': /^ORD-[A-Z]{3}-\d{4}$/ | ||
| }, | ||
| dateFormats: ['DD/MM/YYYY'], // Limit date formats | ||
| confidence: true, // Return confidence scores | ||
| locale: 'en-US' // Region-specific detection | ||
| } | ||
| ``` | ||
| - [ ] **Confidence Scores** | ||
| ```javascript | ||
| // Returns: { value: 'email', confidence: 0.95, alternates: ['string'] } | ||
| ``` | ||
| - [ ] **Custom Pattern Support** | ||
| - Business-specific type definitions | ||
| - Regex pattern validation | ||
| - Pattern priority management | ||
| ### **Phase 3: Output Format Extensions** (v1.4.0) | ||
| **High Impact, Medium Implementation** | ||
| - [ ] **JSON Schema Generation** | ||
| ```javascript | ||
| predictDataTypes('John,25', { outputFormat: 'jsonSchema' }); | ||
| // Returns: { type: 'object', properties: { John: { type: 'string' } } } | ||
| ``` | ||
| - [ ] **SQL DDL Generation** | ||
| ```javascript | ||
| predictDataTypes('name,age\nJohn,25', { | ||
| header: true, | ||
| outputFormat: 'sql', | ||
| tableName: 'users' | ||
| }); | ||
| // Returns: "CREATE TABLE users (name VARCHAR(255), age INTEGER);" | ||
| ``` | ||
| - [ ] **TypeScript Interface Generation** | ||
| ```javascript | ||
| predictDataTypes('name,age\nJohn,25', { | ||
| header: true, | ||
| outputFormat: 'typescript', | ||
| interfaceName: 'User' | ||
| }); | ||
| // Returns: "interface User { name: string; age: number; }" | ||
| ``` | ||
| - [ ] **Other Format Support** | ||
| - MongoDB schema | ||
| - Prisma schema | ||
| - GraphQL schema | ||
| - Zod validation schema | ||
| ### **Phase 4: Performance & Scalability** (v1.5.0) | ||
| **Medium Impact, High Implementation** | ||
| - [ ] **Batch Processing API** | ||
| ```javascript | ||
| predictDataTypes.batch([ | ||
| 'user1,25,email1@test.com', | ||
| 'user2,30,email2@test.com' | ||
| ], { header: true, parallel: true }); | ||
| ``` | ||
| - [ ] **Streaming API for Large Files** | ||
| ```javascript | ||
| predictDataTypes.stream(fs.createReadStream('large.csv')) | ||
| .on('schema', (schema) => console.log(schema)) | ||
| .on('progress', (percent) => console.log(`${percent}% complete`)); | ||
| ``` | ||
| - [ ] **Performance Optimizations** | ||
| - Web Workers support for browser usage | ||
| - Memory-efficient processing for large datasets | ||
| - Incremental schema building | ||
| - Smart sampling for large files | ||
| ### **Phase 5: Advanced Features** (v2.0.0) | ||
| **High Impact, High Implementation** | ||
| - [ ] **Locale-Specific Detection** | ||
| - Region-specific date formats (EU vs US) | ||
| - Localized phone number validation | ||
| - Currency format detection | ||
| - Address format recognition | ||
| - [ ] **Security-Aware Detection** | ||
| ```javascript | ||
| '****-****-****-1234' -> 'credit_card_masked' | ||
| '***-**-1234' -> 'ssn_masked' | ||
| ``` | ||
| - [ ] **Statistical Analysis** | ||
| - Data quality metrics | ||
| - Null/empty value percentages | ||
| - Value distribution analysis | ||
| - Outlier detection | ||
| - [ ] **Machine Learning Integration** | ||
| - Pattern learning from user feedback | ||
| - Context-aware type detection | ||
| - Adaptive confidence scoring | ||
| ## 🎯 **Target Use Cases** | ||
| ### **Primary Users:** | ||
| 1. **Data Scientists & Analysts** - CSV exploration, pipeline automation | ||
| 2. **Backend Developers** - API validation, form processing | ||
| 3. **ETL Pipeline Developers** - Data transformation and validation | ||
| 4. **Frontend Developers** - File upload processing, data tables | ||
| 5. **No-Code Tool Builders** - Automatic schema inference | ||
| ### **Real-World Applications:** | ||
| - **Database Schema Generation** - Auto-create tables from CSV uploads | ||
| - **API Documentation** - Generate OpenAPI specs from sample data | ||
| - **Form Builder Tools** - Infer form field types from examples | ||
| - **Data Migration** - Type mapping between different systems | ||
| - **Analytics Dashboards** - Smart column type detection | ||
| ## 🤔 **Considerations & Trade-offs** | ||
| ### **What Might Be Removed:** | ||
| - [ ] **Review complex nested object/array parsing** - might be overkill for CSV use cases | ||
| - [ ] **Evaluate date format complexity** - too many formats can cause false positives | ||
| - [ ] **Assess phone validation scope** - very region-specific, consider simplification | ||
| ### **Architecture Decisions:** | ||
| - **Performance vs Features** - Balance detection accuracy with speed | ||
| - **Bundle Size vs Functionality** - Consider tree-shaking and modular approach | ||
| - **Backward Compatibility** - Maintain API stability across versions | ||
| ## ๐ **Timeline Estimates** | ||
| - **Phase 1 (v1.2.0)**: ~2-3 weeks | ||
| - **Phase 2 (v1.3.0)**: ~3-4 weeks | ||
| - **Phase 3 (v1.4.0)**: ~4-5 weeks | ||
| - **Phase 4 (v1.5.0)**: ~6-8 weeks | ||
| - **Phase 5 (v2.0.0)**: ~10-12 weeks | ||
| ## 🤝 **Contributing** | ||
| We welcome contributions! Please check our [Contributing Guidelines](CONTRIBUTING.md) for details on: | ||
| - How to propose new features | ||
| - Development setup and testing | ||
| - Code style and quality standards | ||
| - Pull request process | ||
| ## 💬 **Feedback & Discussion** | ||
| Have ideas or suggestions? We'd love to hear from you: | ||
| - ๐ [Issue Tracker](https://github.com/melihbirim/predict-data-types/issues) - Bug reports and feature requests | ||
| - 💭 [Discussions](https://github.com/melihbirim/predict-data-types/discussions) - Ideas and questions | ||
| - 📧 Direct feedback via issues or pull requests | ||
| --- | ||
| **Last Updated:** September 12, 2025 | ||
| **Next Review:** October 12, 2025 |
// Smoke test for consuming predict-data-types from TypeScript.
// Runs the predictor with and without a header row, prints both results, and
// assigns individual predictions to `string`-typed constants.
const predictDataTypes = require('./index');

// Header-less input: each value becomes a key of the result object
const basicResult = predictDataTypes('John, 30, true, john@example.com');
console.log('Basic types:', basicResult);

// Header mode: the first line names the columns, the second holds the values
const headerRow = 'name,age,active,email';
const valueRow = 'John,30,true,john@example.com';
const csvData = `${headerRow}\n${valueRow}`;
const csvResult = predictDataTypes(csvData, true);
console.log('CSV with headers:', csvResult);

// Each predicted type must be assignable to `string`
const userName: string = basicResult['John'];
const userAge: string = basicResult['30'];
const userActive: string = basicResult['true'];
const userEmail: string = basicResult['john@example.com'];

console.log('TypeScript compilation successful!');
console.log(`User: ${userName}, Age type: ${userAge}, Active: ${userActive}, Email type: ${userEmail}`);
+275
-106
@@ -1,32 +0,53 @@ | ||
| const moment = require('moment'); | ||
| const os = require('os'); | ||
| const dayjs = require('dayjs'); | ||
| const customParseFormat = require('dayjs/plugin/customParseFormat'); | ||
| // Enable dayjs plugins | ||
| dayjs.extend(customParseFormat); | ||
/**
 * Regex patterns compiled once at module load and shared by the detectors.
 * The table is frozen so no caller can swap a pattern by accident. None of
 * the patterns use the `g`/`y` flags, so `.test()` carries no `lastIndex` state.
 */
const PATTERNS = Object.freeze({
    // http(s) URL with an optional "www." prefix, a dotted host and an optional path/query
    URL: /^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_+.~#?&/=]*)$/,
    // 8-4-4-4-12 hex groups; third group starts with 0-5, fourth with 0, 8, 9, a or b
    UUID: /^[0-9a-f]{8}-[0-9a-f]{4}-[0-5][0-9a-f]{3}-[089ab][0-9a-f]{3}-[0-9a-f]{12}$/i,
    // Optional "+<1-3 digits> " prefix, then 3-3-4 digits with optional parentheses and separators
    PHONE: /^(\+\d{1,3}\s)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}$/,
    // local@domain.tld with a TLD of at least two letters
    EMAIL: /^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$/i,
    // Captures year, month and day from a YYYY-M-D style fragment anywhere in the string
    DATE_COMPONENT: /(\d{4})-(\d{1,2})-(\d{1,2})/,
    // Character set accepted for the lenient date fallback
    DATE_CHARS: /^[\d\-/\s:.TZ+-]+$/,
    // A zero immediately followed by another digit, e.g. '01'
    LEADING_ZERO: /^0\d/
});
| /** | ||
| * Checks if a given value represents a valid date in various formats | ||
| * @param {string} value - The value to check | ||
| * @returns {boolean} True if the value is a valid date, false otherwise | ||
| */ | ||
| function isDate(value) { | ||
| const formats = [ | ||
| moment.ISO_8601, | ||
| "YYYY-MM-DDTHH:mm:ss.SSSZ", | ||
| "YYYY-MM-DDTHH:mm:ssZ", | ||
| "YYYY-MM-DDTHH:mm:ss", | ||
| "YYYY-MM-DDTHH:mmZ", | ||
| "YYYY-MM-DDTHH:mm", | ||
| "YYYY-MM-DD HH:mm:ss.SSS", | ||
| "YYYY-MM-DD HH:mm:ss", | ||
| "YYYY-MM-DD HH:mm", | ||
| "DD/MM/YYYY", | ||
| "DD/MM/YYYY HH:mm:ss", | ||
| "DD/MM/YYYY HH:mm", | ||
| "MM/DD/YYYY", | ||
| "MM/DD/YYYY HH:mm:ss", | ||
| "MM/DD/YYYY HH:mm", | ||
| "DD-MMM-YYYY", | ||
| "DD-MMM-YYYY HH:mm:ss", | ||
| "DD-MMM-YYYY HH:mm", | ||
| "MMM-DD-YYYY", | ||
| "MMM-DD-YYYY HH:mm:ss", | ||
| "MMM-DD-YYYY HH:mm", | ||
| 'YYYY-MM-DDTHH:mm:ss.SSSZ', | ||
| 'YYYY-MM-DDTHH:mm:ss.SSSZ', | ||
| 'YYYY-MM-DDTHH:mm:ssZ', | ||
| 'YYYY-MM-DDTHH:mm:ss', | ||
| 'YYYY-MM-DDTHH:mmZ', | ||
| 'YYYY-MM-DDTHH:mm', | ||
| 'YYYY-MM-DD HH:mm:ss.SSS', | ||
| 'YYYY-MM-DD HH:mm:ss', | ||
| 'YYYY-MM-DD HH:mm', | ||
| 'YYYY-MM-DD', | ||
| 'DD/MM/YYYY', | ||
| 'DD/MM/YYYY HH:mm:ss', | ||
| 'DD/MM/YYYY HH:mm', | ||
| 'MM/DD/YYYY', | ||
| 'MM/DD/YYYY HH:mm:ss', | ||
| 'MM/DD/YYYY HH:mm', | ||
| 'DD-MMM-YYYY', | ||
| 'DD-MMM-YYYY HH:mm:ss', | ||
| 'DD-MMM-YYYY HH:mm', | ||
| 'MMM-DD-YYYY', | ||
| 'MMM-DD-YYYY HH:mm:ss', | ||
| 'MMM-DD-YYYY HH:mm' | ||
| ]; | ||
| // First try strict parsing with specific formats | ||
| for (let i = 0; i < formats.length; i++) { | ||
| const date = moment(value, formats[i], true); | ||
| if (date.isValid()) { | ||
| const date = dayjs(value, formats[i], true); | ||
| if (date.isValid() && date.format(formats[i]) === value) { | ||
| return true; | ||
@@ -36,98 +57,170 @@ } | ||
| // For ISO format and basic dates, be more conservative | ||
| // Only accept if it looks like a valid date and doesn't contain invalid characters | ||
| if (!PATTERNS.DATE_CHARS.test(value)) { | ||
| return false; | ||
| } | ||
| const defaultParsed = dayjs(value); | ||
| if (defaultParsed.isValid() && value.length >= 8) { // At least YYYY-MM-DD length | ||
| // Check if the parsed date's string representation matches the input | ||
| // This prevents dayjs from "fixing" invalid dates like 2023-13-32 | ||
| const reformatted = defaultParsed.format('YYYY-MM-DD'); | ||
| if (value.startsWith(reformatted) || value === reformatted) { | ||
| const year = defaultParsed.year(); | ||
| const month = defaultParsed.month() + 1; | ||
| const day = defaultParsed.date(); | ||
| // Extract original components for validation | ||
| const dateMatch = value.match(PATTERNS.DATE_COMPONENT); | ||
| if (dateMatch) { | ||
| const [, origYear, origMonth, origDay] = dateMatch; | ||
| if (parseInt(origYear) === year && | ||
| parseInt(origMonth) === month && | ||
| parseInt(origDay) === day) { | ||
| return true; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| return false; | ||
| } | ||
/**
 * Checks if a given value represents a boolean (true/false, yes/no, on/off, 1/0)
 *
 * Strings are compared case-insensitively against the six keywords. Non-string
 * values match only the numbers 1 and 0, so the string '1' is not a boolean.
 *
 * @param {*} val - The value to check
 * @returns {boolean} True if the value is a boolean representation, false otherwise
 *
 * @example
 * isBoolean('Yes'); // true
 * isBoolean('off'); // true
 * isBoolean('1');   // false
 */
function isBoolean(val) {
    if (typeof val === 'string') {
        const lower = val.toLowerCase();
        return lower === 'true' || lower === 'false' ||
               lower === 'yes' || lower === 'no' ||
               lower === 'on' || lower === 'off';
    }
    return val === 1 || val === 0;
}
/**
 * Checks if a given value is a valid URL
 *
 * Delegates to the shared, precompiled PATTERNS.URL instead of building a
 * regex on every call. Only http:// and https:// URLs match.
 *
 * @param {string} value - The value to check
 * @returns {boolean} True if the value is a valid URL, false otherwise
 */
function isURL(value) {
    return PATTERNS.URL.test(value);
}
/**
 * Checks if a given value is a valid UUID
 *
 * Delegates to the shared, precompiled PATTERNS.UUID: five hyphen-separated
 * hex groups (8-4-4-4-12), matched case-insensitively, with the version digit
 * limited to 0-5.
 *
 * @param {string} value - The value to check
 * @returns {boolean} True if the value is a valid UUID, false otherwise
 */
function isUUID(value) {
    return PATTERNS.UUID.test(value);
}
/**
 * Checks if a given value is a valid phone number
 *
 * Delegates to the shared, precompiled PATTERNS.PHONE: an optional
 * "+<country code> " prefix followed by a 3-3-4 digit number, with optional
 * parentheses around the first group and space, dot or dash separators.
 *
 * @param {string} value - The value to check
 * @returns {boolean} True if the value is a valid phone number, false otherwise
 */
function isPhoneNumber(value) {
    return PATTERNS.PHONE.test(value);
}
/**
 * Checks if a given value is a valid email address
 *
 * Delegates to the shared, precompiled PATTERNS.EMAIL (case-insensitive
 * local@domain.tld with a TLD of at least two letters).
 *
 * @param {string} value - The value to check
 * @returns {boolean} True if the value is a valid email address, false otherwise
 */
function isEmail(value) {
    return PATTERNS.EMAIL.test(value);
}
/**
 * Tokenizes a string by splitting on commas while respecting quoted strings
 * and nested objects/arrays.
 *
 * A single cursor walks the text once. Leading spaces before a token are
 * skipped; trailing spaces of a plain token are kept. An empty field between
 * two commas yields an empty-string token, while a trailing comma yields no
 * extra token.
 *
 * @param {string} text - The text to tokenize
 * @returns {string[]} Array of tokens
 *
 * @example
 * tokenize('John, 30, {"a":[1,2]}');
 * // Returns: ['John', '30', '{"a":[1,2]}']
 */
function tokenize(text) {
    const tokens = [];
    const textLength = text.length;
    let i = 0;

    while (i < textLength) {
        // Skip whitespace at the beginning of the token
        while (i < textLength && text[i] === ' ') {
            i++;
        }
        if (i >= textLength) break;

        const char = text[i];
        const tokenStart = i;

        if (char === '{' || char === '[') {
            // Nested object/array: consume up to the matching closer. Only the
            // bracket kind that opened the token is counted for nesting depth.
            const openChar = char;
            const closeChar = char === '{' ? '}' : ']';
            let depth = 1;
            i++;
            while (i < textLength && depth > 0) {
                if (text[i] === openChar) {
                    depth++;
                } else if (text[i] === closeChar) {
                    depth--;
                }
                i++;
            }
            tokens.push(text.substring(tokenStart, i));
        } else if (char === '"') {
            // Quoted string: consume up to the next unescaped double quote
            i++;
            while (i < textLength && text[i] !== '"') {
                if (text[i] === '\\') {
                    i++; // Skip escaped character
                }
                i++;
            }
            i++; // Include closing quote (substring clamps if the quote is missing)
            tokens.push(text.substring(tokenStart, i));
        } else {
            // Plain token: runs until the next comma or the start of an object/array
            while (i < textLength && text[i] !== ',' && text[i] !== '{' && text[i] !== '[') {
                i++;
            }
            tokens.push(text.substring(tokenStart, i));
        }

        // Skip comma separator
        if (i < textLength && text[i] === ',') {
            i++;
        }
    }

    return tokens;
}
| function predictDataTypes(str, firstRowIsHeader = false) { | ||
| let header = ""; | ||
| /** | ||
| * Parses input string into header and data components. | ||
| * In header mode the input is split into lines on \r\n, \n or \r; the first | ||
| * line is split on commas to form the header and only the second line is | ||
| * tokenized as data (any further lines are not read here). | ||
| * @param {string} str - The input string | ||
| * @param {boolean} firstRowIsHeader - Whether first row should be treated as headers | ||
| * @returns {{header: string[], data: string[]}} Parsed header and data; in header | ||
| * mode both are empty arrays when there are fewer than two lines or when | ||
| * either of the first two lines is blank | ||
| */ | ||
| function parseHeaderAndData(str, firstRowIsHeader) { | ||
| let header = ''; | ||
| let data = str; | ||
| if (firstRowIsHeader) { | ||
| let tmp = str.split(os.EOL); | ||
| if (tmp.length > 1) { | ||
| header = tmp[0].split(","); | ||
| data = tokenize(tmp[1]); | ||
| }else{ | ||
| return {}; | ||
| // Handle different line endings: \r\n (Windows), \n (Unix), \r (old Mac) | ||
| const lines = str.split(/\r?\n|\r/); | ||
| if (lines.length > 1 && lines[0].trim() && lines[1].trim()) { | ||
| header = lines[0].split(','); | ||
| data = tokenize(lines[1]); | ||
| } else { | ||
| return { header: [], data: [] }; | ||
| } | ||
@@ -138,36 +231,112 @@ } else { | ||
| } | ||
| return { header, data }; | ||
| } | ||
| /** | ||
| * Detects the data type for a single field value. | ||
| * The value is trimmed and then checked in a fixed order; the first check | ||
| * that matches wins: boolean, number, date, url, uuid, phone, email, array, | ||
| * object, and finally string as the fallback. | ||
| * Array and object detection only looks at the surrounding [ ] or { } | ||
| * delimiters; the content between them is not parsed here. | ||
| * @param {string} value - The value to analyze | ||
| * @returns {string} The detected data type: boolean, number, date, url, | ||
| * uuid, phone, email, array, object or string | ||
| */ | ||
| function detectFieldType(value) { | ||
| const trimmedValue = value.trim(); | ||
| if (isBoolean(trimmedValue)) { | ||
| return 'boolean'; | ||
| } else if (!isNaN(parseFloat(trimmedValue)) && isFinite(trimmedValue) && !PATTERNS.LEADING_ZERO.test(trimmedValue)) { | ||
| // Numbers, but not those with leading zeros like '01' | ||
| return 'number'; | ||
| } else if (isDate(trimmedValue)) { | ||
| return 'date'; | ||
| } else if (isURL(trimmedValue)) { | ||
| return 'url'; | ||
| } else if (isUUID(trimmedValue)) { | ||
| return 'uuid'; | ||
| } else if (isPhoneNumber(trimmedValue)) { | ||
| return 'phone'; | ||
| } else if (isEmail(trimmedValue)) { | ||
| return 'email'; | ||
| } else if (trimmedValue.startsWith('[') && trimmedValue.endsWith(']')) { | ||
| return 'array'; | ||
| } else if (trimmedValue.startsWith('{') && trimmedValue.endsWith('}')) { | ||
| return 'object'; | ||
| } else { | ||
| return 'string'; | ||
| } | ||
| } | ||
| /** | ||
| * Processes data fields and assigns types. | ||
| * Each entry of data is typed with detectFieldType and stored under the | ||
| * trimmed header entry found at the same index. In header mode, data entries | ||
| * whose index is beyond the header length are skipped. When the header entry | ||
| * at an index is missing (or has no trim method) the key falls back to | ||
| * field_<index>. | ||
| * @param {string[]} data - Array of data values | ||
| * @param {string[]} header - Array of header names | ||
| * @param {boolean} firstRowIsHeader - Whether headers are being used | ||
| * @returns {Object<string, string>} Object mapping field names to types | ||
| */ | ||
| function processFields(data, header, firstRowIsHeader) { | ||
| const types = {}; | ||
| console.log(`header ${header}`) | ||
| console.log(`data ${data}`) | ||
| for (let i = 0; i < data.length; i++) { | ||
| const part = data[i].trim(); | ||
| const field = header[i].trim(); | ||
| // When using headers, only process fields that have corresponding headers | ||
| if (firstRowIsHeader && i >= header.length) { | ||
| continue; // Skip extra data fields beyond header length | ||
| } | ||
| if (isBoolean(part)) { | ||
| types[field] = "boolean"; | ||
| } else if (!isNaN(parseFloat(part)) && isFinite(part)) { | ||
| types[field] = "number"; | ||
| } else if (isDate(part)) { | ||
| types[field] = "date"; | ||
| } else if (isURL(part)) { | ||
| types[field] = "url"; | ||
| } else if (isUUID(part)) { | ||
| types[field] = "uuid"; | ||
| } else if (isPhoneNumber(part)) { | ||
| types[field] = "phone"; | ||
| } else if (isEmail(part)) { | ||
| types[field] = "email"; | ||
| } else if (part.startsWith("[") && part.endsWith("]")) { | ||
| types[field] = "array"; | ||
| } else if (part.startsWith("{") && part.endsWith("}")) { | ||
| types[field] = "object"; | ||
| } | ||
| else { | ||
| types[field] = "string"; | ||
| } | ||
| // Handle missing header fields gracefully | ||
| const field = (header[i] && header[i].trim) ? header[i].trim() : `field_${i}`; | ||
| const fieldType = detectFieldType(data[i]); | ||
| types[field] = fieldType; | ||
| } | ||
| return types; | ||
| } | ||
| /** | ||
| * Predicts data types for comma-separated values or structured data. | ||
| * Validates the input, splits it into header and data via parseHeaderAndData, | ||
| * then types every field via processFields. | ||
| * @param {string} str - The input string to analyze | ||
| * @param {boolean} [firstRowIsHeader=false] - Whether to treat the first row as column headers | ||
| * @returns {Object<string, string>} Object mapping field names/values to their predicted data types; | ||
| * an empty object for empty or whitespace-only input and when parsing yields no data | ||
| * @throws {Error} When input is null, undefined, or not a string (message: Input must be a string) | ||
| * | ||
| * @example | ||
| * // Basic usage | ||
| * predictDataTypes('John, 30, true, 2023-01-01') | ||
| * // Returns: { 'John': 'string', '30': 'number', 'true': 'boolean', '2023-01-01': 'date' } | ||
| * | ||
| * @example | ||
| * // With headers | ||
| * predictDataTypes('name,age,active\nJohn,30,true', true) | ||
| * // Returns: { 'name': 'string', 'age': 'number', 'active': 'boolean' } | ||
| */ | ||
| function predictDataTypes(str, firstRowIsHeader = false) { | ||
| // Input validation | ||
| if (str === null || str === undefined) { | ||
| throw new Error('Input must be a string'); | ||
| } | ||
| if (typeof str !== 'string') { | ||
| throw new Error('Input must be a string'); | ||
| } | ||
| // Handle empty string case or whitespace-only strings | ||
| if (str.length === 0 || str.trim().length === 0) { | ||
| return {}; | ||
| } | ||
| // Parse input into header and data components | ||
| const { header, data } = parseHeaderAndData(str, firstRowIsHeader); | ||
| // Handle case where no valid data was parsed | ||
| if (data.length === 0) { | ||
| return {}; | ||
| } | ||
| // Process fields and detect their types | ||
| const types = processFields(data, header, firstRowIsHeader); | ||
| // If no data was processed but we have non-empty input, treat as single empty string | ||
| if (Object.keys(types).length === 0 && str.length > 0) { | ||
| types[''] = 'string'; | ||
| } | ||
| return types; | ||
| } | ||
@@ -174,0 +343,0 @@ |
+10
-4
| { | ||
| "name": "predict-data-types", | ||
| "version": "1.1.0", | ||
| "version": "1.2.0", | ||
| "description": "A simple npm package that predicts data types for comma-separated values, including JSON objects, and validates URLs, phone numbers, email addresses, and geolocation data within string values.", | ||
| "main": "index.js", | ||
| "types": "index.d.ts", | ||
| "scripts": { | ||
| "test": "mocha test" | ||
| "test": "mocha test", | ||
| "lint": "eslint .", | ||
| "lint:fix": "eslint . --fix" | ||
| }, | ||
@@ -22,8 +25,11 @@ "keywords": [ | ||
| "dependencies": { | ||
| "moment": "^2.29.4" | ||
| "dayjs": "^1.11.18" | ||
| }, | ||
| "devDependencies": { | ||
| "@types/node": "^24.3.1", | ||
| "chai": "^4.3.7", | ||
| "mocha": "^10.2.0" | ||
| "eslint": "^9.35.0", | ||
| "mocha": "^10.2.0", | ||
| "typescript": "^5.9.2" | ||
| } | ||
| } |
+200
-24
| # Predict Data Types | ||
| A simple npm package that predicts data types for comma-separated values, including JSON objects, and validates URLs, phone numbers, email addresses, and geolocation data within string values. | ||
| [npm package](https://www.npmjs.com/package/predict-data-types) | ||
| [License: MIT](https://opensource.org/licenses/MIT) | ||
| ## Installation | ||
| A lightweight and robust npm package that automatically predicts data types for comma-separated values, including JSON objects, and validates URLs, phone numbers, email addresses, UUIDs, dates, and more within string values. | ||
| ## ✨ Features | ||
| - 🎯 **Automatic Type Detection**: Intelligently identifies 9+ data types | ||
| - ๐ **Input Validation**: Robust error handling and input validation | ||
| - ๐ **CSV Support**: Parse CSV-like data with optional headers | ||
| - ๐ **Lightweight**: Minimal dependencies (only dayjs) | ||
| - ๐ **Well Tested**: Comprehensive test suite with edge cases | ||
| - 🔧 **TypeScript Ready**: Type definitions included (`index.d.ts`) | ||
| - ⚡ **Fast**: Optimized tokenization and regex patterns | ||
| ## 📦 Installation | ||
| ```bash | ||
| npm install predict-data-types | ||
| ``` | ||
| ## Supported data types | ||
| - boolean | ||
| - string | ||
| - number (decimal and integers are mapped to a number) // TODO improvement for specific data type | ||
| - phone number | ||
| - url | ||
| - array (recursive check) | ||
| - object | ||
| ## 🔧 Supported Data Types | ||
| ## Usage | ||
| ``` js | ||
| | Type | Description | Examples | | ||
| |------|-------------|----------| | ||
| | `string` | Plain text values | `'John'`, `'Hello World'` | | ||
| | `number` | Integers and decimals | `42`, `3.14`, `-17`, `1e10` | | ||
| | `boolean` | Boolean representations | `true`, `false`, `yes`, `no` | | ||
| | `email` | Valid email addresses | `user@example.com`, `test+tag@domain.co.uk` | | ||
| | `phone` | Phone numbers | `555-555-5555`, `(555) 555-5555`, `+1 555-555-5555` | | ||
| | `url` | Web URLs | `https://example.com`, `http://subdomain.site.co.uk/path` | | ||
| | `uuid` | UUID v1-v5 | `550e8400-e29b-41d4-a716-446655440000` | | ||
| | `date` | Various date formats | `2023-12-31`, `31/12/2023`, `2023-12-31T23:59:59Z` | | ||
| | `array` | JSON arrays | `[1, 2, 3]`, `["apple", "banana"]` | | ||
| | `object` | JSON objects | `{"name": "John", "age": 30}` | | ||
| ## ๐ Usage | ||
| ### Basic Usage | ||
| ```javascript | ||
| const predictDataTypes = require('predict-data-types'); | ||
| const text = 'John, 123 Main St, john@example.com, 555-555-5555, {"name": "John", "age": 30}, http://example.com'; | ||
| const text = 'John, 30, true, john@example.com, 2023-01-01'; | ||
| const types = predictDataTypes(text); | ||
| console.log(types); | ||
| // Output: | ||
| // { | ||
| // 'John': 'string', | ||
| // '30': 'number', | ||
| // 'true': 'boolean', | ||
| // 'john@example.com': 'email', | ||
| // '2023-01-01': 'date' | ||
| // } | ||
| ``` | ||
| { | ||
| 'John': 'string', | ||
| '123 Main St': 'string', | ||
| 'john@example.com': 'email', | ||
| '555-555-5555': 'phone', | ||
| '{"name": "John", "age": 30}': 'object', | ||
| 'http://example.com': 'url' | ||
| ### Advanced Examples | ||
| #### CSV-like Data with Headers | ||
| ```javascript | ||
| const csvData = `name,age,active,email,signup_date | ||
| John,30,true,john@example.com,2023-01-01 | ||
| Jane,25,false,jane@example.com,2023-02-15`; | ||
| const types = predictDataTypes(csvData, true); // true = first row is header | ||
| console.log(types); | ||
| // Output: | ||
| // { | ||
| // 'name': 'string', | ||
| // 'age': 'number', | ||
| // 'active': 'boolean', | ||
| // 'email': 'email', | ||
| // 'signup_date': 'date' | ||
| // } | ||
| ``` | ||
| #### Mixed Complex Data | ||
| ```javascript | ||
| const complexData = ` | ||
| user@test.com, | ||
| 555-123-4567, | ||
| https://github.com/user/repo, | ||
| 550e8400-e29b-41d4-a716-446655440000, | ||
| {"settings": {"theme": "dark"}}, | ||
| [1, 2, 3, 4, 5] | ||
| `; | ||
| const types = predictDataTypes(complexData); | ||
| console.log(types); | ||
| // Output: | ||
| // { | ||
| // 'user@test.com': 'email', | ||
| // '555-123-4567': 'phone', | ||
| // 'https://github.com/user/repo': 'url', | ||
| // '550e8400-e29b-41d4-a716-446655440000': 'uuid', | ||
| // '{"settings": {"theme": "dark"}}': 'object', | ||
| // '[1, 2, 3, 4, 5]': 'array' | ||
| // } | ||
| ``` | ||
| #### Date Format Detection | ||
| ```javascript | ||
| const dates = '2023-12-31, 31/12/2023, 2023-12-31T23:59:59Z, Dec-31-2023'; | ||
| const types = predictDataTypes(dates); | ||
| console.log(types); | ||
| // Output: | ||
| // { | ||
| // '2023-12-31': 'date', | ||
| // '31/12/2023': 'date', | ||
| // '2023-12-31T23:59:59Z': 'date', | ||
| // 'Dec-31-2023': 'date' | ||
| // } | ||
| ``` | ||
| ## ๐ API Reference | ||
| ### `predictDataTypes(input, firstRowIsHeader)` | ||
| **Parameters:** | ||
| - `input` (string): The comma-separated string to analyze | ||
| - `firstRowIsHeader` (boolean, optional): Whether to treat the first row as column headers (default: `false`) | ||
| **Returns:** | ||
| - `Object<string, string>`: Mapping of field names/values to their predicted data types | ||
| **Throws:** | ||
| - `Error`: When input is null, undefined, or not a string | ||
| **Supported Date Formats:** | ||
| - ISO 8601: `2023-12-31T23:59:59Z` | ||
| - Standard: `YYYY-MM-DD`, `DD/MM/YYYY`, `MM/DD/YYYY` | ||
| - With time: `YYYY-MM-DD HH:mm:ss` | ||
| - Month names: `DD-MMM-YYYY`, `MMM-DD-YYYY` | ||
| ## ⚠️ Error Handling | ||
| The package includes robust error handling: | ||
| ```javascript | ||
| // These will throw errors | ||
| try { | ||
| predictDataTypes(null); // Error: Input must be a string | ||
| predictDataTypes(123); // Error: Input must be a string | ||
| predictDataTypes([1,2,3]); // Error: Input must be a string | ||
| } catch (error) { | ||
| console.error(error.message); | ||
| } | ||
| ``` | ||
| // These will return empty object or appropriate results | ||
| predictDataTypes(''); // Returns: {} | ||
| predictDataTypes(' '); // Returns: {} | ||
| ``` | ||
| ## 🧪 Development | ||
| ### Running Tests | ||
| ```bash | ||
| npm test # Run all tests | ||
| npm run lint # Check code quality | ||
| npm run lint:fix # Auto-fix lint issues | ||
| ``` | ||
| ### Test Coverage | ||
| The package includes comprehensive tests covering: | ||
| - ✅ All supported data types | ||
| - ✅ Edge cases and error conditions | ||
| - ✅ Input validation | ||
| - ✅ Complex nested structures | ||
| - ✅ Various date formats | ||
| - ✅ Header mode functionality | ||
| ## ๐ Changelog | ||
| ### v1.1.0 | ||
| - ✅ Fixed UUID pattern variable name bug | ||
| - ✅ Replaced deprecated moment.js with dayjs | ||
| - ✅ Added comprehensive input validation | ||
| - ✅ Fixed security vulnerabilities | ||
| - ✅ Added ESLint configuration | ||
| - ✅ Enhanced test coverage | ||
| - ✅ Added JSDoc documentation | ||
| - ✅ Improved README documentation | ||
| ## 🤝 Contributing | ||
| Contributions are welcome! Please read our [Contributing Guidelines](CONTRIBUTING.md) for details on our code of conduct and development process. | ||
| 1. Fork the repository | ||
| 2. Create your feature branch (`git checkout -b feature/amazing-feature`) | ||
| 3. Run tests (`npm test`) | ||
| 4. Commit your changes (`git commit -m 'Add amazing feature'`) | ||
| 5. Push to the branch (`git push origin feature/amazing-feature`) | ||
| 6. Open a Pull Request | ||
| ## ๐ License | ||
| This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. | ||
| ## ๐ Support | ||
| - ๐ [Documentation](README.md) | ||
| - 🗺️ [Roadmap](ROADMAP.md) - Planned features and improvements | ||
| - 🐛 [Issue Tracker](https://github.com/melihbirim/predict-data-types/issues) | ||
| - 💬 [Discussions](https://github.com/melihbirim/predict-data-types/discussions) | ||
| --- | ||
| Made with ❤️ by [Melih Birim](https://github.com/melihbirim) |
+264
-2
@@ -6,3 +6,3 @@ const chai = require('chai'); | ||
| describe('predictDataTypes', () => { | ||
| it('should predict data types for string and url', () => { | ||
@@ -60,2 +60,264 @@ const text = 'John, http://asd.com'; | ||
| }) | ||
| // Edge cases and input validation tests | ||
| describe('Input validation and edge cases', () => { | ||
| it('should handle null input', () => { | ||
| expect(() => predictDataTypes(null)).to.throw('Input must be a string'); | ||
| }); | ||
| it('should handle undefined input', () => { | ||
| expect(() => predictDataTypes(undefined)).to.throw('Input must be a string'); | ||
| }); | ||
| it('should handle empty string input', () => { | ||
| const result = predictDataTypes(''); | ||
| expect(result).to.deep.equal({}); | ||
| }); | ||
| it('should handle whitespace-only string input', () => { | ||
| const result = predictDataTypes(' '); | ||
| expect(result).to.deep.equal({}); | ||
| }); | ||
| it('should handle non-string input types', () => { | ||
| expect(() => predictDataTypes(123)).to.throw('Input must be a string'); | ||
| expect(() => predictDataTypes({})).to.throw('Input must be a string'); | ||
| expect(() => predictDataTypes([])).to.throw('Input must be a string'); | ||
| }); | ||
| it('should handle single character input', () => { | ||
| const result = predictDataTypes('a'); | ||
| expect(result).to.deep.equal({ 'a': 'string' }); | ||
| }); | ||
| it('should handle missing header fields gracefully', () => { | ||
| const csvData = 'name,age,active\nJohn,30'; // Missing third column | ||
| const result = predictDataTypes(csvData, true); | ||
| expect(result).to.deep.equal({ | ||
| 'name': 'string', | ||
| 'age': 'number' | ||
| }); | ||
| }); | ||
| it('should handle more data fields than header fields', () => { | ||
| const csvData = 'name,age\nJohn,30,true,extra'; // Extra columns | ||
| const result = predictDataTypes(csvData, true); | ||
| expect(result).to.deep.equal({ | ||
| 'name': 'string', | ||
| 'age': 'number' | ||
| }); | ||
| }); | ||
| }); | ||
| // UUID tests - testing the bug fix | ||
| describe('UUID detection', () => { | ||
| it('should correctly detect valid UUIDs', () => { | ||
| const text = '550e8400-e29b-41d4-a716-446655440000'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| '550e8400-e29b-41d4-a716-446655440000': 'uuid' | ||
| }); | ||
| }); | ||
| it('should correctly detect multiple UUIDs', () => { | ||
| const text = '550e8400-e29b-41d4-a716-446655440000, 6ba7b810-9dad-11d1-80b4-00c04fd430c8'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| '550e8400-e29b-41d4-a716-446655440000': 'uuid', | ||
| '6ba7b810-9dad-11d1-80b4-00c04fd430c8': 'uuid' | ||
| }); | ||
| }); | ||
| it('should not detect invalid UUIDs', () => { | ||
| const text = '550e8400-e29b-41d4-a716-44665544000, not-a-uuid-at-all'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| '550e8400-e29b-41d4-a716-44665544000': 'string', | ||
| 'not-a-uuid-at-all': 'string' | ||
| }); | ||
| }); | ||
| }); | ||
| // Enhanced boolean detection tests | ||
| describe('Boolean detection', () => { | ||
| it('should detect various true/false representations', () => { | ||
| const text = 'true, false, TRUE, FALSE, yes, no, YES, NO, on, off, ON, OFF'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| 'true': 'boolean', | ||
| 'false': 'boolean', | ||
| 'TRUE': 'boolean', | ||
| 'FALSE': 'boolean', | ||
| 'yes': 'boolean', | ||
| 'no': 'boolean', | ||
| 'YES': 'boolean', | ||
| 'NO': 'boolean', | ||
| 'on': 'boolean', | ||
| 'off': 'boolean', | ||
| 'ON': 'boolean', | ||
| 'OFF': 'boolean' | ||
| }); | ||
| }); | ||
| it('should detect comprehensive boolean representations', () => { | ||
| const text = 'true, false, yes, no, on, off'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| 'true': 'boolean', | ||
| 'false': 'boolean', | ||
| 'yes': 'boolean', | ||
| 'no': 'boolean', | ||
| 'on': 'boolean', | ||
| 'off': 'boolean' | ||
| }); | ||
| }); | ||
| it('should not detect partial boolean words', () => { | ||
| const text = 'truthy, falsy, yesss, nope, online, offline, 10, 01'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| 'truthy': 'string', | ||
| 'falsy': 'string', | ||
| 'yesss': 'string', | ||
| 'nope': 'string', | ||
| 'online': 'string', | ||
| 'offline': 'string', | ||
| '10': 'number', | ||
| '01': 'string' // Leading zero makes it a string | ||
| }); | ||
| }); | ||
| }); | ||
| // Email detection tests | ||
| describe('Email detection', () => { | ||
| it('should detect valid email addresses', () => { | ||
| const text = 'user@example.com, test.email+tag@domain.co.uk'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| 'user@example.com': 'email', | ||
| 'test.email+tag@domain.co.uk': 'email' | ||
| }); | ||
| }); | ||
| it('should not detect invalid email addresses', () => { | ||
| const text = '@example.com, user@, user@domain, user.domain.com'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| '@example.com': 'string', | ||
| 'user@': 'string', | ||
| 'user@domain': 'string', | ||
| 'user.domain.com': 'string' | ||
| }); | ||
| }); | ||
| }); | ||
| // Date format tests | ||
| describe('Date detection', () => { | ||
| it('should detect various date formats', () => { | ||
| const text = '2023-12-31, 31/12/2023, 12/31/2023, 2023-12-31T23:59:59Z'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| '2023-12-31': 'date', | ||
| '31/12/2023': 'date', | ||
| '12/31/2023': 'date', | ||
| '2023-12-31T23:59:59Z': 'date' | ||
| }); | ||
| }); | ||
| it('should not detect invalid dates', () => { | ||
| const text = '2023-13-32, 32/13/2023, not-a-date'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| '2023-13-32': 'string', | ||
| '32/13/2023': 'string', | ||
| 'not-a-date': 'string' | ||
| }); | ||
| }); | ||
| }); | ||
| // Number detection edge cases | ||
| describe('Number detection', () => { | ||
| it('should detect various number formats', () => { | ||
| const text = '42, -42, 3.14, -3.14, 0, 0.0, 1e10, -1e-10'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| '42': 'number', | ||
| '-42': 'number', | ||
| '3.14': 'number', | ||
| '-3.14': 'number', | ||
| '0': 'number', | ||
| '0.0': 'number', | ||
| '1e10': 'number', | ||
| '-1e-10': 'number' | ||
| }); | ||
| }); | ||
| it('should not detect invalid numbers', () => { | ||
| const text = '42abc, 3.14.15, --42, 1e10e5'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| '42abc': 'string', | ||
| '3.14.15': 'string', | ||
| '--42': 'string', | ||
| '1e10e5': 'string' | ||
| }); | ||
| }); | ||
| }); | ||
| // Tokenization edge cases | ||
| describe('Tokenization edge cases', () => { | ||
| it('should handle nested objects and arrays', () => { | ||
| const text = '{"nested": {"array": [1, 2, 3]}}, [{"key": "value"}]'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| '{"nested": {"array": [1, 2, 3]}}': 'object', | ||
| '[{"key": "value"}]': 'array' | ||
| }); | ||
| }); | ||
| it('should handle quoted strings with commas', () => { | ||
| const text = '"Hello, world", "Another, string"'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| '"Hello, world"': 'string', | ||
| '"Another, string"': 'string' | ||
| }); | ||
| }); | ||
| it('should handle strings with escaped quotes', () => { | ||
| const text = '"He said \\"Hello\\"", normal-string'; | ||
| const types = predictDataTypes(text); | ||
| expect(types).to.deep.equal({ | ||
| '"He said \\"Hello\\""': 'string', | ||
| 'normal-string': 'string' | ||
| }); | ||
| }); | ||
| }); | ||
| // Header mode tests | ||
| describe('Header mode functionality', () => { | ||
| it('should handle single line input in header mode', () => { | ||
| const result = predictDataTypes('name, age', true); | ||
| expect(result).to.deep.equal({}); | ||
| }); | ||
| it('should handle multi-line input with headers', () => { | ||
| const text = 'name,age,active\nJohn,30,true'; | ||
| const types = predictDataTypes(text, true); | ||
| expect(types).to.deep.equal({ | ||
| 'name': 'string', | ||
| 'age': 'number', | ||
| 'active': 'boolean' | ||
| }); | ||
| }); | ||
| it('should handle mismatched header and data columns', () => { | ||
| const text = 'name,age,active\nJohn,30'; // Missing third column | ||
| const types = predictDataTypes(text, true); | ||
| expect(types).to.deep.equal({ | ||
| 'name': 'string', | ||
| 'age': 'number' | ||
| }); | ||
| }); | ||
| }); | ||
| }); |
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
53152
462.69%11
120%718
248.54%217
429.27%5
150%1
Infinity%+ Added
+ Added
- Removed
- Removed