Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

type-analyzer

Package Overview
Dependencies
Maintainers
2
Versions
14
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

type-analyzer - npm Package Compare versions

Comparing version 0.2.1 to 0.2.2

.eslintrc

6

CHANGELOG.md

@@ -19,2 +19,8 @@

#### [0.2.2] - Oct 07 2019 (b913061)
37de75d add eslint and prettier
0f10ab9 [Doc] Update README to add ignoredDataTypes
3e0134e remove TYPE city all together
2cf641b port internal type-analyzer commits over (currently at 1.5.0)
#### [0.2.1] - Jan 03 2019 (c6fb704)

@@ -21,0 +27,0 @@ 4c2a767 Add CHANGELOG.md

18

package.json
{
"name": "type-analyzer",
"description": "Infer types from columns in JSON",
"version": "0.2.1",
"version": "0.2.2",
"main": "index.js",

@@ -11,3 +11,3 @@ "repository": {

"scripts": {
"lint": "eslint src test",
"lint": "eslint src test --fix",
"test": "tape test"

@@ -18,11 +18,9 @@ },

"eslint": "^4.4.1",
"eslint-config-uber-es5": "^2.0.3",
"eslint-config-prettier": "^4.3.0",
"eslint-config-uber-es2015": "^3.1.2",
"eslint-plugin-prettier": "^3.1.0",
"eslint-plugin-es5": "^1.4.1",
"tape": "^4.6.3",
"tape-run": "^2.1.4"
},
"eslintConfig": {
"extends": [
"eslint-config-uber-es5"
]
},
"contributors": [

@@ -36,2 +34,6 @@ {

"email": "chua@uber.com"
},
{
"name": "Shan He",
"email": "shan@uber.com"
}

@@ -38,0 +40,0 @@ ],

# type-analyzer
> Infer types from CSV columns.
Infer data types from CSV columns.

@@ -10,16 +10,17 @@ ## Overview

* Geo-JSON,
* WKT Geometry,
* Boolean,
* Currency,
* Percent,
* DateTime,
* Date,
* Time,
* Int,
* Float,
* Number,
* Zipcode,
* City,
* String
* **DATE**
* **TIME**
* **DATETIME**
* **NUMBER**
* **INT**
* **FLOAT**
* **CURRENCY**
* **PERCENT**
* **STRING**
* **ZIPCODE**
* **BOOLEAN**
* **GEOMETRY**
* **GEOMETRY_FROM_STRING**
* **PAIR_GEOMETRY_FROM_STRING**
* **NONE**

@@ -32,13 +33,36 @@ ## Installation

Usage is super simple, simply call computeColMeta on your data like so
### `Analyzer.computeColMeta(data, rules, options)` (Function)
**Parameters**
- `data` **Array** _required_ An array of row objects
- `rules` **Array** _optional_ An array of custom regex rules
- `options` **Object** _optional_ Option object
- `options.ignoredDataTypes` **Array** _optional_ Data types to ignore
```js
var Analyzer = require('type-analyzer').Analyzer;
var data = [
{
"ST_AsText": "MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))",
"name": "san_francisco",
"lat": "37.7749295",
"lng": "-122.4194155",
"launch_date": "2010-06-05",
"added_at": "2010-06-05 12:00"
},
{
"ST_AsText": "MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))",
"name": "paris",
"lat": "48.856666",
"lng": "2.3509871",
"launch_date": "2011-12-04",
"added_at": "2010-06-05 12:00"
},
]
var colMeta = Analyzer.computeColMeta(data);
```
- **`rules`**
But imagine you want to ensure that a column full of ids represented as numbers
is identified as a column of strings, type-analyzer's got you. Simply pass an
array of rules:
You can pass in an array of custom rules, for example if you want to ensure that a column full of ids represented as numbers is identified as a column of strings. A rule can match a column either by its exact `name` or by a `regex` tested against the column name. Note: Analyzer prefers rules that use `name` over `regex`, because matching by name performs better.

@@ -51,4 +75,12 @@ ```js

var colMeta = Analyzer.computeColMeta(data, [{regex: /id/, dataType: 'STRING'}]);
```
Note: Analyzer prefers rules using name over regex since better performance.
- **`options.ignoredDataTypes`**
You can also pass in `ignoredDataTypes` to ignore certain types. This will improve your type checking performance.
```js
var DATA_TYPES = require('type-analyzer').DATA_TYPES;
var colMeta = Analyzer.computeColMeta(arr, [], {ignoredDataTypes: [DATA_TYPES.CURRENCY]})[0].type;
```

@@ -59,3 +91,7 @@

### `DATA_TYPES`
You can import all available types as a constant.
## Update

@@ -62,0 +98,0 @@ Breaking changes with v1.0.0: Regex has moved into src, but can more easily be

@@ -20,3 +20,2 @@ // Copyright (c) 2017 Uber Technologies, Inc.

// THE SOFTWARE.
'use strict';

@@ -43,9 +42,13 @@

/**
* Check if a given value is a null for a validator
* @param {String} value - value to be checked if null
* @param {String} validatorName - the name of the current validation function
* @return {Boolean} whether or not the current value is null
**/
* Check if a given value is a null for a validator
* @param {String} value - value to be checked if null
* @param {String} validatorName - the name of the current validation function
* @return {Boolean} whether or not the current value is null
**/
function valueIsNullForValidator(value, validatorName) {
if (value === null || value === CONSTANT.NULL || typeof value === 'undefined') {
if (
value === null ||
value === CONSTANT.NULL ||
typeof value === 'undefined'
) {
return true;

@@ -68,11 +71,14 @@ }

});
var validator = VALIDATOR_MAP[validatorName];
var strikes = Math.min(NUMBER_OF_ALLOWED_HITS, nonNullData.length);
var hits = 0;
nonNullData.some(function iterateAcrossData(row) {
var value = row[columnName];
if (Boolean(VALIDATOR_MAP[validatorName](value)) === false) {
strikes -= 1;
var isValueValid = Boolean(validator(row[columnName]));
if (isValueValid) {
hits++;
} else {
hits += 1;
strikes--;
}
if (strikes <= 0) {

@@ -83,2 +89,3 @@ return true;

});
return strikes > 0 && hits > 0;

@@ -104,11 +111,25 @@ };

/**
* Generate metadata about columns in a dataset
* @param {Object} data - data for which meta will be generated
* @param {Object} analyzerRules - regexs describing column overrides
* @return {Object} column metadata
**/
Analyzer.computeColMeta = function computeColMeta(data, analyzerRules) {
* Generate metadata about columns in a dataset
* @param {Object} data - data for which meta will be generated
* @param {Object} analyzerRules - regexs describing column overrides
* @param {Object} [options] - optional settings
* @param {Array} [options.ignoredDataTypes] - array of data types to ignore when validating
* @return {Object} column metadata
**/
Analyzer.computeColMeta = function computeColMeta(
data,
analyzerRules,
options
) {
var ignoredDataTypes = (options || {}).ignoredDataTypes || [];
var allValidators = CONSTANT.VALIDATORS.filter(function filterValidators(
validator
) {
return this.indexOf(validator) < 0;
},
ignoredDataTypes);
if (!data || Object.keys(data).length === 0) {
return [];
}
var _columns = Object.keys(data[0]);

@@ -120,5 +141,5 @@ /* eslint-disable max-statements */

var type = getTypeFromRules(analyzerRules, columnName);
// If it's not there then try to infer the type
// if it's not there then try to infer the type
if (!type) {
type = CONSTANT.VALIDATORS.find(buildValidatorFinder(data, columnName));
type = allValidators.find(buildValidatorFinder(data, columnName));
}

@@ -143,5 +164,5 @@ // if theres still no type, dump this column

label: columnName,
type: type,
category: category,
format: format
type,
category,
format
};

@@ -154,3 +175,6 @@

}
colMeta.geoType = typeof geoSample.type === 'string' ? geoSample.type.toUpperCase() : null;
colMeta.geoType =
typeof geoSample.type === 'string'
? geoSample.type.toUpperCase()
: null;
}

@@ -157,0 +181,0 @@ if (type === CONSTANT.DATA_TYPES.GEOMETRY_FROM_STRING) {

@@ -20,3 +20,2 @@ // Copyright (c) 2017 Uber Technologies, Inc.

// THE SOFTWARE.
'use strict';

@@ -40,3 +39,2 @@

STRING: 'STRING',
CITY: 'CITY',
ZIPCODE: 'ZIPCODE',

@@ -85,3 +83,2 @@

CONSTANT.DATA_TYPES.BOOLEAN,
CONSTANT.DATA_TYPES.CITY,
CONSTANT.DATA_TYPES.ZIPCODE

@@ -98,4 +95,4 @@ ];

CONSTANT.TYPES_TO_CATEGORIES = Object.keys(CONSTANT.POSSIBLE_TYPES)
.reduce(function generateTypeToCategoryMap(res, category) {
CONSTANT.TYPES_TO_CATEGORIES = Object.keys(CONSTANT.POSSIBLE_TYPES).reduce(
function generateTypeToCategoryMap(res, category) {
CONSTANT.POSSIBLE_TYPES[category].forEach(function loopAcrossTypes(type) {

@@ -105,3 +102,5 @@ res[type] = category;

return res;
}, {});
},
{}
);

@@ -138,3 +137,2 @@ // NOTE: the order of validator is important.

CONSTANT.DATA_TYPES.ZIPCODE,
CONSTANT.DATA_TYPES.CITY,
CONSTANT.DATA_TYPES.STRING

@@ -141,0 +139,0 @@ ];

@@ -20,4 +20,4 @@ // Copyright (c) 2017 Uber Technologies, Inc.

// THE SOFTWARE.
'use strict';
'use strict';
var TimeRegex = require('./time-regex');

@@ -48,6 +48,2 @@

// maybe we should import a list of cities we have.
// reference: http://stackoverflow.com/a/25677072
isCity: /^([a-zA-Z\u0080-\u024F]+(?:. |-| |'))*[a-zA-Z\u0080-\u024F]*$/,
isTime: TimeRegex.ALL_TIME_FORMAT_REGEX,

@@ -54,0 +50,0 @@

@@ -20,4 +20,4 @@ // Copyright (c) 2017 Uber Technologies, Inc.

// THE SOFTWARE.
'use strict';
'use strict';
/**

@@ -63,2 +63,3 @@ * Given an array of regexes to union, build a string of them

].reverse();
// the reverse is important to put the more specific regexs higher in the order

@@ -80,7 +81,9 @@ var TIME_FORMAT_REGEX_STRINGS = [

// {'(\d{2)....': 'M-D-YYYY'}
var TIME_FORMAT_REGEX_MAP = TIME_FORMAT_STRINGS
.reduce(function generateRegexMap(timeFormats, str, index) {
var TIME_FORMAT_REGEX_MAP = TIME_FORMAT_STRINGS.reduce(
function generateRegexMap(timeFormats, str, index) {
timeFormats[TIME_FORMAT_REGEX_STRINGS[index]] = str;
return timeFormats;
}, {});
},
{}
);

@@ -134,2 +137,3 @@ var ALL_TIME_FORMAT_REGEX_STR = union(Object.keys(TIME_FORMAT_REGEX_MAP));

];
var dateFormatStrings = [

@@ -148,7 +152,11 @@ 'YYYY-M-D',

// {'(\d{2)....': 'M-D-YYYY'}
var DATE_FORMAT_REGEX_MAP = dateFormatStrings
.reduce(function generateRegexMap(dateFormats, str, index) {
dateFormats[dateFormatRegexStrings[index]] = str;
return dateFormats;
}, {});
var DATE_FORMAT_REGEX_MAP = dateFormatStrings.reduce(function generateRegexMap(
dateFormats,
str,
index
) {
dateFormats[dateFormatRegexStrings[index]] = str;
return dateFormats;
},
{});

@@ -158,6 +166,8 @@ // COMPUTE THEIR CROSS PRODUCT

// {'SOME HELLISH REGEX': 'YYYY HH:MM:SS'}
var DATE_TIME_MAP = Object.keys(DATE_FORMAT_REGEX_MAP)
.reduce(function reduceDate(dateTimes, dateRegex) {
var DATE_TIME_MAP = Object.keys(DATE_FORMAT_REGEX_MAP).reduce(
function reduceDate(dateTimes, dateRegex) {
var dateStr = DATE_FORMAT_REGEX_MAP[dateRegex];
Object.keys(TIME_FORMAT_REGEX_MAP).forEach(function loopAcrosTimes(timeRegex) {
Object.keys(TIME_FORMAT_REGEX_MAP).forEach(function loopAcrosTimes(
timeRegex
) {
var timeStr = TIME_FORMAT_REGEX_MAP[timeRegex];

@@ -170,3 +180,5 @@ dateTimes[dateRegex + ' ' + timeRegex] = dateStr + ' ' + timeStr;

return dateTimes;
}, {});
},
{}
);
var ALL_DATE_TIME_REGEX = new RegExp(union(Object.keys(DATE_TIME_MAP)));

@@ -173,0 +185,0 @@

@@ -67,5 +67,4 @@ // Copyright (c) 2017 Uber Technologies, Inc.

VALIDATOR_MAP[DATA_TYPES.ZIPCODE] = Utils.buildRegexCheck('isZipCode');
VALIDATOR_MAP[DATA_TYPES.CITY] = Utils.buildRegexCheck('isCity');
VALIDATOR_MAP[DATA_TYPES.STRING] = Utils.isString;
module.exports = VALIDATOR_MAP;

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc