type-analyzer
Advanced tools
Comparing version 0.2.5 to 0.2.6
{ | ||
"name": "type-analyzer", | ||
"description": "Infer types from columns in JSON", | ||
"version": "0.2.5", | ||
"version": "0.2.6", | ||
"main": "index.js", | ||
@@ -6,0 +6,0 @@ "repository": { |
@@ -40,2 +40,8 @@ // Copyright (c) 2017 Uber Technologies, Inc. | ||
var VALIDATOR_CONSIDERS_NAN_NULL = { | ||
INT: true, | ||
NUMBER: true, | ||
FLOAT: true | ||
}; | ||
/** | ||
@@ -51,2 +57,3 @@ * Check if a given value is a null for a validator | ||
value === CONSTANT.NULL || | ||
value === CONSTANT.DB_NULL || | ||
typeof value === 'undefined' | ||
@@ -57,2 +64,6 @@ ) { | ||
if (Number.isNaN(value) && VALIDATOR_CONSIDERS_NAN_NULL[validatorName]) { | ||
return true; | ||
} | ||
if (value === '' && VALIDATOR_CONSIDERS_EMPTY_STRING_NULL[validatorName]) { | ||
@@ -110,2 +121,8 @@ return true; | ||
function _pushIntoArr(arr, item) { | ||
arr.push(item); | ||
} | ||
function _noop() {} | ||
/** | ||
@@ -123,3 +140,6 @@ * Generate metadata about columns in a dataset | ||
) { | ||
var ignoredDataTypes = (options || {}).ignoredDataTypes || []; | ||
options = options || {}; | ||
var ignoredDataTypes = options.ignoredDataTypes || []; | ||
var keepUnknowns = options.keepUnknowns; | ||
var maybePushUnknown = keepUnknowns ? _pushIntoArr : _noop; | ||
var allValidators = CONSTANT.VALIDATORS.filter(function filterValidators( | ||
@@ -137,3 +157,3 @@ validator | ||
var _columns = Object.keys(data[0]); | ||
/* eslint-disable max-statements */ | ||
/* eslint-disable max-statements, complexity */ | ||
return _columns.reduce(function iterator(res, columnName) { | ||
@@ -147,7 +167,18 @@ var format = ''; | ||
} | ||
// if theres still no type, dump this column | ||
var category = Analyzer._category(type); | ||
var colMeta = { | ||
key: columnName, | ||
label: columnName, | ||
type: CONSTANT.DATA_TYPES.STRING, | ||
category: category || CONSTANT.CATEGORIES.DIMENSION, | ||
format: '' | ||
}; | ||
// if theres still no type, potentially dump this column | ||
if (!type) { | ||
maybePushUnknown(res, colMeta); | ||
return res; | ||
} | ||
colMeta.type = type; | ||
// if its a time, detect and record the time | ||
@@ -158,2 +189,3 @@ if (type && CONSTANT.TIME_VALIDATORS.indexOf(type) !== -1) { | ||
if (sample === null) { | ||
maybePushUnknown(res, colMeta); | ||
return res; | ||
@@ -163,14 +195,8 @@ } | ||
} | ||
colMeta.format = format; | ||
var colMeta = { | ||
key: columnName, | ||
label: columnName, | ||
type, | ||
category, | ||
format | ||
}; | ||
if (type === CONSTANT.DATA_TYPES.GEOMETRY) { | ||
var geoSample = Utils.findFirstNonNullValue(data, columnName); | ||
if (geoSample === null) { | ||
maybePushUnknown(res, colMeta); | ||
return res; | ||
@@ -186,2 +212,3 @@ } | ||
if (geoStringSample === null) { | ||
maybePushUnknown(res, colMeta); | ||
return res; | ||
@@ -188,0 +215,0 @@ } |
@@ -64,2 +64,4 @@ // Copyright (c) 2017 Uber Technologies, Inc. | ||
// Common in databases like MySQL: https://dev.mysql.com/doc/refman/8.0/en/null-values.html | ||
DB_NULL: '\\N', | ||
NULL: 'NULL' | ||
@@ -66,0 +68,0 @@ }; |
@@ -20,2 +20,3 @@ // Copyright (c) 2017 Uber Technologies, Inc. | ||
// THE SOFTWARE. | ||
/* eslint-disable max-statements */ | ||
@@ -85,3 +86,5 @@ 'use strict'; | ||
arr = [[1,2,3], [4,5,6], [7,8,9], ['1', 'b'], ['2', 3], ['he']].map(mapArr); | ||
arr = [[1, 2, 3], [4, 5, 6], [7, 8, 9], ['1', 'b'], ['2', 3], ['he']].map( | ||
mapArr | ||
); | ||
assert.equal( | ||
@@ -99,3 +102,3 @@ Analyzer.computeColMeta(arr)[0].type, | ||
arr = [{a: 1}, [4,5,6], {b: 2}, {c: 3}, {d: 4}, {d: 5}].map(mapArr); | ||
arr = [{a: 1}, [4, 5, 6], {b: 2}, {c: 3}, {d: 4}, {d: 5}].map(mapArr); | ||
assert.equal( | ||
@@ -120,2 +123,9 @@ Analyzer.computeColMeta(arr)[0].type, | ||
arr = [NaN, NaN, NaN, 1, '222,222', '-333,333,333', -4, '+5,000'].map(mapArr); | ||
assert.equal( | ||
Analyzer.computeColMeta(arr)[0].type, | ||
'INT', | ||
'Treats NaNs as nulls and inteprets values as integer' | ||
); | ||
arr = ['-.1111', '+.2', '+3,333.3333', 444.4444, '5,555,555.5'].map(mapArr); | ||
@@ -129,4 +139,12 @@ assert.equal( | ||
arr = [ | ||
1, '222,222', '-333,333,333', -4, '+5,000', | ||
'-.1111', '+.2', '+3,333.3333', 444.4444, '5,555,555.5' | ||
1, | ||
'222,222', | ||
'-333,333,333', | ||
-4, | ||
'+5,000', | ||
'-.1111', | ||
'+.2', | ||
'+3,333.3333', | ||
444.4444, | ||
'5,555,555.5' | ||
].map(mapArr); | ||
@@ -139,2 +157,18 @@ assert.equal( | ||
arr = [ | ||
NaN, | ||
NaN, | ||
NaN, | ||
'-.1111', | ||
'+.2', | ||
'+3,333.3333', | ||
444.4444, | ||
'5,555,555.5' | ||
].map(mapArr); | ||
assert.equal( | ||
Analyzer.computeColMeta(arr)[0].type, | ||
'FLOAT', | ||
'Treats NaNs as nulls still inteprets values as floats' | ||
); | ||
arr = ['$1', '$0.12', '$1.12', '$1,000.12', '$1,000.12'].map(mapArr); | ||
@@ -160,2 +194,18 @@ assert.equal( | ||
arr = [ | ||
'\\N', | ||
'\\N', | ||
'\\N', | ||
'10.12345%', | ||
'-10.222%', | ||
'+1,000.33%', | ||
'10.4%', | ||
'10.55%' | ||
].map(mapArr); | ||
assert.equal( | ||
Analyzer.computeColMeta(arr)[0].type, | ||
'PERCENT', | ||
'Ignore database nulls, and inteprets values as percents' | ||
); | ||
[2.3, '+4,000', '-5,023.234', '2.3e+2', '$23,203', '23.45%'].forEach( | ||
@@ -167,5 +217,5 @@ function loopAcrossExamples(ex) { | ||
'MEASURE', | ||
'Inteprets sci or money valeus, eg ' | ||
+ ex + | ||
' formatted values as numbers' | ||
'Inteprets sci or money valeus, eg ' + | ||
ex + | ||
' formatted values as numbers' | ||
); | ||
@@ -191,5 +241,16 @@ } | ||
arr = [ | ||
1, '222,222', '-333,333,333', -4, '+5,000', | ||
'-.1111', '+.2', '+3,333.3333', 444.4444, '5,555,555.5', | ||
'182891173641581479', '2e53', '1e16', 182891173641581479 | ||
1, | ||
'222,222', | ||
'-333,333,333', | ||
-4, | ||
'+5,000', | ||
'-.1111', | ||
'+.2', | ||
'+3,333.3333', | ||
444.4444, | ||
'5,555,555.5', | ||
'182891173641581479', | ||
'2e53', | ||
'1e16', | ||
182891173641581479 | ||
].map(mapArr); | ||
@@ -242,3 +303,27 @@ assert.equal( | ||
arr = ['\\N', '\\N', '\\N', '\\N', '\\N'].map(mapArr); | ||
assert.equal( | ||
Analyzer.computeColMeta(arr, [], {keepUnknowns: true})[0].type, | ||
'STRING', | ||
'Interprets as a string' | ||
); | ||
assert.end(); | ||
}); | ||
test('Analyzer: handling of unknown types', function t(assert) { | ||
var arr = []; | ||
['', null, undefined, ''].forEach(function loopAcrossExamples(ex) { | ||
arr = [ex, ex, ex, ex, ex, ex].map(mapArr); | ||
assert.equal( | ||
Analyzer.computeColMeta(arr, [], { | ||
keepUnknowns: true | ||
})[0].type, | ||
'STRING', | ||
'Interprets ' + ex + ' as a string' | ||
); | ||
}); | ||
assert.end(); | ||
}); |
@@ -84,3 +84,8 @@ // Copyright (c) 2017 Uber Technologies, Inc. | ||
type: 'LineString', | ||
coordinates: [[102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]] | ||
coordinates: [ | ||
[102.0, 0.0], | ||
[103.0, 1.0], | ||
[104.0, 0.0], | ||
[105.0, 1.0] | ||
] | ||
} | ||
@@ -92,3 +97,9 @@ }, | ||
coordinates: [ | ||
[[100.0, 0.0], [101.0, 0.0], [101.0, 1.0], [100.0, 1.0], [100.0, 0.0]] | ||
[ | ||
[100.0, 0.0], | ||
[101.0, 0.0], | ||
[101.0, 1.0], | ||
[100.0, 1.0], | ||
[100.0, 0.0] | ||
] | ||
] | ||
@@ -145,3 +156,3 @@ } | ||
expectedType, | ||
`correctly indentifies ${ expectedType } as WKT ${ expectedType }s` | ||
`correctly indentifies ${expectedType} as WKT ${expectedType}s` | ||
); | ||
@@ -277,2 +288,43 @@ }); | ||
test('Analyzer: nulls without dropping unknowns, and just intepreting as string', function t(assert) { | ||
var nullExample = [ | ||
{a: '2016-11-04 12:43:36.711458', b: null, c: null, d: null}, | ||
{a: '2016-11-04 12:43:36.711458', b: null, c: null, d: null}, | ||
{a: '2016-11-04 12:43:36.711458', b: null, c: null, d: null}, | ||
{a: '2016-11-04 12:43:36.711458', b: null, c: null, d: null}, | ||
{a: '2016-11-04 12:43:36.711458', b: 1.2, c: null, d: null}, | ||
{a: '2016-11-04 12:43:36.711458', b: null, c: null, d: null}, | ||
{a: '2016-11-04 12:43:36.711458', b: null, c: null, d: null}, | ||
{a: '2016-11-04 12:43:36.711458', b: null, c: null, d: null} | ||
]; | ||
var known = [ | ||
{ | ||
category: 'TIME', | ||
format: 'YYYY-M-D HH:mm:ss.SSSS', | ||
key: 'a', | ||
label: 'a', | ||
type: 'DATETIME' | ||
}, | ||
{category: 'MEASURE', format: '', key: 'b', label: 'b', type: 'FLOAT'}, | ||
{category: 'TIME', format: '', key: 'c', label: 'c', type: 'DATETIME'}, | ||
{ | ||
category: 'GEOMETRY', | ||
format: '', | ||
key: 'd', | ||
label: 'd', | ||
type: 'GEOMETRY_FROM_STRING' | ||
} | ||
]; | ||
var rules = [ | ||
{regex: /c/, dataType: 'DATETIME'}, | ||
{regex: /d/, dataType: 'GEOMETRY_FROM_STRING'} | ||
]; | ||
var analyzed = Analyzer.computeColMeta(nullExample, rules, { | ||
keepUnknowns: true | ||
}); | ||
assert.deepEqual(analyzed, known, 'Analyzer handles null data well'); | ||
assert.end(); | ||
}); | ||
test('Analyzer: long test', function t(assert) { | ||
@@ -279,0 +331,0 @@ var analyzed = Analyzer.computeColMeta(LargeData); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
2063
84403
24
1