numpy-parser
Advanced tools
Comparing version 1.0.2 to 1.2.1
{ | ||
"name": "numpy-parser", | ||
"version": "1.0.2", | ||
"version": "1.2.1", | ||
"description": "A JS parser for binary .npy files.", | ||
"main": "src/main.js", | ||
"main": "dist/main.js", | ||
"scripts": { | ||
"test": "mocha" | ||
"test": "./node_modules/.bin/mocha --require @babel/register \"test/**/*.spec.js\"", | ||
"build": "BABEL_ENV=production ./node_modules/.bin/babel src/ --out-dir dist/ --source-maps", | ||
"prepublishOnly": "npm run-script build" | ||
}, | ||
@@ -26,4 +28,10 @@ "repository": { | ||
"devDependencies": { | ||
"@babel/cli": "^7.2.3", | ||
"@babel/core": "^7.2.2", | ||
"@babel/preset-env": "^7.2.3", | ||
"@babel/register": "^7.0.0", | ||
"babel-minify": "^0.5.0", | ||
"babel-preset-minify": "^0.5.0", | ||
"mocha": "^5.2.0" | ||
} | ||
} |
204
src/main.js
@@ -1,73 +0,163 @@ | ||
/**
 * Decodes the bytes of a buffer as text, mapping each byte to the character
 * with the same code unit (ASCII for bytes below 0x80).
 *
 * @param {ArrayBuffer} buffer - Raw bytes to decode.
 * @returns {string} One character per input byte; '' for an empty buffer.
 */
function asciiDecode(buffer) {
  const bytes = new Uint8Array(buffer);
  // Spreading every byte into one String.fromCharCode call exceeds the
  // engine's argument limit on large buffers, so decode in bounded chunks.
  const CHUNK_SIZE = 8192;
  let text = '';
  for (let start = 0; start < bytes.length; start += CHUNK_SIZE) {
    text += String.fromCharCode(...bytes.subarray(start, start + CHUNK_SIZE));
  }
  return text;
}
/* A simple stateful wrapper around a DataView that keeps track of the current offset.*/ | ||
/**
 * Reads an unsigned 16-bit little-endian integer from the first two bytes of
 * a buffer.
 *
 * @param {ArrayBuffer} buffer - Buffer holding at least two bytes.
 * @returns {number} Value in the range 0..65535.
 * @throws {RangeError} If the buffer holds fewer than two bytes.
 */
function readUint16LE(buffer) {
  // `true` selects little-endian byte order: byte 0 is the low byte.
  return new DataView(buffer).getUint16(0, true);
}
class DataViewReader { | ||
constructor(dataViewOrBuffer) { | ||
if (dataViewOrBuffer instanceof DataView) { | ||
this.dataView = dataViewOrBuffer; | ||
} else if (dataViewOrBuffer instanceof ArrayBuffer) { | ||
this.dataView = new DataView(dataViewOrBuffer); | ||
} | ||
this.offset = 0; | ||
} | ||
function typedArrayFromBuffer(dtype, buffer, offset) { | ||
switch (dtype) { | ||
/* Variable length accessors */ | ||
// Unsigned Integer | ||
case '|u1': | ||
return new Uint8Array(buffer, offset); | ||
case '<u2': | ||
return new Uint16Array(buffer, offset); | ||
case '<u4': | ||
return new Uint32Array(buffer, offset); | ||
readBytes(length) { | ||
const buffer = new DataView(this.dataView.buffer, this.offset, length) | ||
this.offset += length; | ||
return buffer; | ||
} | ||
// Integer | ||
case '|i1': | ||
return new Int8Array(buffer, offset); | ||
case '<i2': | ||
return new Int16Array(buffer, offset); | ||
case '<i4': | ||
return new Int32Array(buffer, offset); | ||
readAndASCIIDecodeBytes(length) { | ||
const array = new Uint8Array(this.dataView.buffer, this.offset, length) | ||
this.offset += length; | ||
return this._decodeASCIIByteArray(array); | ||
} | ||
// Floating Point | ||
case '<f4': | ||
return new Float32Array(buffer, offset); | ||
case '<f8': | ||
return new Float64Array(buffer, offset); | ||
/* Fixed length accessors */ | ||
default: | ||
throw new Error('unknown numeric dtype: ' + header.descr); | ||
readUint8(littleEndian = false) { | ||
const value = this.dataView.getUint8(this.offset, littleEndian); | ||
this.offset += Uint8Array.BYTES_PER_ELEMENT; | ||
return value; | ||
} | ||
readUint16(littleEndian = false) { | ||
const value = this.dataView.getUint16(this.offset, littleEndian); | ||
this.offset += Uint16Array.BYTES_PER_ELEMENT; | ||
return value; | ||
} | ||
readUint32(littleEndian = false) { | ||
const value = this.dataView.getUint32(this.offset, littleEndian); | ||
this.offset += Uint32Array.BYTES_PER_ELEMENT; | ||
return value; | ||
} | ||
/* Helpers */ | ||
_decodeASCIIByteArray(array) { | ||
const characters = [] | ||
for (const byte of array) { | ||
const char = String.fromCharCode(byte); | ||
characters.push(char); | ||
} | ||
return characters.join(''); | ||
} | ||
} | ||
function fromArrayBuffer(buffer) { | ||
// check the magic number | ||
const magic = asciiDecode(buffer.slice(0,6)); | ||
if (magic.slice(1,6) != 'NUMPY') { | ||
throw new Error(`unknown file type: "${magic}"`); | ||
/**
 * Parses the contents of a binary .npy file.
 *
 * @param {ArrayBuffer} buffer - The complete contents of the .npy file.
 * @returns {{data: Object, shape: *}} `data` is a typed array view created
 *   over `buffer` starting right after the header (it is not a copy);
 *   `shape` is the `shape` entry of the parsed header.
 * @throws {Error} If `buffer` is not an ArrayBuffer, the magic string does not
 *   match, the array is stored in Fortran order, or the dtype is unsupported.
 */
export function fromArrayBuffer(buffer) {
  // The parentheses matter: `!buffer instanceof ArrayBuffer` negates `buffer`
  // first and is therefore always false.
  if (!(buffer instanceof ArrayBuffer)) {
    throw new Error('Argument must be an ArrayBuffer.');
  }

  const reader = new DataViewReader(buffer);

  // comments are taken from https://docs.scipy.org/doc/numpy-1.14.1/neps/npy-format.html#format-specification-version-1-0
  // "The first 6 bytes are a magic string: exactly "\x93NUMPY""
  const magicByte = reader.readUint8();
  const magicWord = reader.readAndASCIIDecodeBytes(5);
  if (magicByte !== 0x93 || magicWord !== 'NUMPY') {
    throw new Error(`unknown file type: "${magicByte}${magicWord}"`);
  }

  // "The next 1 byte is an unsigned byte: the major version number of the file format, e.g. \x01."
  const versionMajor = reader.readUint8();
  // "The next 1 byte is an unsigned byte: the minor version number of the file format, e.g. \x00."
  // The value is not used, but the byte must be consumed to advance the reader.
  reader.readUint8();

  // Parse header length. This depends on the major file format version as follows:
  let headerLength;
  if (versionMajor <= 1) {
    // "The next 2 bytes form a little-endian unsigned short int: the length of the header data HEADER_LEN."
    headerLength = reader.readUint16(true);
  } else {
    // "The next 4 bytes form a little-endian unsigned int: the length of the header data HEADER_LEN."
    headerLength = reader.readUint32(true);
  }

  /* "The next HEADER_LEN bytes form the header data describing the array's format.
     It is an ASCII string which contains a Python literal expression of a dictionary.
     It is terminated by a newline ('\n') and padded with spaces ('\x20') to make the total
     length of the magic string + 4 + HEADER_LEN be evenly divisible by 16." */
  // reader.offset already counts magic + version + length field, so this is
  // correct for both the 2-byte (v1) and 4-byte (v2+) length fields.
  const preludeLength = reader.offset + headerLength;
  if (preludeLength % 16 !== 0) {
    console.warn(`NPY file header is incorrectly padded. (${preludeLength} is not evenly divisible by 16.)`);
  }

  const headerStr = reader.readAndASCIIDecodeBytes(headerLength);
  const header = parseHeaderStr(headerStr);
  if (header.fortran_order) {
    throw new Error('NPY file is written in Fortran byte order, support for this byte order is not yet implemented.');
  }

  // Interpret the bytes according to the specified dtype
  const TypedArrayConstructor = typedArrayConstructorForDescription(header.descr);
  const data = new TypedArrayConstructor(buffer, reader.offset);

  // Return object with same signature as NDArray expects: {data, shape}
  return { data: data, shape: header.shape };
}
// read the header | ||
const version = new Uint8Array(buffer.slice(6, 8)), | ||
headerLength = readUint16LE(buffer.slice(8, 10)), | ||
headerStr = asciiDecode(buffer.slice(10, 10 + headerLength)), | ||
offsetBytes = 10 + headerLength; | ||
function parseHeaderStr(headerStr) { | ||
const jsonHeader = headerStr | ||
.toLowerCase() // fixes boolean literals: False -> false | ||
.replace('(','[').replace('),',']') // shape tuple to array: (10,) -> [10,] | ||
.toLowerCase() // boolean literals: False -> false | ||
.replace('(','[').replace('),',']') // Python tuple to JS array: (10,) -> [10,] | ||
.replace('[,','[1,]').replace(',]',',1]') // implicit dimensions: [10,] -> [10,1] | ||
.replace(/'/g, '"'); // fixes single quotes | ||
const header = JSON.parse(jsonHeader); | ||
if (header.fortran_order) { | ||
throw new Error('file is in Fortran byte order; giving up') | ||
} | ||
.replace(/'/g, '"'); // single quotes -> double quotes | ||
return JSON.parse(jsonHeader); | ||
} | ||
// Interpret the bytes according to the specified dtype | ||
const data = typedArrayFromBuffer(header.descr, buffer, offsetBytes); | ||
return { data: data, shape: header.shape }; | ||
/**
 * Maps a numpy dtype description string to the matching typed array constructor.
 *
 * @param {string} dtypeDescription - dtype description from the NPY header, e.g. '<f4'.
 * @returns {Function} One of Uint8Array, Uint16Array, Uint32Array, Int8Array,
 *   Int16Array, Int32Array, Float32Array or Float64Array.
 * @throws {Error} If the dtype is recognised but unsupported ('<u8', '<i8',
 *   '<f2') or is not listed here at all.
 */
function typedArrayConstructorForDescription(dtypeDescription) {
  /* 'dtype' description strings consist of three characters, indicating one of three
     properties each: byte order, data type, and byte length.
     Byte order: '<' (little-endian), '>' (big-endian), or '|' (not applicable)
     Data type: 'u' (unsigned), 'i' (signed integer), 'f' (floating)
     Byte Length: 1, 2, 4 or 8 bytes
     Note that for 1 byte dtypes there is no byte order, thus the use of '|' (not applicable).
     Data types are specified in numpy source:
     https://github.com/numpy/numpy/blob/8aa121415760cc6839a546c3f84e238d1dfa1aa6/numpy/core/_dtype.py#L13
  */
  switch (dtypeDescription) {
    // Unsigned Integers
    case '|u1':
      return Uint8Array;
    case '<u2':
      return Uint16Array;
    case '<u4':
      return Uint32Array;
    case '<u8':
      throw new Error('Because JavaScript doesn\'t currently include standard support for 64-bit unsigned integer values, support for this dtype is not yet implemented.');

    // Integers
    case '|i1': // "byte"
      return Int8Array;
    case '<i2': // "short"
      return Int16Array;
    case '<i4': // "intc"
      return Int32Array;
    case '<i8': // "longlong" (??)
      throw new Error('Because JavaScript doesn\'t currently include standard support for 64-bit integer values, support for this dtype is not yet implemented.');

    // Floating
    case '<f2': // "half"
      throw new Error('Because JavaScript doesn\'t currently include standard support for 16-bit floating point values, support for this dtype is not yet implemented.');
    case '<f4': // "single"
      return Float32Array;
    case '<f8': // "double" "longfloat"
      return Float64Array;

    // No support for ComplexFloating, non-number types (flexible/character/void...) yet
    default:
      // Report the offending description itself; the parameter is the only
      // dtype name in scope here.
      throw new Error(`Unknown or not yet implemented numpy dtype description: ${dtypeDescription}`);
  }
}
module.exports = { | ||
fromArrayBuffer: fromArrayBuffer | ||
}; |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
34514
52
197
7
1