numpy-parser - npm Package Compare versions

		{
		"name": "numpy-parser",
		"version": "1.0.2",
		"version": "1.2.1",
		"description": "A JS parser for binary .npy files.",
		"main": "src/main.js",
		"main": "dist/main.js",
		"scripts": {
		"test": "mocha"
		"test": "./node_modules/.bin/mocha --require @babel/register \"test/**/*.spec.js\"",
		"build": "BABEL_ENV=production ./node_modules/.bin/babel src/ --out-dir dist/ --source-maps",
		"prepublishOnly": "npm run-script build"
		},
		@@ -26,4 +28,10 @@ "repository": {
		"devDependencies": {
		"@babel/cli": "^7.2.3",
		"@babel/core": "^7.2.2",
		"@babel/preset-env": "^7.2.3",
		"@babel/register": "^7.0.0",
		"babel-minify": "^0.5.0",
		"babel-preset-minify": "^0.5.0",
		"mocha": "^5.2.0"
		}
		}

204

src/main.js

		@@ -1,73 +0,163 @@
		/**
		 * Decode a buffer of single-byte character codes into a string.
		 *
		 * Every byte becomes the UTF-16 code unit with the same value, so ASCII input
		 * decodes to itself.
		 *
		 * @param {ArrayBuffer} buffer - Raw bytes to decode.
		 * @returns {string} A string with one character per input byte.
		 */
		function asciiDecode(buffer) {
		  const bytes = new Uint8Array(buffer);
		  // Passing every byte as a separate argument in one call exceeds the engine's
		  // argument limit for large inputs, so the bytes are decoded in bounded chunks.
		  const CHUNK_SIZE = 0x8000;
		  let decoded = '';
		  for (let start = 0; start < bytes.length; start += CHUNK_SIZE) {
		    decoded += String.fromCharCode.apply(null, bytes.subarray(start, start + CHUNK_SIZE));
		  }
		  return decoded;
		}
		/* A simple stateful wrapper around a DataView that keeps track of the current offset.*/

		/**
		 * Read an unsigned 16-bit little-endian integer from the first two bytes of a buffer.
		 *
		 * @param {ArrayBuffer} buffer - Buffer holding at least two bytes.
		 * @returns {number} The decoded value in the range 0..65535.
		 * @throws {RangeError} If the buffer holds fewer than two bytes.
		 */
		function readUint16LE(buffer) {
		  // `true` selects little-endian byte order: byte 0 is the low byte.
		  return new DataView(buffer).getUint16(0, true);
		}
		/**
		 * Sequential reader over binary data.
		 *
		 * Wraps a DataView and advances `offset` past every value that is read, so
		 * consecutive fields can be consumed without tracking positions by hand.
		 * `offset` is always relative to the start of the wrapped DataView.
		 */
		class DataViewReader {
		  /**
		   * @param {DataView|ArrayBuffer} dataViewOrBuffer - Data to read from. An
		   *   ArrayBuffer is wrapped in a DataView covering the whole buffer.
		   * @throws {TypeError} If the argument is neither a DataView nor an ArrayBuffer.
		   */
		  constructor(dataViewOrBuffer) {
		    if (dataViewOrBuffer instanceof DataView) {
		      this.dataView = dataViewOrBuffer;
		    } else if (dataViewOrBuffer instanceof ArrayBuffer) {
		      this.dataView = new DataView(dataViewOrBuffer);
		    } else {
		      // Fail here rather than with an obscure error on the first read.
		      throw new TypeError('DataViewReader requires a DataView or an ArrayBuffer.');
		    }
		    this.offset = 0;
		  }

		  /* Variable length accessors */

		  /**
		   * Return a DataView over the next `length` bytes and advance past them.
		   * @param {number} length - Number of bytes to take.
		   * @returns {DataView} View sharing memory with the underlying buffer.
		   * @throws {RangeError} If fewer than `length` bytes remain.
		   */
		  readBytes(length) {
		    const start = this._absoluteStart(length);
		    const bytes = new DataView(this.dataView.buffer, start, length);
		    this.offset += length;
		    return bytes;
		  }

		  /**
		   * Decode the next `length` bytes as one character per byte and advance past them.
		   * @param {number} length - Number of bytes to decode.
		   * @returns {string} The decoded string.
		   * @throws {RangeError} If fewer than `length` bytes remain.
		   */
		  readAndASCIIDecodeBytes(length) {
		    const start = this._absoluteStart(length);
		    const array = new Uint8Array(this.dataView.buffer, start, length);
		    this.offset += length;
		    return this._decodeASCIIByteArray(array);
		  }

		  /* Fixed length accessors */

		  /**
		   * Read one unsigned byte and advance by one.
		   * @param {boolean} [littleEndian=false] - Accepted for symmetry with the wider
		   *   readers; a single byte has no byte order, so the value is ignored.
		   * @returns {number}
		   */
		  readUint8(littleEndian = false) {
		    const value = this.dataView.getUint8(this.offset);
		    this.offset += Uint8Array.BYTES_PER_ELEMENT;
		    return value;
		  }

		  /**
		   * Read an unsigned 16-bit integer and advance by two.
		   * @param {boolean} [littleEndian=false] - Byte order; big-endian by default.
		   * @returns {number}
		   */
		  readUint16(littleEndian = false) {
		    const value = this.dataView.getUint16(this.offset, littleEndian);
		    this.offset += Uint16Array.BYTES_PER_ELEMENT;
		    return value;
		  }

		  /**
		   * Read an unsigned 32-bit integer and advance by four.
		   * @param {boolean} [littleEndian=false] - Byte order; big-endian by default.
		   * @returns {number}
		   */
		  readUint32(littleEndian = false) {
		    const value = this.dataView.getUint32(this.offset, littleEndian);
		    this.offset += Uint32Array.BYTES_PER_ELEMENT;
		    return value;
		  }

		  /* Helpers */

		  /**
		   * Check that `length` bytes remain and translate the current offset into an
		   * offset within the underlying ArrayBuffer.
		   *
		   * The wrapped DataView may start part-way into its buffer, so its own
		   * byteOffset has to be added before indexing the buffer directly.
		   */
		  _absoluteStart(length) {
		    if (length < 0 || this.offset + length > this.dataView.byteLength) {
		      throw new RangeError(
		        `Cannot read ${length} bytes at offset ${this.offset} of ${this.dataView.byteLength}.`
		      );
		    }
		    return this.dataView.byteOffset + this.offset;
		  }

		  /** Map every byte of `array` to the character with the same code and join them. */
		  _decodeASCIIByteArray(array) {
		    const characters = [];
		    for (const byte of array) {
		      characters.push(String.fromCharCode(byte));
		    }
		    return characters.join('');
		  }
		}

		function fromArrayBuffer(buffer) {
		// check the magic number
		const magic = asciiDecode(buffer.slice(0,6));
		if (magic.slice(1,6) != 'NUMPY') {
		throw new Error(`unknown file type: "${magic}"`);

		/**
		 * Parse the contents of a binary .npy file.
		 *
		 * @param {ArrayBuffer} buffer - Complete contents of the .npy file.
		 * @returns {{data: Object, shape: number[]}} `data` is a typed array viewing the
		 *   array payload inside `buffer` (no copy is made); `shape` is the shape from
		 *   the file header.
		 * @throws {Error} If `buffer` is not an ArrayBuffer, the magic string is wrong, or
		 *   the header declares Fortran order. Errors raised while parsing the header or
		 *   resolving the dtype propagate unchanged.
		 */
		export function fromArrayBuffer(buffer) {
		  // Parenthesised on purpose: `!buffer instanceof ArrayBuffer` negates `buffer`
		  // first and is therefore always false.
		  if (!(buffer instanceof ArrayBuffer)) {
		    throw new Error('Argument must be an ArrayBuffer.');
		  }
		  const reader = new DataViewReader(buffer);
		  // comments are taken from https://docs.scipy.org/doc/numpy-1.14.1/neps/npy-format.html#format-specification-version-1-0
		  // "The first 6 bytes are a magic string: exactly "\x93NUMPY""
		  const magicByte = reader.readUint8();
		  const magicWord = reader.readAndASCIIDecodeBytes(5);
		  if (magicByte !== 0x93 || magicWord !== 'NUMPY') {
		    throw new Error(`unknown file type: "${magicByte}${magicWord}"`);
		  }
		  // "The next 1 byte is an unsigned byte: the major version number of the file format, e.g. \x01."
		  const versionMajor = reader.readUint8();
		  // "The next 1 byte is an unsigned byte: the minor version number of the file format, e.g. \x00."
		  // The minor version does not affect parsing; it is read only to advance past it.
		  const versionMinor = reader.readUint8();
		  // Parse header length. This depends on the major file format version as follows:
		  let headerLength;
		  if (versionMajor <= 1) {
		    // "The next 2 bytes form a little-endian unsigned short int: the length of the header data HEADER_LEN."
		    headerLength = reader.readUint16(true);
		  } else {
		    // "The next 4 bytes form a little-endian unsigned int: the length of the header data HEADER_LEN."
		    headerLength = reader.readUint32(true);
		  }
		  /* "The next HEADER_LEN bytes form the header data describing the array's format.
		     It is an ASCII string which contains a Python literal expression of a dictionary.
		     It is terminated by a newline ('\n') and padded with spaces ('\x20') to make the total
		     length of the magic string + 4 + HEADER_LEN be evenly divisible by 16." */
		  // The quoted "+ 4" only holds for version 1 (2 version bytes + 2 length bytes).
		  // reader.offset already counts the magic string, the version bytes and the
		  // length field actually read, so it is correct for every version.
		  const preludeLength = reader.offset + headerLength;
		  if (preludeLength % 16 !== 0) {
		    console.warn(`NPY file header is incorrectly padded. (${preludeLength} is not evenly divisible by 16.)`);
		  }
		  const headerStr = reader.readAndASCIIDecodeBytes(headerLength);
		  const header = parseHeaderStr(headerStr);
		  if (header.fortran_order) {
		    throw new Error('NPY file is written in Fortran byte order, support for this byte order is not yet implemented.');
		  }
		  // Interpret the remaining bytes according to the specified dtype
		  const TypedArrayConstructor = typedArrayConstructorForDescription(header.descr);
		  const data = new TypedArrayConstructor(buffer, reader.offset);
		  // Return object with same signature as NDArray expects: {data, shape}
		  return { data: data, shape: header.shape };
		}

		// read the header
		const version = new Uint8Array(buffer.slice(6, 8)),
		headerLength = readUint16LE(buffer.slice(8, 10)),
		headerStr = asciiDecode(buffer.slice(10, 10 + headerLength)),
		offsetBytes = 10 + headerLength;

		/**
		 * Convert the header of an .npy file, a Python dict literal, into an object.
		 *
		 * The literal is rewritten into JSON with plain string substitutions, so only the
		 * simple header layout is supported: string and boolean values plus one shape
		 * tuple that is the last entry of the dict.
		 *
		 * @param {string} headerStr - Header text, e.g.
		 *   "{'descr': '<f8', 'fortran_order': False, 'shape': (3, 4), }".
		 * @returns {Object} The parsed header, with `shape` as an array.
		 * @throws {SyntaxError} If the rewritten text is not valid JSON.
		 */
		function parseHeaderStr(headerStr) {
		  const jsonHeader = headerStr
		    // Only the boolean literals are lower-cased. Lower-casing the whole header
		    // would corrupt case-sensitive dtype codes such as '<U4' (unicode) into
		    // '<u4' (uint32).
		    .replace(/\bTrue\b/g, 'true')
		    .replace(/\bFalse\b/g, 'false')
		    // Python tuple to JS array: (10,) -> [10,]. The comma that follows the
		    // closing parenthesis is consumed as well.
		    .replace('(', '[')
		    .replace('),', ']')
		    // implicit dimensions: [10,] -> [10,1]
		    .replace('[,', '[1,]')
		    .replace(',]', ',1]')
		    // single quotes -> double quotes
		    .replace(/'/g, '"');
		  return JSON.parse(jsonHeader);
		}

		// Intepret the bytes according to the specified dtype
		const data = typedArrayFromBuffer(header.descr, buffer, offsetBytes);

		return { data: data, shape: header.shape };
		/**
		 * Map a numpy dtype description string to the matching typed array constructor.
		 *
		 * @param {string} dtypeDescription - dtype description from the file header, e.g. '<f8'.
		 * @returns {Function} The typed array constructor for that dtype.
		 * @throws {Error} If the dtype is unknown or has no typed array support here.
		 */
		function typedArrayConstructorForDescription(dtypeDescription) {
		  /* 'dtype' description strings consist of three characters, indicating one of three
		     properties each: byte order, data type, and byte length.

		     Byte order: '<' (little-endian), '>' (big-endian), or '|' (not applicable)
		     Data type: 'u' (unsigned), 'i' (signed integer), 'f' (floating)
		     Byte Length: 1, 2, 4 or 8 bytes

		     Note that for 1 byte dtypes there is no byte order, thus the use of '|' (not applicable).
		     Data types are specified in numpy source:
		     https://github.com/numpy/numpy/blob/8aa121415760cc6839a546c3f84e238d1dfa1aa6/numpy/core/_dtype.py#L13
		  */
		  switch (dtypeDescription) {

		    // Unsigned Integers
		    case '|u1':
		      return Uint8Array;
		    case '<u2':
		      return Uint16Array;
		    case '<u4':
		      return Uint32Array;
		    case '<u8':
		      throw new Error('Because JavaScript doesn\'t currently include standard support for 64-bit unsigned integer values, support for this dtype is not yet implemented.');

		    // Integers
		    case '|i1': // "byte"
		      return Int8Array;
		    case '<i2': // "short"
		      return Int16Array;
		    case '<i4': // "intc"
		      return Int32Array;
		    case '<i8': // "longlong" (??)
		      throw new Error('Because JavaScript doesn\'t currently include standard support for 64-bit integer values, support for this dtype is not yet implemented.');

		    // Floating
		    case '<f2': // "half"
		      throw new Error('Because JavaScript doesn\'t currently include standard support for 16-bit floating point values, support for this dtype is not yet implemented.');
		    case '<f4': // "single"
		      return Float32Array;
		    case '<f8': // "double" "longfloat"
		      return Float64Array;

		    // No support for ComplexFloating, non-number types (flexible/character/void...) yet

		    default:
		      // Report the value that was actually passed in; the previous code referenced
		      // an undefined name here and raised a ReferenceError instead of this message.
		      throw new Error(`Unknown or not yet implemented numpy dtype description: ${dtypeDescription}`);
		  }
		}

		// CommonJS export of the parser's public entry point.
		module.exports = { fromArrayBuffer };

test/test_main.js

.travis.yml

Sorry, the diff of this file is not supported yet

test/generate_test_data.py

Sorry, the diff of this file is not supported yet

New alerts

Improved metrics

Worsened metrics