@mangosteen/line-by-line
Advanced tools
Comparing version 1.0.0 to 2.0.0
/// <reference types="node" /> | ||
/** | ||
* Split a string at new-line boundaries. Returns an array of lines. | ||
* | ||
* @param text String to split into lines. | ||
* @returns An array of lines. | ||
*/ | ||
export declare function splitStringLines(text: string): string[]; | ||
/** | ||
* Iterate over a string line by line. Each iterated value is a line found within the `text`. | ||
* This generator always yields at least one value. | ||
* | ||
* Use this generator to reduce memory pressure in your code. | ||
* | ||
* @param text String to iterate over line by line. | ||
* @returns Iterator that yields individual lines. | ||
*/ | ||
export declare function iterateStringLines(text: string): Generator<string, void, void>; | ||
/** | ||
* Reads a stream line by line. Iterate over the iterator using `for await` loop. | ||
* @param stream Readable stream to read line by line. | ||
* @param encoding Optional encoding to use when decoding the stream into text. Default: `utf8` | ||
* @returns Iterator that yields individual lines. | ||
*/ | ||
export declare function lineByLine(stream: NodeJS.ReadableStream): AsyncIterableIterator<string>; | ||
export declare function iterateStreamLines(stream: NodeJS.ReadableStream, encoding?: BufferEncoding): AsyncGenerator<string, void, void>; | ||
//# sourceMappingURL=line-by-line-iterator.d.ts.map |
"use strict"; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.lineByLine = void 0; | ||
const readline_1 = __importDefault(require("readline")); | ||
exports.iterateStreamLines = exports.iterateStringLines = exports.splitStringLines = void 0; | ||
const string_decoder_1 = require("string_decoder"); | ||
/** | ||
* Split a string at new-line boundaries. Returns an array of lines. | ||
* | ||
* @param text String to split into lines. | ||
* @returns An array of lines. | ||
*/ | ||
function splitStringLines(text) {
    // Scan the text once, treating \n, \r, and the \r\n pair as line
    // terminators. A \r immediately followed by \n counts as a single
    // break, so CRLF input does not produce spurious empty lines.
    const lines = [];
    let lineStart = 0;
    for (let pos = 0; pos < text.length; pos++) {
        const ch = text[pos];
        if (ch === '\n' || ch === '\r') {
            lines.push(text.slice(lineStart, pos));
            // Consume the \n of a \r\n pair as part of the same break.
            if (ch === '\r' && text[pos + 1] === '\n') {
                pos++;
            }
            lineStart = pos + 1;
        }
    }
    // The final (possibly empty) line has no terminator; emit it unconditionally,
    // so an empty input still yields [''].
    lines.push(text.slice(lineStart));
    return lines;
}
exports.splitStringLines = splitStringLines; | ||
/** | ||
* Iterate over a string line by line. Each iterated value is a line found within the `text`. | ||
* This generator always yields at least one value. | ||
* | ||
* Use this generator to reduce memory pressure in your code. | ||
* | ||
* @param text String to iterate over line by line. | ||
* @returns Iterator that yields individual lines. | ||
*/ | ||
function* iterateStringLines(text) {
    // Walk the lazy match sequence of line terminators; each match marks the
    // end of the current line. \r\n is listed first so a CRLF pair is matched
    // as one break rather than two.
    let lineStart = 0;
    for (const terminator of text.matchAll(/\r\n|\n|\r/g)) {
        yield text.slice(lineStart, terminator.index);
        lineStart = terminator.index + terminator[0].length;
    }
    // Always yield the trailing (possibly empty) line, so even an empty
    // input produces exactly one value.
    yield text.slice(lineStart);
}
exports.iterateStringLines = iterateStringLines; | ||
/** | ||
* Reads a stream line by line. Iterate over the iterator using `for await` loop. | ||
* @param stream Readable stream to read line by line. | ||
* @param encoding Optional encoding to use when decoding the stream into text. Default: `utf8` | ||
* @returns Iterator that yields individual lines. | ||
*/ | ||
function lineByLine(stream) { | ||
// Store a captured error in a promise | ||
let setCapturedError; | ||
const capturedError = new Promise((_resolve, reject) => { | ||
setCapturedError = reject; | ||
}); | ||
// Clean up all resources | ||
let isDestroyed = false; | ||
const cleanup = () => { | ||
if (isDestroyed == false) { | ||
isDestroyed = true; | ||
stream.off('error', onError); | ||
stream.off('close', cleanup); | ||
readLineInterface.off('close', cleanup); | ||
// Close the readline interface | ||
readLineInterface.close(); | ||
// Destroy the stream | ||
const s = stream; | ||
if (typeof s.destroy === 'function') { | ||
s.destroy(); | ||
async function* iterateStreamLines(stream, encoding) { | ||
// Nullable because stream could emit only strings, or alternate between strings and buffers! | ||
let decoder; | ||
// Nullable because the stream might be empty and not yield a single chunk! | ||
let buffer; | ||
for await (const chunk of stream) { | ||
// Initialize buffer for the first chunk | ||
if (buffer == null) { | ||
buffer = ''; | ||
} | ||
// Add chunk to the buffer | ||
if (typeof chunk === 'string') { | ||
if (decoder) { | ||
buffer += decoder.end(); | ||
decoder = undefined; | ||
} | ||
buffer += chunk; | ||
} | ||
}; | ||
const onError = (err) => { | ||
setCapturedError(err); | ||
cleanup(); | ||
}; | ||
stream.once('error', onError); | ||
stream.once('close', cleanup); | ||
// Use readline package to process the stream | ||
const readLineInterface = readline_1.default.createInterface({ | ||
input: stream, | ||
crlfDelay: Infinity, | ||
}); | ||
readLineInterface.once('close', cleanup); | ||
const readLineIterator = readLineInterface[Symbol.asyncIterator](); | ||
// Create our own async iterator that wraps the readline iterator | ||
const iterator = { | ||
async next() { | ||
try { | ||
return await Promise.race([ | ||
capturedError, | ||
readLineIterator.next(), | ||
]); | ||
else if (Buffer.isBuffer(chunk)) { | ||
if (!decoder) { | ||
decoder = new string_decoder_1.StringDecoder(encoding); | ||
} | ||
catch (error) { | ||
onError(error); | ||
throw error; | ||
buffer += decoder.write(chunk); | ||
} | ||
else { | ||
throw new Error(`Unsupported chunk type: ${typeof chunk}`); | ||
} | ||
// If the buffered text ends with \r, make sure to NOT process that character because it might | ||
// be part of the \r\n pair, and we might get \n in the next chunk! | ||
const endsWithCR = buffer.endsWith('\r'); | ||
if (buffer.length > (endsWithCR ? 1 : 0)) { | ||
if (endsWithCR) { | ||
buffer = buffer.slice(0, -1); | ||
} | ||
}, | ||
// Called when there is a "break;", "throw;" or "return;" in "for await" loop | ||
// https://262.ecma-international.org/6.0/#sec-iteration | ||
async return(value) { | ||
try { | ||
if (readLineIterator.return != null) { | ||
return await readLineIterator.return(value); | ||
let lastLine = undefined; | ||
// The iterate function always yields at least one value | ||
for (const line of iterateStringLines(buffer)) { | ||
if (lastLine != null) { | ||
yield lastLine; | ||
} | ||
else { | ||
return { | ||
done: true, | ||
value, | ||
}; | ||
} | ||
lastLine = line; | ||
} | ||
finally { | ||
cleanup(); | ||
// Last line might be incomplete, so we need to continue adding chunks to it | ||
buffer = lastLine; | ||
// Put back the \r we took | ||
if (endsWithCR) { | ||
buffer += '\r'; | ||
} | ||
}, | ||
// No idea who ever uses this, but let's cleanup anyway | ||
async throw(e) { | ||
try { | ||
if (readLineIterator.throw != null) { | ||
return await readLineIterator.throw(e); | ||
} | ||
else if (e != null) { | ||
throw e; | ||
} | ||
else { | ||
return { | ||
done: true, | ||
value: undefined, | ||
}; | ||
} | ||
} | ||
finally { | ||
cleanup(); | ||
} | ||
}, | ||
// Conform to the only-once iterable protocol: | ||
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Iterators_and_Generators#iterables | ||
[Symbol.asyncIterator]: () => iterator, | ||
}; | ||
return iterator; | ||
} | ||
} | ||
// Decode final bytes | ||
if (decoder) { | ||
buffer += decoder.end(); | ||
decoder = undefined; | ||
} | ||
if (buffer != null) { | ||
yield* iterateStringLines(buffer); | ||
} | ||
} | ||
exports.lineByLine = lineByLine; | ||
exports.iterateStreamLines = iterateStreamLines; | ||
//# sourceMappingURL=line-by-line-iterator.js.map |
@@ -7,5 +7,7 @@ /// <reference types="node" /> | ||
* - `Readable` side: object mode, returns a `string` for each line. | ||
* | ||
* @param encoding Optional encoding to use when decoding the stream into text. Default: `utf8` | ||
* @returns Transform stream that converts a stream of bytes into line strings. | ||
*/ | ||
export declare function createLineByLineStream(): stream.Transform; | ||
export declare function createLineByLineStream(encoding?: BufferEncoding): stream.Transform; | ||
//# sourceMappingURL=line-by-line-stream.d.ts.map |
@@ -33,5 +33,7 @@ "use strict"; | ||
* - `Readable` side: object mode, returns a `string` for each line. | ||
* | ||
* @param encoding Optional encoding to use when decoding the stream into text. Default: `utf8` | ||
* @returns Transform stream that converts a stream of bytes into line strings. | ||
*/ | ||
function createLineByLineStream() { | ||
function createLineByLineStream(encoding) { | ||
// Used to signal when the iterator requested more data to process | ||
@@ -41,5 +43,4 @@ const readableEvents = new events_1.default(); | ||
const readable = new stream_1.default.Readable({ | ||
autoDestroy: true, | ||
objectMode: false, | ||
highWaterMark: 0, | ||
encoding, | ||
read(_size) { | ||
@@ -53,2 +54,3 @@ readableEvents.emit('resume'); | ||
autoDestroy: true, | ||
decodeStrings: false, | ||
transform(chunk, encoding, callback) { | ||
@@ -69,3 +71,3 @@ (async () => { | ||
readable.push(null); | ||
whenDone.then(() => callback()); | ||
whenDone.then(() => callback(), callback); | ||
}, | ||
@@ -79,3 +81,3 @@ destroy(error, callback) { | ||
try { | ||
for await (const line of line_by_line_iterator_1.lineByLine(readable)) { | ||
for await (const line of line_by_line_iterator_1.iterateStreamLines(readable, encoding)) { | ||
if (transform.destroyed) { | ||
@@ -82,0 +84,0 @@ break; |
{ | ||
"name": "@mangosteen/line-by-line", | ||
"version": "1.0.0", | ||
"description": "Read stream line by line using async iterator or object-mode stream transfrom.", | ||
"version": "2.0.0", | ||
"description": "Read stream line by line using async iterator or object-mode stream transform.", | ||
"main": "dist/index.js", | ||
@@ -22,2 +22,3 @@ "types": "dist/index.d.ts", | ||
"async", | ||
"iterable", | ||
"iterator", | ||
@@ -29,5 +30,17 @@ "generator", | ||
"line-by-line", | ||
"lines", | ||
"string", | ||
"text", | ||
"cr", | ||
"lf", | ||
"crlf", | ||
"csv", | ||
"parse", | ||
"parser" | ||
"parser", | ||
"split", | ||
"line by line", | ||
"read line", | ||
"reader", | ||
"line reader", | ||
"encoding" | ||
], | ||
@@ -42,7 +55,8 @@ "author": "Paya", | ||
"@types/jest": "^26.0.24", | ||
"@types/node": "^14.17.5", | ||
"@types/node": "^14.17.10", | ||
"jest": "^27.0.6", | ||
"ts-jest": "^27.0.4", | ||
"ts-jest": "^27.0.5", | ||
"ts-node": "^10.2.1", | ||
"typescript": "^4.3.5" | ||
} | ||
} |
@@ -6,2 +6,5 @@ # line-by-line | ||
Initially, `line-by-line` used `readline` package internally, but because of its shortcomings | ||
(inability to specify encoding), it now implements a custom line-reading algorithm. | ||
# Why not just use the built-in `readline` package? | ||
@@ -12,3 +15,3 @@ | ||
Our package internally uses `readline`, but wraps it in a way to fix the above shortcomings. | ||
Additionally, `readline` is hard-coded to `utf8` encoding, so you cannot use it with other encodings. | ||
@@ -21,14 +24,59 @@ # Installation | ||
# Usage (iterator) | ||
# Usage (string array) | ||
```js | ||
import fs from 'fs/promises'; | ||
import { splitStringLines } from '@mangosteen/line-by-line'; | ||
(async () => { | ||
const fileBuffer: Buffer = await fs.readFile('./shakespeare.txt'); | ||
const text: string = fileBuffer.toString('utf8'); | ||
const lines: string[] = splitStringLines(text); | ||
for (const line of lines) { | ||
console.log('Line:', line); | ||
} | ||
})(); | ||
``` | ||
`splitStringLines` splits the text into lines array. | ||
This can consume a lot of memory, because at one point you need to hold both | ||
the entire input string and the entire output array of lines. Thus, we generally recommend | ||
using below functions instead. | ||
# Usage (string iterator) | ||
```js | ||
import fs from 'fs/promises'; | ||
import { iterateStringLines } from '@mangosteen/line-by-line'; | ||
(async () => { | ||
const fileBuffer: Buffer = await fs.readFile('./shakespeare.txt'); | ||
const text: string = fileBuffer.toString('utf8'); | ||
const iterator: Iterable<string> = iterateStringLines(text); | ||
for (const line of iterator) { | ||
console.log('Line:', line); | ||
} | ||
})(); | ||
``` | ||
`iterateStringLines` is a generator function that lazily yields lines one by one. | ||
You still need to hold the entire input string in memory, but the output lines can | ||
be processed efficiently. | ||
# Usage (stream iterator) | ||
```js | ||
import fs from 'fs'; | ||
import { lineByLine } from '@mangosteen/line-by-line'; | ||
import { iterateStreamLines } from '@mangosteen/line-by-line'; | ||
(async () => { | ||
const inputStream = fs.createReadStream('./shakespeare.txt'); | ||
const iterator: AsyncIterable<string> = iterateStreamLines(inputStream, 'utf8'); | ||
for await (const line of lineByLine(inputStream)) { | ||
for await (const line of iterator) { | ||
console.log('Line:', line); | ||
console.log(typeof line); // 'string' | ||
} | ||
@@ -38,9 +86,17 @@ })(); | ||
The iterator automatically closes and destroys the input stream, and fully propagates input stream errors. | ||
When you `break`, `return` or `throw` from within the `for await` loop, everything gets cleaned up automatically. | ||
Errors thrown by the stream work the same way. | ||
`iterateStreamLines` is an async generator function that lazily yields lines one by one. | ||
You cannot reuse the same input stream for multiple `for await` loops or multiple `lineByLine` iterators, | ||
because everything gets cleaned up automatically. | ||
This is the most efficient method of reading lines. The input is a stream and can be | ||
processed on-demand. The output is generated on-demand as well, one line at a time. | ||
When the stream iterator returned by `iterateStreamLines` is consumed (via `for await`), | ||
it will automatically close and destroy the input stream, and fully propagate input stream | ||
errors. You won't need to do anything more to clean up the input stream. | ||
When you `break`, `return` or `throw` from within the `for await` loop, everything gets | ||
cleaned up automatically. Errors thrown by the stream work the same way. | ||
You cannot reuse the same input stream for multiple `for await` loops or multiple `lineByLine` | ||
iterators, because everything gets cleaned up automatically. | ||
# Usage (transform stream) | ||
@@ -59,3 +115,3 @@ | ||
fs.createReadStream('./shakespeare.txt'), | ||
createLineByLineStream(), | ||
createLineByLineStream('utf8'), | ||
createSinkStream(), | ||
@@ -69,5 +125,4 @@ ); | ||
highWaterMark: 0, | ||
write(chunk, _encoding, callback): void { | ||
console.log('Line:', chunk); | ||
console.log(typeof chunk); // 'string' | ||
write(line: string, _encoding, callback): void { | ||
console.log('Line:', line); | ||
callback(); | ||
@@ -79,3 +134,4 @@ }, | ||
The `createLineByLineStream` transform stream `Writable` side expects a standard non-`objectMode` stream. | ||
The `Readable` side runs in an `objectMode`, where each object is a line `string`. | ||
The `createLineByLineStream` transform stream's `Writable` side expects a standard non-`objectMode` stream. | ||
The `Readable` side runs in an `objectMode`, where each object is a line `string`. | ||
You can specify an `encoding` to decode any `Buffer`s the transform stream receives. |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Filesystem access
Supply chain riskAccesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
130
0
21728
6
15
247
1