Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

@mangosteen/line-by-line

Package Overview
Dependencies
Maintainers
1
Versions
2
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@mangosteen/line-by-line - npm Package Compare versions

Comparing version 1.0.0 to 2.0.0

20

dist/line-by-line-iterator.d.ts
/// <reference types="node" />
/**
* Split a string at new-line boundaries. Returns an array of lines.
*
* @param text String to split into lines.
* @returns An array of lines.
*/
export declare function splitStringLines(text: string): string[];
/**
* Iterate over a string line by line. Each iterated value is a line found within the `text`.
* This generator always yields at least one value.
*
* Use this generator to reduce memory pressure in your code.
*
* @param text String to iterate over line by line.
* @returns Iterator that yields individual lines.
*/
export declare function iterateStringLines(text: string): Generator<string, void, void>;
/**
* Reads a stream line by line. Iterate over the iterator using `for await` loop.
* @param stream Readable stream to read line by line.
* @param encoding Optional encoding to use when decoding the stream into text. Default: `utf8`
* @returns Iterator that yields individual lines.
*/
export declare function lineByLine(stream: NodeJS.ReadableStream): AsyncIterableIterator<string>;
export declare function iterateStreamLines(stream: NodeJS.ReadableStream, encoding?: BufferEncoding): AsyncGenerator<string, void, void>;
//# sourceMappingURL=line-by-line-iterator.d.ts.map

177

dist/line-by-line-iterator.js
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.lineByLine = void 0;
const readline_1 = __importDefault(require("readline"));
exports.iterateStreamLines = exports.iterateStringLines = exports.splitStringLines = void 0;
const string_decoder_1 = require("string_decoder");
/**
* Split a string at new-line boundaries. Returns an array of lines.
*
* @param text String to split into lines.
* @returns An array of lines.
*/
function splitStringLines(text) {
return text.split(/\r\n|\n|\r/);
}
exports.splitStringLines = splitStringLines;
/**
* Iterate over a string line by line. Each iterated value is a line found within the `text`.
* This generator always yields at least one value.
*
* Use this generator to reduce memory pressure in your code.
*
* @param text String to iterate over line by line.
* @returns Iterator that yields individual lines.
*/
function* iterateStringLines(text) {
const newLinePattern = /\r\n|\n|\r/g;
let match;
let prevMatchIndex = 0;
while ((match = newLinePattern.exec(text)) !== null) {
yield text.slice(prevMatchIndex, match.index);
prevMatchIndex = newLinePattern.lastIndex;
}
yield text.slice(prevMatchIndex);
}
exports.iterateStringLines = iterateStringLines;
/**
* Reads a stream line by line. Iterate over the iterator using `for await` loop.
* @param stream Readable stream to read line by line.
* @param encoding Optional encoding to use when decoding the stream into text. Default: `utf8`
* @returns Iterator that yields individual lines.
*/
function lineByLine(stream) {
// Store a captured error in a promise
let setCapturedError;
const capturedError = new Promise((_resolve, reject) => {
setCapturedError = reject;
});
// Clean up all resources
let isDestroyed = false;
const cleanup = () => {
if (isDestroyed == false) {
isDestroyed = true;
stream.off('error', onError);
stream.off('close', cleanup);
readLineInterface.off('close', cleanup);
// Close the readline interface
readLineInterface.close();
// Destroy the stream
const s = stream;
if (typeof s.destroy === 'function') {
s.destroy();
async function* iterateStreamLines(stream, encoding) {
// Nullable because stream could emit only strings, or alternate between strings and buffers!
let decoder;
// Nullable because the stream might be empty and not yield a single chunk!
let buffer;
for await (const chunk of stream) {
// Initialize buffer for the first chunk
if (buffer == null) {
buffer = '';
}
// Add chunk to the buffer
if (typeof chunk === 'string') {
if (decoder) {
buffer += decoder.end();
decoder = undefined;
}
buffer += chunk;
}
};
const onError = (err) => {
setCapturedError(err);
cleanup();
};
stream.once('error', onError);
stream.once('close', cleanup);
// Use readline package to process the stream
const readLineInterface = readline_1.default.createInterface({
input: stream,
crlfDelay: Infinity,
});
readLineInterface.once('close', cleanup);
const readLineIterator = readLineInterface[Symbol.asyncIterator]();
// Create our own async iterator that wraps the readline iterator
const iterator = {
async next() {
try {
return await Promise.race([
capturedError,
readLineIterator.next(),
]);
else if (Buffer.isBuffer(chunk)) {
if (!decoder) {
decoder = new string_decoder_1.StringDecoder(encoding);
}
catch (error) {
onError(error);
throw error;
buffer += decoder.write(chunk);
}
else {
throw new Error(`Unsupported chunk type: ${typeof chunk}`);
}
// If the buffered text ends with \r, make sure to NOT process that character because it might
// be part of the \r\n pair, and we might get \n in the next chunk!
const endsWithCR = buffer.endsWith('\r');
if (buffer.length > (endsWithCR ? 1 : 0)) {
if (endsWithCR) {
buffer = buffer.slice(0, -1);
}
},
// Called when there is a "break;", "throw;" or "return;" in "for await" loop
// https://262.ecma-international.org/6.0/#sec-iteration
async return(value) {
try {
if (readLineIterator.return != null) {
return await readLineIterator.return(value);
let lastLine = undefined;
// The iterate function always yields at least one value
for (const line of iterateStringLines(buffer)) {
if (lastLine != null) {
yield lastLine;
}
else {
return {
done: true,
value,
};
}
lastLine = line;
}
finally {
cleanup();
// Last line might be incomplete, so we need to continue adding chunks to it
buffer = lastLine;
// Put back the \r we took
if (endsWithCR) {
buffer += '\r';
}
},
// No idea who ever uses this, but let's cleanup anyway
async throw(e) {
try {
if (readLineIterator.throw != null) {
return await readLineIterator.throw(e);
}
else if (e != null) {
throw e;
}
else {
return {
done: true,
value: undefined,
};
}
}
finally {
cleanup();
}
},
// Conform to the only-once iterable protocol:
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Iterators_and_Generators#iterables
[Symbol.asyncIterator]: () => iterator,
};
return iterator;
}
}
// Decode final bytes
if (decoder) {
buffer += decoder.end();
decoder = undefined;
}
if (buffer != null) {
yield* iterateStringLines(buffer);
}
}
exports.lineByLine = lineByLine;
exports.iterateStreamLines = iterateStreamLines;
//# sourceMappingURL=line-by-line-iterator.js.map

@@ -7,5 +7,7 @@ /// <reference types="node" />

* - `Readable` side: object mode, returns a `string` for each line.
*
* @param encoding Optional encoding to use when decoding the stream into text. Default: `utf8`
* @returns Transform stream that converts a stream of bytes into line strings.
*/
export declare function createLineByLineStream(): stream.Transform;
export declare function createLineByLineStream(encoding?: BufferEncoding): stream.Transform;
//# sourceMappingURL=line-by-line-stream.d.ts.map

@@ -33,5 +33,7 @@ "use strict";

* - `Readable` side: object mode, returns a `string` for each line.
*
* @param encoding Optional encoding to use when decoding the stream into text. Default: `utf8`
* @returns Transform stream that converts a stream of bytes into line strings.
*/
function createLineByLineStream() {
function createLineByLineStream(encoding) {
// Used to signal when the iterator requested more data to process

@@ -41,5 +43,4 @@ const readableEvents = new events_1.default();

const readable = new stream_1.default.Readable({
autoDestroy: true,
objectMode: false,
highWaterMark: 0,
encoding,
read(_size) {

@@ -53,2 +54,3 @@ readableEvents.emit('resume');

autoDestroy: true,
decodeStrings: false,
transform(chunk, encoding, callback) {

@@ -69,3 +71,3 @@ (async () => {

readable.push(null);
whenDone.then(() => callback());
whenDone.then(() => callback(), callback);
},

@@ -79,3 +81,3 @@ destroy(error, callback) {

try {
for await (const line of line_by_line_iterator_1.lineByLine(readable)) {
for await (const line of line_by_line_iterator_1.iterateStreamLines(readable, encoding)) {
if (transform.destroyed) {

@@ -82,0 +84,0 @@ break;

{
"name": "@mangosteen/line-by-line",
"version": "1.0.0",
"description": "Read stream line by line using async iterator or object-mode stream transfrom.",
"version": "2.0.0",
"description": "Read stream line by line using async iterator or object-mode stream transform.",
"main": "dist/index.js",

@@ -22,2 +22,3 @@ "types": "dist/index.d.ts",

"async",
"iterable",
"iterator",

@@ -29,5 +30,17 @@ "generator",

"line-by-line",
"lines",
"string",
"text",
"cr",
"lf",
"crlf",
"csv",
"parse",
"parser"
"parser",
"split",
"line by line",
"read line",
"reader",
"line reader",
"encoding"
],

@@ -42,7 +55,8 @@ "author": "Paya",

"@types/jest": "^26.0.24",
"@types/node": "^14.17.5",
"@types/node": "^14.17.10",
"jest": "^27.0.6",
"ts-jest": "^27.0.4",
"ts-jest": "^27.0.5",
"ts-node": "^10.2.1",
"typescript": "^4.3.5"
}
}

@@ -6,2 +6,5 @@ # line-by-line

Initially, `line-by-line` used `readline` package internally, but because of its shortcomings
(inability to specify encoding), it now implements a custom line-reading algorithm.
# Why not just use the built-in `readline` package?

@@ -12,3 +15,3 @@

Our package internally uses `readline`, but wraps it in a way to fix the above shortcomings.
Additionally, `readline` is hard-coded to `utf8` encoding, so you cannot use it with other encodings.

@@ -21,14 +24,59 @@ # Installation

# Usage (iterator)
# Usage (string array)
```js
import fs from 'fs/promises';
import { splitStringLines } from '@mangosteen/line-by-line';
(async () => {
const fileBuffer: Buffer = await fs.readFile('./shakespeare.txt');
const text: string = fileBuffer.toString('utf8');
const lines: string[] = splitStringLines(text);
for (const line of lines) {
console.log('Line:', line);
}
})();
```
`splitStringLines` splits the text into lines array.
This may potentially consume a lot of memory, because at one point you need to hold both
the entire input string and the entire output array of lines. Thus, we generally recommend
using below functions instead.
# Usage (string iterator)
```js
import fs from 'fs/promises';
import { iterateStringLines } from '@mangosteen/line-by-line';
(async () => {
const fileBuffer: Buffer = await fs.readFile('./shakespeare.txt');
const text: string = fileBuffer.toString('utf8');
const iterator: Iterable<string> = iterateStringLines(text);
for (const line of iterator) {
console.log('Line:', line);
}
})();
```
`iterateStringLines` is a generator function that lazily yields lines one by one.
You still need to hold the entire input string in memory, but the output lines can
be processed efficiently.
# Usage (stream iterator)
```js
import fs from 'fs';
import { lineByLine } from '@mangosteen/line-by-line';
import { iterateStreamLines } from '@mangosteen/line-by-line';
(async () => {
const inputStream = fs.createReadStream('./shakespeare.txt');
const iterator: AsyncIterable<string> = iterateStreamLines(inputStream, 'utf8');
for await (const line of lineByLine(inputStream)) {
for await (const line of iterator) {
console.log('Line:', line);
console.log(typeof line); // 'string'
}

@@ -38,9 +86,17 @@ })();

The iterator automatically closes and destroys the input stream, and fully propagates input stream errors.
When you `break`, `return` or `throw` from within the `for await` loop, everything gets cleaned up automatically.
Errors thrown by the stream work the same way.
`iterateStreamLines` is an async generator function that lazily yields lines one by one.
You cannot reuse the same input stream for multiple `for await` loops or multiple `lineByLine` iterators,
because everything gets cleaned up automatically.
This is the most efficient method of reading lines. The input is a stream and can be
processed on-demand. The output is generated on-demand as well, one line at a time.
When the stream iterator returned by `iterateStreamLines` is consumed (via `for await`),
it will automatically close and destroy the input stream, and fully propagate input stream
errors. You won't need to do anything more to clean up the input stream.
When you `break`, `return` or `throw` from within the `for await` loop, everything gets
cleaned up automatically. Errors thrown by the stream work the same way.
You cannot reuse the same input stream for multiple `for await` loops or multiple `lineByLine`
iterators, because everything gets cleaned up automatically.
# Usage (transform stream)

@@ -59,3 +115,3 @@

fs.createReadStream('./shakespeare.txt'),
createLineByLineStream(),
createLineByLineStream('utf8'),
createSinkStream(),

@@ -69,5 +125,4 @@ );

highWaterMark: 0,
write(chunk, _encoding, callback): void {
console.log('Line:', chunk);
console.log(typeof chunk); // 'string'
write(line: string, _encoding, callback): void {
console.log('Line:', line);
callback();

@@ -79,3 +134,4 @@ },

The `createLineByLineStream` transform stream `Writable` side expects a standard non-`objectMode` stream.
The `Readable` side runs in an `objectMode`, where each object is a line `string`.
The `createLineByLineStream` transform stream's `Writable` side expects a standard non-`objectMode` stream.
The `Readable` side runs in an `objectMode`, where each object is a line `string`.
You can specify an `encoding` to decode any `Buffer`s the transform stream receives.

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Socket — SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc