@mangosteen/line-by-line
Advanced tools
Comparing version 1.0.0 to 2.0.0
/// <reference types="node" /> | ||
/** | ||
* Split a string at new-line boundaries. Returns an array of lines. | ||
* | ||
* @param text String to split into lines. | ||
* @returns An array of lines. | ||
*/ | ||
export declare function splitStringLines(text: string): string[]; | ||
/** | ||
* Iterate over a string line by line. Each iterated value is a line found within the `text`. | ||
* This generator always yields at least one value. | ||
* | ||
* Use this generator to reduce memory pressure in your code. | ||
* | ||
* @param text String to iterate over line by line. | ||
* @returns Iterator that yields individual lines. | ||
*/ | ||
export declare function iterateStringLines(text: string): Generator<string, void, void>; | ||
/** | ||
* Reads a stream line by line. Iterate over the iterator using `for await` loop. | ||
* @param stream Readable stream to read line by line. | ||
* @param encoding Optional encoding to use when decoding the stream into text. Default: `utf8` | ||
* @returns Iterator that yields individual lines. | ||
*/ | ||
export declare function lineByLine(stream: NodeJS.ReadableStream): AsyncIterableIterator<string>; | ||
export declare function iterateStreamLines(stream: NodeJS.ReadableStream, encoding?: BufferEncoding): AsyncGenerator<string, void, void>; | ||
//# sourceMappingURL=line-by-line-iterator.d.ts.map |
"use strict"; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.lineByLine = void 0; | ||
const readline_1 = __importDefault(require("readline")); | ||
exports.iterateStreamLines = exports.iterateStringLines = exports.splitStringLines = void 0; | ||
const string_decoder_1 = require("string_decoder"); | ||
/** | ||
* Split a string at new-line boundaries. Returns an array of lines. | ||
* | ||
* @param text String to split into lines. | ||
* @returns An array of lines. | ||
*/ | ||
function splitStringLines(text) {
    // Scan the text once, treating \n, \r, and the \r\n pair as line
    // terminators. A \r immediately followed by \n counts as a single
    // break, so CRLF input does not produce spurious empty lines.
    const lines = [];
    let lineStart = 0;
    for (let pos = 0; pos < text.length; pos++) {
        const ch = text[pos];
        if (ch === '\n' || ch === '\r') {
            lines.push(text.slice(lineStart, pos));
            // Consume the \n of a \r\n pair as part of the same break.
            if (ch === '\r' && text[pos + 1] === '\n') {
                pos++;
            }
            lineStart = pos + 1;
        }
    }
    // The final (possibly empty) line has no terminator; emit it unconditionally,
    // so an empty input still yields [''].
    lines.push(text.slice(lineStart));
    return lines;
}
exports.splitStringLines = splitStringLines; | ||
/** | ||
* Iterate over a string line by line. Each iterated value is a line found within the `text`. | ||
* This generator always yields at least one value. | ||
* | ||
* Use this generator to reduce memory pressure in your code. | ||
* | ||
* @param text String to iterate over line by line. | ||
* @returns Iterator that yields individual lines. | ||
*/ | ||
function* iterateStringLines(text) {
    // Walk the lazy match sequence of line terminators; each match marks the
    // end of the current line. \r\n is listed first so a CRLF pair is matched
    // as one break rather than two.
    let lineStart = 0;
    for (const terminator of text.matchAll(/\r\n|\n|\r/g)) {
        yield text.slice(lineStart, terminator.index);
        lineStart = terminator.index + terminator[0].length;
    }
    // Always yield the trailing (possibly empty) line, so even an empty
    // input produces exactly one value.
    yield text.slice(lineStart);
}
exports.iterateStringLines = iterateStringLines; | ||
/** | ||
* Reads a stream line by line. Iterate over the iterator using `for await` loop. | ||
* @param stream Readable stream to read line by line. | ||
* @param encoding Optional encoding to use when decoding the stream into text. Default: `utf8` | ||
* @returns Iterator that yields individual lines. | ||
*/ | ||
function lineByLine(stream) { | ||
// Store a captured error in a promise | ||
let setCapturedError; | ||
const capturedError = new Promise((_resolve, reject) => { | ||
setCapturedError = reject; | ||
}); | ||
// Clean up all resources | ||
let isDestroyed = false; | ||
const cleanup = () => { | ||
if (isDestroyed == false) { | ||
isDestroyed = true; | ||
stream.off('error', onError); | ||
stream.off('close', cleanup); | ||
readLineInterface.off('close', cleanup); | ||
// Close the readline interface | ||
readLineInterface.close(); | ||
// Destroy the stream | ||
const s = stream; | ||
if (typeof s.destroy === 'function') { | ||
s.destroy(); | ||
async function* iterateStreamLines(stream, encoding) { | ||
// Nullable because stream could emit only strings, or alternate between strings and buffers! | ||
let decoder; | ||
// Nullable because the stream might be empty and not yield a single chunk! | ||
let buffer; | ||
for await (const chunk of stream) { | ||
// Initialize buffer for the first chunk | ||
if (buffer == null) { | ||
buffer = ''; | ||
} | ||
// Add chunk to the buffer | ||
if (typeof chunk === 'string') { | ||
if (decoder) { | ||
buffer += decoder.end(); | ||
decoder = undefined; | ||
} | ||
buffer += chunk; | ||
} | ||
}; | ||
const onError = (err) => { | ||
setCapturedError(err); | ||
cleanup(); | ||
}; | ||
stream.once('error', onError); | ||
stream.once('close', cleanup); | ||
// Use readline package to process the stream | ||
const readLineInterface = readline_1.default.createInterface({ | ||
input: stream, | ||
crlfDelay: Infinity, | ||
}); | ||
readLineInterface.once('close', cleanup); | ||
const readLineIterator = readLineInterface[Symbol.asyncIterator](); | ||
// Create our own async iterator that wraps the readline iterator | ||
const iterator = { | ||
async next() { | ||
try { | ||
return await Promise.race([ | ||
capturedError, | ||
readLineIterator.next(), | ||
]); | ||
else if (Buffer.isBuffer(chunk)) { | ||
if (!decoder) { | ||
decoder = new string_decoder_1.StringDecoder(encoding); | ||
} | ||
catch (error) { | ||
onError(error); | ||
throw error; | ||
buffer += decoder.write(chunk); | ||
} | ||
else { | ||
throw new Error(`Unsupported chunk type: ${typeof chunk}`); | ||
} | ||
// If the buffered text ends with \r, make sure to NOT process that character because it might | ||
// be part of the \r\n pair, and we might get \n in the next chunk! | ||
const endsWithCR = buffer.endsWith('\r'); | ||
if (buffer.length > (endsWithCR ? 1 : 0)) { | ||
if (endsWithCR) { | ||
buffer = buffer.slice(0, -1); | ||
} | ||
}, | ||
// Called when there is a "break;", "throw;" or "return;" in "for await" loop | ||
// https://262.ecma-international.org/6.0/#sec-iteration | ||
async return(value) { | ||
try { | ||
if (readLineIterator.return != null) { | ||
return await readLineIterator.return(value); | ||
let lastLine = undefined; | ||
// The iterate function always yields at least one value | ||
for (const line of iterateStringLines(buffer)) { | ||
if (lastLine != null) { | ||
yield lastLine; | ||
} | ||
else { | ||
return { | ||
done: true, | ||
value, | ||
}; | ||
} | ||
lastLine = line; | ||
} | ||
finally { | ||
cleanup(); | ||
// Last line might be incomplete, so we need to continue adding chunks to it | ||
buffer = lastLine; | ||
// Put back the \r we took | ||
if (endsWithCR) { | ||
buffer += '\r'; | ||
} | ||
}, | ||
// No idea who ever uses this, but let's cleanup anyway | ||
async throw(e) { | ||
try { | ||
if (readLineIterator.throw != null) { | ||
return await readLineIterator.throw(e); | ||
} | ||
else if (e != null) { | ||
throw e; | ||
} | ||
else { | ||
return { | ||
done: true, | ||
value: undefined, | ||
}; | ||
} | ||
} | ||
finally { | ||
cleanup(); | ||
} | ||
}, | ||
// Conform to the only-once iterable protocol: | ||
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Iterators_and_Generators#iterables | ||
[Symbol.asyncIterator]: () => iterator, | ||
}; | ||
return iterator; | ||
} | ||
} | ||
// Decode final bytes | ||
if (decoder) { | ||
buffer += decoder.end(); | ||
decoder = undefined; | ||
} | ||
if (buffer != null) { | ||
yield* iterateStringLines(buffer); | ||
} | ||
} | ||
exports.lineByLine = lineByLine; | ||
exports.iterateStreamLines = iterateStreamLines; | ||
//# sourceMappingURL=line-by-line-iterator.js.map |
@@ -7,5 +7,7 @@ /// <reference types="node" /> | ||
* - `Readable` side: object mode, returns a `string` for each line. | ||
* | ||
* @param encoding Optional encoding to use when decoding the stream into text. Default: `utf8` | ||
* @returns Transform stream that converts a stream of bytes into line strings. | ||
*/ | ||
export declare function createLineByLineStream(): stream.Transform; | ||
export declare function createLineByLineStream(encoding?: BufferEncoding): stream.Transform; | ||
//# sourceMappingURL=line-by-line-stream.d.ts.map |
@@ -33,5 +33,7 @@ "use strict"; | ||
* - `Readable` side: object mode, returns a `string` for each line. | ||
* | ||
* @param encoding Optional encoding to use when decoding the stream into text. Default: `utf8` | ||
* @returns Transform stream that converts a stream of bytes into line strings. | ||
*/ | ||
function createLineByLineStream() { | ||
function createLineByLineStream(encoding) { | ||
// Used to signal when the iterator requested more data to process | ||
@@ -41,5 +43,4 @@ const readableEvents = new events_1.default(); | ||
const readable = new stream_1.default.Readable({ | ||
autoDestroy: true, | ||
objectMode: false, | ||
highWaterMark: 0, | ||
encoding, | ||
read(_size) { | ||
@@ -53,2 +54,3 @@ readableEvents.emit('resume'); | ||
autoDestroy: true, | ||
decodeStrings: false, | ||
transform(chunk, encoding, callback) { | ||
@@ -69,3 +71,3 @@ (async () => { | ||
readable.push(null); | ||
whenDone.then(() => callback()); | ||
whenDone.then(() => callback(), callback); | ||
}, | ||
@@ -79,3 +81,3 @@ destroy(error, callback) { | ||
try { | ||
for await (const line of line_by_line_iterator_1.lineByLine(readable)) { | ||
for await (const line of line_by_line_iterator_1.iterateStreamLines(readable, encoding)) { | ||
if (transform.destroyed) { | ||
@@ -82,0 +84,0 @@ break; |
{ | ||
"name": "@mangosteen/line-by-line", | ||
"version": "1.0.0", | ||
"description": "Read stream line by line using async iterator or object-mode stream transfrom.", | ||
"version": "2.0.0", | ||
"description": "Read stream line by line using async iterator or object-mode stream transform.", | ||
"main": "dist/index.js", | ||
@@ -22,2 +22,3 @@ "types": "dist/index.d.ts", | ||
"async", | ||
"iterable", | ||
"iterator", | ||
@@ -29,5 +30,17 @@ "generator", | ||
"line-by-line", | ||
"lines", | ||
"string", | ||
"text", | ||
"cr", | ||
"lf", | ||
"crlf", | ||
"csv", | ||
"parse", | ||
"parser" | ||
"parser", | ||
"split", | ||
"line by line", | ||
"read line", | ||
"reader", | ||
"line reader", | ||
"encoding" | ||
], | ||
@@ -42,7 +55,8 @@ "author": "Paya", | ||
"@types/jest": "^26.0.24", | ||
"@types/node": "^14.17.5", | ||
"@types/node": "^14.17.10", | ||
"jest": "^27.0.6", | ||
"ts-jest": "^27.0.4", | ||
"ts-jest": "^27.0.5", | ||
"ts-node": "^10.2.1", | ||
"typescript": "^4.3.5" | ||
} | ||
} |
@@ -6,2 +6,5 @@ # line-by-line | ||
Initially, `line-by-line` used `readline` package internally, but because of its shortcomings | ||
(inability to specify encoding), it now implements a custom line-reading algorithm. | ||
# Why not just use the built-in `readline` package? | ||
@@ -12,3 +15,3 @@ | ||
Our package internally uses `readline`, but wraps it in a way to fix the above shortcomings. | ||
Additionally, `readline` is hard-coded to `utf8` encoding, so you cannot use it with other encodings. | ||
@@ -21,14 +24,59 @@ # Installation | ||
# Usage (iterator) | ||
# Usage (string array) | ||
```js | ||
import fs from 'fs/promises'; | ||
import { splitStringLines } from '@mangosteen/line-by-line'; | ||
(async () => { | ||
const fileBuffer: Buffer = await fs.readFile('./shakespeare.txt'); | ||
const text: string = fileBuffer.toString('utf8'); | ||
const lines: string[] = splitStringLines(text); | ||
for (const line of lines) { | ||
console.log('Line:', line); | ||
} | ||
})(); | ||
``` | ||
`splitStringLines` splits the text into lines array. | ||
This can consume a lot of memory, because at one point you need to hold both | ||
the entire input string and the entire output array of lines. Thus, we generally recommend | ||
using below functions instead. | ||
# Usage (string iterator) | ||
```js | ||
import fs from 'fs/promises'; | ||
import { iterateStringLines } from '@mangosteen/line-by-line'; | ||
(async () => { | ||
const fileBuffer: Buffer = await fs.readFile('./shakespeare.txt'); | ||
const text: string = fileBuffer.toString('utf8'); | ||
const iterator: Iterable<string> = iterateStringLines(text); | ||
for (const line of iterator) { | ||
console.log('Line:', line); | ||
} | ||
})(); | ||
``` | ||
`iterateStringLines` is a generator function that lazily yields lines one by one. | ||
You still need to hold the entire input string in memory, but the output lines can | ||
be processed efficiently. | ||
# Usage (stream iterator) | ||
```js | ||
import fs from 'fs'; | ||
import { lineByLine } from '@mangosteen/line-by-line'; | ||
import { iterateStreamLines } from '@mangosteen/line-by-line'; | ||
(async () => { | ||
const inputStream = fs.createReadStream('./shakespeare.txt'); | ||
const iterator: AsyncIterable<string> = iterateStreamLines(inputStream, 'utf8'); | ||
for await (const line of lineByLine(inputStream)) { | ||
for await (const line of iterator) { | ||
console.log('Line:', line); | ||
console.log(typeof line); // 'string' | ||
} | ||
@@ -38,9 +86,17 @@ })(); | ||
The iterator automatically closes and destroys the input stream, and fully propagates input stream errors. | ||
When you `break`, `return` or `throw` from within the `for await` loop, everything gets cleaned up automatically. | ||
Errors thrown by the stream work the same way. | ||
`iterateStreamLines` is an async generator function that lazily yields lines one by one. | ||
You cannot reuse the same input stream for multiple `for await` loops or multiple `lineByLine` iterators, | ||
because everything gets cleaned up automatically. | ||
This is the most efficient method of reading lines. The input is a stream and can be | ||
processed on-demand. The output is generated on-demand as well, one line at a time. | ||
When the stream iterator returned by `iterateStreamLines` is consumed (via `for await`), | ||
it will automatically close and destroy the input stream, and fully propagate input stream | ||
errors. You won't need to do anything more to clean up the input stream. | ||
When you `break`, `return` or `throw` from within the `for await` loop, everything gets | ||
cleaned up automatically. Errors thrown by the stream work the same way. | ||
You cannot reuse the same input stream for multiple `for await` loops or multiple `lineByLine` | ||
iterators, because everything gets cleaned up automatically. | ||
# Usage (transform stream) | ||
@@ -59,3 +115,3 @@ | ||
fs.createReadStream('./shakespeare.txt'), | ||
createLineByLineStream(), | ||
createLineByLineStream('utf8'), | ||
createSinkStream(), | ||
@@ -69,5 +125,4 @@ ); | ||
highWaterMark: 0, | ||
write(chunk, _encoding, callback): void { | ||
console.log('Line:', chunk); | ||
console.log(typeof chunk); // 'string' | ||
write(line: string, _encoding, callback): void { | ||
console.log('Line:', line); | ||
callback(); | ||
@@ -79,3 +134,4 @@ }, | ||
The `createLineByLineStream` transform stream `Writable` side expects a standard non-`objectMode` stream. | ||
The `Readable` side runs in an `objectMode`, where each object is a line `string`. | ||
The `createLineByLineStream` transform stream's `Writable` side expects a standard non-`objectMode` stream. | ||
The `Readable` side runs in an `objectMode`, where each object is a line `string`. | ||
You can specify an `encoding` to decode any `Buffer`s the transform stream receives. |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Filesystem access
Supply chain riskAccesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
130
0
21728
6
15
247
1