@quadient/evolve-data-transformations
Advanced tools
Comparing version 0.0.6 to 0.0.7
export * from "./csv/index"; | ||
export * from "./json/index"; | ||
export * from "./xml/index"; | ||
export * from "./utils/index"; |
export * from "./csv/index"; | ||
export * from "./json/index"; | ||
export * from "./xml/index"; | ||
export * from "./utils/index"; |
@@ -0,1 +1,2 @@ | ||
export * from "./json-common"; | ||
export * from "./json-writer"; | ||
@@ -6,1 +7,2 @@ export * from "./json-parser"; | ||
export * from "./string-to-json-transform-stream"; | ||
export * from "./json-materializing-transform-stream"; |
@@ -0,1 +1,2 @@ | ||
export * from "./json-common"; | ||
export * from "./json-writer"; | ||
@@ -6,1 +7,2 @@ export * from "./json-parser"; | ||
export * from "./string-to-json-transform-stream"; | ||
export * from "./json-materializing-transform-stream"; |
@@ -1,3 +0,34 @@ | ||
import { JsonEvent } from "./json-writer"; | ||
import { JsonEvent } from "./json-common"; | ||
import { JsonParserCallback } from "./json-parser"; | ||
/**
 * Options shared by `JsonMaterializer` and `JsonMaterializingParser`.
 */
export interface MaterializerOptions {
    /** Path patterns (for example `.people[*]`) selecting the values that get materialized. */
    materializedPaths: string[];
}
/**
 * Helper class that consumes JsonEvent objects and also produces JsonEvents, but it can
 * replace all events of a value located at one of the `materializedPaths` with a single
 * `JsonEventType.ANY_VALUE` event that carries the materialized value.
 */
export declare class JsonMaterializer {
    private readonly path;
    private readonly currentMaterializedValue;
    private readonly materializePathRegexps;
    private readonly callback;
    private state;
    constructor(callback: (_: JsonEvent) => any, options: MaterializerOptions);
    /**
     * Processes one event. Returns the result of the callback invoked for it,
     * or `undefined` when the event was absorbed without invoking the callback.
     */
    write(event: JsonEvent): any;
    /** Throws an "Unexpected end of input" error unless the materializer is back in its initial state. */
    flush(): void;
}
/**
 * Partially streaming Parser. Basically it parses input and produces events for the JSON tokens, but it can
 * materialize the whole objects and produce special events with them for only specific parts of the json input.
 */
export declare class JsonMaterializingParser {
    private readonly parser;
    private readonly materializer;
    constructor(callback: JsonParserCallback, options: MaterializerOptions);
    /**
     * Feeds a piece of JSON text to the underlying parser.
     * @async
     */
    parse(text: string): Promise<void>;
    /** Flushes the underlying parser first and the materializer afterwards. */
    flush(): Promise<void>;
}
/** | ||
* Helper class that can be used to transform JSON parser events to a deserialized object, | ||
@@ -4,0 +35,0 @@ * like if it has been deserialized by "JSON.parse". |
@@ -1,3 +0,111 @@ | ||
import { JsonEventType } from "./json-writer"; | ||
import { JsonEventType } from "./json-common"; | ||
import { JsonParser } from "./json-parser"; | ||
/** | ||
* Helper class that consumes JsonEvent objects and produces also JsonEvents, but it can | ||
*/ | ||
export class JsonMaterializer { | ||
constructor(callback, options) { | ||
this.path = new JsonParserPathHelper(); | ||
this.currentMaterializedValue = new JsonParserValueHelper(); | ||
this.materializePathRegexps = []; | ||
this.state = MaterializerState.INITIAL; | ||
this.callback = callback; | ||
if (options === null || options === void 0 ? void 0 : options.materializedPaths) { | ||
for (let path of options === null || options === void 0 ? void 0 : options.materializedPaths) { | ||
path = path.replace(/[-[\]{}()+?.,\\^$|#\s]/g, "\\$&"); | ||
path = path.replace(/\*\*/g, "(.*)"); | ||
path = path.replace(/\*/g, "([^.]*)"); | ||
if (!path.startsWith("^")) | ||
path = "^" + path; | ||
if (!path.endsWith("$")) | ||
path = path + "$"; | ||
this.materializePathRegexps.push(new RegExp(path, "i")); | ||
} | ||
} | ||
} | ||
write(event) { | ||
let callbackResult = undefined; | ||
this.path.event(event); | ||
switch (this.state) { | ||
case MaterializerState.INITIAL: | ||
// maybe we can start materialization on next event | ||
const currentPath = this.path.currentPath; | ||
for (const pathToMaterialize of this.materializePathRegexps) { | ||
if (currentPath.match(pathToMaterialize)) { | ||
this.state = MaterializerState.MATERIALIZING_START; | ||
} | ||
} | ||
callbackResult = this.callback(event); | ||
break; | ||
case MaterializerState.MATERIALIZING_START: | ||
if (event.type === JsonEventType.END_ARRAY) { | ||
// We were materializing an array member, but the array is finished. | ||
this.state = MaterializerState.INITIAL; | ||
callbackResult = this.callback(event); | ||
} | ||
else { | ||
if (this.currentMaterializedValue.event(event)) { | ||
callbackResult = this.callback({ type: JsonEventType.ANY_VALUE, data: this.currentMaterializedValue.value }); | ||
this.state = MaterializerState.INITIAL; | ||
// maybe we can start another materialization on next event... | ||
const currentPath = this.path.currentPath; | ||
for (const pathToMaterialize of this.materializePathRegexps) { | ||
if (currentPath.match(pathToMaterialize)) { | ||
this.state = MaterializerState.MATERIALIZING_START; | ||
} | ||
} | ||
} | ||
else { | ||
this.state = MaterializerState.MATERIALIZING; | ||
} | ||
} | ||
break; | ||
case MaterializerState.MATERIALIZING: | ||
if (this.currentMaterializedValue.event(event)) { | ||
callbackResult = this.callback({ type: JsonEventType.ANY_VALUE, data: this.currentMaterializedValue.value }); | ||
this.state = MaterializerState.INITIAL; | ||
// maybe we can start another materialization on next event... | ||
const currentPath = this.path.currentPath; | ||
for (const pathToMaterialize of this.materializePathRegexps) { | ||
if (currentPath.match(pathToMaterialize)) { | ||
this.state = MaterializerState.MATERIALIZING_START; | ||
} | ||
} | ||
} | ||
break; | ||
} | ||
return callbackResult; | ||
} | ||
flush() { | ||
if (this.state != MaterializerState.INITIAL) { | ||
throw new Error("Unexpected end of input"); | ||
} | ||
} | ||
} | ||
/** | ||
* Partially streaming Parser. Basically it parses input and produces events for the JSON tokens, but it can | ||
* materialize the whole objects and produce special events with them for only specific parts of the json input. | ||
*/ | ||
export class JsonMaterializingParser { | ||
constructor(callback, options) { | ||
this.parser = new JsonParser(async (event) => { | ||
const result = this.materializer.write(event); | ||
if (result) { | ||
await result; | ||
} | ||
}); | ||
this.materializer = new JsonMaterializer(callback, options); | ||
} | ||
/** | ||
* @async | ||
*/ | ||
async parse(text) { | ||
return this.parser.parse(text); | ||
} | ||
async flush() { | ||
await this.parser.flush(); | ||
await this.materializer.flush(); | ||
} | ||
} | ||
/** | ||
* Helper class that can be used to transform JSON parser events to a deserialized object, | ||
@@ -201,1 +309,7 @@ * like if it has been deserialized by "JSON.parse". | ||
} | ||
var MaterializerState; | ||
(function (MaterializerState) { | ||
MaterializerState[MaterializerState["INITIAL"] = 0] = "INITIAL"; | ||
MaterializerState[MaterializerState["MATERIALIZING_START"] = 1] = "MATERIALIZING_START"; | ||
MaterializerState[MaterializerState["MATERIALIZING"] = 2] = "MATERIALIZING"; | ||
})(MaterializerState || (MaterializerState = {})); |
@@ -1,3 +0,3 @@ | ||
import { JsonEvent } from "./json-writer"; | ||
declare type JsonParserCallback = (_: JsonEvent) => Promise<any>; | ||
import { JsonEvent } from "./json-common"; | ||
export declare type JsonParserCallback = (_: JsonEvent) => Promise<any>; | ||
export declare class JsonParser { | ||
@@ -25,2 +25,1 @@ private readonly callback; | ||
} | ||
export {}; |
@@ -1,2 +0,2 @@ | ||
import { E_END_ARRAY, E_END_OBJECT, E_START_ARRAY, E_START_OBJECT, JsonEventType } from "./json-writer"; | ||
import { JsonEventType, E_START_OBJECT, E_END_OBJECT, E_START_ARRAY, E_END_ARRAY } from "./json-common"; | ||
var State; | ||
@@ -3,0 +3,0 @@ (function (State) { |
@@ -1,2 +0,2 @@ | ||
import { JsonEvent } from "./json-writer"; | ||
import { JsonEvent } from "./json-common"; | ||
export declare class JsonToStringTransformStream extends TransformStream<JsonEvent, string> { | ||
@@ -3,0 +3,0 @@ private readonly JsonWriter; |
@@ -1,39 +0,2 @@ | ||
export declare type JsonEvent = { | ||
type: JsonEventType.START_OBJECT; | ||
} | { | ||
type: JsonEventType.END_OBJECT; | ||
} | { | ||
type: JsonEventType.START_ARRAY; | ||
} | { | ||
type: JsonEventType.END_ARRAY; | ||
} | { | ||
type: JsonEventType.PROPERTY_NAME; | ||
data: string; | ||
} | { | ||
type: JsonEventType.TEXT_VALUE; | ||
data: string; | ||
} | { | ||
type: JsonEventType.NUMBER_VALUE; | ||
data: number; | ||
} | { | ||
type: JsonEventType.BOOLEAN_VALUE; | ||
data: boolean; | ||
} | { | ||
type: JsonEventType.NULL_VALUE; | ||
}; | ||
export declare enum JsonEventType { | ||
START_OBJECT = 0, | ||
END_OBJECT = 1, | ||
START_ARRAY = 2, | ||
END_ARRAY = 3, | ||
PROPERTY_NAME = 4, | ||
TEXT_VALUE = 5, | ||
NUMBER_VALUE = 6, | ||
BOOLEAN_VALUE = 7, | ||
NULL_VALUE = 8 | ||
} | ||
export declare const E_START_OBJECT: JsonEvent; | ||
export declare const E_END_OBJECT: JsonEvent; | ||
export declare const E_START_ARRAY: JsonEvent; | ||
export declare const E_END_ARRAY: JsonEvent; | ||
import { JsonEvent } from "./json-common"; | ||
export declare class JsonWriter { | ||
@@ -43,2 +6,3 @@ private separatorNeeded; | ||
private readonly callback; | ||
private readonly objectSerializerCallback; | ||
constructor(callback: (_: string) => Promise<any>); | ||
@@ -50,1 +14,2 @@ private emitSeparatorIfNeeded; | ||
} | ||
export declare function serializeValueAsync(value: any, callback: (event: JsonEvent) => Promise<any>): Promise<void>; |
@@ -1,18 +0,2 @@ | ||
export var JsonEventType; | ||
(function (JsonEventType) { | ||
JsonEventType[JsonEventType["START_OBJECT"] = 0] = "START_OBJECT"; | ||
JsonEventType[JsonEventType["END_OBJECT"] = 1] = "END_OBJECT"; | ||
JsonEventType[JsonEventType["START_ARRAY"] = 2] = "START_ARRAY"; | ||
JsonEventType[JsonEventType["END_ARRAY"] = 3] = "END_ARRAY"; | ||
JsonEventType[JsonEventType["PROPERTY_NAME"] = 4] = "PROPERTY_NAME"; | ||
JsonEventType[JsonEventType["TEXT_VALUE"] = 5] = "TEXT_VALUE"; | ||
JsonEventType[JsonEventType["NUMBER_VALUE"] = 6] = "NUMBER_VALUE"; | ||
JsonEventType[JsonEventType["BOOLEAN_VALUE"] = 7] = "BOOLEAN_VALUE"; | ||
JsonEventType[JsonEventType["NULL_VALUE"] = 8] = "NULL_VALUE"; | ||
})(JsonEventType || (JsonEventType = {})); | ||
// Useful constants for events without data. Saves some objects creation. | ||
export const E_START_OBJECT = { type: JsonEventType.START_OBJECT }; | ||
export const E_END_OBJECT = { type: JsonEventType.END_OBJECT }; | ||
export const E_START_ARRAY = { type: JsonEventType.START_ARRAY }; | ||
export const E_END_ARRAY = { type: JsonEventType.END_ARRAY }; | ||
import { JsonEventType, E_START_OBJECT, E_END_OBJECT, E_START_ARRAY, E_END_ARRAY } from "./json-common"; | ||
export class JsonWriter { | ||
@@ -23,2 +7,3 @@ constructor(callback) { | ||
this.callback = callback; | ||
this.objectSerializerCallback = this.write.bind(this); | ||
} | ||
@@ -90,5 +75,45 @@ async emitSeparatorIfNeeded() { | ||
break; | ||
case JsonEventType.ANY_VALUE: | ||
await serializeValueAsync(event.data, this.objectSerializerCallback); | ||
break; | ||
} | ||
} | ||
} | ||
export async function serializeValueAsync(value, callback) { | ||
if (Array.isArray(value)) { | ||
await serializeArrayAsync(value, callback); | ||
return; | ||
} | ||
switch (typeof value) { | ||
case "object": | ||
await serializeObjectAsync(value, callback); | ||
break; | ||
case "string": | ||
await callback({ type: JsonEventType.TEXT_VALUE, data: value }); | ||
break; | ||
case "number": | ||
await callback({ type: JsonEventType.NUMBER_VALUE, data: value }); | ||
break; | ||
case "boolean": | ||
await callback({ type: JsonEventType.BOOLEAN_VALUE, data: value }); | ||
break; | ||
default: | ||
throw new Error("Cannot serialize data type: " + typeof value); | ||
} | ||
} | ||
async function serializeObjectAsync(o, callback) { | ||
await callback(E_START_OBJECT); | ||
for (const key in o) { | ||
await callback({ type: JsonEventType.PROPERTY_NAME, data: key }); | ||
await serializeValueAsync(o[key], callback); | ||
} | ||
await callback(E_END_OBJECT); | ||
} | ||
async function serializeArrayAsync(a, callback) { | ||
await callback(E_START_ARRAY); | ||
for (const item of a) { | ||
await serializeValueAsync(item, callback); | ||
} | ||
await callback(E_END_ARRAY); | ||
} | ||
function escapeJsonValue(str) { | ||
@@ -95,0 +120,0 @@ let result = "", counter = 0, length = str.length, cc; |
@@ -1,2 +0,2 @@ | ||
import { JsonEvent } from "./json-writer"; | ||
import { JsonEvent } from "./json-common"; | ||
export declare class StringToJsonTransformStream extends TransformStream<string, JsonEvent> { | ||
@@ -3,0 +3,0 @@ private readonly parser; |
@@ -5,3 +5,3 @@ { | ||
"description": "Library for data transformations.", | ||
"version": "0.0.6", | ||
"version": "0.0.7", | ||
"main": "dist/index.js", | ||
@@ -8,0 +8,0 @@ "types": "dist/index.d.ts", |
@@ -110,2 +110,76 @@ # Data Transformations Package | ||
### Partial materialization | ||
Processing JSON using events is efficient with respect to used memory during the transformation, but | ||
it is quite intricate, especially when compared to fully deserializing JSON to an object using `JSON.parse`. | ||
In most of the situations it will be possible to use a combination of both approaches. When dealing with large data | ||
it often appears, that data structure contains some array with many members. In such case it would be useful | ||
to handle all JSON input with streaming approach, but the members of array could be safely deserialized to objects as | ||
the individual members of the array are small enough to fit in memory. | ||
For this approach there is a set of helper classes used for partial materialization of the json data. One can specify which | ||
parts of the json structure will be materialized (deserialized to object) with a json path. | ||
A special event of type `JsonEventType.ANY_VALUE` will be triggered for those deserialized parts. | ||
Here is an example using `JsonMaterializingParser` class: | ||
```js | ||
const inputJson = '{"people": [{"firstName": "Mike", "lastName": "Smith"}, {"firstName": "Foo", "lastName": "Bar"}]}'; | ||
const materializedPaths = [".people[*]"]; | ||
const writer = new JsonWriter(async (s) => { | ||
console.log(s); | ||
}); | ||
const parserCallback = async function (event: JsonEvent) { | ||
if (event.type === JsonEventType.ANY_VALUE) { | ||
const o = event.data; | ||
o.full_name = o.firstName + " " + o.lastName; | ||
} | ||
await writer.write(event); | ||
} | ||
const parser = new JsonMaterializingParser(parserCallback, { materializedPaths }); | ||
await parser.parse(inputJson); | ||
await parser.flush(); | ||
await writer.flush(); | ||
``` | ||
Output: | ||
```json | ||
{"people":[ | ||
{"firstName":"Mike","lastName":"Smith","full_name":"Mike Smith"}, | ||
{"firstName":"Foo","lastName":"Bar","full_name":"Foo Bar"} | ||
]} | ||
``` | ||
And here follows example with streams, making use of `JsonMaterializingTransformStream`: | ||
```js | ||
const input = new StringReadableStream( | ||
'{"people": [{"firstName": "Mike", "lastName": "Smith"}, {"firstName": "Foo", "lastName": "Bar"}]}' | ||
); | ||
const materializedPaths = [".people[*]"]; | ||
const transformer = new TransformStream<JsonEvent, JsonEvent>({ | ||
transform(event, controller) { | ||
if (event.type === JsonEventType.ANY_VALUE) { | ||
const o = event.data; | ||
o.full_name = o.firstName + " " + o.lastName; | ||
} | ||
controller.enqueue(event); | ||
} | ||
}); | ||
await input | ||
.pipeThrough(new StringToJsonTransformStream()) | ||
.pipeThrough(new JsonMaterializingTransformStream({materializedPaths})) | ||
.pipeThrough(transformer) | ||
.pipeThrough(new JsonToStringTransformStream()) | ||
.pipeTo(new ConsoleLogWritableStream()); | ||
``` | ||
Output: | ||
```json | ||
{"people":[ | ||
{"firstName":"Mike","lastName":"Smith","full_name":"Mike Smith"}, | ||
{"firstName":"Foo","lastName":"Bar","full_name":"Foo Bar"} | ||
]} | ||
``` | ||
## CSV | ||
@@ -162,1 +236,13 @@ | ||
``` | ||
Following stream-compatible classes are available: | ||
- `StringToXmlTransformStream` - transforms stream of strings to stream of `XmlEvent` objects (xml deserialization). | ||
- `XmlToStringTransformStream` - transforms stream of `XmlEvent` objects to a string stream (xml serialization). | ||
- `StringToJsonTransformStream` - transforms stream of strings to stream of `JsonEvent` objects (json deserialization). | ||
- `JsonToStringTransformStream` - transforms stream of `JsonEvent` objects to a string stream (json serialization). | ||
- `JsonMaterializingTransformStream` - transforms stream of `JsonEvent` objects to `JsonEvent` objects. Some events just | ||
pass through, some are consumed and translated to a special event containing | ||
an object representing the materialized part of the JSON input. | ||
- `StringToCsvTransformStream` - transforms stream of strings to `CsvEvent` objects (csv deserialization). | ||
- |
@@ -1,2 +0,15 @@ | ||
import { JsonParser, JsonParserPathHelper, JsonParserValueHelper } from "../../src"; | ||
import { | ||
JsonEvent, | ||
JsonEventType, | ||
JsonMaterializingParser, | ||
JsonParser, | ||
JsonParserPathHelper, | ||
JsonParserValueHelper, | ||
ConsoleLogWritableStream, | ||
JsonMaterializingTransformStream, | ||
JsonToStringTransformStream, | ||
JsonWriter, | ||
StringReadableStream, | ||
StringToJsonTransformStream, | ||
} from "../../src"; | ||
import { describe, expect, it } from "../../../test/src"; | ||
@@ -39,2 +52,75 @@ | ||
describe("JsonMaterializingParser", () => { | ||
it("simple test", async () => { | ||
await testMaterializingParser( | ||
'{"data": {"records": [1, {"a": "b"}]}}', | ||
[".data.records[*]"], | ||
[ | ||
{ type: JsonEventType.START_OBJECT }, | ||
{ type: JsonEventType.PROPERTY_NAME, data: "data" }, | ||
{ type: JsonEventType.START_OBJECT }, | ||
{ type: JsonEventType.PROPERTY_NAME, data: "records" }, | ||
{ type: JsonEventType.START_ARRAY }, | ||
{ type: JsonEventType.ANY_VALUE, data: 1 }, | ||
{ type: JsonEventType.ANY_VALUE, data: { a: "b" } }, | ||
{ type: JsonEventType.END_ARRAY }, | ||
{ type: JsonEventType.END_OBJECT }, | ||
{ type: JsonEventType.END_OBJECT }, | ||
] | ||
); | ||
}); | ||
it("Documentation example 1", async () => { | ||
const input = new StringReadableStream( | ||
'{"people": [{"firstName": "Mike", "lastName": "Smith"}, {"firstName": "Foo", "lastName": "Bar"}]}' | ||
); | ||
const transformer = new TransformStream<JsonEvent, JsonEvent>({ | ||
transform(event, controller) { | ||
if (event.type === JsonEventType.ANY_VALUE) { | ||
const o = event.data; | ||
o.full_name = o.firstName + " " + o.lastName; | ||
} | ||
controller.enqueue(event); | ||
}, | ||
}); | ||
let result = ""; | ||
const resultStream = new WritableStream<string>({ | ||
write(s) { | ||
result += s; | ||
}, | ||
}); | ||
await input | ||
.pipeThrough(new StringToJsonTransformStream()) | ||
.pipeThrough(new JsonMaterializingTransformStream({ materializedPaths: [".people[*]"] })) | ||
.pipeThrough(transformer) | ||
.pipeThrough(new JsonToStringTransformStream()) | ||
.pipeTo(resultStream); | ||
expect(result).toBe( | ||
'{"people":[{"firstName":"Mike","lastName":"Smith","full_name":"Mike Smith"},{"firstName":"Foo","lastName":"Bar","full_name":"Foo Bar"}]}' | ||
); | ||
}); | ||
it("Documentation example 2", async () => { | ||
let result = ""; | ||
const inputJson = '{"people": [{"firstName": "Mike", "lastName": "Smith"}, {"firstName": "Foo", "lastName": "Bar"}]}'; | ||
const materializedPaths = [".people[*]"]; | ||
const writer = new JsonWriter(async (s) => { | ||
result += s; | ||
}); | ||
const parserCallback = async function (event: JsonEvent) { | ||
if (event.type === JsonEventType.ANY_VALUE) { | ||
const o = event.data; | ||
o.full_name = o.firstName + " " + o.lastName; | ||
} | ||
await writer.write(event); | ||
}; | ||
const parser = new JsonMaterializingParser(parserCallback, { materializedPaths }); | ||
await parser.parse(inputJson); | ||
await parser.flush(); | ||
await writer.flush(); | ||
expect(result).toBe( | ||
'{"people":[{"firstName":"Mike","lastName":"Smith","full_name":"Mike Smith"},{"firstName":"Foo","lastName":"Bar","full_name":"Foo Bar"}]}' | ||
); | ||
}); | ||
}); | ||
async function testValue(json: string) { | ||
@@ -55,1 +141,13 @@ const helper = new JsonParserValueHelper(); | ||
} | ||
/**
 * Parses `json` with a JsonMaterializingParser configured with `materializedPaths` and
 * expects the events delivered to the parser callback to equal `expectedEvents`.
 */
async function testMaterializingParser(json: string, materializedPaths: string[], expectedEvents: JsonEvent[]) {
  const result: JsonEvent[] = [];
  const parser = new JsonMaterializingParser(
    async (event) => {
      result.push(event);
    },
    { materializedPaths }
  );
  await parser.parse(json);
  expect(result).toEqual(expectedEvents);
}
@@ -25,2 +25,21 @@ import * as jw from "../../src"; | ||
}); | ||
it("serializeValue", async () => { | ||
const result: any[] = []; | ||
const callback = async function (event: jw.JsonEvent) { | ||
result.push(event); | ||
}; | ||
await jw.serializeValueAsync({ a: "s", b: [1, true] }, callback); | ||
expect(result).toEqual([ | ||
jw.E_START_OBJECT, | ||
{ type: jw.JsonEventType.PROPERTY_NAME, data: "a" }, | ||
{ type: jw.JsonEventType.TEXT_VALUE, data: "s" }, | ||
{ type: jw.JsonEventType.PROPERTY_NAME, data: "b" }, | ||
jw.E_START_ARRAY, | ||
{ type: jw.JsonEventType.NUMBER_VALUE, data: 1 }, | ||
{ type: jw.JsonEventType.BOOLEAN_VALUE, data: true }, | ||
jw.E_END_ARRAY, | ||
jw.E_END_OBJECT, | ||
]); | ||
}); | ||
}); | ||
@@ -27,0 +46,0 @@ |
export * from "./csv/index"; | ||
export * from "./json/index"; | ||
export * from "./xml/index"; | ||
export * from "./utils/index"; |
@@ -0,1 +1,2 @@ | ||
export * from "./json-common"; | ||
export * from "./json-writer"; | ||
@@ -6,1 +7,2 @@ export * from "./json-parser"; | ||
export * from "./string-to-json-transform-stream"; | ||
export * from "./json-materializing-transform-stream"; |
@@ -1,4 +0,128 @@ | ||
import { JsonEvent, JsonEventType } from "./json-writer"; | ||
import { JsonEvent, JsonEventType } from "./json-common"; | ||
import { JsonParser, JsonParserCallback } from "./json-parser"; | ||
/**
 * Options shared by `JsonMaterializer` and `JsonMaterializingParser`.
 */
export interface MaterializerOptions {
  /** Path patterns (for example `.people[*]`) selecting the values that get materialized. */
  materializedPaths: string[];
}
/**
 * Helper class that consumes JsonEvent objects and also produces JsonEvents, but it can
 * materialize selected values: whenever the current path matches one of the
 * `materializedPaths` patterns, the events of the following value are collected and
 * replaced by a single `JsonEventType.ANY_VALUE` event carrying the materialized value.
 *
 * Pattern syntax: `*` matches any run of characters except `.`, `**` matches any run of
 * characters, every other character is matched literally. Matching is case-insensitive
 * and always covers the whole path.
 */
export class JsonMaterializer {
  private readonly path: JsonParserPathHelper = new JsonParserPathHelper();
  private readonly currentMaterializedValue: JsonParserValueHelper = new JsonParserValueHelper();
  private readonly materializePathRegexps: RegExp[] = [];
  private readonly callback: (_: JsonEvent) => any;
  private state: MaterializerState = MaterializerState.INITIAL;

  /**
   * @param callback invoked with every forwarded or materialized event
   * @param options `materializedPaths` lists the path patterns to materialize
   */
  constructor(callback: (_: JsonEvent) => any, options: MaterializerOptions) {
    this.callback = callback;
    for (const pattern of options?.materializedPaths ?? []) {
      this.materializePathRegexps.push(materializedPathToRegExp(pattern));
    }
  }

  /**
   * Processes one event.
   *
   * @returns the result of the callback invoked for this event, or `undefined`
   *          when the event was absorbed into a value that is still incomplete
   */
  write(event: JsonEvent): any {
    let callbackResult: any = undefined;
    this.path.event(event);
    switch (this.state) {
      case MaterializerState.INITIAL:
        // The state is switched before the event is forwarded; capturing starts with the next event.
        if (matchesAnyPattern(this.materializePathRegexps, this.path.currentPath)) {
          this.state = MaterializerState.MATERIALIZING_START;
        }
        callbackResult = this.callback(event);
        break;
      case MaterializerState.MATERIALIZING_START:
        if (event.type === JsonEventType.END_ARRAY) {
          // We were materializing an array member, but the array is finished.
          this.state = MaterializerState.INITIAL;
          callbackResult = this.callback(event);
        } else if (this.currentMaterializedValue.event(event)) {
          callbackResult = this.callback({ type: JsonEventType.ANY_VALUE, data: this.currentMaterializedValue.value });
          // Maybe another materialization starts with the next event (e.g. next array member).
          this.state = matchesAnyPattern(this.materializePathRegexps, this.path.currentPath)
            ? MaterializerState.MATERIALIZING_START
            : MaterializerState.INITIAL;
        } else {
          this.state = MaterializerState.MATERIALIZING;
        }
        break;
      case MaterializerState.MATERIALIZING:
        if (this.currentMaterializedValue.event(event)) {
          callbackResult = this.callback({ type: JsonEventType.ANY_VALUE, data: this.currentMaterializedValue.value });
          // Maybe another materialization starts with the next event (e.g. next array member).
          this.state = matchesAnyPattern(this.materializePathRegexps, this.path.currentPath)
            ? MaterializerState.MATERIALIZING_START
            : MaterializerState.INITIAL;
        }
        break;
    }
    return callbackResult;
  }

  /**
   * Verifies that the input did not end in the middle of a materialization.
   *
   * @throws Error "Unexpected end of input" when the state is not INITIAL
   */
  flush() {
    if (this.state !== MaterializerState.INITIAL) {
      throw new Error("Unexpected end of input");
    }
  }
}

/** Translates one materialized-path pattern into an anchored, case-insensitive regular expression. */
function materializedPathToRegExp(pattern: string): RegExp {
  // Escape regex metacharacters first so that only "*" keeps a special meaning.
  const escaped = pattern.replace(/[-[\]{}()+?.,\\^$|#\s]/g, "\\$&");
  // Both wildcards are expanded in one pass; expanding "**" and "*" one after another
  // would rewrite the "*" inside the "(.*)" produced for "**".
  const body = escaped.replace(/\*\*|\*/g, (wildcard) => (wildcard === "**" ? "(.*)" : "([^.]*)"));
  // Anchors are added unconditionally: "^" and "$" coming from the pattern are escaped literals.
  return new RegExp("^" + body + "$", "i");
}

/** Tells whether the path is matched by at least one of the compiled patterns. */
function matchesAnyPattern(patterns: readonly RegExp[], currentPath: string): boolean {
  return patterns.some((pattern) => pattern.test(currentPath));
}
/**
 * Partially streaming Parser. Basically it parses input and produces events for the JSON tokens, but it can
 * materialize the whole objects and produce special events with them for only specific parts of the json input.
 */
export class JsonMaterializingParser {
  private readonly parser: JsonParser;
  private readonly materializer: JsonMaterializer;

  /**
   * @param callback receives the events produced by the materializer
   * @param options passed to the JsonMaterializer (see `materializedPaths`)
   */
  constructor(callback: JsonParserCallback, options: MaterializerOptions) {
    // Every parser event goes through the materializer; a truthy callback result is awaited.
    this.parser = new JsonParser(async (event: JsonEvent) => {
      const result = this.materializer.write(event);
      if (result) {
        await result;
      }
    });
    this.materializer = new JsonMaterializer(callback, options);
  }

  /**
   * Feeds a piece of JSON text to the underlying parser.
   * @async
   */
  async parse(text: string) {
    return this.parser.parse(text);
  }

  /** Flushes the parser first, then lets the materializer check that no value is left unfinished. */
  async flush() {
    await this.parser.flush();
    await this.materializer.flush();
  }
}
/** | ||
* Helper class that can be used to transform JSON parser events to a deserialized object, | ||
@@ -221,1 +345,7 @@ * like if it has been deserialized by "JSON.parse". | ||
} | ||
/** Processing states of JsonMaterializer. */
enum MaterializerState {
  /** Events are forwarded to the callback unchanged. */
  INITIAL,
  /** The current path matched a pattern; the next event either starts a value or closes the array. */
  MATERIALIZING_START,
  /** Events are being collected into a value that is not complete yet. */
  MATERIALIZING,
}
@@ -1,4 +0,4 @@ | ||
import { E_END_ARRAY, E_END_OBJECT, E_START_ARRAY, E_START_OBJECT, JsonEvent, JsonEventType } from "./json-writer"; | ||
import { JsonEvent, JsonEventType, E_START_OBJECT, E_END_OBJECT, E_START_ARRAY, E_END_ARRAY } from "./json-common"; | ||
type JsonParserCallback = (_: JsonEvent) => Promise<any>; | ||
export type JsonParserCallback = (_: JsonEvent) => Promise<any>; | ||
@@ -5,0 +5,0 @@ enum State { |
@@ -1,2 +0,3 @@ | ||
import { JsonEvent, JsonWriter } from "./json-writer"; | ||
import { JsonWriter } from "./json-writer"; | ||
import { JsonEvent } from "./json-common"; | ||
@@ -3,0 +4,0 @@ export class JsonToStringTransformStream extends TransformStream<JsonEvent, string> { |
@@ -1,31 +0,3 @@ | ||
export type JsonEvent = | ||
| { type: JsonEventType.START_OBJECT } | ||
| { type: JsonEventType.END_OBJECT } | ||
| { type: JsonEventType.START_ARRAY } | ||
| { type: JsonEventType.END_ARRAY } | ||
| { type: JsonEventType.PROPERTY_NAME; data: string } | ||
| { type: JsonEventType.TEXT_VALUE; data: string } | ||
| { type: JsonEventType.NUMBER_VALUE; data: number } | ||
| { type: JsonEventType.BOOLEAN_VALUE; data: boolean } | ||
| { type: JsonEventType.NULL_VALUE }; | ||
import { JsonEvent, JsonEventType, E_START_OBJECT, E_END_OBJECT, E_START_ARRAY, E_END_ARRAY } from "./json-common"; | ||
export enum JsonEventType { | ||
START_OBJECT, | ||
END_OBJECT, | ||
START_ARRAY, | ||
END_ARRAY, | ||
PROPERTY_NAME, | ||
TEXT_VALUE, | ||
NUMBER_VALUE, | ||
BOOLEAN_VALUE, | ||
NULL_VALUE, | ||
} | ||
// Useful constants for events without data. Saves some objects creation. | ||
export const E_START_OBJECT: JsonEvent = { type: JsonEventType.START_OBJECT } as const; | ||
export const E_END_OBJECT: JsonEvent = { type: JsonEventType.END_OBJECT } as const; | ||
export const E_START_ARRAY: JsonEvent = { type: JsonEventType.START_ARRAY } as const; | ||
export const E_END_ARRAY: JsonEvent = { type: JsonEventType.END_ARRAY } as const; | ||
export class JsonWriter { | ||
@@ -35,5 +7,7 @@ private separatorNeeded = false; | ||
private readonly callback: (_: string) => Promise<any>; | ||
private readonly objectSerializerCallback: (event: JsonEvent) => Promise<void>; | ||
constructor(callback: (_: string) => Promise<any>) { | ||
this.callback = callback; | ||
this.objectSerializerCallback = this.write.bind(this); | ||
} | ||
@@ -109,2 +83,5 @@ | ||
break; | ||
case JsonEventType.ANY_VALUE: | ||
await serializeValueAsync(event.data, this.objectSerializerCallback); | ||
break; | ||
} | ||
@@ -114,2 +91,42 @@ } | ||
/**
 * Converts a plain JavaScript value into JSON events and passes them, one by one, to the callback.
 *
 * @param value null, array, object, string, number or boolean
 * @param callback awaited for every produced event
 * @throws Error "Cannot serialize data type: ..." for any other type (e.g. undefined, function)
 */
export async function serializeValueAsync(value: any, callback: (event: JsonEvent) => Promise<any>) {
  // typeof null is "object", so null must be handled before the object branch;
  // otherwise it would be written as an empty object.
  if (value === null) {
    await callback({ type: JsonEventType.NULL_VALUE });
    return;
  }
  if (Array.isArray(value)) {
    await serializeArrayAsync(value, callback);
    return;
  }
  switch (typeof value) {
    case "object":
      await serializeObjectAsync(value, callback);
      break;
    case "string":
      await callback({ type: JsonEventType.TEXT_VALUE, data: value });
      break;
    case "number":
      await callback({ type: JsonEventType.NUMBER_VALUE, data: value });
      break;
    case "boolean":
      await callback({ type: JsonEventType.BOOLEAN_VALUE, data: value });
      break;
    default:
      throw new Error("Cannot serialize data type: " + typeof value);
  }
}
/**
 * Emits START_OBJECT, then a PROPERTY_NAME event followed by the serialized value
 * for every own enumerable property, then END_OBJECT.
 */
async function serializeObjectAsync(o: any, callback: (event: JsonEvent) => Promise<any>) {
  await callback(E_START_OBJECT);
  // Own keys only: for...in would also walk enumerable properties inherited from the prototype chain.
  for (const key of Object.keys(o)) {
    await callback({ type: JsonEventType.PROPERTY_NAME, data: key });
    await serializeValueAsync(o[key], callback);
  }
  await callback(E_END_OBJECT);
}
/** Emits START_ARRAY, the serialized form of every item in order, then END_ARRAY. */
async function serializeArrayAsync(a: Array<any>, callback: (event: JsonEvent) => Promise<any>) {
  await callback(E_START_ARRAY);
  for (const item of a) {
    await serializeValueAsync(item, callback);
  }
  await callback(E_END_ARRAY);
}
function escapeJsonValue(str: string) { | ||
@@ -116,0 +133,0 @@ let result = "", |
import { JsonParser } from "./json-parser"; | ||
import { JsonEvent } from "./json-writer"; | ||
import { JsonEvent } from "./json-common"; | ||
@@ -4,0 +4,0 @@ export class StringToJsonTransformStream extends TransformStream<string, JsonEvent> { |
284929
71
7031
246