@graphy/content.nq.read
Advanced tools
Comparing version 3.0.6 to 3.1.0
288
main.js
const FN_TO_STRING_OBJECT = Object.prototype.toString; | ||
// try something else | ||
const stream = require('@graphy/core.iso.stream'); | ||
@@ -18,2 +10,11 @@ const factory = require('@graphy/core.data.factory').raw; | ||
const H_ESCAPES_JSON = { | ||
'\t': '\\t', | ||
'\u0008': '\\b', | ||
'\n': '\\n', | ||
'\r': '\\r', | ||
'\f': '\\f', | ||
'"': '\\"', | ||
}; | ||
const R_ESCAPES = /(\\[\\])|\\([^tbnrfuU\\])/g; | ||
@@ -26,10 +27,12 @@ const R_UNICODE_8 = /\\U([0-9A-Fa-f]{8})/g; | ||
const R_CLEAN = /\s*(?:#[^\n]*\n\s*)*\s*/y; | ||
const R_CLEAN_COMMENTS = /\s*(#[^\n]*\n\s*)*\s*/y; | ||
const R_LITERAL_ESCAPELESS = /^"([^\\"]*)"(?:\^\^<([^\\>]*)>|@([^ \t.]+)|)?$/; | ||
const R_LITERAL = /^"(.*)"(?:\^\^<(.*)>|@([^ \t.]+)|)?$/; | ||
const RT_HAS_ESCAPES = /[\\]/; | ||
const R_EOL = /[^\n]+\n/y; | ||
const RT_ABSOLUTE_IRI_VALID = /^[a-z][a-z0-9+\-.]*:([^\0-\x20<>"{}|^`\\]|\\u[A-Fa-f0-9]{4}|\\U[A-Fa-f0-9]{8})*$/; | ||
const RT_BLANK_NODE_LABEL_VALID = /^(?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u{02ff}\u{0370}-\u{037d}\u{037f}-\u{1fff}\u{200c}-\u{200d}\u{2070}-\u{218f}\u{2c00}-\u{2fef}\u{3001}-\u{d7ff}\u{f900}-\u{fdcf}\u{fdf0}-\u{fffd}\u{10000}-\u{effff}_0-9])(?:(?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u{02ff}\u{0370}-\u{037d}\u{037f}-\u{1fff}\u{200c}-\u{200d}\u{2070}-\u{218f}\u{2c00}-\u{2fef}\u{3001}-\u{d7ff}\u{f900}-\u{fdcf}\u{fdf0}-\u{fffd}\u{10000}-\u{effff}_\-0-9\xb7\u{0300}-\u{036f}\u{203f}-\u{2040}.])*[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u{02ff}\u{0370}-\u{037d}\u{037f}-\u{1fff}\u{200c}-\u{200d}\u{2070}-\u{218f}\u{2c00}-\u{2fef}\u{3001}-\u{d7ff}\u{f900}-\u{fdcf}\u{fdf0}-\u{fffd}\u{10000}-\u{effff}_\-0-9\xb7\u{0300}-\u{036f}\u{203f}-\u{2040}])?$/u; | ||
const RT_ESCAPES_INVALID = /\\[^"tbnrfuU\\_~.!$&'()*+,;=/?#@%-]|\\u[^A-Fa-f0-9]{4}|\\U[^A-Fa-f0-9]{8}/; | ||
const RT_LITERAL_CONTENTS_VALID = /^([^"\\\n\r]|\\[tbnrf"'\\]|\\u[A-Fa-f0-9]{4}|\\U[A-Fa-f0-9]{8})*$/; | ||
const RT_ESCAPES_INVALID = /(?:(?:^|[^\\])(?:\\(?:\\\\)*[^"tbnrfuU\\_~.!$&'()*+,;=/?#@%-])(?:[^\\]|$))|\\u[^A-Fa-f0-9]{4}|\\U[^A-Fa-f0-9]{8}/; | ||
const RT_LITERAL_CONTENTS_VALID = /^(?:[^\\\n\r]|\\[tbnrf"'\\]|\\u[A-Fa-f0-9]{4}|\\U[A-Fa-f0-9]{8})*$/; | ||
const RT_LANGUAGE_VALID = /^[a-z]+(-[a-z0-9]+)*$/; | ||
@@ -39,7 +42,63 @@ | ||
const R_QUAD_ESCAPELESS_SP = /(?:<([^\\>]*)>|_:([^\x20\t<]+))[\x20\t]*<([^\\>]*)>[\x20\t]*(?:(<[^\\>]*)>|_:([^\x20\t<]+)|"([^"\\]*)"(?:\^\^<([^\\>]*)>|@([^\x20\t.]+)|))[\x20\t]*(?:<([^\\>]*)>|_:([^\x20\t<]+)|)[\x20\t]*\.\s*(?:#[^\n]*\n\s*|\n\s*)+/y; | ||
const R_QUAD = /(?:<([^>]*)>|_:([^\x20\t<]+))[\x20\t]*<([^>]*)>[\x20\t]*(?:(<[^>]*)>|_:([^\x20\t<]+)|"((?:[^"\\]|\\.)*)"(?:\^\^<([^>]*)>|@([^\x20\t.]+)|))[\x20\t]*(?:<([^>]*)>|_:([^\x20\t<]+)|)[\x20\t]*\.\s*(?:#[^\n]*\n\s*|\n\s*)+/y; | ||
const R_QUAD_ESCAPELESS_SP = /(?:<([^\\>]*)>|_:([^\x20\t<]+))[\x20\t]*<([^\\>]*)>[\x20\t]*(?:(<[^\\>]*)>|_:([^\x20\t<]+)|"([^"\\]*)"(?:\^\^<([^\\>]*)>|@([^\x20\t.]+)|))[\x20\t]*(?:<([^\\>]*)>|_:([^\x20\t<]+)|)[\x20\t]*\.\s*(#[^\n]*\n\s*|\n\s*)+/y; | ||
const R_QUAD = /(?:<([^>]*)>|_:([^\x20\t<]+))[\x20\t]*<([^>]*)>[\x20\t]*(?:(<[^>]*)>|_:([^\x20\t<]+)|"((?:[^"\\]|\\.)*)"(?:\^\^<([^>]*)>|@([^\x20\t.]+)|))[\x20\t]*(?:<([^>]*)>|_:([^\x20\t<]+)|)[\x20\t]*\.\s*(#[^\n]*\n\s*|\n\s*)+/y; | ||
/**
 * Transform stream for reading N-Quads: the writable side accepts text
 * chunks and the readable side operates in object mode.
 *
 * The actual parsing is supplied by the caller through `g_impls`; this class
 * only configures the stream modes, forces piped-in sources to utf8, and
 * intercepts `.pipe()` so that an adapter stream can be inserted in front of
 * destinations that need one.
 */
class NQuads_Reader extends stream.Transform {
	/**
	 * @param {object} g_impls - stream implementations
	 * @param {function} g_impls.transform - used as the stream's `transform` implementation
	 * @param {function} g_impls.flush - used as the stream's `flush` implementation
	 */
	constructor(g_impls) {
		super({
			// do not decode strings into buffers
			decodeStrings: false,

			// accept strings as input on writable side
			writableObjectMode: false,

			// output quad objects on readable side
			readableObjectMode: true,

			// implementations
			flush: g_impls.flush,
			transform: g_impls.transform,
		});

		// when the writable side is piped into
		this.on('pipe', (ds_input) => {
			// input stream has encoding option; ensure stream encoding is utf8
			if('function' === typeof ds_input.setEncoding) {
				ds_input.setEncoding('utf8');
			}
		});
	}

	/**
	 * Intercepts `.pipe()` in order to insert an adapter stream when the
	 * destination calls for one:
	 *  - destination is not in object mode: `stream.quads_to_json()`
	 *  - destination is in object mode and flagged `isGraphyWritable`:
	 *    `stream.quads_to_writable()`
	 *  - otherwise: piped directly
	 *
	 * @param {stream.Writable} ds_out - destination to pipe into
	 * @param {object} [gc_pipe] - standard pipe options (e.g., `{end: false}`);
	 *   applied to the pipe that feeds `ds_out`
	 * @returns {stream.Writable} the destination `ds_out`, to allow chaining
	 */
	pipe(ds_out, gc_pipe) {
		// the destination the caller asked for
		const ds_dst = ds_out;

		// stream that this reader actually pipes into; replaced by an adapter when needed
		let ds_via = ds_dst;

		// non-object mode
		if(!ds_dst._writableState.objectMode) {
			// transform to JSON
			ds_via = stream.quads_to_json();
		}
		// yet object mode and graphy writable
		else if(ds_dst.isGraphyWritable) {
			// transform to quad-stream
			ds_via = stream.quads_to_writable();
		}

		// interim stream created
		if(ds_via !== ds_dst) {
			// forward output to super; the interim stream must always be ended
			// with this reader, so caller's options are not applied here
			super.pipe(ds_via);

			// pipe output to destination, honoring caller's pipe options
			return ds_via.pipe(ds_dst, gc_pipe);
		}

		// forward as-is to super, honoring caller's pipe options
		return super.pipe(ds_dst, gc_pipe);
	}
}
class reader { | ||
@@ -69,4 +128,9 @@ constructor(g_config) { | ||
validate: b_validate, | ||
destroyed: false, | ||
}); | ||
// clean regex | ||
let r_clean = R_CLEAN; | ||
// validation | ||
@@ -92,5 +156,4 @@ Object.assign(this, b_validate | ||
.replace(R_ESCAPES, '$1$2') // no need to escape anything other than reserved characters | ||
.replace(/"/g, '\\"') // escape all quotes ;) | ||
+'"'): s_literal); | ||
.replace(/[\t"\f\u0008]/g, s => H_ESCAPES_JSON[s]) | ||
+'"'): s_literal); | ||
} | ||
@@ -111,5 +174,4 @@ catch(e_parse) { | ||
.replace(R_ESCAPES, '$1$2') // no need to escape anything other than reserved characters | ||
.replace(/"/g, '\\"') // escape all quotes ;) | ||
+'"'): s_literal), | ||
.replace(/[\t"\f\u0008]/g, s => H_ESCAPES_JSON[s]) | ||
+'"'): s_literal), | ||
}); | ||
@@ -124,9 +186,46 @@ | ||
// create transform | ||
ds_transform = this.transform = new stream.Transform({ | ||
// do not decode strings into buffers | ||
decodeStrings: false, | ||
ds_transform = this.transform = new NQuads_Reader({ | ||
// on data event | ||
transform: (s_chunk, s_encoding, fk_chunk) => { | ||
// first transform | ||
if(!b_init) { | ||
// notify that data will begin | ||
ds_transform.emit('ready'); | ||
// output quad objects on readable side | ||
readableObjectMode: true, | ||
// do not emit 'ready' event again | ||
b_init = false; | ||
} | ||
// concatenate current chunk to previous chunk | ||
let s = this.s += s_chunk; | ||
// remove whitespace & comments from beginning | ||
r_clean.lastIndex = 0; | ||
let m_clean = r_clean.exec(s); | ||
if(this.emit_comments) { | ||
this.emit_comments(m_clean[1]); | ||
} | ||
// update index and prepare to match statement | ||
this.i = r_clean.lastIndex; | ||
// cache chunk length | ||
this.n = s.length; | ||
// resume parsing | ||
try { | ||
this.safe_parse(true); | ||
} | ||
// read error occurred; emit and destroy stream | ||
catch(e_read) { | ||
return ds_transform.destroy(e_read); | ||
} | ||
// emit progress event updates | ||
ds_transform.emit('progress', s_chunk.length); | ||
// done transforming this chunk | ||
fk_chunk(); | ||
}, | ||
// once there's no more data to consume, invoke eof | ||
@@ -140,11 +239,14 @@ flush: (fk_flush) => { | ||
// parse safely | ||
this.safe_parse(); | ||
try { | ||
this.safe_parse(); | ||
} | ||
// read error occurred; pass to flush errback and exit method | ||
catch(e_read) { | ||
// destroying during flush means overriding push | ||
return ds_transform.demolish(e_read); | ||
} | ||
// still unparsed characters | ||
// still unparsed characters; pass to flush errback and exit method | ||
if(this.s.length) { | ||
// throw parse error | ||
fk_flush(new Error(`parsing error occurred in state: statement\n ${this.s.substr(0, 50)}\n ^ starting here`)); | ||
// exit method | ||
return; | ||
return ds_transform.demolish(new Error(`parsing error occurred in state: statement\n ${this.s.substr(0, 50)}\n ^ starting here`)); | ||
} | ||
@@ -165,49 +267,2 @@ } | ||
}, | ||
// on data event | ||
transform: (s_chunk, s_encoding, fk_chunk) => { | ||
// first transform | ||
if(!b_init) { | ||
// notify that data will begin | ||
ds_transform.emit('ready'); | ||
// do not emit 'ready' event again | ||
b_init = false; | ||
} | ||
// stream is paused | ||
if(this.n < 0) { | ||
return this.error('stream received new data while it was supposed to be paused!'); | ||
} | ||
// concatenate current chunk to previous chunk | ||
let s = this.s += s_chunk; | ||
// remove whitespace & comments from beginning | ||
R_CLEAN.lastIndex = 0; | ||
R_CLEAN.exec(s); | ||
// update index and prepare to match statement | ||
this.i = R_CLEAN.lastIndex; | ||
// cache chunk length | ||
this.n = s.length; | ||
// resume parsing | ||
this.safe_parse(true); | ||
// read error occurred | ||
if(this.read_error) { | ||
// destroy stream | ||
ds_transform.destroy(this.read_error); | ||
} | ||
// no errors | ||
else { | ||
// emit progress event updates | ||
ds_transform.emit('progress', s_chunk.length); | ||
// done transforming this chunk | ||
fk_chunk(); | ||
} | ||
}, | ||
}); | ||
@@ -218,23 +273,39 @@ | ||
// bind events to transform stream | ||
this.bind(g_config); | ||
// new listener added | ||
ds_transform.on('newListener', (s_event) => { | ||
// comment | ||
if('comment' === s_event) { | ||
r_clean = R_CLEAN_COMMENTS; | ||
this.emit_comments = (s_captured) => { | ||
if(!s_captured) return; | ||
let a_comments = s_captured.slice(1).replace(/\n\s+$/, '').split(/\n+\s*#/g); | ||
// notify once and never again | ||
ds_transform.once('pipe', (ds_input) => { | ||
// input stream has encoding option | ||
if(ds_input.setEncoding) { | ||
// ensure stream is encoding in utf8 | ||
ds_input.setEncoding('utf8'); | ||
for(let s_comment of a_comments) { | ||
ds_transform.emit('comment', s_comment); | ||
} | ||
}; | ||
} | ||
}); | ||
// input | ||
// bind events to transform stream | ||
this.bind(g_config); | ||
// input given | ||
if(g_input) { | ||
// input is stream | ||
if(g_input.stream) { | ||
g_input.stream.pipe(ds_transform); | ||
let ds_input = g_input.stream; | ||
// go async so caller has chance to bind event listeners | ||
setTimeout(() => { | ||
ds_input.pipe(ds_transform); | ||
}, 0); | ||
} | ||
// string | ||
else if('string' in g_input) { | ||
else if('string' === typeof g_input.string) { | ||
let s_input = g_input.string; | ||
// go async so caller has chance to bind event listeners | ||
setTimeout(() => { | ||
ds_transform.end(g_input.string, 'utf8'); | ||
ds_transform.end(s_input, 'utf8'); | ||
}, 0); | ||
@@ -244,3 +315,3 @@ } | ||
else { | ||
throw new TypeError('invalid input: '+('object' === typeof g_input? JSON.stringify(g_input): g_input)); | ||
throw new TypeError(`Invalid argument for input parameter: ${'object' === typeof g_input? JSON.stringify(g_input): g_input}`); | ||
} | ||
@@ -251,3 +322,4 @@ } | ||
_error(s_message) { | ||
this.read_error = new Error(s_message); | ||
this.destroyed = true; | ||
throw new Error(s_message); | ||
} | ||
@@ -260,6 +332,8 @@ | ||
if(g_config.error) ds_transform.on('error', g_config.error); | ||
if(g_config.comment) ds_transform.on('comment', g_config.comment); | ||
if(g_config.read) ds_transform.once('read', g_config.read); | ||
if(g_config.progress) ds_transform.on('progress', g_config.progress); | ||
if(g_config.eof) ds_transform.once('eof', g_config.eof); | ||
if(g_config.end) ds_transform.on('end', g_config.end); | ||
if(g_config.end) ds_transform.once('end', g_config.end); | ||
if(g_config.finish) ds_transform.once('finish', g_config.finish); | ||
if(g_config.data) ds_transform.on('data', g_config.data); | ||
@@ -331,2 +405,7 @@ } | ||
); | ||
// comments | ||
if(this.emit_comments) { | ||
this.emit_comments(m_statement_e_sp[11]); | ||
} | ||
} | ||
@@ -390,8 +469,23 @@ else { | ||
// match counter: 1 | ||
// comments | ||
if(this.emit_comments) { | ||
this.emit_comments(m_statement[11]); | ||
} | ||
} | ||
else { | ||
// prepare sticky regex index | ||
R_EOL.lastIndex = i; | ||
if(R_EOL.exec(s)) { | ||
// advance index | ||
this.i = R_EOL.lastIndex; | ||
this._error(`invalid statement:\n${this.s.substr(0, 50)}\n ^ starting here`); | ||
// match counter: 2 | ||
} | ||
else { | ||
// break loop to retry on next chunk if eos | ||
break; | ||
} | ||
break; | ||
} | ||
} // brace #2 | ||
} // brace #1 | ||
@@ -406,3 +500,3 @@ } // end of while | ||
const read = module.exports = function(...a_args) { | ||
module.exports = function(...a_args) { | ||
let g_config = {}; | ||
@@ -427,3 +521,3 @@ | ||
// config struct | ||
else if(z_arg_0 && 'object' === typeof z_arg_0 && '[object Object]' === FN_TO_STRING_OBJECT.call(z_arg_0)) { | ||
else if(z_arg_0 && 'object' === typeof z_arg_0 && '[object Object]' === Object.prototype.toString.call(z_arg_0)) { | ||
g_config = z_arg_0; | ||
@@ -430,0 +524,0 @@ |
{ | ||
"name": "@graphy/content.nq.read", | ||
"version": "3.0.6", | ||
"version": "3.1.0", | ||
"description": "Single-threaded RDF N-Quads content reader", | ||
@@ -26,6 +26,9 @@ "keywords": [ | ||
"dependencies": { | ||
"@graphy/core.data.factory": "^3.0.6", | ||
"@graphy/core.iso.stream": "^3.0.6", | ||
"@graphy/core.data.factory": "^3.1.0", | ||
"@graphy/core.iso.stream": "^3.1.0", | ||
"uri-js": "^4.2.2" | ||
}, | ||
"engines": { | ||
"node": ">=8.4.0" | ||
} | ||
} | ||
} |
16466
445