rdf-canonize
Advanced tools
Comparing version 3.4.0 to 4.0.0
@@ -11,5 +11,5 @@ /* | ||
* | ||
* @param prefix the prefix to use ('<prefix><counter>'). | ||
* @param existing an existing Map to use. | ||
* @param counter the counter to use. | ||
* @param {string} prefix - The prefix to use ('<prefix><counter>'). | ||
* @param {Map} [existing] - An existing Map to use. | ||
* @param {number} [counter] - The counter to use. | ||
*/ | ||
@@ -25,3 +25,3 @@ constructor(prefix, existing = new Map(), counter = 0) { | ||
* | ||
* @return a copy of this IdentifierIssuer. | ||
* @returns {object} - A copy of this IdentifierIssuer. | ||
*/ | ||
@@ -37,5 +37,5 @@ clone() { | ||
* | ||
* @param [old] the old identifier to get the new identifier for. | ||
* @param {string} [old] - The old identifier to get the new identifier for. | ||
* | ||
* @return the new identifier. | ||
* @returns {string} - The new identifier. | ||
*/ | ||
@@ -65,6 +65,6 @@ getId(old) { | ||
* | ||
* @param old the old identifier to check. | ||
* @param {string} old - The old identifier to check. | ||
* | ||
* @return true if the old identifier has been assigned a new identifier, | ||
* false if not. | ||
* @returns {boolean} - True if the old identifier has been assigned a new | ||
* identifier, false if not. | ||
*/ | ||
@@ -79,3 +79,4 @@ hasId(old) { | ||
* | ||
* @return the list of old IDs that has been issued new IDs in order. | ||
* @returns {Array} - The list of old IDs that have been issued new IDs in | ||
* order. | ||
*/ | ||
@@ -82,0 +83,0 @@ getOldIds() { |
173
lib/index.js
@@ -37,6 +37,4 @@ /** | ||
const URDNA2015 = require('./URDNA2015'); | ||
const URGNA2012 = require('./URGNA2012'); | ||
const URDNA2015Sync = require('./URDNA2015Sync'); | ||
const URGNA2012Sync = require('./URGNA2012Sync'); | ||
const RDFC10 = require('./RDFC10'); | ||
const RDFC10Sync = require('./RDFC10Sync'); | ||
@@ -49,7 +47,13 @@ // optional native support | ||
// return a dataset from input dataset or legacy dataset | ||
function _inputToDataset(input/*, options*/) { | ||
// back-compat with legacy dataset | ||
if(!Array.isArray(input)) { | ||
return exports.NQuads.legacyDatasetToQuads(input); | ||
// return a dataset from input dataset or n-quads | ||
function _inputToDataset(input, options) { | ||
if(options.inputFormat) { | ||
if(options.inputFormat === 'application/n-quads') { | ||
if(typeof input !== 'string') { | ||
throw new Error('N-Quads input must be a string.'); | ||
} | ||
return exports.NQuads.parse(input); | ||
} | ||
throw new Error( | ||
`Unknown canonicalization input format: "${options.inputFormat}".`); | ||
} | ||
@@ -59,2 +63,20 @@ return input; | ||
// check for valid output format | ||
function _checkOutputFormat(options) { | ||
// only N-Quads supported | ||
if(options.format) { | ||
if(options.format !== 'application/n-quads') { | ||
throw new Error( | ||
`Unknown canonicalization output format: "${options.format}".`); | ||
} | ||
} | ||
} | ||
// helper to trace URDNA2015 usage | ||
function _traceURDNA2015() { | ||
if(!!globalThis.RDF_CANONIZE_TRACE_URDNA2015) { | ||
console.trace('[rdf-canonize] URDNA2015 is deprecated, use RDFC-1.0'); | ||
} | ||
} | ||
// expose helpers | ||
@@ -67,5 +89,5 @@ exports.NQuads = require('./NQuads'); | ||
* | ||
* @param api the native API. | ||
* @param {object} [api] - The native API. | ||
* | ||
* @return the currently set native API. | ||
* @returns {object} - The currently set native API. | ||
*/ | ||
@@ -83,6 +105,5 @@ exports._rdfCanonizeNative = function(api) { | ||
* @param {Array|object|string} input - The input to canonize given as a | ||
* dataset or legacy dataset. | ||
* dataset or format specified by 'inputFormat' option. | ||
* @param {object} options - The options to use: | ||
* {string} algorithm - The canonicalization algorithm to use, `URDNA2015` or | ||
* `URGNA2012`. | ||
* {string} algorithm - The canonicalization algorithm to use, `RDFC-1.0`. | ||
* {Function} [createMessageDigest] - A factory function for creating a | ||
@@ -93,17 +114,43 @@ * `MessageDigest` interface that overrides the built-in message digest | ||
* the canonize algorithm will result in different output. | ||
* {string} [messageDigestAlgorithm=sha256] - Message digest algorithm used | ||
* by the default implementation of `createMessageDigest`. Supported | ||
* algorithms are: 'sha256', 'sha384', 'sha512', and the 'SHA###' and | ||
* 'SHA-###' variations. | ||
* {Map} [canonicalIdMap] - An optional Map to be populated by the canonical | ||
* identifier issuer with the bnode identifier mapping generated by the | ||
* canonicalization algorithm. | ||
* {string} [inputFormat] - The format of the input. Use | ||
* 'application/n-quads' for an N-Quads string that will be parsed. Omit or | ||
* falsy for a JSON dataset. | ||
* {string} [format] - The format of the output. Omit or use | ||
* 'application/n-quads' for an N-Quads string. | ||
* {boolean} [useNative=false] - Use native implementation. | ||
* {number} [maxDeepIterations=Infinity] - The maximum number of times to run | ||
* {number} [maxWorkFactor=1] - Control of the maximum number of times to run | ||
* deep comparison algorithms (such as the N-Degree Hash Quads algorithm | ||
* used in URDNA2015) before bailing out and throwing an error; this is a | ||
* used in RDFC-1.0) before bailing out and throwing an error; this is a | ||
* useful setting for preventing wasted CPU cycles or DoS when canonizing | ||
* meaningless or potentially malicious datasets, a recommended value is | ||
* `1`. | ||
* meaningless or potentially malicious datasets. This parameter sets the | ||
* maximum number of iterations based on the number of non-unique blank | ||
* nodes. `0` to disable iterations, `1` for a O(n) limit, `2` for a O(n^2) | ||
* limit, `3` and higher may handle "poison" graphs but may take | ||
* significant computational resources, `Infinity` for no limitation. | ||
* Defaults to `1` which can handle many common inputs. | ||
* {number} [maxDeepIterations=-1] - The maximum number of times to run | ||
* deep comparison algorithms (such as the N-Degree Hash Quads algorithm | ||
* used in RDFC-1.0) before bailing out and throwing an error; this is a | ||
* useful setting for preventing wasted CPU cycles or DoS when canonizing | ||
* meaningless or potentially malicious datasets. If set to a value other | ||
* than `-1` it will explicitly set the number of iterations and override | ||
* `maxWorkFactor`. It is recommended to use `maxWorkFactor`. | ||
* {AbortSignal} [signal] - An AbortSignal used to abort the operation. The | ||
* aborted status is only periodically checked for performance reasons. | ||
* {boolean} [rejectURDNA2015=false] - Reject the "URDNA2015" algorithm name | ||
* instead of treating it as an alias for "RDFC-1.0". | ||
* | ||
* @return a Promise that resolves to the canonicalized RDF Dataset. | ||
* @returns {Promise<object>} - A Promise that resolves to the canonicalized | ||
* RDF Dataset. | ||
*/ | ||
exports.canonize = async function(input, options) { | ||
exports.canonize = async function(input, options = {}) { | ||
const dataset = _inputToDataset(input, options); | ||
_checkOutputFormat(options); | ||
@@ -123,15 +170,13 @@ if(options.useNative) { | ||
if(options.algorithm === 'URDNA2015') { | ||
return new URDNA2015(options).main(dataset); | ||
} | ||
if(options.algorithm === 'URGNA2012') { | ||
if(options.createMessageDigest) { | ||
throw new Error( | ||
'"createMessageDigest" cannot be used with "URGNA2012".'); | ||
} | ||
return new URGNA2012(options).main(dataset); | ||
} | ||
if(!('algorithm' in options)) { | ||
throw new Error('No RDF Dataset Canonicalization algorithm specified.'); | ||
} | ||
if(options.algorithm === 'RDFC-1.0') { | ||
return new RDFC10(options).main(dataset); | ||
} | ||
// URDNA2015 deprecated, handled as alias for RDFC-1.0 if allowed | ||
if(options.algorithm === 'URDNA2015' && !options.rejectURDNA2015) { | ||
_traceURDNA2015(); | ||
return new RDFC10(options).main(dataset); | ||
} | ||
throw new Error( | ||
@@ -147,6 +192,5 @@ 'Invalid RDF Dataset Canonicalization algorithm: ' + options.algorithm); | ||
* @param {Array|object|string} input - The input to canonize given as a | ||
* dataset or legacy dataset. | ||
* dataset or format specified by 'inputFormat' option. | ||
* @param {object} options - The options to use: | ||
* {string} algorithm - The canonicalization algorithm to use, `URDNA2015` or | ||
* `URGNA2012`. | ||
* {string} algorithm - The canonicalization algorithm to use, `RDFC-1.0`. | ||
* {Function} [createMessageDigest] - A factory function for creating a | ||
@@ -157,14 +201,47 @@ * `MessageDigest` interface that overrides the built-in message digest | ||
* the canonize algorithm will result in different output. | ||
* {string} [messageDigestAlgorithm=sha256] - Message digest algorithm used | ||
* by the default implementation of `createMessageDigest`. Supported | ||
* algorithms are: 'sha256', 'sha384', 'sha512', and the 'SHA###' and | ||
* 'SHA-###' variations. | ||
* {Map} [canonicalIdMap] - An optional Map to be populated by the canonical | ||
* identifier issuer with the bnode identifier mapping generated by the | ||
* canonicalization algorithm. | ||
* {string} [inputFormat] - The format of the input. Use | ||
* 'application/n-quads' for an N-Quads string that will be parsed. Omit or | ||
* falsy for a JSON dataset. | ||
* {string} [format] - The format of the output. Omit or use | ||
* 'application/n-quads' for an N-Quads string. | ||
* {boolean} [useNative=false] - Use native implementation. | ||
* {number} [maxDeepIterations=Infinity] - The maximum number of times to run | ||
* {number} [maxWorkFactor=1] - Control of the maximum number of times to run | ||
* deep comparison algorithms (such as the N-Degree Hash Quads algorithm | ||
* used in URDNA2015) before bailing out and throwing an error; this is a | ||
* used in RDFC-1.0) before bailing out and throwing an error; this is a | ||
* useful setting for preventing wasted CPU cycles or DoS when canonizing | ||
* meaningless or potentially malicious datasets, a recommended value is | ||
* `1`. | ||
* meaningless or potentially malicious datasets. This parameter sets the | ||
* maximum number of iterations based on the number of non-unique blank | ||
* nodes. `0` to disable iterations, `1` for a O(n) limit, `2` for a O(n^2) | ||
* limit, `3` and higher may handle "poison" graphs but may take | ||
* significant computational resources, `Infinity` for no limitation. | ||
* Defaults to `1` which can handle many common inputs. | ||
* {number} [maxDeepIterations=-1] - The maximum number of times to run | ||
* deep comparison algorithms (such as the N-Degree Hash Quads algorithm | ||
* used in RDFC-1.0) before bailing out and throwing an error; this is a | ||
* useful setting for preventing wasted CPU cycles or DoS when canonizing | ||
* meaningless or potentially malicious datasets. If set to a value other | ||
* than `-1` it will explicitly set the number of iterations and override | ||
* `maxWorkFactor`. It is recommended to use `maxWorkFactor`. | ||
* {number} [timeout=1000] - The maximum number of milliseconds before the | ||
* operation will timeout. This is only periodically checked for | ||
* performance reasons. Use 0 to disable. Note: This is a replacement for | ||
* the async canonize `signal` option common timeout use case. If complex | ||
* abort logic is required, use the async function and the `signal` | ||
* parameter. | ||
* {boolean} [rejectURDNA2015=false] - Reject the "URDNA2015" algorithm name | ||
* instead of treating it as an alias for "RDFC-1.0". | ||
* | ||
* @return the RDF dataset in canonical form. | ||
* @returns {Promise<object>} - A Promise that resolves to the canonicalized | ||
* RDF Dataset. | ||
*/ | ||
exports._canonizeSync = function(input, options) { | ||
exports._canonizeSync = function(input, options = {}) { | ||
const dataset = _inputToDataset(input, options); | ||
_checkOutputFormat(options); | ||
@@ -181,17 +258,15 @@ if(options.useNative) { | ||
} | ||
if(options.algorithm === 'URDNA2015') { | ||
return new URDNA2015Sync(options).main(dataset); | ||
} | ||
if(options.algorithm === 'URGNA2012') { | ||
if(options.createMessageDigest) { | ||
throw new Error( | ||
'"createMessageDigest" cannot be used with "URGNA2012".'); | ||
} | ||
return new URGNA2012Sync(options).main(dataset); | ||
} | ||
if(!('algorithm' in options)) { | ||
throw new Error('No RDF Dataset Canonicalization algorithm specified.'); | ||
} | ||
if(options.algorithm === 'RDFC-1.0') { | ||
return new RDFC10Sync(options).main(dataset); | ||
} | ||
// URDNA2015 deprecated, handled as alias for RDFC-1.0 if allowed | ||
if(options.algorithm === 'URDNA2015' && !options.rejectURDNA2015) { | ||
_traceURDNA2015(); | ||
return new RDFC10Sync(options).main(dataset); | ||
} | ||
throw new Error( | ||
'Invalid RDF Dataset Canonicalization algorithm: ' + options.algorithm); | ||
}; |
/* | ||
* Copyright (c) 2016-2021 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2023 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
@@ -8,2 +8,14 @@ 'use strict'; | ||
const algorithmMap = new Map([ | ||
['sha256', 'sha256'], | ||
['SHA256', 'sha256'], | ||
['SHA-256', 'sha256'], | ||
['sha384', 'sha384'], | ||
['SHA384', 'sha384'], | ||
['SHA-384', 'sha384'], | ||
['sha512', 'sha512'], | ||
['SHA512', 'sha512'], | ||
['SHA-512', 'sha512'], | ||
]); | ||
module.exports = class MessageDigest { | ||
@@ -13,6 +25,9 @@ /** | ||
* | ||
* @param algorithm the algorithm to use. | ||
* @param {string} algorithm - The algorithm to use. | ||
*/ | ||
constructor(algorithm) { | ||
this.md = crypto.createHash(algorithm); | ||
if(!algorithmMap.has(algorithm)) { | ||
throw new Error(`Unsupported algorithm "${algorithm}".`); | ||
} | ||
this.md = crypto.createHash(algorithmMap.get(algorithm)); | ||
} | ||
@@ -24,2 +39,4 @@ | ||
// async code awaits this but it is not async to support | ||
// the sync code | ||
digest() { | ||
@@ -26,0 +43,0 @@ return this.md.digest('hex'); |
@@ -20,3 +20,3 @@ /*! | ||
(() => { | ||
const iri = '(?:<([^:]+:[^>]*)>)'; | ||
// https://www.w3.org/TR/n-quads/#sec-grammar | ||
// https://www.w3.org/TR/turtle/#grammar-production-BLANK_NODE_LABEL | ||
@@ -49,9 +49,17 @@ const PN_CHARS_BASE = | ||
const BLANK_NODE_LABEL = | ||
'(_:' + | ||
'_:(' + | ||
'(?:[' + PN_CHARS_U + '0-9])' + | ||
'(?:(?:[' + PN_CHARS + '.])*(?:[' + PN_CHARS + ']))?' + | ||
')'; | ||
// Older simple regex: const IRI = '(?:<([^:]+:[^>]*)>)'; | ||
const UCHAR4 = '\\\\u[0-9A-Fa-f]{4}'; | ||
const UCHAR8 = '\\\\U[0-9A-Fa-f]{8}'; | ||
const IRI = '(?:<((?:' + | ||
'[^\u0000-\u0020<>"{}|^`\\\\]' + '|' + | ||
UCHAR4 + '|' + | ||
UCHAR8 + | ||
')*)>)'; | ||
const bnode = BLANK_NODE_LABEL; | ||
const plain = '"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"'; | ||
const datatype = '(?:\\^\\^' + iri + ')'; | ||
const datatype = '(?:\\^\\^' + IRI + ')'; | ||
const language = '(?:@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*))'; | ||
@@ -63,6 +71,6 @@ const literal = '(?:' + plain + '(?:' + datatype + '|' + language + ')?)'; | ||
// define quad part regexes | ||
const subject = '(?:' + iri + '|' + bnode + ')' + ws; | ||
const property = iri + ws; | ||
const object = '(?:' + iri + '|' + bnode + '|' + literal + ')' + wso; | ||
const graphName = '(?:\\.|(?:(?:' + iri + '|' + bnode + ')' + wso + '\\.))'; | ||
const subject = '(?:' + IRI + '|' + bnode + ')' + ws; | ||
const property = IRI + ws; | ||
const object = '(?:' + IRI + '|' + bnode + '|' + literal + ')' + wso; | ||
const graphName = '(?:\\.|(?:(?:' + IRI + '|' + bnode + ')' + wso + '\\.))'; | ||
@@ -82,5 +90,6 @@ // end of line and empty regexes | ||
* | ||
* @param input the N-Quads input to parse. | ||
* @param {string} input - The N-Quads input to parse. | ||
* | ||
* @return an RDF dataset (an array of quads per http://rdf.js.org/). | ||
* @returns {Array} - An RDF dataset (an array of quads per | ||
* https://rdf.js.org/). | ||
*/ | ||
@@ -115,15 +124,30 @@ static parse(input) { | ||
if(match[1] !== undefined) { | ||
quad.subject = {termType: TYPE_NAMED_NODE, value: match[1]}; | ||
quad.subject = { | ||
termType: TYPE_NAMED_NODE, | ||
value: _iriUnescape(match[1]) | ||
}; | ||
} else { | ||
quad.subject = {termType: TYPE_BLANK_NODE, value: match[2]}; | ||
quad.subject = { | ||
termType: TYPE_BLANK_NODE, | ||
value: match[2] | ||
}; | ||
} | ||
// get predicate | ||
quad.predicate = {termType: TYPE_NAMED_NODE, value: match[3]}; | ||
quad.predicate = { | ||
termType: TYPE_NAMED_NODE, | ||
value: _iriUnescape(match[3]) | ||
}; | ||
// get object | ||
if(match[4] !== undefined) { | ||
quad.object = {termType: TYPE_NAMED_NODE, value: match[4]}; | ||
quad.object = { | ||
termType: TYPE_NAMED_NODE, | ||
value: _iriUnescape(match[4]) | ||
}; | ||
} else if(match[5] !== undefined) { | ||
quad.object = {termType: TYPE_BLANK_NODE, value: match[5]}; | ||
quad.object = { | ||
termType: TYPE_BLANK_NODE, | ||
value: match[5] | ||
}; | ||
} else { | ||
@@ -138,3 +162,3 @@ quad.object = { | ||
if(match[7] !== undefined) { | ||
quad.object.datatype.value = match[7]; | ||
quad.object.datatype.value = _iriUnescape(match[7]); | ||
} else if(match[8] !== undefined) { | ||
@@ -146,3 +170,3 @@ quad.object.datatype.value = RDF_LANGSTRING; | ||
} | ||
quad.object.value = _unescape(match[6]); | ||
quad.object.value = _stringLiteralUnescape(match[6]); | ||
} | ||
@@ -154,3 +178,3 @@ | ||
termType: TYPE_NAMED_NODE, | ||
value: match[9] | ||
value: _iriUnescape(match[9]) | ||
}; | ||
@@ -195,10 +219,7 @@ } else if(match[10] !== undefined) { | ||
* | ||
* @param dataset (array of quads) the RDF dataset to convert. | ||
* @param {Array} dataset - The RDF dataset (an array of quads) to convert. | ||
* | ||
* @return the N-Quads string. | ||
* @returns {string} - The N-Quads string. | ||
*/ | ||
static serialize(dataset) { | ||
if(!Array.isArray(dataset)) { | ||
dataset = NQuads.legacyDatasetToQuads(dataset); | ||
} | ||
const quads = []; | ||
@@ -214,8 +235,8 @@ for(const quad of dataset) { | ||
* | ||
* @param {Object} s - N-Quad subject component. | ||
* @param {Object} p - N-Quad predicate component. | ||
* @param {Object} o - N-Quad object component. | ||
* @param {Object} g - N-Quad graph component. | ||
* @param {object} s - N-Quad subject component. | ||
* @param {object} p - N-Quad predicate component. | ||
* @param {object} o - N-Quad object component. | ||
* @param {object} g - N-Quad graph component. | ||
* | ||
* @return {string} the N-Quad. | ||
* @returns {string} - The N-Quad. | ||
*/ | ||
@@ -227,17 +248,21 @@ static serializeQuadComponents(s, p, o, g) { | ||
if(s.termType === TYPE_NAMED_NODE) { | ||
nquad += `<${s.value}>`; | ||
nquad += `<${_iriEscape(s.value)}>`; | ||
} else { | ||
nquad += `${s.value}`; | ||
nquad += `_:${s.value}`; | ||
} | ||
// predicate can only be NamedNode | ||
nquad += ` <${p.value}> `; | ||
// predicate is normally a NamedNode; it can be a BlankNode in generalized RDF | ||
if(p.termType === TYPE_NAMED_NODE) { | ||
nquad += ` <${_iriEscape(p.value)}> `; | ||
} else { | ||
nquad += ` _:${p.value} `; | ||
} | ||
// object is NamedNode, BlankNode, or Literal | ||
if(o.termType === TYPE_NAMED_NODE) { | ||
nquad += `<${o.value}>`; | ||
nquad += `<${_iriEscape(o.value)}>`; | ||
} else if(o.termType === TYPE_BLANK_NODE) { | ||
nquad += o.value; | ||
nquad += `_:${o.value}`; | ||
} else { | ||
nquad += `"${_escape(o.value)}"`; | ||
nquad += `"${_stringLiteralEscape(o.value)}"`; | ||
if(o.datatype.value === RDF_LANGSTRING) { | ||
@@ -248,3 +273,3 @@ if(o.language) { | ||
} else if(o.datatype.value !== XSD_STRING) { | ||
nquad += `^^<${o.datatype.value}>`; | ||
nquad += `^^<${_iriEscape(o.datatype.value)}>`; | ||
} | ||
@@ -256,5 +281,5 @@ } | ||
if(g.termType === TYPE_NAMED_NODE) { | ||
nquad += ` <${g.value}>`; | ||
nquad += ` <${_iriEscape(g.value)}>`; | ||
} else if(g.termType === TYPE_BLANK_NODE) { | ||
nquad += ` ${g.value}`; | ||
nquad += ` _:${g.value}`; | ||
} | ||
@@ -269,5 +294,5 @@ | ||
* | ||
* @param quad the RDF quad convert. | ||
* @param {object} quad - The RDF quad to convert. | ||
* | ||
* @return the N-Quad string. | ||
* @returns {string} - The N-Quad string. | ||
*/ | ||
@@ -278,66 +303,2 @@ static serializeQuad(quad) { | ||
} | ||
/** | ||
* Converts a legacy-formatted dataset to an array of quads dataset per | ||
* http://rdf.js.org/. | ||
* | ||
* @param dataset the legacy dataset to convert. | ||
* | ||
* @return the array of quads dataset. | ||
*/ | ||
static legacyDatasetToQuads(dataset) { | ||
const quads = []; | ||
const termTypeMap = { | ||
'blank node': TYPE_BLANK_NODE, | ||
IRI: TYPE_NAMED_NODE, | ||
literal: TYPE_LITERAL | ||
}; | ||
for(const graphName in dataset) { | ||
const triples = dataset[graphName]; | ||
triples.forEach(triple => { | ||
const quad = {}; | ||
for(const componentName in triple) { | ||
const oldComponent = triple[componentName]; | ||
const newComponent = { | ||
termType: termTypeMap[oldComponent.type], | ||
value: oldComponent.value | ||
}; | ||
if(newComponent.termType === TYPE_LITERAL) { | ||
newComponent.datatype = { | ||
termType: TYPE_NAMED_NODE | ||
}; | ||
if('datatype' in oldComponent) { | ||
newComponent.datatype.value = oldComponent.datatype; | ||
} | ||
if('language' in oldComponent) { | ||
if(!('datatype' in oldComponent)) { | ||
newComponent.datatype.value = RDF_LANGSTRING; | ||
} | ||
newComponent.language = oldComponent.language; | ||
} else if(!('datatype' in oldComponent)) { | ||
newComponent.datatype.value = XSD_STRING; | ||
} | ||
} | ||
quad[componentName] = newComponent; | ||
} | ||
if(graphName === '@default') { | ||
quad.graph = { | ||
termType: TYPE_DEFAULT_GRAPH, | ||
value: '' | ||
}; | ||
} else { | ||
quad.graph = { | ||
termType: graphName.startsWith('_:') ? | ||
TYPE_BLANK_NODE : TYPE_NAMED_NODE, | ||
value: graphName | ||
}; | ||
} | ||
quads.push(quad); | ||
}); | ||
} | ||
return quads; | ||
} | ||
}; | ||
@@ -348,6 +309,6 @@ | ||
* | ||
* @param t1 the first triple. | ||
* @param t2 the second triple. | ||
* @param {object} t1 - The first triple. | ||
* @param {object} t2 - The second triple. | ||
* | ||
* @return true if the triples are the same, false if not. | ||
* @returns {boolean} - True if the triples are the same, false if not. | ||
*/ | ||
@@ -377,31 +338,60 @@ function _compareTriples(t1, t2) { | ||
const _escapeRegex = /["\\\n\r]/g; | ||
const _stringLiteralEscapeRegex = /[\u0000-\u001F\u007F"\\]/g; | ||
const _stringLiteralEscapeMap = []; | ||
for(let n = 0; n <= 0x7f; ++n) { | ||
if(_stringLiteralEscapeRegex.test(String.fromCharCode(n))) { | ||
// default UCHAR mapping | ||
_stringLiteralEscapeMap[n] = | ||
'\\u' + n.toString(16).toUpperCase().padStart(4, '0'); | ||
// reset regex | ||
_stringLiteralEscapeRegex.lastIndex = 0; | ||
} | ||
} | ||
// special ECHAR mappings | ||
_stringLiteralEscapeMap['\b'.codePointAt(0)] = '\\b'; | ||
_stringLiteralEscapeMap['\t'.codePointAt(0)] = '\\t'; | ||
_stringLiteralEscapeMap['\n'.codePointAt(0)] = '\\n'; | ||
_stringLiteralEscapeMap['\f'.codePointAt(0)] = '\\f'; | ||
_stringLiteralEscapeMap['\r'.codePointAt(0)] = '\\r'; | ||
_stringLiteralEscapeMap['"' .codePointAt(0)] = '\\"'; | ||
_stringLiteralEscapeMap['\\'.codePointAt(0)] = '\\\\'; | ||
/** | ||
* Escape string to N-Quads literal | ||
* Escape string to N-Quads literal. | ||
* | ||
* @param {string} s - String to escape. | ||
* | ||
* @returns {string} - Escaped N-Quads literal. | ||
*/ | ||
function _escape(s) { | ||
return s.replace(_escapeRegex, function(match) { | ||
switch(match) { | ||
case '"': return '\\"'; | ||
case '\\': return '\\\\'; | ||
case '\n': return '\\n'; | ||
case '\r': return '\\r'; | ||
} | ||
function _stringLiteralEscape(s) { | ||
if(!_stringLiteralEscapeRegex.test(s)) { | ||
return s; | ||
} | ||
return s.replace(_stringLiteralEscapeRegex, function(match) { | ||
return _stringLiteralEscapeMap[match.codePointAt(0)]; | ||
}); | ||
} | ||
const _unescapeRegex = | ||
/(?:\\([tbnrf"'\\]))|(?:\\u([0-9A-Fa-f]{4}))|(?:\\U([0-9A-Fa-f]{8}))/g; | ||
const _stringLiteralUnescapeRegex = | ||
/(?:\\([btnfr"'\\]))|(?:\\u([0-9A-Fa-f]{4}))|(?:\\U([0-9A-Fa-f]{8}))/g; | ||
/** | ||
* Unescape N-Quads literal to string | ||
* Unescape N-Quads literal to string. | ||
* | ||
* @param {string} s - String to unescape. | ||
* | ||
* @returns {string} - Unescaped N-Quads literal. | ||
*/ | ||
function _unescape(s) { | ||
return s.replace(_unescapeRegex, function(match, code, u, U) { | ||
function _stringLiteralUnescape(s) { | ||
if(!_stringLiteralUnescapeRegex.test(s)) { | ||
return s; | ||
} | ||
return s.replace(_stringLiteralUnescapeRegex, function(match, code, u, U) { | ||
if(code) { | ||
switch(code) { | ||
case 'b': return '\b'; | ||
case 't': return '\t'; | ||
case 'b': return '\b'; | ||
case 'n': return '\n'; | ||
case 'f': return '\f'; | ||
case 'r': return '\r'; | ||
case 'f': return '\f'; | ||
case '"': return '"'; | ||
@@ -416,6 +406,57 @@ case '\'': return '\''; | ||
if(U) { | ||
// FIXME: support larger values | ||
throw new Error('Unsupported U escape'); | ||
return String.fromCodePoint(parseInt(U, 16)); | ||
} | ||
}); | ||
} | ||
const _iriEscapeRegex = /[\u0000-\u0020<>"{}|^`\\]/g; | ||
const _iriEscapeRegexMap = []; | ||
for(let n = 0; n <= 0x7f; ++n) { | ||
if(_iriEscapeRegex.test(String.fromCharCode(n))) { | ||
// UCHAR mapping | ||
_iriEscapeRegexMap[n] = | ||
'\\u' + n.toString(16).toUpperCase().padStart(4, '0'); | ||
// reset regex | ||
_iriEscapeRegex.lastIndex = 0; | ||
} | ||
} | ||
/** | ||
* Escape IRI to N-Quads IRI. | ||
* | ||
* @param {string} s - IRI to escape. | ||
* | ||
* @returns {string} - Escaped N-Quads IRI. | ||
*/ | ||
function _iriEscape(s) { | ||
if(!_iriEscapeRegex.test(s)) { | ||
return s; | ||
} | ||
return s.replace(_iriEscapeRegex, function(match) { | ||
return _iriEscapeRegexMap[match.codePointAt(0)]; | ||
}); | ||
} | ||
const _iriUnescapeRegex = | ||
/(?:\\u([0-9A-Fa-f]{4}))|(?:\\U([0-9A-Fa-f]{8}))/g; | ||
/** | ||
* Unescape N-Quads IRI to IRI. | ||
* | ||
* @param {string} s - IRI to unescape. | ||
* | ||
* @returns {string} - Unescaped N-Quads IRI. | ||
*/ | ||
function _iriUnescape(s) { | ||
if(!_iriUnescapeRegex.test(s)) { | ||
return s; | ||
} | ||
return s.replace(_iriUnescapeRegex, function(match, u, U) { | ||
if(u) { | ||
return String.fromCharCode(parseInt(u, 16)); | ||
} | ||
if(U) { | ||
return String.fromCodePoint(parseInt(U, 16)); | ||
} | ||
}); | ||
} |
@@ -11,3 +11,3 @@ /*! | ||
* | ||
* @param list the array of elements to iterate over. | ||
* @param {Array} list - The array of elements to iterate over. | ||
*/ | ||
@@ -29,3 +29,3 @@ constructor(list) { | ||
* | ||
* @return true if there is another permutation, false if not. | ||
* @returns {boolean} - True if there is another permutation, false if not. | ||
*/ | ||
@@ -40,3 +40,3 @@ hasNext() { | ||
* | ||
* @return the next permutation. | ||
* @returns {any} - The next permutation. | ||
*/ | ||
@@ -43,0 +43,0 @@ next() { |
{ | ||
"name": "rdf-canonize", | ||
"version": "3.4.0", | ||
"description": "An implementation of the RDF Dataset Normalization Algorithm in JavaScript", | ||
"version": "4.0.0", | ||
"description": "An implementation of the RDF Dataset Canonicalization algorithm in JavaScript", | ||
"homepage": "https://github.com/digitalbazaar/rdf-canonize", | ||
@@ -33,21 +33,39 @@ "author": { | ||
"benchmark": "^2.1.4", | ||
"chai": "^4.2.0", | ||
"delay": "^5.0.0", | ||
"eslint": "^7.23.0", | ||
"eslint-config-digitalbazaar": "^2.6.1", | ||
"mocha": "^8.3.2", | ||
"browserify": "^17.0.0", | ||
"chai": "^4.3.10", | ||
"envify": "^4.1.0", | ||
"eslint": "^8.53.0", | ||
"eslint-config-digitalbazaar": "^5.0.1", | ||
"eslint-plugin-jsdoc": "^46.9.0", | ||
"esmify": "^2.1.1", | ||
"fs-extra": "^11.1.1", | ||
"join-path-js": "^0.0.0", | ||
"karma": "^6.4.2", | ||
"karma-babel-preprocessor": "^8.0.2", | ||
"karma-browserify": "^8.1.0", | ||
"karma-chrome-launcher": "^3.2.0", | ||
"karma-mocha": "^2.0.1", | ||
"karma-mocha-reporter": "^2.2.5", | ||
"karma-server-side": "github:fargies/karma-server-side#9397553473fcbc2aaabb7dc9f59e96f9ff26791c", | ||
"karma-sourcemap-loader": "^0.4.0", | ||
"karma-webpack": "^5.0.0", | ||
"klona": "^2.0.6", | ||
"mocha": "^10.2.0", | ||
"mocha-lcov-reporter": "^1.3.0", | ||
"nsolid": "0.0.0", | ||
"nyc": "^15.1.0" | ||
"nyc": "^15.1.0", | ||
"webpack": "^5.89.0" | ||
}, | ||
"engines": { | ||
"node": ">=12" | ||
"node": ">=18" | ||
}, | ||
"keywords": [ | ||
"JSON", | ||
"JSON-LD", | ||
"Linked Data", | ||
"JSON-LD", | ||
"RDF", | ||
"RDF Dataset Canonicalization", | ||
"Semantic Web", | ||
"jsonld" | ||
"jsonld", | ||
"rdf-canon" | ||
], | ||
@@ -57,13 +75,27 @@ "scripts": { | ||
"test": "npm run test-node", | ||
"test-node": "NODE_ENV=test mocha -R spec --check-leaks", | ||
"test-node": "NODE_ENV=test mocha --delay -A -R spec --check-leaks test/test-node.js", | ||
"test-karma": "NODE_ENV=test karma start", | ||
"benchmark": "node benchmark/benchmark.js", | ||
"coverage": "NODE_ENV=test nyc --reporter=lcov --reporter=text-summary npm test", | ||
"coverage-ci": "NODE_ENV=test nyc --reporter=lcovonly npm run test", | ||
"coverage": "NODE_ENV=test nyc npm test", | ||
"coverage-ci": "NODE_ENV=test nyc --reporter=lcovonly --reporter=text-summary --reporter=text npm run test", | ||
"coverage-report": "nyc report", | ||
"lint": "eslint '*.js' 'lib/*.js' 'test/*.js' 'benchmark/*.js'" | ||
"lint": "eslint ." | ||
}, | ||
"browser": { | ||
"./lib/MessageDigest.js": "./lib/MessageDigest-browser.js", | ||
"./lib/MessageDigest.js": "./lib/MessageDigest-webcrypto.js", | ||
"./lib/platform.js": "./lib/platform-browser.js", | ||
"rdf-canonize-native": false | ||
}, | ||
"react-native": { | ||
"./lib/MessageDigest.js": "./lib/MessageDigest-webcrypto.js", | ||
"./lib/platform.js": "./lib/platform-browser.js", | ||
"rdf-canonize-native": false | ||
}, | ||
"nyc": { | ||
"reporter": [ | ||
"lcov", | ||
"text-summary", | ||
"text" | ||
] | ||
} | ||
} |
171
README.md
# rdf-canonize | ||
[![Build status](https://img.shields.io/github/workflow/status/digitalbazaar/rdf-canonize/Node.js%20CI)](https://github.com/digitalbazaar/rdf-canonize/actions?query=workflow%3A%22Node.js+CI%22) | ||
[![Build status](https://img.shields.io/github/actions/workflow/status/digitalbazaar/rdf-canonize/main.yml)](https://github.com/digitalbazaar/rdf-canonize/actions/workflows/main.yml) | ||
[![Coverage status](https://img.shields.io/codecov/c/github/digitalbazaar/rdf-canonize)](https://codecov.io/gh/digitalbazaar/rdf-canonize) | ||
[![Dependency Status](https://img.shields.io/david/digitalbazaar/rdf-canonize.svg)](https://david-dm.org/digitalbazaar/rdf-canonize) | ||
An implementation of the [RDF Dataset Canonicalization Algorithm][] in JavaScript. | ||
An implementation of the [RDF Dataset Canonicalization][] specification in | ||
JavaScript. | ||
@@ -12,3 +12,4 @@ Introduction | ||
... | ||
See the [RDF Dataset Canonicalization][] specification for details on the | ||
specification and algorithm this library implements. | ||
@@ -32,3 +33,3 @@ Installation | ||
useful if your application requires doing many canonizing operations | ||
asyncronously in parallel or in the background. It is **highly recommended** | ||
asynchronously in parallel or in the background. It is **highly recommended** | ||
that you understand your requirements and benchmark using JavaScript vs native | ||
@@ -55,3 +56,3 @@ bindings. The native bindings add overhead and the JavaScript implementation | ||
Install in your project with npm and use your favorite browser bundler tool. | ||
Install in your project with `npm` and use your favorite browser bundler tool. | ||
@@ -62,17 +63,129 @@ Examples | ||
```js | ||
const dataset = { | ||
// canonize a dataset with the default algorithm | ||
const dataset = [ | ||
// ... | ||
}; | ||
]; | ||
const canonical = await canonize.canonize(dataset, {algorithm: 'RDFC-1.0'}); | ||
// canonize a data set with a particular algorithm with async/await | ||
const canonical = await canonize.canonize(dataset, {algorithm: 'URDNA2015'}); | ||
// parse and canonize N-Quads with the default algorithm | ||
// canonize a data set with a particular algorithm and force use of the | ||
// native implementation | ||
const canonical = await canonize.canonize(dataset, { | ||
algorithm: 'URDNA2015', | ||
useNative: true | ||
const nquads = "..."; | ||
const canonical = await canonize.canonize(nquads, { | ||
algorithm: 'RDFC-1.0', | ||
inputFormat: 'application/n-quads' | ||
}); | ||
``` | ||
### Using with React Native | ||
Using this library with React Native requires a polyfill such as | ||
[`data-integrity-rn`](https://github.com/digitalcredentials/data-integrity-rn) | ||
to be imported before this library: | ||
```js | ||
import '@digitalcredentials/data-integrity-rn' | ||
import * as canonize from 'rdf-canonize' | ||
``` | ||
The polyfill needs to provide the following globals: | ||
* `crypto.subtle` | ||
* `TextEncoder` | ||
Algorithm Support | ||
----------------- | ||
* "[RDFC-1.0][]": Supported. | ||
* Primary algorithm in the [RDF Dataset Canonicalization][] specification. | ||
* "[URDNA2015][]": Deprecated and supported as an alias for "RDFC-1.0". | ||
* Former algorithm name that evolved into "RDFC-1.0". | ||
* **NOTE**: There are minor differences in the [canonical N-Quads | ||
form](https://w3c.github.io/rdf-canon/spec/#canonical-quads) that *could* | ||
cause canonical output differences in some cases. See the 4.0.0 changelog | ||
or code for details. If strict "URDNA2015" support is required, use a 3.x | ||
version of this library. | ||
* See the migration section below if you have code that uses the "URDNA2015" | ||
algorithm name. | ||
* "[URGNA2012][]": No longer supported. | ||
* Older algorithm with significant differences from newer algorithms. | ||
* Use older versions of this library if support is needed. | ||
URDNA2015 Migration | ||
------------------- | ||
* The deprecated "URDNA2015" algorithm name is currently supported as an alias | ||
for "RDFC-1.0". | ||
* There is a minor difference that could cause compatibility issues. It is | ||
considered an edge case that will not be an issue in practice. See above for | ||
details. | ||
* Two tools are currently provided to help transition to "RDFC-1.0": | ||
* If the API option `rejectURDNA2015` is truthy, it will cause an error to be | ||
thrown if "URDNA2015" is used. | ||
* If the global `RDF_CANONIZE_TRACE_URDNA2015` is truthy, it will cause | ||
`console.trace()` to be called when "URDNA2015" is used. This is designed | ||
for *development use only* to find where "URDNA2015" is being used. It | ||
could be *very* verbose. | ||
Complexity Control | ||
------------------ | ||
Inputs may vary in complexity and some inputs may use more computational | ||
resources than desired. There also exists a class of inputs that are sometimes | ||
referred to as "poison" graphs. These are structured or designed specifically | ||
to be difficult to process but often do not provide any useful purpose. | ||
### Signals | ||
The `canonize` API accepts an | ||
[`AbortSignal`](https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal) | ||
as the `signal` parameter that can be used to control processing of | ||
computationally difficult inputs. `signal` is not set by default. It can be | ||
used in a number of ways: | ||
- Abort processing manually with | ||
[`AbortController.abort()`](https://developer.mozilla.org/en-US/docs/Web/API/AbortController/abort) | ||
- Abort processing after a timeout with | ||
[`AbortSignal.timeout()`](https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal/timeout_static) | ||
- Abort after any other desired condition with a custom `AbortSignal`. This | ||
could track memory pressure or system load. | ||
- A combination of conditions with an aggregated `AbortSignal` such as with | ||
[`AbortSignal.any()`](https://github.com/shaseley/abort-signal-any/) or | ||
[signals](https://github.com/toebeann/signals). | ||
For performance reasons this signal is only checked periodically during | ||
processing and is not immediate. | ||
### Limits | ||
The `canonize` API has parameters to limit how many times the blank node deep | ||
comparison algorithm can be run to assign blank node labels before throwing an | ||
error. It is designed to control exponential growth related to the number of | ||
blank nodes. Graphs without blank nodes, and those with simple blank nodes will | ||
not run the algorithms that use this parameter. Those with more complex deeply | ||
connected blank nodes can result in significant time complexity which these | ||
parameters can control. | ||
The `canonize` API has the following parameters to control limits: | ||
- `maxWorkFactor`: Used to calculate a maximum number of deep iterations based | ||
on the number of non-unique blank nodes. | ||
- `0`: Deep inspection disallowed. | ||
- `1`: Limit deep iterations to O(n). (default) | ||
- `2`: Limit deep iterations to O(n^2). | ||
- `3`: Limit deep iterations to O(n^3). Values at this level or higher will | ||
allow processing of complex "poison" graphs but may take significant | ||
amounts of computational resources. | ||
- `Infinity`: No limitation. | ||
- `maxDeepIterations`: The exact number of deep iterations. This parameter is | ||
for specialized use cases and use of `maxWorkFactor` is recommended. Defaults | ||
to `Infinity` and any other value will override `maxWorkFactor`. | ||
### Usage | ||
In practice, callers must balance system load, concurrent processing, expected | ||
input size and complexity, and other factors to determine which complexity | ||
controls to use. This library defaults to a `maxWorkFactor` of `1` and no | ||
timeout signal. These can be adjusted as needed. | ||
Related Modules | ||
@@ -93,12 +206,16 @@ --------------- | ||
This should be a sibling directory of the rdf-canonize directory or in a | ||
`test-suites` dir. To clone shallow copies into the `test-suites` dir you can | ||
use the following: | ||
This should be a sibling directory of the `rdf-canonize` directory or in a | ||
`test-suites` directory. To clone shallow copies into the `test-suites` | ||
directory you can use the following: | ||
npm run fetch-test-suite | ||
Node.js tests can be run with a simple command: | ||
Node.js tests: | ||
npm test | ||
Browser tests via Karma: | ||
npm run test-karma | ||
If you installed the test suites elsewhere, or wish to run other tests, use | ||
@@ -109,9 +226,14 @@ the `TEST_DIR` environment var: | ||
To generate earl reports: | ||
To generate EARL reports: | ||
# generate the earl report for node.js | ||
# generate a JSON-LD EARL report with Node.js | ||
EARL=earl-node.jsonld npm test | ||
Browser testing with karma is done indirectly through [jsonld.js][]. | ||
# generate a Turtle EARL report with Node.js | ||
EARL=js-rdf-canonize-earl.ttl npm test | ||
# generate official Turtle EARL report with Node.js | ||
# turns ASYNC on and SYNC and WEBCRYPTO off | ||
EARL_OFFICIAL=true EARL=js-rdf-canonize-earl.ttl npm test | ||
Benchmark | ||
@@ -137,4 +259,7 @@ --------- | ||
[JSON-LD]: https://json-ld.org/ | ||
[RDF Dataset Canonicalization Algorithm]: https://w3c.github.io/rdf-canon/spec/ | ||
[RDF Dataset Canonicalization]: https://w3c.github.io/rdf-canon/spec/ | ||
[RDFC-1.0]: https://w3c.github.io/rdf-canon/spec/ | ||
[URDNA2015]: https://w3c.github.io/rdf-canon/spec/#urdna2015 | ||
[URGNA2012]: https://w3c.github.io/rdf-canon/spec/#urgna2012 | ||
[jsonld.js]: https://github.com/digitalbazaar/jsonld.js | ||
[rdf-canonize-native]: https://github.com/digitalbazaar/rdf-canonize-native |
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
84473
1877
259
25
1