rdf-canonize
Advanced tools
Comparing version 3.4.0 to 4.0.0
@@ -11,5 +11,5 @@ /* | ||
* | ||
* @param prefix the prefix to use ('<prefix><counter>'). | ||
* @param existing an existing Map to use. | ||
* @param counter the counter to use. | ||
* @param {string} prefix - The prefix to use ('<prefix><counter>'). | ||
* @param {Map} [existing] - An existing Map to use. | ||
* @param {number} [counter] - The counter to use. | ||
*/ | ||
@@ -25,3 +25,3 @@ constructor(prefix, existing = new Map(), counter = 0) { | ||
* | ||
* @return a copy of this IdentifierIssuer. | ||
* @returns {object} - A copy of this IdentifierIssuer. | ||
*/ | ||
@@ -37,5 +37,5 @@ clone() { | ||
* | ||
* @param [old] the old identifier to get the new identifier for. | ||
* @param {string} [old] - The old identifier to get the new identifier for. | ||
* | ||
* @return the new identifier. | ||
* @returns {string} - The new identifier. | ||
*/ | ||
@@ -65,6 +65,6 @@ getId(old) { | ||
* | ||
* @param old the old identifier to check. | ||
* @param {string} old - The old identifier to check. | ||
* | ||
* @return true if the old identifier has been assigned a new identifier, | ||
* false if not. | ||
* @returns {boolean} - True if the old identifier has been assigned a new | ||
* identifier, false if not. | ||
*/ | ||
@@ -79,3 +79,4 @@ hasId(old) { | ||
* | ||
* @return the list of old IDs that has been issued new IDs in order. | ||
* @returns {Array} - The list of old IDs that have been issued new IDs in | ||
* order. | ||
*/ | ||
@@ -82,0 +83,0 @@ getOldIds() { |
173
lib/index.js
@@ -37,6 +37,4 @@ /** | ||
const URDNA2015 = require('./URDNA2015'); | ||
const URGNA2012 = require('./URGNA2012'); | ||
const URDNA2015Sync = require('./URDNA2015Sync'); | ||
const URGNA2012Sync = require('./URGNA2012Sync'); | ||
const RDFC10 = require('./RDFC10'); | ||
const RDFC10Sync = require('./RDFC10Sync'); | ||
@@ -49,7 +47,13 @@ // optional native support | ||
// return a dataset from input dataset or legacy dataset | ||
function _inputToDataset(input/*, options*/) { | ||
// back-compat with legacy dataset | ||
if(!Array.isArray(input)) { | ||
return exports.NQuads.legacyDatasetToQuads(input); | ||
// return a dataset from input dataset or n-quads | ||
function _inputToDataset(input, options) { | ||
if(options.inputFormat) { | ||
if(options.inputFormat === 'application/n-quads') { | ||
if(typeof input !== 'string') { | ||
throw new Error('N-Quads input must be a string.'); | ||
} | ||
return exports.NQuads.parse(input); | ||
} | ||
throw new Error( | ||
`Unknown canonicalization input format: "${options.inputFormat}".`); | ||
} | ||
@@ -59,2 +63,20 @@ return input; | ||
// check for valid output format | ||
function _checkOutputFormat(options) { | ||
// only N-Quads supported | ||
if(options.format) { | ||
if(options.format !== 'application/n-quads') { | ||
throw new Error( | ||
`Unknown canonicalization output format: "${options.format}".`); | ||
} | ||
} | ||
} | ||
// helper to trace URDNA2015 usage | ||
function _traceURDNA2015() { | ||
if(!!globalThis.RDF_CANONIZE_TRACE_URDNA2015) { | ||
console.trace('[rdf-canonize] URDNA2015 is deprecated, use RDFC-1.0'); | ||
} | ||
} | ||
// expose helpers | ||
@@ -67,5 +89,5 @@ exports.NQuads = require('./NQuads'); | ||
* | ||
* @param api the native API. | ||
* @param {object} [api] - The native API. | ||
* | ||
* @return the currently set native API. | ||
* @returns {object} - The currently set native API. | ||
*/ | ||
@@ -83,6 +105,5 @@ exports._rdfCanonizeNative = function(api) { | ||
* @param {Array|object|string} input - The input to canonize given as a | ||
* dataset or legacy dataset. | ||
* dataset or format specified by 'inputFormat' option. | ||
* @param {object} options - The options to use: | ||
* {string} algorithm - The canonicalization algorithm to use, `URDNA2015` or | ||
* `URGNA2012`. | ||
* {string} algorithm - The canonicalization algorithm to use, `RDFC-1.0`. | ||
* {Function} [createMessageDigest] - A factory function for creating a | ||
@@ -93,17 +114,43 @@ * `MessageDigest` interface that overrides the built-in message digest | ||
* the canonize algorithm will result in different output. | ||
* {string} [messageDigestAlgorithm=sha256] - Message digest algorithm used | ||
* by the default implementation of `createMessageDigest`. Supported | ||
* algorithms are: 'sha256', 'sha384', 'sha512', and the 'SHA###' and | ||
* 'SHA-###' variations. | ||
* {Map} [canonicalIdMap] - An optional Map to be populated by the canonical | ||
* identifier issuer with the bnode identifier mapping generated by the | ||
* canonicalization algorithm. | ||
* {string} [inputFormat] - The format of the input. Use | ||
* 'application/n-quads' for an N-Quads string that will be parsed. Omit or | ||
* falsy for a JSON dataset. | ||
* {string} [format] - The format of the output. Omit or use | ||
* 'application/n-quads' for an N-Quads string. | ||
* {boolean} [useNative=false] - Use native implementation. | ||
* {number} [maxDeepIterations=Infinity] - The maximum number of times to run | ||
* {number} [maxWorkFactor=1] - Control of the maximum number of times to run | ||
* deep comparison algorithms (such as the N-Degree Hash Quads algorithm | ||
* used in URDNA2015) before bailing out and throwing an error; this is a | ||
* used in RDFC-1.0) before bailing out and throwing an error; this is a | ||
* useful setting for preventing wasted CPU cycles or DoS when canonizing | ||
* meaningless or potentially malicious datasets, a recommended value is | ||
* `1`. | ||
* meaningless or potentially malicious datasets. This parameter sets the | ||
* maximum number of iterations based on the number of non-unique blank | ||
* nodes. `0` to disable iterations, `1` for a O(n) limit, `2` for a O(n^2) | ||
* limit, `3` and higher may handle "poison" graphs but may take | ||
* significant computational resources, `Infinity` for no limitation. | ||
* Defaults to `1` which can handle many common inputs. | ||
* {number} [maxDeepIterations=-1] - The maximum number of times to run | ||
* deep comparison algorithms (such as the N-Degree Hash Quads algorithm | ||
* used in RDFC-1.0) before bailing out and throwing an error; this is a | ||
* useful setting for preventing wasted CPU cycles or DoS when canonizing | ||
* meaningless or potentially malicious datasets. If set to a value other | ||
* than `-1` it will explicitly set the number of iterations and override | ||
* `maxWorkFactor`. It is recommended to use `maxWorkFactor`. | ||
* {AbortSignal} [signal] - An AbortSignal used to abort the operation. The | ||
* aborted status is only periodically checked for performance reasons. | ||
* {boolean} [rejectURDNA2015=false] - Reject the "URDNA2015" algorithm name | ||
* instead of treating it as an alias for "RDFC-1.0". | ||
* | ||
* @return a Promise that resolves to the canonicalized RDF Dataset. | ||
* @returns {Promise<object>} - A Promise that resolves to the canonicalized | ||
* RDF Dataset. | ||
*/ | ||
exports.canonize = async function(input, options) { | ||
exports.canonize = async function(input, options = {}) { | ||
const dataset = _inputToDataset(input, options); | ||
_checkOutputFormat(options); | ||
@@ -123,15 +170,13 @@ if(options.useNative) { | ||
if(options.algorithm === 'URDNA2015') { | ||
return new URDNA2015(options).main(dataset); | ||
} | ||
if(options.algorithm === 'URGNA2012') { | ||
if(options.createMessageDigest) { | ||
throw new Error( | ||
'"createMessageDigest" cannot be used with "URGNA2012".'); | ||
} | ||
return new URGNA2012(options).main(dataset); | ||
} | ||
if(!('algorithm' in options)) { | ||
throw new Error('No RDF Dataset Canonicalization algorithm specified.'); | ||
} | ||
if(options.algorithm === 'RDFC-1.0') { | ||
return new RDFC10(options).main(dataset); | ||
} | ||
// URDNA2015 deprecated, handled as alias for RDFC-1.0 if allowed | ||
if(options.algorithm === 'URDNA2015' && !options.rejectURDNA2015) { | ||
_traceURDNA2015(); | ||
return new RDFC10(options).main(dataset); | ||
} | ||
throw new Error( | ||
@@ -147,6 +192,5 @@ 'Invalid RDF Dataset Canonicalization algorithm: ' + options.algorithm); | ||
* @param {Array|object|string} input - The input to canonize given as a | ||
* dataset or legacy dataset. | ||
* dataset or format specified by 'inputFormat' option. | ||
* @param {object} options - The options to use: | ||
* {string} algorithm - The canonicalization algorithm to use, `URDNA2015` or | ||
* `URGNA2012`. | ||
* {string} algorithm - The canonicalization algorithm to use, `RDFC-1.0`. | ||
* {Function} [createMessageDigest] - A factory function for creating a | ||
@@ -157,14 +201,47 @@ * `MessageDigest` interface that overrides the built-in message digest | ||
* the canonize algorithm will result in different output. | ||
* {string} [messageDigestAlgorithm=sha256] - Message digest algorithm used | ||
* by the default implementation of `createMessageDigest`. Supported | ||
* algorithms are: 'sha256', 'sha384', 'sha512', and the 'SHA###' and | ||
* 'SHA-###' variations. | ||
* {Map} [canonicalIdMap] - An optional Map to be populated by the canonical | ||
* identifier issuer with the bnode identifier mapping generated by the | ||
* canonicalization algorithm. | ||
* {string} [inputFormat] - The format of the input. Use | ||
* 'application/n-quads' for an N-Quads string that will be parsed. Omit or | ||
* falsy for a JSON dataset. | ||
* {string} [format] - The format of the output. Omit or use | ||
* 'application/n-quads' for an N-Quads string. | ||
* {boolean} [useNative=false] - Use native implementation. | ||
* {number} [maxDeepIterations=Infinity] - The maximum number of times to run | ||
* {number} [maxWorkFactor=1] - Control of the maximum number of times to run | ||
* deep comparison algorithms (such as the N-Degree Hash Quads algorithm | ||
* used in URDNA2015) before bailing out and throwing an error; this is a | ||
* used in RDFC-1.0) before bailing out and throwing an error; this is a | ||
* useful setting for preventing wasted CPU cycles or DoS when canonizing | ||
* meaningless or potentially malicious datasets, a recommended value is | ||
* `1`. | ||
* meaningless or potentially malicious datasets. This parameter sets the | ||
* maximum number of iterations based on the number of non-unique blank | ||
* nodes. `0` to disable iterations, `1` for a O(n) limit, `2` for a O(n^2) | ||
* limit, `3` and higher may handle "poison" graphs but may take | ||
* significant computational resources, `Infinity` for no limitation. | ||
* Defaults to `1` which can handle many common inputs. | ||
* {number} [maxDeepIterations=-1] - The maximum number of times to run | ||
* deep comparison algorithms (such as the N-Degree Hash Quads algorithm | ||
* used in RDFC-1.0) before bailing out and throwing an error; this is a | ||
* useful setting for preventing wasted CPU cycles or DoS when canonizing | ||
* meaningless or potentially malicious datasets. If set to a value other | ||
* than `-1` it will explicitly set the number of iterations and override | ||
* `maxWorkFactor`. It is recommended to use `maxWorkFactor`. | ||
* {number} [timeout=1000] - The maximum number of milliseconds before the | ||
* operation will timeout. This is only periodically checked for | ||
* performance reasons. Use 0 to disable. Note: This is a replacement for | ||
* the async canonize `signal` option common timeout use case. If complex | ||
* abort logic is required, use the async function and the `signal` | ||
* parameter. | ||
* {boolean} [rejectURDNA2015=false] - Reject the "URDNA2015" algorithm name | ||
* instead of treating it as an alias for "RDFC-1.0". | ||
* | ||
* @return the RDF dataset in canonical form. | ||
* @returns {Promise<object>} - A Promise that resolves to the canonicalized | ||
* RDF Dataset. | ||
*/ | ||
exports._canonizeSync = function(input, options) { | ||
exports._canonizeSync = function(input, options = {}) { | ||
const dataset = _inputToDataset(input, options); | ||
_checkOutputFormat(options); | ||
@@ -181,17 +258,15 @@ if(options.useNative) { | ||
} | ||
if(options.algorithm === 'URDNA2015') { | ||
return new URDNA2015Sync(options).main(dataset); | ||
} | ||
if(options.algorithm === 'URGNA2012') { | ||
if(options.createMessageDigest) { | ||
throw new Error( | ||
'"createMessageDigest" cannot be used with "URGNA2012".'); | ||
} | ||
return new URGNA2012Sync(options).main(dataset); | ||
} | ||
if(!('algorithm' in options)) { | ||
throw new Error('No RDF Dataset Canonicalization algorithm specified.'); | ||
} | ||
if(options.algorithm === 'RDFC-1.0') { | ||
return new RDFC10Sync(options).main(dataset); | ||
} | ||
// URDNA2015 deprecated, handled as alias for RDFC-1.0 if allowed | ||
if(options.algorithm === 'URDNA2015' && !options.rejectURDNA2015) { | ||
_traceURDNA2015(); | ||
return new RDFC10Sync(options).main(dataset); | ||
} | ||
throw new Error( | ||
'Invalid RDF Dataset Canonicalization algorithm: ' + options.algorithm); | ||
}; |
/* | ||
* Copyright (c) 2016-2021 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2023 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
@@ -8,2 +8,14 @@ 'use strict'; | ||
const algorithmMap = new Map([ | ||
['sha256', 'sha256'], | ||
['SHA256', 'sha256'], | ||
['SHA-256', 'sha256'], | ||
['sha384', 'sha384'], | ||
['SHA384', 'sha384'], | ||
['SHA-384', 'sha384'], | ||
['sha512', 'sha512'], | ||
['SHA512', 'sha512'], | ||
['SHA-512', 'sha512'], | ||
]); | ||
module.exports = class MessageDigest { | ||
@@ -13,6 +25,9 @@ /** | ||
* | ||
* @param algorithm the algorithm to use. | ||
* @param {string} algorithm - The algorithm to use. | ||
*/ | ||
constructor(algorithm) { | ||
this.md = crypto.createHash(algorithm); | ||
if(!algorithmMap.has(algorithm)) { | ||
throw new Error(`Unsupported algorithm "${algorithm}".`); | ||
} | ||
this.md = crypto.createHash(algorithmMap.get(algorithm)); | ||
} | ||
@@ -24,2 +39,4 @@ | ||
// async code awaits this but it is not async to support | ||
// the sync code | ||
digest() { | ||
@@ -26,0 +43,0 @@ return this.md.digest('hex'); |
@@ -20,3 +20,3 @@ /*! | ||
(() => { | ||
const iri = '(?:<([^:]+:[^>]*)>)'; | ||
// https://www.w3.org/TR/n-quads/#sec-grammar | ||
// https://www.w3.org/TR/turtle/#grammar-production-BLANK_NODE_LABEL | ||
@@ -49,9 +49,17 @@ const PN_CHARS_BASE = | ||
const BLANK_NODE_LABEL = | ||
'(_:' + | ||
'_:(' + | ||
'(?:[' + PN_CHARS_U + '0-9])' + | ||
'(?:(?:[' + PN_CHARS + '.])*(?:[' + PN_CHARS + ']))?' + | ||
')'; | ||
// Older simple regex: const IRI = '(?:<([^:]+:[^>]*)>)'; | ||
const UCHAR4 = '\\\\u[0-9A-Fa-f]{4}'; | ||
const UCHAR8 = '\\\\U[0-9A-Fa-f]{8}'; | ||
const IRI = '(?:<((?:' + | ||
'[^\u0000-\u0020<>"{}|^`\\\\]' + '|' + | ||
UCHAR4 + '|' + | ||
UCHAR8 + | ||
')*)>)'; | ||
const bnode = BLANK_NODE_LABEL; | ||
const plain = '"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"'; | ||
const datatype = '(?:\\^\\^' + iri + ')'; | ||
const datatype = '(?:\\^\\^' + IRI + ')'; | ||
const language = '(?:@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*))'; | ||
@@ -63,6 +71,6 @@ const literal = '(?:' + plain + '(?:' + datatype + '|' + language + ')?)'; | ||
// define quad part regexes | ||
const subject = '(?:' + iri + '|' + bnode + ')' + ws; | ||
const property = iri + ws; | ||
const object = '(?:' + iri + '|' + bnode + '|' + literal + ')' + wso; | ||
const graphName = '(?:\\.|(?:(?:' + iri + '|' + bnode + ')' + wso + '\\.))'; | ||
const subject = '(?:' + IRI + '|' + bnode + ')' + ws; | ||
const property = IRI + ws; | ||
const object = '(?:' + IRI + '|' + bnode + '|' + literal + ')' + wso; | ||
const graphName = '(?:\\.|(?:(?:' + IRI + '|' + bnode + ')' + wso + '\\.))'; | ||
@@ -82,5 +90,6 @@ // end of line and empty regexes | ||
* | ||
* @param input the N-Quads input to parse. | ||
* @param {string} input - The N-Quads input to parse. | ||
* | ||
* @return an RDF dataset (an array of quads per http://rdf.js.org/). | ||
* @returns {Array} - An RDF dataset (an array of quads per | ||
* https://rdf.js.org/). | ||
*/ | ||
@@ -115,15 +124,30 @@ static parse(input) { | ||
if(match[1] !== undefined) { | ||
quad.subject = {termType: TYPE_NAMED_NODE, value: match[1]}; | ||
quad.subject = { | ||
termType: TYPE_NAMED_NODE, | ||
value: _iriUnescape(match[1]) | ||
}; | ||
} else { | ||
quad.subject = {termType: TYPE_BLANK_NODE, value: match[2]}; | ||
quad.subject = { | ||
termType: TYPE_BLANK_NODE, | ||
value: match[2] | ||
}; | ||
} | ||
// get predicate | ||
quad.predicate = {termType: TYPE_NAMED_NODE, value: match[3]}; | ||
quad.predicate = { | ||
termType: TYPE_NAMED_NODE, | ||
value: _iriUnescape(match[3]) | ||
}; | ||
// get object | ||
if(match[4] !== undefined) { | ||
quad.object = {termType: TYPE_NAMED_NODE, value: match[4]}; | ||
quad.object = { | ||
termType: TYPE_NAMED_NODE, | ||
value: _iriUnescape(match[4]) | ||
}; | ||
} else if(match[5] !== undefined) { | ||
quad.object = {termType: TYPE_BLANK_NODE, value: match[5]}; | ||
quad.object = { | ||
termType: TYPE_BLANK_NODE, | ||
value: match[5] | ||
}; | ||
} else { | ||
@@ -138,3 +162,3 @@ quad.object = { | ||
if(match[7] !== undefined) { | ||
quad.object.datatype.value = match[7]; | ||
quad.object.datatype.value = _iriUnescape(match[7]); | ||
} else if(match[8] !== undefined) { | ||
@@ -146,3 +170,3 @@ quad.object.datatype.value = RDF_LANGSTRING; | ||
} | ||
quad.object.value = _unescape(match[6]); | ||
quad.object.value = _stringLiteralUnescape(match[6]); | ||
} | ||
@@ -154,3 +178,3 @@ | ||
termType: TYPE_NAMED_NODE, | ||
value: match[9] | ||
value: _iriUnescape(match[9]) | ||
}; | ||
@@ -195,10 +219,7 @@ } else if(match[10] !== undefined) { | ||
* | ||
* @param dataset (array of quads) the RDF dataset to convert. | ||
* @param {Array} dataset - The RDF dataset (an array of quads) to convert. | ||
* | ||
* @return the N-Quads string. | ||
* @returns {string} - The N-Quads string. | ||
*/ | ||
static serialize(dataset) { | ||
if(!Array.isArray(dataset)) { | ||
dataset = NQuads.legacyDatasetToQuads(dataset); | ||
} | ||
const quads = []; | ||
@@ -214,8 +235,8 @@ for(const quad of dataset) { | ||
* | ||
* @param {Object} s - N-Quad subject component. | ||
* @param {Object} p - N-Quad predicate component. | ||
* @param {Object} o - N-Quad object component. | ||
* @param {Object} g - N-Quad graph component. | ||
* @param {object} s - N-Quad subject component. | ||
* @param {object} p - N-Quad predicate component. | ||
* @param {object} o - N-Quad object component. | ||
* @param {object} g - N-Quad graph component. | ||
* | ||
* @return {string} the N-Quad. | ||
* @returns {string} - The N-Quad. | ||
*/ | ||
@@ -227,17 +248,21 @@ static serializeQuadComponents(s, p, o, g) { | ||
if(s.termType === TYPE_NAMED_NODE) { | ||
nquad += `<${s.value}>`; | ||
nquad += `<${_iriEscape(s.value)}>`; | ||
} else { | ||
nquad += `${s.value}`; | ||
nquad += `_:${s.value}`; | ||
} | ||
// predicate can only be NamedNode | ||
nquad += ` <${p.value}> `; | ||
// predicate is normally a NamedNode; it can be a BlankNode in generalized RDF | ||
if(p.termType === TYPE_NAMED_NODE) { | ||
nquad += ` <${_iriEscape(p.value)}> `; | ||
} else { | ||
nquad += ` _:${p.value} `; | ||
} | ||
// object is NamedNode, BlankNode, or Literal | ||
if(o.termType === TYPE_NAMED_NODE) { | ||
nquad += `<${o.value}>`; | ||
nquad += `<${_iriEscape(o.value)}>`; | ||
} else if(o.termType === TYPE_BLANK_NODE) { | ||
nquad += o.value; | ||
nquad += `_:${o.value}`; | ||
} else { | ||
nquad += `"${_escape(o.value)}"`; | ||
nquad += `"${_stringLiteralEscape(o.value)}"`; | ||
if(o.datatype.value === RDF_LANGSTRING) { | ||
@@ -248,3 +273,3 @@ if(o.language) { | ||
} else if(o.datatype.value !== XSD_STRING) { | ||
nquad += `^^<${o.datatype.value}>`; | ||
nquad += `^^<${_iriEscape(o.datatype.value)}>`; | ||
} | ||
@@ -256,5 +281,5 @@ } | ||
if(g.termType === TYPE_NAMED_NODE) { | ||
nquad += ` <${g.value}>`; | ||
nquad += ` <${_iriEscape(g.value)}>`; | ||
} else if(g.termType === TYPE_BLANK_NODE) { | ||
nquad += ` ${g.value}`; | ||
nquad += ` _:${g.value}`; | ||
} | ||
@@ -269,5 +294,5 @@ | ||
* | ||
* @param quad the RDF quad convert. | ||
* @param {object} quad - The RDF quad to convert. | ||
* | ||
* @return the N-Quad string. | ||
* @returns {string} - The N-Quad string. | ||
*/ | ||
@@ -278,66 +303,2 @@ static serializeQuad(quad) { | ||
} | ||
/** | ||
* Converts a legacy-formatted dataset to an array of quads dataset per | ||
* http://rdf.js.org/. | ||
* | ||
* @param dataset the legacy dataset to convert. | ||
* | ||
* @return the array of quads dataset. | ||
*/ | ||
static legacyDatasetToQuads(dataset) { | ||
const quads = []; | ||
const termTypeMap = { | ||
'blank node': TYPE_BLANK_NODE, | ||
IRI: TYPE_NAMED_NODE, | ||
literal: TYPE_LITERAL | ||
}; | ||
for(const graphName in dataset) { | ||
const triples = dataset[graphName]; | ||
triples.forEach(triple => { | ||
const quad = {}; | ||
for(const componentName in triple) { | ||
const oldComponent = triple[componentName]; | ||
const newComponent = { | ||
termType: termTypeMap[oldComponent.type], | ||
value: oldComponent.value | ||
}; | ||
if(newComponent.termType === TYPE_LITERAL) { | ||
newComponent.datatype = { | ||
termType: TYPE_NAMED_NODE | ||
}; | ||
if('datatype' in oldComponent) { | ||
newComponent.datatype.value = oldComponent.datatype; | ||
} | ||
if('language' in oldComponent) { | ||
if(!('datatype' in oldComponent)) { | ||
newComponent.datatype.value = RDF_LANGSTRING; | ||
} | ||
newComponent.language = oldComponent.language; | ||
} else if(!('datatype' in oldComponent)) { | ||
newComponent.datatype.value = XSD_STRING; | ||
} | ||
} | ||
quad[componentName] = newComponent; | ||
} | ||
if(graphName === '@default') { | ||
quad.graph = { | ||
termType: TYPE_DEFAULT_GRAPH, | ||
value: '' | ||
}; | ||
} else { | ||
quad.graph = { | ||
termType: graphName.startsWith('_:') ? | ||
TYPE_BLANK_NODE : TYPE_NAMED_NODE, | ||
value: graphName | ||
}; | ||
} | ||
quads.push(quad); | ||
}); | ||
} | ||
return quads; | ||
} | ||
}; | ||
@@ -348,6 +309,6 @@ | ||
* | ||
* @param t1 the first triple. | ||
* @param t2 the second triple. | ||
* @param {object} t1 - The first triple. | ||
* @param {object} t2 - The second triple. | ||
* | ||
* @return true if the triples are the same, false if not. | ||
* @returns {boolean} - True if the triples are the same, false if not. | ||
*/ | ||
@@ -377,31 +338,60 @@ function _compareTriples(t1, t2) { | ||
const _escapeRegex = /["\\\n\r]/g; | ||
const _stringLiteralEscapeRegex = /[\u0000-\u001F\u007F"\\]/g; | ||
const _stringLiteralEscapeMap = []; | ||
for(let n = 0; n <= 0x7f; ++n) { | ||
if(_stringLiteralEscapeRegex.test(String.fromCharCode(n))) { | ||
// default UCHAR mapping | ||
_stringLiteralEscapeMap[n] = | ||
'\\u' + n.toString(16).toUpperCase().padStart(4, '0'); | ||
// reset regex | ||
_stringLiteralEscapeRegex.lastIndex = 0; | ||
} | ||
} | ||
// special ECHAR mappings | ||
_stringLiteralEscapeMap['\b'.codePointAt(0)] = '\\b'; | ||
_stringLiteralEscapeMap['\t'.codePointAt(0)] = '\\t'; | ||
_stringLiteralEscapeMap['\n'.codePointAt(0)] = '\\n'; | ||
_stringLiteralEscapeMap['\f'.codePointAt(0)] = '\\f'; | ||
_stringLiteralEscapeMap['\r'.codePointAt(0)] = '\\r'; | ||
_stringLiteralEscapeMap['"' .codePointAt(0)] = '\\"'; | ||
_stringLiteralEscapeMap['\\'.codePointAt(0)] = '\\\\'; | ||
/** | ||
* Escape string to N-Quads literal | ||
* Escape string to N-Quads literal. | ||
* | ||
* @param {string} s - String to escape. | ||
* | ||
* @returns {string} - Escaped N-Quads literal. | ||
*/ | ||
function _escape(s) { | ||
return s.replace(_escapeRegex, function(match) { | ||
switch(match) { | ||
case '"': return '\\"'; | ||
case '\\': return '\\\\'; | ||
case '\n': return '\\n'; | ||
case '\r': return '\\r'; | ||
} | ||
function _stringLiteralEscape(s) { | ||
if(!_stringLiteralEscapeRegex.test(s)) { | ||
return s; | ||
} | ||
return s.replace(_stringLiteralEscapeRegex, function(match) { | ||
return _stringLiteralEscapeMap[match.codePointAt(0)]; | ||
}); | ||
} | ||
const _unescapeRegex = | ||
/(?:\\([tbnrf"'\\]))|(?:\\u([0-9A-Fa-f]{4}))|(?:\\U([0-9A-Fa-f]{8}))/g; | ||
const _stringLiteralUnescapeRegex = | ||
/(?:\\([btnfr"'\\]))|(?:\\u([0-9A-Fa-f]{4}))|(?:\\U([0-9A-Fa-f]{8}))/g; | ||
/** | ||
* Unescape N-Quads literal to string | ||
* Unescape N-Quads literal to string. | ||
* | ||
* @param {string} s - String to unescape. | ||
* | ||
* @returns {string} - Unescaped N-Quads literal. | ||
*/ | ||
function _unescape(s) { | ||
return s.replace(_unescapeRegex, function(match, code, u, U) { | ||
function _stringLiteralUnescape(s) { | ||
if(!_stringLiteralUnescapeRegex.test(s)) { | ||
return s; | ||
} | ||
return s.replace(_stringLiteralUnescapeRegex, function(match, code, u, U) { | ||
if(code) { | ||
switch(code) { | ||
case 'b': return '\b'; | ||
case 't': return '\t'; | ||
case 'b': return '\b'; | ||
case 'n': return '\n'; | ||
case 'f': return '\f'; | ||
case 'r': return '\r'; | ||
case 'f': return '\f'; | ||
case '"': return '"'; | ||
@@ -416,6 +406,57 @@ case '\'': return '\''; | ||
if(U) { | ||
// FIXME: support larger values | ||
throw new Error('Unsupported U escape'); | ||
return String.fromCodePoint(parseInt(U, 16)); | ||
} | ||
}); | ||
} | ||
const _iriEscapeRegex = /[\u0000-\u0020<>"{}|^`\\]/g; | ||
const _iriEscapeRegexMap = []; | ||
for(let n = 0; n <= 0x7f; ++n) { | ||
if(_iriEscapeRegex.test(String.fromCharCode(n))) { | ||
// UCHAR mapping | ||
_iriEscapeRegexMap[n] = | ||
'\\u' + n.toString(16).toUpperCase().padStart(4, '0'); | ||
// reset regex | ||
_iriEscapeRegex.lastIndex = 0; | ||
} | ||
} | ||
/** | ||
* Escape IRI to N-Quads IRI. | ||
* | ||
* @param {string} s - IRI to escape. | ||
* | ||
* @returns {string} - Escaped N-Quads IRI. | ||
*/ | ||
function _iriEscape(s) { | ||
if(!_iriEscapeRegex.test(s)) { | ||
return s; | ||
} | ||
return s.replace(_iriEscapeRegex, function(match) { | ||
return _iriEscapeRegexMap[match.codePointAt(0)]; | ||
}); | ||
} | ||
const _iriUnescapeRegex = | ||
/(?:\\u([0-9A-Fa-f]{4}))|(?:\\U([0-9A-Fa-f]{8}))/g; | ||
/** | ||
* Unescape N-Quads IRI to IRI. | ||
* | ||
* @param {string} s - IRI to unescape. | ||
* | ||
* @returns {string} - Unescaped N-Quads IRI. | ||
*/ | ||
function _iriUnescape(s) { | ||
if(!_iriUnescapeRegex.test(s)) { | ||
return s; | ||
} | ||
return s.replace(_iriUnescapeRegex, function(match, u, U) { | ||
if(u) { | ||
return String.fromCharCode(parseInt(u, 16)); | ||
} | ||
if(U) { | ||
return String.fromCodePoint(parseInt(U, 16)); | ||
} | ||
}); | ||
} |
@@ -11,3 +11,3 @@ /*! | ||
* | ||
* @param list the array of elements to iterate over. | ||
* @param {Array} list - The array of elements to iterate over. | ||
*/ | ||
@@ -29,3 +29,3 @@ constructor(list) { | ||
* | ||
* @return true if there is another permutation, false if not. | ||
* @returns {boolean} - True if there is another permutation, false if not. | ||
*/ | ||
@@ -40,3 +40,3 @@ hasNext() { | ||
* | ||
* @return the next permutation. | ||
* @returns {any} - The next permutation. | ||
*/ | ||
@@ -43,0 +43,0 @@ next() { |
{ | ||
"name": "rdf-canonize", | ||
"version": "3.4.0", | ||
"description": "An implementation of the RDF Dataset Normalization Algorithm in JavaScript", | ||
"version": "4.0.0", | ||
"description": "An implementation of the RDF Dataset Canonicalization algorithm in JavaScript", | ||
"homepage": "https://github.com/digitalbazaar/rdf-canonize", | ||
@@ -33,21 +33,39 @@ "author": { | ||
"benchmark": "^2.1.4", | ||
"chai": "^4.2.0", | ||
"delay": "^5.0.0", | ||
"eslint": "^7.23.0", | ||
"eslint-config-digitalbazaar": "^2.6.1", | ||
"mocha": "^8.3.2", | ||
"browserify": "^17.0.0", | ||
"chai": "^4.3.10", | ||
"envify": "^4.1.0", | ||
"eslint": "^8.53.0", | ||
"eslint-config-digitalbazaar": "^5.0.1", | ||
"eslint-plugin-jsdoc": "^46.9.0", | ||
"esmify": "^2.1.1", | ||
"fs-extra": "^11.1.1", | ||
"join-path-js": "^0.0.0", | ||
"karma": "^6.4.2", | ||
"karma-babel-preprocessor": "^8.0.2", | ||
"karma-browserify": "^8.1.0", | ||
"karma-chrome-launcher": "^3.2.0", | ||
"karma-mocha": "^2.0.1", | ||
"karma-mocha-reporter": "^2.2.5", | ||
"karma-server-side": "github:fargies/karma-server-side#9397553473fcbc2aaabb7dc9f59e96f9ff26791c", | ||
"karma-sourcemap-loader": "^0.4.0", | ||
"karma-webpack": "^5.0.0", | ||
"klona": "^2.0.6", | ||
"mocha": "^10.2.0", | ||
"mocha-lcov-reporter": "^1.3.0", | ||
"nsolid": "0.0.0", | ||
"nyc": "^15.1.0" | ||
"nyc": "^15.1.0", | ||
"webpack": "^5.89.0" | ||
}, | ||
"engines": { | ||
"node": ">=12" | ||
"node": ">=18" | ||
}, | ||
"keywords": [ | ||
"JSON", | ||
"JSON-LD", | ||
"Linked Data", | ||
"JSON-LD", | ||
"RDF", | ||
"RDF Dataset Canonicalization", | ||
"Semantic Web", | ||
"jsonld" | ||
"jsonld", | ||
"rdf-canon" | ||
], | ||
@@ -57,13 +75,27 @@ "scripts": { | ||
"test": "npm run test-node", | ||
"test-node": "NODE_ENV=test mocha -R spec --check-leaks", | ||
"test-node": "NODE_ENV=test mocha --delay -A -R spec --check-leaks test/test-node.js", | ||
"test-karma": "NODE_ENV=test karma start", | ||
"benchmark": "node benchmark/benchmark.js", | ||
"coverage": "NODE_ENV=test nyc --reporter=lcov --reporter=text-summary npm test", | ||
"coverage-ci": "NODE_ENV=test nyc --reporter=lcovonly npm run test", | ||
"coverage": "NODE_ENV=test nyc npm test", | ||
"coverage-ci": "NODE_ENV=test nyc --reporter=lcovonly --reporter=text-summary --reporter=text npm run test", | ||
"coverage-report": "nyc report", | ||
"lint": "eslint '*.js' 'lib/*.js' 'test/*.js' 'benchmark/*.js'" | ||
"lint": "eslint ." | ||
}, | ||
"browser": { | ||
"./lib/MessageDigest.js": "./lib/MessageDigest-browser.js", | ||
"./lib/MessageDigest.js": "./lib/MessageDigest-webcrypto.js", | ||
"./lib/platform.js": "./lib/platform-browser.js", | ||
"rdf-canonize-native": false | ||
}, | ||
"react-native": { | ||
"./lib/MessageDigest.js": "./lib/MessageDigest-webcrypto.js", | ||
"./lib/platform.js": "./lib/platform-browser.js", | ||
"rdf-canonize-native": false | ||
}, | ||
"nyc": { | ||
"reporter": [ | ||
"lcov", | ||
"text-summary", | ||
"text" | ||
] | ||
} | ||
} |
171
README.md
# rdf-canonize | ||
[![Build status](https://img.shields.io/github/workflow/status/digitalbazaar/rdf-canonize/Node.js%20CI)](https://github.com/digitalbazaar/rdf-canonize/actions?query=workflow%3A%22Node.js+CI%22) | ||
[![Build status](https://img.shields.io/github/actions/workflow/status/digitalbazaar/rdf-canonize/main.yml)](https://github.com/digitalbazaar/rdf-canonize/actions/workflows/main.yml) | ||
[![Coverage status](https://img.shields.io/codecov/c/github/digitalbazaar/rdf-canonize)](https://codecov.io/gh/digitalbazaar/rdf-canonize) | ||
[![Dependency Status](https://img.shields.io/david/digitalbazaar/rdf-canonize.svg)](https://david-dm.org/digitalbazaar/rdf-canonize) | ||
An implementation of the [RDF Dataset Canonicalization Algorithm][] in JavaScript. | ||
An implementation of the [RDF Dataset Canonicalization][] specification in | ||
JavaScript. | ||
@@ -12,3 +12,4 @@ Introduction | ||
... | ||
See the [RDF Dataset Canonicalization][] specification for details on the | ||
specification and algorithm this library implements. | ||
@@ -32,3 +33,3 @@ Installation | ||
useful if your application requires doing many canonizing operations | ||
asyncronously in parallel or in the background. It is **highly recommended** | ||
asynchronously in parallel or in the background. It is **highly recommended** | ||
that you understand your requirements and benchmark using JavaScript vs native | ||
@@ -55,3 +56,3 @@ bindings. The native bindings add overhead and the JavaScript implementation | ||
Install in your project with npm and use your favorite browser bundler tool. | ||
Install in your project with `npm` and use your favorite browser bundler tool. | ||
@@ -62,17 +63,129 @@ Examples | ||
```js | ||
const dataset = { | ||
// canonize a dataset with the default algorithm | ||
const dataset = [ | ||
// ... | ||
}; | ||
]; | ||
const canonical = await canonize.canonize(dataset, {algorithm: 'RDFC-1.0'}); | ||
// canonize a data set with a particular algorithm with async/await | ||
const canonical = await canonize.canonize(dataset, {algorithm: 'URDNA2015'}); | ||
// parse and canonize N-Quads with the default algorithm | ||
// canonize a data set with a particular algorithm and force use of the | ||
// native implementation | ||
const canonical = await canonize.canonize(dataset, { | ||
algorithm: 'URDNA2015', | ||
useNative: true | ||
const nquads = "..."; | ||
const canonical = await canonize.canonize(nquads, { | ||
algorithm: 'RDFC-1.0', | ||
inputFormat: 'application/n-quads' | ||
}); | ||
``` | ||
### Using with React Native | ||
Using this library with React Native requires a polyfill such as | ||
[`data-integrity-rn`](https://github.com/digitalcredentials/data-integrity-rn) | ||
to be imported before this library: | ||
```js | ||
import '@digitalcredentials/data-integrity-rn' | ||
import * as canonize from 'rdf-canonize' | ||
``` | ||
The polyfill needs to provide the following globals: | ||
* `crypto.subtle` | ||
* `TextEncoder` | ||
Algorithm Support | ||
----------------- | ||
* "[RDFC-1.0][]": Supported. | ||
* Primary algorithm in the [RDF Dataset Canonicalization][] specification. | ||
* "[URDNA2015][]": Deprecated and supported as an alias for "RDFC-1.0". | ||
* Former algorithm name that evolved into "RDFC-1.0". | ||
* **NOTE**: There are minor differences in the [canonical N-Quads | ||
form](https://w3c.github.io/rdf-canon/spec/#canonical-quads) that *could* | ||
cause canonical output differences in some cases. See the 4.0.0 changelog | ||
or code for details. If strict "URDNA2015" support is required, use a 3.x | ||
version of this library. | ||
* See the migration section below if you have code that uses the "URDNA2015" | ||
algorithm name. | ||
* "[URGNA2012][]": No longer supported. | ||
* Older algorithm with significant differences from newer algorithms. | ||
* Use older versions of this library if support is needed. | ||
URDNA2015 Migration | ||
------------------- | ||
* The deprecated "URDNA2015" algorithm name is currently supported as an alias | ||
for "RDFC-1.0". | ||
* There is a minor difference that could cause compatibility issues. It is | ||
considered an edge case that will not be an issue in practice. See above for | ||
details. | ||
* Two tools are currently provided to help transition to "RDFC-1.0": | ||
* If the API option `rejectURDNA2015` is truthy, it will cause an error to be | ||
thrown if "URDNA2015" is used. | ||
* If the global `RDF_CANONIZE_TRACE_URDNA2015` is truthy, it will cause | ||
`console.trace()` to be called when "URDNA2015" is used. This is designed | ||
for *development use only* to find where "URDNA2015" is being used. It | ||
could be *very* verbose. | ||
Complexity Control | ||
------------------ | ||
Inputs may vary in complexity and some inputs may use more computational | ||
resources than desired. There also exists a class of inputs that are sometimes | ||
referred to as "poison" graphs. These are structured or designed specifically | ||
to be difficult to process but often do not provide any useful purpose. | ||
### Signals | ||
The `canonize` API accepts an | ||
[`AbortSignal`](https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal) | ||
as the `signal` parameter that can be used to control processing of | ||
computationally difficult inputs. `signal` is not set by default. It can be | ||
used in a number of ways: | ||
- Abort processing manually with | ||
[`AbortController.abort()`](https://developer.mozilla.org/en-US/docs/Web/API/AbortController/abort) | ||
- Abort processing after a timeout with | ||
[`AbortSignal.timeout()`](https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal/timeout_static) | ||
- Abort after any other desired condition with a custom `AbortSignal`. This | ||
could track memory pressure or system load. | ||
- A combination of conditions with an aggregated `AbortSignal` such as with | ||
[`AbortSignal.any()`](https://github.com/shaseley/abort-signal-any/) or | ||
[signals](https://github.com/toebeann/signals). | ||
For performance reasons this signal is only checked periodically during | ||
processing and is not immediate. | ||
### Limits | ||
The `canonize` API has parameters to limit how many times the blank node deep | ||
comparison algorithm can be run to assign blank node labels before throwing an | ||
error. It is designed to control exponential growth related to the number of | ||
blank nodes. Graphs without blank nodes, and those with simple blank nodes will | ||
not run the algorithms that use this parameter. Those with more complex deeply | ||
connected blank nodes can result in significant time complexity which these | ||
parameters can control. | ||
The `canonize` API has the following parameters to control limits: | ||
- `maxWorkFactor`: Used to calculate a maximum number of deep iterations based | ||
on the number of non-unique blank nodes. | ||
- `0`: Deep inspection disallowed. | ||
- `1`: Limit deep iterations to O(n). (default) | ||
- `2`: Limit deep iterations to O(n^2). | ||
- `3`: Limit deep iterations to O(n^3). Values at this level or higher will | ||
allow processing of complex "poison" graphs but may take significant | ||
amounts of computational resources. | ||
- `Infinity`: No limitation. | ||
- `maxDeepIterations`: The exact number of deep iterations. This parameter is | ||
for specialized use cases and use of `maxWorkFactor` is recommended. Defaults | ||
to `Infinity` and any other value will override `maxWorkFactor`. | ||
### Usage | ||
In practice, callers must balance system load, concurrent processing, expected | ||
input size and complexity, and other factors to determine which complexity | ||
controls to use. This library defaults to a `maxWorkFactor` of `1` and no | ||
timeout signal. These can be adjusted as needed. | ||
Related Modules | ||
@@ -93,12 +206,16 @@ --------------- | ||
This should be a sibling directory of the rdf-canonize directory or in a | ||
`test-suites` dir. To clone shallow copies into the `test-suites` dir you can | ||
use the following: | ||
This should be a sibling directory of the `rdf-canonize` directory or in a | ||
`test-suites` directory. To clone shallow copies into the `test-suites` | ||
directory you can use the following: | ||
npm run fetch-test-suite | ||
Node.js tests can be run with a simple command: | ||
Node.js tests: | ||
npm test | ||
Browser tests via Karma: | ||
npm run test-karma | ||
If you installed the test suites elsewhere, or wish to run other tests, use | ||
@@ -109,9 +226,14 @@ the `TEST_DIR` environment var: | ||
To generate earl reports: | ||
To generate EARL reports: | ||
# generate the earl report for node.js | ||
# generate a JSON-LD EARL report with Node.js | ||
EARL=earl-node.jsonld npm test | ||
Browser testing with karma is done indirectly through [jsonld.js][]. | ||
# generate a Turtle EARL report with Node.js | ||
EARL=js-rdf-canonize-earl.ttl npm test | ||
# generate official Turtle EARL report with Node.js | ||
# turns ASYNC on and SYNC and WEBCRYPTO off | ||
EARL_OFFICIAL=true EARL=js-rdf-canonize-earl.ttl npm test | ||
Benchmark | ||
@@ -137,4 +259,7 @@ --------- | ||
[JSON-LD]: https://json-ld.org/ | ||
[RDF Dataset Canonicalization Algorithm]: https://w3c.github.io/rdf-canon/spec/ | ||
[RDF Dataset Canonicalization]: https://w3c.github.io/rdf-canon/spec/ | ||
[RDFC-1.0]: https://w3c.github.io/rdf-canon/spec/ | ||
[URDNA2015]: https://w3c.github.io/rdf-canon/spec/#urdna2015 | ||
[URGNA2012]: https://w3c.github.io/rdf-canon/spec/#urgna2012 | ||
[jsonld.js]: https://github.com/digitalbazaar/jsonld.js | ||
[rdf-canonize-native]: https://github.com/digitalbazaar/rdf-canonize-native |
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
84473
1877
259
25
1