Socket
Socket
Sign inDemoInstall

rdf-canonize

Package Overview
Dependencies
1
Maintainers
4
Versions
26
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 1.2.0 to 2.0.0

dist/node6/lib/Permuter.js

17

CHANGELOG.md
# rdf-canonize ChangeLog
## 2.0.0 - 2020-01-20
### Removed
- **BREAKING**: Removed public API for `canonizeSync`. It is still available
for testing purposes but does not run in the browser.
- **BREAKING**: Removed dependency on `forge` which means that this library
will only run in browsers that have support for the WebCrypto API (or
an external polyfill for it).
- **BREAKING**: Do not expose `existing` on `IdentifierIssuer`. The old
IDs can be retrieved in order via `getOldIds`.
### Changed
- General optimizations and modernization of the library.
### Added
- Add `getOldIds` function to `IdentifierIssuer`.
## 1.2.0 - 2020-09-30

@@ -4,0 +21,0 @@

45

dist/node6/lib/IdentifierIssuer.js
/*
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/
'use strict';
const util = require('./util');
module.exports = class IdentifierIssuer {

@@ -14,7 +12,9 @@ /**

* @param prefix the prefix to use ('<prefix><counter>').
* @param existing an existing Map to use.
* @param counter the counter to use.
*/
constructor(prefix) {
constructor(prefix, existing = new Map(), counter = 0) {
this.prefix = prefix;
this.counter = 0;
this.existing = {};
this._existing = existing;
this.counter = counter;
}

@@ -29,6 +29,8 @@ /**

clone() {
const copy = new IdentifierIssuer(this.prefix);
copy.counter = this.counter;
copy.existing = util.clone(this.existing);
return copy;
const {
prefix,
_existing,
counter
} = this;
return new IdentifierIssuer(prefix, new Map(_existing), counter);
}

@@ -47,4 +49,6 @@ /**

// return existing old identifier
if (old && old in this.existing) {
return this.existing[old];
const existing = old && this._existing.get(old);
if (existing) {
return existing;
} // get next identifier

@@ -54,6 +58,6 @@

const identifier = this.prefix + this.counter;
this.counter += 1; // save mapping
this.counter++; // save mapping
if (old) {
this.existing[old] = identifier;
this._existing.set(old, identifier);
}

@@ -75,5 +79,16 @@

hasId(old) {
return old in this.existing;
return this._existing.has(old);
}
/**
* Returns all of the IDs that have been issued new IDs in the order in
* which they were issued new IDs.
*
* @return the list of old IDs that has been issued new IDs in order.
*/
getOldIds() {
return [...this._existing.keys()];
}
};

@@ -6,3 +6,3 @@ /**

* BSD 3-Clause License
* Copyright (c) 2016-2017 Digital Bazaar, Inc.
* Copyright (c) 2016-2020 Digital Bazaar, Inc.
* All rights reserved.

@@ -42,4 +42,2 @@ *

const util = require('./util');
const URDNA2015 = require('./URDNA2015');

@@ -88,3 +86,2 @@

* [useNative] use native implementation (default: false).
* @param [callback(err, canonical)] called once the operation completes.
*

@@ -95,44 +92,31 @@ * @return a Promise that resolves to the canonicalized RDF Dataset.

api.canonize = util.callbackify( /*#__PURE__*/function () {
api.canonize = /*#__PURE__*/function () {
var _ref = _asyncToGenerator(function* (dataset, options) {
let callback;
const promise = new Promise((resolve, reject) => {
callback = (err, canonical) => {
if (err) {
return reject(err);
}
/*if(options.format === 'application/n-quads') {
canonical = canonical.join('');
}
canonical = _parseNQuads(canonical.join(''));*/
resolve(canonical);
};
}); // back-compat with legacy dataset
// back-compat with legacy dataset
if (!Array.isArray(dataset)) {
dataset = api.NQuads.legacyDatasetToQuads(dataset);
} // TODO: convert algorithms to Promise-based async
}
if (options.useNative) {
if (rdfCanonizeNative) {
rdfCanonizeNative.canonize(dataset, options, callback);
} else {
if (!rdfCanonizeNative) {
throw new Error('rdf-canonize-native not available');
}
} else {
if (options.algorithm === 'URDNA2015') {
new URDNA2015(options).main(dataset, callback);
} else if (options.algorithm === 'URGNA2012') {
new URGNA2012(options).main(dataset, callback);
} else if (!('algorithm' in options)) {
throw new Error('No RDF Dataset Canonicalization algorithm specified.');
} else {
throw new Error('Invalid RDF Dataset Canonicalization algorithm: ' + options.algorithm);
}
} // TODO: convert native algorithm to Promise-based async
return new Promise((resolve, reject) => rdfCanonizeNative.canonize(dataset, options, (err, canonical) => err ? reject(err) : resolve(canonical)));
}
return promise;
if (options.algorithm === 'URDNA2015') {
return new URDNA2015(options).main(dataset);
}
if (options.algorithm === 'URGNA2012') {
return new URGNA2012(options).main(dataset);
}
if (!('algorithm' in options)) {
throw new Error('No RDF Dataset Canonicalization algorithm specified.');
}
throw new Error('Invalid RDF Dataset Canonicalization algorithm: ' + options.algorithm);
});

@@ -143,5 +127,7 @@

};
}());
}();
/**
* Synchronously canonizes an RDF dataset.
* This method is no longer available in the public API, it is for testing
* only. It synchronously canonizes an RDF dataset and does not work in the
* browser.
*

@@ -157,3 +143,4 @@ * @param dataset the dataset to canonize.

api.canonizeSync = function (dataset, options) {
api._canonizeSync = function (dataset, options) {
// back-compat with legacy dataset

@@ -174,3 +161,5 @@ if (!Array.isArray(dataset)) {

return new URDNA2015Sync(options).main(dataset);
} else if (options.algorithm === 'URGNA2012') {
}
if (options.algorithm === 'URGNA2012') {
return new URGNA2012Sync(options).main(dataset);

@@ -177,0 +166,0 @@ }

/*
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/
'use strict';
const forge = require('node-forge/lib/forge');
function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } }
require('node-forge/lib/md');
function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; }
require('node-forge/lib/sha1');
const crypto = self.crypto || self.msCrypto; // TODO: synchronous version no longer supported in browser
require('node-forge/lib/sha256');
module.exports = class MessageDigest {

@@ -21,13 +19,44 @@ /**

constructor(algorithm) {
this.md = forge.md[algorithm].create();
// check if crypto.subtle is available
// check is here rather than top-level to only fail if class is used
if (!(crypto && crypto.subtle)) {
throw new Error('crypto.subtle not found.');
}
if (algorithm === 'sha256') {
this.algorithm = {
name: 'SHA-256'
};
} else if (algorithm === 'sha1') {
this.algorithm = {
name: 'SHA-1'
};
} else {
throw new Error(`Unsupport algorithm "${algorithm}".`);
}
this._content = '';
}
update(msg) {
this.md.update(msg, 'utf8');
this._content += msg;
}
digest() {
return this.md.digest().toHex();
var _this = this;
return _asyncToGenerator(function* () {
const data = new TextEncoder().encode(_this._content);
const buffer = new Uint8Array(yield crypto.subtle.digest(_this.algorithm, data)); // return digest in hex
let hex = '';
for (let i = 0; i < buffer.length; ++i) {
hex += buffer[i].toString(16).padStart(2, '0');
}
return hex;
})();
}
};
/*
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/

@@ -4,0 +4,0 @@ 'use strict';

/*
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/

@@ -9,3 +9,7 @@ 'use strict'; // eslint-disable-next-line no-unused-vars

const RDF_LANGSTRING = RDF + 'langString';
const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string'; // build regexes
const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string';
const TYPE_NAMED_NODE = 'NamedNode';
const TYPE_BLANK_NODE = 'BlankNode';
const TYPE_LITERAL = 'Literal';
const TYPE_DEFAULT_GRAPH = 'DefaultGraph'; // build regexes

@@ -73,7 +77,12 @@ const REGEX = {};

const quad = {}; // get subject
const quad = {
subject: null,
predicate: null,
object: null,
graph: null
}; // get subject
if (match[1] !== undefined) {
quad.subject = {
termType: 'NamedNode',
termType: TYPE_NAMED_NODE,
value: match[1]

@@ -83,3 +92,3 @@ };

quad.subject = {
termType: 'BlankNode',
termType: TYPE_BLANK_NODE,
value: match[2]

@@ -91,3 +100,3 @@ };

quad.predicate = {
termType: 'NamedNode',
termType: TYPE_NAMED_NODE,
value: match[3]

@@ -98,3 +107,3 @@ }; // get object

quad.object = {
termType: 'NamedNode',
termType: TYPE_NAMED_NODE,
value: match[4]

@@ -104,3 +113,3 @@ };

quad.object = {
termType: 'BlankNode',
termType: TYPE_BLANK_NODE,
value: match[5]

@@ -110,6 +119,6 @@ };

quad.object = {
termType: 'Literal',
termType: TYPE_LITERAL,
value: undefined,
datatype: {
termType: 'NamedNode'
termType: TYPE_NAMED_NODE
}

@@ -133,3 +142,3 @@ };

quad.graph = {
termType: 'NamedNode',
termType: TYPE_NAMED_NODE,
value: match[9]

@@ -139,3 +148,3 @@ };

quad.graph = {
termType: 'BlankNode',
termType: TYPE_BLANK_NODE,
value: match[10]

@@ -145,3 +154,3 @@ };

quad.graph = {
termType: 'DefaultGraph',
termType: TYPE_DEFAULT_GRAPH,
value: ''

@@ -211,27 +220,26 @@ };

const g = quad.graph;
let nquad = ''; // subject and predicate can only be NamedNode or BlankNode
let nquad = ''; // subject can only be NamedNode or BlankNode
[s, p].forEach(term => {
if (term.termType === 'NamedNode') {
nquad += '<' + term.value + '>';
} else {
nquad += term.value;
}
if (s.termType === TYPE_NAMED_NODE) {
nquad += `<${s.value}>`;
} else {
nquad += `${s.value}`;
} // predicate can only be NamedNode
nquad += ' ';
}); // object is NamedNode, BlankNode, or Literal
if (o.termType === 'NamedNode') {
nquad += '<' + o.value + '>';
} else if (o.termType === 'BlankNode') {
nquad += ` <${p.value}> `; // object is NamedNode, BlankNode, or Literal
if (o.termType === TYPE_NAMED_NODE) {
nquad += `<${o.value}>`;
} else if (o.termType === TYPE_BLANK_NODE) {
nquad += o.value;
} else {
nquad += '"' + _escape(o.value) + '"';
nquad += `"${_escape(o.value)}"`;
if (o.datatype.value === RDF_LANGSTRING) {
if (o.language) {
nquad += '@' + o.language;
nquad += `@${o.language}`;
}
} else if (o.datatype.value !== XSD_STRING) {
nquad += '^^<' + o.datatype.value + '>';
nquad += `^^<${o.datatype.value}>`;
}

@@ -242,6 +250,6 @@ } // graph can only be NamedNode or BlankNode (or DefaultGraph, but that

if (g.termType === 'NamedNode') {
nquad += ' <' + g.value + '>';
} else if (g.termType === 'BlankNode') {
nquad += ' ' + g.value;
if (g.termType === TYPE_NAMED_NODE) {
nquad += ` <${g.value}>`;
} else if (g.termType === TYPE_BLANK_NODE) {
nquad += ` ${g.value}`;
}

@@ -265,5 +273,5 @@

const termTypeMap = {
'blank node': 'BlankNode',
IRI: 'NamedNode',
literal: 'Literal'
'blank node': TYPE_BLANK_NODE,
IRI: TYPE_NAMED_NODE,
literal: TYPE_LITERAL
};

@@ -283,5 +291,5 @@

if (newComponent.termType === 'Literal') {
if (newComponent.termType === TYPE_LITERAL) {
newComponent.datatype = {
termType: 'NamedNode'
termType: TYPE_NAMED_NODE
};

@@ -309,3 +317,3 @@

quad.graph = {
termType: 'DefaultGraph',
termType: TYPE_DEFAULT_GRAPH,
value: ''

@@ -315,3 +323,3 @@ };

quad.graph = {
termType: graphName.startsWith('_:') ? 'BlankNode' : 'NamedNode',
termType: graphName.startsWith('_:') ? TYPE_BLANK_NODE : TYPE_NAMED_NODE,
value: graphName

@@ -339,13 +347,18 @@ };

function _compareTriples(t1, t2) {
for (const k in t1) {
if (t1[k].termType !== t2[k].termType || t1[k].value !== t2[k].value) {
return false;
}
// compare subject and object types first as it is the quickest check
if (!(t1.subject.termType === t2.subject.termType && t1.object.termType === t2.object.termType)) {
return false;
} // compare values
if (!(t1.subject.value === t2.subject.value && t1.predicate.value === t2.predicate.value && t1.object.value === t2.object.value)) {
return false;
}
if (t1.object.termType !== 'Literal') {
if (t1.object.termType !== TYPE_LITERAL) {
// no `datatype` or `language` to check
return true;
}
return t1.object.datatype.termType === t2.object.datatype.termType && t1.object.datatype.value === t2.object.datatype.value && t1.object.language === t2.object.language;
return t1.object.datatype.termType === t2.object.datatype.termType && t1.object.language === t2.object.language && t1.object.datatype.value === t2.object.datatype.value;
}

@@ -352,0 +365,0 @@

/*
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/
'use strict';
const AsyncAlgorithm = require('./AsyncAlgorithm');
function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; }
function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { _defineProperty(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; }
function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; }
function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } }
function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; }
const IdentifierIssuer = require('./IdentifierIssuer');

@@ -12,129 +20,107 @@

const Permutator = require('./Permutator');
const Permuter = require('./Permuter');
const NQuads = require('./NQuads');
const util = require('./util');
const POSITIONS = {
subject: 's',
object: 'o',
graph: 'g'
};
module.exports = class URDNA2015 extends AsyncAlgorithm {
constructor(options) {
options = options || {};
super(options);
module.exports = class URDNA2015 {
constructor() {
this.name = 'URDNA2015';
this.options = Object.assign({}, options);
this.blankNodeInfo = {};
this.hashToBlankNodes = {};
this.blankNodeInfo = new Map();
this.canonicalIssuer = new IdentifierIssuer('_:c14n');
this.hashAlgorithm = 'sha256';
this.quads;
this.quads = null;
} // 4.4) Normalization Algorithm
main(dataset, callback) {
const self = this;
self.schedule.start = Date.now();
let result;
self.quads = dataset; // 1) Create the normalization state.
// Note: Optimize by generating non-normalized blank node map concurrently.
main(dataset) {
var _this = this;
const nonNormalized = {};
self.waterfall([callback => {
return _asyncToGenerator(function* () {
_this.quads = dataset; // 1) Create the normalization state.
// 2) For every quad in input dataset:
self.forEach(dataset, (quad, idx, callback) => {
for (const quad of dataset) {
// 2.1) For each blank node that occurs in the quad, add a reference
// to the quad using the blank node identifier in the blank node to
// quads map, creating a new entry if necessary.
self.forEachComponent(quad, component => {
if (component.termType !== 'BlankNode') {
return;
}
_this._addBlankNodeQuadInfo({
quad,
component: quad.subject
});
const id = component.value;
_this._addBlankNodeQuadInfo({
quad,
component: quad.object
});
if (id in self.blankNodeInfo) {
self.blankNodeInfo[id].quads.push(quad);
} else {
nonNormalized[id] = true;
self.blankNodeInfo[id] = {
quads: [quad]
};
}
_this._addBlankNodeQuadInfo({
quad,
component: quad.graph
});
callback();
}, callback);
}, callback => {
// 3) Create a list of non-normalized blank node identifiers
} // 3) Create a list of non-normalized blank node identifiers
// non-normalized identifiers and populate it using the keys from the
// blank node to quads map.
// Note: We use a map here and it was generated during step 2.
// 4) Initialize simple, a boolean flag, to true.
let simple = true; // 5) While simple is true, issue canonical identifiers for blank nodes:
// 4) `simple` flag is skipped -- loop is optimized away. This optimization
// is permitted because there was a typo in the hash first degree quads
// algorithm in the URDNA2015 spec that was implemented widely making it
// such that it could not be fixed; the result was that the loop only
// needs to be run once and the first degree quad hashes will never change.
// 5.1-5.2 are skipped; first degree quad hashes are generated just once
// for all non-normalized blank nodes.
// 5.3) For each blank node identifier identifier in non-normalized
// identifiers:
self.whilst(() => simple, callback => {
// 5.1) Set simple to false.
simple = false; // 5.2) Clear hash to blank nodes map.
self.hashToBlankNodes = {};
self.waterfall([callback => {
// 5.3) For each blank node identifier identifier in
// non-normalized identifiers:
self.forEach(nonNormalized, (value, id, callback) => {
// 5.3.1) Create a hash, hash, according to the Hash First
// Degree Quads algorithm.
self.hashFirstDegreeQuads(id, (err, hash) => {
if (err) {
return callback(err);
} // 5.3.2) Add hash and identifier to hash to blank nodes map,
// creating a new entry if necessary.
const hashToBlankNodes = new Map();
const nonNormalized = [..._this.blankNodeInfo.keys()];
let i = 0;
for (const id of nonNormalized) {
// Note: batch hashing first degree quads 100 at a time
if (++i % 100 === 0) {
yield _this._yield();
} // steps 5.3.1 and 5.3.2:
if (hash in self.hashToBlankNodes) {
self.hashToBlankNodes[hash].push(id);
} else {
self.hashToBlankNodes[hash] = [id];
}
callback();
});
}, callback);
}, callback => {
// 5.4) For each hash to identifier list mapping in hash to blank
// nodes map, lexicographically-sorted by hash:
const hashes = Object.keys(self.hashToBlankNodes).sort();
self.forEach(hashes, (hash, i, callback) => {
// 5.4.1) If the length of identifier list is greater than 1,
// continue to the next mapping.
const idList = self.hashToBlankNodes[hash];
yield _this._hashAndTrackBlankNode({
id,
hashToBlankNodes
});
} // 5.4) For each hash to identifier list mapping in hash to blank
// nodes map, lexicographically-sorted by hash:
if (idList.length > 1) {
return callback();
} // 5.4.2) Use the Issue Identifier algorithm, passing canonical
// issuer and the single blank node identifier in identifier
// list, identifier, to issue a canonical replacement identifier
// for identifier.
// TODO: consider changing `getId` to `issue`
const hashes = [...hashToBlankNodes.keys()].sort(); // optimize away second sort, gather non-unique hashes in order as we go
const id = idList[0];
self.canonicalIssuer.getId(id); // 5.4.3) Remove identifier from non-normalized identifiers.
const nonUnique = [];
delete nonNormalized[id]; // 5.4.4) Remove hash from the hash to blank nodes map.
for (const hash of hashes) {
// 5.4.1) If the length of identifier list is greater than 1,
// continue to the next mapping.
const idList = hashToBlankNodes.get(hash);
delete self.hashToBlankNodes[hash]; // 5.4.5) Set simple to true.
if (idList.length > 1) {
nonUnique.push(idList);
continue;
} // 5.4.2) Use the Issue Identifier algorithm, passing canonical
// issuer and the single blank node identifier in identifier
// list, identifier, to issue a canonical replacement identifier
// for identifier.
simple = true;
callback();
}, callback);
}], callback);
}, callback);
}, callback => {
// 6) For each hash to identifier list mapping in hash to blank nodes
// map, lexicographically-sorted by hash:
const hashes = Object.keys(self.hashToBlankNodes).sort();
self.forEach(hashes, (hash, idx, callback) => {
const id = idList[0];
_this.canonicalIssuer.getId(id); // Note: These steps are skipped, optimized away since the loop
// only needs to be run once.
// 5.4.3) Remove identifier from non-normalized identifiers.
// 5.4.4) Remove hash from the hash to blank nodes map.
// 5.4.5) Set simple to true.
} // 6) For each hash to identifier list mapping in hash to blank nodes map,
// lexicographically-sorted by hash:
// Note: sort optimized away, use `nonUnique`.
for (const idList of nonUnique) {
// 6.1) Create hash path list where each item will be a result of

@@ -144,50 +130,39 @@ // running the Hash N-Degree Quads algorithm.

const idList = self.hashToBlankNodes[hash];
self.waterfall([callback => {
self.forEach(idList, (id, idx, callback) => {
// 6.2.1) If a canonical identifier has already been issued for
// identifier, continue to the next identifier.
if (self.canonicalIssuer.hasId(id)) {
return callback();
} // 6.2.2) Create temporary issuer, an identifier issuer
// initialized with the prefix _:b.
for (const id of idList) {
// 6.2.1) If a canonical identifier has already been issued for
// identifier, continue to the next identifier.
if (_this.canonicalIssuer.hasId(id)) {
continue;
} // 6.2.2) Create temporary issuer, an identifier issuer
// initialized with the prefix _:b.
const issuer = new IdentifierIssuer('_:b'); // 6.2.3) Use the Issue Identifier algorithm, passing temporary
// issuer and identifier, to issue a new temporary blank node
// identifier for identifier.
const issuer = new IdentifierIssuer('_:b'); // 6.2.3) Use the Issue Identifier algorithm, passing temporary
// issuer and identifier, to issue a new temporary blank node
// identifier for identifier.
issuer.getId(id); // 6.2.4) Run the Hash N-Degree Quads algorithm, passing
// temporary issuer, and append the result to the hash path
// list.
issuer.getId(id); // 6.2.4) Run the Hash N-Degree Quads algorithm, passing
// temporary issuer, and append the result to the hash path list.
self.hashNDegreeQuads(id, issuer, (err, result) => {
if (err) {
return callback(err);
}
const result = yield _this.hashNDegreeQuads(id, issuer);
hashPathList.push(result);
} // 6.3) For each result in the hash path list,
// lexicographically-sorted by the hash in result:
hashPathList.push(result);
callback();
});
}, callback);
}, callback => {
// 6.3) For each result in the hash path list,
// lexicographically-sorted by the hash in result:
// TODO: use `String.localeCompare`?
hashPathList.sort((a, b) => a.hash < b.hash ? -1 : a.hash > b.hash ? 1 : 0);
self.forEach(hashPathList, (result, idx, callback) => {
// 6.3.1) For each blank node identifier, existing identifier,
// that was issued a temporary identifier by identifier issuer
// in result, issue a canonical identifier, in the same order,
// using the Issue Identifier algorithm, passing canonical
// issuer and existing identifier.
for (const existing in result.issuer.existing) {
self.canonicalIssuer.getId(existing);
}
callback();
}, callback);
}], callback);
}, callback);
}, callback => {
hashPathList.sort(_stringHashCompare);
for (const result of hashPathList) {
// 6.3.1) For each blank node identifier, existing identifier,
// that was issued a temporary identifier by identifier issuer
// in result, issue a canonical identifier, in the same order,
// using the Issue Identifier algorithm, passing canonical
// issuer and existing identifier.
const oldIds = result.issuer.getOldIds();
for (const id of oldIds) {
_this.canonicalIssuer.getId(id);
}
}
}
/* Note: At this point all blank nodes in the set of RDF quads have been

@@ -198,64 +173,65 @@ assigned canonical identifiers, which have been stored in the canonical

// 7) For each quad, quad, in input dataset:
const normalized = [];
self.waterfall([callback => {
self.forEach(self.quads, (quad, idx, callback) => {
// 7.1) Create a copy, quad copy, of quad and replace any existing
// blank node identifiers using the canonical identifiers
// previously issued by canonical issuer.
// Note: We optimize away the copy here.
self.forEachComponent(quad, component => {
if (component.termType === 'BlankNode' && !component.value.startsWith(self.canonicalIssuer.prefix)) {
component.value = self.canonicalIssuer.getId(component.value);
}
}); // 7.2) Add quad copy to the normalized dataset.
normalized.push(NQuads.serializeQuad(quad));
callback();
}, callback);
}, callback => {
// sort normalized output
normalized.sort(); // 8) Return the normalized dataset.
for (const quad of _this.quads) {
// 7.1) Create a copy, quad copy, of quad and replace any existing
// blank node identifiers using the canonical identifiers
// previously issued by canonical issuer.
// Note: We optimize with shallow copies here.
const q = _objectSpread({}, quad);
result = normalized.join('');
return callback();
}], callback);
}], err => callback(err, result));
q.subject = _this._useCanonicalId({
component: q.subject
});
q.object = _this._useCanonicalId({
component: q.object
});
q.graph = _this._useCanonicalId({
component: q.graph
}); // 7.2) Add quad copy to the normalized dataset.
normalized.push(NQuads.serializeQuad(q));
} // sort normalized output
normalized.sort(); // 8) Return the normalized dataset.
return normalized.join('');
})();
} // 4.6) Hash First Degree Quads
hashFirstDegreeQuads(id, callback) {
const self = this; // return cached hash
hashFirstDegreeQuads(id) {
var _this2 = this;
const info = self.blankNodeInfo[id];
return _asyncToGenerator(function* () {
// 1) Initialize nquads to an empty list. It will be used to store quads in
// N-Quads format.
const nquads = []; // 2) Get the list of quads `quads` associated with the reference blank node
// identifier in the blank node to quads map.
if ('hash' in info) {
return callback(null, info.hash);
} // 1) Initialize nquads to an empty list. It will be used to store quads in
// N-Quads format.
const info = _this2.blankNodeInfo.get(id);
const quads = info.quads; // 3) For each quad `quad` in `quads`:
const nquads = []; // 2) Get the list of quads quads associated with the reference blank node
// identifier in the blank node to quads map.
for (const quad of quads) {
// 3.1) Serialize the quad in N-Quads format with the following special
// rule:
// 3.1.1) If any component in quad is an blank node, then serialize it
// using a special identifier as follows:
const copy = {
subject: null,
predicate: quad.predicate,
object: null,
graph: null
}; // 3.1.2) If the blank node's existing blank node identifier matches
// the reference blank node identifier then use the blank node
// identifier _:a, otherwise, use the blank node identifier _:z.
const quads = info.quads; // 3) For each quad quad in quads:
self.forEach(quads, (quad, idx, callback) => {
// 3.1) Serialize the quad in N-Quads format with the following special
// rule:
// 3.1.1) If any component in quad is an blank node, then serialize it
// using a special identifier as follows:
const copy = {
predicate: quad.predicate
};
self.forEachComponent(quad, (component, key) => {
// 3.1.2) If the blank node's existing blank node identifier matches the
// reference blank node identifier then use the blank node identifier
// _:a, otherwise, use the blank node identifier _:z.
copy[key] = self.modifyFirstDegreeComponent(id, component, key);
});
nquads.push(NQuads.serializeQuad(copy));
callback();
}, err => {
if (err) {
return callback(err);
copy.subject = _this2.modifyFirstDegreeComponent(id, quad.subject, 'subject');
copy.object = _this2.modifyFirstDegreeComponent(id, quad.object, 'object');
copy.graph = _this2.modifyFirstDegreeComponent(id, quad.graph, 'graph');
nquads.push(NQuads.serializeQuad(copy));
} // 4) Sort nquads in lexicographical order.

@@ -267,44 +243,30 @@

const md = new MessageDigest(self.hashAlgorithm);
const md = new MessageDigest(_this2.hashAlgorithm);
for (let i = 0; i < nquads.length; ++i) {
md.update(nquads[i]);
} // TODO: represent as byte buffer instead to cut memory usage in half
for (const nquad of nquads) {
md.update(nquad);
}
info.hash = md.digest();
callback(null, info.hash);
});
info.hash = yield md.digest();
return info.hash;
})();
} // 4.7) Hash Related Blank Node
hashRelatedBlankNode(related, quad, issuer, position, callback) {
const self = this; // 1) Set the identifier to use for related, preferring first the canonical
// identifier for related if issued, second the identifier issued by issuer
// if issued, and last, if necessary, the result of the Hash First Degree
// Quads algorithm, passing related.
hashRelatedBlankNode(related, quad, issuer, position) {
var _this3 = this;
let id;
self.waterfall([callback => {
if (self.canonicalIssuer.hasId(related)) {
id = self.canonicalIssuer.getId(related);
return callback();
}
return _asyncToGenerator(function* () {
// 1) Set the identifier to use for related, preferring first the canonical
// identifier for related if issued, second the identifier issued by issuer
// if issued, and last, if necessary, the result of the Hash First Degree
// Quads algorithm, passing related.
let id;
if (issuer.hasId(related)) {
if (_this3.canonicalIssuer.hasId(related)) {
id = _this3.canonicalIssuer.getId(related);
} else if (issuer.hasId(related)) {
id = issuer.getId(related);
return callback();
}
self.hashFirstDegreeQuads(related, (err, hash) => {
if (err) {
return callback(err);
}
id = hash;
callback();
});
}], err => {
if (err) {
return callback(err);
} else {
id = _this3.blankNodeInfo.get(related).hash;
} // 2) Initialize a string input to the value of position.

@@ -314,3 +276,3 @@ // Note: We use a hash object instead.

const md = new MessageDigest(self.hashAlgorithm);
const md = new MessageDigest(_this3.hashAlgorithm);
md.update(position); // 3) If position is not g, append <, the value of the predicate in quad,

@@ -320,3 +282,3 @@ // and > to input.

if (position !== 'g') {
md.update(self.getRelatedPredicate(quad));
md.update(_this3.getRelatedPredicate(quad));
} // 4) Append identifier to input.

@@ -327,30 +289,24 @@

// algorithm.
// TODO: represent as byte buffer instead to cut memory usage in half
return callback(null, md.digest());
});
return md.digest();
})();
} // 4.8) Hash N-Degree Quads
hashNDegreeQuads(id, issuer, callback) {
const self = this; // 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
// Note: 2) and 3) handled within `createHashToRelated`
hashNDegreeQuads(id, issuer) {
var _this4 = this;
let hashToRelated;
const md = new MessageDigest(self.hashAlgorithm);
self.waterfall([callback => self.createHashToRelated(id, issuer, (err, result) => {
if (err) {
return callback(err);
}
return _asyncToGenerator(function* () {
// 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
// Note: 2) and 3) handled within `createHashToRelated`
const md = new MessageDigest(_this4.hashAlgorithm);
const hashToRelated = yield _this4.createHashToRelated(id, issuer); // 4) Create an empty string, data to hash.
// Note: We created a hash object `md` above instead.
// 5) For each related hash to blank node list mapping in hash to related
// blank nodes map, sorted lexicographically by related hash:
hashToRelated = result;
callback();
}), callback => {
// 4) Create an empty string, data to hash.
// Note: We created a hash object `md` above instead.
// 5) For each related hash to blank node list mapping in hash to
// related blank nodes map, sorted lexicographically by related hash:
const hashes = Object.keys(hashToRelated).sort();
self.forEach(hashes, (hash, idx, callback) => {
const hashes = [...hashToRelated.keys()].sort();
for (const hash of hashes) {
// 5.1) Append the related hash to the data to hash.

@@ -363,6 +319,13 @@ md.update(hash); // 5.2) Create a string chosen path.

const permutator = new Permutator(hashToRelated[hash]);
self.whilst(() => permutator.hasNext(), nextPermutation => {
const permutation = permutator.next(); // 5.4.1) Create a copy of issuer, issuer copy.
const permuter = new Permuter(hashToRelated.get(hash));
let i = 0;
while (permuter.hasNext()) {
const permutation = permuter.next(); // Note: batch permutations 3 at a time
if (++i % 3 === 0) {
yield _this4._yield();
} // 5.4.1) Create a copy of issuer, issuer copy.
let issuerCopy = issuer.clone(); // 5.4.2) Create a string path.

@@ -373,100 +336,92 @@

const recursionList = [];
self.waterfall([callback => {
// 5.4.4) For each related in permutation:
self.forEach(permutation, (related, idx, callback) => {
// 5.4.4.1) If a canonical identifier has been issued for
// related, append it to path.
if (self.canonicalIssuer.hasId(related)) {
path += self.canonicalIssuer.getId(related);
} else {
// 5.4.4.2) Otherwise:
// 5.4.4.2.1) If issuer copy has not issued an identifier
// for related, append related to recursion list.
if (!issuerCopy.hasId(related)) {
recursionList.push(related);
} // 5.4.4.2.2) Use the Issue Identifier algorithm, passing
// issuer copy and related and append the result to path.
const recursionList = []; // 5.4.4) For each related in permutation:
let nextPermutation = false;
path += issuerCopy.getId(related);
} // 5.4.4.3) If chosen path is not empty and the length of path
// is greater than or equal to the length of chosen path and
// path is lexicographically greater than chosen path, then
// skip to the next permutation.
// Note: Comparing path length to chosen path length can be
// optimized away; only compare lexicographically.
for (const related of permutation) {
// 5.4.4.1) If a canonical identifier has been issued for
// related, append it to path.
if (_this4.canonicalIssuer.hasId(related)) {
path += _this4.canonicalIssuer.getId(related);
} else {
// 5.4.4.2) Otherwise:
// 5.4.4.2.1) If issuer copy has not issued an identifier for
// related, append related to recursion list.
if (!issuerCopy.hasId(related)) {
recursionList.push(related);
} // 5.4.4.2.2) Use the Issue Identifier algorithm, passing
// issuer copy and related and append the result to path.
if (chosenPath.length !== 0 && path > chosenPath) {
// FIXME: may cause inaccurate total depth calculation
return nextPermutation();
}
path += issuerCopy.getId(related);
} // 5.4.4.3) If chosen path is not empty and the length of path
// is greater than or equal to the length of chosen path and
// path is lexicographically greater than chosen path, then
// skip to the next permutation.
// Note: Comparing path length to chosen path length can be optimized
// away; only compare lexicographically.
callback();
}, callback);
}, callback => {
// 5.4.5) For each related in recursion list:
self.forEach(recursionList, (related, idx, callback) => {
// 5.4.5.1) Set result to the result of recursively executing
// the Hash N-Degree Quads algorithm, passing related for
// identifier and issuer copy for path identifier issuer.
self.hashNDegreeQuads(related, issuerCopy, (err, result) => {
if (err) {
return callback(err);
} // 5.4.5.2) Use the Issue Identifier algorithm, passing
// issuer copy and related and append the result to path.
if (chosenPath.length !== 0 && path > chosenPath) {
nextPermutation = true;
break;
}
}
path += issuerCopy.getId(related); // 5.4.5.3) Append <, the hash in result, and > to path.
if (nextPermutation) {
continue;
} // 5.4.5) For each related in recursion list:
path += '<' + result.hash + '>'; // 5.4.5.4) Set issuer copy to the identifier issuer in
// result.
issuerCopy = result.issuer; // 5.4.5.5) If chosen path is not empty and the length of
// path is greater than or equal to the length of chosen
// path and path is lexicographically greater than chosen
// path, then skip to the next permutation.
// Note: Comparing path length to chosen path length can be
// optimized away; only compare lexicographically.
for (const related of recursionList) {
// 5.4.5.1) Set result to the result of recursively executing
// the Hash N-Degree Quads algorithm, passing related for
// identifier and issuer copy for path identifier issuer.
const result = yield _this4.hashNDegreeQuads(related, issuerCopy); // 5.4.5.2) Use the Issue Identifier algorithm, passing issuer
// copy and related and append the result to path.
if (chosenPath.length !== 0 && path > chosenPath) {
// FIXME: may cause inaccurate total depth calculation
return nextPermutation();
}
path += issuerCopy.getId(related); // 5.4.5.3) Append <, the hash in result, and > to path.
callback();
});
}, callback);
}, callback => {
// 5.4.6) If chosen path is empty or path is lexicographically
// less than chosen path, set chosen path to path and chosen
// issuer to issuer copy.
if (chosenPath.length === 0 || path < chosenPath) {
chosenPath = path;
chosenIssuer = issuerCopy;
path += `<${result.hash}>`; // 5.4.5.4) Set issuer copy to the identifier issuer in
// result.
issuerCopy = result.issuer; // 5.4.5.5) If chosen path is not empty and the length of path
// is greater than or equal to the length of chosen path and
// path is lexicographically greater than chosen path, then
// skip to the next permutation.
// Note: Comparing path length to chosen path length can be optimized
// away; only compare lexicographically.
if (chosenPath.length !== 0 && path > chosenPath) {
nextPermutation = true;
break;
}
}
callback();
}], nextPermutation);
}, err => {
if (err) {
return callback(err);
} // 5.5) Append chosen path to data to hash.
if (nextPermutation) {
continue;
} // 5.4.6) If chosen path is empty or path is lexicographically
// less than chosen path, set chosen path to path and chosen
// issuer to issuer copy.
md.update(chosenPath); // 5.6) Replace issuer, by reference, with chosen issuer.
if (chosenPath.length === 0 || path < chosenPath) {
chosenPath = path;
chosenIssuer = issuerCopy;
}
} // 5.5) Append chosen path to data to hash.
issuer = chosenIssuer;
callback();
});
}, callback);
}], err => {
// 6) Return issuer and the hash that results from passing data to hash
md.update(chosenPath); // 5.6) Replace issuer, by reference, with chosen issuer.
issuer = chosenIssuer;
} // 6) Return issuer and the hash that results from passing data to hash
// through the hash algorithm.
callback(err, {
hash: md.digest(),
return {
hash: yield md.digest(),
issuer
});
});
};
})();
} // helper for modifying component during Hash First Degree Quads

@@ -479,6 +434,13 @@

}
/* Note: A mistake in the URDNA2015 spec that made its way into
implementations (and therefore must stay to avoid interop breakage)
resulted in an assigned canonical ID, if available for
`component.value`, not being used in place of `_:a`/`_:z`, so
we don't use it here. */
component = util.clone(component);
component.value = component.value === id ? '_:a' : '_:z';
return component;
return {
termType: 'BlankNode',
value: component.value === id ? '_:a' : '_:z'
};
} // helper for getting a related predicate

@@ -488,63 +450,159 @@

getRelatedPredicate(quad) {
return '<' + quad.predicate.value + '>';
return `<${quad.predicate.value}>`;
} // helper for creating hash to related blank nodes map
createHashToRelated(id, issuer, callback) {
const self = this; // 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
createHashToRelated(id, issuer) {
var _this5 = this;
const hashToRelated = {}; // 2) Get a reference, quads, to the list of quads in the blank node to
// quads map for the key identifier.
return _asyncToGenerator(function* () {
// 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
const hashToRelated = new Map(); // 2) Get a reference, quads, to the list of quads in the blank node to
// quads map for the key identifier.
const quads = self.blankNodeInfo[id].quads; // 3) For each quad in quads:
const quads = _this5.blankNodeInfo.get(id).quads; // 3) For each quad in quads:
self.forEach(quads, (quad, idx, callback) => {
// 3.1) For each component in quad, if component is the subject, object,
// and graph name and it is a blank node that is not identified by
// identifier:
self.forEach(quad, (component, key, callback) => {
if (key === 'predicate' || !(component.termType === 'BlankNode' && component.value !== id)) {
return callback();
} // 3.1.1) Set hash to the result of the Hash Related Blank Node
// algorithm, passing the blank node identifier for component as
// related, quad, path identifier issuer as issuer, and position as
// either s, o, or g based on whether component is a subject, object,
// graph name, respectively.
let i = 0;
const related = component.value;
const position = POSITIONS[key];
self.hashRelatedBlankNode(related, quad, issuer, position, (err, hash) => {
if (err) {
return callback(err);
} // 3.1.2) Add a mapping of hash to the blank node identifier for
// component to hash to related blank nodes map, adding an entry as
// necessary.
for (const quad of quads) {
// Note: batch hashing related blank node quads 100 at a time
if (++i % 100 === 0) {
yield _this5._yield();
} // 3.1) For each component in quad, if component is the subject, object,
// and graph name and it is a blank node that is not identified by
// identifier:
// steps 3.1.1 and 3.1.2 occur in helpers:
if (hash in hashToRelated) {
hashToRelated[hash].push(related);
} else {
hashToRelated[hash] = [related];
}
yield Promise.all([_this5._addRelatedBlankNodeHash({
quad,
component: quad.subject,
position: 's',
id,
issuer,
hashToRelated
}), _this5._addRelatedBlankNodeHash({
quad,
component: quad.object,
position: 'o',
id,
issuer,
hashToRelated
}), _this5._addRelatedBlankNodeHash({
quad,
component: quad.graph,
position: 'g',
id,
issuer,
hashToRelated
})]);
}
callback();
});
}, callback);
}, err => callback(err, hashToRelated));
} // helper that iterates over quad components (skips predicate)
return hashToRelated;
})();
}
_hashAndTrackBlankNode({
id,
hashToBlankNodes
}) {
var _this6 = this;
forEachComponent(quad, op) {
for (const key in quad) {
// skip `predicate`
if (key === 'predicate') {
continue;
return _asyncToGenerator(function* () {
// 5.3.1) Create a hash, hash, according to the Hash First Degree
// Quads algorithm.
const hash = yield _this6.hashFirstDegreeQuads(id); // 5.3.2) Add hash and identifier to hash to blank nodes map,
// creating a new entry if necessary.
const idList = hashToBlankNodes.get(hash);
if (!idList) {
hashToBlankNodes.set(hash, [id]);
} else {
idList.push(id);
}
})();
}
op(quad[key], key, quad);
_addBlankNodeQuadInfo({
quad,
component
}) {
if (component.termType !== 'BlankNode') {
return;
}
const id = component.value;
const info = this.blankNodeInfo.get(id);
if (info) {
info.quads.add(quad);
} else {
this.blankNodeInfo.set(id, {
quads: new Set([quad]),
hash: null
});
}
}
};
_addRelatedBlankNodeHash({
quad,
component,
position,
id,
issuer,
hashToRelated
}) {
var _this7 = this;
return _asyncToGenerator(function* () {
if (!(component.termType === 'BlankNode' && component.value !== id)) {
return;
} // 3.1.1) Set hash to the result of the Hash Related Blank Node
// algorithm, passing the blank node identifier for component as
// related, quad, path identifier issuer as issuer, and position as
// either s, o, or g based on whether component is a subject, object,
// graph name, respectively.
const related = component.value;
const hash = yield _this7.hashRelatedBlankNode(related, quad, issuer, position); // 3.1.2) Add a mapping of hash to the blank node identifier for
// component to hash to related blank nodes map, adding an entry as
// necessary.
const entries = hashToRelated.get(hash);
if (entries) {
entries.push(related);
} else {
hashToRelated.set(hash, [related]);
}
})();
}
_useCanonicalId({
component
}) {
if (component.termType === 'BlankNode' && !component.value.startsWith(this.canonicalIssuer.prefix)) {
return {
termType: 'BlankNode',
value: this.canonicalIssuer.getId(component.value)
};
}
return component;
}
_yield() {
return _asyncToGenerator(function* () {
return new Promise(resolve => setImmediate(resolve));
})();
}
};
function _stringHashCompare(a, b) {
return a.hash < b.hash ? -1 : a.hash > b.hash ? 1 : 0;
}
/*
* Copyright (c) 2016 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/
'use strict';
function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; }
function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { _defineProperty(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; }
function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; }
const IdentifierIssuer = require('./IdentifierIssuer');

@@ -10,21 +16,13 @@

const Permutator = require('./Permutator');
const Permuter = require('./Permuter');
const NQuads = require('./NQuads');
const util = require('./util');
const POSITIONS = {
subject: 's',
object: 'o',
graph: 'g'
};
module.exports = class URDNA2015Sync {
constructor() {
this.name = 'URDNA2015';
this.blankNodeInfo = {};
this.hashToBlankNodes = {};
this.blankNodeInfo = new Map();
this.canonicalIssuer = new IdentifierIssuer('_:c14n');
this.hashAlgorithm = 'sha256';
this.quads;
this.quads = null;
} // 4.4) Normalization Algorithm

@@ -34,8 +32,5 @@

main(dataset) {
const self = this;
self.quads = dataset; // 1) Create the normalization state.
// Note: Optimize by generating non-normalized blank node map concurrently.
this.quads = dataset; // 1) Create the normalization state.
// 2) For every quad in input dataset:
const nonNormalized = {}; // 2) For every quad in input dataset:
for (const quad of dataset) {

@@ -45,17 +40,15 @@ // 2.1) For each blank node that occurs in the quad, add a reference

// quads map, creating a new entry if necessary.
self.forEachComponent(quad, component => {
if (component.termType !== 'BlankNode') {
return;
}
this._addBlankNodeQuadInfo({
quad,
component: quad.subject
});
const id = component.value;
this._addBlankNodeQuadInfo({
quad,
component: quad.object
});
if (id in self.blankNodeInfo) {
self.blankNodeInfo[id].quads.push(quad);
} else {
nonNormalized[id] = true;
self.blankNodeInfo[id] = {
quads: [quad]
};
}
this._addBlankNodeQuadInfo({
quad,
component: quad.graph
});

@@ -66,62 +59,56 @@ } // 3) Create a list of non-normalized blank node identifiers

// Note: We use a map here and it was generated during step 2.
// 4) Initialize simple, a boolean flag, to true.
// 4) `simple` flag is skipped -- loop is optimized away. This optimization
// is permitted because there was a typo in the hash first degree quads
// algorithm in the URDNA2015 spec that was implemented widely making it
// such that it could not be fixed; the result was that the loop only
// needs to be run once and the first degree quad hashes will never change.
// 5.1-5.2 are skipped; first degree quad hashes are generated just once
// for all non-normalized blank nodes.
// 5.3) For each blank node identifier identifier in non-normalized
// identifiers:
let simple = true; // 5) While simple is true, issue canonical identifiers for blank nodes:
const hashToBlankNodes = new Map();
const nonNormalized = [...this.blankNodeInfo.keys()];
while (simple) {
// 5.1) Set simple to false.
simple = false; // 5.2) Clear hash to blank nodes map.
for (const id of nonNormalized) {
// steps 5.3.1 and 5.3.2:
this._hashAndTrackBlankNode({
id,
hashToBlankNodes
});
} // 5.4) For each hash to identifier list mapping in hash to blank
// nodes map, lexicographically-sorted by hash:
self.hashToBlankNodes = {}; // 5.3) For each blank node identifier identifier in non-normalized
// identifiers:
for (const id in nonNormalized) {
// 5.3.1) Create a hash, hash, according to the Hash First Degree
// Quads algorithm.
const hash = self.hashFirstDegreeQuads(id); // 5.3.2) Add hash and identifier to hash to blank nodes map,
// creating a new entry if necessary.
const hashes = [...hashToBlankNodes.keys()].sort(); // optimize away second sort, gather non-unique hashes in order as we go
if (hash in self.hashToBlankNodes) {
self.hashToBlankNodes[hash].push(id);
} else {
self.hashToBlankNodes[hash] = [id];
}
} // 5.4) For each hash to identifier list mapping in hash to blank
// nodes map, lexicographically-sorted by hash:
const nonUnique = [];
for (const hash of hashes) {
// 5.4.1) If the length of identifier list is greater than 1,
// continue to the next mapping.
const idList = hashToBlankNodes.get(hash);
const hashes = Object.keys(self.hashToBlankNodes).sort();
if (idList.length > 1) {
nonUnique.push(idList);
continue;
} // 5.4.2) Use the Issue Identifier algorithm, passing canonical
// issuer and the single blank node identifier in identifier
// list, identifier, to issue a canonical replacement identifier
// for identifier.
for (let i = 0; i < hashes.length; ++i) {
// 5.4.1) If the length of identifier list is greater than 1,
// continue to the next mapping.
const hash = hashes[i];
const idList = self.hashToBlankNodes[hash];
if (idList.length > 1) {
continue;
} // 5.4.2) Use the Issue Identifier algorithm, passing canonical
// issuer and the single blank node identifier in identifier
// list, identifier, to issue a canonical replacement identifier
// for identifier.
// TODO: consider changing `getId` to `issue`
const id = idList[0];
self.canonicalIssuer.getId(id); // 5.4.3) Remove identifier from non-normalized identifiers.
delete nonNormalized[id]; // 5.4.4) Remove hash from the hash to blank nodes map.
delete self.hashToBlankNodes[hash]; // 5.4.5) Set simple to true.
simple = true;
}
const id = idList[0];
this.canonicalIssuer.getId(id); // Note: These steps are skipped, optimized away since the loop
// only needs to be run once.
// 5.4.3) Remove identifier from non-normalized identifiers.
// 5.4.4) Remove hash from the hash to blank nodes map.
// 5.4.5) Set simple to true.
} // 6) For each hash to identifier list mapping in hash to blank nodes map,
// lexicographically-sorted by hash:
// Note: sort optimized away, use `nonUnique`.
const hashes = Object.keys(self.hashToBlankNodes).sort();
for (let i = 0; i < hashes.length; ++i) {
for (const idList of nonUnique) {
// 6.1) Create hash path list where each item will be a result of

@@ -131,11 +118,6 @@ // running the Hash N-Degree Quads algorithm.

const hash = hashes[i];
const idList = self.hashToBlankNodes[hash];
for (let j = 0; j < idList.length; ++j) {
for (const id of idList) {
// 6.2.1) If a canonical identifier has already been issued for
// identifier, continue to the next identifier.
const id = idList[j];
if (self.canonicalIssuer.hasId(id)) {
if (this.canonicalIssuer.hasId(id)) {
continue;

@@ -153,12 +135,11 @@ } // 6.2.2) Create temporary issuer, an identifier issuer

const result = self.hashNDegreeQuads(id, issuer);
const result = this.hashNDegreeQuads(id, issuer);
hashPathList.push(result);
} // 6.3) For each result in the hash path list,
// lexicographically-sorted by the hash in result:
// TODO: use `String.localeCompare`?
hashPathList.sort((a, b) => a.hash < b.hash ? -1 : a.hash > b.hash ? 1 : 0);
hashPathList.sort(_stringHashCompare);
for (let j = 0; j < hashPathList.length; ++j) {
for (const result of hashPathList) {
// 6.3.1) For each blank node identifier, existing identifier,

@@ -169,6 +150,6 @@ // that was issued a temporary identifier by identifier issuer

// issuer and existing identifier.
const result = hashPathList[j];
const oldIds = result.issuer.getOldIds();
for (const existing in result.issuer.existing) {
self.canonicalIssuer.getId(existing);
for (const id of oldIds) {
this.canonicalIssuer.getId(id);
}

@@ -186,15 +167,20 @@ }

for (let i = 0; i < self.quads.length; ++i) {
for (const quad of this.quads) {
// 7.1) Create a copy, quad copy, of quad and replace any existing
// blank node identifiers using the canonical identifiers
// previously issued by canonical issuer.
// Note: We optimize away the copy here.
const quad = self.quads[i];
self.forEachComponent(quad, component => {
if (component.termType === 'BlankNode' && !component.value.startsWith(self.canonicalIssuer.prefix)) {
component.value = self.canonicalIssuer.getId(component.value);
}
// Note: We optimize with shallow copies here.
const q = _objectSpread({}, quad);
q.subject = this._useCanonicalId({
component: q.subject
});
q.object = this._useCanonicalId({
component: q.object
});
q.graph = this._useCanonicalId({
component: q.graph
}); // 7.2) Add quad copy to the normalized dataset.
normalized.push(NQuads.serializeQuad(quad));
normalized.push(NQuads.serializeQuad(q));
} // sort normalized output

@@ -210,32 +196,27 @@

hashFirstDegreeQuads(id) {
const self = this; // return cached hash
const info = self.blankNodeInfo[id];
if ('hash' in info) {
return info.hash;
} // 1) Initialize nquads to an empty list. It will be used to store quads in
// 1) Initialize nquads to an empty list. It will be used to store quads in
// N-Quads format.
const nquads = []; // 2) Get the list of quads `quads` associated with the reference blank node
// identifier in the blank node to quads map.
const info = this.blankNodeInfo.get(id);
const quads = info.quads; // 3) For each quad `quad` in `quads`:
for (let i = 0; i < quads.length; ++i) {
const quad = quads[i]; // 3.1) Serialize the quad in N-Quads format with the following special
for (const quad of quads) {
// 3.1) Serialize the quad in N-Quads format with the following special
// rule:
// 3.1.1) If any component in quad is an blank node, then serialize it
// using a special identifier as follows:
const copy = {
subject: null,
predicate: quad.predicate,
object: null,
graph: null
}; // 3.1.2) If the blank node's existing blank node identifier matches
// the reference blank node identifier then use the blank node
// identifier _:a, otherwise, use the blank node identifier _:z.
const copy = {
predicate: quad.predicate
};
self.forEachComponent(quad, (component, key) => {
// 3.1.2) If the blank node's existing blank node identifier matches
// the reference blank node identifier then use the blank node
// identifier _:a, otherwise, use the blank node identifier _:z.
copy[key] = self.modifyFirstDegreeComponent(id, component, key);
});
copy.subject = this.modifyFirstDegreeComponent(id, quad.subject, 'subject');
copy.object = this.modifyFirstDegreeComponent(id, quad.object, 'object');
copy.graph = this.modifyFirstDegreeComponent(id, quad.graph, 'graph');
nquads.push(NQuads.serializeQuad(copy));

@@ -248,9 +229,8 @@ } // 4) Sort nquads in lexicographical order.

const md = new MessageDigest(self.hashAlgorithm);
const md = new MessageDigest(this.hashAlgorithm);
for (let i = 0; i < nquads.length; ++i) {
md.update(nquads[i]);
} // TODO: represent as byte buffer instead to cut memory usage in half
for (const nquad of nquads) {
md.update(nquad);
}
info.hash = md.digest();

@@ -262,15 +242,14 @@ return info.hash;

hashRelatedBlankNode(related, quad, issuer, position) {
const self = this; // 1) Set the identifier to use for related, preferring first the canonical
// 1) Set the identifier to use for related, preferring first the canonical
// identifier for related if issued, second the identifier issued by issuer
// if issued, and last, if necessary, the result of the Hash First Degree
// Quads algorithm, passing related.
let id;
if (self.canonicalIssuer.hasId(related)) {
id = self.canonicalIssuer.getId(related);
if (this.canonicalIssuer.hasId(related)) {
id = this.canonicalIssuer.getId(related);
} else if (issuer.hasId(related)) {
id = issuer.getId(related);
} else {
id = self.hashFirstDegreeQuads(related);
id = this.blankNodeInfo.get(related).hash;
} // 2) Initialize a string input to the value of position.

@@ -280,3 +259,3 @@ // Note: We use a hash object instead.

const md = new MessageDigest(self.hashAlgorithm);
const md = new MessageDigest(this.hashAlgorithm);
md.update(position); // 3) If position is not g, append <, the value of the predicate in quad,

@@ -286,3 +265,3 @@ // and > to input.

if (position !== 'g') {
md.update(self.getRelatedPredicate(quad));
md.update(this.getRelatedPredicate(quad));
} // 4) Append identifier to input.

@@ -293,3 +272,2 @@

// algorithm.
// TODO: represent as byte buffer instead to cut memory usage in half

@@ -301,8 +279,7 @@ return md.digest();

hashNDegreeQuads(id, issuer) {
const self = this; // 1) Create a hash to related blank nodes map for storing hashes that
// 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
// Note: 2) and 3) handled within `createHashToRelated`
const md = new MessageDigest(self.hashAlgorithm);
const hashToRelated = self.createHashToRelated(id, issuer); // 4) Create an empty string, data to hash.
const md = new MessageDigest(this.hashAlgorithm);
const hashToRelated = this.createHashToRelated(id, issuer); // 4) Create an empty string, data to hash.
// Note: We created a hash object `md` above instead.

@@ -312,7 +289,6 @@ // 5) For each related hash to blank node list mapping in hash to related

const hashes = Object.keys(hashToRelated).sort();
const hashes = [...hashToRelated.keys()].sort();
for (let i = 0; i < hashes.length; ++i) {
for (const hash of hashes) {
// 5.1) Append the related hash to the data to hash.
const hash = hashes[i];
md.update(hash); // 5.2) Create a string chosen path.

@@ -324,6 +300,6 @@

const permutator = new Permutator(hashToRelated[hash]);
const permuter = new Permuter(hashToRelated.get(hash));
while (permutator.hasNext()) {
const permutation = permutator.next(); // 5.4.1) Create a copy of issuer, issuer copy.
while (permuter.hasNext()) {
const permutation = permuter.next(); // 5.4.1) Create a copy of issuer, issuer copy.

@@ -339,9 +315,7 @@ let issuerCopy = issuer.clone(); // 5.4.2) Create a string path.

for (let j = 0; j < permutation.length; ++j) {
for (const related of permutation) {
// 5.4.4.1) If a canonical identifier has been issued for
// related, append it to path.
const related = permutation[j];
if (self.canonicalIssuer.hasId(related)) {
path += self.canonicalIssuer.getId(related);
if (this.canonicalIssuer.hasId(related)) {
path += this.canonicalIssuer.getId(related);
} else {

@@ -377,8 +351,7 @@ // 5.4.4.2) Otherwise:

for (let j = 0; j < recursionList.length; ++j) {
for (const related of recursionList) {
// 5.4.5.1) Set result to the result of recursively executing
// the Hash N-Degree Quads algorithm, passing related for
// identifier and issuer copy for path identifier issuer.
const related = recursionList[j];
const result = self.hashNDegreeQuads(related, issuerCopy); // 5.4.5.2) Use the Issue Identifier algorithm, passing issuer
const result = this.hashNDegreeQuads(related, issuerCopy); // 5.4.5.2) Use the Issue Identifier algorithm, passing issuer
// copy and related and append the result to path.

@@ -388,3 +361,3 @@

path += '<' + result.hash + '>'; // 5.4.5.4) Set issuer copy to the identifier issuer in
path += `<${result.hash}>`; // 5.4.5.4) Set issuer copy to the identifier issuer in
// result.

@@ -437,6 +410,13 @@

}
/* Note: A mistake in the URDNA2015 spec that made its way into
implementations (and therefore must stay to avoid interop breakage)
resulted in an assigned canonical ID, if available for
`component.value`, not being used in place of `_:a`/`_:z`, so
we don't use it here. */
component = util.clone(component);
component.value = component.value === id ? '_:a' : '_:z';
return component;
return {
termType: 'BlankNode',
value: component.value === id ? '_:a' : '_:z'
};
} // helper for getting a related predicate

@@ -446,3 +426,3 @@

getRelatedPredicate(quad) {
return '<' + quad.predicate.value + '>';
return `<${quad.predicate.value}>`;
} // helper for creating hash to related blank nodes map

@@ -452,57 +432,132 @@

createHashToRelated(id, issuer) {
const self = this; // 1) Create a hash to related blank nodes map for storing hashes that
// 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
const hashToRelated = {}; // 2) Get a reference, quads, to the list of quads in the blank node to
const hashToRelated = new Map(); // 2) Get a reference, quads, to the list of quads in the blank node to
// quads map for the key identifier.
const quads = self.blankNodeInfo[id].quads; // 3) For each quad in quads:
const quads = this.blankNodeInfo.get(id).quads; // 3) For each quad in quads:
for (let i = 0; i < quads.length; ++i) {
for (const quad of quads) {
// 3.1) For each component in quad, if component is the subject, object,
// and graph name and it is a blank node that is not identified by
// or graph name and it is a blank node that is not identified by
// identifier:
const quad = quads[i];
// steps 3.1.1 and 3.1.2 occur in helpers:
this._addRelatedBlankNodeHash({
quad,
component: quad.subject,
position: 's',
id,
issuer,
hashToRelated
});
for (const key in quad) {
const component = quad[key];
this._addRelatedBlankNodeHash({
quad,
component: quad.object,
position: 'o',
id,
issuer,
hashToRelated
});
if (key === 'predicate' || !(component.termType === 'BlankNode' && component.value !== id)) {
continue;
} // 3.1.1) Set hash to the result of the Hash Related Blank Node
// algorithm, passing the blank node identifier for component as
// related, quad, path identifier issuer as issuer, and position as
// either s, o, or g based on whether component is a subject, object,
// graph name, respectively.
this._addRelatedBlankNodeHash({
quad,
component: quad.graph,
position: 'g',
id,
issuer,
hashToRelated
});
}
return hashToRelated;
}
const related = component.value;
const position = POSITIONS[key];
const hash = self.hashRelatedBlankNode(related, quad, issuer, position); // 3.1.2) Add a mapping of hash to the blank node identifier for
// component to hash to related blank nodes map, adding an entry as
// necessary.
_hashAndTrackBlankNode({
id,
hashToBlankNodes
}) {
// 5.3.1) Create a hash, hash, according to the Hash First Degree
// Quads algorithm.
const hash = this.hashFirstDegreeQuads(id); // 5.3.2) Add hash and identifier to hash to blank nodes map,
// creating a new entry if necessary.
if (hash in hashToRelated) {
hashToRelated[hash].push(related);
} else {
hashToRelated[hash] = [related];
}
}
const idList = hashToBlankNodes.get(hash);
if (!idList) {
hashToBlankNodes.set(hash, [id]);
} else {
idList.push(id);
}
}
return hashToRelated;
} // helper that iterates over quad components (skips predicate)
_addBlankNodeQuadInfo({
quad,
component
}) {
if (component.termType !== 'BlankNode') {
return;
}
const id = component.value;
const info = this.blankNodeInfo.get(id);
forEachComponent(quad, op) {
for (const key in quad) {
// skip `predicate`
if (key === 'predicate') {
continue;
}
if (info) {
info.quads.add(quad);
} else {
this.blankNodeInfo.set(id, {
quads: new Set([quad]),
hash: null
});
}
}
op(quad[key], key, quad);
_addRelatedBlankNodeHash({
quad,
component,
position,
id,
issuer,
hashToRelated
}) {
if (!(component.termType === 'BlankNode' && component.value !== id)) {
return;
} // 3.1.1) Set hash to the result of the Hash Related Blank Node
// algorithm, passing the blank node identifier for component as
// related, quad, path identifier issuer as issuer, and position as
// either s, o, or g based on whether component is a subject, object,
// graph name, respectively.
const related = component.value;
const hash = this.hashRelatedBlankNode(related, quad, issuer, position); // 3.1.2) Add a mapping of hash to the blank node identifier for
// component to hash to related blank nodes map, adding an entry as
// necessary.
const entries = hashToRelated.get(hash);
if (entries) {
entries.push(related);
} else {
hashToRelated.set(hash, [related]);
}
}
};
_useCanonicalId({
component
}) {
if (component.termType === 'BlankNode' && !component.value.startsWith(this.canonicalIssuer.prefix)) {
return {
termType: 'BlankNode',
value: this.canonicalIssuer.getId(component.value)
};
}
return component;
}
};
function _stringHashCompare(a, b) {
return a.hash < b.hash ? -1 : a.hash > b.hash ? 1 : 0;
}
/*
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/
'use strict';
function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } }
function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; }
const URDNA2015 = require('./URDNA2015');
const util = require('./util');
module.exports = class URDNA2012 extends URDNA2015 {
constructor(options) {
super(options);
constructor() {
super();
this.name = 'URGNA2012';

@@ -23,11 +25,13 @@ this.hashAlgorithm = 'sha1';

component = util.clone(component);
if (key === 'name') {
component.value = '_:g';
} else {
component.value = component.value === id ? '_:a' : '_:z';
if (key === 'graph') {
return {
termType: 'BlankNode',
value: '_:g'
};
}
return component;
return {
termType: 'BlankNode',
value: component.value === id ? '_:a' : '_:z'
};
} // helper for getting a related predicate

@@ -41,53 +45,61 @@

createHashToRelated(id, issuer, callback) {
const self = this; // 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
createHashToRelated(id, issuer) {
var _this = this;
const hashToRelated = {}; // 2) Get a reference, quads, to the list of quads in the blank node to
// quads map for the key identifier.
return _asyncToGenerator(function* () {
// 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
const hashToRelated = new Map(); // 2) Get a reference, quads, to the list of quads in the blank node to
// quads map for the key identifier.
const quads = self.blankNodeInfo[id].quads; // 3) For each quad in quads:
const quads = _this.blankNodeInfo.get(id).quads; // 3) For each quad in quads:
self.forEach(quads, (quad, idx, callback) => {
// 3.1) If the quad's subject is a blank node that does not match
// identifier, set hash to the result of the Hash Related Blank Node
// algorithm, passing the blank node identifier for subject as related,
// quad, path identifier issuer as issuer, and p as position.
let position;
let related;
if (quad.subject.termType === 'BlankNode' && quad.subject.value !== id) {
related = quad.subject.value;
position = 'p';
} else if (quad.object.termType === 'BlankNode' && quad.object.value !== id) {
// 3.2) Otherwise, if quad's object is a blank node that does not match
// identifier, to the result of the Hash Related Blank Node algorithm,
// passing the blank node identifier for object as related, quad, path
// identifier issuer as issuer, and r as position.
related = quad.object.value;
position = 'r';
} else {
// 3.3) Otherwise, continue to the next quad.
return callback();
} // 3.4) Add a mapping of hash to the blank node identifier for the
// component that matched (subject or object) to hash to related blank
// nodes map, adding an entry as necessary.
let i = 0;
for (const quad of quads) {
// 3.1) If the quad's subject is a blank node that does not match
// identifier, set hash to the result of the Hash Related Blank Node
// algorithm, passing the blank node identifier for subject as related,
// quad, path identifier issuer as issuer, and p as position.
let position;
let related;
self.hashRelatedBlankNode(related, quad, issuer, position, (err, hash) => {
if (err) {
return callback(err);
}
if (quad.subject.termType === 'BlankNode' && quad.subject.value !== id) {
related = quad.subject.value;
position = 'p';
} else if (quad.object.termType === 'BlankNode' && quad.object.value !== id) {
// 3.2) Otherwise, if quad's object is a blank node that does not match
// identifier, to the result of the Hash Related Blank Node algorithm,
// passing the blank node identifier for object as related, quad, path
// identifier issuer as issuer, and r as position.
related = quad.object.value;
position = 'r';
} else {
// 3.3) Otherwise, continue to the next quad.
continue;
} // Note: batch hashing related blank nodes 100 at a time
if (hash in hashToRelated) {
hashToRelated[hash].push(related);
if (++i % 100 === 0) {
yield _this._yield();
} // 3.4) Add a mapping of hash to the blank node identifier for the
// component that matched (subject or object) to hash to related blank
// nodes map, adding an entry as necessary.
const hash = yield _this.hashRelatedBlankNode(related, quad, issuer, position);
const entries = hashToRelated.get(hash);
if (entries) {
entries.push(related);
} else {
hashToRelated[hash] = [related];
hashToRelated.set(hash, [related]);
}
}
callback();
});
}, err => callback(err, hashToRelated));
return hashToRelated;
})();
}
};
/*
* Copyright (c) 2016 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/

@@ -8,4 +8,2 @@ 'use strict';

const util = require('./util');
module.exports = class URDNA2012Sync extends URDNA2015Sync {

@@ -24,11 +22,13 @@ constructor() {

component = util.clone(component);
if (key === 'name') {
component.value = '_:g';
} else {
component.value = component.value === id ? '_:a' : '_:z';
if (key === 'graph') {
return {
termType: 'BlankNode',
value: '_:g'
};
}
return component;
return {
termType: 'BlankNode',
value: component.value === id ? '_:a' : '_:z'
};
} // helper for getting a related predicate

@@ -43,11 +43,10 @@

createHashToRelated(id, issuer) {
const self = this; // 1) Create a hash to related blank nodes map for storing hashes that
// 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
const hashToRelated = {}; // 2) Get a reference, quads, to the list of quads in the blank node to
const hashToRelated = new Map(); // 2) Get a reference, quads, to the list of quads in the blank node to
// quads map for the key identifier.
const quads = self.blankNodeInfo[id].quads; // 3) For each quad in quads:
const quads = this.blankNodeInfo.get(id).quads; // 3) For each quad in quads:
for (let i = 0; i < quads.length; ++i) {
for (const quad of quads) {
// 3.1) If the quad's subject is a blank node that does not match

@@ -57,3 +56,2 @@ // identifier, set hash to the result of the Hash Related Blank Node

// quad, path identifier issuer as issuer, and p as position.
const quad = quads[i];
let position;

@@ -80,8 +78,9 @@ let related;

const hash = self.hashRelatedBlankNode(related, quad, issuer, position);
const hash = this.hashRelatedBlankNode(related, quad, issuer, position);
const entries = hashToRelated.get(hash);
if (hash in hashToRelated) {
hashToRelated[hash].push(related);
if (entries) {
entries.push(related);
} else {
hashToRelated[hash] = [related];
hashToRelated.set(hash, [related]);
}

@@ -88,0 +87,0 @@ }

/*
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/
'use strict';
const util = require('./util');
module.exports = class IdentifierIssuer {

@@ -14,7 +12,9 @@ /**

* @param prefix the prefix to use ('<prefix><counter>').
* @param existing an existing Map to use.
* @param counter the counter to use.
*/
constructor(prefix) {
constructor(prefix, existing = new Map(), counter = 0) {
this.prefix = prefix;
this.counter = 0;
this.existing = {};
this._existing = existing;
this.counter = counter;
}

@@ -28,6 +28,4 @@

clone() {
const copy = new IdentifierIssuer(this.prefix);
copy.counter = this.counter;
copy.existing = util.clone(this.existing);
return copy;
const {prefix, _existing, counter} = this;
return new IdentifierIssuer(prefix, new Map(_existing), counter);
}

@@ -45,4 +43,5 @@

// return existing old identifier
if(old && old in this.existing) {
return this.existing[old];
const existing = old && this._existing.get(old);
if(existing) {
return existing;
}

@@ -52,7 +51,7 @@

const identifier = this.prefix + this.counter;
this.counter += 1;
this.counter++;
// save mapping
if(old) {
this.existing[old] = identifier;
this._existing.set(old, identifier);
}

@@ -73,4 +72,14 @@

hasId(old) {
return (old in this.existing);
return this._existing.has(old);
}
/**
* Returns all of the IDs that have been issued new IDs in the order in
* which they were issued new IDs.
*
* @return the list of old IDs that has been issued new IDs in order.
*/
getOldIds() {
return [...this._existing.keys()];
}
};

@@ -6,3 +6,3 @@ /**

* BSD 3-Clause License
* Copyright (c) 2016-2017 Digital Bazaar, Inc.
* Copyright (c) 2016-2020 Digital Bazaar, Inc.
* All rights reserved.

@@ -38,3 +38,2 @@ *

const util = require('./util');
const URDNA2015 = require('./URDNA2015');

@@ -80,23 +79,6 @@ const URGNA2012 = require('./URGNA2012');

* [useNative] use native implementation (default: false).
* @param [callback(err, canonical)] called once the operation completes.
*
* @return a Promise that resolves to the canonicalized RDF Dataset.
*/
api.canonize = util.callbackify(async function(dataset, options) {
let callback;
const promise = new Promise((resolve, reject) => {
callback = (err, canonical) => {
if(err) {
return reject(err);
}
/*if(options.format === 'application/n-quads') {
canonical = canonical.join('');
}
canonical = _parseNQuads(canonical.join(''));*/
resolve(canonical);
};
});
api.canonize = async function(dataset, options) {
// back-compat with legacy dataset

@@ -107,27 +89,29 @@ if(!Array.isArray(dataset)) {

// TODO: convert algorithms to Promise-based async
if(options.useNative) {
if(rdfCanonizeNative) {
rdfCanonizeNative.canonize(dataset, options, callback);
} else {
if(!rdfCanonizeNative) {
throw new Error('rdf-canonize-native not available');
}
} else {
if(options.algorithm === 'URDNA2015') {
new URDNA2015(options).main(dataset, callback);
} else if(options.algorithm === 'URGNA2012') {
new URGNA2012(options).main(dataset, callback);
} else if(!('algorithm' in options)) {
throw new Error('No RDF Dataset Canonicalization algorithm specified.');
} else {
throw new Error(
'Invalid RDF Dataset Canonicalization algorithm: ' + options.algorithm);
}
// TODO: convert native algorithm to Promise-based async
return new Promise((resolve, reject) =>
rdfCanonizeNative.canonize(dataset, options, (err, canonical) =>
err ? reject(err) : resolve(canonical)));
}
return promise;
});
if(options.algorithm === 'URDNA2015') {
return new URDNA2015(options).main(dataset);
}
if(options.algorithm === 'URGNA2012') {
return new URGNA2012(options).main(dataset);
}
if(!('algorithm' in options)) {
throw new Error('No RDF Dataset Canonicalization algorithm specified.');
}
throw new Error(
'Invalid RDF Dataset Canonicalization algorithm: ' + options.algorithm);
};
/**
* Synchronously canonizes an RDF dataset.
* This method is no longer available in the public API, it is for testing
* only. It synchronously canonizes an RDF dataset and does not work in the
* browser.
*

@@ -142,3 +126,3 @@ * @param dataset the dataset to canonize.

*/
api.canonizeSync = function(dataset, options) {
api._canonizeSync = function(dataset, options) {
// back-compat with legacy dataset

@@ -157,3 +141,4 @@ if(!Array.isArray(dataset)) {

return new URDNA2015Sync(options).main(dataset);
} else if(options.algorithm === 'URGNA2012') {
}
if(options.algorithm === 'URGNA2012') {
return new URGNA2012Sync(options).main(dataset);

@@ -160,0 +145,0 @@ }

/*
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/
'use strict';
const forge = require('node-forge/lib/forge');
require('node-forge/lib/md');
require('node-forge/lib/sha1');
require('node-forge/lib/sha256');
const crypto = self.crypto || self.msCrypto;
// TODO: synchronous version no longer supported in browser
module.exports = class MessageDigest {

@@ -18,12 +17,32 @@ /**

constructor(algorithm) {
this.md = forge.md[algorithm].create();
// check if crypto.subtle is available
// check is here rather than top-level to only fail if class is used
if(!(crypto && crypto.subtle)) {
throw new Error('crypto.subtle not found.');
}
if(algorithm === 'sha256') {
this.algorithm = {name: 'SHA-256'};
} else if(algorithm === 'sha1') {
this.algorithm = {name: 'SHA-1'};
} else {
throw new Error(`Unsupport algorithm "${algorithm}".`);
}
this._content = '';
}
update(msg) {
this.md.update(msg, 'utf8');
this._content += msg;
}
digest() {
return this.md.digest().toHex();
async digest() {
const data = new TextEncoder().encode(this._content);
const buffer = new Uint8Array(
await crypto.subtle.digest(this.algorithm, data));
// return digest in hex
let hex = '';
for(let i = 0; i < buffer.length; ++i) {
hex += buffer[i].toString(16).padStart(2, '0');
}
return hex;
}
};
/*
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/

@@ -4,0 +4,0 @@ 'use strict';

/*
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/

@@ -12,2 +12,7 @@ 'use strict';

const TYPE_NAMED_NODE = 'NamedNode';
const TYPE_BLANK_NODE = 'BlankNode';
const TYPE_LITERAL = 'Literal';
const TYPE_DEFAULT_GRAPH = 'DefaultGraph';
// build regexes

@@ -103,25 +108,25 @@ const REGEX = {};

// create RDF quad
const quad = {};
const quad = {subject: null, predicate: null, object: null, graph: null};
// get subject
if(match[1] !== undefined) {
quad.subject = {termType: 'NamedNode', value: match[1]};
quad.subject = {termType: TYPE_NAMED_NODE, value: match[1]};
} else {
quad.subject = {termType: 'BlankNode', value: match[2]};
quad.subject = {termType: TYPE_BLANK_NODE, value: match[2]};
}
// get predicate
quad.predicate = {termType: 'NamedNode', value: match[3]};
quad.predicate = {termType: TYPE_NAMED_NODE, value: match[3]};
// get object
if(match[4] !== undefined) {
quad.object = {termType: 'NamedNode', value: match[4]};
quad.object = {termType: TYPE_NAMED_NODE, value: match[4]};
} else if(match[5] !== undefined) {
quad.object = {termType: 'BlankNode', value: match[5]};
quad.object = {termType: TYPE_BLANK_NODE, value: match[5]};
} else {
quad.object = {
termType: 'Literal',
termType: TYPE_LITERAL,
value: undefined,
datatype: {
termType: 'NamedNode'
termType: TYPE_NAMED_NODE
}

@@ -143,3 +148,3 @@ };

quad.graph = {
termType: 'NamedNode',
termType: TYPE_NAMED_NODE,
value: match[9]

@@ -149,3 +154,3 @@ };

quad.graph = {
termType: 'BlankNode',
termType: TYPE_BLANK_NODE,
value: match[10]

@@ -155,3 +160,3 @@ };

quad.graph = {
termType: 'DefaultGraph',
termType: TYPE_DEFAULT_GRAPH,
value: ''

@@ -217,25 +222,25 @@ };

// subject and predicate can only be NamedNode or BlankNode
[s, p].forEach(term => {
if(term.termType === 'NamedNode') {
nquad += '<' + term.value + '>';
} else {
nquad += term.value;
}
nquad += ' ';
});
// subject can only be NamedNode or BlankNode
if(s.termType === TYPE_NAMED_NODE) {
nquad += `<${s.value}>`;
} else {
nquad += `${s.value}`;
}
// predicate can only be NamedNode
nquad += ` <${p.value}> `;
// object is NamedNode, BlankNode, or Literal
if(o.termType === 'NamedNode') {
nquad += '<' + o.value + '>';
} else if(o.termType === 'BlankNode') {
if(o.termType === TYPE_NAMED_NODE) {
nquad += `<${o.value}>`;
} else if(o.termType === TYPE_BLANK_NODE) {
nquad += o.value;
} else {
nquad += '"' + _escape(o.value) + '"';
nquad += `"${_escape(o.value)}"`;
if(o.datatype.value === RDF_LANGSTRING) {
if(o.language) {
nquad += '@' + o.language;
nquad += `@${o.language}`;
}
} else if(o.datatype.value !== XSD_STRING) {
nquad += '^^<' + o.datatype.value + '>';
nquad += `^^<${o.datatype.value}>`;
}

@@ -246,6 +251,6 @@ }

// does not add to `nquad`)
if(g.termType === 'NamedNode') {
nquad += ' <' + g.value + '>';
} else if(g.termType === 'BlankNode') {
nquad += ' ' + g.value;
if(g.termType === TYPE_NAMED_NODE) {
nquad += ` <${g.value}>`;
} else if(g.termType === TYPE_BLANK_NODE) {
nquad += ` ${g.value}`;
}

@@ -269,5 +274,5 @@

const termTypeMap = {
'blank node': 'BlankNode',
IRI: 'NamedNode',
literal: 'Literal'
'blank node': TYPE_BLANK_NODE,
IRI: TYPE_NAMED_NODE,
literal: TYPE_LITERAL
};

@@ -285,5 +290,5 @@

};
if(newComponent.termType === 'Literal') {
if(newComponent.termType === TYPE_LITERAL) {
newComponent.datatype = {
termType: 'NamedNode'
termType: TYPE_NAMED_NODE
};

@@ -306,3 +311,3 @@ if('datatype' in oldComponent) {

quad.graph = {
termType: 'DefaultGraph',
termType: TYPE_DEFAULT_GRAPH,
value: ''

@@ -312,3 +317,4 @@ };

quad.graph = {
termType: graphName.startsWith('_:') ? 'BlankNode' : 'NamedNode',
termType: graphName.startsWith('_:') ?
TYPE_BLANK_NODE : TYPE_NAMED_NODE,
value: graphName

@@ -334,8 +340,15 @@ };

function _compareTriples(t1, t2) {
for(const k in t1) {
if(t1[k].termType !== t2[k].termType || t1[k].value !== t2[k].value) {
return false;
}
// compare subject and object types first as it is the quickest check
if(!(t1.subject.termType === t2.subject.termType &&
t1.object.termType === t2.object.termType)) {
return false;
}
if(t1.object.termType !== 'Literal') {
// compare values
if(!(t1.subject.value === t2.subject.value &&
t1.predicate.value === t2.predicate.value &&
t1.object.value === t2.object.value)) {
return false;
}
if(t1.object.termType !== TYPE_LITERAL) {
// no `datatype` or `language` to check
return true;

@@ -345,4 +358,4 @@ }

(t1.object.datatype.termType === t2.object.datatype.termType) &&
(t1.object.datatype.value === t2.object.datatype.value) &&
(t1.object.language === t2.object.language)
(t1.object.language === t2.object.language) &&
(t1.object.datatype.value === t2.object.datatype.value)
);

@@ -349,0 +362,0 @@ }

/*
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/
'use strict';
const AsyncAlgorithm = require('./AsyncAlgorithm');
const IdentifierIssuer = require('./IdentifierIssuer');
const MessageDigest = require('./MessageDigest');
const Permutator = require('./Permutator');
const Permuter = require('./Permuter');
const NQuads = require('./NQuads');
const util = require('./util');
const POSITIONS = {subject: 's', object: 'o', graph: 'g'};
module.exports = class URDNA2015 extends AsyncAlgorithm {
constructor(options) {
options = options || {};
super(options);
module.exports = class URDNA2015 {
constructor() {
this.name = 'URDNA2015';
this.options = Object.assign({}, options);
this.blankNodeInfo = {};
this.hashToBlankNodes = {};
this.blankNodeInfo = new Map();
this.canonicalIssuer = new IdentifierIssuer('_:c14n');
this.hashAlgorithm = 'sha256';
this.quads;
this.quads = null;
}
// 4.4) Normalization Algorithm
main(dataset, callback) {
const self = this;
self.schedule.start = Date.now();
let result;
self.quads = dataset;
async main(dataset) {
this.quads = dataset;
// 1) Create the normalization state.
// 2) For every quad in input dataset:
for(const quad of dataset) {
// 2.1) For each blank node that occurs in the quad, add a reference
// to the quad using the blank node identifier in the blank node to
// quads map, creating a new entry if necessary.
this._addBlankNodeQuadInfo({quad, component: quad.subject});
this._addBlankNodeQuadInfo({quad, component: quad.object});
this._addBlankNodeQuadInfo({quad, component: quad.graph});
}
// Note: Optimize by generating non-normalized blank node map concurrently.
const nonNormalized = {};
// 3) Create a list of non-normalized blank node identifiers
// non-normalized identifiers and populate it using the keys from the
// blank node to quads map.
// Note: We use a map here and it was generated during step 2.
self.waterfall([
callback => {
// 2) For every quad in input dataset:
self.forEach(dataset, (quad, idx, callback) => {
// 2.1) For each blank node that occurs in the quad, add a reference
// to the quad using the blank node identifier in the blank node to
// quads map, creating a new entry if necessary.
self.forEachComponent(quad, component => {
if(component.termType !== 'BlankNode') {
return;
}
const id = component.value;
if(id in self.blankNodeInfo) {
self.blankNodeInfo[id].quads.push(quad);
} else {
nonNormalized[id] = true;
self.blankNodeInfo[id] = {quads: [quad]};
}
});
// 4) `simple` flag is skipped -- loop is optimized away. This optimization
// is permitted because there was a typo in the hash first degree quads
// algorithm in the URDNA2015 spec that was implemented widely making it
// such that it could not be fixed; the result was that the loop only
// needs to be run once and the first degree quad hashes will never change.
// 5.1-5.2 are skipped; first degree quad hashes are generated just once
// for all non-normalized blank nodes.
callback();
}, callback);
},
callback => {
// 3) Create a list of non-normalized blank node identifiers
// non-normalized identifiers and populate it using the keys from the
// blank node to quads map.
// Note: We use a map here and it was generated during step 2.
// 5.3) For each blank node identifier identifier in non-normalized
// identifiers:
const hashToBlankNodes = new Map();
const nonNormalized = [...this.blankNodeInfo.keys()];
let i = 0;
for(const id of nonNormalized) {
// Note: batch hashing first degree quads 100 at a time
if(++i % 100 === 0) {
await this._yield();
}
// steps 5.3.1 and 5.3.2:
await this._hashAndTrackBlankNode({id, hashToBlankNodes});
}
// 4) Initialize simple, a boolean flag, to true.
let simple = true;
// 5.4) For each hash to identifier list mapping in hash to blank
// nodes map, lexicographically-sorted by hash:
const hashes = [...hashToBlankNodes.keys()].sort();
// optimize away second sort, gather non-unique hashes in order as we go
const nonUnique = [];
for(const hash of hashes) {
// 5.4.1) If the length of identifier list is greater than 1,
// continue to the next mapping.
const idList = hashToBlankNodes.get(hash);
if(idList.length > 1) {
nonUnique.push(idList);
continue;
}
// 5) While simple is true, issue canonical identifiers for blank nodes:
self.whilst(() => simple, callback => {
// 5.1) Set simple to false.
simple = false;
// 5.4.2) Use the Issue Identifier algorithm, passing canonical
// issuer and the single blank node identifier in identifier
// list, identifier, to issue a canonical replacement identifier
// for identifier.
const id = idList[0];
this.canonicalIssuer.getId(id);
// 5.2) Clear hash to blank nodes map.
self.hashToBlankNodes = {};
// Note: These steps are skipped, optimized away since the loop
// only needs to be run once.
// 5.4.3) Remove identifier from non-normalized identifiers.
// 5.4.4) Remove hash from the hash to blank nodes map.
// 5.4.5) Set simple to true.
}
self.waterfall([
callback => {
// 5.3) For each blank node identifier identifier in
// non-normalized identifiers:
self.forEach(nonNormalized, (value, id, callback) => {
// 5.3.1) Create a hash, hash, according to the Hash First
// Degree Quads algorithm.
self.hashFirstDegreeQuads(id, (err, hash) => {
if(err) {
return callback(err);
}
// 5.3.2) Add hash and identifier to hash to blank nodes map,
// creating a new entry if necessary.
if(hash in self.hashToBlankNodes) {
self.hashToBlankNodes[hash].push(id);
} else {
self.hashToBlankNodes[hash] = [id];
}
callback();
});
}, callback);
},
callback => {
// 5.4) For each hash to identifier list mapping in hash to blank
// nodes map, lexicographically-sorted by hash:
const hashes = Object.keys(self.hashToBlankNodes).sort();
self.forEach(hashes, (hash, i, callback) => {
// 5.4.1) If the length of identifier list is greater than 1,
// continue to the next mapping.
const idList = self.hashToBlankNodes[hash];
if(idList.length > 1) {
return callback();
}
// 6) For each hash to identifier list mapping in hash to blank nodes map,
// lexicographically-sorted by hash:
// Note: sort optimized away, use `nonUnique`.
for(const idList of nonUnique) {
// 6.1) Create hash path list where each item will be a result of
// running the Hash N-Degree Quads algorithm.
const hashPathList = [];
// 5.4.2) Use the Issue Identifier algorithm, passing canonical
// issuer and the single blank node identifier in identifier
// list, identifier, to issue a canonical replacement identifier
// for identifier.
// TODO: consider changing `getId` to `issue`
const id = idList[0];
self.canonicalIssuer.getId(id);
// 6.2) For each blank node identifier identifier in identifier list:
for(const id of idList) {
// 6.2.1) If a canonical identifier has already been issued for
// identifier, continue to the next identifier.
if(this.canonicalIssuer.hasId(id)) {
continue;
}
// 5.4.3) Remove identifier from non-normalized identifiers.
delete nonNormalized[id];
// 6.2.2) Create temporary issuer, an identifier issuer
// initialized with the prefix _:b.
const issuer = new IdentifierIssuer('_:b');
// 5.4.4) Remove hash from the hash to blank nodes map.
delete self.hashToBlankNodes[hash];
// 6.2.3) Use the Issue Identifier algorithm, passing temporary
// issuer and identifier, to issue a new temporary blank node
// identifier for identifier.
issuer.getId(id);
// 5.4.5) Set simple to true.
simple = true;
callback();
}, callback);
}
], callback);
}, callback);
},
callback => {
// 6) For each hash to identifier list mapping in hash to blank nodes
// map, lexicographically-sorted by hash:
const hashes = Object.keys(self.hashToBlankNodes).sort();
self.forEach(hashes, (hash, idx, callback) => {
// 6.1) Create hash path list where each item will be a result of
// running the Hash N-Degree Quads algorithm.
const hashPathList = [];
// 6.2.4) Run the Hash N-Degree Quads algorithm, passing
// temporary issuer, and append the result to the hash path list.
const result = await this.hashNDegreeQuads(id, issuer);
hashPathList.push(result);
}
// 6.2) For each blank node identifier identifier in identifier list:
const idList = self.hashToBlankNodes[hash];
self.waterfall([
callback => {
self.forEach(idList, (id, idx, callback) => {
// 6.2.1) If a canonical identifier has already been issued for
// identifier, continue to the next identifier.
if(self.canonicalIssuer.hasId(id)) {
return callback();
}
// 6.3) For each result in the hash path list,
// lexicographically-sorted by the hash in result:
hashPathList.sort(_stringHashCompare);
for(const result of hashPathList) {
// 6.3.1) For each blank node identifier, existing identifier,
// that was issued a temporary identifier by identifier issuer
// in result, issue a canonical identifier, in the same order,
// using the Issue Identifier algorithm, passing canonical
// issuer and existing identifier.
const oldIds = result.issuer.getOldIds();
for(const id of oldIds) {
this.canonicalIssuer.getId(id);
}
}
}
// 6.2.2) Create temporary issuer, an identifier issuer
// initialized with the prefix _:b.
const issuer = new IdentifierIssuer('_:b');
/* Note: At this point all blank nodes in the set of RDF quads have been
assigned canonical identifiers, which have been stored in the canonical
issuer. Here each quad is updated by assigning each of its blank nodes
its new identifier. */
// 6.2.3) Use the Issue Identifier algorithm, passing temporary
// issuer and identifier, to issue a new temporary blank node
// identifier for identifier.
issuer.getId(id);
// 7) For each quad, quad, in input dataset:
const normalized = [];
for(const quad of this.quads) {
// 7.1) Create a copy, quad copy, of quad and replace any existing
// blank node identifiers using the canonical identifiers
// previously issued by canonical issuer.
// Note: We optimize with shallow copies here.
const q = {...quad};
q.subject = this._useCanonicalId({component: q.subject});
q.object = this._useCanonicalId({component: q.object});
q.graph = this._useCanonicalId({component: q.graph});
// 7.2) Add quad copy to the normalized dataset.
normalized.push(NQuads.serializeQuad(q));
}
// 6.2.4) Run the Hash N-Degree Quads algorithm, passing
// temporary issuer, and append the result to the hash path
// list.
self.hashNDegreeQuads(id, issuer, (err, result) => {
if(err) {
return callback(err);
}
hashPathList.push(result);
callback();
});
}, callback);
},
callback => {
// 6.3) For each result in the hash path list,
// lexicographically-sorted by the hash in result:
// TODO: use `String.localeCompare`?
hashPathList.sort((a, b) =>
(a.hash < b.hash) ? -1 : ((a.hash > b.hash) ? 1 : 0));
self.forEach(hashPathList, (result, idx, callback) => {
// 6.3.1) For each blank node identifier, existing identifier,
// that was issued a temporary identifier by identifier issuer
// in result, issue a canonical identifier, in the same order,
// using the Issue Identifier algorithm, passing canonical
// issuer and existing identifier.
for(const existing in result.issuer.existing) {
self.canonicalIssuer.getId(existing);
}
callback();
}, callback);
}
], callback);
}, callback);
}, callback => {
/* Note: At this point all blank nodes in the set of RDF quads have been
assigned canonical identifiers, which have been stored in the canonical
issuer. Here each quad is updated by assigning each of its blank nodes
its new identifier. */
// sort normalized output
normalized.sort();
// 7) For each quad, quad, in input dataset:
const normalized = [];
self.waterfall([
callback => {
self.forEach(self.quads, (quad, idx, callback) => {
// 7.1) Create a copy, quad copy, of quad and replace any existing
// blank node identifiers using the canonical identifiers
// previously issued by canonical issuer.
// Note: We optimize away the copy here.
self.forEachComponent(quad, component => {
if(component.termType === 'BlankNode' &&
!component.value.startsWith(self.canonicalIssuer.prefix)) {
component.value = self.canonicalIssuer.getId(component.value);
}
});
// 7.2) Add quad copy to the normalized dataset.
normalized.push(NQuads.serializeQuad(quad));
callback();
}, callback);
},
callback => {
// sort normalized output
normalized.sort();
// 8) Return the normalized dataset.
result = normalized.join('');
return callback();
}
], callback);
}
], err => callback(err, result));
// 8) Return the normalized dataset.
return normalized.join('');
}
// 4.6) Hash First Degree Quads
hashFirstDegreeQuads(id, callback) {
const self = this;
// return cached hash
const info = self.blankNodeInfo[id];
if('hash' in info) {
return callback(null, info.hash);
}
async hashFirstDegreeQuads(id) {
// 1) Initialize nquads to an empty list. It will be used to store quads in

@@ -250,8 +170,9 @@ // N-Quads format.

// 2) Get the list of quads quads associated with the reference blank node
// 2) Get the list of quads `quads` associated with the reference blank node
// identifier in the blank node to quads map.
const info = this.blankNodeInfo.get(id);
const quads = info.quads;
// 3) For each quad quad in quads:
self.forEach(quads, (quad, idx, callback) => {
// 3) For each quad `quad` in `quads`:
for(const quad of quads) {
// 3.1) Serialize the quad in N-Quads format with the following special

@@ -262,34 +183,32 @@ // rule:

// using a special identifier as follows:
const copy = {predicate: quad.predicate};
self.forEachComponent(quad, (component, key) => {
// 3.1.2) If the blank node's existing blank node identifier matches the
// reference blank node identifier then use the blank node identifier
// _:a, otherwise, use the blank node identifier _:z.
copy[key] = self.modifyFirstDegreeComponent(id, component, key);
});
const copy = {
subject: null, predicate: quad.predicate, object: null, graph: null
};
// 3.1.2) If the blank node's existing blank node identifier matches
// the reference blank node identifier then use the blank node
// identifier _:a, otherwise, use the blank node identifier _:z.
copy.subject = this.modifyFirstDegreeComponent(
id, quad.subject, 'subject');
copy.object = this.modifyFirstDegreeComponent(
id, quad.object, 'object');
copy.graph = this.modifyFirstDegreeComponent(
id, quad.graph, 'graph');
nquads.push(NQuads.serializeQuad(copy));
callback();
}, err => {
if(err) {
return callback(err);
}
// 4) Sort nquads in lexicographical order.
nquads.sort();
}
// 5) Return the hash that results from passing the sorted, joined nquads
// through the hash algorithm.
const md = new MessageDigest(self.hashAlgorithm);
for(let i = 0; i < nquads.length; ++i) {
md.update(nquads[i]);
}
// TODO: represent as byte buffer instead to cut memory usage in half
info.hash = md.digest();
callback(null, info.hash);
});
// 4) Sort nquads in lexicographical order.
nquads.sort();
// 5) Return the hash that results from passing the sorted, joined nquads
// through the hash algorithm.
const md = new MessageDigest(this.hashAlgorithm);
for(const nquad of nquads) {
md.update(nquad);
}
info.hash = await md.digest();
return info.hash;
}
// 4.7) Hash Related Blank Node
hashRelatedBlankNode(related, quad, issuer, position, callback) {
const self = this;
async hashRelatedBlankNode(related, quad, issuer, position) {
// 1) Set the identifier to use for related, preferring first the canonical

@@ -300,194 +219,161 @@ // identifier for related if issued, second the identifier issued by issuer

let id;
self.waterfall([
callback => {
if(self.canonicalIssuer.hasId(related)) {
id = self.canonicalIssuer.getId(related);
return callback();
}
if(issuer.hasId(related)) {
id = issuer.getId(related);
return callback();
}
self.hashFirstDegreeQuads(related, (err, hash) => {
if(err) {
return callback(err);
}
id = hash;
callback();
});
}
], err => {
if(err) {
return callback(err);
}
if(this.canonicalIssuer.hasId(related)) {
id = this.canonicalIssuer.getId(related);
} else if(issuer.hasId(related)) {
id = issuer.getId(related);
} else {
id = this.blankNodeInfo.get(related).hash;
}
// 2) Initialize a string input to the value of position.
// Note: We use a hash object instead.
const md = new MessageDigest(self.hashAlgorithm);
md.update(position);
// 2) Initialize a string input to the value of position.
// Note: We use a hash object instead.
const md = new MessageDigest(this.hashAlgorithm);
md.update(position);
// 3) If position is not g, append <, the value of the predicate in quad,
// and > to input.
if(position !== 'g') {
md.update(self.getRelatedPredicate(quad));
}
// 3) If position is not g, append <, the value of the predicate in quad,
// and > to input.
if(position !== 'g') {
md.update(this.getRelatedPredicate(quad));
}
// 4) Append identifier to input.
md.update(id);
// 4) Append identifier to input.
md.update(id);
// 5) Return the hash that results from passing input through the hash
// algorithm.
// TODO: represent as byte buffer instead to cut memory usage in half
return callback(null, md.digest());
});
// 5) Return the hash that results from passing input through the hash
// algorithm.
return md.digest();
}
// 4.8) Hash N-Degree Quads
hashNDegreeQuads(id, issuer, callback) {
const self = this;
async hashNDegreeQuads(id, issuer) {
// 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
// Note: 2) and 3) handled within `createHashToRelated`
let hashToRelated;
const md = new MessageDigest(self.hashAlgorithm);
self.waterfall([
callback => self.createHashToRelated(id, issuer, (err, result) => {
if(err) {
return callback(err);
const md = new MessageDigest(this.hashAlgorithm);
const hashToRelated = await this.createHashToRelated(id, issuer);
// 4) Create an empty string, data to hash.
// Note: We created a hash object `md` above instead.
// 5) For each related hash to blank node list mapping in hash to related
// blank nodes map, sorted lexicographically by related hash:
const hashes = [...hashToRelated.keys()].sort();
for(const hash of hashes) {
// 5.1) Append the related hash to the data to hash.
md.update(hash);
// 5.2) Create a string chosen path.
let chosenPath = '';
// 5.3) Create an unset chosen issuer variable.
let chosenIssuer;
// 5.4) For each permutation of blank node list:
const permuter = new Permuter(hashToRelated.get(hash));
let i = 0;
while(permuter.hasNext()) {
const permutation = permuter.next();
// Note: batch permutations 3 at a time
if(++i % 3 === 0) {
await this._yield();
}
hashToRelated = result;
callback();
}),
callback => {
// 4) Create an empty string, data to hash.
// Note: We created a hash object `md` above instead.
// 5) For each related hash to blank node list mapping in hash to
// related blank nodes map, sorted lexicographically by related hash:
const hashes = Object.keys(hashToRelated).sort();
self.forEach(hashes, (hash, idx, callback) => {
// 5.1) Append the related hash to the data to hash.
md.update(hash);
// 5.4.1) Create a copy of issuer, issuer copy.
let issuerCopy = issuer.clone();
// 5.2) Create a string chosen path.
let chosenPath = '';
// 5.4.2) Create a string path.
let path = '';
// 5.3) Create an unset chosen issuer variable.
let chosenIssuer;
// 5.4.3) Create a recursion list, to store blank node identifiers
// that must be recursively processed by this algorithm.
const recursionList = [];
// 5.4) For each permutation of blank node list:
const permutator = new Permutator(hashToRelated[hash]);
self.whilst(() => permutator.hasNext(), nextPermutation => {
const permutation = permutator.next();
// 5.4.4) For each related in permutation:
let nextPermutation = false;
for(const related of permutation) {
// 5.4.4.1) If a canonical identifier has been issued for
// related, append it to path.
if(this.canonicalIssuer.hasId(related)) {
path += this.canonicalIssuer.getId(related);
} else {
// 5.4.4.2) Otherwise:
// 5.4.4.2.1) If issuer copy has not issued an identifier for
// related, append related to recursion list.
if(!issuerCopy.hasId(related)) {
recursionList.push(related);
}
// 5.4.4.2.2) Use the Issue Identifier algorithm, passing
// issuer copy and related and append the result to path.
path += issuerCopy.getId(related);
}
// 5.4.1) Create a copy of issuer, issuer copy.
let issuerCopy = issuer.clone();
// 5.4.4.3) If chosen path is not empty and the length of path
// is greater than or equal to the length of chosen path and
// path is lexicographically greater than chosen path, then
// skip to the next permutation.
// Note: Comparing path length to chosen path length can be optimized
// away; only compare lexicographically.
if(chosenPath.length !== 0 && path > chosenPath) {
nextPermutation = true;
break;
}
}
// 5.4.2) Create a string path.
let path = '';
if(nextPermutation) {
continue;
}
// 5.4.3) Create a recursion list, to store blank node identifiers
// that must be recursively processed by this algorithm.
const recursionList = [];
// 5.4.5) For each related in recursion list:
for(const related of recursionList) {
// 5.4.5.1) Set result to the result of recursively executing
// the Hash N-Degree Quads algorithm, passing related for
// identifier and issuer copy for path identifier issuer.
const result = await this.hashNDegreeQuads(related, issuerCopy);
self.waterfall([
callback => {
// 5.4.4) For each related in permutation:
self.forEach(permutation, (related, idx, callback) => {
// 5.4.4.1) If a canonical identifier has been issued for
// related, append it to path.
if(self.canonicalIssuer.hasId(related)) {
path += self.canonicalIssuer.getId(related);
} else {
// 5.4.4.2) Otherwise:
// 5.4.4.2.1) If issuer copy has not issued an identifier
// for related, append related to recursion list.
if(!issuerCopy.hasId(related)) {
recursionList.push(related);
}
// 5.4.4.2.2) Use the Issue Identifier algorithm, passing
// issuer copy and related and append the result to path.
path += issuerCopy.getId(related);
}
// 5.4.5.2) Use the Issue Identifier algorithm, passing issuer
// copy and related and append the result to path.
path += issuerCopy.getId(related);
// 5.4.4.3) If chosen path is not empty and the length of path
// is greater than or equal to the length of chosen path and
// path is lexicographically greater than chosen path, then
// skip to the next permutation.
// Note: Comparing path length to chosen path length can be
// optimized away; only compare lexicographically.
if(chosenPath.length !== 0 && path > chosenPath) {
// FIXME: may cause inaccurate total depth calculation
return nextPermutation();
}
callback();
}, callback);
},
callback => {
// 5.4.5) For each related in recursion list:
self.forEach(recursionList, (related, idx, callback) => {
// 5.4.5.1) Set result to the result of recursively executing
// the Hash N-Degree Quads algorithm, passing related for
// identifier and issuer copy for path identifier issuer.
self.hashNDegreeQuads(related, issuerCopy, (err, result) => {
if(err) {
return callback(err);
}
// 5.4.5.3) Append <, the hash in result, and > to path.
path += `<${result.hash}>`;
// 5.4.5.2) Use the Issue Identifier algorithm, passing
// issuer copy and related and append the result to path.
path += issuerCopy.getId(related);
// 5.4.5.4) Set issuer copy to the identifier issuer in
// result.
issuerCopy = result.issuer;
// 5.4.5.3) Append <, the hash in result, and > to path.
path += '<' + result.hash + '>';
// 5.4.5.5) If chosen path is not empty and the length of path
// is greater than or equal to the length of chosen path and
// path is lexicographically greater than chosen path, then
// skip to the next permutation.
// Note: Comparing path length to chosen path length can be optimized
// away; only compare lexicographically.
if(chosenPath.length !== 0 && path > chosenPath) {
nextPermutation = true;
break;
}
}
// 5.4.5.4) Set issuer copy to the identifier issuer in
// result.
issuerCopy = result.issuer;
if(nextPermutation) {
continue;
}
// 5.4.5.5) If chosen path is not empty and the length of
// path is greater than or equal to the length of chosen
// path and path is lexicographically greater than chosen
// path, then skip to the next permutation.
// Note: Comparing path length to chosen path length can be
// optimized away; only compare lexicographically.
if(chosenPath.length !== 0 && path > chosenPath) {
// FIXME: may cause inaccurate total depth calculation
return nextPermutation();
}
callback();
});
}, callback);
},
callback => {
// 5.4.6) If chosen path is empty or path is lexicographically
// less than chosen path, set chosen path to path and chosen
// issuer to issuer copy.
if(chosenPath.length === 0 || path < chosenPath) {
chosenPath = path;
chosenIssuer = issuerCopy;
}
callback();
}
], nextPermutation);
}, err => {
if(err) {
return callback(err);
}
// 5.4.6) If chosen path is empty or path is lexicographically
// less than chosen path, set chosen path to path and chosen
// issuer to issuer copy.
if(chosenPath.length === 0 || path < chosenPath) {
chosenPath = path;
chosenIssuer = issuerCopy;
}
}
// 5.5) Append chosen path to data to hash.
md.update(chosenPath);
// 5.5) Append chosen path to data to hash.
md.update(chosenPath);
// 5.6) Replace issuer, by reference, with chosen issuer.
issuer = chosenIssuer;
callback();
});
}, callback);
}
], err => {
// 6) Return issuer and the hash that results from passing data to hash
// through the hash algorithm.
callback(err, {hash: md.digest(), issuer});
});
// 5.6) Replace issuer, by reference, with chosen issuer.
issuer = chosenIssuer;
}
// 6) Return issuer and the hash that results from passing data to hash
// through the hash algorithm.
return {hash: await md.digest(), issuer};
}

@@ -500,5 +386,11 @@

}
component = util.clone(component);
component.value = (component.value === id ? '_:a' : '_:z');
return component;
/* Note: A mistake in the URDNA2015 spec that made its way into
implementations (and therefore must stay to avoid interop breakage)
resulted in an assigned canonical ID, if available for
`component.value`, not being used in place of `_:a`/`_:z`, so
we don't use it here. */
return {
termType: 'BlankNode',
value: component.value === id ? '_:a' : '_:z'
};
}

@@ -508,63 +400,116 @@

getRelatedPredicate(quad) {
return '<' + quad.predicate.value + '>';
return `<${quad.predicate.value}>`;
}
// helper for creating hash to related blank nodes map
createHashToRelated(id, issuer, callback) {
const self = this;
async createHashToRelated(id, issuer) {
// 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
const hashToRelated = {};
const hashToRelated = new Map();
// 2) Get a reference, quads, to the list of quads in the blank node to
// quads map for the key identifier.
const quads = self.blankNodeInfo[id].quads;
const quads = this.blankNodeInfo.get(id).quads;
// 3) For each quad in quads:
self.forEach(quads, (quad, idx, callback) => {
let i = 0;
for(const quad of quads) {
// Note: batch hashing related blank node quads 100 at a time
if(++i % 100 === 0) {
await this._yield();
}
// 3.1) For each component in quad, if component is the subject, object,
// and graph name and it is a blank node that is not identified by
// identifier:
self.forEach(quad, (component, key, callback) => {
if(key === 'predicate' ||
!(component.termType === 'BlankNode' && component.value !== id)) {
return callback();
}
// 3.1.1) Set hash to the result of the Hash Related Blank Node
// algorithm, passing the blank node identifier for component as
// related, quad, path identifier issuer as issuer, and position as
// either s, o, or g based on whether component is a subject, object,
// graph name, respectively.
const related = component.value;
const position = POSITIONS[key];
self.hashRelatedBlankNode(
related, quad, issuer, position, (err, hash) => {
if(err) {
return callback(err);
}
// 3.1.2) Add a mapping of hash to the blank node identifier for
// component to hash to related blank nodes map, adding an entry as
// necessary.
if(hash in hashToRelated) {
hashToRelated[hash].push(related);
} else {
hashToRelated[hash] = [related];
}
callback();
});
}, callback);
}, err => callback(err, hashToRelated));
// steps 3.1.1 and 3.1.2 occur in helpers:
await Promise.all([
this._addRelatedBlankNodeHash({
quad, component: quad.subject, position: 's',
id, issuer, hashToRelated
}),
this._addRelatedBlankNodeHash({
quad, component: quad.object, position: 'o',
id, issuer, hashToRelated
}),
this._addRelatedBlankNodeHash({
quad, component: quad.graph, position: 'g',
id, issuer, hashToRelated
})
]);
}
return hashToRelated;
}
// helper that iterates over quad components (skips predicate)
forEachComponent(quad, op) {
for(const key in quad) {
// skip `predicate`
if(key === 'predicate') {
continue;
}
op(quad[key], key, quad);
async _hashAndTrackBlankNode({id, hashToBlankNodes}) {
// 5.3.1) Create a hash, hash, according to the Hash First Degree
// Quads algorithm.
const hash = await this.hashFirstDegreeQuads(id);
// 5.3.2) Add hash and identifier to hash to blank nodes map,
// creating a new entry if necessary.
const idList = hashToBlankNodes.get(hash);
if(!idList) {
hashToBlankNodes.set(hash, [id]);
} else {
idList.push(id);
}
}
_addBlankNodeQuadInfo({quad, component}) {
if(component.termType !== 'BlankNode') {
return;
}
const id = component.value;
const info = this.blankNodeInfo.get(id);
if(info) {
info.quads.add(quad);
} else {
this.blankNodeInfo.set(id, {quads: new Set([quad]), hash: null});
}
}
async _addRelatedBlankNodeHash(
{quad, component, position, id, issuer, hashToRelated}) {
if(!(component.termType === 'BlankNode' && component.value !== id)) {
return;
}
// 3.1.1) Set hash to the result of the Hash Related Blank Node
// algorithm, passing the blank node identifier for component as
// related, quad, path identifier issuer as issuer, and position as
// either s, o, or g based on whether component is a subject, object,
// graph name, respectively.
const related = component.value;
const hash = await this.hashRelatedBlankNode(
related, quad, issuer, position);
// 3.1.2) Add a mapping of hash to the blank node identifier for
// component to hash to related blank nodes map, adding an entry as
// necessary.
const entries = hashToRelated.get(hash);
if(entries) {
entries.push(related);
} else {
hashToRelated.set(hash, [related]);
}
}
_useCanonicalId({component}) {
if(component.termType === 'BlankNode' &&
!component.value.startsWith(this.canonicalIssuer.prefix)) {
return {
termType: 'BlankNode',
value: this.canonicalIssuer.getId(component.value)
};
}
return component;
}
async _yield() {
return new Promise(resolve => setImmediate(resolve));
}
};
function _stringHashCompare(a, b) {
return a.hash < b.hash ? -1 : a.hash > b.hash ? 1 : 0;
}
/*
* Copyright (c) 2016 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/

@@ -8,16 +8,12 @@ 'use strict';

const MessageDigest = require('./MessageDigest');
const Permutator = require('./Permutator');
const Permuter = require('./Permuter');
const NQuads = require('./NQuads');
const util = require('./util');
const POSITIONS = {subject: 's', object: 'o', graph: 'g'};
module.exports = class URDNA2015Sync {
constructor() {
this.name = 'URDNA2015';
this.blankNodeInfo = {};
this.hashToBlankNodes = {};
this.blankNodeInfo = new Map();
this.canonicalIssuer = new IdentifierIssuer('_:c14n');
this.hashAlgorithm = 'sha256';
this.quads;
this.quads = null;
}

@@ -27,10 +23,5 @@

main(dataset) {
const self = this;
self.quads = dataset;
this.quads = dataset;
// 1) Create the normalization state.
// Note: Optimize by generating non-normalized blank node map concurrently.
const nonNormalized = {};
// 2) For every quad in input dataset:

@@ -41,14 +32,5 @@ for(const quad of dataset) {

// quads map, creating a new entry if necessary.
self.forEachComponent(quad, component => {
if(component.termType !== 'BlankNode') {
return;
}
const id = component.value;
if(id in self.blankNodeInfo) {
self.blankNodeInfo[id].quads.push(quad);
} else {
nonNormalized[id] = true;
self.blankNodeInfo[id] = {quads: [quad]};
}
});
this._addBlankNodeQuadInfo({quad, component: quad.subject});
this._addBlankNodeQuadInfo({quad, component: quad.object});
this._addBlankNodeQuadInfo({quad, component: quad.graph});
}

@@ -61,58 +43,45 @@

// 4) Initialize simple, a boolean flag, to true.
let simple = true;
// 4) `simple` flag is skipped -- loop is optimized away. This optimization
// is permitted because there was a typo in the hash first degree quads
// algorithm in the URDNA2015 spec that was implemented widely making it
// such that it could not be fixed; the result was that the loop only
// needs to be run once and the first degree quad hashes will never change.
// 5.1-5.2 are skipped; first degree quad hashes are generated just once
// for all non-normalized blank nodes.
// 5) While simple is true, issue canonical identifiers for blank nodes:
while(simple) {
// 5.1) Set simple to false.
simple = false;
// 5.3) For each blank node identifier identifier in non-normalized
// identifiers:
const hashToBlankNodes = new Map();
const nonNormalized = [...this.blankNodeInfo.keys()];
for(const id of nonNormalized) {
// steps 5.3.1 and 5.3.2:
this._hashAndTrackBlankNode({id, hashToBlankNodes});
}
// 5.2) Clear hash to blank nodes map.
self.hashToBlankNodes = {};
// 5.3) For each blank node identifier identifier in non-normalized
// identifiers:
for(const id in nonNormalized) {
// 5.3.1) Create a hash, hash, according to the Hash First Degree
// Quads algorithm.
const hash = self.hashFirstDegreeQuads(id);
// 5.3.2) Add hash and identifier to hash to blank nodes map,
// creating a new entry if necessary.
if(hash in self.hashToBlankNodes) {
self.hashToBlankNodes[hash].push(id);
} else {
self.hashToBlankNodes[hash] = [id];
}
// 5.4) For each hash to identifier list mapping in hash to blank
// nodes map, lexicographically-sorted by hash:
const hashes = [...hashToBlankNodes.keys()].sort();
// optimize away second sort, gather non-unique hashes in order as we go
const nonUnique = [];
for(const hash of hashes) {
// 5.4.1) If the length of identifier list is greater than 1,
// continue to the next mapping.
const idList = hashToBlankNodes.get(hash);
if(idList.length > 1) {
nonUnique.push(idList);
continue;
}
// 5.4) For each hash to identifier list mapping in hash to blank
// nodes map, lexicographically-sorted by hash:
const hashes = Object.keys(self.hashToBlankNodes).sort();
for(let i = 0; i < hashes.length; ++i) {
// 5.4.1) If the length of identifier list is greater than 1,
// continue to the next mapping.
const hash = hashes[i];
const idList = self.hashToBlankNodes[hash];
if(idList.length > 1) {
continue;
}
// 5.4.2) Use the Issue Identifier algorithm, passing canonical
// issuer and the single blank node identifier in identifier
// list, identifier, to issue a canonical replacement identifier
// for identifier.
const id = idList[0];
this.canonicalIssuer.getId(id);
// 5.4.2) Use the Issue Identifier algorithm, passing canonical
// issuer and the single blank node identifier in identifier
// list, identifier, to issue a canonical replacement identifier
// for identifier.
// TODO: consider changing `getId` to `issue`
const id = idList[0];
self.canonicalIssuer.getId(id);
// 5.4.3) Remove identifier from non-normalized identifiers.
delete nonNormalized[id];
// 5.4.4) Remove hash from the hash to blank nodes map.
delete self.hashToBlankNodes[hash];
// 5.4.5) Set simple to true.
simple = true;
}
// Note: These steps are skipped, optimized away since the loop
// only needs to be run once.
// 5.4.3) Remove identifier from non-normalized identifiers.
// 5.4.4) Remove hash from the hash to blank nodes map.
// 5.4.5) Set simple to true.
}

@@ -122,4 +91,4 @@

// lexicographically-sorted by hash:
const hashes = Object.keys(self.hashToBlankNodes).sort();
for(let i = 0; i < hashes.length; ++i) {
// Note: sort optimized away, use `nonUnique`.
for(const idList of nonUnique) {
// 6.1) Create hash path list where each item will be a result of

@@ -130,9 +99,6 @@ // running the Hash N-Degree Quads algorithm.

// 6.2) For each blank node identifier identifier in identifier list:
const hash = hashes[i];
const idList = self.hashToBlankNodes[hash];
for(let j = 0; j < idList.length; ++j) {
for(const id of idList) {
// 6.2.1) If a canonical identifier has already been issued for
// identifier, continue to the next identifier.
const id = idList[j];
if(self.canonicalIssuer.hasId(id)) {
if(this.canonicalIssuer.hasId(id)) {
continue;

@@ -152,3 +118,3 @@ }

// temporary issuer, and append the result to the hash path list.
const result = self.hashNDegreeQuads(id, issuer);
const result = this.hashNDegreeQuads(id, issuer);
hashPathList.push(result);

@@ -159,6 +125,4 @@ }

// lexicographically-sorted by the hash in result:
// TODO: use `String.localeCompare`?
hashPathList.sort((a, b) =>
(a.hash < b.hash) ? -1 : ((a.hash > b.hash) ? 1 : 0));
for(let j = 0; j < hashPathList.length; ++j) {
hashPathList.sort(_stringHashCompare);
for(const result of hashPathList) {
// 6.3.1) For each blank node identifier, existing identifier,

@@ -169,5 +133,5 @@ // that was issued a temporary identifier by identifier issuer

// issuer and existing identifier.
const result = hashPathList[j];
for(const existing in result.issuer.existing) {
self.canonicalIssuer.getId(existing);
const oldIds = result.issuer.getOldIds();
for(const id of oldIds) {
this.canonicalIssuer.getId(id);
}

@@ -184,16 +148,13 @@ }

const normalized = [];
for(let i = 0; i < self.quads.length; ++i) {
for(const quad of this.quads) {
// 7.1) Create a copy, quad copy, of quad and replace any existing
// blank node identifiers using the canonical identifiers
// previously issued by canonical issuer.
// Note: We optimize away the copy here.
const quad = self.quads[i];
self.forEachComponent(quad, component => {
if(component.termType === 'BlankNode' &&
!component.value.startsWith(self.canonicalIssuer.prefix)) {
component.value = self.canonicalIssuer.getId(component.value);
}
});
// Note: We optimize with shallow copies here.
const q = {...quad};
q.subject = this._useCanonicalId({component: q.subject});
q.object = this._useCanonicalId({component: q.object});
q.graph = this._useCanonicalId({component: q.graph});
// 7.2) Add quad copy to the normalized dataset.
normalized.push(NQuads.serializeQuad(quad));
normalized.push(NQuads.serializeQuad(q));
}

@@ -210,10 +171,2 @@

hashFirstDegreeQuads(id) {
const self = this;
// return cached hash
const info = self.blankNodeInfo[id];
if('hash' in info) {
return info.hash;
}
// 1) Initialize nquads to an empty list. It will be used to store quads in

@@ -225,8 +178,7 @@ // N-Quads format.

// identifier in the blank node to quads map.
const info = this.blankNodeInfo.get(id);
const quads = info.quads;
// 3) For each quad `quad` in `quads`:
for(let i = 0; i < quads.length; ++i) {
const quad = quads[i];
for(const quad of quads) {
// 3.1) Serialize the quad in N-Quads format with the following special

@@ -237,9 +189,14 @@ // rule:

// using a special identifier as follows:
const copy = {predicate: quad.predicate};
self.forEachComponent(quad, (component, key) => {
// 3.1.2) If the blank node's existing blank node identifier matches
// the reference blank node identifier then use the blank node
// identifier _:a, otherwise, use the blank node identifier _:z.
copy[key] = self.modifyFirstDegreeComponent(id, component, key);
});
const copy = {
subject: null, predicate: quad.predicate, object: null, graph: null
};
// 3.1.2) If the blank node's existing blank node identifier matches
// the reference blank node identifier then use the blank node
// identifier _:a, otherwise, use the blank node identifier _:z.
copy.subject = this.modifyFirstDegreeComponent(
id, quad.subject, 'subject');
copy.object = this.modifyFirstDegreeComponent(
id, quad.object, 'object');
copy.graph = this.modifyFirstDegreeComponent(
id, quad.graph, 'graph');
nquads.push(NQuads.serializeQuad(copy));

@@ -253,7 +210,6 @@ }

// through the hash algorithm.
const md = new MessageDigest(self.hashAlgorithm);
for(let i = 0; i < nquads.length; ++i) {
md.update(nquads[i]);
const md = new MessageDigest(this.hashAlgorithm);
for(const nquad of nquads) {
md.update(nquad);
}
// TODO: represent as byte buffer instead to cut memory usage in half
info.hash = md.digest();

@@ -265,4 +221,2 @@ return info.hash;

hashRelatedBlankNode(related, quad, issuer, position) {
const self = this;
// 1) Set the identifier to use for related, preferring first the canonical

@@ -273,8 +227,8 @@ // identifier for related if issued, second the identifier issued by issuer

let id;
if(self.canonicalIssuer.hasId(related)) {
id = self.canonicalIssuer.getId(related);
if(this.canonicalIssuer.hasId(related)) {
id = this.canonicalIssuer.getId(related);
} else if(issuer.hasId(related)) {
id = issuer.getId(related);
} else {
id = self.hashFirstDegreeQuads(related);
id = this.blankNodeInfo.get(related).hash;
}

@@ -284,3 +238,3 @@

// Note: We use a hash object instead.
const md = new MessageDigest(self.hashAlgorithm);
const md = new MessageDigest(this.hashAlgorithm);
md.update(position);

@@ -291,3 +245,3 @@

if(position !== 'g') {
md.update(self.getRelatedPredicate(quad));
md.update(this.getRelatedPredicate(quad));
}

@@ -300,3 +254,2 @@

// algorithm.
// TODO: represent as byte buffer instead to cut memory usage in half
return md.digest();

@@ -307,9 +260,7 @@ }

hashNDegreeQuads(id, issuer) {
const self = this;
// 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
// Note: 2) and 3) handled within `createHashToRelated`
const md = new MessageDigest(self.hashAlgorithm);
const hashToRelated = self.createHashToRelated(id, issuer);
const md = new MessageDigest(this.hashAlgorithm);
const hashToRelated = this.createHashToRelated(id, issuer);

@@ -321,6 +272,5 @@ // 4) Create an empty string, data to hash.

// blank nodes map, sorted lexicographically by related hash:
const hashes = Object.keys(hashToRelated).sort();
for(let i = 0; i < hashes.length; ++i) {
const hashes = [...hashToRelated.keys()].sort();
for(const hash of hashes) {
// 5.1) Append the related hash to the data to hash.
const hash = hashes[i];
md.update(hash);

@@ -335,5 +285,5 @@

// 5.4) For each permutation of blank node list:
const permutator = new Permutator(hashToRelated[hash]);
while(permutator.hasNext()) {
const permutation = permutator.next();
const permuter = new Permuter(hashToRelated.get(hash));
while(permuter.hasNext()) {
const permutation = permuter.next();

@@ -352,8 +302,7 @@ // 5.4.1) Create a copy of issuer, issuer copy.

let nextPermutation = false;
for(let j = 0; j < permutation.length; ++j) {
for(const related of permutation) {
// 5.4.4.1) If a canonical identifier has been issued for
// related, append it to path.
const related = permutation[j];
if(self.canonicalIssuer.hasId(related)) {
path += self.canonicalIssuer.getId(related);
if(this.canonicalIssuer.hasId(related)) {
path += this.canonicalIssuer.getId(related);
} else {

@@ -388,8 +337,7 @@ // 5.4.4.2) Otherwise:

// 5.4.5) For each related in recursion list:
for(let j = 0; j < recursionList.length; ++j) {
for(const related of recursionList) {
// 5.4.5.1) Set result to the result of recursively executing
// the Hash N-Degree Quads algorithm, passing related for
// identifier and issuer copy for path identifier issuer.
const related = recursionList[j];
const result = self.hashNDegreeQuads(related, issuerCopy);
const result = this.hashNDegreeQuads(related, issuerCopy);

@@ -401,3 +349,3 @@ // 5.4.5.2) Use the Issue Identifier algorithm, passing issuer

// 5.4.5.3) Append <, the hash in result, and > to path.
path += '<' + result.hash + '>';
path += `<${result.hash}>`;

@@ -450,5 +398,11 @@ // 5.4.5.4) Set issuer copy to the identifier issuer in

}
component = util.clone(component);
component.value = (component.value === id ? '_:a' : '_:z');
return component;
/* Note: A mistake in the URDNA2015 spec that made its way into
implementations (and therefore must stay to avoid interop breakage)
resulted in an assigned canonical ID, if available for
`component.value`, not being used in place of `_:a`/`_:z`, so
we don't use it here. */
return {
termType: 'BlankNode',
value: component.value === id ? '_:a' : '_:z'
};
}

@@ -458,3 +412,3 @@

getRelatedPredicate(quad) {
return '<' + quad.predicate.value + '>';
return `<${quad.predicate.value}>`;
}

@@ -464,42 +418,28 @@

createHashToRelated(id, issuer) {
const self = this;
// 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
const hashToRelated = {};
const hashToRelated = new Map();
// 2) Get a reference, quads, to the list of quads in the blank node to
// quads map for the key identifier.
const quads = self.blankNodeInfo[id].quads;
const quads = this.blankNodeInfo.get(id).quads;
// 3) For each quad in quads:
for(let i = 0; i < quads.length; ++i) {
for(const quad of quads) {
// 3.1) For each component in quad, if component is the subject, object,
// and graph name and it is a blank node that is not identified by
// or graph name and it is a blank node that is not identified by
// identifier:
const quad = quads[i];
for(const key in quad) {
const component = quad[key];
if(key === 'predicate' ||
!(component.termType === 'BlankNode' && component.value !== id)) {
continue;
}
// 3.1.1) Set hash to the result of the Hash Related Blank Node
// algorithm, passing the blank node identifier for component as
// related, quad, path identifier issuer as issuer, and position as
// either s, o, or g based on whether component is a subject, object,
// graph name, respectively.
const related = component.value;
const position = POSITIONS[key];
const hash = self.hashRelatedBlankNode(related, quad, issuer, position);
// 3.1.2) Add a mapping of hash to the blank node identifier for
// component to hash to related blank nodes map, adding an entry as
// necessary.
if(hash in hashToRelated) {
hashToRelated[hash].push(related);
} else {
hashToRelated[hash] = [related];
}
}
// steps 3.1.1 and 3.1.2 occur in helpers:
this._addRelatedBlankNodeHash({
quad, component: quad.subject, position: 's',
id, issuer, hashToRelated
});
this._addRelatedBlankNodeHash({
quad, component: quad.object, position: 'o',
id, issuer, hashToRelated
});
this._addRelatedBlankNodeHash({
quad, component: quad.graph, position: 'g',
id, issuer, hashToRelated
});
}

@@ -510,12 +450,68 @@

// helper that iterates over quad components (skips predicate)
forEachComponent(quad, op) {
for(const key in quad) {
// skip `predicate`
if(key === 'predicate') {
continue;
}
op(quad[key], key, quad);
_hashAndTrackBlankNode({id, hashToBlankNodes}) {
// 5.3.1) Create a hash, hash, according to the Hash First Degree
// Quads algorithm.
const hash = this.hashFirstDegreeQuads(id);
// 5.3.2) Add hash and identifier to hash to blank nodes map,
// creating a new entry if necessary.
const idList = hashToBlankNodes.get(hash);
if(!idList) {
hashToBlankNodes.set(hash, [id]);
} else {
idList.push(id);
}
}
_addBlankNodeQuadInfo({quad, component}) {
if(component.termType !== 'BlankNode') {
return;
}
const id = component.value;
const info = this.blankNodeInfo.get(id);
if(info) {
info.quads.add(quad);
} else {
this.blankNodeInfo.set(id, {quads: new Set([quad]), hash: null});
}
}
_addRelatedBlankNodeHash(
{quad, component, position, id, issuer, hashToRelated}) {
if(!(component.termType === 'BlankNode' && component.value !== id)) {
return;
}
// 3.1.1) Set hash to the result of the Hash Related Blank Node
// algorithm, passing the blank node identifier for component as
// related, quad, path identifier issuer as issuer, and position as
// either s, o, or g based on whether component is a subject, object,
// graph name, respectively.
const related = component.value;
const hash = this.hashRelatedBlankNode(related, quad, issuer, position);
// 3.1.2) Add a mapping of hash to the blank node identifier for
// component to hash to related blank nodes map, adding an entry as
// necessary.
const entries = hashToRelated.get(hash);
if(entries) {
entries.push(related);
} else {
hashToRelated.set(hash, [related]);
}
}
_useCanonicalId({component}) {
if(component.termType === 'BlankNode' &&
!component.value.startsWith(this.canonicalIssuer.prefix)) {
return {
termType: 'BlankNode',
value: this.canonicalIssuer.getId(component.value)
};
}
return component;
}
};
function _stringHashCompare(a, b) {
return a.hash < b.hash ? -1 : a.hash > b.hash ? 1 : 0;
}
/*
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/

@@ -7,7 +7,6 @@ 'use strict';

const URDNA2015 = require('./URDNA2015');
const util = require('./util');
module.exports = class URDNA2012 extends URDNA2015 {
constructor(options) {
super(options);
constructor() {
super();
this.name = 'URGNA2012';

@@ -22,9 +21,12 @@ this.hashAlgorithm = 'sha1';

}
component = util.clone(component);
if(key === 'name') {
component.value = '_:g';
} else {
component.value = (component.value === id ? '_:a' : '_:z');
if(key === 'graph') {
return {
termType: 'BlankNode',
value: '_:g'
};
}
return component;
return {
termType: 'BlankNode',
value: (component.value === id ? '_:a' : '_:z')
};
}

@@ -38,15 +40,14 @@

// helper for creating hash to related blank nodes map
createHashToRelated(id, issuer, callback) {
const self = this;
async createHashToRelated(id, issuer) {
// 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
const hashToRelated = {};
const hashToRelated = new Map();
// 2) Get a reference, quads, to the list of quads in the blank node to
// quads map for the key identifier.
const quads = self.blankNodeInfo[id].quads;
const quads = this.blankNodeInfo.get(id).quads;
// 3) For each quad in quads:
self.forEach(quads, (quad, idx, callback) => {
let i = 0;
for(const quad of quads) {
// 3.1) If the quad's subject is a blank node that does not match

@@ -71,21 +72,23 @@ // identifier, set hash to the result of the Hash Related Blank Node

// 3.3) Otherwise, continue to the next quad.
return callback();
continue;
}
// Note: batch hashing related blank nodes 100 at a time
if(++i % 100 === 0) {
await this._yield();
}
// 3.4) Add a mapping of hash to the blank node identifier for the
// component that matched (subject or object) to hash to related blank
// nodes map, adding an entry as necessary.
self.hashRelatedBlankNode(
related, quad, issuer, position, (err, hash) => {
if(err) {
return callback(err);
}
if(hash in hashToRelated) {
hashToRelated[hash].push(related);
} else {
hashToRelated[hash] = [related];
}
callback();
});
}, err => callback(err, hashToRelated));
const hash = await this.hashRelatedBlankNode(
related, quad, issuer, position);
const entries = hashToRelated.get(hash);
if(entries) {
entries.push(related);
} else {
hashToRelated.set(hash, [related]);
}
}
return hashToRelated;
}
};
/*
* Copyright (c) 2016 Digital Bazaar, Inc. All rights reserved.
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved.
*/

@@ -7,3 +7,2 @@ 'use strict';

const URDNA2015Sync = require('./URDNA2015Sync');
const util = require('./util');

@@ -22,9 +21,12 @@ module.exports = class URDNA2012Sync extends URDNA2015Sync {

}
component = util.clone(component);
if(key === 'name') {
component.value = '_:g';
} else {
component.value = (component.value === id ? '_:a' : '_:z');
if(key === 'graph') {
return {
termType: 'BlankNode',
value: '_:g'
};
}
return component;
return {
termType: 'BlankNode',
value: (component.value === id ? '_:a' : '_:z')
};
}

@@ -39,14 +41,12 @@

createHashToRelated(id, issuer) {
const self = this;
// 1) Create a hash to related blank nodes map for storing hashes that
// identify related blank nodes.
const hashToRelated = {};
const hashToRelated = new Map();
// 2) Get a reference, quads, to the list of quads in the blank node to
// quads map for the key identifier.
const quads = self.blankNodeInfo[id].quads;
const quads = this.blankNodeInfo.get(id).quads;
// 3) For each quad in quads:
for(let i = 0; i < quads.length; ++i) {
for(const quad of quads) {
// 3.1) If the quad's subject is a blank node that does not match

@@ -56,3 +56,2 @@ // identifier, set hash to the result of the Hash Related Blank Node

// quad, path identifier issuer as issuer, and p as position.
const quad = quads[i];
let position;

@@ -78,7 +77,8 @@ let related;

// nodes map, adding an entry as necessary.
const hash = self.hashRelatedBlankNode(related, quad, issuer, position);
if(hash in hashToRelated) {
hashToRelated[hash].push(related);
const hash = this.hashRelatedBlankNode(related, quad, issuer, position);
const entries = hashToRelated.get(hash);
if(entries) {
entries.push(related);
} else {
hashToRelated[hash] = [related];
hashToRelated.set(hash, [related]);
}

@@ -85,0 +85,0 @@ }

{
"name": "rdf-canonize",
"version": "1.2.0",
"version": "2.0.0",
"description": "An implementation of the RDF Dataset Normalization Algorithm in JavaScript",

@@ -32,3 +32,2 @@ "homepage": "https://github.com/digitalbazaar/rdf-canonize",

"dependencies": {
"node-forge": "^0.10.0",
"semver": "^6.3.0"

@@ -48,5 +47,7 @@ },

"core-js": "^2.6.3",
"delay": "^4.4.0",
"eslint": "^7.10.0",
"eslint-config-digitalbazaar": "^2.6.1",
"mocha": "^5.2.0",
"nsolid": "0.0.0",
"nyc": "^13.1.0",

@@ -53,0 +54,0 @@ "webpack": "^4.29.0",

@@ -70,17 +70,10 @@ # rdf-canonize

// canonize a data set with a particular algorithm with callback
canonize.canonize(dataset, {algorithm: 'URDNA2015'}, function(err, canonical) {
// ...
});
// canonize a data set with a particular algorithm with async/await
const canonical = await canonize.canonize(dataset, {algorithm: 'URDNA2015'});
// canonize a data set with a particular algorithm with callback
// force use of the native implementation
canonize.canonize(dataset, {
algorithm: 'URDNA2015',
useNative: true
}, function(err, canonical) {
// ...
// canonize a data set with a particular algorithm and force use of the
// native implementation
const canonical = await canonize.canonize(dataset, {
algorithm: 'URDNA2015',
useNative: true
});

@@ -87,0 +80,0 @@ ```

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc