rdf-canonize
Advanced tools
Comparing version 1.2.0 to 2.0.0
# rdf-canonize ChangeLog | ||
## 2.0.0 - 2020-01-20 | ||
### Removed | ||
- **BREAKING**: Removed public API for `canonizeSync`. It is still available | ||
for testing purposes but does not run in the browser. | ||
- **BREAKING**: Removed dependency on `forge` which means that this library | ||
will only run in browsers that have support for the WebCrypto API (or | ||
an external polyfill for it). | ||
- **BREAKING**: Do not expose `existing` on `IdentifierIssuer`. The old | ||
IDs can be retrieved in order via `getOldIds`. | ||
### Changed | ||
- General optimizations and modernization of the library. | ||
### Added | ||
- Add `getOldIds` function to `IdentifierIssuer`. | ||
## 1.2.0 - 2020-09-30 | ||
@@ -4,0 +21,0 @@ |
/* | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
'use strict'; | ||
const util = require('./util'); | ||
module.exports = class IdentifierIssuer { | ||
@@ -14,7 +12,9 @@ /** | ||
* @param prefix the prefix to use ('<prefix><counter>'). | ||
* @param existing an existing Map to use. | ||
* @param counter the counter to use. | ||
*/ | ||
constructor(prefix) { | ||
constructor(prefix, existing = new Map(), counter = 0) { | ||
this.prefix = prefix; | ||
this.counter = 0; | ||
this.existing = {}; | ||
this._existing = existing; | ||
this.counter = counter; | ||
} | ||
@@ -29,6 +29,8 @@ /** | ||
clone() { | ||
const copy = new IdentifierIssuer(this.prefix); | ||
copy.counter = this.counter; | ||
copy.existing = util.clone(this.existing); | ||
return copy; | ||
const { | ||
prefix, | ||
_existing, | ||
counter | ||
} = this; | ||
return new IdentifierIssuer(prefix, new Map(_existing), counter); | ||
} | ||
@@ -47,4 +49,6 @@ /** | ||
// return existing old identifier | ||
if (old && old in this.existing) { | ||
return this.existing[old]; | ||
const existing = old && this._existing.get(old); | ||
if (existing) { | ||
return existing; | ||
} // get next identifier | ||
@@ -54,6 +58,6 @@ | ||
const identifier = this.prefix + this.counter; | ||
this.counter += 1; // save mapping | ||
this.counter++; // save mapping | ||
if (old) { | ||
this.existing[old] = identifier; | ||
this._existing.set(old, identifier); | ||
} | ||
@@ -75,5 +79,16 @@ | ||
hasId(old) { | ||
return old in this.existing; | ||
return this._existing.has(old); | ||
} | ||
/** | ||
* Returns all of the IDs that have been issued new IDs in the order in | ||
* which they were issued new IDs. | ||
* | ||
* @return the list of old IDs that has been issued new IDs in order. | ||
*/ | ||
getOldIds() { | ||
return [...this._existing.keys()]; | ||
} | ||
}; |
@@ -6,3 +6,3 @@ /** | ||
* BSD 3-Clause License | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. | ||
* All rights reserved. | ||
@@ -42,4 +42,2 @@ * | ||
const util = require('./util'); | ||
const URDNA2015 = require('./URDNA2015'); | ||
@@ -88,3 +86,2 @@ | ||
* [useNative] use native implementation (default: false). | ||
* @param [callback(err, canonical)] called once the operation completes. | ||
* | ||
@@ -95,44 +92,31 @@ * @return a Promise that resolves to the canonicalized RDF Dataset. | ||
api.canonize = util.callbackify( /*#__PURE__*/function () { | ||
api.canonize = /*#__PURE__*/function () { | ||
var _ref = _asyncToGenerator(function* (dataset, options) { | ||
let callback; | ||
const promise = new Promise((resolve, reject) => { | ||
callback = (err, canonical) => { | ||
if (err) { | ||
return reject(err); | ||
} | ||
/*if(options.format === 'application/n-quads') { | ||
canonical = canonical.join(''); | ||
} | ||
canonical = _parseNQuads(canonical.join(''));*/ | ||
resolve(canonical); | ||
}; | ||
}); // back-compat with legacy dataset | ||
// back-compat with legacy dataset | ||
if (!Array.isArray(dataset)) { | ||
dataset = api.NQuads.legacyDatasetToQuads(dataset); | ||
} // TODO: convert algorithms to Promise-based async | ||
} | ||
if (options.useNative) { | ||
if (rdfCanonizeNative) { | ||
rdfCanonizeNative.canonize(dataset, options, callback); | ||
} else { | ||
if (!rdfCanonizeNative) { | ||
throw new Error('rdf-canonize-native not available'); | ||
} | ||
} else { | ||
if (options.algorithm === 'URDNA2015') { | ||
new URDNA2015(options).main(dataset, callback); | ||
} else if (options.algorithm === 'URGNA2012') { | ||
new URGNA2012(options).main(dataset, callback); | ||
} else if (!('algorithm' in options)) { | ||
throw new Error('No RDF Dataset Canonicalization algorithm specified.'); | ||
} else { | ||
throw new Error('Invalid RDF Dataset Canonicalization algorithm: ' + options.algorithm); | ||
} | ||
} // TODO: convert native algorithm to Promise-based async | ||
return new Promise((resolve, reject) => rdfCanonizeNative.canonize(dataset, options, (err, canonical) => err ? reject(err) : resolve(canonical))); | ||
} | ||
return promise; | ||
if (options.algorithm === 'URDNA2015') { | ||
return new URDNA2015(options).main(dataset); | ||
} | ||
if (options.algorithm === 'URGNA2012') { | ||
return new URGNA2012(options).main(dataset); | ||
} | ||
if (!('algorithm' in options)) { | ||
throw new Error('No RDF Dataset Canonicalization algorithm specified.'); | ||
} | ||
throw new Error('Invalid RDF Dataset Canonicalization algorithm: ' + options.algorithm); | ||
}); | ||
@@ -143,5 +127,7 @@ | ||
}; | ||
}()); | ||
}(); | ||
/** | ||
* Synchronously canonizes an RDF dataset. | ||
* This method is no longer available in the public API, it is for testing | ||
* only. It synchronously canonizes an RDF dataset and does not work in the | ||
* browser. | ||
* | ||
@@ -157,3 +143,4 @@ * @param dataset the dataset to canonize. | ||
api.canonizeSync = function (dataset, options) { | ||
api._canonizeSync = function (dataset, options) { | ||
// back-compat with legacy dataset | ||
@@ -174,3 +161,5 @@ if (!Array.isArray(dataset)) { | ||
return new URDNA2015Sync(options).main(dataset); | ||
} else if (options.algorithm === 'URGNA2012') { | ||
} | ||
if (options.algorithm === 'URGNA2012') { | ||
return new URGNA2012Sync(options).main(dataset); | ||
@@ -177,0 +166,0 @@ } |
/* | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
'use strict'; | ||
const forge = require('node-forge/lib/forge'); | ||
function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } } | ||
require('node-forge/lib/md'); | ||
function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; } | ||
require('node-forge/lib/sha1'); | ||
const crypto = self.crypto || self.msCrypto; // TODO: synchronous version no longer supported in browser | ||
require('node-forge/lib/sha256'); | ||
module.exports = class MessageDigest { | ||
@@ -21,13 +19,44 @@ /** | ||
constructor(algorithm) { | ||
this.md = forge.md[algorithm].create(); | ||
// check if crypto.subtle is available | ||
// check is here rather than top-level to only fail if class is used | ||
if (!(crypto && crypto.subtle)) { | ||
throw new Error('crypto.subtle not found.'); | ||
} | ||
if (algorithm === 'sha256') { | ||
this.algorithm = { | ||
name: 'SHA-256' | ||
}; | ||
} else if (algorithm === 'sha1') { | ||
this.algorithm = { | ||
name: 'SHA-1' | ||
}; | ||
} else { | ||
throw new Error(`Unsupport algorithm "${algorithm}".`); | ||
} | ||
this._content = ''; | ||
} | ||
update(msg) { | ||
this.md.update(msg, 'utf8'); | ||
this._content += msg; | ||
} | ||
digest() { | ||
return this.md.digest().toHex(); | ||
var _this = this; | ||
return _asyncToGenerator(function* () { | ||
const data = new TextEncoder().encode(_this._content); | ||
const buffer = new Uint8Array(yield crypto.subtle.digest(_this.algorithm, data)); // return digest in hex | ||
let hex = ''; | ||
for (let i = 0; i < buffer.length; ++i) { | ||
hex += buffer[i].toString(16).padStart(2, '0'); | ||
} | ||
return hex; | ||
})(); | ||
} | ||
}; |
/* | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
@@ -4,0 +4,0 @@ 'use strict'; |
/* | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
@@ -9,3 +9,7 @@ 'use strict'; // eslint-disable-next-line no-unused-vars | ||
const RDF_LANGSTRING = RDF + 'langString'; | ||
const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string'; // build regexes | ||
const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string'; | ||
const TYPE_NAMED_NODE = 'NamedNode'; | ||
const TYPE_BLANK_NODE = 'BlankNode'; | ||
const TYPE_LITERAL = 'Literal'; | ||
const TYPE_DEFAULT_GRAPH = 'DefaultGraph'; // build regexes | ||
@@ -73,7 +77,12 @@ const REGEX = {}; | ||
const quad = {}; // get subject | ||
const quad = { | ||
subject: null, | ||
predicate: null, | ||
object: null, | ||
graph: null | ||
}; // get subject | ||
if (match[1] !== undefined) { | ||
quad.subject = { | ||
termType: 'NamedNode', | ||
termType: TYPE_NAMED_NODE, | ||
value: match[1] | ||
@@ -83,3 +92,3 @@ }; | ||
quad.subject = { | ||
termType: 'BlankNode', | ||
termType: TYPE_BLANK_NODE, | ||
value: match[2] | ||
@@ -91,3 +100,3 @@ }; | ||
quad.predicate = { | ||
termType: 'NamedNode', | ||
termType: TYPE_NAMED_NODE, | ||
value: match[3] | ||
@@ -98,3 +107,3 @@ }; // get object | ||
quad.object = { | ||
termType: 'NamedNode', | ||
termType: TYPE_NAMED_NODE, | ||
value: match[4] | ||
@@ -104,3 +113,3 @@ }; | ||
quad.object = { | ||
termType: 'BlankNode', | ||
termType: TYPE_BLANK_NODE, | ||
value: match[5] | ||
@@ -110,6 +119,6 @@ }; | ||
quad.object = { | ||
termType: 'Literal', | ||
termType: TYPE_LITERAL, | ||
value: undefined, | ||
datatype: { | ||
termType: 'NamedNode' | ||
termType: TYPE_NAMED_NODE | ||
} | ||
@@ -133,3 +142,3 @@ }; | ||
quad.graph = { | ||
termType: 'NamedNode', | ||
termType: TYPE_NAMED_NODE, | ||
value: match[9] | ||
@@ -139,3 +148,3 @@ }; | ||
quad.graph = { | ||
termType: 'BlankNode', | ||
termType: TYPE_BLANK_NODE, | ||
value: match[10] | ||
@@ -145,3 +154,3 @@ }; | ||
quad.graph = { | ||
termType: 'DefaultGraph', | ||
termType: TYPE_DEFAULT_GRAPH, | ||
value: '' | ||
@@ -211,27 +220,26 @@ }; | ||
const g = quad.graph; | ||
let nquad = ''; // subject and predicate can only be NamedNode or BlankNode | ||
let nquad = ''; // subject can only be NamedNode or BlankNode | ||
[s, p].forEach(term => { | ||
if (term.termType === 'NamedNode') { | ||
nquad += '<' + term.value + '>'; | ||
} else { | ||
nquad += term.value; | ||
} | ||
if (s.termType === TYPE_NAMED_NODE) { | ||
nquad += `<${s.value}>`; | ||
} else { | ||
nquad += `${s.value}`; | ||
} // predicate can only be NamedNode | ||
nquad += ' '; | ||
}); // object is NamedNode, BlankNode, or Literal | ||
if (o.termType === 'NamedNode') { | ||
nquad += '<' + o.value + '>'; | ||
} else if (o.termType === 'BlankNode') { | ||
nquad += ` <${p.value}> `; // object is NamedNode, BlankNode, or Literal | ||
if (o.termType === TYPE_NAMED_NODE) { | ||
nquad += `<${o.value}>`; | ||
} else if (o.termType === TYPE_BLANK_NODE) { | ||
nquad += o.value; | ||
} else { | ||
nquad += '"' + _escape(o.value) + '"'; | ||
nquad += `"${_escape(o.value)}"`; | ||
if (o.datatype.value === RDF_LANGSTRING) { | ||
if (o.language) { | ||
nquad += '@' + o.language; | ||
nquad += `@${o.language}`; | ||
} | ||
} else if (o.datatype.value !== XSD_STRING) { | ||
nquad += '^^<' + o.datatype.value + '>'; | ||
nquad += `^^<${o.datatype.value}>`; | ||
} | ||
@@ -242,6 +250,6 @@ } // graph can only be NamedNode or BlankNode (or DefaultGraph, but that | ||
if (g.termType === 'NamedNode') { | ||
nquad += ' <' + g.value + '>'; | ||
} else if (g.termType === 'BlankNode') { | ||
nquad += ' ' + g.value; | ||
if (g.termType === TYPE_NAMED_NODE) { | ||
nquad += ` <${g.value}>`; | ||
} else if (g.termType === TYPE_BLANK_NODE) { | ||
nquad += ` ${g.value}`; | ||
} | ||
@@ -265,5 +273,5 @@ | ||
const termTypeMap = { | ||
'blank node': 'BlankNode', | ||
IRI: 'NamedNode', | ||
literal: 'Literal' | ||
'blank node': TYPE_BLANK_NODE, | ||
IRI: TYPE_NAMED_NODE, | ||
literal: TYPE_LITERAL | ||
}; | ||
@@ -283,5 +291,5 @@ | ||
if (newComponent.termType === 'Literal') { | ||
if (newComponent.termType === TYPE_LITERAL) { | ||
newComponent.datatype = { | ||
termType: 'NamedNode' | ||
termType: TYPE_NAMED_NODE | ||
}; | ||
@@ -309,3 +317,3 @@ | ||
quad.graph = { | ||
termType: 'DefaultGraph', | ||
termType: TYPE_DEFAULT_GRAPH, | ||
value: '' | ||
@@ -315,3 +323,3 @@ }; | ||
quad.graph = { | ||
termType: graphName.startsWith('_:') ? 'BlankNode' : 'NamedNode', | ||
termType: graphName.startsWith('_:') ? TYPE_BLANK_NODE : TYPE_NAMED_NODE, | ||
value: graphName | ||
@@ -339,13 +347,18 @@ }; | ||
function _compareTriples(t1, t2) { | ||
for (const k in t1) { | ||
if (t1[k].termType !== t2[k].termType || t1[k].value !== t2[k].value) { | ||
return false; | ||
} | ||
// compare subject and object types first as it is the quickest check | ||
if (!(t1.subject.termType === t2.subject.termType && t1.object.termType === t2.object.termType)) { | ||
return false; | ||
} // compare values | ||
if (!(t1.subject.value === t2.subject.value && t1.predicate.value === t2.predicate.value && t1.object.value === t2.object.value)) { | ||
return false; | ||
} | ||
if (t1.object.termType !== 'Literal') { | ||
if (t1.object.termType !== TYPE_LITERAL) { | ||
// no `datatype` or `language` to check | ||
return true; | ||
} | ||
return t1.object.datatype.termType === t2.object.datatype.termType && t1.object.datatype.value === t2.object.datatype.value && t1.object.language === t2.object.language; | ||
return t1.object.datatype.termType === t2.object.datatype.termType && t1.object.language === t2.object.language && t1.object.datatype.value === t2.object.datatype.value; | ||
} | ||
@@ -352,0 +365,0 @@ |
/* | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
'use strict'; | ||
const AsyncAlgorithm = require('./AsyncAlgorithm'); | ||
function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; } | ||
function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { _defineProperty(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; } | ||
function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; } | ||
function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } } | ||
function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; } | ||
const IdentifierIssuer = require('./IdentifierIssuer'); | ||
@@ -12,129 +20,107 @@ | ||
const Permutator = require('./Permutator'); | ||
const Permuter = require('./Permuter'); | ||
const NQuads = require('./NQuads'); | ||
const util = require('./util'); | ||
const POSITIONS = { | ||
subject: 's', | ||
object: 'o', | ||
graph: 'g' | ||
}; | ||
module.exports = class URDNA2015 extends AsyncAlgorithm { | ||
constructor(options) { | ||
options = options || {}; | ||
super(options); | ||
module.exports = class URDNA2015 { | ||
constructor() { | ||
this.name = 'URDNA2015'; | ||
this.options = Object.assign({}, options); | ||
this.blankNodeInfo = {}; | ||
this.hashToBlankNodes = {}; | ||
this.blankNodeInfo = new Map(); | ||
this.canonicalIssuer = new IdentifierIssuer('_:c14n'); | ||
this.hashAlgorithm = 'sha256'; | ||
this.quads; | ||
this.quads = null; | ||
} // 4.4) Normalization Algorithm | ||
main(dataset, callback) { | ||
const self = this; | ||
self.schedule.start = Date.now(); | ||
let result; | ||
self.quads = dataset; // 1) Create the normalization state. | ||
// Note: Optimize by generating non-normalized blank node map concurrently. | ||
main(dataset) { | ||
var _this = this; | ||
const nonNormalized = {}; | ||
self.waterfall([callback => { | ||
return _asyncToGenerator(function* () { | ||
_this.quads = dataset; // 1) Create the normalization state. | ||
// 2) For every quad in input dataset: | ||
self.forEach(dataset, (quad, idx, callback) => { | ||
for (const quad of dataset) { | ||
// 2.1) For each blank node that occurs in the quad, add a reference | ||
// to the quad using the blank node identifier in the blank node to | ||
// quads map, creating a new entry if necessary. | ||
self.forEachComponent(quad, component => { | ||
if (component.termType !== 'BlankNode') { | ||
return; | ||
} | ||
_this._addBlankNodeQuadInfo({ | ||
quad, | ||
component: quad.subject | ||
}); | ||
const id = component.value; | ||
_this._addBlankNodeQuadInfo({ | ||
quad, | ||
component: quad.object | ||
}); | ||
if (id in self.blankNodeInfo) { | ||
self.blankNodeInfo[id].quads.push(quad); | ||
} else { | ||
nonNormalized[id] = true; | ||
self.blankNodeInfo[id] = { | ||
quads: [quad] | ||
}; | ||
} | ||
_this._addBlankNodeQuadInfo({ | ||
quad, | ||
component: quad.graph | ||
}); | ||
callback(); | ||
}, callback); | ||
}, callback => { | ||
// 3) Create a list of non-normalized blank node identifiers | ||
} // 3) Create a list of non-normalized blank node identifiers | ||
// non-normalized identifiers and populate it using the keys from the | ||
// blank node to quads map. | ||
// Note: We use a map here and it was generated during step 2. | ||
// 4) Initialize simple, a boolean flag, to true. | ||
let simple = true; // 5) While simple is true, issue canonical identifiers for blank nodes: | ||
// 4) `simple` flag is skipped -- loop is optimized away. This optimization | ||
// is permitted because there was a typo in the hash first degree quads | ||
// algorithm in the URDNA2015 spec that was implemented widely making it | ||
// such that it could not be fixed; the result was that the loop only | ||
// needs to be run once and the first degree quad hashes will never change. | ||
// 5.1-5.2 are skipped; first degree quad hashes are generated just once | ||
// for all non-normalized blank nodes. | ||
// 5.3) For each blank node identifier identifier in non-normalized | ||
// identifiers: | ||
self.whilst(() => simple, callback => { | ||
// 5.1) Set simple to false. | ||
simple = false; // 5.2) Clear hash to blank nodes map. | ||
self.hashToBlankNodes = {}; | ||
self.waterfall([callback => { | ||
// 5.3) For each blank node identifier identifier in | ||
// non-normalized identifiers: | ||
self.forEach(nonNormalized, (value, id, callback) => { | ||
// 5.3.1) Create a hash, hash, according to the Hash First | ||
// Degree Quads algorithm. | ||
self.hashFirstDegreeQuads(id, (err, hash) => { | ||
if (err) { | ||
return callback(err); | ||
} // 5.3.2) Add hash and identifier to hash to blank nodes map, | ||
// creating a new entry if necessary. | ||
const hashToBlankNodes = new Map(); | ||
const nonNormalized = [..._this.blankNodeInfo.keys()]; | ||
let i = 0; | ||
for (const id of nonNormalized) { | ||
// Note: batch hashing first degree quads 100 at a time | ||
if (++i % 100 === 0) { | ||
yield _this._yield(); | ||
} // steps 5.3.1 and 5.3.2: | ||
if (hash in self.hashToBlankNodes) { | ||
self.hashToBlankNodes[hash].push(id); | ||
} else { | ||
self.hashToBlankNodes[hash] = [id]; | ||
} | ||
callback(); | ||
}); | ||
}, callback); | ||
}, callback => { | ||
// 5.4) For each hash to identifier list mapping in hash to blank | ||
// nodes map, lexicographically-sorted by hash: | ||
const hashes = Object.keys(self.hashToBlankNodes).sort(); | ||
self.forEach(hashes, (hash, i, callback) => { | ||
// 5.4.1) If the length of identifier list is greater than 1, | ||
// continue to the next mapping. | ||
const idList = self.hashToBlankNodes[hash]; | ||
yield _this._hashAndTrackBlankNode({ | ||
id, | ||
hashToBlankNodes | ||
}); | ||
} // 5.4) For each hash to identifier list mapping in hash to blank | ||
// nodes map, lexicographically-sorted by hash: | ||
if (idList.length > 1) { | ||
return callback(); | ||
} // 5.4.2) Use the Issue Identifier algorithm, passing canonical | ||
// issuer and the single blank node identifier in identifier | ||
// list, identifier, to issue a canonical replacement identifier | ||
// for identifier. | ||
// TODO: consider changing `getId` to `issue` | ||
const hashes = [...hashToBlankNodes.keys()].sort(); // optimize away second sort, gather non-unique hashes in order as we go | ||
const id = idList[0]; | ||
self.canonicalIssuer.getId(id); // 5.4.3) Remove identifier from non-normalized identifiers. | ||
const nonUnique = []; | ||
delete nonNormalized[id]; // 5.4.4) Remove hash from the hash to blank nodes map. | ||
for (const hash of hashes) { | ||
// 5.4.1) If the length of identifier list is greater than 1, | ||
// continue to the next mapping. | ||
const idList = hashToBlankNodes.get(hash); | ||
delete self.hashToBlankNodes[hash]; // 5.4.5) Set simple to true. | ||
if (idList.length > 1) { | ||
nonUnique.push(idList); | ||
continue; | ||
} // 5.4.2) Use the Issue Identifier algorithm, passing canonical | ||
// issuer and the single blank node identifier in identifier | ||
// list, identifier, to issue a canonical replacement identifier | ||
// for identifier. | ||
simple = true; | ||
callback(); | ||
}, callback); | ||
}], callback); | ||
}, callback); | ||
}, callback => { | ||
// 6) For each hash to identifier list mapping in hash to blank nodes | ||
// map, lexicographically-sorted by hash: | ||
const hashes = Object.keys(self.hashToBlankNodes).sort(); | ||
self.forEach(hashes, (hash, idx, callback) => { | ||
const id = idList[0]; | ||
_this.canonicalIssuer.getId(id); // Note: These steps are skipped, optimized away since the loop | ||
// only needs to be run once. | ||
// 5.4.3) Remove identifier from non-normalized identifiers. | ||
// 5.4.4) Remove hash from the hash to blank nodes map. | ||
// 5.4.5) Set simple to true. | ||
} // 6) For each hash to identifier list mapping in hash to blank nodes map, | ||
// lexicographically-sorted by hash: | ||
// Note: sort optimized away, use `nonUnique`. | ||
for (const idList of nonUnique) { | ||
// 6.1) Create hash path list where each item will be a result of | ||
@@ -144,50 +130,39 @@ // running the Hash N-Degree Quads algorithm. | ||
const idList = self.hashToBlankNodes[hash]; | ||
self.waterfall([callback => { | ||
self.forEach(idList, (id, idx, callback) => { | ||
// 6.2.1) If a canonical identifier has already been issued for | ||
// identifier, continue to the next identifier. | ||
if (self.canonicalIssuer.hasId(id)) { | ||
return callback(); | ||
} // 6.2.2) Create temporary issuer, an identifier issuer | ||
// initialized with the prefix _:b. | ||
for (const id of idList) { | ||
// 6.2.1) If a canonical identifier has already been issued for | ||
// identifier, continue to the next identifier. | ||
if (_this.canonicalIssuer.hasId(id)) { | ||
continue; | ||
} // 6.2.2) Create temporary issuer, an identifier issuer | ||
// initialized with the prefix _:b. | ||
const issuer = new IdentifierIssuer('_:b'); // 6.2.3) Use the Issue Identifier algorithm, passing temporary | ||
// issuer and identifier, to issue a new temporary blank node | ||
// identifier for identifier. | ||
const issuer = new IdentifierIssuer('_:b'); // 6.2.3) Use the Issue Identifier algorithm, passing temporary | ||
// issuer and identifier, to issue a new temporary blank node | ||
// identifier for identifier. | ||
issuer.getId(id); // 6.2.4) Run the Hash N-Degree Quads algorithm, passing | ||
// temporary issuer, and append the result to the hash path | ||
// list. | ||
issuer.getId(id); // 6.2.4) Run the Hash N-Degree Quads algorithm, passing | ||
// temporary issuer, and append the result to the hash path list. | ||
self.hashNDegreeQuads(id, issuer, (err, result) => { | ||
if (err) { | ||
return callback(err); | ||
} | ||
const result = yield _this.hashNDegreeQuads(id, issuer); | ||
hashPathList.push(result); | ||
} // 6.3) For each result in the hash path list, | ||
// lexicographically-sorted by the hash in result: | ||
hashPathList.push(result); | ||
callback(); | ||
}); | ||
}, callback); | ||
}, callback => { | ||
// 6.3) For each result in the hash path list, | ||
// lexicographically-sorted by the hash in result: | ||
// TODO: use `String.localeCompare`? | ||
hashPathList.sort((a, b) => a.hash < b.hash ? -1 : a.hash > b.hash ? 1 : 0); | ||
self.forEach(hashPathList, (result, idx, callback) => { | ||
// 6.3.1) For each blank node identifier, existing identifier, | ||
// that was issued a temporary identifier by identifier issuer | ||
// in result, issue a canonical identifier, in the same order, | ||
// using the Issue Identifier algorithm, passing canonical | ||
// issuer and existing identifier. | ||
for (const existing in result.issuer.existing) { | ||
self.canonicalIssuer.getId(existing); | ||
} | ||
callback(); | ||
}, callback); | ||
}], callback); | ||
}, callback); | ||
}, callback => { | ||
hashPathList.sort(_stringHashCompare); | ||
for (const result of hashPathList) { | ||
// 6.3.1) For each blank node identifier, existing identifier, | ||
// that was issued a temporary identifier by identifier issuer | ||
// in result, issue a canonical identifier, in the same order, | ||
// using the Issue Identifier algorithm, passing canonical | ||
// issuer and existing identifier. | ||
const oldIds = result.issuer.getOldIds(); | ||
for (const id of oldIds) { | ||
_this.canonicalIssuer.getId(id); | ||
} | ||
} | ||
} | ||
/* Note: At this point all blank nodes in the set of RDF quads have been | ||
@@ -198,64 +173,65 @@ assigned canonical identifiers, which have been stored in the canonical | ||
// 7) For each quad, quad, in input dataset: | ||
const normalized = []; | ||
self.waterfall([callback => { | ||
self.forEach(self.quads, (quad, idx, callback) => { | ||
// 7.1) Create a copy, quad copy, of quad and replace any existing | ||
// blank node identifiers using the canonical identifiers | ||
// previously issued by canonical issuer. | ||
// Note: We optimize away the copy here. | ||
self.forEachComponent(quad, component => { | ||
if (component.termType === 'BlankNode' && !component.value.startsWith(self.canonicalIssuer.prefix)) { | ||
component.value = self.canonicalIssuer.getId(component.value); | ||
} | ||
}); // 7.2) Add quad copy to the normalized dataset. | ||
normalized.push(NQuads.serializeQuad(quad)); | ||
callback(); | ||
}, callback); | ||
}, callback => { | ||
// sort normalized output | ||
normalized.sort(); // 8) Return the normalized dataset. | ||
for (const quad of _this.quads) { | ||
// 7.1) Create a copy, quad copy, of quad and replace any existing | ||
// blank node identifiers using the canonical identifiers | ||
// previously issued by canonical issuer. | ||
// Note: We optimize with shallow copies here. | ||
const q = _objectSpread({}, quad); | ||
result = normalized.join(''); | ||
return callback(); | ||
}], callback); | ||
}], err => callback(err, result)); | ||
q.subject = _this._useCanonicalId({ | ||
component: q.subject | ||
}); | ||
q.object = _this._useCanonicalId({ | ||
component: q.object | ||
}); | ||
q.graph = _this._useCanonicalId({ | ||
component: q.graph | ||
}); // 7.2) Add quad copy to the normalized dataset. | ||
normalized.push(NQuads.serializeQuad(q)); | ||
} // sort normalized output | ||
normalized.sort(); // 8) Return the normalized dataset. | ||
return normalized.join(''); | ||
})(); | ||
} // 4.6) Hash First Degree Quads | ||
hashFirstDegreeQuads(id, callback) { | ||
const self = this; // return cached hash | ||
hashFirstDegreeQuads(id) { | ||
var _this2 = this; | ||
const info = self.blankNodeInfo[id]; | ||
return _asyncToGenerator(function* () { | ||
// 1) Initialize nquads to an empty list. It will be used to store quads in | ||
// N-Quads format. | ||
const nquads = []; // 2) Get the list of quads `quads` associated with the reference blank node | ||
// identifier in the blank node to quads map. | ||
if ('hash' in info) { | ||
return callback(null, info.hash); | ||
} // 1) Initialize nquads to an empty list. It will be used to store quads in | ||
// N-Quads format. | ||
const info = _this2.blankNodeInfo.get(id); | ||
const quads = info.quads; // 3) For each quad `quad` in `quads`: | ||
const nquads = []; // 2) Get the list of quads quads associated with the reference blank node | ||
// identifier in the blank node to quads map. | ||
for (const quad of quads) { | ||
// 3.1) Serialize the quad in N-Quads format with the following special | ||
// rule: | ||
// 3.1.1) If any component in quad is an blank node, then serialize it | ||
// using a special identifier as follows: | ||
const copy = { | ||
subject: null, | ||
predicate: quad.predicate, | ||
object: null, | ||
graph: null | ||
}; // 3.1.2) If the blank node's existing blank node identifier matches | ||
// the reference blank node identifier then use the blank node | ||
// identifier _:a, otherwise, use the blank node identifier _:z. | ||
const quads = info.quads; // 3) For each quad quad in quads: | ||
self.forEach(quads, (quad, idx, callback) => { | ||
// 3.1) Serialize the quad in N-Quads format with the following special | ||
// rule: | ||
// 3.1.1) If any component in quad is an blank node, then serialize it | ||
// using a special identifier as follows: | ||
const copy = { | ||
predicate: quad.predicate | ||
}; | ||
self.forEachComponent(quad, (component, key) => { | ||
// 3.1.2) If the blank node's existing blank node identifier matches the | ||
// reference blank node identifier then use the blank node identifier | ||
// _:a, otherwise, use the blank node identifier _:z. | ||
copy[key] = self.modifyFirstDegreeComponent(id, component, key); | ||
}); | ||
nquads.push(NQuads.serializeQuad(copy)); | ||
callback(); | ||
}, err => { | ||
if (err) { | ||
return callback(err); | ||
copy.subject = _this2.modifyFirstDegreeComponent(id, quad.subject, 'subject'); | ||
copy.object = _this2.modifyFirstDegreeComponent(id, quad.object, 'object'); | ||
copy.graph = _this2.modifyFirstDegreeComponent(id, quad.graph, 'graph'); | ||
nquads.push(NQuads.serializeQuad(copy)); | ||
} // 4) Sort nquads in lexicographical order. | ||
@@ -267,44 +243,30 @@ | ||
const md = new MessageDigest(self.hashAlgorithm); | ||
const md = new MessageDigest(_this2.hashAlgorithm); | ||
for (let i = 0; i < nquads.length; ++i) { | ||
md.update(nquads[i]); | ||
} // TODO: represent as byte buffer instead to cut memory usage in half | ||
for (const nquad of nquads) { | ||
md.update(nquad); | ||
} | ||
info.hash = md.digest(); | ||
callback(null, info.hash); | ||
}); | ||
info.hash = yield md.digest(); | ||
return info.hash; | ||
})(); | ||
} // 4.7) Hash Related Blank Node | ||
hashRelatedBlankNode(related, quad, issuer, position, callback) { | ||
const self = this; // 1) Set the identifier to use for related, preferring first the canonical | ||
// identifier for related if issued, second the identifier issued by issuer | ||
// if issued, and last, if necessary, the result of the Hash First Degree | ||
// Quads algorithm, passing related. | ||
hashRelatedBlankNode(related, quad, issuer, position) { | ||
var _this3 = this; | ||
let id; | ||
self.waterfall([callback => { | ||
if (self.canonicalIssuer.hasId(related)) { | ||
id = self.canonicalIssuer.getId(related); | ||
return callback(); | ||
} | ||
return _asyncToGenerator(function* () { | ||
// 1) Set the identifier to use for related, preferring first the canonical | ||
// identifier for related if issued, second the identifier issued by issuer | ||
// if issued, and last, if necessary, the result of the Hash First Degree | ||
// Quads algorithm, passing related. | ||
let id; | ||
if (issuer.hasId(related)) { | ||
if (_this3.canonicalIssuer.hasId(related)) { | ||
id = _this3.canonicalIssuer.getId(related); | ||
} else if (issuer.hasId(related)) { | ||
id = issuer.getId(related); | ||
return callback(); | ||
} | ||
self.hashFirstDegreeQuads(related, (err, hash) => { | ||
if (err) { | ||
return callback(err); | ||
} | ||
id = hash; | ||
callback(); | ||
}); | ||
}], err => { | ||
if (err) { | ||
return callback(err); | ||
} else { | ||
id = _this3.blankNodeInfo.get(related).hash; | ||
} // 2) Initialize a string input to the value of position. | ||
@@ -314,3 +276,3 @@ // Note: We use a hash object instead. | ||
const md = new MessageDigest(self.hashAlgorithm); | ||
const md = new MessageDigest(_this3.hashAlgorithm); | ||
md.update(position); // 3) If position is not g, append <, the value of the predicate in quad, | ||
@@ -320,3 +282,3 @@ // and > to input. | ||
if (position !== 'g') { | ||
md.update(self.getRelatedPredicate(quad)); | ||
md.update(_this3.getRelatedPredicate(quad)); | ||
} // 4) Append identifier to input. | ||
@@ -327,30 +289,24 @@ | ||
// algorithm. | ||
// TODO: represent as byte buffer instead to cut memory usage in half | ||
return callback(null, md.digest()); | ||
}); | ||
return md.digest(); | ||
})(); | ||
} // 4.8) Hash N-Degree Quads | ||
hashNDegreeQuads(id, issuer, callback) { | ||
const self = this; // 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
// Note: 2) and 3) handled within `createHashToRelated` | ||
hashNDegreeQuads(id, issuer) { | ||
var _this4 = this; | ||
let hashToRelated; | ||
const md = new MessageDigest(self.hashAlgorithm); | ||
self.waterfall([callback => self.createHashToRelated(id, issuer, (err, result) => { | ||
if (err) { | ||
return callback(err); | ||
} | ||
return _asyncToGenerator(function* () { | ||
// 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
// Note: 2) and 3) handled within `createHashToRelated` | ||
const md = new MessageDigest(_this4.hashAlgorithm); | ||
const hashToRelated = yield _this4.createHashToRelated(id, issuer); // 4) Create an empty string, data to hash. | ||
// Note: We created a hash object `md` above instead. | ||
// 5) For each related hash to blank node list mapping in hash to related | ||
// blank nodes map, sorted lexicographically by related hash: | ||
hashToRelated = result; | ||
callback(); | ||
}), callback => { | ||
// 4) Create an empty string, data to hash. | ||
// Note: We created a hash object `md` above instead. | ||
// 5) For each related hash to blank node list mapping in hash to | ||
// related blank nodes map, sorted lexicographically by related hash: | ||
const hashes = Object.keys(hashToRelated).sort(); | ||
self.forEach(hashes, (hash, idx, callback) => { | ||
const hashes = [...hashToRelated.keys()].sort(); | ||
for (const hash of hashes) { | ||
// 5.1) Append the related hash to the data to hash. | ||
@@ -363,6 +319,13 @@ md.update(hash); // 5.2) Create a string chosen path. | ||
const permutator = new Permutator(hashToRelated[hash]); | ||
self.whilst(() => permutator.hasNext(), nextPermutation => { | ||
const permutation = permutator.next(); // 5.4.1) Create a copy of issuer, issuer copy. | ||
const permuter = new Permuter(hashToRelated.get(hash)); | ||
let i = 0; | ||
while (permuter.hasNext()) { | ||
const permutation = permuter.next(); // Note: batch permutations 3 at a time | ||
if (++i % 3 === 0) { | ||
yield _this4._yield(); | ||
} // 5.4.1) Create a copy of issuer, issuer copy. | ||
let issuerCopy = issuer.clone(); // 5.4.2) Create a string path. | ||
@@ -373,100 +336,92 @@ | ||
const recursionList = []; | ||
self.waterfall([callback => { | ||
// 5.4.4) For each related in permutation: | ||
self.forEach(permutation, (related, idx, callback) => { | ||
// 5.4.4.1) If a canonical identifier has been issued for | ||
// related, append it to path. | ||
if (self.canonicalIssuer.hasId(related)) { | ||
path += self.canonicalIssuer.getId(related); | ||
} else { | ||
// 5.4.4.2) Otherwise: | ||
// 5.4.4.2.1) If issuer copy has not issued an identifier | ||
// for related, append related to recursion list. | ||
if (!issuerCopy.hasId(related)) { | ||
recursionList.push(related); | ||
} // 5.4.4.2.2) Use the Issue Identifier algorithm, passing | ||
// issuer copy and related and append the result to path. | ||
const recursionList = []; // 5.4.4) For each related in permutation: | ||
let nextPermutation = false; | ||
path += issuerCopy.getId(related); | ||
} // 5.4.4.3) If chosen path is not empty and the length of path | ||
// is greater than or equal to the length of chosen path and | ||
// path is lexicographically greater than chosen path, then | ||
// skip to the next permutation. | ||
// Note: Comparing path length to chosen path length can be | ||
// optimized away; only compare lexicographically. | ||
for (const related of permutation) { | ||
// 5.4.4.1) If a canonical identifier has been issued for | ||
// related, append it to path. | ||
if (_this4.canonicalIssuer.hasId(related)) { | ||
path += _this4.canonicalIssuer.getId(related); | ||
} else { | ||
// 5.4.4.2) Otherwise: | ||
// 5.4.4.2.1) If issuer copy has not issued an identifier for | ||
// related, append related to recursion list. | ||
if (!issuerCopy.hasId(related)) { | ||
recursionList.push(related); | ||
} // 5.4.4.2.2) Use the Issue Identifier algorithm, passing | ||
// issuer copy and related and append the result to path. | ||
if (chosenPath.length !== 0 && path > chosenPath) { | ||
// FIXME: may cause inaccurate total depth calculation | ||
return nextPermutation(); | ||
} | ||
path += issuerCopy.getId(related); | ||
} // 5.4.4.3) If chosen path is not empty and the length of path | ||
// is greater than or equal to the length of chosen path and | ||
// path is lexicographically greater than chosen path, then | ||
// skip to the next permutation. | ||
// Note: Comparing path length to chosen path length can be optimized | ||
// away; only compare lexicographically. | ||
callback(); | ||
}, callback); | ||
}, callback => { | ||
// 5.4.5) For each related in recursion list: | ||
self.forEach(recursionList, (related, idx, callback) => { | ||
// 5.4.5.1) Set result to the result of recursively executing | ||
// the Hash N-Degree Quads algorithm, passing related for | ||
// identifier and issuer copy for path identifier issuer. | ||
self.hashNDegreeQuads(related, issuerCopy, (err, result) => { | ||
if (err) { | ||
return callback(err); | ||
} // 5.4.5.2) Use the Issue Identifier algorithm, passing | ||
// issuer copy and related and append the result to path. | ||
if (chosenPath.length !== 0 && path > chosenPath) { | ||
nextPermutation = true; | ||
break; | ||
} | ||
} | ||
path += issuerCopy.getId(related); // 5.4.5.3) Append <, the hash in result, and > to path. | ||
if (nextPermutation) { | ||
continue; | ||
} // 5.4.5) For each related in recursion list: | ||
path += '<' + result.hash + '>'; // 5.4.5.4) Set issuer copy to the identifier issuer in | ||
// result. | ||
issuerCopy = result.issuer; // 5.4.5.5) If chosen path is not empty and the length of | ||
// path is greater than or equal to the length of chosen | ||
// path and path is lexicographically greater than chosen | ||
// path, then skip to the next permutation. | ||
// Note: Comparing path length to chosen path length can be | ||
// optimized away; only compare lexicographically. | ||
for (const related of recursionList) { | ||
// 5.4.5.1) Set result to the result of recursively executing | ||
// the Hash N-Degree Quads algorithm, passing related for | ||
// identifier and issuer copy for path identifier issuer. | ||
const result = yield _this4.hashNDegreeQuads(related, issuerCopy); // 5.4.5.2) Use the Issue Identifier algorithm, passing issuer | ||
// copy and related and append the result to path. | ||
if (chosenPath.length !== 0 && path > chosenPath) { | ||
// FIXME: may cause inaccurate total depth calculation | ||
return nextPermutation(); | ||
} | ||
path += issuerCopy.getId(related); // 5.4.5.3) Append <, the hash in result, and > to path. | ||
callback(); | ||
}); | ||
}, callback); | ||
}, callback => { | ||
// 5.4.6) If chosen path is empty or path is lexicographically | ||
// less than chosen path, set chosen path to path and chosen | ||
// issuer to issuer copy. | ||
if (chosenPath.length === 0 || path < chosenPath) { | ||
chosenPath = path; | ||
chosenIssuer = issuerCopy; | ||
path += `<${result.hash}>`; // 5.4.5.4) Set issuer copy to the identifier issuer in | ||
// result. | ||
issuerCopy = result.issuer; // 5.4.5.5) If chosen path is not empty and the length of path | ||
// is greater than or equal to the length of chosen path and | ||
// path is lexicographically greater than chosen path, then | ||
// skip to the next permutation. | ||
// Note: Comparing path length to chosen path length can be optimized | ||
// away; only compare lexicographically. | ||
if (chosenPath.length !== 0 && path > chosenPath) { | ||
nextPermutation = true; | ||
break; | ||
} | ||
} | ||
callback(); | ||
}], nextPermutation); | ||
}, err => { | ||
if (err) { | ||
return callback(err); | ||
} // 5.5) Append chosen path to data to hash. | ||
if (nextPermutation) { | ||
continue; | ||
} // 5.4.6) If chosen path is empty or path is lexicographically | ||
// less than chosen path, set chosen path to path and chosen | ||
// issuer to issuer copy. | ||
md.update(chosenPath); // 5.6) Replace issuer, by reference, with chosen issuer. | ||
if (chosenPath.length === 0 || path < chosenPath) { | ||
chosenPath = path; | ||
chosenIssuer = issuerCopy; | ||
} | ||
} // 5.5) Append chosen path to data to hash. | ||
issuer = chosenIssuer; | ||
callback(); | ||
}); | ||
}, callback); | ||
}], err => { | ||
// 6) Return issuer and the hash that results from passing data to hash | ||
md.update(chosenPath); // 5.6) Replace issuer, by reference, with chosen issuer. | ||
issuer = chosenIssuer; | ||
} // 6) Return issuer and the hash that results from passing data to hash | ||
// through the hash algorithm. | ||
callback(err, { | ||
hash: md.digest(), | ||
return { | ||
hash: yield md.digest(), | ||
issuer | ||
}); | ||
}); | ||
}; | ||
})(); | ||
} // helper for modifying component during Hash First Degree Quads | ||
@@ -479,6 +434,13 @@ | ||
} | ||
/* Note: A mistake in the URDNA2015 spec that made its way into | ||
implementations (and therefore must stay to avoid interop breakage) | ||
resulted in an assigned canonical ID, if available for | ||
`component.value`, not being used in place of `_:a`/`_:z`, so | ||
we don't use it here. */ | ||
component = util.clone(component); | ||
component.value = component.value === id ? '_:a' : '_:z'; | ||
return component; | ||
return { | ||
termType: 'BlankNode', | ||
value: component.value === id ? '_:a' : '_:z' | ||
}; | ||
} // helper for getting a related predicate | ||
@@ -488,63 +450,159 @@ | ||
getRelatedPredicate(quad) { | ||
return '<' + quad.predicate.value + '>'; | ||
return `<${quad.predicate.value}>`; | ||
} // helper for creating hash to related blank nodes map | ||
createHashToRelated(id, issuer, callback) { | ||
const self = this; // 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
createHashToRelated(id, issuer) { | ||
var _this5 = this; | ||
const hashToRelated = {}; // 2) Get a reference, quads, to the list of quads in the blank node to | ||
// quads map for the key identifier. | ||
return _asyncToGenerator(function* () { | ||
// 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
const hashToRelated = new Map(); // 2) Get a reference, quads, to the list of quads in the blank node to | ||
// quads map for the key identifier. | ||
const quads = self.blankNodeInfo[id].quads; // 3) For each quad in quads: | ||
const quads = _this5.blankNodeInfo.get(id).quads; // 3) For each quad in quads: | ||
self.forEach(quads, (quad, idx, callback) => { | ||
// 3.1) For each component in quad, if component is the subject, object, | ||
// and graph name and it is a blank node that is not identified by | ||
// identifier: | ||
self.forEach(quad, (component, key, callback) => { | ||
if (key === 'predicate' || !(component.termType === 'BlankNode' && component.value !== id)) { | ||
return callback(); | ||
} // 3.1.1) Set hash to the result of the Hash Related Blank Node | ||
// algorithm, passing the blank node identifier for component as | ||
// related, quad, path identifier issuer as issuer, and position as | ||
// either s, o, or g based on whether component is a subject, object, | ||
// graph name, respectively. | ||
let i = 0; | ||
const related = component.value; | ||
const position = POSITIONS[key]; | ||
self.hashRelatedBlankNode(related, quad, issuer, position, (err, hash) => { | ||
if (err) { | ||
return callback(err); | ||
} // 3.1.2) Add a mapping of hash to the blank node identifier for | ||
// component to hash to related blank nodes map, adding an entry as | ||
// necessary. | ||
for (const quad of quads) { | ||
// Note: batch hashing related blank node quads 100 at a time | ||
if (++i % 100 === 0) { | ||
yield _this5._yield(); | ||
} // 3.1) For each component in quad, if component is the subject, object, | ||
// and graph name and it is a blank node that is not identified by | ||
// identifier: | ||
// steps 3.1.1 and 3.1.2 occur in helpers: | ||
if (hash in hashToRelated) { | ||
hashToRelated[hash].push(related); | ||
} else { | ||
hashToRelated[hash] = [related]; | ||
} | ||
yield Promise.all([_this5._addRelatedBlankNodeHash({ | ||
quad, | ||
component: quad.subject, | ||
position: 's', | ||
id, | ||
issuer, | ||
hashToRelated | ||
}), _this5._addRelatedBlankNodeHash({ | ||
quad, | ||
component: quad.object, | ||
position: 'o', | ||
id, | ||
issuer, | ||
hashToRelated | ||
}), _this5._addRelatedBlankNodeHash({ | ||
quad, | ||
component: quad.graph, | ||
position: 'g', | ||
id, | ||
issuer, | ||
hashToRelated | ||
})]); | ||
} | ||
callback(); | ||
}); | ||
}, callback); | ||
}, err => callback(err, hashToRelated)); | ||
} // helper that iterates over quad components (skips predicate) | ||
return hashToRelated; | ||
})(); | ||
} | ||
_hashAndTrackBlankNode({ | ||
id, | ||
hashToBlankNodes | ||
}) { | ||
var _this6 = this; | ||
forEachComponent(quad, op) { | ||
for (const key in quad) { | ||
// skip `predicate` | ||
if (key === 'predicate') { | ||
continue; | ||
return _asyncToGenerator(function* () { | ||
// 5.3.1) Create a hash, hash, according to the Hash First Degree | ||
// Quads algorithm. | ||
const hash = yield _this6.hashFirstDegreeQuads(id); // 5.3.2) Add hash and identifier to hash to blank nodes map, | ||
// creating a new entry if necessary. | ||
const idList = hashToBlankNodes.get(hash); | ||
if (!idList) { | ||
hashToBlankNodes.set(hash, [id]); | ||
} else { | ||
idList.push(id); | ||
} | ||
})(); | ||
} | ||
op(quad[key], key, quad); | ||
_addBlankNodeQuadInfo({ | ||
quad, | ||
component | ||
}) { | ||
if (component.termType !== 'BlankNode') { | ||
return; | ||
} | ||
const id = component.value; | ||
const info = this.blankNodeInfo.get(id); | ||
if (info) { | ||
info.quads.add(quad); | ||
} else { | ||
this.blankNodeInfo.set(id, { | ||
quads: new Set([quad]), | ||
hash: null | ||
}); | ||
} | ||
} | ||
}; | ||
_addRelatedBlankNodeHash({ | ||
quad, | ||
component, | ||
position, | ||
id, | ||
issuer, | ||
hashToRelated | ||
}) { | ||
var _this7 = this; | ||
return _asyncToGenerator(function* () { | ||
if (!(component.termType === 'BlankNode' && component.value !== id)) { | ||
return; | ||
} // 3.1.1) Set hash to the result of the Hash Related Blank Node | ||
// algorithm, passing the blank node identifier for component as | ||
// related, quad, path identifier issuer as issuer, and position as | ||
// either s, o, or g based on whether component is a subject, object, | ||
// graph name, respectively. | ||
const related = component.value; | ||
const hash = yield _this7.hashRelatedBlankNode(related, quad, issuer, position); // 3.1.2) Add a mapping of hash to the blank node identifier for | ||
// component to hash to related blank nodes map, adding an entry as | ||
// necessary. | ||
const entries = hashToRelated.get(hash); | ||
if (entries) { | ||
entries.push(related); | ||
} else { | ||
hashToRelated.set(hash, [related]); | ||
} | ||
})(); | ||
} | ||
_useCanonicalId({ | ||
component | ||
}) { | ||
if (component.termType === 'BlankNode' && !component.value.startsWith(this.canonicalIssuer.prefix)) { | ||
return { | ||
termType: 'BlankNode', | ||
value: this.canonicalIssuer.getId(component.value) | ||
}; | ||
} | ||
return component; | ||
} | ||
_yield() { | ||
return _asyncToGenerator(function* () { | ||
return new Promise(resolve => setImmediate(resolve)); | ||
})(); | ||
} | ||
}; | ||
function _stringHashCompare(a, b) { | ||
return a.hash < b.hash ? -1 : a.hash > b.hash ? 1 : 0; | ||
} |
/* | ||
* Copyright (c) 2016 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
'use strict'; | ||
function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; } | ||
function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { _defineProperty(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; } | ||
function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; } | ||
const IdentifierIssuer = require('./IdentifierIssuer'); | ||
@@ -10,21 +16,13 @@ | ||
const Permutator = require('./Permutator'); | ||
const Permuter = require('./Permuter'); | ||
const NQuads = require('./NQuads'); | ||
const util = require('./util'); | ||
const POSITIONS = { | ||
subject: 's', | ||
object: 'o', | ||
graph: 'g' | ||
}; | ||
module.exports = class URDNA2015Sync { | ||
constructor() { | ||
this.name = 'URDNA2015'; | ||
this.blankNodeInfo = {}; | ||
this.hashToBlankNodes = {}; | ||
this.blankNodeInfo = new Map(); | ||
this.canonicalIssuer = new IdentifierIssuer('_:c14n'); | ||
this.hashAlgorithm = 'sha256'; | ||
this.quads; | ||
this.quads = null; | ||
} // 4.4) Normalization Algorithm | ||
@@ -34,8 +32,5 @@ | ||
main(dataset) { | ||
const self = this; | ||
self.quads = dataset; // 1) Create the normalization state. | ||
// Note: Optimize by generating non-normalized blank node map concurrently. | ||
this.quads = dataset; // 1) Create the normalization state. | ||
// 2) For every quad in input dataset: | ||
const nonNormalized = {}; // 2) For every quad in input dataset: | ||
for (const quad of dataset) { | ||
@@ -45,17 +40,15 @@ // 2.1) For each blank node that occurs in the quad, add a reference | ||
// quads map, creating a new entry if necessary. | ||
self.forEachComponent(quad, component => { | ||
if (component.termType !== 'BlankNode') { | ||
return; | ||
} | ||
this._addBlankNodeQuadInfo({ | ||
quad, | ||
component: quad.subject | ||
}); | ||
const id = component.value; | ||
this._addBlankNodeQuadInfo({ | ||
quad, | ||
component: quad.object | ||
}); | ||
if (id in self.blankNodeInfo) { | ||
self.blankNodeInfo[id].quads.push(quad); | ||
} else { | ||
nonNormalized[id] = true; | ||
self.blankNodeInfo[id] = { | ||
quads: [quad] | ||
}; | ||
} | ||
this._addBlankNodeQuadInfo({ | ||
quad, | ||
component: quad.graph | ||
}); | ||
@@ -66,62 +59,56 @@ } // 3) Create a list of non-normalized blank node identifiers | ||
// Note: We use a map here and it was generated during step 2. | ||
// 4) Initialize simple, a boolean flag, to true. | ||
// 4) `simple` flag is skipped -- loop is optimized away. This optimization | ||
// is permitted because there was a typo in the hash first degree quads | ||
// algorithm in the URDNA2015 spec that was implemented widely making it | ||
// such that it could not be fixed; the result was that the loop only | ||
// needs to be run once and the first degree quad hashes will never change. | ||
// 5.1-5.2 are skipped; first degree quad hashes are generated just once | ||
// for all non-normalized blank nodes. | ||
// 5.3) For each blank node identifier identifier in non-normalized | ||
// identifiers: | ||
let simple = true; // 5) While simple is true, issue canonical identifiers for blank nodes: | ||
const hashToBlankNodes = new Map(); | ||
const nonNormalized = [...this.blankNodeInfo.keys()]; | ||
while (simple) { | ||
// 5.1) Set simple to false. | ||
simple = false; // 5.2) Clear hash to blank nodes map. | ||
for (const id of nonNormalized) { | ||
// steps 5.3.1 and 5.3.2: | ||
this._hashAndTrackBlankNode({ | ||
id, | ||
hashToBlankNodes | ||
}); | ||
} // 5.4) For each hash to identifier list mapping in hash to blank | ||
// nodes map, lexicographically-sorted by hash: | ||
self.hashToBlankNodes = {}; // 5.3) For each blank node identifier identifier in non-normalized | ||
// identifiers: | ||
for (const id in nonNormalized) { | ||
// 5.3.1) Create a hash, hash, according to the Hash First Degree | ||
// Quads algorithm. | ||
const hash = self.hashFirstDegreeQuads(id); // 5.3.2) Add hash and identifier to hash to blank nodes map, | ||
// creating a new entry if necessary. | ||
const hashes = [...hashToBlankNodes.keys()].sort(); // optimize away second sort, gather non-unique hashes in order as we go | ||
if (hash in self.hashToBlankNodes) { | ||
self.hashToBlankNodes[hash].push(id); | ||
} else { | ||
self.hashToBlankNodes[hash] = [id]; | ||
} | ||
} // 5.4) For each hash to identifier list mapping in hash to blank | ||
// nodes map, lexicographically-sorted by hash: | ||
const nonUnique = []; | ||
for (const hash of hashes) { | ||
// 5.4.1) If the length of identifier list is greater than 1, | ||
// continue to the next mapping. | ||
const idList = hashToBlankNodes.get(hash); | ||
const hashes = Object.keys(self.hashToBlankNodes).sort(); | ||
if (idList.length > 1) { | ||
nonUnique.push(idList); | ||
continue; | ||
} // 5.4.2) Use the Issue Identifier algorithm, passing canonical | ||
// issuer and the single blank node identifier in identifier | ||
// list, identifier, to issue a canonical replacement identifier | ||
// for identifier. | ||
for (let i = 0; i < hashes.length; ++i) { | ||
// 5.4.1) If the length of identifier list is greater than 1, | ||
// continue to the next mapping. | ||
const hash = hashes[i]; | ||
const idList = self.hashToBlankNodes[hash]; | ||
if (idList.length > 1) { | ||
continue; | ||
} // 5.4.2) Use the Issue Identifier algorithm, passing canonical | ||
// issuer and the single blank node identifier in identifier | ||
// list, identifier, to issue a canonical replacement identifier | ||
// for identifier. | ||
// TODO: consider changing `getId` to `issue` | ||
const id = idList[0]; | ||
self.canonicalIssuer.getId(id); // 5.4.3) Remove identifier from non-normalized identifiers. | ||
delete nonNormalized[id]; // 5.4.4) Remove hash from the hash to blank nodes map. | ||
delete self.hashToBlankNodes[hash]; // 5.4.5) Set simple to true. | ||
simple = true; | ||
} | ||
const id = idList[0]; | ||
this.canonicalIssuer.getId(id); // Note: These steps are skipped, optimized away since the loop | ||
// only needs to be run once. | ||
// 5.4.3) Remove identifier from non-normalized identifiers. | ||
// 5.4.4) Remove hash from the hash to blank nodes map. | ||
// 5.4.5) Set simple to true. | ||
} // 6) For each hash to identifier list mapping in hash to blank nodes map, | ||
// lexicographically-sorted by hash: | ||
// Note: sort optimized away, use `nonUnique`. | ||
const hashes = Object.keys(self.hashToBlankNodes).sort(); | ||
for (let i = 0; i < hashes.length; ++i) { | ||
for (const idList of nonUnique) { | ||
// 6.1) Create hash path list where each item will be a result of | ||
@@ -131,11 +118,6 @@ // running the Hash N-Degree Quads algorithm. | ||
const hash = hashes[i]; | ||
const idList = self.hashToBlankNodes[hash]; | ||
for (let j = 0; j < idList.length; ++j) { | ||
for (const id of idList) { | ||
// 6.2.1) If a canonical identifier has already been issued for | ||
// identifier, continue to the next identifier. | ||
const id = idList[j]; | ||
if (self.canonicalIssuer.hasId(id)) { | ||
if (this.canonicalIssuer.hasId(id)) { | ||
continue; | ||
@@ -153,12 +135,11 @@ } // 6.2.2) Create temporary issuer, an identifier issuer | ||
const result = self.hashNDegreeQuads(id, issuer); | ||
const result = this.hashNDegreeQuads(id, issuer); | ||
hashPathList.push(result); | ||
} // 6.3) For each result in the hash path list, | ||
// lexicographically-sorted by the hash in result: | ||
// TODO: use `String.localeCompare`? | ||
hashPathList.sort((a, b) => a.hash < b.hash ? -1 : a.hash > b.hash ? 1 : 0); | ||
hashPathList.sort(_stringHashCompare); | ||
for (let j = 0; j < hashPathList.length; ++j) { | ||
for (const result of hashPathList) { | ||
// 6.3.1) For each blank node identifier, existing identifier, | ||
@@ -169,6 +150,6 @@ // that was issued a temporary identifier by identifier issuer | ||
// issuer and existing identifier. | ||
const result = hashPathList[j]; | ||
const oldIds = result.issuer.getOldIds(); | ||
for (const existing in result.issuer.existing) { | ||
self.canonicalIssuer.getId(existing); | ||
for (const id of oldIds) { | ||
this.canonicalIssuer.getId(id); | ||
} | ||
@@ -186,15 +167,20 @@ } | ||
for (let i = 0; i < self.quads.length; ++i) { | ||
for (const quad of this.quads) { | ||
// 7.1) Create a copy, quad copy, of quad and replace any existing | ||
// blank node identifiers using the canonical identifiers | ||
// previously issued by canonical issuer. | ||
// Note: We optimize away the copy here. | ||
const quad = self.quads[i]; | ||
self.forEachComponent(quad, component => { | ||
if (component.termType === 'BlankNode' && !component.value.startsWith(self.canonicalIssuer.prefix)) { | ||
component.value = self.canonicalIssuer.getId(component.value); | ||
} | ||
// Note: We optimize with shallow copies here. | ||
const q = _objectSpread({}, quad); | ||
q.subject = this._useCanonicalId({ | ||
component: q.subject | ||
}); | ||
q.object = this._useCanonicalId({ | ||
component: q.object | ||
}); | ||
q.graph = this._useCanonicalId({ | ||
component: q.graph | ||
}); // 7.2) Add quad copy to the normalized dataset. | ||
normalized.push(NQuads.serializeQuad(quad)); | ||
normalized.push(NQuads.serializeQuad(q)); | ||
} // sort normalized output | ||
@@ -210,32 +196,27 @@ | ||
hashFirstDegreeQuads(id) { | ||
const self = this; // return cached hash | ||
const info = self.blankNodeInfo[id]; | ||
if ('hash' in info) { | ||
return info.hash; | ||
} // 1) Initialize nquads to an empty list. It will be used to store quads in | ||
// 1) Initialize nquads to an empty list. It will be used to store quads in | ||
// N-Quads format. | ||
const nquads = []; // 2) Get the list of quads `quads` associated with the reference blank node | ||
// identifier in the blank node to quads map. | ||
const info = this.blankNodeInfo.get(id); | ||
const quads = info.quads; // 3) For each quad `quad` in `quads`: | ||
for (let i = 0; i < quads.length; ++i) { | ||
const quad = quads[i]; // 3.1) Serialize the quad in N-Quads format with the following special | ||
for (const quad of quads) { | ||
// 3.1) Serialize the quad in N-Quads format with the following special | ||
// rule: | ||
// 3.1.1) If any component in quad is an blank node, then serialize it | ||
// using a special identifier as follows: | ||
const copy = { | ||
subject: null, | ||
predicate: quad.predicate, | ||
object: null, | ||
graph: null | ||
}; // 3.1.2) If the blank node's existing blank node identifier matches | ||
// the reference blank node identifier then use the blank node | ||
// identifier _:a, otherwise, use the blank node identifier _:z. | ||
const copy = { | ||
predicate: quad.predicate | ||
}; | ||
self.forEachComponent(quad, (component, key) => { | ||
// 3.1.2) If the blank node's existing blank node identifier matches | ||
// the reference blank node identifier then use the blank node | ||
// identifier _:a, otherwise, use the blank node identifier _:z. | ||
copy[key] = self.modifyFirstDegreeComponent(id, component, key); | ||
}); | ||
copy.subject = this.modifyFirstDegreeComponent(id, quad.subject, 'subject'); | ||
copy.object = this.modifyFirstDegreeComponent(id, quad.object, 'object'); | ||
copy.graph = this.modifyFirstDegreeComponent(id, quad.graph, 'graph'); | ||
nquads.push(NQuads.serializeQuad(copy)); | ||
@@ -248,9 +229,8 @@ } // 4) Sort nquads in lexicographical order. | ||
const md = new MessageDigest(self.hashAlgorithm); | ||
const md = new MessageDigest(this.hashAlgorithm); | ||
for (let i = 0; i < nquads.length; ++i) { | ||
md.update(nquads[i]); | ||
} // TODO: represent as byte buffer instead to cut memory usage in half | ||
for (const nquad of nquads) { | ||
md.update(nquad); | ||
} | ||
info.hash = md.digest(); | ||
@@ -262,15 +242,14 @@ return info.hash; | ||
hashRelatedBlankNode(related, quad, issuer, position) { | ||
const self = this; // 1) Set the identifier to use for related, preferring first the canonical | ||
// 1) Set the identifier to use for related, preferring first the canonical | ||
// identifier for related if issued, second the identifier issued by issuer | ||
// if issued, and last, if necessary, the result of the Hash First Degree | ||
// Quads algorithm, passing related. | ||
let id; | ||
if (self.canonicalIssuer.hasId(related)) { | ||
id = self.canonicalIssuer.getId(related); | ||
if (this.canonicalIssuer.hasId(related)) { | ||
id = this.canonicalIssuer.getId(related); | ||
} else if (issuer.hasId(related)) { | ||
id = issuer.getId(related); | ||
} else { | ||
id = self.hashFirstDegreeQuads(related); | ||
id = this.blankNodeInfo.get(related).hash; | ||
} // 2) Initialize a string input to the value of position. | ||
@@ -280,3 +259,3 @@ // Note: We use a hash object instead. | ||
const md = new MessageDigest(self.hashAlgorithm); | ||
const md = new MessageDigest(this.hashAlgorithm); | ||
md.update(position); // 3) If position is not g, append <, the value of the predicate in quad, | ||
@@ -286,3 +265,3 @@ // and > to input. | ||
if (position !== 'g') { | ||
md.update(self.getRelatedPredicate(quad)); | ||
md.update(this.getRelatedPredicate(quad)); | ||
} // 4) Append identifier to input. | ||
@@ -293,3 +272,2 @@ | ||
// algorithm. | ||
// TODO: represent as byte buffer instead to cut memory usage in half | ||
@@ -301,8 +279,7 @@ return md.digest(); | ||
hashNDegreeQuads(id, issuer) { | ||
const self = this; // 1) Create a hash to related blank nodes map for storing hashes that | ||
// 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
// Note: 2) and 3) handled within `createHashToRelated` | ||
const md = new MessageDigest(self.hashAlgorithm); | ||
const hashToRelated = self.createHashToRelated(id, issuer); // 4) Create an empty string, data to hash. | ||
const md = new MessageDigest(this.hashAlgorithm); | ||
const hashToRelated = this.createHashToRelated(id, issuer); // 4) Create an empty string, data to hash. | ||
// Note: We created a hash object `md` above instead. | ||
@@ -312,7 +289,6 @@ // 5) For each related hash to blank node list mapping in hash to related | ||
const hashes = Object.keys(hashToRelated).sort(); | ||
const hashes = [...hashToRelated.keys()].sort(); | ||
for (let i = 0; i < hashes.length; ++i) { | ||
for (const hash of hashes) { | ||
// 5.1) Append the related hash to the data to hash. | ||
const hash = hashes[i]; | ||
md.update(hash); // 5.2) Create a string chosen path. | ||
@@ -324,6 +300,6 @@ | ||
const permutator = new Permutator(hashToRelated[hash]); | ||
const permuter = new Permuter(hashToRelated.get(hash)); | ||
while (permutator.hasNext()) { | ||
const permutation = permutator.next(); // 5.4.1) Create a copy of issuer, issuer copy. | ||
while (permuter.hasNext()) { | ||
const permutation = permuter.next(); // 5.4.1) Create a copy of issuer, issuer copy. | ||
@@ -339,9 +315,7 @@ let issuerCopy = issuer.clone(); // 5.4.2) Create a string path. | ||
for (let j = 0; j < permutation.length; ++j) { | ||
for (const related of permutation) { | ||
// 5.4.4.1) If a canonical identifier has been issued for | ||
// related, append it to path. | ||
const related = permutation[j]; | ||
if (self.canonicalIssuer.hasId(related)) { | ||
path += self.canonicalIssuer.getId(related); | ||
if (this.canonicalIssuer.hasId(related)) { | ||
path += this.canonicalIssuer.getId(related); | ||
} else { | ||
@@ -377,8 +351,7 @@ // 5.4.4.2) Otherwise: | ||
for (let j = 0; j < recursionList.length; ++j) { | ||
for (const related of recursionList) { | ||
// 5.4.5.1) Set result to the result of recursively executing | ||
// the Hash N-Degree Quads algorithm, passing related for | ||
// identifier and issuer copy for path identifier issuer. | ||
const related = recursionList[j]; | ||
const result = self.hashNDegreeQuads(related, issuerCopy); // 5.4.5.2) Use the Issue Identifier algorithm, passing issuer | ||
const result = this.hashNDegreeQuads(related, issuerCopy); // 5.4.5.2) Use the Issue Identifier algorithm, passing issuer | ||
// copy and related and append the result to path. | ||
@@ -388,3 +361,3 @@ | ||
path += '<' + result.hash + '>'; // 5.4.5.4) Set issuer copy to the identifier issuer in | ||
path += `<${result.hash}>`; // 5.4.5.4) Set issuer copy to the identifier issuer in | ||
// result. | ||
@@ -437,6 +410,13 @@ | ||
} | ||
/* Note: A mistake in the URDNA2015 spec that made its way into | ||
implementations (and therefore must stay to avoid interop breakage) | ||
resulted in an assigned canonical ID, if available for | ||
`component.value`, not being used in place of `_:a`/`_:z`, so | ||
we don't use it here. */ | ||
component = util.clone(component); | ||
component.value = component.value === id ? '_:a' : '_:z'; | ||
return component; | ||
return { | ||
termType: 'BlankNode', | ||
value: component.value === id ? '_:a' : '_:z' | ||
}; | ||
} // helper for getting a related predicate | ||
@@ -446,3 +426,3 @@ | ||
getRelatedPredicate(quad) { | ||
return '<' + quad.predicate.value + '>'; | ||
return `<${quad.predicate.value}>`; | ||
} // helper for creating hash to related blank nodes map | ||
@@ -452,57 +432,132 @@ | ||
createHashToRelated(id, issuer) { | ||
const self = this; // 1) Create a hash to related blank nodes map for storing hashes that | ||
// 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
const hashToRelated = {}; // 2) Get a reference, quads, to the list of quads in the blank node to | ||
const hashToRelated = new Map(); // 2) Get a reference, quads, to the list of quads in the blank node to | ||
// quads map for the key identifier. | ||
const quads = self.blankNodeInfo[id].quads; // 3) For each quad in quads: | ||
const quads = this.blankNodeInfo.get(id).quads; // 3) For each quad in quads: | ||
for (let i = 0; i < quads.length; ++i) { | ||
for (const quad of quads) { | ||
// 3.1) For each component in quad, if component is the subject, object, | ||
// and graph name and it is a blank node that is not identified by | ||
// or graph name and it is a blank node that is not identified by | ||
// identifier: | ||
const quad = quads[i]; | ||
// steps 3.1.1 and 3.1.2 occur in helpers: | ||
this._addRelatedBlankNodeHash({ | ||
quad, | ||
component: quad.subject, | ||
position: 's', | ||
id, | ||
issuer, | ||
hashToRelated | ||
}); | ||
for (const key in quad) { | ||
const component = quad[key]; | ||
this._addRelatedBlankNodeHash({ | ||
quad, | ||
component: quad.object, | ||
position: 'o', | ||
id, | ||
issuer, | ||
hashToRelated | ||
}); | ||
if (key === 'predicate' || !(component.termType === 'BlankNode' && component.value !== id)) { | ||
continue; | ||
} // 3.1.1) Set hash to the result of the Hash Related Blank Node | ||
// algorithm, passing the blank node identifier for component as | ||
// related, quad, path identifier issuer as issuer, and position as | ||
// either s, o, or g based on whether component is a subject, object, | ||
// graph name, respectively. | ||
this._addRelatedBlankNodeHash({ | ||
quad, | ||
component: quad.graph, | ||
position: 'g', | ||
id, | ||
issuer, | ||
hashToRelated | ||
}); | ||
} | ||
return hashToRelated; | ||
} | ||
const related = component.value; | ||
const position = POSITIONS[key]; | ||
const hash = self.hashRelatedBlankNode(related, quad, issuer, position); // 3.1.2) Add a mapping of hash to the blank node identifier for | ||
// component to hash to related blank nodes map, adding an entry as | ||
// necessary. | ||
_hashAndTrackBlankNode({ | ||
id, | ||
hashToBlankNodes | ||
}) { | ||
// 5.3.1) Create a hash, hash, according to the Hash First Degree | ||
// Quads algorithm. | ||
const hash = this.hashFirstDegreeQuads(id); // 5.3.2) Add hash and identifier to hash to blank nodes map, | ||
// creating a new entry if necessary. | ||
if (hash in hashToRelated) { | ||
hashToRelated[hash].push(related); | ||
} else { | ||
hashToRelated[hash] = [related]; | ||
} | ||
} | ||
const idList = hashToBlankNodes.get(hash); | ||
if (!idList) { | ||
hashToBlankNodes.set(hash, [id]); | ||
} else { | ||
idList.push(id); | ||
} | ||
} | ||
return hashToRelated; | ||
} // helper that iterates over quad components (skips predicate) | ||
_addBlankNodeQuadInfo({ | ||
quad, | ||
component | ||
}) { | ||
if (component.termType !== 'BlankNode') { | ||
return; | ||
} | ||
const id = component.value; | ||
const info = this.blankNodeInfo.get(id); | ||
forEachComponent(quad, op) { | ||
for (const key in quad) { | ||
// skip `predicate` | ||
if (key === 'predicate') { | ||
continue; | ||
} | ||
if (info) { | ||
info.quads.add(quad); | ||
} else { | ||
this.blankNodeInfo.set(id, { | ||
quads: new Set([quad]), | ||
hash: null | ||
}); | ||
} | ||
} | ||
op(quad[key], key, quad); | ||
_addRelatedBlankNodeHash({ | ||
quad, | ||
component, | ||
position, | ||
id, | ||
issuer, | ||
hashToRelated | ||
}) { | ||
if (!(component.termType === 'BlankNode' && component.value !== id)) { | ||
return; | ||
} // 3.1.1) Set hash to the result of the Hash Related Blank Node | ||
// algorithm, passing the blank node identifier for component as | ||
// related, quad, path identifier issuer as issuer, and position as | ||
// either s, o, or g based on whether component is a subject, object, | ||
// graph name, respectively. | ||
const related = component.value; | ||
const hash = this.hashRelatedBlankNode(related, quad, issuer, position); // 3.1.2) Add a mapping of hash to the blank node identifier for | ||
// component to hash to related blank nodes map, adding an entry as | ||
// necessary. | ||
const entries = hashToRelated.get(hash); | ||
if (entries) { | ||
entries.push(related); | ||
} else { | ||
hashToRelated.set(hash, [related]); | ||
} | ||
} | ||
}; | ||
_useCanonicalId({ | ||
component | ||
}) { | ||
if (component.termType === 'BlankNode' && !component.value.startsWith(this.canonicalIssuer.prefix)) { | ||
return { | ||
termType: 'BlankNode', | ||
value: this.canonicalIssuer.getId(component.value) | ||
}; | ||
} | ||
return component; | ||
} | ||
}; | ||
function _stringHashCompare(a, b) { | ||
return a.hash < b.hash ? -1 : a.hash > b.hash ? 1 : 0; | ||
} |
/* | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
'use strict'; | ||
function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } } | ||
function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; } | ||
const URDNA2015 = require('./URDNA2015'); | ||
const util = require('./util'); | ||
module.exports = class URDNA2012 extends URDNA2015 { | ||
constructor(options) { | ||
super(options); | ||
constructor() { | ||
super(); | ||
this.name = 'URGNA2012'; | ||
@@ -23,11 +25,13 @@ this.hashAlgorithm = 'sha1'; | ||
component = util.clone(component); | ||
if (key === 'name') { | ||
component.value = '_:g'; | ||
} else { | ||
component.value = component.value === id ? '_:a' : '_:z'; | ||
if (key === 'graph') { | ||
return { | ||
termType: 'BlankNode', | ||
value: '_:g' | ||
}; | ||
} | ||
return component; | ||
return { | ||
termType: 'BlankNode', | ||
value: component.value === id ? '_:a' : '_:z' | ||
}; | ||
} // helper for getting a related predicate | ||
@@ -41,53 +45,61 @@ | ||
createHashToRelated(id, issuer, callback) { | ||
const self = this; // 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
createHashToRelated(id, issuer) { | ||
var _this = this; | ||
const hashToRelated = {}; // 2) Get a reference, quads, to the list of quads in the blank node to | ||
// quads map for the key identifier. | ||
return _asyncToGenerator(function* () { | ||
// 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
const hashToRelated = new Map(); // 2) Get a reference, quads, to the list of quads in the blank node to | ||
// quads map for the key identifier. | ||
const quads = self.blankNodeInfo[id].quads; // 3) For each quad in quads: | ||
const quads = _this.blankNodeInfo.get(id).quads; // 3) For each quad in quads: | ||
self.forEach(quads, (quad, idx, callback) => { | ||
// 3.1) If the quad's subject is a blank node that does not match | ||
// identifier, set hash to the result of the Hash Related Blank Node | ||
// algorithm, passing the blank node identifier for subject as related, | ||
// quad, path identifier issuer as issuer, and p as position. | ||
let position; | ||
let related; | ||
if (quad.subject.termType === 'BlankNode' && quad.subject.value !== id) { | ||
related = quad.subject.value; | ||
position = 'p'; | ||
} else if (quad.object.termType === 'BlankNode' && quad.object.value !== id) { | ||
// 3.2) Otherwise, if quad's object is a blank node that does not match | ||
// identifier, to the result of the Hash Related Blank Node algorithm, | ||
// passing the blank node identifier for object as related, quad, path | ||
// identifier issuer as issuer, and r as position. | ||
related = quad.object.value; | ||
position = 'r'; | ||
} else { | ||
// 3.3) Otherwise, continue to the next quad. | ||
return callback(); | ||
} // 3.4) Add a mapping of hash to the blank node identifier for the | ||
// component that matched (subject or object) to hash to related blank | ||
// nodes map, adding an entry as necessary. | ||
let i = 0; | ||
for (const quad of quads) { | ||
// 3.1) If the quad's subject is a blank node that does not match | ||
// identifier, set hash to the result of the Hash Related Blank Node | ||
// algorithm, passing the blank node identifier for subject as related, | ||
// quad, path identifier issuer as issuer, and p as position. | ||
let position; | ||
let related; | ||
self.hashRelatedBlankNode(related, quad, issuer, position, (err, hash) => { | ||
if (err) { | ||
return callback(err); | ||
} | ||
if (quad.subject.termType === 'BlankNode' && quad.subject.value !== id) { | ||
related = quad.subject.value; | ||
position = 'p'; | ||
} else if (quad.object.termType === 'BlankNode' && quad.object.value !== id) { | ||
// 3.2) Otherwise, if quad's object is a blank node that does not match | ||
// identifier, to the result of the Hash Related Blank Node algorithm, | ||
// passing the blank node identifier for object as related, quad, path | ||
// identifier issuer as issuer, and r as position. | ||
related = quad.object.value; | ||
position = 'r'; | ||
} else { | ||
// 3.3) Otherwise, continue to the next quad. | ||
continue; | ||
} // Note: batch hashing related blank nodes 100 at a time | ||
if (hash in hashToRelated) { | ||
hashToRelated[hash].push(related); | ||
if (++i % 100 === 0) { | ||
yield _this._yield(); | ||
} // 3.4) Add a mapping of hash to the blank node identifier for the | ||
// component that matched (subject or object) to hash to related blank | ||
// nodes map, adding an entry as necessary. | ||
const hash = yield _this.hashRelatedBlankNode(related, quad, issuer, position); | ||
const entries = hashToRelated.get(hash); | ||
if (entries) { | ||
entries.push(related); | ||
} else { | ||
hashToRelated[hash] = [related]; | ||
hashToRelated.set(hash, [related]); | ||
} | ||
} | ||
callback(); | ||
}); | ||
}, err => callback(err, hashToRelated)); | ||
return hashToRelated; | ||
})(); | ||
} | ||
}; |
/* | ||
* Copyright (c) 2016 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
@@ -8,4 +8,2 @@ 'use strict'; | ||
const util = require('./util'); | ||
module.exports = class URDNA2012Sync extends URDNA2015Sync { | ||
@@ -24,11 +22,13 @@ constructor() { | ||
component = util.clone(component); | ||
if (key === 'name') { | ||
component.value = '_:g'; | ||
} else { | ||
component.value = component.value === id ? '_:a' : '_:z'; | ||
if (key === 'graph') { | ||
return { | ||
termType: 'BlankNode', | ||
value: '_:g' | ||
}; | ||
} | ||
return component; | ||
return { | ||
termType: 'BlankNode', | ||
value: component.value === id ? '_:a' : '_:z' | ||
}; | ||
} // helper for getting a related predicate | ||
@@ -43,11 +43,10 @@ | ||
createHashToRelated(id, issuer) { | ||
const self = this; // 1) Create a hash to related blank nodes map for storing hashes that | ||
// 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
const hashToRelated = {}; // 2) Get a reference, quads, to the list of quads in the blank node to | ||
const hashToRelated = new Map(); // 2) Get a reference, quads, to the list of quads in the blank node to | ||
// quads map for the key identifier. | ||
const quads = self.blankNodeInfo[id].quads; // 3) For each quad in quads: | ||
const quads = this.blankNodeInfo.get(id).quads; // 3) For each quad in quads: | ||
for (let i = 0; i < quads.length; ++i) { | ||
for (const quad of quads) { | ||
// 3.1) If the quad's subject is a blank node that does not match | ||
@@ -57,3 +56,2 @@ // identifier, set hash to the result of the Hash Related Blank Node | ||
// quad, path identifier issuer as issuer, and p as position. | ||
const quad = quads[i]; | ||
let position; | ||
@@ -80,8 +78,9 @@ let related; | ||
const hash = self.hashRelatedBlankNode(related, quad, issuer, position); | ||
const hash = this.hashRelatedBlankNode(related, quad, issuer, position); | ||
const entries = hashToRelated.get(hash); | ||
if (hash in hashToRelated) { | ||
hashToRelated[hash].push(related); | ||
if (entries) { | ||
entries.push(related); | ||
} else { | ||
hashToRelated[hash] = [related]; | ||
hashToRelated.set(hash, [related]); | ||
} | ||
@@ -88,0 +87,0 @@ } |
/* | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
'use strict'; | ||
const util = require('./util'); | ||
module.exports = class IdentifierIssuer { | ||
@@ -14,7 +12,9 @@ /** | ||
* @param prefix the prefix to use ('<prefix><counter>'). | ||
* @param existing an existing Map to use. | ||
* @param counter the counter to use. | ||
*/ | ||
constructor(prefix) { | ||
constructor(prefix, existing = new Map(), counter = 0) { | ||
this.prefix = prefix; | ||
this.counter = 0; | ||
this.existing = {}; | ||
this._existing = existing; | ||
this.counter = counter; | ||
} | ||
@@ -28,6 +28,4 @@ | ||
clone() { | ||
const copy = new IdentifierIssuer(this.prefix); | ||
copy.counter = this.counter; | ||
copy.existing = util.clone(this.existing); | ||
return copy; | ||
const {prefix, _existing, counter} = this; | ||
return new IdentifierIssuer(prefix, new Map(_existing), counter); | ||
} | ||
@@ -45,4 +43,5 @@ | ||
// return existing old identifier | ||
if(old && old in this.existing) { | ||
return this.existing[old]; | ||
const existing = old && this._existing.get(old); | ||
if(existing) { | ||
return existing; | ||
} | ||
@@ -52,7 +51,7 @@ | ||
const identifier = this.prefix + this.counter; | ||
this.counter += 1; | ||
this.counter++; | ||
// save mapping | ||
if(old) { | ||
this.existing[old] = identifier; | ||
this._existing.set(old, identifier); | ||
} | ||
@@ -73,4 +72,14 @@ | ||
hasId(old) { | ||
return (old in this.existing); | ||
return this._existing.has(old); | ||
} | ||
/** | ||
* Returns all of the IDs that have been issued new IDs in the order in | ||
* which they were issued new IDs. | ||
* | ||
* @return the list of old IDs that has been issued new IDs in order. | ||
*/ | ||
getOldIds() { | ||
return [...this._existing.keys()]; | ||
} | ||
}; |
@@ -6,3 +6,3 @@ /** | ||
* BSD 3-Clause License | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. | ||
* All rights reserved. | ||
@@ -38,3 +38,2 @@ * | ||
const util = require('./util'); | ||
const URDNA2015 = require('./URDNA2015'); | ||
@@ -80,23 +79,6 @@ const URGNA2012 = require('./URGNA2012'); | ||
* [useNative] use native implementation (default: false). | ||
* @param [callback(err, canonical)] called once the operation completes. | ||
* | ||
* @return a Promise that resolves to the canonicalized RDF Dataset. | ||
*/ | ||
api.canonize = util.callbackify(async function(dataset, options) { | ||
let callback; | ||
const promise = new Promise((resolve, reject) => { | ||
callback = (err, canonical) => { | ||
if(err) { | ||
return reject(err); | ||
} | ||
/*if(options.format === 'application/n-quads') { | ||
canonical = canonical.join(''); | ||
} | ||
canonical = _parseNQuads(canonical.join(''));*/ | ||
resolve(canonical); | ||
}; | ||
}); | ||
api.canonize = async function(dataset, options) { | ||
// back-compat with legacy dataset | ||
@@ -107,27 +89,29 @@ if(!Array.isArray(dataset)) { | ||
// TODO: convert algorithms to Promise-based async | ||
if(options.useNative) { | ||
if(rdfCanonizeNative) { | ||
rdfCanonizeNative.canonize(dataset, options, callback); | ||
} else { | ||
if(!rdfCanonizeNative) { | ||
throw new Error('rdf-canonize-native not available'); | ||
} | ||
} else { | ||
if(options.algorithm === 'URDNA2015') { | ||
new URDNA2015(options).main(dataset, callback); | ||
} else if(options.algorithm === 'URGNA2012') { | ||
new URGNA2012(options).main(dataset, callback); | ||
} else if(!('algorithm' in options)) { | ||
throw new Error('No RDF Dataset Canonicalization algorithm specified.'); | ||
} else { | ||
throw new Error( | ||
'Invalid RDF Dataset Canonicalization algorithm: ' + options.algorithm); | ||
} | ||
// TODO: convert native algorithm to Promise-based async | ||
return new Promise((resolve, reject) => | ||
rdfCanonizeNative.canonize(dataset, options, (err, canonical) => | ||
err ? reject(err) : resolve(canonical))); | ||
} | ||
return promise; | ||
}); | ||
if(options.algorithm === 'URDNA2015') { | ||
return new URDNA2015(options).main(dataset); | ||
} | ||
if(options.algorithm === 'URGNA2012') { | ||
return new URGNA2012(options).main(dataset); | ||
} | ||
if(!('algorithm' in options)) { | ||
throw new Error('No RDF Dataset Canonicalization algorithm specified.'); | ||
} | ||
throw new Error( | ||
'Invalid RDF Dataset Canonicalization algorithm: ' + options.algorithm); | ||
}; | ||
/** | ||
* Synchronously canonizes an RDF dataset. | ||
* This method is no longer available in the public API, it is for testing | ||
* only. It synchronously canonizes an RDF dataset and does not work in the | ||
* browser. | ||
* | ||
@@ -142,3 +126,3 @@ * @param dataset the dataset to canonize. | ||
*/ | ||
api.canonizeSync = function(dataset, options) { | ||
api._canonizeSync = function(dataset, options) { | ||
// back-compat with legacy dataset | ||
@@ -157,3 +141,4 @@ if(!Array.isArray(dataset)) { | ||
return new URDNA2015Sync(options).main(dataset); | ||
} else if(options.algorithm === 'URGNA2012') { | ||
} | ||
if(options.algorithm === 'URGNA2012') { | ||
return new URGNA2012Sync(options).main(dataset); | ||
@@ -160,0 +145,0 @@ } |
/* | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
'use strict'; | ||
const forge = require('node-forge/lib/forge'); | ||
require('node-forge/lib/md'); | ||
require('node-forge/lib/sha1'); | ||
require('node-forge/lib/sha256'); | ||
const crypto = self.crypto || self.msCrypto; | ||
// TODO: synchronous version no longer supported in browser | ||
module.exports = class MessageDigest { | ||
@@ -18,12 +17,32 @@ /** | ||
constructor(algorithm) { | ||
this.md = forge.md[algorithm].create(); | ||
// check if crypto.subtle is available | ||
// check is here rather than top-level to only fail if class is used | ||
if(!(crypto && crypto.subtle)) { | ||
throw new Error('crypto.subtle not found.'); | ||
} | ||
if(algorithm === 'sha256') { | ||
this.algorithm = {name: 'SHA-256'}; | ||
} else if(algorithm === 'sha1') { | ||
this.algorithm = {name: 'SHA-1'}; | ||
} else { | ||
throw new Error(`Unsupport algorithm "${algorithm}".`); | ||
} | ||
this._content = ''; | ||
} | ||
update(msg) { | ||
this.md.update(msg, 'utf8'); | ||
this._content += msg; | ||
} | ||
digest() { | ||
return this.md.digest().toHex(); | ||
async digest() { | ||
const data = new TextEncoder().encode(this._content); | ||
const buffer = new Uint8Array( | ||
await crypto.subtle.digest(this.algorithm, data)); | ||
// return digest in hex | ||
let hex = ''; | ||
for(let i = 0; i < buffer.length; ++i) { | ||
hex += buffer[i].toString(16).padStart(2, '0'); | ||
} | ||
return hex; | ||
} | ||
}; |
/* | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
@@ -4,0 +4,0 @@ 'use strict'; |
/* | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
@@ -12,2 +12,7 @@ 'use strict'; | ||
const TYPE_NAMED_NODE = 'NamedNode'; | ||
const TYPE_BLANK_NODE = 'BlankNode'; | ||
const TYPE_LITERAL = 'Literal'; | ||
const TYPE_DEFAULT_GRAPH = 'DefaultGraph'; | ||
// build regexes | ||
@@ -103,25 +108,25 @@ const REGEX = {}; | ||
// create RDF quad | ||
const quad = {}; | ||
const quad = {subject: null, predicate: null, object: null, graph: null}; | ||
// get subject | ||
if(match[1] !== undefined) { | ||
quad.subject = {termType: 'NamedNode', value: match[1]}; | ||
quad.subject = {termType: TYPE_NAMED_NODE, value: match[1]}; | ||
} else { | ||
quad.subject = {termType: 'BlankNode', value: match[2]}; | ||
quad.subject = {termType: TYPE_BLANK_NODE, value: match[2]}; | ||
} | ||
// get predicate | ||
quad.predicate = {termType: 'NamedNode', value: match[3]}; | ||
quad.predicate = {termType: TYPE_NAMED_NODE, value: match[3]}; | ||
// get object | ||
if(match[4] !== undefined) { | ||
quad.object = {termType: 'NamedNode', value: match[4]}; | ||
quad.object = {termType: TYPE_NAMED_NODE, value: match[4]}; | ||
} else if(match[5] !== undefined) { | ||
quad.object = {termType: 'BlankNode', value: match[5]}; | ||
quad.object = {termType: TYPE_BLANK_NODE, value: match[5]}; | ||
} else { | ||
quad.object = { | ||
termType: 'Literal', | ||
termType: TYPE_LITERAL, | ||
value: undefined, | ||
datatype: { | ||
termType: 'NamedNode' | ||
termType: TYPE_NAMED_NODE | ||
} | ||
@@ -143,3 +148,3 @@ }; | ||
quad.graph = { | ||
termType: 'NamedNode', | ||
termType: TYPE_NAMED_NODE, | ||
value: match[9] | ||
@@ -149,3 +154,3 @@ }; | ||
quad.graph = { | ||
termType: 'BlankNode', | ||
termType: TYPE_BLANK_NODE, | ||
value: match[10] | ||
@@ -155,3 +160,3 @@ }; | ||
quad.graph = { | ||
termType: 'DefaultGraph', | ||
termType: TYPE_DEFAULT_GRAPH, | ||
value: '' | ||
@@ -217,25 +222,25 @@ }; | ||
// subject and predicate can only be NamedNode or BlankNode | ||
[s, p].forEach(term => { | ||
if(term.termType === 'NamedNode') { | ||
nquad += '<' + term.value + '>'; | ||
} else { | ||
nquad += term.value; | ||
} | ||
nquad += ' '; | ||
}); | ||
// subject can only be NamedNode or BlankNode | ||
if(s.termType === TYPE_NAMED_NODE) { | ||
nquad += `<${s.value}>`; | ||
} else { | ||
nquad += `${s.value}`; | ||
} | ||
// predicate can only be NamedNode | ||
nquad += ` <${p.value}> `; | ||
// object is NamedNode, BlankNode, or Literal | ||
if(o.termType === 'NamedNode') { | ||
nquad += '<' + o.value + '>'; | ||
} else if(o.termType === 'BlankNode') { | ||
if(o.termType === TYPE_NAMED_NODE) { | ||
nquad += `<${o.value}>`; | ||
} else if(o.termType === TYPE_BLANK_NODE) { | ||
nquad += o.value; | ||
} else { | ||
nquad += '"' + _escape(o.value) + '"'; | ||
nquad += `"${_escape(o.value)}"`; | ||
if(o.datatype.value === RDF_LANGSTRING) { | ||
if(o.language) { | ||
nquad += '@' + o.language; | ||
nquad += `@${o.language}`; | ||
} | ||
} else if(o.datatype.value !== XSD_STRING) { | ||
nquad += '^^<' + o.datatype.value + '>'; | ||
nquad += `^^<${o.datatype.value}>`; | ||
} | ||
@@ -246,6 +251,6 @@ } | ||
// does not add to `nquad`) | ||
if(g.termType === 'NamedNode') { | ||
nquad += ' <' + g.value + '>'; | ||
} else if(g.termType === 'BlankNode') { | ||
nquad += ' ' + g.value; | ||
if(g.termType === TYPE_NAMED_NODE) { | ||
nquad += ` <${g.value}>`; | ||
} else if(g.termType === TYPE_BLANK_NODE) { | ||
nquad += ` ${g.value}`; | ||
} | ||
@@ -269,5 +274,5 @@ | ||
const termTypeMap = { | ||
'blank node': 'BlankNode', | ||
IRI: 'NamedNode', | ||
literal: 'Literal' | ||
'blank node': TYPE_BLANK_NODE, | ||
IRI: TYPE_NAMED_NODE, | ||
literal: TYPE_LITERAL | ||
}; | ||
@@ -285,5 +290,5 @@ | ||
}; | ||
if(newComponent.termType === 'Literal') { | ||
if(newComponent.termType === TYPE_LITERAL) { | ||
newComponent.datatype = { | ||
termType: 'NamedNode' | ||
termType: TYPE_NAMED_NODE | ||
}; | ||
@@ -306,3 +311,3 @@ if('datatype' in oldComponent) { | ||
quad.graph = { | ||
termType: 'DefaultGraph', | ||
termType: TYPE_DEFAULT_GRAPH, | ||
value: '' | ||
@@ -312,3 +317,4 @@ }; | ||
quad.graph = { | ||
termType: graphName.startsWith('_:') ? 'BlankNode' : 'NamedNode', | ||
termType: graphName.startsWith('_:') ? | ||
TYPE_BLANK_NODE : TYPE_NAMED_NODE, | ||
value: graphName | ||
@@ -334,8 +340,15 @@ }; | ||
function _compareTriples(t1, t2) { | ||
for(const k in t1) { | ||
if(t1[k].termType !== t2[k].termType || t1[k].value !== t2[k].value) { | ||
return false; | ||
} | ||
// compare subject and object types first as it is the quickest check | ||
if(!(t1.subject.termType === t2.subject.termType && | ||
t1.object.termType === t2.object.termType)) { | ||
return false; | ||
} | ||
if(t1.object.termType !== 'Literal') { | ||
// compare values | ||
if(!(t1.subject.value === t2.subject.value && | ||
t1.predicate.value === t2.predicate.value && | ||
t1.object.value === t2.object.value)) { | ||
return false; | ||
} | ||
if(t1.object.termType !== TYPE_LITERAL) { | ||
// no `datatype` or `language` to check | ||
return true; | ||
@@ -345,4 +358,4 @@ } | ||
(t1.object.datatype.termType === t2.object.datatype.termType) && | ||
(t1.object.datatype.value === t2.object.datatype.value) && | ||
(t1.object.language === t2.object.language) | ||
(t1.object.language === t2.object.language) && | ||
(t1.object.datatype.value === t2.object.datatype.value) | ||
); | ||
@@ -349,0 +362,0 @@ } |
/* | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
'use strict'; | ||
const AsyncAlgorithm = require('./AsyncAlgorithm'); | ||
const IdentifierIssuer = require('./IdentifierIssuer'); | ||
const MessageDigest = require('./MessageDigest'); | ||
const Permutator = require('./Permutator'); | ||
const Permuter = require('./Permuter'); | ||
const NQuads = require('./NQuads'); | ||
const util = require('./util'); | ||
const POSITIONS = {subject: 's', object: 'o', graph: 'g'}; | ||
module.exports = class URDNA2015 extends AsyncAlgorithm { | ||
constructor(options) { | ||
options = options || {}; | ||
super(options); | ||
module.exports = class URDNA2015 { | ||
constructor() { | ||
this.name = 'URDNA2015'; | ||
this.options = Object.assign({}, options); | ||
this.blankNodeInfo = {}; | ||
this.hashToBlankNodes = {}; | ||
this.blankNodeInfo = new Map(); | ||
this.canonicalIssuer = new IdentifierIssuer('_:c14n'); | ||
this.hashAlgorithm = 'sha256'; | ||
this.quads; | ||
this.quads = null; | ||
} | ||
// 4.4) Normalization Algorithm | ||
main(dataset, callback) { | ||
const self = this; | ||
self.schedule.start = Date.now(); | ||
let result; | ||
self.quads = dataset; | ||
async main(dataset) { | ||
this.quads = dataset; | ||
// 1) Create the normalization state. | ||
// 2) For every quad in input dataset: | ||
for(const quad of dataset) { | ||
// 2.1) For each blank node that occurs in the quad, add a reference | ||
// to the quad using the blank node identifier in the blank node to | ||
// quads map, creating a new entry if necessary. | ||
this._addBlankNodeQuadInfo({quad, component: quad.subject}); | ||
this._addBlankNodeQuadInfo({quad, component: quad.object}); | ||
this._addBlankNodeQuadInfo({quad, component: quad.graph}); | ||
} | ||
// Note: Optimize by generating non-normalized blank node map concurrently. | ||
const nonNormalized = {}; | ||
// 3) Create a list of non-normalized blank node identifiers | ||
// non-normalized identifiers and populate it using the keys from the | ||
// blank node to quads map. | ||
// Note: We use a map here and it was generated during step 2. | ||
self.waterfall([ | ||
callback => { | ||
// 2) For every quad in input dataset: | ||
self.forEach(dataset, (quad, idx, callback) => { | ||
// 2.1) For each blank node that occurs in the quad, add a reference | ||
// to the quad using the blank node identifier in the blank node to | ||
// quads map, creating a new entry if necessary. | ||
self.forEachComponent(quad, component => { | ||
if(component.termType !== 'BlankNode') { | ||
return; | ||
} | ||
const id = component.value; | ||
if(id in self.blankNodeInfo) { | ||
self.blankNodeInfo[id].quads.push(quad); | ||
} else { | ||
nonNormalized[id] = true; | ||
self.blankNodeInfo[id] = {quads: [quad]}; | ||
} | ||
}); | ||
// 4) `simple` flag is skipped -- loop is optimized away. This optimization | ||
// is permitted because there was a typo in the hash first degree quads | ||
// algorithm in the URDNA2015 spec that was implemented widely making it | ||
// such that it could not be fixed; the result was that the loop only | ||
// needs to be run once and the first degree quad hashes will never change. | ||
// 5.1-5.2 are skipped; first degree quad hashes are generated just once | ||
// for all non-normalized blank nodes. | ||
callback(); | ||
}, callback); | ||
}, | ||
callback => { | ||
// 3) Create a list of non-normalized blank node identifiers | ||
// non-normalized identifiers and populate it using the keys from the | ||
// blank node to quads map. | ||
// Note: We use a map here and it was generated during step 2. | ||
// 5.3) For each blank node identifier identifier in non-normalized | ||
// identifiers: | ||
const hashToBlankNodes = new Map(); | ||
const nonNormalized = [...this.blankNodeInfo.keys()]; | ||
let i = 0; | ||
for(const id of nonNormalized) { | ||
// Note: batch hashing first degree quads 100 at a time | ||
if(++i % 100 === 0) { | ||
await this._yield(); | ||
} | ||
// steps 5.3.1 and 5.3.2: | ||
await this._hashAndTrackBlankNode({id, hashToBlankNodes}); | ||
} | ||
// 4) Initialize simple, a boolean flag, to true. | ||
let simple = true; | ||
// 5.4) For each hash to identifier list mapping in hash to blank | ||
// nodes map, lexicographically-sorted by hash: | ||
const hashes = [...hashToBlankNodes.keys()].sort(); | ||
// optimize away second sort, gather non-unique hashes in order as we go | ||
const nonUnique = []; | ||
for(const hash of hashes) { | ||
// 5.4.1) If the length of identifier list is greater than 1, | ||
// continue to the next mapping. | ||
const idList = hashToBlankNodes.get(hash); | ||
if(idList.length > 1) { | ||
nonUnique.push(idList); | ||
continue; | ||
} | ||
// 5) While simple is true, issue canonical identifiers for blank nodes: | ||
self.whilst(() => simple, callback => { | ||
// 5.1) Set simple to false. | ||
simple = false; | ||
// 5.4.2) Use the Issue Identifier algorithm, passing canonical | ||
// issuer and the single blank node identifier in identifier | ||
// list, identifier, to issue a canonical replacement identifier | ||
// for identifier. | ||
const id = idList[0]; | ||
this.canonicalIssuer.getId(id); | ||
// 5.2) Clear hash to blank nodes map. | ||
self.hashToBlankNodes = {}; | ||
// Note: These steps are skipped, optimized away since the loop | ||
// only needs to be run once. | ||
// 5.4.3) Remove identifier from non-normalized identifiers. | ||
// 5.4.4) Remove hash from the hash to blank nodes map. | ||
// 5.4.5) Set simple to true. | ||
} | ||
self.waterfall([ | ||
callback => { | ||
// 5.3) For each blank node identifier identifier in | ||
// non-normalized identifiers: | ||
self.forEach(nonNormalized, (value, id, callback) => { | ||
// 5.3.1) Create a hash, hash, according to the Hash First | ||
// Degree Quads algorithm. | ||
self.hashFirstDegreeQuads(id, (err, hash) => { | ||
if(err) { | ||
return callback(err); | ||
} | ||
// 5.3.2) Add hash and identifier to hash to blank nodes map, | ||
// creating a new entry if necessary. | ||
if(hash in self.hashToBlankNodes) { | ||
self.hashToBlankNodes[hash].push(id); | ||
} else { | ||
self.hashToBlankNodes[hash] = [id]; | ||
} | ||
callback(); | ||
}); | ||
}, callback); | ||
}, | ||
callback => { | ||
// 5.4) For each hash to identifier list mapping in hash to blank | ||
// nodes map, lexicographically-sorted by hash: | ||
const hashes = Object.keys(self.hashToBlankNodes).sort(); | ||
self.forEach(hashes, (hash, i, callback) => { | ||
// 5.4.1) If the length of identifier list is greater than 1, | ||
// continue to the next mapping. | ||
const idList = self.hashToBlankNodes[hash]; | ||
if(idList.length > 1) { | ||
return callback(); | ||
} | ||
// 6) For each hash to identifier list mapping in hash to blank nodes map, | ||
// lexicographically-sorted by hash: | ||
// Note: sort optimized away, use `nonUnique`. | ||
for(const idList of nonUnique) { | ||
// 6.1) Create hash path list where each item will be a result of | ||
// running the Hash N-Degree Quads algorithm. | ||
const hashPathList = []; | ||
// 5.4.2) Use the Issue Identifier algorithm, passing canonical | ||
// issuer and the single blank node identifier in identifier | ||
// list, identifier, to issue a canonical replacement identifier | ||
// for identifier. | ||
// TODO: consider changing `getId` to `issue` | ||
const id = idList[0]; | ||
self.canonicalIssuer.getId(id); | ||
// 6.2) For each blank node identifier identifier in identifier list: | ||
for(const id of idList) { | ||
// 6.2.1) If a canonical identifier has already been issued for | ||
// identifier, continue to the next identifier. | ||
if(this.canonicalIssuer.hasId(id)) { | ||
continue; | ||
} | ||
// 5.4.3) Remove identifier from non-normalized identifiers. | ||
delete nonNormalized[id]; | ||
// 6.2.2) Create temporary issuer, an identifier issuer | ||
// initialized with the prefix _:b. | ||
const issuer = new IdentifierIssuer('_:b'); | ||
// 5.4.4) Remove hash from the hash to blank nodes map. | ||
delete self.hashToBlankNodes[hash]; | ||
// 6.2.3) Use the Issue Identifier algorithm, passing temporary | ||
// issuer and identifier, to issue a new temporary blank node | ||
// identifier for identifier. | ||
issuer.getId(id); | ||
// 5.4.5) Set simple to true. | ||
simple = true; | ||
callback(); | ||
}, callback); | ||
} | ||
], callback); | ||
}, callback); | ||
}, | ||
callback => { | ||
// 6) For each hash to identifier list mapping in hash to blank nodes | ||
// map, lexicographically-sorted by hash: | ||
const hashes = Object.keys(self.hashToBlankNodes).sort(); | ||
self.forEach(hashes, (hash, idx, callback) => { | ||
// 6.1) Create hash path list where each item will be a result of | ||
// running the Hash N-Degree Quads algorithm. | ||
const hashPathList = []; | ||
// 6.2.4) Run the Hash N-Degree Quads algorithm, passing | ||
// temporary issuer, and append the result to the hash path list. | ||
const result = await this.hashNDegreeQuads(id, issuer); | ||
hashPathList.push(result); | ||
} | ||
// 6.2) For each blank node identifier identifier in identifier list: | ||
const idList = self.hashToBlankNodes[hash]; | ||
self.waterfall([ | ||
callback => { | ||
self.forEach(idList, (id, idx, callback) => { | ||
// 6.2.1) If a canonical identifier has already been issued for | ||
// identifier, continue to the next identifier. | ||
if(self.canonicalIssuer.hasId(id)) { | ||
return callback(); | ||
} | ||
// 6.3) For each result in the hash path list, | ||
// lexicographically-sorted by the hash in result: | ||
hashPathList.sort(_stringHashCompare); | ||
for(const result of hashPathList) { | ||
// 6.3.1) For each blank node identifier, existing identifier, | ||
// that was issued a temporary identifier by identifier issuer | ||
// in result, issue a canonical identifier, in the same order, | ||
// using the Issue Identifier algorithm, passing canonical | ||
// issuer and existing identifier. | ||
const oldIds = result.issuer.getOldIds(); | ||
for(const id of oldIds) { | ||
this.canonicalIssuer.getId(id); | ||
} | ||
} | ||
} | ||
// 6.2.2) Create temporary issuer, an identifier issuer | ||
// initialized with the prefix _:b. | ||
const issuer = new IdentifierIssuer('_:b'); | ||
/* Note: At this point all blank nodes in the set of RDF quads have been | ||
assigned canonical identifiers, which have been stored in the canonical | ||
issuer. Here each quad is updated by assigning each of its blank nodes | ||
its new identifier. */ | ||
// 6.2.3) Use the Issue Identifier algorithm, passing temporary | ||
// issuer and identifier, to issue a new temporary blank node | ||
// identifier for identifier. | ||
issuer.getId(id); | ||
// 7) For each quad, quad, in input dataset: | ||
const normalized = []; | ||
for(const quad of this.quads) { | ||
// 7.1) Create a copy, quad copy, of quad and replace any existing | ||
// blank node identifiers using the canonical identifiers | ||
// previously issued by canonical issuer. | ||
// Note: We optimize with shallow copies here. | ||
const q = {...quad}; | ||
q.subject = this._useCanonicalId({component: q.subject}); | ||
q.object = this._useCanonicalId({component: q.object}); | ||
q.graph = this._useCanonicalId({component: q.graph}); | ||
// 7.2) Add quad copy to the normalized dataset. | ||
normalized.push(NQuads.serializeQuad(q)); | ||
} | ||
// 6.2.4) Run the Hash N-Degree Quads algorithm, passing | ||
// temporary issuer, and append the result to the hash path | ||
// list. | ||
self.hashNDegreeQuads(id, issuer, (err, result) => { | ||
if(err) { | ||
return callback(err); | ||
} | ||
hashPathList.push(result); | ||
callback(); | ||
}); | ||
}, callback); | ||
}, | ||
callback => { | ||
// 6.3) For each result in the hash path list, | ||
// lexicographically-sorted by the hash in result: | ||
// TODO: use `String.localeCompare`? | ||
hashPathList.sort((a, b) => | ||
(a.hash < b.hash) ? -1 : ((a.hash > b.hash) ? 1 : 0)); | ||
self.forEach(hashPathList, (result, idx, callback) => { | ||
// 6.3.1) For each blank node identifier, existing identifier, | ||
// that was issued a temporary identifier by identifier issuer | ||
// in result, issue a canonical identifier, in the same order, | ||
// using the Issue Identifier algorithm, passing canonical | ||
// issuer and existing identifier. | ||
for(const existing in result.issuer.existing) { | ||
self.canonicalIssuer.getId(existing); | ||
} | ||
callback(); | ||
}, callback); | ||
} | ||
], callback); | ||
}, callback); | ||
}, callback => { | ||
/* Note: At this point all blank nodes in the set of RDF quads have been | ||
assigned canonical identifiers, which have been stored in the canonical | ||
issuer. Here each quad is updated by assigning each of its blank nodes | ||
its new identifier. */ | ||
// sort normalized output | ||
normalized.sort(); | ||
// 7) For each quad, quad, in input dataset: | ||
const normalized = []; | ||
self.waterfall([ | ||
callback => { | ||
self.forEach(self.quads, (quad, idx, callback) => { | ||
// 7.1) Create a copy, quad copy, of quad and replace any existing | ||
// blank node identifiers using the canonical identifiers | ||
// previously issued by canonical issuer. | ||
// Note: We optimize away the copy here. | ||
self.forEachComponent(quad, component => { | ||
if(component.termType === 'BlankNode' && | ||
!component.value.startsWith(self.canonicalIssuer.prefix)) { | ||
component.value = self.canonicalIssuer.getId(component.value); | ||
} | ||
}); | ||
// 7.2) Add quad copy to the normalized dataset. | ||
normalized.push(NQuads.serializeQuad(quad)); | ||
callback(); | ||
}, callback); | ||
}, | ||
callback => { | ||
// sort normalized output | ||
normalized.sort(); | ||
// 8) Return the normalized dataset. | ||
result = normalized.join(''); | ||
return callback(); | ||
} | ||
], callback); | ||
} | ||
], err => callback(err, result)); | ||
// 8) Return the normalized dataset. | ||
return normalized.join(''); | ||
} | ||
// 4.6) Hash First Degree Quads | ||
hashFirstDegreeQuads(id, callback) { | ||
const self = this; | ||
// return cached hash | ||
const info = self.blankNodeInfo[id]; | ||
if('hash' in info) { | ||
return callback(null, info.hash); | ||
} | ||
async hashFirstDegreeQuads(id) { | ||
// 1) Initialize nquads to an empty list. It will be used to store quads in | ||
@@ -250,8 +170,9 @@ // N-Quads format. | ||
// 2) Get the list of quads quads associated with the reference blank node | ||
// 2) Get the list of quads `quads` associated with the reference blank node | ||
// identifier in the blank node to quads map. | ||
const info = this.blankNodeInfo.get(id); | ||
const quads = info.quads; | ||
// 3) For each quad quad in quads: | ||
self.forEach(quads, (quad, idx, callback) => { | ||
// 3) For each quad `quad` in `quads`: | ||
for(const quad of quads) { | ||
// 3.1) Serialize the quad in N-Quads format with the following special | ||
@@ -262,34 +183,32 @@ // rule: | ||
// using a special identifier as follows: | ||
const copy = {predicate: quad.predicate}; | ||
self.forEachComponent(quad, (component, key) => { | ||
// 3.1.2) If the blank node's existing blank node identifier matches the | ||
// reference blank node identifier then use the blank node identifier | ||
// _:a, otherwise, use the blank node identifier _:z. | ||
copy[key] = self.modifyFirstDegreeComponent(id, component, key); | ||
}); | ||
const copy = { | ||
subject: null, predicate: quad.predicate, object: null, graph: null | ||
}; | ||
// 3.1.2) If the blank node's existing blank node identifier matches | ||
// the reference blank node identifier then use the blank node | ||
// identifier _:a, otherwise, use the blank node identifier _:z. | ||
copy.subject = this.modifyFirstDegreeComponent( | ||
id, quad.subject, 'subject'); | ||
copy.object = this.modifyFirstDegreeComponent( | ||
id, quad.object, 'object'); | ||
copy.graph = this.modifyFirstDegreeComponent( | ||
id, quad.graph, 'graph'); | ||
nquads.push(NQuads.serializeQuad(copy)); | ||
callback(); | ||
}, err => { | ||
if(err) { | ||
return callback(err); | ||
} | ||
// 4) Sort nquads in lexicographical order. | ||
nquads.sort(); | ||
} | ||
// 5) Return the hash that results from passing the sorted, joined nquads | ||
// through the hash algorithm. | ||
const md = new MessageDigest(self.hashAlgorithm); | ||
for(let i = 0; i < nquads.length; ++i) { | ||
md.update(nquads[i]); | ||
} | ||
// TODO: represent as byte buffer instead to cut memory usage in half | ||
info.hash = md.digest(); | ||
callback(null, info.hash); | ||
}); | ||
// 4) Sort nquads in lexicographical order. | ||
nquads.sort(); | ||
// 5) Return the hash that results from passing the sorted, joined nquads | ||
// through the hash algorithm. | ||
const md = new MessageDigest(this.hashAlgorithm); | ||
for(const nquad of nquads) { | ||
md.update(nquad); | ||
} | ||
info.hash = await md.digest(); | ||
return info.hash; | ||
} | ||
// 4.7) Hash Related Blank Node | ||
hashRelatedBlankNode(related, quad, issuer, position, callback) { | ||
const self = this; | ||
async hashRelatedBlankNode(related, quad, issuer, position) { | ||
// 1) Set the identifier to use for related, preferring first the canonical | ||
@@ -300,194 +219,161 @@ // identifier for related if issued, second the identifier issued by issuer | ||
let id; | ||
self.waterfall([ | ||
callback => { | ||
if(self.canonicalIssuer.hasId(related)) { | ||
id = self.canonicalIssuer.getId(related); | ||
return callback(); | ||
} | ||
if(issuer.hasId(related)) { | ||
id = issuer.getId(related); | ||
return callback(); | ||
} | ||
self.hashFirstDegreeQuads(related, (err, hash) => { | ||
if(err) { | ||
return callback(err); | ||
} | ||
id = hash; | ||
callback(); | ||
}); | ||
} | ||
], err => { | ||
if(err) { | ||
return callback(err); | ||
} | ||
if(this.canonicalIssuer.hasId(related)) { | ||
id = this.canonicalIssuer.getId(related); | ||
} else if(issuer.hasId(related)) { | ||
id = issuer.getId(related); | ||
} else { | ||
id = this.blankNodeInfo.get(related).hash; | ||
} | ||
// 2) Initialize a string input to the value of position. | ||
// Note: We use a hash object instead. | ||
const md = new MessageDigest(self.hashAlgorithm); | ||
md.update(position); | ||
// 2) Initialize a string input to the value of position. | ||
// Note: We use a hash object instead. | ||
const md = new MessageDigest(this.hashAlgorithm); | ||
md.update(position); | ||
// 3) If position is not g, append <, the value of the predicate in quad, | ||
// and > to input. | ||
if(position !== 'g') { | ||
md.update(self.getRelatedPredicate(quad)); | ||
} | ||
// 3) If position is not g, append <, the value of the predicate in quad, | ||
// and > to input. | ||
if(position !== 'g') { | ||
md.update(this.getRelatedPredicate(quad)); | ||
} | ||
// 4) Append identifier to input. | ||
md.update(id); | ||
// 4) Append identifier to input. | ||
md.update(id); | ||
// 5) Return the hash that results from passing input through the hash | ||
// algorithm. | ||
// TODO: represent as byte buffer instead to cut memory usage in half | ||
return callback(null, md.digest()); | ||
}); | ||
// 5) Return the hash that results from passing input through the hash | ||
// algorithm. | ||
return md.digest(); | ||
} | ||
// 4.8) Hash N-Degree Quads | ||
hashNDegreeQuads(id, issuer, callback) { | ||
const self = this; | ||
async hashNDegreeQuads(id, issuer) { | ||
// 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
// Note: 2) and 3) handled within `createHashToRelated` | ||
let hashToRelated; | ||
const md = new MessageDigest(self.hashAlgorithm); | ||
self.waterfall([ | ||
callback => self.createHashToRelated(id, issuer, (err, result) => { | ||
if(err) { | ||
return callback(err); | ||
const md = new MessageDigest(this.hashAlgorithm); | ||
const hashToRelated = await this.createHashToRelated(id, issuer); | ||
// 4) Create an empty string, data to hash. | ||
// Note: We created a hash object `md` above instead. | ||
// 5) For each related hash to blank node list mapping in hash to related | ||
// blank nodes map, sorted lexicographically by related hash: | ||
const hashes = [...hashToRelated.keys()].sort(); | ||
for(const hash of hashes) { | ||
// 5.1) Append the related hash to the data to hash. | ||
md.update(hash); | ||
// 5.2) Create a string chosen path. | ||
let chosenPath = ''; | ||
// 5.3) Create an unset chosen issuer variable. | ||
let chosenIssuer; | ||
// 5.4) For each permutation of blank node list: | ||
const permuter = new Permuter(hashToRelated.get(hash)); | ||
let i = 0; | ||
while(permuter.hasNext()) { | ||
const permutation = permuter.next(); | ||
// Note: batch permutations 3 at a time | ||
if(++i % 3 === 0) { | ||
await this._yield(); | ||
} | ||
hashToRelated = result; | ||
callback(); | ||
}), | ||
callback => { | ||
// 4) Create an empty string, data to hash. | ||
// Note: We created a hash object `md` above instead. | ||
// 5) For each related hash to blank node list mapping in hash to | ||
// related blank nodes map, sorted lexicographically by related hash: | ||
const hashes = Object.keys(hashToRelated).sort(); | ||
self.forEach(hashes, (hash, idx, callback) => { | ||
// 5.1) Append the related hash to the data to hash. | ||
md.update(hash); | ||
// 5.4.1) Create a copy of issuer, issuer copy. | ||
let issuerCopy = issuer.clone(); | ||
// 5.2) Create a string chosen path. | ||
let chosenPath = ''; | ||
// 5.4.2) Create a string path. | ||
let path = ''; | ||
// 5.3) Create an unset chosen issuer variable. | ||
let chosenIssuer; | ||
// 5.4.3) Create a recursion list, to store blank node identifiers | ||
// that must be recursively processed by this algorithm. | ||
const recursionList = []; | ||
// 5.4) For each permutation of blank node list: | ||
const permutator = new Permutator(hashToRelated[hash]); | ||
self.whilst(() => permutator.hasNext(), nextPermutation => { | ||
const permutation = permutator.next(); | ||
// 5.4.4) For each related in permutation: | ||
let nextPermutation = false; | ||
for(const related of permutation) { | ||
// 5.4.4.1) If a canonical identifier has been issued for | ||
// related, append it to path. | ||
if(this.canonicalIssuer.hasId(related)) { | ||
path += this.canonicalIssuer.getId(related); | ||
} else { | ||
// 5.4.4.2) Otherwise: | ||
// 5.4.4.2.1) If issuer copy has not issued an identifier for | ||
// related, append related to recursion list. | ||
if(!issuerCopy.hasId(related)) { | ||
recursionList.push(related); | ||
} | ||
// 5.4.4.2.2) Use the Issue Identifier algorithm, passing | ||
// issuer copy and related and append the result to path. | ||
path += issuerCopy.getId(related); | ||
} | ||
// 5.4.1) Create a copy of issuer, issuer copy. | ||
let issuerCopy = issuer.clone(); | ||
// 5.4.4.3) If chosen path is not empty and the length of path | ||
// is greater than or equal to the length of chosen path and | ||
// path is lexicographically greater than chosen path, then | ||
// skip to the next permutation. | ||
// Note: Comparing path length to chosen path length can be optimized | ||
// away; only compare lexicographically. | ||
if(chosenPath.length !== 0 && path > chosenPath) { | ||
nextPermutation = true; | ||
break; | ||
} | ||
} | ||
// 5.4.2) Create a string path. | ||
let path = ''; | ||
if(nextPermutation) { | ||
continue; | ||
} | ||
// 5.4.3) Create a recursion list, to store blank node identifiers | ||
// that must be recursively processed by this algorithm. | ||
const recursionList = []; | ||
// 5.4.5) For each related in recursion list: | ||
for(const related of recursionList) { | ||
// 5.4.5.1) Set result to the result of recursively executing | ||
// the Hash N-Degree Quads algorithm, passing related for | ||
// identifier and issuer copy for path identifier issuer. | ||
const result = await this.hashNDegreeQuads(related, issuerCopy); | ||
self.waterfall([ | ||
callback => { | ||
// 5.4.4) For each related in permutation: | ||
self.forEach(permutation, (related, idx, callback) => { | ||
// 5.4.4.1) If a canonical identifier has been issued for | ||
// related, append it to path. | ||
if(self.canonicalIssuer.hasId(related)) { | ||
path += self.canonicalIssuer.getId(related); | ||
} else { | ||
// 5.4.4.2) Otherwise: | ||
// 5.4.4.2.1) If issuer copy has not issued an identifier | ||
// for related, append related to recursion list. | ||
if(!issuerCopy.hasId(related)) { | ||
recursionList.push(related); | ||
} | ||
// 5.4.4.2.2) Use the Issue Identifier algorithm, passing | ||
// issuer copy and related and append the result to path. | ||
path += issuerCopy.getId(related); | ||
} | ||
// 5.4.5.2) Use the Issue Identifier algorithm, passing issuer | ||
// copy and related and append the result to path. | ||
path += issuerCopy.getId(related); | ||
// 5.4.4.3) If chosen path is not empty and the length of path | ||
// is greater than or equal to the length of chosen path and | ||
// path is lexicographically greater than chosen path, then | ||
// skip to the next permutation. | ||
// Note: Comparing path length to chosen path length can be | ||
// optimized away; only compare lexicographically. | ||
if(chosenPath.length !== 0 && path > chosenPath) { | ||
// FIXME: may cause inaccurate total depth calculation | ||
return nextPermutation(); | ||
} | ||
callback(); | ||
}, callback); | ||
}, | ||
callback => { | ||
// 5.4.5) For each related in recursion list: | ||
self.forEach(recursionList, (related, idx, callback) => { | ||
// 5.4.5.1) Set result to the result of recursively executing | ||
// the Hash N-Degree Quads algorithm, passing related for | ||
// identifier and issuer copy for path identifier issuer. | ||
self.hashNDegreeQuads(related, issuerCopy, (err, result) => { | ||
if(err) { | ||
return callback(err); | ||
} | ||
// 5.4.5.3) Append <, the hash in result, and > to path. | ||
path += `<${result.hash}>`; | ||
// 5.4.5.2) Use the Issue Identifier algorithm, passing | ||
// issuer copy and related and append the result to path. | ||
path += issuerCopy.getId(related); | ||
// 5.4.5.4) Set issuer copy to the identifier issuer in | ||
// result. | ||
issuerCopy = result.issuer; | ||
// 5.4.5.3) Append <, the hash in result, and > to path. | ||
path += '<' + result.hash + '>'; | ||
// 5.4.5.5) If chosen path is not empty and the length of path | ||
// is greater than or equal to the length of chosen path and | ||
// path is lexicographically greater than chosen path, then | ||
// skip to the next permutation. | ||
// Note: Comparing path length to chosen path length can be optimized | ||
// away; only compare lexicographically. | ||
if(chosenPath.length !== 0 && path > chosenPath) { | ||
nextPermutation = true; | ||
break; | ||
} | ||
} | ||
// 5.4.5.4) Set issuer copy to the identifier issuer in | ||
// result. | ||
issuerCopy = result.issuer; | ||
if(nextPermutation) { | ||
continue; | ||
} | ||
// 5.4.5.5) If chosen path is not empty and the length of | ||
// path is greater than or equal to the length of chosen | ||
// path and path is lexicographically greater than chosen | ||
// path, then skip to the next permutation. | ||
// Note: Comparing path length to chosen path length can be | ||
// optimized away; only compare lexicographically. | ||
if(chosenPath.length !== 0 && path > chosenPath) { | ||
// FIXME: may cause inaccurate total depth calculation | ||
return nextPermutation(); | ||
} | ||
callback(); | ||
}); | ||
}, callback); | ||
}, | ||
callback => { | ||
// 5.4.6) If chosen path is empty or path is lexicographically | ||
// less than chosen path, set chosen path to path and chosen | ||
// issuer to issuer copy. | ||
if(chosenPath.length === 0 || path < chosenPath) { | ||
chosenPath = path; | ||
chosenIssuer = issuerCopy; | ||
} | ||
callback(); | ||
} | ||
], nextPermutation); | ||
}, err => { | ||
if(err) { | ||
return callback(err); | ||
} | ||
// 5.4.6) If chosen path is empty or path is lexicographically | ||
// less than chosen path, set chosen path to path and chosen | ||
// issuer to issuer copy. | ||
if(chosenPath.length === 0 || path < chosenPath) { | ||
chosenPath = path; | ||
chosenIssuer = issuerCopy; | ||
} | ||
} | ||
// 5.5) Append chosen path to data to hash. | ||
md.update(chosenPath); | ||
// 5.5) Append chosen path to data to hash. | ||
md.update(chosenPath); | ||
// 5.6) Replace issuer, by reference, with chosen issuer. | ||
issuer = chosenIssuer; | ||
callback(); | ||
}); | ||
}, callback); | ||
} | ||
], err => { | ||
// 6) Return issuer and the hash that results from passing data to hash | ||
// through the hash algorithm. | ||
callback(err, {hash: md.digest(), issuer}); | ||
}); | ||
// 5.6) Replace issuer, by reference, with chosen issuer. | ||
issuer = chosenIssuer; | ||
} | ||
// 6) Return issuer and the hash that results from passing data to hash | ||
// through the hash algorithm. | ||
return {hash: await md.digest(), issuer}; | ||
} | ||
@@ -500,5 +386,11 @@ | ||
} | ||
component = util.clone(component); | ||
component.value = (component.value === id ? '_:a' : '_:z'); | ||
return component; | ||
/* Note: A mistake in the URDNA2015 spec that made its way into | ||
implementations (and therefore must stay to avoid interop breakage) | ||
resulted in an assigned canonical ID, if available for | ||
`component.value`, not being used in place of `_:a`/`_:z`, so | ||
we don't use it here. */ | ||
return { | ||
termType: 'BlankNode', | ||
value: component.value === id ? '_:a' : '_:z' | ||
}; | ||
} | ||
@@ -508,63 +400,116 @@ | ||
getRelatedPredicate(quad) { | ||
return '<' + quad.predicate.value + '>'; | ||
return `<${quad.predicate.value}>`; | ||
} | ||
// helper for creating hash to related blank nodes map | ||
createHashToRelated(id, issuer, callback) { | ||
const self = this; | ||
async createHashToRelated(id, issuer) { | ||
// 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
const hashToRelated = {}; | ||
const hashToRelated = new Map(); | ||
// 2) Get a reference, quads, to the list of quads in the blank node to | ||
// quads map for the key identifier. | ||
const quads = self.blankNodeInfo[id].quads; | ||
const quads = this.blankNodeInfo.get(id).quads; | ||
// 3) For each quad in quads: | ||
self.forEach(quads, (quad, idx, callback) => { | ||
let i = 0; | ||
for(const quad of quads) { | ||
// Note: batch hashing related blank node quads 100 at a time | ||
if(++i % 100 === 0) { | ||
await this._yield(); | ||
} | ||
// 3.1) For each component in quad, if component is the subject, object, | ||
// and graph name and it is a blank node that is not identified by | ||
// identifier: | ||
self.forEach(quad, (component, key, callback) => { | ||
if(key === 'predicate' || | ||
!(component.termType === 'BlankNode' && component.value !== id)) { | ||
return callback(); | ||
} | ||
// 3.1.1) Set hash to the result of the Hash Related Blank Node | ||
// algorithm, passing the blank node identifier for component as | ||
// related, quad, path identifier issuer as issuer, and position as | ||
// either s, o, or g based on whether component is a subject, object, | ||
// graph name, respectively. | ||
const related = component.value; | ||
const position = POSITIONS[key]; | ||
self.hashRelatedBlankNode( | ||
related, quad, issuer, position, (err, hash) => { | ||
if(err) { | ||
return callback(err); | ||
} | ||
// 3.1.2) Add a mapping of hash to the blank node identifier for | ||
// component to hash to related blank nodes map, adding an entry as | ||
// necessary. | ||
if(hash in hashToRelated) { | ||
hashToRelated[hash].push(related); | ||
} else { | ||
hashToRelated[hash] = [related]; | ||
} | ||
callback(); | ||
}); | ||
}, callback); | ||
}, err => callback(err, hashToRelated)); | ||
// steps 3.1.1 and 3.1.2 occur in helpers: | ||
await Promise.all([ | ||
this._addRelatedBlankNodeHash({ | ||
quad, component: quad.subject, position: 's', | ||
id, issuer, hashToRelated | ||
}), | ||
this._addRelatedBlankNodeHash({ | ||
quad, component: quad.object, position: 'o', | ||
id, issuer, hashToRelated | ||
}), | ||
this._addRelatedBlankNodeHash({ | ||
quad, component: quad.graph, position: 'g', | ||
id, issuer, hashToRelated | ||
}) | ||
]); | ||
} | ||
return hashToRelated; | ||
} | ||
// helper that iterates over quad components (skips predicate) | ||
forEachComponent(quad, op) { | ||
for(const key in quad) { | ||
// skip `predicate` | ||
if(key === 'predicate') { | ||
continue; | ||
} | ||
op(quad[key], key, quad); | ||
async _hashAndTrackBlankNode({id, hashToBlankNodes}) { | ||
// 5.3.1) Create a hash, hash, according to the Hash First Degree | ||
// Quads algorithm. | ||
const hash = await this.hashFirstDegreeQuads(id); | ||
// 5.3.2) Add hash and identifier to hash to blank nodes map, | ||
// creating a new entry if necessary. | ||
const idList = hashToBlankNodes.get(hash); | ||
if(!idList) { | ||
hashToBlankNodes.set(hash, [id]); | ||
} else { | ||
idList.push(id); | ||
} | ||
} | ||
_addBlankNodeQuadInfo({quad, component}) { | ||
if(component.termType !== 'BlankNode') { | ||
return; | ||
} | ||
const id = component.value; | ||
const info = this.blankNodeInfo.get(id); | ||
if(info) { | ||
info.quads.add(quad); | ||
} else { | ||
this.blankNodeInfo.set(id, {quads: new Set([quad]), hash: null}); | ||
} | ||
} | ||
async _addRelatedBlankNodeHash( | ||
{quad, component, position, id, issuer, hashToRelated}) { | ||
if(!(component.termType === 'BlankNode' && component.value !== id)) { | ||
return; | ||
} | ||
// 3.1.1) Set hash to the result of the Hash Related Blank Node | ||
// algorithm, passing the blank node identifier for component as | ||
// related, quad, path identifier issuer as issuer, and position as | ||
// either s, o, or g based on whether component is a subject, object, | ||
// graph name, respectively. | ||
const related = component.value; | ||
const hash = await this.hashRelatedBlankNode( | ||
related, quad, issuer, position); | ||
// 3.1.2) Add a mapping of hash to the blank node identifier for | ||
// component to hash to related blank nodes map, adding an entry as | ||
// necessary. | ||
const entries = hashToRelated.get(hash); | ||
if(entries) { | ||
entries.push(related); | ||
} else { | ||
hashToRelated.set(hash, [related]); | ||
} | ||
} | ||
_useCanonicalId({component}) { | ||
if(component.termType === 'BlankNode' && | ||
!component.value.startsWith(this.canonicalIssuer.prefix)) { | ||
return { | ||
termType: 'BlankNode', | ||
value: this.canonicalIssuer.getId(component.value) | ||
}; | ||
} | ||
return component; | ||
} | ||
async _yield() { | ||
return new Promise(resolve => setImmediate(resolve)); | ||
} | ||
}; | ||
function _stringHashCompare(a, b) { | ||
return a.hash < b.hash ? -1 : a.hash > b.hash ? 1 : 0; | ||
} |
/* | ||
* Copyright (c) 2016 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
@@ -8,16 +8,12 @@ 'use strict'; | ||
const MessageDigest = require('./MessageDigest'); | ||
const Permutator = require('./Permutator'); | ||
const Permuter = require('./Permuter'); | ||
const NQuads = require('./NQuads'); | ||
const util = require('./util'); | ||
const POSITIONS = {subject: 's', object: 'o', graph: 'g'}; | ||
module.exports = class URDNA2015Sync { | ||
constructor() { | ||
this.name = 'URDNA2015'; | ||
this.blankNodeInfo = {}; | ||
this.hashToBlankNodes = {}; | ||
this.blankNodeInfo = new Map(); | ||
this.canonicalIssuer = new IdentifierIssuer('_:c14n'); | ||
this.hashAlgorithm = 'sha256'; | ||
this.quads; | ||
this.quads = null; | ||
} | ||
@@ -27,10 +23,5 @@ | ||
main(dataset) { | ||
const self = this; | ||
self.quads = dataset; | ||
this.quads = dataset; | ||
// 1) Create the normalization state. | ||
// Note: Optimize by generating non-normalized blank node map concurrently. | ||
const nonNormalized = {}; | ||
// 2) For every quad in input dataset: | ||
@@ -41,14 +32,5 @@ for(const quad of dataset) { | ||
// quads map, creating a new entry if necessary. | ||
self.forEachComponent(quad, component => { | ||
if(component.termType !== 'BlankNode') { | ||
return; | ||
} | ||
const id = component.value; | ||
if(id in self.blankNodeInfo) { | ||
self.blankNodeInfo[id].quads.push(quad); | ||
} else { | ||
nonNormalized[id] = true; | ||
self.blankNodeInfo[id] = {quads: [quad]}; | ||
} | ||
}); | ||
this._addBlankNodeQuadInfo({quad, component: quad.subject}); | ||
this._addBlankNodeQuadInfo({quad, component: quad.object}); | ||
this._addBlankNodeQuadInfo({quad, component: quad.graph}); | ||
} | ||
@@ -61,58 +43,45 @@ | ||
// 4) Initialize simple, a boolean flag, to true. | ||
let simple = true; | ||
// 4) `simple` flag is skipped -- loop is optimized away. This optimization | ||
// is permitted because there was a typo in the hash first degree quads | ||
// algorithm in the URDNA2015 spec that was implemented widely making it | ||
// such that it could not be fixed; the result was that the loop only | ||
// needs to be run once and the first degree quad hashes will never change. | ||
// 5.1-5.2 are skipped; first degree quad hashes are generated just once | ||
// for all non-normalized blank nodes. | ||
// 5) While simple is true, issue canonical identifiers for blank nodes: | ||
while(simple) { | ||
// 5.1) Set simple to false. | ||
simple = false; | ||
// 5.3) For each blank node identifier identifier in non-normalized | ||
// identifiers: | ||
const hashToBlankNodes = new Map(); | ||
const nonNormalized = [...this.blankNodeInfo.keys()]; | ||
for(const id of nonNormalized) { | ||
// steps 5.3.1 and 5.3.2: | ||
this._hashAndTrackBlankNode({id, hashToBlankNodes}); | ||
} | ||
// 5.2) Clear hash to blank nodes map. | ||
self.hashToBlankNodes = {}; | ||
// 5.3) For each blank node identifier identifier in non-normalized | ||
// identifiers: | ||
for(const id in nonNormalized) { | ||
// 5.3.1) Create a hash, hash, according to the Hash First Degree | ||
// Quads algorithm. | ||
const hash = self.hashFirstDegreeQuads(id); | ||
// 5.3.2) Add hash and identifier to hash to blank nodes map, | ||
// creating a new entry if necessary. | ||
if(hash in self.hashToBlankNodes) { | ||
self.hashToBlankNodes[hash].push(id); | ||
} else { | ||
self.hashToBlankNodes[hash] = [id]; | ||
} | ||
// 5.4) For each hash to identifier list mapping in hash to blank | ||
// nodes map, lexicographically-sorted by hash: | ||
const hashes = [...hashToBlankNodes.keys()].sort(); | ||
// optimize away second sort, gather non-unique hashes in order as we go | ||
const nonUnique = []; | ||
for(const hash of hashes) { | ||
// 5.4.1) If the length of identifier list is greater than 1, | ||
// continue to the next mapping. | ||
const idList = hashToBlankNodes.get(hash); | ||
if(idList.length > 1) { | ||
nonUnique.push(idList); | ||
continue; | ||
} | ||
// 5.4) For each hash to identifier list mapping in hash to blank | ||
// nodes map, lexicographically-sorted by hash: | ||
const hashes = Object.keys(self.hashToBlankNodes).sort(); | ||
for(let i = 0; i < hashes.length; ++i) { | ||
// 5.4.1) If the length of identifier list is greater than 1, | ||
// continue to the next mapping. | ||
const hash = hashes[i]; | ||
const idList = self.hashToBlankNodes[hash]; | ||
if(idList.length > 1) { | ||
continue; | ||
} | ||
// 5.4.2) Use the Issue Identifier algorithm, passing canonical | ||
// issuer and the single blank node identifier in identifier | ||
// list, identifier, to issue a canonical replacement identifier | ||
// for identifier. | ||
const id = idList[0]; | ||
this.canonicalIssuer.getId(id); | ||
// 5.4.2) Use the Issue Identifier algorithm, passing canonical | ||
// issuer and the single blank node identifier in identifier | ||
// list, identifier, to issue a canonical replacement identifier | ||
// for identifier. | ||
// TODO: consider changing `getId` to `issue` | ||
const id = idList[0]; | ||
self.canonicalIssuer.getId(id); | ||
// 5.4.3) Remove identifier from non-normalized identifiers. | ||
delete nonNormalized[id]; | ||
// 5.4.4) Remove hash from the hash to blank nodes map. | ||
delete self.hashToBlankNodes[hash]; | ||
// 5.4.5) Set simple to true. | ||
simple = true; | ||
} | ||
// Note: These steps are skipped, optimized away since the loop | ||
// only needs to be run once. | ||
// 5.4.3) Remove identifier from non-normalized identifiers. | ||
// 5.4.4) Remove hash from the hash to blank nodes map. | ||
// 5.4.5) Set simple to true. | ||
} | ||
@@ -122,4 +91,4 @@ | ||
// lexicographically-sorted by hash: | ||
const hashes = Object.keys(self.hashToBlankNodes).sort(); | ||
for(let i = 0; i < hashes.length; ++i) { | ||
// Note: sort optimized away, use `nonUnique`. | ||
for(const idList of nonUnique) { | ||
// 6.1) Create hash path list where each item will be a result of | ||
@@ -130,9 +99,6 @@ // running the Hash N-Degree Quads algorithm. | ||
// 6.2) For each blank node identifier identifier in identifier list: | ||
const hash = hashes[i]; | ||
const idList = self.hashToBlankNodes[hash]; | ||
for(let j = 0; j < idList.length; ++j) { | ||
for(const id of idList) { | ||
// 6.2.1) If a canonical identifier has already been issued for | ||
// identifier, continue to the next identifier. | ||
const id = idList[j]; | ||
if(self.canonicalIssuer.hasId(id)) { | ||
if(this.canonicalIssuer.hasId(id)) { | ||
continue; | ||
@@ -152,3 +118,3 @@ } | ||
// temporary issuer, and append the result to the hash path list. | ||
const result = self.hashNDegreeQuads(id, issuer); | ||
const result = this.hashNDegreeQuads(id, issuer); | ||
hashPathList.push(result); | ||
@@ -159,6 +125,4 @@ } | ||
// lexicographically-sorted by the hash in result: | ||
// TODO: use `String.localeCompare`? | ||
hashPathList.sort((a, b) => | ||
(a.hash < b.hash) ? -1 : ((a.hash > b.hash) ? 1 : 0)); | ||
for(let j = 0; j < hashPathList.length; ++j) { | ||
hashPathList.sort(_stringHashCompare); | ||
for(const result of hashPathList) { | ||
// 6.3.1) For each blank node identifier, existing identifier, | ||
@@ -169,5 +133,5 @@ // that was issued a temporary identifier by identifier issuer | ||
// issuer and existing identifier. | ||
const result = hashPathList[j]; | ||
for(const existing in result.issuer.existing) { | ||
self.canonicalIssuer.getId(existing); | ||
const oldIds = result.issuer.getOldIds(); | ||
for(const id of oldIds) { | ||
this.canonicalIssuer.getId(id); | ||
} | ||
@@ -184,16 +148,13 @@ } | ||
const normalized = []; | ||
for(let i = 0; i < self.quads.length; ++i) { | ||
for(const quad of this.quads) { | ||
// 7.1) Create a copy, quad copy, of quad and replace any existing | ||
// blank node identifiers using the canonical identifiers | ||
// previously issued by canonical issuer. | ||
// Note: We optimize away the copy here. | ||
const quad = self.quads[i]; | ||
self.forEachComponent(quad, component => { | ||
if(component.termType === 'BlankNode' && | ||
!component.value.startsWith(self.canonicalIssuer.prefix)) { | ||
component.value = self.canonicalIssuer.getId(component.value); | ||
} | ||
}); | ||
// Note: We optimize with shallow copies here. | ||
const q = {...quad}; | ||
q.subject = this._useCanonicalId({component: q.subject}); | ||
q.object = this._useCanonicalId({component: q.object}); | ||
q.graph = this._useCanonicalId({component: q.graph}); | ||
// 7.2) Add quad copy to the normalized dataset. | ||
normalized.push(NQuads.serializeQuad(quad)); | ||
normalized.push(NQuads.serializeQuad(q)); | ||
} | ||
@@ -210,10 +171,2 @@ | ||
hashFirstDegreeQuads(id) { | ||
const self = this; | ||
// return cached hash | ||
const info = self.blankNodeInfo[id]; | ||
if('hash' in info) { | ||
return info.hash; | ||
} | ||
// 1) Initialize nquads to an empty list. It will be used to store quads in | ||
@@ -225,8 +178,7 @@ // N-Quads format. | ||
// identifier in the blank node to quads map. | ||
const info = this.blankNodeInfo.get(id); | ||
const quads = info.quads; | ||
// 3) For each quad `quad` in `quads`: | ||
for(let i = 0; i < quads.length; ++i) { | ||
const quad = quads[i]; | ||
for(const quad of quads) { | ||
// 3.1) Serialize the quad in N-Quads format with the following special | ||
@@ -237,9 +189,14 @@ // rule: | ||
// using a special identifier as follows: | ||
const copy = {predicate: quad.predicate}; | ||
self.forEachComponent(quad, (component, key) => { | ||
// 3.1.2) If the blank node's existing blank node identifier matches | ||
// the reference blank node identifier then use the blank node | ||
// identifier _:a, otherwise, use the blank node identifier _:z. | ||
copy[key] = self.modifyFirstDegreeComponent(id, component, key); | ||
}); | ||
const copy = { | ||
subject: null, predicate: quad.predicate, object: null, graph: null | ||
}; | ||
// 3.1.2) If the blank node's existing blank node identifier matches | ||
// the reference blank node identifier then use the blank node | ||
// identifier _:a, otherwise, use the blank node identifier _:z. | ||
copy.subject = this.modifyFirstDegreeComponent( | ||
id, quad.subject, 'subject'); | ||
copy.object = this.modifyFirstDegreeComponent( | ||
id, quad.object, 'object'); | ||
copy.graph = this.modifyFirstDegreeComponent( | ||
id, quad.graph, 'graph'); | ||
nquads.push(NQuads.serializeQuad(copy)); | ||
@@ -253,7 +210,6 @@ } | ||
// through the hash algorithm. | ||
const md = new MessageDigest(self.hashAlgorithm); | ||
for(let i = 0; i < nquads.length; ++i) { | ||
md.update(nquads[i]); | ||
const md = new MessageDigest(this.hashAlgorithm); | ||
for(const nquad of nquads) { | ||
md.update(nquad); | ||
} | ||
// TODO: represent as byte buffer instead to cut memory usage in half | ||
info.hash = md.digest(); | ||
@@ -265,4 +221,2 @@ return info.hash; | ||
hashRelatedBlankNode(related, quad, issuer, position) { | ||
const self = this; | ||
// 1) Set the identifier to use for related, preferring first the canonical | ||
@@ -273,8 +227,8 @@ // identifier for related if issued, second the identifier issued by issuer | ||
let id; | ||
if(self.canonicalIssuer.hasId(related)) { | ||
id = self.canonicalIssuer.getId(related); | ||
if(this.canonicalIssuer.hasId(related)) { | ||
id = this.canonicalIssuer.getId(related); | ||
} else if(issuer.hasId(related)) { | ||
id = issuer.getId(related); | ||
} else { | ||
id = self.hashFirstDegreeQuads(related); | ||
id = this.blankNodeInfo.get(related).hash; | ||
} | ||
@@ -284,3 +238,3 @@ | ||
// Note: We use a hash object instead. | ||
const md = new MessageDigest(self.hashAlgorithm); | ||
const md = new MessageDigest(this.hashAlgorithm); | ||
md.update(position); | ||
@@ -291,3 +245,3 @@ | ||
if(position !== 'g') { | ||
md.update(self.getRelatedPredicate(quad)); | ||
md.update(this.getRelatedPredicate(quad)); | ||
} | ||
@@ -300,3 +254,2 @@ | ||
// algorithm. | ||
// TODO: represent as byte buffer instead to cut memory usage in half | ||
return md.digest(); | ||
@@ -307,9 +260,7 @@ } | ||
hashNDegreeQuads(id, issuer) { | ||
const self = this; | ||
// 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
// Note: 2) and 3) handled within `createHashToRelated` | ||
const md = new MessageDigest(self.hashAlgorithm); | ||
const hashToRelated = self.createHashToRelated(id, issuer); | ||
const md = new MessageDigest(this.hashAlgorithm); | ||
const hashToRelated = this.createHashToRelated(id, issuer); | ||
@@ -321,6 +272,5 @@ // 4) Create an empty string, data to hash. | ||
// blank nodes map, sorted lexicographically by related hash: | ||
const hashes = Object.keys(hashToRelated).sort(); | ||
for(let i = 0; i < hashes.length; ++i) { | ||
const hashes = [...hashToRelated.keys()].sort(); | ||
for(const hash of hashes) { | ||
// 5.1) Append the related hash to the data to hash. | ||
const hash = hashes[i]; | ||
md.update(hash); | ||
@@ -335,5 +285,5 @@ | ||
// 5.4) For each permutation of blank node list: | ||
const permutator = new Permutator(hashToRelated[hash]); | ||
while(permutator.hasNext()) { | ||
const permutation = permutator.next(); | ||
const permuter = new Permuter(hashToRelated.get(hash)); | ||
while(permuter.hasNext()) { | ||
const permutation = permuter.next(); | ||
@@ -352,8 +302,7 @@ // 5.4.1) Create a copy of issuer, issuer copy. | ||
let nextPermutation = false; | ||
for(let j = 0; j < permutation.length; ++j) { | ||
for(const related of permutation) { | ||
// 5.4.4.1) If a canonical identifier has been issued for | ||
// related, append it to path. | ||
const related = permutation[j]; | ||
if(self.canonicalIssuer.hasId(related)) { | ||
path += self.canonicalIssuer.getId(related); | ||
if(this.canonicalIssuer.hasId(related)) { | ||
path += this.canonicalIssuer.getId(related); | ||
} else { | ||
@@ -388,8 +337,7 @@ // 5.4.4.2) Otherwise: | ||
// 5.4.5) For each related in recursion list: | ||
for(let j = 0; j < recursionList.length; ++j) { | ||
for(const related of recursionList) { | ||
// 5.4.5.1) Set result to the result of recursively executing | ||
// the Hash N-Degree Quads algorithm, passing related for | ||
// identifier and issuer copy for path identifier issuer. | ||
const related = recursionList[j]; | ||
const result = self.hashNDegreeQuads(related, issuerCopy); | ||
const result = this.hashNDegreeQuads(related, issuerCopy); | ||
@@ -401,3 +349,3 @@ // 5.4.5.2) Use the Issue Identifier algorithm, passing issuer | ||
// 5.4.5.3) Append <, the hash in result, and > to path. | ||
path += '<' + result.hash + '>'; | ||
path += `<${result.hash}>`; | ||
@@ -450,5 +398,11 @@ // 5.4.5.4) Set issuer copy to the identifier issuer in | ||
} | ||
component = util.clone(component); | ||
component.value = (component.value === id ? '_:a' : '_:z'); | ||
return component; | ||
/* Note: A mistake in the URDNA2015 spec that made its way into | ||
implementations (and therefore must stay to avoid interop breakage) | ||
resulted in an assigned canonical ID, if available for | ||
`component.value`, not being used in place of `_:a`/`_:z`, so | ||
we don't use it here. */ | ||
return { | ||
termType: 'BlankNode', | ||
value: component.value === id ? '_:a' : '_:z' | ||
}; | ||
} | ||
@@ -458,3 +412,3 @@ | ||
getRelatedPredicate(quad) { | ||
return '<' + quad.predicate.value + '>'; | ||
return `<${quad.predicate.value}>`; | ||
} | ||
@@ -464,42 +418,28 @@ | ||
createHashToRelated(id, issuer) { | ||
const self = this; | ||
// 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
const hashToRelated = {}; | ||
const hashToRelated = new Map(); | ||
// 2) Get a reference, quads, to the list of quads in the blank node to | ||
// quads map for the key identifier. | ||
const quads = self.blankNodeInfo[id].quads; | ||
const quads = this.blankNodeInfo.get(id).quads; | ||
// 3) For each quad in quads: | ||
for(let i = 0; i < quads.length; ++i) { | ||
for(const quad of quads) { | ||
// 3.1) For each component in quad, if component is the subject, object, | ||
// and graph name and it is a blank node that is not identified by | ||
// or graph name and it is a blank node that is not identified by | ||
// identifier: | ||
const quad = quads[i]; | ||
for(const key in quad) { | ||
const component = quad[key]; | ||
if(key === 'predicate' || | ||
!(component.termType === 'BlankNode' && component.value !== id)) { | ||
continue; | ||
} | ||
// 3.1.1) Set hash to the result of the Hash Related Blank Node | ||
// algorithm, passing the blank node identifier for component as | ||
// related, quad, path identifier issuer as issuer, and position as | ||
// either s, o, or g based on whether component is a subject, object, | ||
// graph name, respectively. | ||
const related = component.value; | ||
const position = POSITIONS[key]; | ||
const hash = self.hashRelatedBlankNode(related, quad, issuer, position); | ||
// 3.1.2) Add a mapping of hash to the blank node identifier for | ||
// component to hash to related blank nodes map, adding an entry as | ||
// necessary. | ||
if(hash in hashToRelated) { | ||
hashToRelated[hash].push(related); | ||
} else { | ||
hashToRelated[hash] = [related]; | ||
} | ||
} | ||
// steps 3.1.1 and 3.1.2 occur in helpers: | ||
this._addRelatedBlankNodeHash({ | ||
quad, component: quad.subject, position: 's', | ||
id, issuer, hashToRelated | ||
}); | ||
this._addRelatedBlankNodeHash({ | ||
quad, component: quad.object, position: 'o', | ||
id, issuer, hashToRelated | ||
}); | ||
this._addRelatedBlankNodeHash({ | ||
quad, component: quad.graph, position: 'g', | ||
id, issuer, hashToRelated | ||
}); | ||
} | ||
@@ -510,12 +450,68 @@ | ||
// helper that iterates over quad components (skips predicate) | ||
forEachComponent(quad, op) { | ||
for(const key in quad) { | ||
// skip `predicate` | ||
if(key === 'predicate') { | ||
continue; | ||
} | ||
op(quad[key], key, quad); | ||
_hashAndTrackBlankNode({id, hashToBlankNodes}) { | ||
// 5.3.1) Create a hash, hash, according to the Hash First Degree | ||
// Quads algorithm. | ||
const hash = this.hashFirstDegreeQuads(id); | ||
// 5.3.2) Add hash and identifier to hash to blank nodes map, | ||
// creating a new entry if necessary. | ||
const idList = hashToBlankNodes.get(hash); | ||
if(!idList) { | ||
hashToBlankNodes.set(hash, [id]); | ||
} else { | ||
idList.push(id); | ||
} | ||
} | ||
_addBlankNodeQuadInfo({quad, component}) { | ||
if(component.termType !== 'BlankNode') { | ||
return; | ||
} | ||
const id = component.value; | ||
const info = this.blankNodeInfo.get(id); | ||
if(info) { | ||
info.quads.add(quad); | ||
} else { | ||
this.blankNodeInfo.set(id, {quads: new Set([quad]), hash: null}); | ||
} | ||
} | ||
_addRelatedBlankNodeHash( | ||
{quad, component, position, id, issuer, hashToRelated}) { | ||
if(!(component.termType === 'BlankNode' && component.value !== id)) { | ||
return; | ||
} | ||
// 3.1.1) Set hash to the result of the Hash Related Blank Node | ||
// algorithm, passing the blank node identifier for component as | ||
// related, quad, path identifier issuer as issuer, and position as | ||
// either s, o, or g based on whether component is a subject, object, | ||
// graph name, respectively. | ||
const related = component.value; | ||
const hash = this.hashRelatedBlankNode(related, quad, issuer, position); | ||
// 3.1.2) Add a mapping of hash to the blank node identifier for | ||
// component to hash to related blank nodes map, adding an entry as | ||
// necessary. | ||
const entries = hashToRelated.get(hash); | ||
if(entries) { | ||
entries.push(related); | ||
} else { | ||
hashToRelated.set(hash, [related]); | ||
} | ||
} | ||
_useCanonicalId({component}) { | ||
if(component.termType === 'BlankNode' && | ||
!component.value.startsWith(this.canonicalIssuer.prefix)) { | ||
return { | ||
termType: 'BlankNode', | ||
value: this.canonicalIssuer.getId(component.value) | ||
}; | ||
} | ||
return component; | ||
} | ||
}; | ||
function _stringHashCompare(a, b) { | ||
return a.hash < b.hash ? -1 : a.hash > b.hash ? 1 : 0; | ||
} |
/* | ||
* Copyright (c) 2016-2017 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
@@ -7,7 +7,6 @@ 'use strict'; | ||
const URDNA2015 = require('./URDNA2015'); | ||
const util = require('./util'); | ||
module.exports = class URDNA2012 extends URDNA2015 { | ||
constructor(options) { | ||
super(options); | ||
constructor() { | ||
super(); | ||
this.name = 'URGNA2012'; | ||
@@ -22,9 +21,12 @@ this.hashAlgorithm = 'sha1'; | ||
} | ||
component = util.clone(component); | ||
if(key === 'name') { | ||
component.value = '_:g'; | ||
} else { | ||
component.value = (component.value === id ? '_:a' : '_:z'); | ||
if(key === 'graph') { | ||
return { | ||
termType: 'BlankNode', | ||
value: '_:g' | ||
}; | ||
} | ||
return component; | ||
return { | ||
termType: 'BlankNode', | ||
value: (component.value === id ? '_:a' : '_:z') | ||
}; | ||
} | ||
@@ -38,15 +40,14 @@ | ||
// helper for creating hash to related blank nodes map | ||
createHashToRelated(id, issuer, callback) { | ||
const self = this; | ||
async createHashToRelated(id, issuer) { | ||
// 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
const hashToRelated = {}; | ||
const hashToRelated = new Map(); | ||
// 2) Get a reference, quads, to the list of quads in the blank node to | ||
// quads map for the key identifier. | ||
const quads = self.blankNodeInfo[id].quads; | ||
const quads = this.blankNodeInfo.get(id).quads; | ||
// 3) For each quad in quads: | ||
self.forEach(quads, (quad, idx, callback) => { | ||
let i = 0; | ||
for(const quad of quads) { | ||
// 3.1) If the quad's subject is a blank node that does not match | ||
@@ -71,21 +72,23 @@ // identifier, set hash to the result of the Hash Related Blank Node | ||
// 3.3) Otherwise, continue to the next quad. | ||
return callback(); | ||
continue; | ||
} | ||
// Note: batch hashing related blank nodes 100 at a time | ||
if(++i % 100 === 0) { | ||
await this._yield(); | ||
} | ||
// 3.4) Add a mapping of hash to the blank node identifier for the | ||
// component that matched (subject or object) to hash to related blank | ||
// nodes map, adding an entry as necessary. | ||
self.hashRelatedBlankNode( | ||
related, quad, issuer, position, (err, hash) => { | ||
if(err) { | ||
return callback(err); | ||
} | ||
if(hash in hashToRelated) { | ||
hashToRelated[hash].push(related); | ||
} else { | ||
hashToRelated[hash] = [related]; | ||
} | ||
callback(); | ||
}); | ||
}, err => callback(err, hashToRelated)); | ||
const hash = await this.hashRelatedBlankNode( | ||
related, quad, issuer, position); | ||
const entries = hashToRelated.get(hash); | ||
if(entries) { | ||
entries.push(related); | ||
} else { | ||
hashToRelated.set(hash, [related]); | ||
} | ||
} | ||
return hashToRelated; | ||
} | ||
}; |
/* | ||
* Copyright (c) 2016 Digital Bazaar, Inc. All rights reserved. | ||
* Copyright (c) 2016-2020 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
@@ -7,3 +7,2 @@ 'use strict'; | ||
const URDNA2015Sync = require('./URDNA2015Sync'); | ||
const util = require('./util'); | ||
@@ -22,9 +21,12 @@ module.exports = class URDNA2012Sync extends URDNA2015Sync { | ||
} | ||
component = util.clone(component); | ||
if(key === 'name') { | ||
component.value = '_:g'; | ||
} else { | ||
component.value = (component.value === id ? '_:a' : '_:z'); | ||
if(key === 'graph') { | ||
return { | ||
termType: 'BlankNode', | ||
value: '_:g' | ||
}; | ||
} | ||
return component; | ||
return { | ||
termType: 'BlankNode', | ||
value: (component.value === id ? '_:a' : '_:z') | ||
}; | ||
} | ||
@@ -39,14 +41,12 @@ | ||
createHashToRelated(id, issuer) { | ||
const self = this; | ||
// 1) Create a hash to related blank nodes map for storing hashes that | ||
// identify related blank nodes. | ||
const hashToRelated = {}; | ||
const hashToRelated = new Map(); | ||
// 2) Get a reference, quads, to the list of quads in the blank node to | ||
// quads map for the key identifier. | ||
const quads = self.blankNodeInfo[id].quads; | ||
const quads = this.blankNodeInfo.get(id).quads; | ||
// 3) For each quad in quads: | ||
for(let i = 0; i < quads.length; ++i) { | ||
for(const quad of quads) { | ||
// 3.1) If the quad's subject is a blank node that does not match | ||
@@ -56,3 +56,2 @@ // identifier, set hash to the result of the Hash Related Blank Node | ||
// quad, path identifier issuer as issuer, and p as position. | ||
const quad = quads[i]; | ||
let position; | ||
@@ -78,7 +77,8 @@ let related; | ||
// nodes map, adding an entry as necessary. | ||
const hash = self.hashRelatedBlankNode(related, quad, issuer, position); | ||
if(hash in hashToRelated) { | ||
hashToRelated[hash].push(related); | ||
const hash = this.hashRelatedBlankNode(related, quad, issuer, position); | ||
const entries = hashToRelated.get(hash); | ||
if(entries) { | ||
entries.push(related); | ||
} else { | ||
hashToRelated[hash] = [related]; | ||
hashToRelated.set(hash, [related]); | ||
} | ||
@@ -85,0 +85,0 @@ } |
{ | ||
"name": "rdf-canonize", | ||
"version": "1.2.0", | ||
"version": "2.0.0", | ||
"description": "An implementation of the RDF Dataset Normalization Algorithm in JavaScript", | ||
@@ -32,3 +32,2 @@ "homepage": "https://github.com/digitalbazaar/rdf-canonize", | ||
"dependencies": { | ||
"node-forge": "^0.10.0", | ||
"semver": "^6.3.0" | ||
@@ -48,5 +47,7 @@ }, | ||
"core-js": "^2.6.3", | ||
"delay": "^4.4.0", | ||
"eslint": "^7.10.0", | ||
"eslint-config-digitalbazaar": "^2.6.1", | ||
"mocha": "^5.2.0", | ||
"nsolid": "0.0.0", | ||
"nyc": "^13.1.0", | ||
@@ -53,0 +54,0 @@ "webpack": "^4.29.0", |
@@ -70,17 +70,10 @@ # rdf-canonize | ||
// canonize a data set with a particular algorithm with callback | ||
canonize.canonize(dataset, {algorithm: 'URDNA2015'}, function(err, canonical) { | ||
// ... | ||
}); | ||
// canonize a data set with a particular algorithm with async/await | ||
const canonical = await canonize.canonize(dataset, {algorithm: 'URDNA2015'}); | ||
// canonize a data set with a particular algorithm with callback | ||
// force use of the native implementation | ||
canonize.canonize(dataset, { | ||
algorithm: 'URDNA2015', | ||
useNative: true | ||
}, function(err, canonical) { | ||
// ... | ||
// canonize a data set with a particular algorithm and force use of the | ||
// native implementation | ||
const canonical = await canonize.canonize(dataset, { | ||
algorithm: 'URDNA2015', | ||
useNative: true | ||
}); | ||
@@ -87,0 +80,0 @@ ``` |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
New author
Supply chain riskA new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
1
0
502142
20
28
5466
139
- Removednode-forge@^0.10.0
- Removednode-forge@0.10.0(transitive)