Comparing version 0.7.0 to 0.8.0
@@ -20,5 +20,6 @@ // **N3Lexer** tokenizes N3 documents. | ||
return new N3Lexer(options); | ||
options = options || {}; | ||
// In line mode (N-Triples or N-Quads), only simple features may be parsed | ||
if (options && options.lineMode) { | ||
if (options.lineMode) { | ||
// Don't tokenize special literals | ||
@@ -38,2 +39,6 @@ this._tripleQuotedString = this._number = this._boolean = /$0^/; | ||
} | ||
// Enable N3 functionality by default | ||
this._n3Mode = options.n3 !== false; | ||
// Disable comment tokens by default | ||
this._comments = !!options.comments; | ||
} | ||
@@ -45,3 +50,3 @@ | ||
_iri: /^<((?:[^>\\]|\\[uU])+)>/, // IRI with escape sequences; needs sanity check after unescaping | ||
_iri: /^<((?:[^ <>{}\\]|\\[uU])+)>/, // IRI with escape sequences; needs sanity check after unescaping | ||
_unescapedIri: /^<([^\x00-\x20<>\\"\{\}\|\^\`]*)>/, // IRI without escape sequences; no unescaping | ||
@@ -53,10 +58,12 @@ _unescapedString: /^"[^"\\]+"(?=[^"\\])/, // non-empty string without escape sequences | ||
_prefix: /^((?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:\.?[\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:(?=[#\s<])/, | ||
_prefixed: /^((?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:\.?[\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:((?:(?:[0-:A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])(?:(?:[\.\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])*(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~]))?)?)(?=\.?[,;\s#()\[\]\{\}"'<])/, | ||
_prefixed: /^((?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:\.?[\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)?:((?:(?:[0-:A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])(?:(?:[\.\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~])*(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff]|%[0-9a-fA-F]{2}|\\[!#-\/;=?\-@_~]))?)?)(?=\.?[,;!\^\s#()\[\]\{\}"'<])/, | ||
_variable: /^\?(?:(?:[A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:[\-0-:A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)(?=[.,;!\^\s#()\[\]\{\}"'<])/, | ||
_blank: /^_:((?:[0-9A-Z_a-z\xc0-\xd6\xd8-\xf6\xf8-\u02ff\u0370-\u037d\u037f-\u1fff\u200c\u200d\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])(?:\.?[\-0-9A-Z_a-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u037f-\u1fff\u200c\u200d\u203f\u2040\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd]|[\ud800-\udb7f][\udc00-\udfff])*)(?=\.?[,;:\s#()\[\]\{\}"'<])/, | ||
_number: /^[\-+]?(?:\d+\.?\d*([eE](?:[\-\+])?\d+)|\d*\.?\d+)(?=[.,;:\s#()\[\]\{\}"'<])/, | ||
_boolean: /^(?:true|false)(?=[.,;:\s#()\[\]\{\}"'<])/, | ||
_keyword: /^@[a-z]+(?=[\s#<:])/, | ||
_boolean: /^(?:true|false)(?=[.,;\s#()\[\]\{\}"'<])/, | ||
_keyword: /^@[a-z]+(?=[\s#<:])/i, | ||
_sparqlKeyword: /^(?:PREFIX|BASE|GRAPH)(?=[\s#<:])/i, | ||
_shortPredicates: /^a(?=\s+|<)/, | ||
_newline: /^[ \t]*(?:#[^\n\r]*)?(?:\r\n|\n|\r)[ \t]*/, | ||
_comment: /#([^\n\r]*)/, | ||
_whitespace: /^[ \t]+/, | ||
@@ -70,8 +77,14 @@ _endOfFile: /^(?:#[^\n\r]*)?$/, | ||
// Continue parsing as far as possible; the loop will return eventually. | ||
var input = this._input; | ||
var input = this._input, outputComments = this._comments; | ||
while (true) { | ||
// Count and skip whitespace lines. | ||
var whiteSpaceMatch; | ||
while (whiteSpaceMatch = this._newline.exec(input)) | ||
input = input.substr(whiteSpaceMatch[0].length, input.length), this._line++; | ||
var whiteSpaceMatch, comment; | ||
while (whiteSpaceMatch = this._newline.exec(input)) { | ||
// Try to find a comment | ||
if (outputComments && (comment = this._comment.exec(whiteSpaceMatch[0]))) | ||
callback(null, { line: this._line, type: 'comment', value: comment[1], prefix: '' }); | ||
// Advance the input | ||
input = input.substr(whiteSpaceMatch[0].length, input.length); | ||
this._line++; | ||
} | ||
// Skip whitespace on current line. | ||
@@ -84,4 +97,8 @@ if (whiteSpaceMatch = this._whitespace.exec(input)) | ||
// If the input is finished, emit EOF. | ||
if (inputFinished) | ||
if (inputFinished) { | ||
// Try to find a final comment | ||
if (outputComments && (comment = this._comment.exec(input))) | ||
callback(null, { line: this._line, type: 'comment', value: comment[1], prefix: '' }); | ||
callback(input = null, { line: this._line, type: 'eof', value: '', prefix: '' }); | ||
} | ||
return this._input = input; | ||
@@ -95,14 +112,24 @@ } | ||
case '^': | ||
// We need at least 3 tokens lookahead to distinguish ^^<IRI> and ^^pre:fixed | ||
if (input.length < 3) | ||
break; | ||
// Try to match a type. | ||
if (input.length === 1) break; | ||
else if (input[1] !== '^') return reportSyntaxError(this); | ||
this._prevTokenType = '^'; | ||
// Move to type IRI or prefixed name. | ||
input = input.substr(2); | ||
if (input[0] !== '<') { | ||
inconclusive = true; | ||
else if (input[1] === '^') { | ||
this._prevTokenType = '^^'; | ||
// Move to type IRI or prefixed name. | ||
input = input.substr(2); | ||
if (input[0] !== '<') { | ||
inconclusive = true; | ||
break; | ||
} | ||
} | ||
// If no type, it must be a path expression. | ||
else { | ||
if (this._n3Mode) { | ||
matchLength = 1; | ||
type = '^'; | ||
} | ||
break; | ||
} | ||
// Fall through in case the type is an IRI. | ||
case '<': | ||
@@ -119,2 +146,5 @@ // Try to find a full IRI without escape sequences. | ||
} | ||
// Try to find a backwards implication arrow. | ||
else if (this._n3Mode && input.length > 1 && input[1] === '=') | ||
type = 'inverse', matchLength = 2, value = 'http://www.w3.org/2000/10/swap/log#implies'; | ||
break; | ||
@@ -128,3 +158,3 @@ | ||
inputFinished && (match = this._blank.exec(input + ' '))) | ||
type = 'prefixed', prefix = '_', value = match[1]; | ||
type = 'blank', prefix = '_', value = match[1]; | ||
break; | ||
@@ -156,2 +186,8 @@ | ||
case '?': | ||
// Try to find a variable | ||
if (this._n3Mode && (match = this._variable.exec(input))) | ||
type = 'var', value = match[0]; | ||
break; | ||
case '@': | ||
@@ -225,2 +261,16 @@ // Try to find a language code. | ||
case '=': | ||
// Try to find an implication arrow or equals sign. | ||
if (this._n3Mode && input.length > 1) { | ||
type = 'abbreviation'; | ||
if (input[1] !== '>') | ||
matchLength = 1, value = 'http://www.w3.org/2002/07/owl#sameAs'; | ||
else | ||
matchLength = 2, value = 'http://www.w3.org/2000/10/swap/log#implies'; | ||
} | ||
break; | ||
case '!': | ||
if (!this._n3Mode) | ||
break; | ||
case ',': | ||
@@ -258,4 +308,9 @@ case ';': | ||
// A type token is special: it can only be emitted after an IRI or prefixed name is read. | ||
if (this._prevTokenType === '^') | ||
type = (type === 'IRI' || type === 'prefixed') ? 'type' : ''; | ||
if (this._prevTokenType === '^^') { | ||
switch (type) { | ||
case 'prefixed': type = 'type'; break; | ||
case 'IRI': type = 'typeIRI'; break; | ||
default: type = ''; | ||
} | ||
} | ||
@@ -262,0 +317,0 @@ // What if nothing of the above was found? |
@@ -20,3 +20,3 @@ // **N3Parser** parses N3 documents. | ||
return new N3Parser(options); | ||
this._tripleStack = []; | ||
this._contextStack = []; | ||
this._graph = null; | ||
@@ -29,9 +29,11 @@ | ||
// Set supported features depending on the format. | ||
var format = (typeof options.format === 'string') && options.format.match(/\w*$/)[0].toLowerCase(), | ||
var format = (typeof options.format === 'string') ? | ||
options.format.match(/\w*$/)[0].toLowerCase() : '', | ||
isTurtle = format === 'turtle', isTriG = format === 'trig', | ||
isNTriples = /triple/.test(format), isNQuads = /quad/.test(format), | ||
isN3 = this._n3Mode = /n3/.test(format), | ||
isLineMode = isNTriples || isNQuads; | ||
if (!(this._supportsNamedGraphs = !isTurtle)) | ||
if (!(this._supportsNamedGraphs = !(isTurtle || isN3))) | ||
this._readPredicateOrNamedGraph = this._readPredicate; | ||
this._supportsQuads = !(isTurtle || isTriG || isNTriples); | ||
this._supportsQuads = !(isTurtle || isTriG || isNTriples || isN3); | ||
// Disable relative IRIs in N-Triples or N-Quads mode | ||
@@ -47,3 +49,3 @@ if (isLineMode) { | ||
'_:' + options.blankNodePrefix.replace(/^_:/, ''); | ||
this._lexer = options.lexer || new N3Lexer({ lineMode: isLineMode }); | ||
this._lexer = options.lexer || new N3Lexer({ lineMode: isLineMode, n3: isN3 }); | ||
} | ||
@@ -79,2 +81,41 @@ | ||
// ### `_saveContext` stores the current parsing context | ||
// when entering a new scope (list, blank node, formula) | ||
_saveContext: function (type, graph, subject, predicate, object) { | ||
var n3Mode = this._n3Mode; | ||
this._contextStack.push({ | ||
subject: subject, predicate: predicate, object: object, | ||
graph: graph, type: type, | ||
inverse: n3Mode ? this._inversePredicate : false, | ||
blankPrefix: n3Mode ? this._prefixes._ : '', | ||
quantified: n3Mode ? this._quantified : null, | ||
}); | ||
// The settings below only apply to N3 streams | ||
if (n3Mode) { | ||
// Every new scope resets the predicate direction | ||
this._inversePredicate = false; | ||
// In N3, blank nodes are scoped to a formula | ||
// (using a dot as separator, as a blank node label cannot start with it) | ||
this._prefixes._ = this._graph + '.'; | ||
// Quantifiers are scoped to a formula | ||
this._quantified = Object.create(this._quantified); | ||
} | ||
}, | ||
// ### `_restoreContext` restores the parent context | ||
// when leaving a scope (list, blank node, formula) | ||
_restoreContext: function () { | ||
var context = this._contextStack.pop(), n3Mode = this._n3Mode; | ||
this._subject = context.subject; | ||
this._predicate = context.predicate; | ||
this._object = context.object; | ||
this._graph = context.graph; | ||
// The settings below only apply to N3 streams | ||
if (n3Mode) { | ||
this._inversePredicate = context.inverse; | ||
this._prefixes._ = context.blankPrefix; | ||
this._quantified = context.quantified; | ||
} | ||
}, | ||
// ### `_readInTopContext` reads a token when in the top context. | ||
@@ -90,14 +131,10 @@ _readInTopContext: function (token) { | ||
// It could be a prefix declaration. | ||
case '@prefix': | ||
this._sparqlStyle = false; | ||
return this._readPrefix; | ||
case 'PREFIX': | ||
this._sparqlStyle = true; | ||
case '@prefix': | ||
return this._readPrefix; | ||
// It could be a base declaration. | ||
case '@base': | ||
this._sparqlStyle = false; | ||
return this._readBaseIRI; | ||
case 'BASE': | ||
this._sparqlStyle = true; | ||
case '@base': | ||
return this._readBaseIRI; | ||
@@ -120,12 +157,15 @@ // It could be a graph. | ||
// ### `_readSubject` reads a triple's subject. | ||
_readSubject: function (token) { | ||
this._predicate = null; | ||
// ### `_readEntity` reads an IRI, prefixed name, blank node, or variable. | ||
_readEntity: function (token, quantifier) { | ||
var value; | ||
switch (token.type) { | ||
// Read a relative or absolute IRI | ||
case 'IRI': | ||
if (this._base === null || absoluteIRI.test(token.value)) | ||
this._subject = token.value; | ||
else | ||
this._subject = this._resolveIRI(token); | ||
case 'typeIRI': | ||
value = (this._base === null || absoluteIRI.test(token.value)) ? | ||
token.value : this._resolveIRI(token); | ||
break; | ||
// Read a blank node or prefixed name | ||
case 'type': | ||
case 'blank': | ||
case 'prefixed': | ||
@@ -135,19 +175,56 @@ var prefix = this._prefixes[token.prefix]; | ||
return this._error('Undefined prefix "' + token.prefix + ':"', token); | ||
this._subject = prefix + token.value; | ||
value = prefix + token.value; | ||
break; | ||
// Read a variable | ||
case 'var': | ||
return token.value; | ||
// Everything else is not an entity | ||
default: | ||
return this._error('Expected entity but got ' + token.type, token); | ||
} | ||
// In N3 mode, replace the entity if it is quantified | ||
if (!quantifier && this._n3Mode && (value in this._quantified)) | ||
value = this._quantified[value]; | ||
return value; | ||
}, | ||
// ### `_readSubject` reads a triple's subject. | ||
_readSubject: function (token) { | ||
this._predicate = null; | ||
switch (token.type) { | ||
case '[': | ||
// Start a new triple with a new blank node as subject. | ||
this._subject = '_:b' + blankNodeCount++; | ||
this._tripleStack.push({ subject: this._subject, predicate: null, object: null, type: 'blank' }); | ||
this._saveContext('blank', this._graph, | ||
this._subject = '_:b' + blankNodeCount++, null, null); | ||
return this._readBlankNodeHead; | ||
case '(': | ||
// Start a new list | ||
this._tripleStack.push({ subject: RDF_NIL, predicate: null, object: null, type: 'list' }); | ||
this._saveContext('list', this._graph, RDF_NIL, null, null); | ||
this._subject = null; | ||
return this._readListItem; | ||
case '{': | ||
// Start a new formula | ||
if (!this._n3Mode) | ||
return this._error('Unexpected graph', token); | ||
this._saveContext('formula', this._graph, | ||
this._graph = '_:b' + blankNodeCount++, null, null); | ||
return this._readSubject; | ||
case '}': | ||
// No subject; the graph in which we are reading is closed instead. | ||
return this._readPunctuation(token); | ||
case '@forSome': | ||
this._quantifiedPrefix = '_:b'; | ||
return this._readQuantifierList; | ||
case '@forAll': | ||
this._quantifiedPrefix = '?b-'; | ||
return this._readQuantifierList; | ||
default: | ||
return this._error('Expected subject but got ' + token.type, token); | ||
// Read the subject entity | ||
if (!(this._subject = this._readEntity(token))) | ||
return; | ||
// In N3 mode, the subject might be a path | ||
if (this._n3Mode) | ||
return this._getPathReader(this._readPredicateOrNamedGraph); | ||
} | ||
// The next token must be a predicate, | ||
@@ -162,17 +239,7 @@ // or, if the subject was actually a graph IRI, a named graph. | ||
switch (type) { | ||
case 'IRI': | ||
case 'inverse': | ||
this._inversePredicate = true; | ||
case 'abbreviation': | ||
if (this._base === null || absoluteIRI.test(token.value)) | ||
this._predicate = token.value; | ||
else | ||
this._predicate = this._resolveIRI(token); | ||
this._predicate = token.value; | ||
break; | ||
case 'prefixed': | ||
if (token.prefix === '_') | ||
return this._error('Disallowed blank node as predicate', token); | ||
var prefix = this._prefixes[token.prefix]; | ||
if (prefix === undefined) | ||
return this._error('Undefined prefix "' + token.prefix + ':"', token); | ||
this._predicate = prefix + token.value; | ||
break; | ||
case '.': | ||
@@ -189,4 +256,8 @@ case ']': | ||
return this._readPredicate; | ||
case 'blank': | ||
if (!this._n3Mode) | ||
return this._error('Disallowed blank node as predicate', token); | ||
default: | ||
return this._error('Expected predicate to follow "' + this._subject + '"', token); | ||
if (!(this._predicate = this._readEntity(token))) | ||
return; | ||
} | ||
@@ -200,14 +271,2 @@ // The next token must be an object. | ||
switch (token.type) { | ||
case 'IRI': | ||
if (this._base === null || absoluteIRI.test(token.value)) | ||
this._object = token.value; | ||
else | ||
this._object = this._resolveIRI(token); | ||
break; | ||
case 'prefixed': | ||
var prefix = this._prefixes[token.prefix]; | ||
if (prefix === undefined) | ||
return this._error('Undefined prefix "' + token.prefix + ':"', token); | ||
this._object = prefix + token.value; | ||
break; | ||
case 'literal': | ||
@@ -218,13 +277,24 @@ this._object = token.value; | ||
// Start a new triple with a new blank node as subject. | ||
var blank = '_:b' + blankNodeCount++; | ||
this._tripleStack.push({ subject: this._subject, predicate: this._predicate, object: blank, type: 'blank' }); | ||
this._subject = blank; | ||
this._saveContext('blank', this._graph, this._subject, this._predicate, | ||
this._subject = '_:b' + blankNodeCount++); | ||
return this._readBlankNodeHead; | ||
case '(': | ||
// Start a new list | ||
this._tripleStack.push({ subject: this._subject, predicate: this._predicate, object: RDF_NIL, type: 'list' }); | ||
this._saveContext('list', this._graph, this._subject, this._predicate, RDF_NIL); | ||
this._subject = null; | ||
return this._readListItem; | ||
case '{': | ||
// Start a new formula | ||
if (!this._n3Mode) | ||
return this._error('Unexpected graph', token); | ||
this._saveContext('formula', this._graph, this._subject, this._predicate, | ||
this._graph = '_:b' + blankNodeCount++); | ||
return this._readSubject; | ||
default: | ||
return this._error('Expected object to follow "' + this._predicate + '"', token); | ||
// Read the object entity | ||
if (!(this._object = this._readEntity(token))) | ||
return; | ||
// In N3 mode, the object might be a path | ||
if (this._n3Mode) | ||
return this._getPathReader(this._getTripleEndReader()); | ||
} | ||
@@ -267,20 +337,14 @@ return this._getTripleEndReader(); | ||
if (this._subject !== null) | ||
this._callback(null, { subject: this._subject, | ||
predicate: this._predicate, | ||
object: this._object, | ||
graph: this._graph || '' }); | ||
this._triple(this._subject, this._predicate, this._object, this._graph); | ||
// Restore parent triple that contains the blank node. | ||
var triple = this._tripleStack.pop(); | ||
this._subject = triple.subject; | ||
// Was the blank node the object? | ||
if (triple.object !== null) { | ||
// Restore predicate and object as well, and continue by reading punctuation. | ||
this._predicate = triple.predicate; | ||
this._object = triple.object; | ||
// Restore the parent context containing this blank node. | ||
var empty = this._predicate === null; | ||
this._restoreContext(); | ||
// If the blank node was the subject, continue reading the predicate. | ||
if (this._object === null) | ||
// If the blank node was empty, it could be a named graph label. | ||
return empty ? this._readPredicateOrNamedGraph : this._readPredicate; | ||
// If the blank node was the object, restore previous context and read punctuation. | ||
else | ||
return this._getTripleEndReader(); | ||
} | ||
// The blank node was the subject, so continue reading the predicate. | ||
// If the blank node didn't contain any predicates, it could also be the label of a named graph. | ||
return this._predicate !== null ? this._readPredicate : this._readPredicateOrNamedGraph; | ||
}, | ||
@@ -290,25 +354,23 @@ | ||
_readDataTypeOrLang: function (token) { | ||
// Determine the suffix of the literal | ||
var suffix; | ||
switch (token.type) { | ||
// Add a "^^type" suffix for types (IRIs and blank nodes) | ||
case 'type': | ||
var value; | ||
if (token.prefix === '') { | ||
if (this._base === null || absoluteIRI.test(token.value)) | ||
value = token.value; | ||
else | ||
value = this._resolveIRI(token); | ||
} | ||
else { | ||
var prefix = this._prefixes[token.prefix]; | ||
if (prefix === undefined) | ||
return this._error('Undefined prefix "' + token.prefix + ':"', token); | ||
value = prefix + token.value; | ||
} | ||
this._object += '^^' + value; | ||
return this._getTripleEndReader(); | ||
case 'typeIRI': | ||
suffix = '^^' + this._readEntity(token); | ||
break; | ||
// Add a "@lang" suffix for languages | ||
case 'langcode': | ||
this._object += '@' + token.value.toLowerCase(); | ||
return this._getTripleEndReader(); | ||
suffix = '@' + token.value.toLowerCase(); | ||
break; | ||
// If no datatype or language present, read the end of the triple | ||
default: | ||
return this._getTripleEndReader().call(this, token); | ||
this._readCallback = this._getTripleEndReader(); | ||
return this._readCallback(token); | ||
} | ||
// Add the suffix and read the end of the triple | ||
this._object += suffix; | ||
return this._getTripleEndReader(); | ||
}, | ||
@@ -318,32 +380,14 @@ | ||
_readListItem: function (token) { | ||
var item = null, // The actual list item. | ||
itemHead = null, // The head of the rdf:first predicate. | ||
prevItemHead = this._subject, // The head of the previous rdf:first predicate. | ||
stack = this._tripleStack, // The stack of triples part of recursion (lists, blanks, etc.). | ||
parentTriple = stack[stack.length - 1], // The triple containing the current list. | ||
next = this._readListItem; // The next function to execute. | ||
var item = null, // The item of the list. | ||
list = null, // The list itself. | ||
prevList = this._subject, // The previous list that contains this list. | ||
stack = this._contextStack, // The stack of parent contexts. | ||
parent = stack[stack.length - 1], // The parent containing the current list. | ||
next = this._readListItem; // The next function to execute. | ||
switch (token.type) { | ||
case 'IRI': | ||
if (this._base === null || absoluteIRI.test(token.value)) | ||
item = token.value; | ||
else | ||
item = this._resolveIRI(token); | ||
break; | ||
case 'prefixed': | ||
var prefix = this._prefixes[token.prefix]; | ||
if (prefix === undefined) | ||
return this._error('Undefined prefix "' + token.prefix + ':"', token); | ||
item = prefix + token.value; | ||
break; | ||
case 'literal': | ||
item = token.value; | ||
next = this._readDataTypeOrLang; | ||
break; | ||
case '[': | ||
// Stack the current list triple and start a new triple with a blank node as subject. | ||
itemHead = '_:b' + blankNodeCount++; | ||
item = '_:b' + blankNodeCount++; | ||
stack.push({ subject: itemHead, predicate: RDF_FIRST, object: item, type: 'blank' }); | ||
this._subject = item; | ||
this._saveContext('blank', this._graph, list = '_:b' + blankNodeCount++, | ||
RDF_FIRST, this._subject = item = '_:b' + blankNodeCount++); | ||
next = this._readBlankNodeHead; | ||
@@ -353,75 +397,92 @@ break; | ||
// Stack the current list triple and start a new list | ||
itemHead = '_:b' + blankNodeCount++; | ||
stack.push({ subject: itemHead, predicate: RDF_FIRST, object: RDF_NIL, type: 'list' }); | ||
this._saveContext('list', this._graph, list = '_:b' + blankNodeCount++, | ||
RDF_FIRST, RDF_NIL); | ||
this._subject = null; | ||
next = this._readListItem; | ||
break; | ||
case ')': | ||
// Restore the parent triple. | ||
stack.pop(); | ||
// Closing the list; restore the parent context. | ||
this._restoreContext(); | ||
// If this list is contained within a parent list, return the membership triple here. | ||
// This will be `<parent list element> rdf:first <this list>.`. | ||
if (stack.length !== 0 && stack[stack.length - 1].type === 'list') | ||
this._callback(null, { subject: parentTriple.subject, | ||
predicate: parentTriple.predicate, | ||
object: parentTriple.object, | ||
graph: this._graph || '' }); | ||
// Restore the parent triple's subject. | ||
this._subject = parentTriple.subject; | ||
// Was this list in the parent triple's subject? | ||
if (parentTriple.predicate === null) { | ||
this._triple(this._subject, this._predicate, this._object, this._graph); | ||
// Was this list the parent's subject? | ||
if (this._predicate === null) { | ||
// The next token is the predicate. | ||
next = this._readPredicate; | ||
// Skip writing the list tail if this was an empty list. | ||
if (parentTriple.subject === RDF_NIL) | ||
// No list tail if this was an empty list. | ||
if (this._subject === RDF_NIL) | ||
return next; | ||
} | ||
// The list was in the parent triple's object. | ||
// The list was in the parent context's object. | ||
else { | ||
// Restore the parent triple's predicate and object as well. | ||
this._predicate = parentTriple.predicate; | ||
this._object = parentTriple.object; | ||
next = this._getTripleEndReader(); | ||
// Skip writing the list tail if this was an empty list. | ||
if (parentTriple.object === RDF_NIL) | ||
// No list tail if this was an empty list. | ||
if (this._object === RDF_NIL) | ||
return next; | ||
} | ||
// Close the list by making the item head nil. | ||
itemHead = RDF_NIL; | ||
// Close the list by making the head nil. | ||
list = RDF_NIL; | ||
break; | ||
case 'literal': | ||
item = token.value; | ||
next = this._readDataTypeOrLang; | ||
break; | ||
default: | ||
return this._error('Expected list item instead of "' + token.type + '"', token); | ||
if (!(item = this._readEntity(token))) | ||
return; | ||
} | ||
// Create a new blank node if no item head was assigned yet. | ||
if (itemHead === null) | ||
this._subject = itemHead = '_:b' + blankNodeCount++; | ||
if (list === null) | ||
this._subject = list = '_:b' + blankNodeCount++; | ||
// Is this the first element of the list? | ||
if (prevItemHead === null) { | ||
// This list is either the object or the subject. | ||
if (parentTriple.object === RDF_NIL) | ||
parentTriple.object = itemHead; | ||
if (prevList === null) { | ||
// This list is either the subject or the object of its parent. | ||
if (parent.predicate === null) | ||
parent.subject = list; | ||
else | ||
parentTriple.subject = itemHead; | ||
parent.object = list; | ||
} | ||
else { | ||
// The rest of the list is in the current head. | ||
this._callback(null, { subject: prevItemHead, | ||
predicate: RDF_REST, | ||
object: itemHead, | ||
graph: this._graph || '' }); | ||
// Continue the previous list with the current list. | ||
this._triple(prevList, RDF_REST, list, this._graph); | ||
} | ||
// Add the item's value. | ||
if (item !== null) | ||
this._callback(null, { subject: itemHead, | ||
predicate: RDF_FIRST, | ||
object: item, | ||
graph: this._graph || '' }); | ||
if (item !== null) { | ||
// In N3 mode, the item might be a path | ||
if (this._n3Mode && (token.type === 'IRI' || token.type === 'prefixed')) { | ||
// Create a new context to add the item's path | ||
this._saveContext('item', this._graph, list, RDF_FIRST, item); | ||
this._subject = item, this._predicate = null; | ||
// _readPath will restore the context and output the item | ||
return this._getPathReader(this._readListItem); | ||
} | ||
// Output the item | ||
this._triple(list, RDF_FIRST, item, this._graph); | ||
} | ||
return next; | ||
}, | ||
// ### `_readFormulaTail` reads the end of a formula. | ||
_readFormulaTail: function (token) { | ||
if (token.type !== '}') | ||
return this._readPunctuation(token); | ||
// Store the last triple of the formula. | ||
if (this._subject !== null) | ||
this._triple(this._subject, this._predicate, this._object, this._graph); | ||
// Restore the parent context containing this formula. | ||
this._restoreContext(); | ||
// If the formula was the subject, continue reading the predicate. | ||
// If the formula was the object, read punctuation. | ||
return this._object === null ? this._readPredicate : this._getTripleEndReader(); | ||
}, | ||
// ### `_readPunctuation` reads punctuation between triples or triple parts. | ||
_readPunctuation: function (token) { | ||
var next, subject = this._subject, graph = this._graph; | ||
var next, subject = this._subject, graph = this._graph, | ||
inversePredicate = this._inversePredicate; | ||
switch (token.type) { | ||
@@ -432,2 +493,4 @@ // A closing brace ends a graph | ||
return this._error('Unexpected graph closing', token); | ||
if (this._n3Mode) | ||
return this._readFormulaTail(token); | ||
this._graph = null; | ||
@@ -437,3 +500,4 @@ // A dot just ends the statement, without sharing anything with the next. | ||
this._subject = null; | ||
next = this._readInTopContext; | ||
next = this._contextStack.length ? this._readSubject : this._readInTopContext; | ||
if (inversePredicate) this._inversePredicate = false; | ||
break; | ||
@@ -448,32 +512,18 @@ // Semicolon means the subject is shared; predicate and object are different. | ||
break; | ||
// An IRI means this is a quad (only allowed if not already inside a graph). | ||
case 'IRI': | ||
if (this._supportsQuads && this._graph === null) { | ||
if (this._base === null || absoluteIRI.test(token.value)) | ||
graph = token.value; | ||
else | ||
graph = this._resolveIRI(token); | ||
subject = this._subject; | ||
default: | ||
// An entity means this is a quad (only allowed if not already inside a graph). | ||
if (this._supportsQuads && this._graph === null && (graph = this._readEntity(token))) { | ||
next = this._readQuadPunctuation; | ||
break; | ||
} | ||
// An prefixed name means this is a quad (only allowed if not already inside a graph). | ||
case 'prefixed': | ||
if (this._supportsQuads && this._graph === null) { | ||
var prefix = this._prefixes[token.prefix]; | ||
if (prefix === undefined) | ||
return this._error('Undefined prefix "' + token.prefix + ':"', token); | ||
graph = prefix + token.value; | ||
next = this._readQuadPunctuation; | ||
break; | ||
} | ||
default: | ||
return this._error('Expected punctuation to follow "' + this._object + '"', token); | ||
} | ||
// A triple has been completed now, so return it. | ||
if (subject !== null) | ||
this._callback(null, { subject: subject, | ||
predicate: this._predicate, | ||
object: this._object, | ||
graph: graph || '' }); | ||
if (subject !== null) { | ||
var predicate = this._predicate, object = this._object; | ||
if (!inversePredicate) | ||
this._triple(subject, predicate, object, graph); | ||
else | ||
this._triple(object, predicate, subject, graph); | ||
} | ||
return next; | ||
@@ -498,6 +548,3 @@ }, | ||
// A triple has been completed now, so return it. | ||
this._callback(null, { subject: this._subject, | ||
predicate: this._predicate, | ||
object: this._object, | ||
graph: this._graph || '' }); | ||
this._triple(this._subject, this._predicate, this._object, this._graph); | ||
return next; | ||
@@ -525,7 +572,3 @@ }, | ||
return this._error('Expected IRI to follow prefix "' + this._prefix + ':"', token); | ||
var prefixIRI; | ||
if (this._base === null || absoluteIRI.test(token.value)) | ||
prefixIRI = token.value; | ||
else | ||
prefixIRI = this._resolveIRI(token); | ||
var prefixIRI = this._readEntity(token); | ||
this._prefixes[this._prefix] = prefixIRI; | ||
@@ -549,2 +592,3 @@ this._prefixCallback(this._prefix, prefixIRI); | ||
case 'IRI': | ||
case 'blank': | ||
case 'prefixed': | ||
@@ -570,4 +614,6 @@ return this._readSubject(token), this._readGraph; | ||
// SPARQL-style declarations don't have punctuation. | ||
if (this._sparqlStyle) | ||
if (this._sparqlStyle) { | ||
this._sparqlStyle = false; | ||
return this._readInTopContext(token); | ||
} | ||
@@ -579,9 +625,100 @@ if (token.type !== '.') | ||
// ### `_readQuantifierList` reads a list of quantified symbols from a @forSome or @forAll statement. | |
// Accepts an 'IRI' or 'prefixed' token, resolves it with `_readEntity`, and registers | |
// the resulting entity in `this._quantified` under a freshly numbered replacement name. | |
// Returns `_readQuantifierPunctuation` as the next reader, or the result of `_error` | |
// when the token has another type or `_readEntity` yields a falsy value. | |
_readQuantifierList: function (token) { | |
var entity; | |
switch (token.type) { | |
case 'IRI': | |
case 'prefixed': | |
// NOTE(review): the meaning of the second argument (`true`) is defined by `_readEntity`, not visible here | |
if (entity = this._readEntity(token, true)) | |
break; | |
// Intentional fall-through: a falsy entity is reported as an unexpected token | |
default: | |
return this._error('Unexpected ' + token.type, token); | |
} | |
// Clear the current subject before continuing with the quantifier statement | |
this._subject = null; | |
// Map the quantified entity to `_quantifiedPrefix` plus the next value of the shared `blankNodeCount` counter | |
this._quantified[entity] = this._quantifiedPrefix + blankNodeCount++; | |
return this._readQuantifierPunctuation; | |
}, | |
// ### `_readQuantifierPunctuation` reads punctuation from a @forSome or @forAll statement. | |
// A ',' continues the quantifier list; any other token is handed to the reader | |
// returned by `_getTripleEndReader`. | |
_readQuantifierPunctuation: function (token) { | |
// Read more quantifiers | |
if (token.type === ',') | |
return this._readQuantifierList; | |
// Read a dot | |
else { | |
// Install the triple-end reader as the current callback, then let it process this same token | |
this._readCallback = this._getTripleEndReader(); | |
return this._readCallback(token); | |
} | |
}, | |
// ### `_getPathReader` reads a potential path and then resumes with the given function. | |
// The continuation is stored in `this._afterPath` (overwriting any previous value), | |
// and `_readPath` is returned as the next reader; `_readPath` calls the continuation | |
// with the first token that is not part of a path. | |
_getPathReader: function (afterPath) { | |
this._afterPath = afterPath; | |
return this._readPath; | |
}, | |
// ### `_readPath` reads a potential path. | |
// A '!' or '^' token selects the forward or backward path reader; any other token | |
// ends the path and is passed on to the continuation stored in `this._afterPath`. | |
_readPath: function (token) { | |
switch (token.type) { | |
// Forward path | |
case '!': return this._readForwardPath; | |
// Backward path | |
case '^': return this._readBackwardPath; | |
// Not a path; resume reading where we left off | |
default: | |
// `parent` is the innermost context, or a falsy value (0) when the context stack is empty | |
var stack = this._contextStack, parent = stack.length && stack[stack.length - 1]; | |
// If we were reading a list item, we still need to output it | |
if (parent && parent.type === 'item') { | |
// The list item is the remaining subject after reading the path | |
var item = this._subject; | |
// Switch back to the context of the list | |
this._restoreContext(); | |
// Output the list item | |
// NOTE(review): relies on `_restoreContext` resetting `this._subject` and `this._graph` to the list's values; confirm there | |
this._triple(this._subject, RDF_FIRST, item, this._graph); | |
} | |
return this._afterPath(token); | |
} | |
}, | |
// ### `_readForwardPath` reads a '!' path. | |
// The token is the path's predicate. A fresh blank node becomes the path's object and | |
// takes the place of the entity the path was applied to (the current subject or object); | |
// that original entity becomes the subject of the emitted path triple. | |
_readForwardPath: function (token) { | |
var subject, predicate, object = '_:b' + blankNodeCount++; | |
// The next token is the predicate | |
// NOTE(review): returns undefined when `_readEntity` yields a falsy value; presumably the error was already reported there | |
if (!(predicate = this._readEntity(token))) | |
return; | |
// If we were reading a subject, replace the subject by the path's object | |
if (this._predicate === null) | |
subject = this._subject, this._subject = object; | |
// If we were reading an object, replace the object by the path's object | |
else | |
subject = this._object, this._object = object; | |
// Emit the path's current triple and read its next section | |
this._triple(subject, predicate, object, this._graph); | |
return this._readPath; | |
}, | |
// ### `_readBackwardPath` reads a '^' path. | |
// The token is the path's predicate. A fresh blank node becomes the path's subject and | |
// takes the place of the entity the path was applied to (the current subject or object); | |
// that original entity becomes the object of the emitted path triple. | |
_readBackwardPath: function (token) { | |
var subject = '_:b' + blankNodeCount++, predicate, object; | |
// The next token is the predicate | |
// NOTE(review): returns undefined when `_readEntity` yields a falsy value; presumably the error was already reported there | |
if (!(predicate = this._readEntity(token))) | |
return; | |
// If we were reading a subject, replace the subject by the path's subject | |
if (this._predicate === null) | |
object = this._subject, this._subject = subject; | |
// If we were reading an object, replace the object by the path's subject | |
else | |
object = this._object, this._object = subject; | |
// Emit the path's current triple and read its next section | |
this._triple(subject, predicate, object, this._graph); | |
return this._readPath; | |
}, | |
// ### `_getTripleEndReader` gets the next reader function at the end of a triple. | ||
_getTripleEndReader: function () { | ||
var stack = this._tripleStack; | ||
if (stack.length === 0) | ||
var contextStack = this._contextStack; | ||
if (!contextStack.length) | ||
return this._readPunctuation; | ||
switch (stack[stack.length - 1].type) { | ||
switch (contextStack[contextStack.length - 1].type) { | ||
case 'blank': | ||
@@ -591,5 +728,13 @@ return this._readBlankNodeTail; | ||
return this._readListItem; | ||
case 'formula': | ||
return this._readFormulaTail; | ||
} | ||
}, | ||
// ### `_triple` emits a triple through the callback. | |
// Calls `this._callback` with `null` as the error argument and an object with the keys | |
// `subject`, `predicate`, `object` and `graph`; a falsy `graph` is emitted as the empty string. | |
_triple: function (subject, predicate, object, graph) { | |
this._callback(null, { subject: subject, predicate: predicate, object: object, | |
graph: graph || '' }); | |
}, | |
// ### `_error` emits an error message through the callback. | ||
@@ -691,4 +836,7 @@ _error: function (message, token) { | ||
this._readCallback = this._readInTopContext; | ||
this._sparqlStyle = false; | ||
this._prefixes = Object.create(null); | ||
this._prefixes._ = this._blankNodePrefix || '_:b' + blankNodePrefix++ + '_'; | ||
this._inversePredicate = false; | ||
this._quantified = Object.create(null); | ||
@@ -695,0 +843,0 @@ // Parse synchronously if no callbacks are given. |
# License | ||
The MIT License (MIT) | ||
Copyright ©2012–2013 Ruben Verborgh | ||
Copyright ©2012–2016 Ruben Verborgh | ||
@@ -5,0 +5,0 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: |
{ | ||
"name": "n3", | ||
"version": "0.7.0", | ||
"version": "0.8.0", | ||
"description": "Lightning fast, asynchronous, streaming Turtle / N3 / RDF library.", | ||
@@ -5,0 +5,0 @@ "author": "Ruben Verborgh <ruben.verborgh@gmail.com>", |
@@ -9,8 +9,9 @@ # Lightning fast, asynchronous, streaming RDF for JavaScript | ||
[TriG](http://www.w3.org/TR/trig/), | ||
[N-Triples](http://www.w3.org/TR/n-triples/) | ||
and [N-Quads](http://www.w3.org/TR/n-quads/). | ||
- [**Writing**](#Writing) triples/quads to | ||
[N-Triples](http://www.w3.org/TR/n-triples/), | ||
[N-Quads](http://www.w3.org/TR/n-quads/), | ||
and [Notation3 (N3)](https://www.w3.org/TeamSubmission/n3/). | ||
- [**Writing**](#writing) triples/quads to | ||
[Turtle](http://www.w3.org/TR/turtle/), | ||
[TriG](http://www.w3.org/TR/trig/), | ||
[N-Triples](http://www.w3.org/TR/n-triples/) | ||
[N-Triples](http://www.w3.org/TR/n-triples/), | ||
and [N-Quads](http://www.w3.org/TR/n-quads/). | ||
@@ -141,2 +142,10 @@ - **Storage** of triples/quads in memory | ||
Notation3 (N3) is supported _only_ through the `format` argument: | ||
``` js | ||
var parser3 = N3.Parser({ format: 'N3' }); | ||
var parser4 = N3.Parser({ format: 'Notation3' }); | ||
var parser5 = N3.Parser({ format: 'text/n3' }); | ||
``` | ||
### From an RDF stream to triples | ||
@@ -372,14 +381,7 @@ | ||
In addition, the N3.js parser also supports [Notation3 (N3)](https://www.w3.org/TeamSubmission/n3/) (no official specification yet). | ||
Pass a `format` option to the constructor with the name or MIME type of a format | ||
for strict, fault-intolerant behavior. | ||
Note that the library does not support full [Notation3](http://www.w3.org/TeamSubmission/n3/) yet | ||
(and a standardized specification for this syntax is currently lacking). | ||
### Breaking changes | ||
N3.js 0.4.x introduces the following breaking changes from 0.3.x versions: | ||
- The fourth element of a quad is named `graph` instead of `context`. | ||
- `N3.Writer` and `N3.Store` constructor options are passed as a hash `{ prefixes: { … } }`. | ||
- `N3.Util` URI methods such as `isUri` are now IRI methods such as `isIRI`. | ||
## License, status and contributions | ||
@@ -386,0 +388,0 @@ The N3.js library is copyrighted by [Ruben Verborgh](http://ruben.verborgh.org/) |
@@ -145,2 +145,14 @@ var N3Lexer = require('../N3').Lexer; | ||
it('should tokenize prefixed names starting with true', | ||
shouldTokenize('true:a truer:b ', | ||
{ type: 'prefixed', prefix: 'true', value: 'a', line: 1 }, | ||
{ type: 'prefixed', prefix: 'truer', value: 'b', line: 1 }, | ||
{ type: 'eof', line: 1 })); | ||
it('should tokenize prefixed names starting with false', | ||
shouldTokenize('false:a falser:b ', | ||
{ type: 'prefixed', prefix: 'false', value: 'a', line: 1 }, | ||
{ type: 'prefixed', prefix: 'falser', value: 'b', line: 1 }, | ||
{ type: 'eof', line: 1 })); | ||
it('should tokenize prefixed names with non-leading colons', | ||
@@ -174,2 +186,12 @@ shouldTokenize('og:video:height ', | ||
it('should tokenize a blank node with a dot, split in half while streaming', | ||
shouldTokenize(streamOf('_:Anthony_J._Batt', 'aglia '), | ||
{ type: 'blank', prefix: '_', value: 'Anthony_J._Battaglia', line: 1 }, | ||
{ type: 'eof', line: 1 })); | ||
it('should tokenize a blank node with a dot, split after the dot while streaming', | ||
shouldTokenize(streamOf('_:Anthony_J.', '_Battaglia '), | ||
{ type: 'blank', prefix: '_', value: 'Anthony_J._Battaglia', line: 1 }, | ||
{ type: 'eof', line: 1 })); | ||
it('should not decode a prefixed name', | ||
@@ -231,7 +253,11 @@ shouldTokenize('ex:%66oo-bar ', | ||
it('should tokenize a single comment', | ||
shouldTokenize(streamOf('#comment'), | ||
shouldTokenize('#mycomment', | ||
{ type: 'eof', line: 1 })); | ||
it('should tokenize a stream with split comment', | ||
shouldTokenize(streamOf('#mycom', 'ment'), | ||
{ type: 'eof', line: 1 })); | ||
it('should ignore comments', | ||
shouldTokenize('<#foo> #comment\n <#foo> #comment \r# comment\n\n<#bla>#', | ||
shouldTokenize('<#foo> #mycomment\n <#foo> #mycomment \r# mycomment\n\n<#bla>#', | ||
{ type: 'IRI', value: '#foo', line: 1 }, | ||
@@ -252,3 +278,3 @@ { type: 'IRI', value: '#foo', line: 2 }, | ||
it('should tokenize a triple quoted string literal with quotes newlines inside', | ||
it('should tokenize a triple quoted string literal with quoted newlines inside', | ||
shouldTokenize('"""st"r\ni""ng"""', | ||
@@ -285,3 +311,3 @@ { type: 'literal', value: '"st"r\ni""ng"', line: 1 }, | ||
{ type: 'literal', value: '"stringA"', line: 1 }, | ||
{ type: 'type', value: 'type', line: 1 }, | ||
{ type: 'typeIRI', value: 'type', line: 1 }, | ||
{ type: 'literal', value: '"stringB"', line: 1 }, | ||
@@ -291,6 +317,2 @@ { type: 'type', value: 'mytype', prefix: 'ns', line: 1 }, | ||
it('should not tokenize a quoted string literal with incorrect type', | ||
shouldNotTokenize('"stringA"^<type> "stringB"^^ns:mytype ', | ||
'Unexpected "^<type>" on line 1.')); | ||
it('should not tokenize a single hat', | ||
@@ -338,3 +360,3 @@ shouldNotTokenize('^', | ||
{ type: 'literal', value: '"stringA"', line: 1 }, | ||
{ type: 'type', value: 'type', line: 1 }, | ||
{ type: 'typeIRI', value: 'type', line: 1 }, | ||
{ type: 'literal', value: '"stringB"', line: 1 }, | ||
@@ -448,6 +470,2 @@ { type: 'type', value: 'mytype', prefix: 'ns', line: 1 }, | ||
it('should tokenize a stream with split comment', | ||
shouldTokenize(streamOf('#com', 'ment'), | ||
{ type: 'eof', line: 1 })); | ||
it('should immediately signal an error if a linebreak occurs anywhere outside a triple-quoted literal', | ||
@@ -470,2 +488,10 @@ shouldNotTokenize(streamOf('abc\n', null), 'Unexpected "abc" on line 1.')); | ||
it('should tokenize @ keywords', | ||
shouldTokenize('@prefix @base @forSome @forAll ', | ||
{ type: '@prefix', line: 1 }, | ||
{ type: '@base', line: 1 }, | ||
{ type: '@forSome', line: 1 }, | ||
{ type: '@forAll', line: 1 }, | ||
{ type: 'eof', line: 1 })); | ||
it('should tokenize @prefix declarations', | ||
@@ -533,3 +559,3 @@ shouldTokenize('@prefix : <http://iri.org/#>.\n@prefix abc:<http://iri.org/#>.', | ||
{ type: ']', line: 1 }, | ||
{ type: 'prefixed', prefix: '_', value: 'a', line: 1 }, | ||
{ type: 'blank', prefix: '_', value: 'a', line: 1 }, | ||
{ type: 'prefixed', prefix: '', value: 'b', line: 1 }, | ||
@@ -617,3 +643,3 @@ { type: '.', line: 1 }, | ||
shouldTokenize('_:g{}', | ||
{ type: 'prefixed', prefix: '_', value: 'g', line: 1 }, | ||
{ type: 'blank', prefix: '_', value: 'g', line: 1 }, | ||
{ type: '{', line: 1 }, | ||
@@ -625,3 +651,3 @@ { type: '}', line: 1 }, | ||
shouldTokenize('_:g {<a> <b> c:d}', | ||
{ type: 'prefixed', prefix: '_', value: 'g', line: 1 }, | ||
{ type: 'blank', prefix: '_', value: 'g', line: 1 }, | ||
{ type: '{', line: 1 }, | ||
@@ -653,2 +679,51 @@ { type: 'IRI', value: 'a', line: 1 }, | ||
it('should tokenize variables', | ||
shouldTokenize('?a ?abc ?a_B_c.', | ||
{ type: 'var', value: '?a', line: 1 }, | ||
{ type: 'var', value: '?abc', line: 1 }, | ||
{ type: 'var', value: '?a_B_c', line: 1 }, | ||
{ type: '.', value: '', line: 1 }, | ||
{ type: 'eof', line: 1 })); | ||
it('should not tokenize invalid variables', | ||
shouldNotTokenize('?0a ', 'Unexpected "?0a" on line 1.')); | ||
it('should tokenize the equality sign', | ||
shouldTokenize('<a> = <b> ', | ||
{ type: 'IRI', value: 'a', line: 1 }, | ||
{ type: 'abbreviation', value: 'http://www.w3.org/2002/07/owl#sameAs', line: 1 }, | ||
{ type: 'IRI', value: 'b', line: 1 }, | ||
{ type: 'eof', line: 1 })); | ||
it('should tokenize the right implication', | ||
shouldTokenize('<a> => <b> ', | ||
{ type: 'IRI', value: 'a', line: 1 }, | ||
{ type: 'abbreviation', value: 'http://www.w3.org/2000/10/swap/log#implies', line: 1 }, | ||
{ type: 'IRI', value: 'b', line: 1 }, | ||
{ type: 'eof', line: 1 })); | ||
it('should tokenize the left implication', | ||
shouldTokenize('<a> <= <b> ', | ||
{ type: 'IRI', value: 'a', line: 1 }, | ||
{ type: 'inverse', value: 'http://www.w3.org/2000/10/swap/log#implies', line: 1 }, | ||
{ type: 'IRI', value: 'b', line: 1 }, | ||
{ type: 'eof', line: 1 })); | ||
it('should tokenize paths', | ||
shouldTokenize(':joe!fam:mother!loc:office!loc:zip :joe!fam:mother^fam:mother', | ||
{ type: 'prefixed', prefix: '', value: 'joe', line: 1 }, | ||
{ type: '!', line: 1 }, | ||
{ type: 'prefixed', prefix: 'fam', value: 'mother', line: 1 }, | ||
{ type: '!', line: 1 }, | ||
{ type: 'prefixed', prefix: 'loc', value: 'office', line: 1 }, | ||
{ type: '!', line: 1 }, | ||
{ type: 'prefixed', prefix: 'loc', value: 'zip', line: 1 }, | ||
{ type: 'prefixed', prefix: '', value: 'joe', line: 1 }, | ||
{ type: '!', line: 1 }, | ||
{ type: 'prefixed', prefix: 'fam', value: 'mother', line: 1 }, | ||
{ type: '^', line: 1 }, | ||
{ type: 'prefixed', prefix: 'fam', value: 'mother', line: 1 }, | ||
{ type: 'eof', line: 1 })); | ||
it('should not tokenize an invalid document', | ||
@@ -674,3 +749,3 @@ shouldNotTokenize(' \n @!', 'Unexpected "@!" on line 2.')); | ||
describe('passing data after an error has occured', function () { | ||
describe('passing data after an error has occurred', function () { | ||
var tokens = [], stream = new EventEmitter(), lexer = new N3Lexer(); | ||
@@ -715,7 +790,60 @@ lexer.tokenize(stream, function (error, token) { !error && tokens.push(token); }); | ||
function shouldTokenize(input) { | ||
describe('An N3Lexer instance with the n3 option set to false', function () { | ||
function createLexer() { return new N3Lexer({ n3: false }); } | ||
it('should not tokenize a variable', | ||
shouldNotTokenize(createLexer(), '?a', 'Unexpected "?a" on line 1.')); | ||
it('should not tokenize a right implication', | ||
shouldNotTokenize(createLexer(), '<a> => <c>.', 'Unexpected "=>" on line 1.')); | ||
it('should not tokenize a left implication', | ||
shouldNotTokenize(createLexer(), '<a> <= <c>.', 'Unexpected "<=" on line 1.')); | ||
it('should not tokenize an equality', | ||
shouldNotTokenize(createLexer(), '<a> = <c>.', 'Unexpected "=" on line 1.')); | ||
it('should not tokenize a ! path', | ||
shouldNotTokenize(createLexer(), ':joe!fam:mother', 'Unexpected "!fam:mother" on line 1.')); | ||
it('should not tokenize a ^ path', | ||
shouldNotTokenize(createLexer(), ':joe^fam:father', 'Unexpected "^fam:father" on line 1.')); | ||
}); | ||
describe('An N3Lexer instance with the comment option set to true', function () { | ||
function createLexer() { return new N3Lexer({ comments: true }); } | ||
it('should tokenize a single comment', | ||
shouldTokenize(createLexer(), '#mycomment', | ||
{ type: 'comment', value: 'mycomment', line: 1 }, | ||
{ type: 'eof', line: 1 })); | ||
it('should tokenize a stream with split comment', | ||
shouldTokenize(createLexer(), streamOf('#mycom', 'ment'), | ||
{ type: 'comment', value: 'mycomment', line: 1 }, | ||
{ type: 'eof', line: 1 })); | ||
it('should tokenize comments', | ||
shouldTokenize(createLexer(), '<#foo> #mycomment\n <#foo> #mycomment \r# mycomment\n\n<#bla>#', | ||
{ type: 'IRI', value: '#foo', line: 1 }, | ||
{ type: 'comment', value: 'mycomment', line: 1 }, | ||
{ type: 'IRI', value: '#foo', line: 2 }, | ||
{ type: 'comment', value: 'mycomment ', line: 2 }, | ||
{ type: 'comment', value: ' mycomment', line: 3 }, | ||
{ type: 'IRI', value: '#bla', line: 5 }, | ||
{ type: 'comment', value: '', line: 5 }, | ||
{ type: 'eof', line: 5 })); | ||
}); | ||
function shouldTokenize(lexer, input) { | ||
var expected = Array.prototype.slice.call(arguments, 1); | ||
// Shift parameters if necessary | ||
if (lexer instanceof N3Lexer) | ||
expected.shift(); | ||
else | ||
input = lexer, lexer = new N3Lexer(); | ||
return function (done) { | ||
var result = []; | ||
new N3Lexer().tokenize(input, tokenCallback); | ||
lexer.tokenize(input, tokenCallback); | ||
@@ -739,5 +867,9 @@ function tokenCallback(error, token) { | ||
function shouldNotTokenize(input, expectedError) { | ||
function shouldNotTokenize(lexer, input, expectedError) { | ||
// Shift parameters if necessary | ||
if (!(lexer instanceof N3Lexer)) | ||
expectedError = input, input = lexer, lexer = new N3Lexer(); | ||
return function (done) { | ||
new N3Lexer().tokenize(input, tokenCallback); | ||
lexer.tokenize(input, tokenCallback); | ||
function tokenCallback(error, token) { | ||
@@ -748,3 +880,2 @@ if (error) { | ||
error.message.should.eql(expectedError); | ||
error.should.be.an.instanceof(Error); | ||
done(); | ||
@@ -751,0 +882,0 @@ } |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
347483
6284
394