Comparing version 0.5.0 to 0.6.1
<!-- vim: set spelllang=en : --> | ||
# Changelog | ||
## v0.6.1 | ||
### Breaking changes | ||
* Node v6 reached end-of-life and is no longer supported. | ||
### Fixed bugs | ||
* Properly process CDATA and comment tags that sit astride two chunks. | ||
## v0.5.0 | ||
@@ -5,0 +15,0 @@ |
@@ -111,2 +111,3 @@ const {Writable} = require('readable-stream'); | ||
comment: 'comment', | ||
markupDeclaration: 'markupDeclaration', | ||
processingInstruction: 'processinginstruction', | ||
@@ -147,28 +148,32 @@ tagOpen: 'tagopen', | ||
// Not stalled initially | ||
this._stall(null); | ||
// Not waiting initially | ||
this._waiting = null; | ||
} | ||
/** | ||
* Put the stream in stalled mode, which means we need more data | ||
* Put the stream into waiting mode, which means we need more data | ||
* to finish parsing the current token. | ||
* | ||
* @private | ||
* @param token Type of token that is being parsed. If null, unstalls | ||
* the stream and returns any pending data. | ||
* @param pending Pending data. | ||
* @return Pending data if the stream has been unstalled. | ||
* @param token Type of token that is being parsed. | ||
* @param data Pending data. | ||
*/ | ||
_stall(token, pending) { | ||
if (token === null) { | ||
const data = this._pending || ''; | ||
_wait(token, data) { | ||
this._waiting = {token, data}; | ||
} | ||
this._stalled = null; | ||
this._pending = ''; | ||
return data; | ||
/** | ||
* Put the stream out of waiting mode. | ||
* | ||
* @private | ||
* @return Any data that was pending. | ||
*/ | ||
_unwait() { | ||
if (this._waiting === null) { | ||
return ''; | ||
} | ||
this._stalled = token; | ||
this._pending = pending; | ||
const data = this._waiting.data; | ||
this._waiting = null; | ||
return data; | ||
} | ||
@@ -201,4 +206,4 @@ | ||
_parseChunk(input, callback) { | ||
// Restore "stalled" state and prepend pending data | ||
input = this._stall(null) + input; | ||
// Use pending data if applicable and get out of waiting mode | ||
input = this._unwait() + input; | ||
@@ -213,5 +218,5 @@ let chunkPos = 0; | ||
// We read a TEXT node but there might be some | ||
// more text data left, so we stall | ||
// more text data left, so we wait | ||
if (nextTag === -1) { | ||
this._stall( | ||
this._wait( | ||
Node.text, | ||
@@ -243,5 +248,18 @@ input.slice(chunkPos) | ||
// Unclosed markup declaration section of unknown type, | ||
// we need to wait for upcoming data | ||
if (nextNextChar === undefined) { | ||
this._wait( | ||
Node.markupDeclaration, | ||
input.slice(chunkPos - 2) | ||
); | ||
break; | ||
} | ||
if ( | ||
nextNextChar === '[' && | ||
input.slice(chunkPos + 1, chunkPos + 7) === 'CDATA[' | ||
'CDATA['.indexOf(input.slice( | ||
chunkPos + 1, | ||
chunkPos + 7 | ||
)) > -1 | ||
) { | ||
@@ -251,6 +269,6 @@ chunkPos += 7; | ||
// Unclosed CDATA section, we need to wait for | ||
// Incomplete CDATA section, we need to wait for | ||
// upcoming data | ||
if (cdataClose === -1) { | ||
this._stall( | ||
this._wait( | ||
Node.cdata, | ||
@@ -271,10 +289,15 @@ input.slice(chunkPos - 9) | ||
if (nextNextChar === '-' && input[chunkPos + 1] === '-') { | ||
if ( | ||
nextNextChar === '-' && ( | ||
input[chunkPos + 1] === undefined || | ||
input[chunkPos + 1] === '-' | ||
) | ||
) { | ||
chunkPos += 2; | ||
const commentClose = input.indexOf('--', chunkPos); | ||
// Unclosed comment node, we need to wait for | ||
// Incomplete comment node, we need to wait for | ||
// upcoming data | ||
if (commentClose === -1) { | ||
this._stall( | ||
this._wait( | ||
Node.comment, | ||
@@ -312,3 +335,3 @@ input.slice(chunkPos - 4) | ||
if (piClose === -1) { | ||
this._stall( | ||
this._wait( | ||
Node.processingInstruction, | ||
@@ -333,3 +356,3 @@ input.slice(chunkPos - 2) | ||
if (tagClose === -1) { | ||
this._stall( | ||
this._wait( | ||
Node.tagOpen, | ||
@@ -425,31 +448,35 @@ input.slice(chunkPos - 1) | ||
// Handle unclosed nodes | ||
switch (this._stalled) { | ||
case Node.text: | ||
// Text nodes are implicitly closed | ||
this.emit( | ||
'text', | ||
{contents: this._stall(null)} | ||
); | ||
break; | ||
case Node.cdata: | ||
callback(new Error('Unclosed CDATA section')); | ||
return; | ||
case Node.comment: | ||
callback(new Error('Unclosed comment')); | ||
return; | ||
case Node.processingInstruction: | ||
callback(new Error('Unclosed processing instruction')); | ||
return; | ||
case Node.tagOpen: | ||
case Node.tagClose: | ||
// We do not distinguish between unclosed opening | ||
// or unclosed closing tags | ||
callback(new Error('Unclosed tag')); | ||
return; | ||
default: | ||
// Pass | ||
if (this._waiting !== null) { | ||
switch (this._waiting.token) { | ||
case Node.text: | ||
// Text nodes are implicitly closed | ||
this.emit( | ||
'text', | ||
{contents: this._waiting.data} | ||
); | ||
break; | ||
case Node.cdata: | ||
callback(new Error('Unclosed CDATA section')); | ||
return; | ||
case Node.comment: | ||
callback(new Error('Unclosed comment')); | ||
return; | ||
case Node.processingInstruction: | ||
callback(new Error('Unclosed processing instruction')); | ||
return; | ||
case Node.tagOpen: | ||
case Node.tagClose: | ||
// We do not distinguish between unclosed opening | ||
// or unclosed closing tags | ||
callback(new Error('Unclosed tag')); | ||
return; | ||
default: | ||
// Pass | ||
} | ||
} | ||
if (this._tagStack.length !== 0) { | ||
callback(new Error(`Unclosed tags: ${this._tagStack.join(',')}`)); | ||
callback(new Error( | ||
`Unclosed tags: ${this._tagStack.join(',')}` | ||
)); | ||
return; | ||
@@ -456,0 +483,0 @@ } |
@@ -8,2 +8,9 @@ const {Readable} = require('readable-stream'); | ||
/** | ||
* Verify that an XML text is parsed as the specified stream of events. | ||
* | ||
* @param assert Assertion function. | ||
* @param xml XML string or array of XML chunks. | ||
* @param events Sequence of events that must be emitted in order. | ||
*/ | ||
const expectEvents = (assert, xml, events) => { | ||
@@ -44,6 +51,17 @@ let eventsIndex = 0; | ||
for (let i = 0; i < xml.length; i += 9) { | ||
parser.write(xml.slice(i, i + 9)); | ||
if (!Array.isArray(xml)) { | ||
// By default, split data in chunks of size 10 | ||
const chunks = []; | ||
for (let i = 0; i < xml.length; i += 10) { | ||
chunks.push(xml.slice(i, i + 10)); | ||
} | ||
xml = chunks; | ||
} | ||
for (let chunk of xml) { | ||
parser.write(chunk); | ||
} | ||
parser.end(); | ||
@@ -59,2 +77,9 @@ }; | ||
test('should parse comments between two chunks', assert => { | ||
expectEvents(assert, | ||
['<', '!', '-', '-', ' this is a comment -->'], | ||
[['comment', {contents: ' this is a comment '}]] | ||
); | ||
}); | ||
test('should not parse unclosed comments', assert => { | ||
@@ -81,2 +106,16 @@ expectEvents(assert, | ||
test('should parse CDATA sections between two chunks', assert => { | ||
expectEvents(assert, | ||
['<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[', 'contents]]>'], | ||
[['cdata', {contents: 'contents'}]] | ||
); | ||
}); | ||
test('should not parse invalid CDATA sections', assert => { | ||
expectEvents(assert, | ||
['<![CDAthis is NOT a c&data s<>ction]]>'], | ||
[['error', new Error('Unrecognized sequence: <![')]] | ||
); | ||
}); | ||
test('should not parse unclosed CDATA sections', assert => { | ||
@@ -83,0 +122,0 @@ expectEvents(assert, |
{ | ||
"name": "saxophone", | ||
"description": "Fast and lightweight event-driven XML parser in pure JavaScript", | ||
"version": "0.5.0", | ||
"version": "0.6.1", | ||
"license": "MIT", | ||
@@ -26,3 +26,3 @@ "main": "lib/index.js", | ||
"engines": { | ||
"node": "6 || 8 || 10" | ||
"node": "8 || 10 || 12 || 13" | ||
}, | ||
@@ -36,2 +36,5 @@ "keywords": [ | ||
], | ||
"files": [ | ||
"lib/**/*" | ||
], | ||
"author": { | ||
@@ -44,16 +47,15 @@ "name": "Mattéo Delabre", | ||
"benchmark": "^2.1.4", | ||
"common-tags": "^1.6.0", | ||
"coveralls": "^3.0.0", | ||
"eslint": "^4.14.0", | ||
"common-tags": "^1.8.0", | ||
"coveralls": "^3.0.8", | ||
"eslint": "^6.7.0", | ||
"faucet": "0.0.1", | ||
"lodash.uniq": "^4.5.0", | ||
"nyc": "^13.0.1", | ||
"nyc": "^14.1.1", | ||
"tap-spec": "^5.0.0", | ||
"tape": "^4.8.0", | ||
"webpack": "^3.10.0" | ||
"tape": "^4.11.0" | ||
}, | ||
"dependencies": { | ||
"readable-stream": "^3.0.3", | ||
"string_decoder": "^1.1.1" | ||
"readable-stream": "^3.4.0", | ||
"string_decoder": "^1.3.0" | ||
} | ||
} |
@@ -1,2 +0,1 @@ | ||
<!-- vim: set spelllang=en : --> | ||
# Saxophone 🎷 | ||
@@ -12,5 +11,7 @@ | ||
Saxophone is inspired by SAX parsers such as [sax-js](https://github.com/isaacs/sax-js) and [EasySax](https://github.com/vflash/easysax): unlike most XML parsers, it does not create a Document Object Model ([DOM](https://en.wikipedia.org/wiki/Document_Object_Model)) tree as a result of parsing documents. Instead, it emits events for each tag or text node encountered as the parsing goes on. This means that Saxophone has a really low memory footprint and can easily parse large documents. | ||
Saxophone is inspired by SAX parsers such as [sax-js](https://github.com/isaacs/sax-js) and [EasySax](https://github.com/vflash/easysax): unlike most XML parsers, it does not create a Document Object Model ([DOM](https://en.wikipedia.org/wiki/Document_Object_Model)) tree as a result of parsing documents. | ||
Instead, it emits events for each tag or text node encountered as the parsing goes on, which makes it an online algorithm. | ||
This means that Saxophone has a low memory footprint, can easily parse large documents, and can parse documents as they come from a stream. | ||
The parser does not keep track of the document state while parsing and, in particular, does not check whether the document is well-formed or valid, making it super-fast (see [benchmarks](#benchmarks) below). | ||
The parser does not keep track of the document state while parsing and, in particular, does not check whether the document is well-formed or valid, making it super-fast (see the [benchmark](#Benchmark) below). | ||
@@ -34,10 +35,8 @@ This library is best suited when you need to extract simple data out of an XML document that you know is well-formed. The parser will not report precise errors in case of syntax problems. An example would be reading data from an API endpoint. | ||
-------------------|--------:|----------------------------------------: | ||
**Saxophone** | 0.5.0 | **6,840 ±1.48%** | ||
**EasySax** | 0.3.2 | **7,354 ±1.16%** | ||
node-expat | 2.3.17 | 1,251 ±0.60% | ||
libxmljs.SaxParser | 0.19.5 | 1,007 ±0.81% | ||
sax-js | 1.2.4 | 982 ±1.50% | ||
**Saxophone** | 0.5.0 | **6,797 ±2.99%** | ||
**EasySax** | 0.3.2 | **7,919 ±1.21%** | ||
node-expat | 2.3.18 | 904 ±1.77% | ||
libxmljs.SaxParser | 0.19.7 | 869 ±0.98% | ||
sax-js | 1.2.4 | 634 ±2.01% | ||
To run the benchmark yourself, use the following commands: | ||
```sh | ||
@@ -235,6 +234,9 @@ $ git clone https://github.com/matteodelabre/saxophone.git | ||
Thanks to [Norman Rzepka](https://github.com/normanrz) for implementing the streaming API and the check for opening and closing tags mismatch. | ||
Thanks to: | ||
* [Norman Rzepka](https://github.com/normanrz) for implementing the streaming API and the check for opening and closing tags mismatch. | ||
* [winston01](https://github.com/winston01) for spotting and fixing an error in the parser when a tag sits astride two chunks. | ||
## License | ||
Released under the MIT license. [See the full license text.](LICENSE) |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Dynamic require
Supply chain risk: Dynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
9
240
0
44873
11
909
Updated: readable-stream@^3.4.0
Updated: string_decoder@^1.3.0