Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

saxophone

Package Overview
Dependencies
Maintainers
1
Versions
13
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

saxophone - npm Package Compare versions

Comparing version 0.5.0 to 0.6.1

10

CHANGELOG.md
<!-- vim: set spelllang=en : -->
# Changelog
## v0.6.1
### Breaking changes
* Node v6 reached end-of-life and is no longer supported.
### Fixed bugs
* Properly process CDATA and comment tags that sit astride two chunks.
## v0.5.0

@@ -5,0 +15,0 @@

135

lib/Saxophone.js

@@ -111,2 +111,3 @@ const {Writable} = require('readable-stream');

comment: 'comment',
markupDeclaration: 'markupDeclaration',
processingInstruction: 'processinginstruction',

@@ -147,28 +148,32 @@ tagOpen: 'tagopen',

// Not stalled initially
this._stall(null);
// Not waiting initially
this._waiting = null;
}
/**
* Put the stream in stalled mode, which means we need more data
* Put the stream into waiting mode, which means we need more data
* to finish parsing the current token.
*
* @private
* @param token Type of token that is being parsed. If null, unstalls
* the stream and returns any pending data.
* @param pending Pending data.
* @return Pending data if the stream has been unstalled.
* @param token Type of token that is being parsed.
* @param data Pending data.
*/
_stall(token, pending) {
if (token === null) {
const data = this._pending || '';
_wait(token, data) {
this._waiting = {token, data};
}
this._stalled = null;
this._pending = '';
return data;
/**
* Put the stream out of waiting mode.
*
* @private
* @return Any data that was pending.
*/
_unwait() {
if (this._waiting === null) {
return '';
}
this._stalled = token;
this._pending = pending;
const data = this._waiting.data;
this._waiting = null;
return data;
}

@@ -201,4 +206,4 @@

_parseChunk(input, callback) {
// Restore "stalled" state and prepend pending data
input = this._stall(null) + input;
// Use pending data if applicable and get out of waiting mode
input = this._unwait() + input;

@@ -213,5 +218,5 @@ let chunkPos = 0;

// We read a TEXT node but there might be some
// more text data left, so we stall
// more text data left, so we wait
if (nextTag === -1) {
this._stall(
this._wait(
Node.text,

@@ -243,5 +248,18 @@ input.slice(chunkPos)

// Unclosed markup declaration section of unknown type,
// we need to wait for upcoming data
if (nextNextChar === undefined) {
this._wait(
Node.markupDeclaration,
input.slice(chunkPos - 2)
);
break;
}
if (
nextNextChar === '[' &&
input.slice(chunkPos + 1, chunkPos + 7) === 'CDATA['
'CDATA['.indexOf(input.slice(
chunkPos + 1,
chunkPos + 7
)) > -1
) {

@@ -251,6 +269,6 @@ chunkPos += 7;

// Unclosed CDATA section, we need to wait for
// Incomplete CDATA section, we need to wait for
// upcoming data
if (cdataClose === -1) {
this._stall(
this._wait(
Node.cdata,

@@ -271,10 +289,15 @@ input.slice(chunkPos - 9)

if (nextNextChar === '-' && input[chunkPos + 1] === '-') {
if (
nextNextChar === '-' && (
input[chunkPos + 1] === undefined ||
input[chunkPos + 1] === '-'
)
) {
chunkPos += 2;
const commentClose = input.indexOf('--', chunkPos);
// Unclosed comment node, we need to wait for
// Incomplete comment node, we need to wait for
// upcoming data
if (commentClose === -1) {
this._stall(
this._wait(
Node.comment,

@@ -312,3 +335,3 @@ input.slice(chunkPos - 4)

if (piClose === -1) {
this._stall(
this._wait(
Node.processingInstruction,

@@ -333,3 +356,3 @@ input.slice(chunkPos - 2)

if (tagClose === -1) {
this._stall(
this._wait(
Node.tagOpen,

@@ -425,31 +448,35 @@ input.slice(chunkPos - 1)

// Handle unclosed nodes
switch (this._stalled) {
case Node.text:
// Text nodes are implicitly closed
this.emit(
'text',
{contents: this._stall(null)}
);
break;
case Node.cdata:
callback(new Error('Unclosed CDATA section'));
return;
case Node.comment:
callback(new Error('Unclosed comment'));
return;
case Node.processingInstruction:
callback(new Error('Unclosed processing instruction'));
return;
case Node.tagOpen:
case Node.tagClose:
// We do not distinguish between unclosed opening
// or unclosed closing tags
callback(new Error('Unclosed tag'));
return;
default:
// Pass
if (this._waiting !== null) {
switch (this._waiting.token) {
case Node.text:
// Text nodes are implicitly closed
this.emit(
'text',
{contents: this._waiting.data}
);
break;
case Node.cdata:
callback(new Error('Unclosed CDATA section'));
return;
case Node.comment:
callback(new Error('Unclosed comment'));
return;
case Node.processingInstruction:
callback(new Error('Unclosed processing instruction'));
return;
case Node.tagOpen:
case Node.tagClose:
// We do not distinguish between unclosed opening
// or unclosed closing tags
callback(new Error('Unclosed tag'));
return;
default:
// Pass
}
}
if (this._tagStack.length !== 0) {
callback(new Error(`Unclosed tags: ${this._tagStack.join(',')}`));
callback(new Error(
`Unclosed tags: ${this._tagStack.join(',')}`
));
return;

@@ -456,0 +483,0 @@ }

43

lib/Saxophone.test.js

@@ -8,2 +8,9 @@ const {Readable} = require('readable-stream');

/**
* Verify that an XML text is parsed as the specified stream of events.
*
* @param assert Assertion function.
* @param xml XML string or array of XML chunks.
* @param events Sequence of events that must be emitted in order.
*/
const expectEvents = (assert, xml, events) => {

@@ -44,6 +51,17 @@ let eventsIndex = 0;

for (let i = 0; i < xml.length; i += 9) {
parser.write(xml.slice(i, i + 9));
if (!Array.isArray(xml)) {
// By default, split data in chunks of size 10
const chunks = [];
for (let i = 0; i < xml.length; i += 10) {
chunks.push(xml.slice(i, i + 10));
}
xml = chunks;
}
for (let chunk of xml) {
parser.write(chunk);
}
parser.end();

@@ -59,2 +77,9 @@ };

test('should parse comments between two chunks', assert => {
expectEvents(assert,
['<', '!', '-', '-', ' this is a comment -->'],
[['comment', {contents: ' this is a comment '}]]
);
});
test('should not parse unclosed comments', assert => {

@@ -81,2 +106,16 @@ expectEvents(assert,

test('should parse CDATA sections between two chunks', assert => {
expectEvents(assert,
['<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[', 'contents]]>'],
[['cdata', {contents: 'contents'}]]
);
});
test('should not parse invalid CDATA sections', assert => {
expectEvents(assert,
['<![CDAthis is NOT a c&data s<>ction]]>'],
[['error', new Error('Unrecognized sequence: <![')]]
);
});
test('should not parse unclosed CDATA sections', assert => {

@@ -83,0 +122,0 @@ expectEvents(assert,

{
"name": "saxophone",
"description": "Fast and lightweight event-driven XML parser in pure JavaScript",
"version": "0.5.0",
"version": "0.6.1",
"license": "MIT",

@@ -26,3 +26,3 @@ "main": "lib/index.js",

"engines": {
"node": "6 || 8 || 10"
"node": "8 || 10 || 12 || 13"
},

@@ -36,2 +36,5 @@ "keywords": [

],
"files": [
"lib/**/*"
],
"author": {

@@ -44,16 +47,15 @@ "name": "Mattéo Delabre",

"benchmark": "^2.1.4",
"common-tags": "^1.6.0",
"coveralls": "^3.0.0",
"eslint": "^4.14.0",
"common-tags": "^1.8.0",
"coveralls": "^3.0.8",
"eslint": "^6.7.0",
"faucet": "0.0.1",
"lodash.uniq": "^4.5.0",
"nyc": "^13.0.1",
"nyc": "^14.1.1",
"tap-spec": "^5.0.0",
"tape": "^4.8.0",
"webpack": "^3.10.0"
"tape": "^4.11.0"
},
"dependencies": {
"readable-stream": "^3.0.3",
"string_decoder": "^1.1.1"
"readable-stream": "^3.4.0",
"string_decoder": "^1.3.0"
}
}

@@ -1,2 +0,1 @@

<!-- vim: set spelllang=en : -->
# Saxophone 🎷

@@ -12,5 +11,7 @@

Saxophone is inspired by SAX parsers such as [sax-js](https://github.com/isaacs/sax-js) and [EasySax](https://github.com/vflash/easysax): unlike most XML parsers, it does not create a Document Object Model ([DOM](https://en.wikipedia.org/wiki/Document_Object_Model)) tree as a result of parsing documents. Instead, it emits events for each tag or text node encountered as the parsing goes on. This means that Saxophone has a really low memory footprint and can easily parse large documents.
Saxophone is inspired by SAX parsers such as [sax-js](https://github.com/isaacs/sax-js) and [EasySax](https://github.com/vflash/easysax): unlike most XML parsers, it does not create a Document Object Model ([DOM](https://en.wikipedia.org/wiki/Document_Object_Model)) tree as a result of parsing documents.
Instead, it emits events for each tag or text node encountered as the parsing goes on, which makes it an online algorithm.
This means that Saxophone has a low memory footprint, can easily parse large documents, and can parse documents as they come from a stream.
The parser does not keep track of the document state while parsing and, in particular, does not check whether the document is well-formed or valid, making it super-fast (see [benchmarks](#benchmarks) below).
The parser does not keep track of the document state while parsing and, in particular, does not check whether the document is well-formed or valid, making it super-fast (see the [benchmark](#Benchmark) below).

@@ -34,10 +35,8 @@ This library is best suited when you need to extract simple data out of an XML document that you know is well-formed. The parser will not report precise errors in case of syntax problems. An example would be reading data from an API endpoint.

-------------------|--------:|----------------------------------------:
**Saxophone** | 0.5.0 | **6,840 ±1.48%**
**EasySax** | 0.3.2 | **7,354 ±1.16%**
node-expat | 2.3.17 | 1,251 ±0.60%
libxmljs.SaxParser | 0.19.5 | 1,007 ±0.81%
sax-js | 1.2.4 | 982 ±1.50%
**Saxophone** | 0.5.0 | **6,797 ±2.99%**
**EasySax** | 0.3.2 | **7,919 ±1.21%**
node-expat | 2.3.18 | 904 ±1.77%
libxmljs.SaxParser | 0.19.7 | 869 ±0.98%
sax-js | 1.2.4 | 634 ±2.01%
To run the benchmark yourself, use the following commands:
```sh

@@ -235,6 +234,9 @@ $ git clone https://github.com/matteodelabre/saxophone.git

Thanks to [Norman Rzepka](https://github.com/normanrz) for implementing the streaming API and the check for opening and closing tags mismatch.
Thanks to:
* [Norman Rzepka](https://github.com/normanrz) for implementing the streaming API and the check for mismatched opening and closing tags.
* [winston01](https://github.com/winston01) for spotting and fixing an error in the parser when a tag sits astride two chunks.
## License
Released under the MIT license. [See the full license text.](LICENSE)
Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc