@@ -66,2 +66,3 @@ ;(function (sax) {
		parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
		parser.encoding = null;
		parser.opt = opt \|\| {}
		@@ -211,2 +212,35 @@ parser.opt.lowercase = parser.opt.lowercase \|\| parser.opt.lowercasetags

		/**
		 * Guess the text encoding of the first bytes of an XML byte stream.
		 *
		 * @param {ArrayLike<number>} data - leading bytes received so far
		 * @param {boolean} isEnd - true when no further bytes will arrive
		 * @returns {string|null} 'utf-16le', 'utf-16be' or 'utf8'; null when fewer
		 *   than four bytes are available, no BOM matched, and more data may follow
		 */
		function determineBufferEncoding(data, isEnd) {
			// True when `data` is at least as long as `signature` and starts with it.
			const startsWith = (signature) =>
				data.length >= signature.length &&
				signature.every((byte, index) => data[index] === byte)

			// A byte-order mark is the strongest hint, so those are tried first.
			const byteOrderMarks = [
				['utf-16le', [0xff, 0xfe]],
				['utf-16be', [0xfe, 0xff]],
				['utf8', [0xef, 0xbb, 0xbf]],
			]
			for (const [encoding, signature] of byteOrderMarks) {
				if (startsWith(signature)) return encoding
			}

			if (data.length >= 4) {
				// Without a BOM, the position of the zero bytes around "<?" tells
				// UTF-16LE ("<\0?\0") and UTF-16BE ("\0<\0?") apart; anything else
				// is treated as UTF-8.
				if (startsWith([0x3c, 0x00, 0x3f, 0x00])) return 'utf-16le'
				if (startsWith([0x00, 0x3c, 0x00, 0x3f])) return 'utf-16be'
				return 'utf8'
			}

			// Too short to decide: fall back to UTF-8 only if the stream is over.
			return isEnd ? 'utf8' : null
		}

		function SAXStream(strict, opt) {
		@@ -238,3 +272,3 @@ if (!(this instanceof SAXStream)) {
		this._decoder = null

		this._decoderBuffer = null
		streamWraps.forEach(function (ev) {
		@@ -265,2 +299,28 @@ Object.defineProperty(me, 'on' + ev, {

		/**
		 * Decode a binary chunk to a string, detecting the stream encoding on
		 * first use via determineBufferEncoding.
		 *
		 * @param {Buffer} data - the next chunk of raw bytes
		 * @param {boolean} isEnd - true when this is the final chunk
		 * @returns {string} decoded text, or '' while the encoding is still unknown
		 */
		SAXStream.prototype._decodeBuffer = function (data, isEnd) {
			// Bytes held back by an earlier call are prepended to the new chunk
			// so detection and decoding see them together.
			const heldBack = this._decoderBuffer
			if (heldBack) {
				data = Buffer.concat([heldBack, data])
				this._decoderBuffer = null
			}

			if (this._decoder) {
				return this._decoder.decode(data, { stream: !isEnd })
			}

			const detected = determineBufferEncoding(data, isEnd)
			if (!detected) {
				// Not enough bytes to tell yet: keep them for the next call.
				this._decoderBuffer = data
				return ''
			}

			// Record the detected encoding on the parser; strict mode compares it
			// with the encoding named in the XML declaration.
			this._parser.encoding = detected
			this._decoder = new TextDecoder(detected)
			return this._decoder.decode(data, { stream: !isEnd })
		}

		SAXStream.prototype.write = function (data) {
		@@ -272,6 +332,11 @@ if (
		) {
		if (!this._decoder) {
		this._decoder = new TextDecoder('utf8')
		data = this._decodeBuffer(data, false)
		} else if (this._decoderBuffer) {
		// Flush any buffered binary prefix before handling a string chunk.
		// This only matters if the caller mixes Buffer and string writes (used in test).
		var remaining = this._decodeBuffer(Buffer.alloc(0), true)
		if (remaining) {
		this._parser.write(remaining)
		this.emit('data', remaining)
		}
		data = this._decoder.decode(data, { stream: true })
		}
		@@ -289,3 +354,9 @@
		// Flush any remaining decoded data from the TextDecoder
		if (this._decoder) {
		if (this._decoderBuffer) {
		var finalChunk = this._decodeBuffer(Buffer.alloc(0), true)
		if (finalChunk) {
		this._parser.write(finalChunk)
		this.emit('data', finalChunk)
		}
		} else if (this._decoder) {
		var remaining = this._decoder.decode()
		@@ -683,2 +754,55 @@ if (remaining) {

		/**
		 * Extract the value of the `encoding` pseudo-attribute from the body of
		 * an XML declaration, e.g. `version="1.0" encoding="UTF-8"`.
		 *
		 * @param {string} body - processing-instruction body text (may be empty)
		 * @returns {string|null} the encoding name exactly as written, or null
		 *   when the body is empty or carries no well-formed encoding attribute
		 */
		function getDeclaredEncoding(body) {
			// "encoding" must begin the body or follow whitespace, so longer names
			// that merely end in "encoding" are ignored. Whitespace around "=" is
			// optional, and the closing quote must match the opening one (\1).
			const match = body && body.match(/(?:^|\s)encoding\s*=\s*(['"])([^'"]+)\1/i)
			return match ? match[2] : null
		}

		/**
		 * Reduce an encoding label to a comparable form: lower-cased, with every
		 * character outside a-z and 0-9 removed ('UTF-16LE' becomes 'utf16le').
		 *
		 * @param {string} encoding - encoding label; may be empty or missing
		 * @returns {string|null} the normalized label, or null for a falsy input
		 */
		function normalizeEncodingName(encoding) {
			return encoding
				? encoding.toLowerCase().replace(/[^a-z0-9]/g, '')
				: null
		}

		/**
		 * Decide whether the encoding detected from the byte stream is compatible
		 * with the encoding named in the XML declaration. Both names are compared
		 * after normalizeEncodingName has been applied.
		 *
		 * @param {string} detectedEncoding - encoding inferred from the bytes
		 * @param {string} declaredEncoding - encoding named in the declaration
		 * @returns {boolean} true when either name is missing, when the names are
		 *   equal after normalization, or when 'utf16' is declared and either
		 *   UTF-16 byte order was detected
		 */
		function encodingsMatch(detectedEncoding, declaredEncoding) {
			const detected = normalizeEncodingName(detectedEncoding)
			const declared = normalizeEncodingName(declaredEncoding)

			// With one side unknown there is nothing to contradict.
			if (!detected || !declared) {
				return true
			}

			// A plain "UTF-16" declaration does not fix the byte order, so it is
			// satisfied by either detected variant.
			if (declared === 'utf16') {
				return detected === 'utf16le' || detected === 'utf16be'
			}

			return detected === declared
		}

		/**
		 * In strict mode, report a failure when the encoding named in an
		 * `<?xml ...?>` declaration disagrees with the encoding recorded on the
		 * parser (`parser.encoding`).
		 *
		 * Does nothing when the parser is not strict, when no encoding has been
		 * recorded, when `data` is missing, or when the processing instruction
		 * is not named 'xml'.
		 *
		 * @param {object} parser - parser state; reads `strict` and `encoding`
		 * @param {{name: string, body: string}} data - processing-instruction node
		 * @returns {undefined}
		 */
		function validateXmlDeclarationEncoding(parser, data) {
			if (
				!parser.strict ||
				!parser.encoding ||
				!data ||
				data.name !== 'xml'
			) {
				return
			}

			var declaredEncoding = getDeclaredEncoding(data.body)
			if (
				declaredEncoding &&
				!encodingsMatch(parser.encoding, declaredEncoding)
			) {
				strictFail(
					parser,
					'XML declaration encoding ' +
						declaredEncoding +
						' does not match detected stream encoding ' +
						parser.encoding.toUpperCase()
				)
			}
		}

		function emitNode(parser, nodeType, data) {
		@@ -1389,6 +1513,8 @@ if (parser.textNode) closeText(parser)
		if (c === '>') {
		emitNode(parser, 'onprocessinginstruction', {
		const procInstEndData = {
		name: parser.procInstName,
		body: parser.procInstBody,
		})
		}
		validateXmlDeclarationEncoding(parser, procInstEndData)
		emitNode(parser, 'onprocessinginstruction', procInstEndData)
		parser.procInstName = parser.procInstBody = ''
		@@ -1395,0 +1521,0 @@ parser.state = S.TEXT

+5

-2

package.json

		@@ -5,3 +5,3 @@ {
		"author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me/)",
		"version": "1.5.0",
		"version": "1.6.0",
		"main": "lib/sax.js",
		@@ -16,3 +16,6 @@ "license": "BlueOak-1.0.0",
		},
		"repository": "git://github.com/isaacs/sax-js.git",
		"repository": {
		"type": "git",
		"url": "git+ssh://git@github.com/isaacs/sax-js.git"
		},
		"files": [
		@@ -19,0 +22,0 @@ "lib/sax.js",

sax - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics