@nodable/flexible-xml-parser - npm Package Compare versions

Comparing version

1.1.1

1.2.0

+4

-3

package.json

		{
		"name": "@nodable/flexible-xml-parser",
		"version": "1.1.1",
		"version": "1.2.0",
		"description": "Fastest XML parser in pure JS with fully customizable output",
		@@ -48,5 +48,5 @@ "main": "./lib/fxp.cjs",
		"@nodable/base-output-builder": "^1.0.5",
		"@nodable/compact-builder": "^1.0.6",
		"@nodable/compact-builder": "^1.0.8",
		"path-expression-matcher": "^1.5.0",
		"strnum": "^2.2.2"
		"xml-naming": "^0.1.0"
		},
		@@ -58,2 +58,3 @@ "devDependencies": {
		"@babel/register": "^7.28.6",
		"@byspec/xml": "^0.1.0",
		"@nodable/entities": "^2.1.0",
		@@ -60,0 +61,0 @@ "@types/node": "^20.19.37",

+85

-32

src/AttributeProcessor.js

		'use strict';
		import { ParseError, ErrorCode } from './ParseError.js';
		import { isSpaceCode } from "./util.js"

		@@ -24,7 +25,85 @@ /**

		// Module-level regex. Stateless between calls because getAllMatches() always
		// resets lastIndex to 0 before iterating — see getAllMatches() below.
		const attrsRegx = new RegExp('([^\\s=]+)\\s(=\\s([\'"])([\\s\\S]*?)\\3)?', 'gm');
		// Module-level regex kept for reference only — no longer called from this
		// module. parseAttributes() below replaces it with an O(n) linear scanner
		// that is immune to catastrophic backtracking and stack overflow.
		// const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');

		/**
		 * Parse an attribute expression string into an array of match tuples.
		 *
		 * Each tuple has the shape
		 *   [fullMatch, name, '=' + quoted value | undefined, quote | undefined, value | undefined]
		 * and carries a `startIndex` property holding the offset of the attribute
		 * name inside `attrStr`. `fullMatch` is rebuilt as name + '=' + quoted value
		 * from the normalised value; it is not a raw slice of the input.
		 *
		 * The scanner walks the string once, front to back, using char codes only
		 * (no regex, no recursion). For every attribute it:
		 *   1. skips leading whitespace,
		 *   2. reads the name up to the next whitespace or '=',
		 *   3. skips whitespace and, when no '=' follows, emits a boolean attribute
		 *      (name only, remaining slots undefined),
		 *   4. otherwise skips '=' plus whitespace and reads a value delimited by
		 *      matching single or double quotes.
		 *
		 * Line breaks inside a value are replaced by a single space; a CR
		 * immediately followed by LF counts as one line break.
		 *
		 * @param {string} attrStr raw attribute portion of a tag expression
		 * @returns {Array} array of match tuples (see shape above)
		 */
		function parseAttributes(attrStr) {
		  const EQUALS = 61;       // '='
		  const DOUBLE_QUOTE = 34; // '"'
		  const SINGLE_QUOTE = 39; // "'"
		  const LF = 10;           // '\n'
		  const CR = 13;           // '\r'

		  const results = [];
		  const len = attrStr.length;
		  let i = 0;

		  while (i < len) {
		    // Skip whitespace between attributes
		    while (i < len && isSpaceCode(attrStr.charCodeAt(i))) i++;
		    if (i >= len) break;

		    // Read name: everything up to whitespace or '='
		    const nameStart = i;
		    while (i < len && attrStr.charCodeAt(i) !== EQUALS && !isSpaceCode(attrStr.charCodeAt(i))) i++;
		    const name = attrStr.substring(nameStart, i);

		    // Skip whitespace before '='
		    while (i < len && isSpaceCode(attrStr.charCodeAt(i))) i++;

		    if (i >= len || attrStr.charCodeAt(i) !== EQUALS) {
		      // Boolean attribute — no '='
		      const m = [name, name, undefined, undefined, undefined];
		      m.startIndex = nameStart;
		      results.push(m);
		      continue;
		    }

		    i++; // skip '='

		    // Skip whitespace after '='
		    while (i < len && isSpaceCode(attrStr.charCodeAt(i))) i++;

		    // Read quoted value. charCodeAt() yields NaN past the end of the string,
		    // which simply fails the quote test below.
		    const quote = attrStr.charCodeAt(i);
		    if (quote !== DOUBLE_QUOTE && quote !== SINGLE_QUOTE) {
		      // NOTE(review): '=' without a quoted value emits nothing for `name`;
		      // the outer loop resumes at the current character and reads it as the
		      // next attribute name. Confirm whether this should raise instead.
		      continue;
		    }

		    i++; // skip opening quote
		    let value = '';
		    let segStart = i;
		    while (i < len) {
		      const c = attrStr.charCodeAt(i);
		      if (c === quote) break;
		      if (c === LF || c === CR) {
		        // Line break → space (XML 1.0 §3.3.3).
		        value += attrStr.substring(segStart, i) + ' ';
		        // CRLF is a single line break (XML 1.0 §2.11): swallow the LF so the
		        // pair contributes one space, not two.
		        if (c === CR && attrStr.charCodeAt(i + 1) === LF) i++;
		        segStart = i + 1;
		      }
		      i++;
		    }
		    value += attrStr.substring(segStart, i);
		    // NOTE(review): when the closing quote is missing the value runs to the
		    // end of the string and is emitted as if it had been closed.
		    i++; // skip closing quote

		    const quoteChar = String.fromCharCode(quote);
		    const quoted = quoteChar + value + quoteChar;
		    const m = [name + '=' + quoted, name, '=' + quoted, quoteChar, value];
		    m.startIndex = nameStart;
		    results.push(m);
		  }

		  return results;
		}

		/**
		* Pass 1: extract raw (unparsed) attribute values into rawAttributes.
		@@ -37,5 +116,5 @@ *
		export function collectRawAttributes(attrStr, parser, tagExp) {
		if (!attrStr \|\| attrStr.length === 0) return;

		if (!attrStr \|\| attrStr.length === 0) return;
		const matches = getAllMatches(attrStr, attrsRegx);
		const matches = parseAttributes(attrStr);
		const len = matches.length;
		@@ -61,3 +140,3 @@ let count = 0;
		if (!attrStr \|\| attrStr.length === 0) return;
		const matches = getAllMatches(attrStr, attrsRegx);
		const matches = parseAttributes(attrStr);
		const len = matches.length;
		@@ -84,28 +163,2 @@
		}
		}

		/**
		 * Run the regex against the string and return every match with its capture
		 * groups.
		 *
		 * `lastIndex` is reset to 0 before iterating so a shared, stateful
		 * (global/sticky) regex can be reused across calls. Each returned entry is a
		 * plain array `[fullMatch, group1, group2, ...]` with an extra `startIndex`
		 * property giving the offset of the match in `string`.
		 *
		 * @param {string} string text to scan
		 * @param {RegExp} regex pattern to apply; normally created with the `g` flag
		 * @returns {Array} one entry per match, in order of appearance
		 */
		function getAllMatches(string, regex) {
		  regex.lastIndex = 0;
		  const matches = [];
		  let match;
		  while ((match = regex.exec(string)) !== null) {
		    // Copy only the indexed entries (full match + groups), not exec()'s
		    // extra `index` / `input` / `groups` properties.
		    const allmatches = Array.from(match);
		    allmatches.startIndex = match.index;
		    matches.push(allmatches);

		    // Without the g/y flag exec() never advances lastIndex and would return
		    // the same match forever — a single match is all there is to report.
		    if (!regex.global && !regex.sticky) break;
		    // An empty match leaves lastIndex where it was; step forward manually so
		    // the loop terminates.
		    if (match[0].length === 0) regex.lastIndex++;
		  }
		  return matches;
		}

+6

-6

src/DocTypeReader.js

		@@ -1,3 +0,3 @@
		import { isName } from './util.js';
		import { ParseError, ErrorCode } from './ParseError.js';
		import { name as isName, qName as isQName } from 'xml-naming';

		@@ -270,3 +270,3 @@ export function readDocType(parser) {

		validateEntityName(entityName);
		validateEntityName(entityName, parser.xmlVersion);
		skipSourceWhitespace(source);
		@@ -350,3 +350,3 @@

		if (!isName(elementName)) {
		if (!isName(elementName, parser.xmlVersion)) {
		throw new ParseError(`Invalid element name: "${elementName}"`,
		@@ -439,3 +439,3 @@ ErrorCode.INVALID_TAG,

		validateEntityName(notationName);
		validateEntityName(notationName, parser.xmlVersion);
		skipSourceWhitespace(source);
		@@ -518,4 +518,4 @@

		function validateEntityName(name) {
		if (isName(name)) return name;
		function validateEntityName(name, xmlVersion) {
		if (isName(name, xmlVersion)) return name;
		throw new ParseError(
		@@ -522,0 +522,0 @@ `Invalid entity name "${name}"`,

+1

-0

src/OptionsBuilder.js

		@@ -135,2 +135,3 @@ import { CompactBuilderFactory } from '@nodable/compact-builder';
		flushThreshold: 1024,
		bufferSize: 256
		},
		@@ -137,0 +138,0 @@

+9

-10

src/util.js

		@@ -1,8 +0,1 @@
		'use strict';

		const nameStartChar = ':A-Za-z_\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF\\u0370-\\u037D\\u037F-\\u1FFF\\u200C-\\u200D\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD';
		const nameChar = nameStartChar + '\\-.\\d\\u00B7\\u0300-\\u036F\\u203F-\\u2040';
		export const nameRegexp = '[' + nameStartChar + '][' + nameChar + ']*';
		const regexName = new RegExp('^' + nameRegexp + '$');

		export function getAllMatches(string, regex) {
		@@ -24,7 +17,13 @@ const matches = [];

		export const isName = function (string) {
		const match = regexName.exec(string);
		return !(match === null \|\| typeof match === 'undefined');


		/**
		 * Tell whether `char` is a single whitespace character recognised by the
		 * tag scanner: space, tab, line feed, carriage return or form feed.
		 *
		 * @param {string} char one-character string to test
		 * @returns {boolean} true only when `char` is exactly one of those characters
		 */
		export function isSpace(char) {
		  return char === " " || char === "\t" || char === "\n" || char === "\r" || char === "\f";
		}


		/**
		 * Char-code counterpart of a whitespace test: true for the codes of space
		 * (32), tab (9), line feed (10), carriage return (13) and form feed (12).
		 *
		 * @param {number} code UTF-16 code unit, e.g. from String.prototype.charCodeAt()
		 * @returns {boolean} true when `code` is one of the five whitespace codes;
		 *   false for anything else, including the NaN that charCodeAt() returns
		 *   past the end of a string
		 */
		export function isSpaceCode(code) {
		  return code === 32 || code === 9 || code === 10 || code === 13 || code === 12; // space \t \n \r \f
		}

		export function isExist(v) {
		@@ -31,0 +30,0 @@ return typeof v !== 'undefined';

+23

-5

src/Xml2JsParser.js

		@@ -8,5 +8,6 @@ import StringSource from './InputSource/StringSource.js';
		import { readDocType } from './DocTypeReader.js';
		import { isName, DANGEROUS_PROPERTY_NAMES, criticalProperties } from './util.js';
		import { DANGEROUS_PROPERTY_NAMES, criticalProperties } from './util.js';
		import AutoCloseHandler from './AutoCloseHandler.js';
		import { ParseError, ErrorCode } from './ParseError.js';
		import { name as isName, qName as isQName } from 'xml-naming';

		@@ -64,2 +65,3 @@ class TagDetail {
		this._exitIfTriggered = false;
		this.xmlVersion = '1.0';

		@@ -288,2 +290,14 @@ if (!this.matcher) {

		// Extract namespace prefix and local name from raw tag name (e.g. "ns:tag" → "ns", "tag").
		// Always done from the raw name (tagExp.tagName), before processTagName strips the prefix,
		// so these values are stable regardless of skip.nsPrefix.
		const colonIdx = tagExp.tagName.indexOf(':');
		const tagNamespace = colonIdx !== -1 ? tagExp.tagName.slice(0, colonIdx) : undefined;
		// Local name for the matcher: prefix-free always (e.g. "code" from "ns:code").
		// The matcher library tracks namespace separately via the 3rd push() argument —
		// passing the full "ns:code" as the tag name would break ns::code expression matching.
		const matcherTagName = tagNamespace !== undefined
		? tagExp.tagName.slice(colonIdx + 1)
		: processedTagName;

		// ── Limit: maxNestedTags ─────────────────────────────────────────────────
		@@ -310,3 +324,3 @@ const maxNested = options.limits?.maxNestedTags;

		this.matcher.push(processedTagName, {});
		this.matcher.push(matcherTagName, {}, tagNamespace);
		if (raeAttrLen > 0) {
		@@ -341,3 +355,6 @@ this.matcher.updateCurrent(rawAttributes);
		// Create a fresh processor with the matching nested + skipEnclosures config.
		this._stopNodeProcessor = new StopNodeProcessor(processedTagName, {
		// Raw tag name (tagExp.tagName) is used — the processor scans the source
		// character-by-character and must match the prefix-as-written (e.g. "ns:code"),
		// independent of what skip.nsPrefix does to the processed output name.
		this._stopNodeProcessor = new StopNodeProcessor(tagExp.tagName, {
		nested: stopNodeConfig.nested,
		@@ -359,3 +376,4 @@ skipEnclosures: stopNodeConfig.skipEnclosures,
		// but call no output builder methods — the tag is silently dropped.
		this._stopNodeProcessor = new StopNodeProcessor(processedTagName, {
		// Raw tag name used for the same reason as the stop-node branch above.
		this._stopNodeProcessor = new StopNodeProcessor(tagExp.tagName, {
		nested: skipTagConfig.nested,
		@@ -469,3 +487,3 @@ skipEnclosures: skipTagConfig.skipEnclosures,
		attrName = resolveNsPrefix(attrName, options.skip.nsPrefix);
		if (!isName(attrName)) { //TODO: make it optional
		if (!isQName(attrName, this.xmlVersion)) { //TODO: make it optional
		throw new ParseError(`Invalid attribute name: ${attrName}`, ErrorCode.INVALID_ATTRIBUTE_NAME);
		@@ -472,0 +490,0 @@ }

+42

-12

src/XMLParser.js

		@@ -16,2 +16,6 @@ import { buildOptions } from './OptionsBuilder.js';
		this._isFeeding = false;

		// ── Batching state ──────────────────────────────────
		this._pendingBytes = 0;
		this._batchThreshold = this.options.feedable?.bufferSize;
		}
		@@ -130,2 +134,33 @@

		_runParse() {
		if (!this._feedParser) return;

		const beforePos = this._feedSource.startIndex; // bytes consumed so far

		try {
		this._feedParser.parseXml();
		} catch (err) {
		if (err.code === ErrorCode.UNEXPECTED_END) {
		this._feedSource.rewindToMark();
		} else {
		throw err;
		}
		}

		const afterPos = this._feedSource.startIndex;
		const didAdvance = afterPos > beforePos;

		if (didAdvance) {
		// Real progress made — reset threshold normally
		this._pendingBytes = 0;
		} else {
		// Parser is stuck mid-token — grow the threshold to avoid
		// hammering parseXml() until significantly more data arrives
		this._batchThreshold = Math.min(
		this._batchThreshold * 2,
		this.options.feedable.maxBufferSize
		);
		}
		}

		/**
		@@ -165,16 +200,8 @@ * Feed an XML data chunk for incremental parsing.
		this._feedSource.feed(str);
		this._pendingBytes += str.length;

		try {
		this._feedParser.parseXml();
		} catch (err) {
		if (err.code === ErrorCode.UNEXPECTED_END) {
		// Chunk boundary fell mid-token. Rewind to the token start so the
		// incomplete bytes are re-parsed when the next chunk arrives.
		this._feedSource.rewindToMark();
		} else {
		// Real parse error — clean up and propagate.
		this._cleanupFeedSession();
		throw err;
		}
		if (this._pendingBytes >= this._batchThreshold) {
		this._runParse();
		}
		// Otherwise, delay parsing until next feed() or end()

		@@ -207,2 +234,5 @@ return this;

		// Force a final parse (any pending bytes are now processed)
		this._runParse();

		try {
		@@ -209,0 +239,0 @@ // Mark the source as complete so readers know there is no more data.

+10

-7

src/XmlPartReader.js

		'use strict';
		import { ParseError, ErrorCode } from './ParseError.js';
		import { collectRawAttributes } from './AttributeProcessor.js';
		import { isName } from "./util.js"
		import { isSpace } from "./util.js"
		import { name as isName, qName as isQName } from 'xml-naming';
		// Re-export flushAttributes so Xml2JsParser and XmlSpecialTagsReader can
		@@ -160,4 +161,5 @@ // continue to import it from here without changing their import lines.

		for (; i < expLen; i++) {
		if (exp[i] === " ") {
		for (; i < exp.length; i++) {
		const c = exp[i];
		if (isSpace(c)) {
		tagExp.tagName = exp.substring(0, i);
		@@ -169,7 +171,7 @@ attrsExp = exp.substring(i + 1);
		//only tag
		if (tagExp.tagName.length === 0 && i === expLen) tagExp.tagName = exp;
		if (tagExp.tagName.length === 0 && i === exp.length) tagExp.tagName = exp;
		tagExp.tagName = tagExp.tagName.trimEnd();
		tagExp._attrsExp = attrsExp;

		if (!isName(tagExp.tagName)) {
		if (!isQName(tagExp.tagName, parser.xmlVersion)) {
		throw new ParseError("Invalid tag name", ErrorCode.INVALID_TAG_NAME);
		@@ -183,4 +185,5 @@ }
		}
		// console.log(tagExp)
		return tagExp;
		}

		return tagExp;
		}

+15

-5

src/XmlSpecialTagsReader.js

		@@ -39,7 +39,17 @@ import { readPiExp, flushAttributes } from './XmlPartReader.js';
		let tagExp = readPiExp(parser, "?>");
		if (!tagExp) throw new ParseError(
		"Invalid Pi Tag expression.",
		ErrorCode.INVALID_TAG,
		{ line: parser.source.line, col: parser.source.cols, index: parser.source.startIndex }
		);
		if (!tagExp) {
		  throw new ParseError(
		    "Invalid Pi Tag expression.",
		    ErrorCode.INVALID_TAG,
		    { line: parser.source.line, col: parser.source.cols, index: parser.source.startIndex }
		  );
		} else if (tagExp.tagName === "xml") {
		  // Read version from the declaration and store it on the parser for validators.
		  // Stored as a string: the raw attribute is compared as a string here, and a
		  // numeric literal 1.0 is just 1, so it can never equal the string '1.0'.
		  // NOTE(review): assumes the name validators take '1.0' / '1.1' strings — confirm.
		  const version = tagExp.rawAttributes?.version;
		  parser.xmlVersion = version === '1.1' ? '1.1' : '1.0'; // anything else → default
		}

		@@ -46,0 +56,0 @@ // Flush attributes into the output builder's this.attributes accumulator

CHANGELOG.md→CHANGELOG.md

@nodable/flexible-xml-parser - npm Package Compare versions

Improved metrics

Worsened metrics

Dependency changes