🚀 Socket Launch Week Day 3: Socket Firewall Now Blocks Malicious VS Code and Open VSX Extensions. Learn more
Sign In

@nodable/flexible-xml-parser

Package Overview
Dependencies
Maintainers
1
Versions
8
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@nodable/flexible-xml-parser - npm Package Compare versions

Comparing version
1.1.1
to
1.2.0
+4
-3
package.json
{
"name": "@nodable/flexible-xml-parser",
"version": "1.1.1",
"version": "1.2.0",
"description": "Fastest XML parser in pure JS with fully customizable ouput",

@@ -48,5 +48,5 @@ "main": "./lib/fxp.cjs",

"@nodable/base-output-builder": "^1.0.5",
"@nodable/compact-builder": "^1.0.6",
"@nodable/compact-builder": "^1.0.8",
"path-expression-matcher": "^1.5.0",
"strnum": "^2.2.2"
"xml-naming": "^0.1.0"
},

@@ -58,2 +58,3 @@ "devDependencies": {

"@babel/register": "^7.28.6",
"@byspec/xml": "^0.1.0",
"@nodable/entities": "^2.1.0",

@@ -60,0 +61,0 @@ "@types/node": "^20.19.37",

'use strict';
import { ParseError, ErrorCode } from './ParseError.js';
import { isSpaceCode } from "./util.js"

@@ -24,7 +25,85 @@ /**

// Module-level regex. Stateless between calls because getAllMatches() always
// resets lastIndex to 0 before iterating — see getAllMatches() below.
const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');
// Module-level regex kept for reference only — no longer called from this
// module. parseAttributes() below replaces it with an O(n) linear scanner
// that is immune to catastrophic backtracking and stack overflow.
// const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');
/**
 * Parse an attribute expression string into an array of match tuples.
 *
 * Each element has the same shape the old regex-based getAllMatches()
 * returned, so callers are unchanged:
 *   [fullMatch, name, '=' + quote + value + quote | undefined, quote | undefined, value | undefined]
 * and carries a `startIndex` property: the offset of the attribute name
 * inside attrStr.
 *
 * Note that `fullMatch` is rebuilt from the parsed pieces (name, '=', quoted
 * value); it is not a slice of the input, so whitespace around '=' is not
 * reproduced and line breaks inside the value are already normalised.
 *
 * The implementation is a single forward pass over char codes with no regex
 * and no recursion. For every attribute it:
 *   1. skips leading whitespace,
 *   2. reads the name up to whitespace or '=',
 *   3. skips whitespace; without '=' the attribute is reported as boolean,
 *   4. skips '=' and whitespace, then reads the quoted value up to the
 *      matching quote, replacing every \n and \r with a single space
 *      (tabs are left untouched).
 *
 * Malformed input:
 *   - A name followed by '=' but no quoted value (`a=b`, `a=`) is reported
 *     as a boolean attribute instead of being silently dropped; scanning
 *     then resumes at the unquoted token.
 *   - An unterminated quoted value runs to the end of attrStr.
 *
 * @param {string} attrStr attribute portion of a tag expression
 * @returns {Array} array of match tuples (see shape above)
 */
function parseAttributes(attrStr) {
  const results = [];
  const len = attrStr.length;
  let i = 0;

  while (i < len) {
    // Skip whitespace between attributes
    while (i < len && isSpaceCode(attrStr.charCodeAt(i))) i++;
    if (i >= len) break;

    // Read name (stops at '=' (61) or whitespace)
    const nameStart = i;
    while (i < len && attrStr.charCodeAt(i) !== 61 && !isSpaceCode(attrStr.charCodeAt(i))) i++;
    const name = attrStr.substring(nameStart, i);

    // Skip whitespace before '='
    while (i < len && isSpaceCode(attrStr.charCodeAt(i))) i++;

    const hasEquals = i < len && attrStr.charCodeAt(i) === 61;
    if (hasEquals) {
      i++; // skip '='
      // Skip whitespace after '='
      while (i < len && isSpaceCode(attrStr.charCodeAt(i))) i++;
    }

    // Opening quote, or 0 when there is no '=' / no quoted value after it.
    const quote = hasEquals && i < len ? attrStr.charCodeAt(i) : 0;
    if (quote !== 34 && quote !== 39) { // neither " nor '
      // Boolean attribute, or '=' without a quoted value. The name is only
      // empty when the token started with a stray '='; nothing to report then.
      // `i` has always moved forward here, so the outer loop makes progress.
      if (name.length > 0) {
        const m = [name, name, undefined, undefined, undefined];
        m.startIndex = nameStart;
        results.push(m);
      }
      continue;
    }

    i++; // skip opening quote
    let value = '';
    let segStart = i; // start of the not-yet-copied part of the value
    while (i < len && attrStr.charCodeAt(i) !== quote) {
      const c = attrStr.charCodeAt(i);
      if (c === 10 || c === 13) { // \n or \r → space
        value += attrStr.substring(segStart, i) + ' ';
        segStart = i + 1;
      }
      i++;
    }
    value += attrStr.substring(segStart, i);
    i++; // skip closing quote

    const quoteChar = String.fromCharCode(quote);
    const quoted = quoteChar + value + quoteChar;
    const m = [name + '=' + quoted, name, '=' + quoted, quoteChar, value];
    m.startIndex = nameStart;
    results.push(m);
  }

  return results;
}
/**
* Pass 1: extract raw (unparsed) attribute values into rawAttributes.

@@ -37,5 +116,5 @@ *

export function collectRawAttributes(attrStr, parser, tagExp) {
if (!attrStr || attrStr.length === 0) return;
if (!attrStr || attrStr.length === 0) return;
const matches = getAllMatches(attrStr, attrsRegx);
const matches = parseAttributes(attrStr);
const len = matches.length;

@@ -61,3 +140,3 @@ let count = 0;

if (!attrStr || attrStr.length === 0) return;
const matches = getAllMatches(attrStr, attrsRegx);
const matches = parseAttributes(attrStr);
const len = matches.length;

@@ -84,28 +163,2 @@

}
}
/**
 * Run a global regex against a string and return every match together with
 * its capture groups.
 *
 * lastIndex is always reset to 0 before iterating so a shared, stateful
 * (module-level) regex is safe to reuse across calls.
 *
 * Each returned entry is a plain array `[fullMatch, group1, group2, ...]`
 * (unmatched groups are `undefined`) with an extra `startIndex` property
 * holding the offset of the match inside `string`.
 *
 * @param {string} string text to scan
 * @param {RegExp} regex  regex with the `g` flag; its lastIndex is modified
 * @returns {Array} one entry per match, in order of appearance
 */
function getAllMatches(string, regex) {
  regex.lastIndex = 0;
  const matches = [];
  let match;
  while ((match = regex.exec(string)) !== null) {
    const groups = Array.from(match);
    groups.startIndex = regex.lastIndex - match[0].length;
    matches.push(groups);
    // exec() does not move lastIndex past an empty match; step over it
    // manually, otherwise the same position would match forever.
    if (match[0].length === 0) regex.lastIndex++;
  }
  return matches;
}

@@ -1,3 +0,3 @@

import { isName } from './util.js';
import { ParseError, ErrorCode } from './ParseError.js';
import { name as isName, qName as isQName } from 'xml-naming';

@@ -270,3 +270,3 @@ export function readDocType(parser) {

validateEntityName(entityName);
validateEntityName(entityName, parser.xmlVersion);
skipSourceWhitespace(source);

@@ -350,3 +350,3 @@

if (!isName(elementName)) {
if (!isName(elementName, parser.xmlVersion)) {
throw new ParseError(`Invalid element name: "${elementName}"`,

@@ -439,3 +439,3 @@ ErrorCode.INVALID_TAG,

validateEntityName(notationName);
validateEntityName(notationName, parser.xmlVersion);
skipSourceWhitespace(source);

@@ -518,4 +518,4 @@

function validateEntityName(name) {
if (isName(name)) return name;
function validateEntityName(name, xmlVersion) {
if (isName(name, xmlVersion)) return name;
throw new ParseError(

@@ -522,0 +522,0 @@ `Invalid entity name "${name}"`,

@@ -135,2 +135,3 @@ import { CompactBuilderFactory } from '@nodable/compact-builder';

flushThreshold: 1024,
bufferSize: 256
},

@@ -137,0 +138,0 @@

@@ -1,8 +0,1 @@

'use strict';
const nameStartChar = ':A-Za-z_\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF\\u0370-\\u037D\\u037F-\\u1FFF\\u200C-\\u200D\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD';
const nameChar = nameStartChar + '\\-.\\d\\u00B7\\u0300-\\u036F\\u203F-\\u2040';
export const nameRegexp = '[' + nameStartChar + '][' + nameChar + ']*';
const regexName = new RegExp('^' + nameRegexp + '$');
export function getAllMatches(string, regex) {

@@ -24,7 +17,13 @@ const matches = [];

export const isName = function (string) {
const match = regexName.exec(string);
return !(match === null || typeof match === 'undefined');
/**
 * Tell whether `char` is exactly one of the whitespace characters this
 * parser recognises: space, tab, line feed, carriage return or form feed.
 *
 * @param {string} char value to test (compared with strict equality)
 * @returns {boolean} true only for one of the five characters above
 */
export function isSpace(char) {
  switch (char) {
    case " ":
    case "\t":
    case "\n":
    case "\r":
    case "\f":
      return true;
    default:
      return false;
  }
}
/**
 * Char-code counterpart of a whitespace test: true when `code` is the code
 * of space (32), tab (9), line feed (10), carriage return (13) or
 * form feed (12).
 *
 * @param {number} code value to test (compared with strict equality)
 * @returns {boolean} true only for one of the five codes above
 */
export function isSpaceCode(code) {
  switch (code) {
    case 32: // space
    case 9:  // \t
    case 10: // \n
    case 13: // \r
    case 12: // \f
      return true;
    default:
      return false;
  }
}
export function isExist(v) {

@@ -31,0 +30,0 @@ return typeof v !== 'undefined';

@@ -8,5 +8,6 @@ import StringSource from './InputSource/StringSource.js';

import { readDocType } from './DocTypeReader.js';
import { isName, DANGEROUS_PROPERTY_NAMES, criticalProperties } from './util.js';
import { DANGEROUS_PROPERTY_NAMES, criticalProperties } from './util.js';
import AutoCloseHandler from './AutoCloseHandler.js';
import { ParseError, ErrorCode } from './ParseError.js';
import { name as isName, qName as isQName } from 'xml-naming';

@@ -64,2 +65,3 @@ class TagDetail {

this._exitIfTriggered = false;
this.xmlVersion = '1.0';

@@ -288,2 +290,14 @@ if (!this.matcher) {

// Extract namespace prefix and local name from raw tag name (e.g. "ns:tag" → "ns", "tag").
// Always done from the raw name (tagExp.tagName), before processTagName strips the prefix,
// so these values are stable regardless of skip.nsPrefix.
const colonIdx = tagExp.tagName.indexOf(':');
const tagNamespace = colonIdx !== -1 ? tagExp.tagName.slice(0, colonIdx) : undefined;
// Local name for the matcher: prefix-free always (e.g. "code" from "ns:code").
// The matcher library tracks namespace separately via the 3rd push() argument —
// passing the full "ns:code" as the tag name would break ns::code expression matching.
const matcherTagName = tagNamespace !== undefined
? tagExp.tagName.slice(colonIdx + 1)
: processedTagName;
// ── Limit: maxNestedTags ─────────────────────────────────────────────────

@@ -310,3 +324,3 @@ const maxNested = options.limits?.maxNestedTags;

this.matcher.push(processedTagName, {});
this.matcher.push(matcherTagName, {}, tagNamespace);
if (raeAttrLen > 0) {

@@ -341,3 +355,6 @@ this.matcher.updateCurrent(rawAttributes);

// Create a fresh processor with the matching nested + skipEnclosures config.
this._stopNodeProcessor = new StopNodeProcessor(processedTagName, {
// Raw tag name (tagExp.tagName) is used — the processor scans the source
// character-by-character and must match the prefix-as-written (e.g. "ns:code"),
// independent of what skip.nsPrefix does to the processed output name.
this._stopNodeProcessor = new StopNodeProcessor(tagExp.tagName, {
nested: stopNodeConfig.nested,

@@ -359,3 +376,4 @@ skipEnclosures: stopNodeConfig.skipEnclosures,

// but call no output builder methods — the tag is silently dropped.
this._stopNodeProcessor = new StopNodeProcessor(processedTagName, {
// Raw tag name used for the same reason as the stop-node branch above.
this._stopNodeProcessor = new StopNodeProcessor(tagExp.tagName, {
nested: skipTagConfig.nested,

@@ -469,3 +487,3 @@ skipEnclosures: skipTagConfig.skipEnclosures,

attrName = resolveNsPrefix(attrName, options.skip.nsPrefix);
if (!isName(attrName)) { //TODO: make it optional
if (!isQName(attrName, this.xmlVersion)) { //TODO: make it optional
throw new ParseError(`Invalid attribute name: ${attrName}`, ErrorCode.INVALID_ATTRIBUTE_NAME);

@@ -472,0 +490,0 @@ }

@@ -16,2 +16,6 @@ import { buildOptions } from './OptionsBuilder.js';

this._isFeeding = false;
// ── Batching state ──────────────────────────────────
this._pendingBytes = 0;
this._batchThreshold = this.options.feedable?.bufferSize;
}

@@ -130,2 +134,33 @@

_runParse() {
if (!this._feedParser) return;
const beforePos = this._feedSource.startIndex; // bytes consumed so far
try {
this._feedParser.parseXml();
} catch (err) {
if (err.code === ErrorCode.UNEXPECTED_END) {
this._feedSource.rewindToMark();
} else {
throw err;
}
}
const afterPos = this._feedSource.startIndex;
const didAdvance = afterPos > beforePos;
if (didAdvance) {
// Real progress made — reset threshold normally
this._pendingBytes = 0;
} else {
// Parser is stuck mid-token — grow the threshold to avoid
// hammering parseXml() until significantly more data arrives
this._batchThreshold = Math.min(
this._batchThreshold * 2,
this.options.feedable.maxBufferSize
);
}
}
/**

@@ -165,16 +200,8 @@ * Feed an XML data chunk for incremental parsing.

this._feedSource.feed(str);
this._pendingBytes += str.length;
try {
this._feedParser.parseXml();
} catch (err) {
if (err.code === ErrorCode.UNEXPECTED_END) {
// Chunk boundary fell mid-token. Rewind to the token start so the
// incomplete bytes are re-parsed when the next chunk arrives.
this._feedSource.rewindToMark();
} else {
// Real parse error — clean up and propagate.
this._cleanupFeedSession();
throw err;
}
if (this._pendingBytes >= this._batchThreshold) {
this._runParse();
}
// Otherwise, delay parsing until next feed() or end()

@@ -207,2 +234,5 @@ return this;

// Force a final parse (any pending bytes are now processed)
this._runParse();
try {

@@ -209,0 +239,0 @@ // Mark the source as complete so readers know there is no more data.

'use strict';
import { ParseError, ErrorCode } from './ParseError.js';
import { collectRawAttributes } from './AttributeProcessor.js';
import { isName } from "./util.js"
import { isSpace } from "./util.js"
import { name as isName, qName as isQName } from 'xml-naming';
// Re-export flushAttributes so Xml2JsParser and XmlSpecialTagsReader can

@@ -160,4 +161,5 @@ // continue to import it from here without changing their import lines.

for (; i < expLen; i++) {
if (exp[i] === " ") {
for (; i < exp.length; i++) {
const c = exp[i];
if (isSpace(c)) {
tagExp.tagName = exp.substring(0, i);

@@ -169,7 +171,7 @@ attrsExp = exp.substring(i + 1);

//only tag
if (tagExp.tagName.length === 0 && i === expLen) tagExp.tagName = exp;
if (tagExp.tagName.length === 0 && i === exp.length) tagExp.tagName = exp;
tagExp.tagName = tagExp.tagName.trimEnd();
tagExp._attrsExp = attrsExp;
if (!isName(tagExp.tagName)) {
if (!isQName(tagExp.tagName, parser.xmlVersion)) {
throw new ParseError("Invalid tag name", ErrorCode.INVALID_TAG_NAME);

@@ -183,4 +185,5 @@ }

}
// console.log(tagExp)
return tagExp;
}
return tagExp;
}

@@ -39,7 +39,17 @@ import { readPiExp, flushAttributes } from './XmlPartReader.js';

let tagExp = readPiExp(parser, "?>");
if (!tagExp) throw new ParseError(
"Invalid Pi Tag expression.",
ErrorCode.INVALID_TAG,
{ line: parser.source.line, col: parser.source.cols, index: parser.source.startIndex }
);
if (!tagExp) {
  throw new ParseError(
    "Invalid Pi Tag expression.",
    ErrorCode.INVALID_TAG,
    { line: parser.source.line, col: parser.source.cols, index: parser.source.startIndex }
  );
} else if (tagExp.tagName === "xml") {
  // Read the version from the XML declaration and store it on the parser for
  // the name validators. It is kept as a string ('1.0' / '1.1') to match the
  // parser's initial value; the numeric literal 1.0 would be stored as 1.
  const version = tagExp.rawAttributes?.version;
  parser.xmlVersion = version === '1.1' ? '1.1' : '1.0'; // anything else → default
}

@@ -46,0 +56,0 @@ // Flush attributes into the output builder's this.attributes accumulator