Socket
Socket
Sign inDemoInstall

fast-xml-parser

Package Overview
Dependencies
Maintainers
1
Versions
136
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

fast-xml-parser - npm Package Compare versions

Comparing version 2.8.3 to 2.9.0

5

bin/parser.js

@@ -154,5 +154,3 @@ var getAllMatches = require("./util").getAllMatches;

//var attrsRegx = new RegExp("(\\S+)=\\s*[\"']?((?:.(?![\"']?\\s+(?:\\S+)=|[>\"']))+.)[\"']?","g");
//var attrsRegx = new RegExp("(\\S+)=\\s*(['\"])((?:.(?!\\2))*.)","g");
var attrsRegx = new RegExp("(\\S+)\\s*=\\s*(['\"])(.*?)\\2","g");
var attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])(.*?)\\2","g");
function buildAttributesArr(attrStr,ignore,prefix,attrNodeName,ignoreNS,conversion){

@@ -171,2 +169,3 @@ attrStr = attrStr || attrStr.trim();

var attrName = prefix + resolveNameSpace( matches[i][1],ignoreNS);
attrsCollection[attrName] = parseValue(matches[i][3],conversion);

@@ -173,0 +172,0 @@ }

255

bin/validator.js

@@ -7,82 +7,209 @@ var util = require("./util");

xmlData = xmlData.replace(/\n/g,"");//make it single line
xmlData = xmlData.replace(/(<!\[CDATA\[.*?\]\]>)/g,"");//Remove all CDATA
xmlData = xmlData.replace(/(<!--.*?(?:-->))/g,"");//Remove all comments
if(validateAttributes(xmlData) !== true) return false;
xmlData = xmlData.replace(/(\s+(?:[\w:\-]+)\s*=\s*(['\"]).*?\2)/g,"");//Remove all attributes
xmlData = xmlData.replace(/(^\s*<\?xml\s*\?>)/g,"");//Remove XML starting tag
if(xmlData.indexOf("<![CDATA[") > 0 || xmlData.indexOf("<!--") > 0 ) return false;
var tags = util.getAllMatches(xmlData,tagsPattern);
if(tags.length === 0) return false; //non xml string
var result = checkForMatchingTag(tags,0);
xmlData = xmlData.replace(/(^\s*<\?xml.*?\?>)/g,"");//Remove XML starting tag
xmlData = xmlData.replace(/(<!DOCTYPE[\s\w\"\.\/\-\:]+(\[.*\])*\s*>)/g,"");//Remove DOCTYPE
if(result !== true) return false; else return true;
}
var tags = [];
for (var i = 0; i < xmlData.length; i++) {
if(xmlData[i] === "<"){//starting of tag
//read until you reach to '>' avoiding any '>' in attribute value
i++;
if(xmlData[i] === "!"){
i = readCommentAndCDATA(xmlData,i);
continue;
}else{
var closingTag = false;
if(xmlData[i] === "/"){//closing tag
closingTag = true;
i++;
}
//read tagname
var tagName = "";
for(;i < xmlData.length
&& xmlData[i] !== ">"
&& xmlData[i] !== " "
&& xmlData[i] !== "\t" ; i++) {
var startsWithXML = new RegExp("^[Xx][Mm][Ll]");
var startsWith = new RegExp("^([a-zA-Z]|_)[\\w\.\\-_:]*");
tagName +=xmlData[i];
}
tagName = tagName.trim();
if(tagName[tagName.length-1] === "/"){//self closing tag without attributes
tagName = tagName.substring(0,tagName.length-2);
return validateTagName(tagName);
}
if(!validateTagName(tagName)) return false;
function validateTagName(tagname){
if(util.doesMatch(tagname,startsWithXML)) return false;
else if(util.doesNotMatch(tagname,startsWith)) return false;
else return true;
}
var attrStringPattern = new RegExp("<[\\w:\\-_\.]+(.*?)\/?>","g");
var attrPattern = new RegExp("\\s+([\\w:\-]+)\\s*=\\s*(['\"])(.*?)\\2","g");
function validateAttributes(xmlData){
var attrStrings = util.getAllMatches(xmlData,attrStringPattern);
for (i=0;i<attrStrings.length;i++){
if(attrStrings[i][1].trim().length > 0 && attrStrings[i][1].trim().length < 4){ //invalid attributes
return false;
}else if(attrStrings[i][1].trim().length !== 0){
var attrsList = util.getAllMatches(attrStrings[i][1],attrPattern);
var attrNames=[];
for (j=0;j<attrsList.length;j++){
if(attrNames.hasOwnProperty(attrsList[j][1])){//duplicate attributes
return false;
var attrStr = "";
var startChar = "";
for(;i < xmlData.length ;i++){
if(xmlData[i] === '"' || xmlData[i] === "'"){
if(startChar === ""){
startChar = xmlData[i];
}else{
startChar = "";
}
}else if(xmlData[i] === ">"){
if(startChar === ""){
break;
}
}
attrStr += xmlData[i];
}
if(startChar !== "") return false;//You have forgot to close the quote
attrStr = attrStr.trim();
if(attrStr[attrStr.length-1] === "/" ){//self closing tag
attrStr = attrStr.substring(0,attrStr.length-2);
if(!validateAttributeString(attrStr)){
return false;
}else{
continue;
}
}else if(closingTag){
if(attrStr.length > 0){
return false;
//throw new Error("XML validation error: closing tag should not have any attribute");
}else{
var otg = tags.pop();
if(tagName !== otg){
return false;
//throw new Error("XML validation error: no mathicng closing tag");
}
}
}else{
attrNames[attrsList[j][1]]=1;
//validate attribute value
//if(!validateAttrValue(attrsList[3])) return false;
if(!validateAttributeString(attrStr)){
return false;
}
tags.push(tagName);
}
//skip tag text value
//It may include comments and CDATA value
for(i++;i < xmlData.length ; i++){
if(xmlData[i] === "<"){
if(xmlData[i+1] === "!"){//comment or CADATA
i++;
i = readCommentAndCDATA(xmlData,i);
continue;
}else{
break;
}
}
}//end of reading tag text value
if(xmlData[i] === "<") i--;
}
}else{
if(xmlData[i] === " " || xmlData[i] === "\t") continue;
return false;
}
}
if(tags.length > 0){
return false;
//throw new Error("XML validation error");
}
return true;
}
function checkForMatchingTag(tags,i){
if(tags.length === i) {
return true;
}else if(tags[i][0].indexOf("</") === 0) {//closing tag
return i;
}else if(tags[i][0].indexOf("/>") === tags[i][0].length-2){//Self closing tag
if(validateTagName(tags[i][0].substring(1)) === false) return -1;
return checkForMatchingTag(tags,i+1);
function readCommentAndCDATA(xmlData,i){
if(xmlData.length > i+5 && xmlData[i+1] === "-" && xmlData[i+2] === "-"){//comment
for(i+=3;i<xmlData.length;i++){
if(xmlData[i] === "-" && xmlData[i+1] === "-" && xmlData[i+2] === ">"){
i+=2;
break;
}
}
}else if( xmlData.length > i+9
&& xmlData[i+1] === "["
&& xmlData[i+2] === "C"
&& xmlData[i+3] === "D"
&& xmlData[i+4] === "A"
&& xmlData[i+5] === "T"
&& xmlData[i+6] === "A"
&& xmlData[i+7] === "["){
}else if(tags.length > i+1){
if(tags[i+1][0].indexOf("</") === 0){//next tag
if(validateTagName(tags[i][1]) === false) return -1;
if(tags[i][1] === tags[i+1][1]) {//matching with next closing tag
return checkForMatchingTag(tags,i+2);
}else {
return -1;//not matching
for(i+=8;i<xmlData.length;i++){
if(xmlData[i] === "]" && xmlData[i+1] === "]" && xmlData[i+2] === ">" ){
i+=2;
break;
}
}else
var nextIndex = checkForMatchingTag(tags,i+1);
if(nextIndex !== -1 && tags[nextIndex] !== undefined && tags[nextIndex][0].indexOf("</") === 0){
if(validateTagName(tags[i][1]) === false) return -1;
if(tags[i][1] === tags[nextIndex][1]) {
return checkForMatchingTag(tags,nextIndex+1);
}else {
return -1;//not matching
}
}
}
}
return -1;
return i;
}
//Sample inputs noted by the original author: attr, ="sd", a="amit's", a="sd"b="saf",
/**
 * Check the attribute portion of a tag (the text that follows the tag name).
 *
 * The string is walked as a sequence of `name = <delimiter>value<delimiter>`
 * items. Every name must pass validateAttrName() and must not repeat within
 * the same string. The value is only skipped over: the character that follows
 * "=" (after optional spaces/tabs) is taken as the delimiter and everything up
 * to its next occurrence, or the end of the string, is ignored. A missing
 * closing delimiter is therefore NOT reported here.
 *
 * @param {string} attrStr attribute text, e.g. `a="1" b='2'`
 * @returns {boolean} false on an invalid or duplicate attribute name, true
 *                    otherwise (including for an empty string)
 */
function validateAttributeString(attrStr){
    //Prototype-less object used as the set of names seen so far. An array or
    //a plain object would already "contain" names such as "length" or
    //"hasOwnProperty", which are perfectly legal attribute names.
    var attrNames = Object.create(null);
    for(var i=0; i< attrStr.length; i++){
        //read attribute name: everything up to the next "="
        var nameStart = i;
        for(;i < attrStr.length && attrStr[i] !== "=" ; i++);
        var attrName = attrStr.substring(nameStart,i).trim();

        //reject duplicates, then validate the name itself
        if(attrName in attrNames){
            return false;
        }
        attrNames[attrName] = 1;
        if(!validateAttrName(attrName)){
            return false;
        }

        i++;//step over "="
        //skip whitespaces
        for(;i < attrStr.length
            && (attrStr[i] === " "
            || attrStr[i] === "\t") ; i++);

        //skip attribute value: startChar is whatever character opens the value
        //(undefined when the string ended right after the name)
        var startChar = attrStr[i++];
        for(;i < attrStr.length && attrStr[i] !== startChar; i++);
        //i now rests on the closing delimiter; the outer loop's i++ steps past it
    }
    return true;
}
//Whole-string pattern for an attribute name: a leading "_" or ASCII letter,
//followed by any number of word characters, "-", "." or ":".
var validAttrRegxp = new RegExp("^[_a-zA-Z][\\w\\-\\.\\:]*$");

/**
 * Test an attribute name against validAttrRegxp.
 *
 * @param {string} attrName candidate attribute name
 * @returns the result of util.doesMatch for the name and the pattern
 */
function validateAttrName(attrName){
    var matched = util.doesMatch(attrName,validAttrRegxp);
    return matched;
}
//Matches names that begin with "xml" in any letter case; such names are rejected.
var startsWithXML = new RegExp("^[Xx][Mm][Ll]");
//Whole-string pattern for a tag name: a leading ASCII letter or "_", followed
//by word characters, ".", "-", "_" or ":". The trailing "$" makes the pattern
//cover the entire name (as validAttrRegxp does for attribute names); without
//it only a prefix was checked and names such as "a$b" were accepted.
var startsWith = new RegExp("^([a-zA-Z]|_)[\\w\\.\\-_:]*$");

/**
 * Decide whether a tag name is acceptable.
 *
 * @param {string} tagname tag name without "<", "/" or ">"
 * @returns {boolean} false when the name begins with "xml" (any case) or does
 *                    not match the tag-name pattern in full, true otherwise
 */
function validateTagName(tagname){
    if(util.doesMatch(tagname,startsWithXML)) return false;
    if(util.doesNotMatch(tagname,startsWith)) return false;
    return true;
}

@@ -155,5 +155,3 @@ (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.parser = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){

//var attrsRegx = new RegExp("(\\S+)=\\s*[\"']?((?:.(?![\"']?\\s+(?:\\S+)=|[>\"']))+.)[\"']?","g");
//var attrsRegx = new RegExp("(\\S+)=\\s*(['\"])((?:.(?!\\2))*.)","g");
var attrsRegx = new RegExp("(\\S+)\\s*=\\s*(['\"])(.*?)\\2","g");
var attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])(.*?)\\2","g");
function buildAttributesArr(attrStr,ignore,prefix,attrNodeName,ignoreNS,conversion){

@@ -172,2 +170,3 @@ attrStr = attrStr || attrStr.trim();

var attrName = prefix + resolveNameSpace( matches[i][1],ignoreNS);
attrsCollection[attrName] = parseValue(matches[i][3],conversion);

@@ -243,85 +242,212 @@ }

xmlData = xmlData.replace(/\n/g,"");//make it single line
xmlData = xmlData.replace(/(<!\[CDATA\[.*?\]\]>)/g,"");//Remove all CDATA
xmlData = xmlData.replace(/(<!--.*?(?:-->))/g,"");//Remove all comments
if(validateAttributes(xmlData) !== true) return false;
xmlData = xmlData.replace(/(\s+(?:[\w:\-]+)\s*=\s*(['\"]).*?\2)/g,"");//Remove all attributes
xmlData = xmlData.replace(/(^\s*<\?xml\s*\?>)/g,"");//Remove XML starting tag
if(xmlData.indexOf("<![CDATA[") > 0 || xmlData.indexOf("<!--") > 0 ) return false;
var tags = util.getAllMatches(xmlData,tagsPattern);
if(tags.length === 0) return false; //non xml string
var result = checkForMatchingTag(tags,0);
xmlData = xmlData.replace(/(^\s*<\?xml.*?\?>)/g,"");//Remove XML starting tag
xmlData = xmlData.replace(/(<!DOCTYPE[\s\w\"\.\/\-\:]+(\[.*\])*\s*>)/g,"");//Remove DOCTYPE
if(result !== true) return false; else return true;
}
var tags = [];
for (var i = 0; i < xmlData.length; i++) {
if(xmlData[i] === "<"){//starting of tag
//read until you reach to '>' avoiding any '>' in attribute value
i++;
if(xmlData[i] === "!"){
i = readCommentAndCDATA(xmlData,i);
continue;
}else{
var closingTag = false;
if(xmlData[i] === "/"){//closing tag
closingTag = true;
i++;
}
//read tagname
var tagName = "";
for(;i < xmlData.length
&& xmlData[i] !== ">"
&& xmlData[i] !== " "
&& xmlData[i] !== "\t" ; i++) {
var startsWithXML = new RegExp("^[Xx][Mm][Ll]");
var startsWith = new RegExp("^([a-zA-Z]|_)[\\w\.\\-_:]*");
tagName +=xmlData[i];
}
tagName = tagName.trim();
if(tagName[tagName.length-1] === "/"){//self closing tag without attributes
tagName = tagName.substring(0,tagName.length-2);
return validateTagName(tagName);
}
if(!validateTagName(tagName)) return false;
function validateTagName(tagname){
if(util.doesMatch(tagname,startsWithXML)) return false;
else if(util.doesNotMatch(tagname,startsWith)) return false;
else return true;
}
var attrStringPattern = new RegExp("<[\\w:\\-_\.]+(.*?)\/?>","g");
var attrPattern = new RegExp("\\s+([\\w:\-]+)\\s*=\\s*(['\"])(.*?)\\2","g");
function validateAttributes(xmlData){
var attrStrings = util.getAllMatches(xmlData,attrStringPattern);
for (i=0;i<attrStrings.length;i++){
if(attrStrings[i][1].trim().length > 0 && attrStrings[i][1].trim().length < 4){ //invalid attributes
return false;
}else if(attrStrings[i][1].trim().length !== 0){
var attrsList = util.getAllMatches(attrStrings[i][1],attrPattern);
var attrNames=[];
for (j=0;j<attrsList.length;j++){
if(attrNames.hasOwnProperty(attrsList[j][1])){//duplicate attributes
return false;
var attrStr = "";
var startChar = "";
for(;i < xmlData.length ;i++){
if(xmlData[i] === '"' || xmlData[i] === "'"){
if(startChar === ""){
startChar = xmlData[i];
}else{
startChar = "";
}
}else if(xmlData[i] === ">"){
if(startChar === ""){
break;
}
}
attrStr += xmlData[i];
}
if(startChar !== "") return false;//You have forgot to close the quote
attrStr = attrStr.trim();
if(attrStr[attrStr.length-1] === "/" ){//self closing tag
attrStr = attrStr.substring(0,attrStr.length-2);
if(!validateAttributeString(attrStr)){
return false;
}else{
continue;
}
}else if(closingTag){
if(attrStr.length > 0){
return false;
//throw new Error("XML validation error: closing tag should not have any attribute");
}else{
var otg = tags.pop();
if(tagName !== otg){
return false;
//throw new Error("XML validation error: no mathicng closing tag");
}
}
}else{
attrNames[attrsList[j][1]]=1;
//validate attribute value
//if(!validateAttrValue(attrsList[3])) return false;
if(!validateAttributeString(attrStr)){
return false;
}
tags.push(tagName);
}
//skip tag text value
//It may include comments and CDATA value
for(i++;i < xmlData.length ; i++){
if(xmlData[i] === "<"){
if(xmlData[i+1] === "!"){//comment or CADATA
i++;
i = readCommentAndCDATA(xmlData,i);
continue;
}else{
break;
}
}
}//end of reading tag text value
if(xmlData[i] === "<") i--;
}
}else{
if(xmlData[i] === " " || xmlData[i] === "\t") continue;
return false;
}
}
if(tags.length > 0){
return false;
//throw new Error("XML validation error");
}
return true;
}
function checkForMatchingTag(tags,i){
if(tags.length === i) {
return true;
}else if(tags[i][0].indexOf("</") === 0) {//closing tag
return i;
}else if(tags[i][0].indexOf("/>") === tags[i][0].length-2){//Self closing tag
if(validateTagName(tags[i][0].substring(1)) === false) return -1;
return checkForMatchingTag(tags,i+1);
function readCommentAndCDATA(xmlData,i){
if(xmlData.length > i+5 && xmlData[i+1] === "-" && xmlData[i+2] === "-"){//comment
for(i+=3;i<xmlData.length;i++){
if(xmlData[i] === "-" && xmlData[i+1] === "-" && xmlData[i+2] === ">"){
i+=2;
break;
}
}
}else if( xmlData.length > i+9
&& xmlData[i+1] === "["
&& xmlData[i+2] === "C"
&& xmlData[i+3] === "D"
&& xmlData[i+4] === "A"
&& xmlData[i+5] === "T"
&& xmlData[i+6] === "A"
&& xmlData[i+7] === "["){
}else if(tags.length > i+1){
if(tags[i+1][0].indexOf("</") === 0){//next tag
if(validateTagName(tags[i][1]) === false) return -1;
if(tags[i][1] === tags[i+1][1]) {//matching with next closing tag
return checkForMatchingTag(tags,i+2);
}else {
return -1;//not matching
for(i+=8;i<xmlData.length;i++){
if(xmlData[i] === "]" && xmlData[i+1] === "]" && xmlData[i+2] === ">" ){
i+=2;
break;
}
}else
var nextIndex = checkForMatchingTag(tags,i+1);
if(nextIndex !== -1 && tags[nextIndex] !== undefined && tags[nextIndex][0].indexOf("</") === 0){
if(validateTagName(tags[i][1]) === false) return -1;
if(tags[i][1] === tags[nextIndex][1]) {
return checkForMatchingTag(tags,nextIndex+1);
}else {
return -1;//not matching
}
}
}
}
return -1;
return i;
}
//Sample inputs noted by the original author: attr, ="sd", a="amit's", a="sd"b="saf",
/**
 * Check the attribute portion of a tag (the text that follows the tag name).
 *
 * The string is walked as a sequence of `name = <delimiter>value<delimiter>`
 * items. Every name must pass validateAttrName() and must not repeat within
 * the same string. The value is only skipped over: the character that follows
 * "=" (after optional spaces/tabs) is taken as the delimiter and everything up
 * to its next occurrence, or the end of the string, is ignored. A missing
 * closing delimiter is therefore NOT reported here.
 *
 * @param {string} attrStr attribute text, e.g. `a="1" b='2'`
 * @returns {boolean} false on an invalid or duplicate attribute name, true
 *                    otherwise (including for an empty string)
 */
function validateAttributeString(attrStr){
    //Prototype-less object used as the set of names seen so far. An array or
    //a plain object would already "contain" names such as "length" or
    //"hasOwnProperty", which are perfectly legal attribute names.
    var attrNames = Object.create(null);
    for(var i=0; i< attrStr.length; i++){
        //read attribute name: everything up to the next "="
        var nameStart = i;
        for(;i < attrStr.length && attrStr[i] !== "=" ; i++);
        var attrName = attrStr.substring(nameStart,i).trim();

        //reject duplicates, then validate the name itself
        if(attrName in attrNames){
            return false;
        }
        attrNames[attrName] = 1;
        if(!validateAttrName(attrName)){
            return false;
        }

        i++;//step over "="
        //skip whitespaces
        for(;i < attrStr.length
            && (attrStr[i] === " "
            || attrStr[i] === "\t") ; i++);

        //skip attribute value: startChar is whatever character opens the value
        //(undefined when the string ended right after the name)
        var startChar = attrStr[i++];
        for(;i < attrStr.length && attrStr[i] !== startChar; i++);
        //i now rests on the closing delimiter; the outer loop's i++ steps past it
    }
    return true;
}
//Whole-string pattern for an attribute name: a leading "_" or ASCII letter,
//followed by any number of word characters, "-", "." or ":".
var validAttrRegxp = new RegExp("^[_a-zA-Z][\\w\\-\\.\\:]*$");

/**
 * Test an attribute name against validAttrRegxp.
 *
 * @param {string} attrName candidate attribute name
 * @returns the result of util.doesMatch for the name and the pattern
 */
function validateAttrName(attrName){
    var matched = util.doesMatch(attrName,validAttrRegxp);
    return matched;
}
//Matches names that begin with "xml" in any letter case; such names are rejected.
var startsWithXML = new RegExp("^[Xx][Mm][Ll]");
//Whole-string pattern for a tag name: a leading ASCII letter or "_", followed
//by word characters, ".", "-", "_" or ":". The trailing "$" makes the pattern
//cover the entire name (as validAttrRegxp does for attribute names); without
//it only a prefix was checked and names such as "a$b" were accepted.
var startsWith = new RegExp("^([a-zA-Z]|_)[\\w\\.\\-_:]*$");

/**
 * Decide whether a tag name is acceptable.
 *
 * @param {string} tagname tag name without "<", "/" or ">"
 * @returns {boolean} false when the name begins with "xml" (any case) or does
 *                    not match the tag-name pattern in full, true otherwise
 */
function validateTagName(tagname){
    if(util.doesMatch(tagname,startsWithXML)) return false;
    if(util.doesNotMatch(tagname,startsWith)) return false;
    return true;
}
},{"./util":2}]},{},[1])(1)
});
{
"name": "fast-xml-parser",
"version": "2.8.3",
"version": "2.9.0",
"description": "Validate XML or Parse XML to JS/JSON very fast without C/C++ based libraries",

@@ -5,0 +5,0 @@ "main": "./bin/parser.js",

@@ -91,3 +91,3 @@ # [fast-xml-parser](https://www.npmjs.com/package/fast-xml-parser)

* The parser doesn't check whether the XML is valid. If the XML is not valid, you may get an invalid result, so call the validator function first to check the structure.
* This is based on the JS regular expression engine. Due to its limitations, fast-xml-parser faces performance issues when it processes a very large XML string (data), e.g. 10 MB or more. (I'll look into this as soon as I get some free time.)
* This is based on the JS regular expression engine. Due to its limitations, fast-xml-parser faces performance issues when it processes a very large XML string (data), e.g. 10 MB or more. (I'll look into this as soon as I get some free time.) **UPDATE**: as of v2.9.0, I have rewritten the validator code so that the validator can handle large files as well. I have tested it with XML files of up to 98 MB. I have some more ideas to increase the speed, and I'll work on them whenever I get the time.

@@ -94,0 +94,0 @@ Report an issue or request for a feature [here](https://github.com/NaturalIntelligence/fast-xml-parser/issues)

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc