Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

fast-xml-parser

Package Overview
Dependencies
Maintainers
1
Versions
136
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

fast-xml-parser - npm Package Compare versions

Comparing version 2.9.4 to 3.0.0

src/xmlNode.js

14

cli.js

@@ -19,6 +19,5 @@ #!/usr/bin/env node

ignoreNameSpace : true,
ignoreNonTextNodeAttr : false,
ignoreTextNodeAttr : false,
textNodeConversion : true,
textAttrConversion : true
ignoreAttributes : false,
parseNodeValue : true,
parseAttributeValue : true
};

@@ -31,7 +30,6 @@ var fileName = "";

}else if(process.argv[i] === "-a"){
options.ignoreNonTextNodeAttr = true;
options.ignoreTextNodeAttr = true;
options.ignoreAttributes = true;
}else if(process.argv[i] === "-c"){
options.textNodeConversion = false;
options.textAttrConversion = false;
options.parseNodeValue = false;
options.parseAttributeValue = false;
}else if(process.argv[i] === "-o"){

@@ -38,0 +36,0 @@ outputFileName = process.argv[++i];

{
"name": "fast-xml-parser",
"version": "2.9.4",
"version": "3.0.0",
"description": "Validate XML or Parse XML to JS/JSON very fast without C/C++ based libraries",

@@ -36,3 +36,4 @@ "main": "./src/parser.js",

"assert",
"arrayMode"
"arrayMode",
"big"
],

@@ -69,3 +70,4 @@ "author": "Amit Gupta (https://github.com/amitguptagwl)",

"portfinder": "^1.0.13",
"zombie": "^5.0.7"
"zombie": "^5.0.7",
"xml2js": "^0.4.19"
},

@@ -72,0 +74,0 @@ "dependencies": {

# [fast-xml-parser](https://www.npmjs.com/package/fast-xml-parser)
Validate XML or Parse XML to JS/JSON very fast without C/C++ based libraries and no callback
<p style="color:red;"> **Note**: If you are using v3, your code may start failing in both parsing and validation. I apologize for the breaking changes, but the code had to change to support large files and many other options. Please refer to the code example below for more detail.</p>
You can use this library online (press try me button above), or as command from CLI, or in your website, or in npm repo.

@@ -43,3 +44,3 @@

// when a tag has attributes
/* upto 2.9.x
var options = {

@@ -57,2 +58,16 @@ attrPrefix : "@_",

};
*/
//from 3.0.0
var options = {
attributeNamePrefix : "@_",
attrNodeName: false,
textNodeName : "#text",
ignoreAttributes : true,
ignoreNameSpace : false,
allowBooleanAttributes : false,
parseNodeValue : true,
parseAttributeValue : false,
trimValues: true,
decodeHTMLchar: false,
};
if(fastXmlParser.validate(xmlData)=== true){//optional

@@ -64,17 +79,17 @@ var jsonObj = fastXmlParser.parse(xmlData,options);

var tObj = fastXmlParser.getTraversalObj(xmlData,options);
var jsonObj = fastXmlParser.convertToJson(tObj);
var jsonObj = fastXmlParser.convertToJson(tObj,options);
```
**OPTIONS** :
* **attributeNamePrefix** : prepend given string to attribute name for identification
* **attrNodeName**: (Valid name) Group all the attributes as properties of given name.
* **ignoreNonTextNodeAttr** : Ignore attributes of non-text node.
* **ignoreTextNodeAttr** : Ignore attributes for text node
* **ignoreAttributes** : Ignore attributes to be parsed.
* **ignoreNameSpace** : Remove namespace string from tag and attribute names.
* **ignoreRootElement** : Remove root element from parsed JSON.
* **textNodeConversion** : Parse the value of text node to float or integer.
* **textAttrConversion** : Parse the value of an attribute to float or integer.
* **arrayMode** : Put the value(s) of a tag or attribute in an array.
* **allowBooleanAttributes** : a tag can have attributes without any value
* **parseNodeValue** : Parse the value of text node to float, integer, or boolean.
* **parseAttributeValue** : Parse the value of an attribute to float, integer, or boolean.
* **trimValues** : trim string values of an attribute or node
* **decodeHTMLchar** : decodes any named and numerical character HTML references excluding CDATA part.
To use from command line

@@ -94,3 +109,4 @@ ```bash

```js
var isValid = parser.validate(xmlData);
var result = parser.validate(xmlData);
if(result !== true) console.log(result.err);
var jsonObj = parser.parse(xmlData);

@@ -115,29 +131,47 @@ ```

### Benchmark report
![npm_xml2json_compare](https://cloud.githubusercontent.com/assets/7692328/22402086/7526a3a6-e5e2-11e6-8e6b-301691725c21.png)
Don't forget to check the performance report on [comparejs](https://naturalintelligence.github.io/comparejs/?q=xml2json).
| file size | fxp 3.0 validator (rps) | fxp 3.0 parser (rps) | xml2js 0.4.19 (rps) |
| ---------- | ----------------------- | ------------------- | ------------------- |
| 1.5k | 16581.06758 | 14032.09323 | 4615.930805 |
| 1.5m | 14918.47793 | 13.23366098 | 5.90682005 |
| 13m | 1.834479235 | 1.135582008 | -1 |
| 1.3k with CDATA | 30583.35319 | 43160.52342 | 8398.556349 |
| 1.3m with CDATA | 27.29266471 | 52.68877009 | 7.966000795 |
| 1.6k with cdata,prolog,doctype | 27690.26082 | 41433.98547 | 7872.399268 |
| 98m | 0.08473858148 | 0.2600104004 | -1 |
**validator benchmark: 21000 tps**
* -1 indicates error or incorrect output.
### Limitation
* Parser doesn't check if the XML is valid or not. If the XML is not valid you may get invalid result. So you can call the validator function first to check the structure.
* This is based on the JS regular expression engine. Due to its limitations, fast-xml-parser faces performance issues when it processes an XML string (data) which is very large, like 10mb or more. (I'll look into this as soon as I get some free time). **UPDATE**: from v2.9.0, I have rewritten the validator code so that the validator can handle large files as well. I have tested it up to a 98mb xml file. I have some more ideas to increase the speed, and I'll work on them whenever I get the time.
![npm_xml2json_compare](static/img/fxpv3-vs-xml2jsv0419_chart.png)
Report an issue or request for a feature [here](https://github.com/NaturalIntelligence/fast-xml-parser/issues)
![npm_xml2json_compare](static/img/fxp-validatorv3.png)
Your contribution in terms of donation, testing, bug fixes, code development etc. can help me to write fast algorithms.
[<img src="https://www.paypalobjects.com/webstatic/en_US/btn/btn_donate_92x26.png" alt="Stubmatic donate button"/>](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=KQJAX48SPUKNC)
# Changes from v3
**Give me a [star](https://github.com/NaturalIntelligence/fast-xml-parser)**, if you really like this project.
* It can handle big file now (I have tested up to 98mb). Performance report is given above.
* Meaningful error messages from validator
**Fund collected (since the starting of the project)** : $0
```
"err": {
"code": "InvalidAttr",
"msg": "Attributes for rootNode have open quote"
}
```
* Updated options : check snippet above
* Parse boolean values as well. E.g. `"true"` to `true`
* You can set parser not to *trim* whitespaces from attribute or tag /node value.
* You can set parser to HTML decode Tag / node and attribute values. However CDATA value will not be HTML decoded.
* Tag / node value will not be parsed if CDATA presents.
* You can set validator and parser to allow boolean values.
* Few validation and parsing bugs are also fixed
Some of my other NPM projects
- [stubmatic](https://github.com/NaturalIntelligence/Stubmatic) : A stub server to mock behaviour of HTTP(s) / REST / SOAP services. Stubbing redis is on the way.
- [compare js](https://github.com/NaturalIntelligence/comparejs) : compare the features of JS code, libraries, and NPM repos.
- [fast-lorem-ipsum](https://github.com/amitguptagwl/fast-lorem-ipsum) : Generate lorem ipsum words, sentences, paragraph very quickly.
- [fast-lorem-ipsum](https://github.com/amitguptagwl/fast-lorem-ipsum) : Generate lorem ipsum words, sentences, paragraph very quickly.
### TODO
* P2: parser online demo with more options
* P2: validating XML stream data
* P2: validator cli
* P2: fast XML prettyfier

@@ -0,31 +1,26 @@

var util = require("./util");
var xmlNode = require("./xmlNode");
var he = require("he");
var getAllMatches = require("./util").getAllMatches;
var TagType = {"OPENING":1, "CLOSING":2, "SELF":3, "CDATA": 4};
/**
 * Constructor for one node of the traversal tree built while parsing.
 *
 * @param {string} tagname - name stored on the node
 * @param {object} [parent] - node this one hangs under
 * @param {*} [val] - value stored on the node
 */
var xmlNode = function (tagname, parent, val) {
    this.tagname = tagname;
    this.parent = parent;
    // Each node starts with its own, empty list of children.
    this.child = [];
    this.val = val;

    // Appends a node to the end of this node's child list.
    this.addChild = function (node) {
        this.child.push(node);
    };
};
//var tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
//var tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
//var tagsRegx = new RegExp("<(\\/?[a-zA-Z0-9_:]+)([^>\\/]*)(\\/?)>([^<]+)?","g");
//var tagsRegx = new RegExp("<(\\/?[\\w:-]+)([^>]*)>([^<]+)?","g");
//var cdataRegx = "<!\\[CDATA\\[([^\\]\\]]*)\\]\\]>";
var cdataRegx = "<!\\[CDATA\\[(.*?)(\\]\\]>)";
var tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
//treat cdata as a tag
var defaultOptions = {
attrPrefix : "@_",
attributeNamePrefix : "@_",
attrNodeName: false,
textNodeName : "#text",
ignoreNonTextNodeAttr : true,
ignoreTextNodeAttr : true,
ignoreAttributes : true,
ignoreNameSpace : false,
ignoreRootElement : false,
textNodeConversion : true,
textAttrConversion : false,
arrayMode : false
allowBooleanAttributes : false, //a tag can have attributes without any value
//ignoreRootElement : false,
parseNodeValue : true,
parseAttributeValue : false,
arrayMode : false,
trimValues: true, //Trim string values of tag and attributes
decodeHTMLchar: false,
//decodeStrict: false,
};

@@ -35,5 +30,15 @@

if(!options) options = {};
var props = ["attrPrefix","attrNodeName","ignoreNonTextNodeAttr","ignoreTextNodeAttr","ignoreNameSpace","ignoreRootElement","textNodeName","textNodeConversion","textAttrConversion","arrayMode"];
for (var i = 0; i < props.length; i++) {
if(options[props[i]] === undefined){
var props = ["attributeNamePrefix",
"attrNodeName",
"ignoreAttributes",
"ignoreNameSpace",
"textNodeName",
"parseNodeValue",
"parseAttributeValue",
"arrayMode",
"trimValues",
];
var len = props.length;
for (var i = 0; i < len; i++) {
if(typeof options[props[i]] === "undefined"){
options[props[i]] = defaultOptions[props[i]];

@@ -47,75 +52,85 @@ }

options = buildOptions(options);
//xmlData = xmlData.replace(/>(\s+)/g, ">");//Remove spaces and make it single line.
xmlData = xmlData.replace(/<!--(.|\n)*?-->/g, "");//Remove single and multiline comments
var tags = getAllMatches(xmlData,tagsRegx);
//console.log(tags);
xmlData = xmlData.replace(/\r?\n/g, " ");//make it single line
xmlData = xmlData.replace(/<!--.*?-->/g, "");//Remove comments
var xmlObj = new xmlNode('!xml');
var currentNode = xmlObj;
for (var i = 0; i < tags.length ; i++) {
var tag = resolveNameSpace(tags[i][1],options.ignoreNameSpace),
nexttag = i+1 < tags.length ? resolveNameSpace(tags[i+1][1],options.ignoreNameSpace) : undefined,
attrsStr = tags[i][2], attrs,
val = tags[i][4] === undefined ? tags[i][6] : simplifyCDATA(tags[i][0]);
if(tag.indexOf("/") === 0){//ending tag
currentNode = currentNode.parent;
continue;
}
var tagsRegx = new RegExp("<((!\\[CDATA\\[(.*?)(\\]\\]>))|((\\w*:)?([\\w:\\-\\._]+))([^>]*)>|((\\/)((\\w*:)?([\\w:\\-\\._]+))>))([^<]*)","g");
var tag = tagsRegx.exec(xmlData);
var nextTag = tagsRegx.exec(xmlData);
var previousMatch,nextMatch;
while (tag) {
var tagType = checkForTagType(tag);
var selfClosingTag = attrsStr.charAt(attrsStr.length-1) === '/';
var childNode = new xmlNode(tag,currentNode);
if(tagType === TagType.CLOSING){
//add parsed data to parent node
if(currentNode.parent && tag[14]){
currentNode.parent.val = util.getValue(currentNode.parent.val) + "" + processTagValue(tag[14],options);
}
if(selfClosingTag){
attrs = buildAttributesArr(attrsStr,options.ignoreTextNodeAttr,options.attrPrefix,options.attrNodeName,options.ignoreNameSpace,options.textAttrConversion);
childNode.val = attrs || "";
currentNode.addChild(childNode);
}else if( ("/" + tag) === nexttag){ //Text node
attrs = buildAttributesArr(attrsStr,options.ignoreTextNodeAttr,options.attrPrefix,options.attrNodeName,options.ignoreNameSpace,options.textAttrConversion);
val = parseValue(val,options.textNodeConversion);
if(attrs){
attrs[options.textNodeName] = val;
childNode.val = attrs;
}else{
childNode.val = val;
currentNode = currentNode.parent;
}else if(tagType === TagType.CDATA){
//no attribute
//add text to parent node
//add parsed data to parent node
currentNode.val = (currentNode.val || "") + (tag[3] || "") + processTagValue(tag[14],options);
}else if(tagType === TagType.SELF){
var childNode = new xmlNode( options.ignoreNameSpace ? tag[7] : tag[5],currentNode, "");
if(tag[8] && tag[8].length > 1){
tag[8] = tag[8].substr(0,tag[8].length -1);
}
childNode.attrsMap = buildAttributesMap(tag[8],options);
currentNode.addChild(childNode);
i++;
}else if( (nexttag && nexttag.indexOf("/") === -1) && (val !== undefined && val != null && val.trim() !== "" )){ //Text node with sub nodes
val = parseValue(val,options.textNodeConversion);
childNode.addChild(new xmlNode(options.textNodeName,childNode,val));
}else{//TagType.OPENING
var childNode = new xmlNode( options.ignoreNameSpace ? tag[7] : tag[5],currentNode,processTagValue(tag[14],options));
childNode.attrsMap = buildAttributesMap(tag[8],options);
currentNode.addChild(childNode);
currentNode = childNode;
}else{//starting tag
attrs = buildAttributesArr(attrsStr,options.ignoreNonTextNodeAttr,options.attrPrefix,options.attrNodeName,options.ignoreNameSpace,options.textAttrConversion);
if(attrs){
for (var prop in attrs) {
if(attrs.hasOwnProperty(prop)){
childNode.addChild(new xmlNode(prop,childNode,attrs[prop]));
}
}
}
currentNode.addChild(childNode);
currentNode = childNode;
}
tag = nextTag;
nextTag = tagsRegx.exec(xmlData);
}
return xmlObj;
};
var xml2json = function (xmlData,options){
return convertToJson(getTraversalObj(xmlData,options), buildOptions(options).arrayMode);
};
function processTagValue(val,options){
if(val){
if(options.trimValues){
val = val.trim();
}
if(options.decodeHTMLchar){
val = he.decode(val);
}
val = parseValue(val,options.parseNodeValue);
}
var cdRegx = new RegExp(cdataRegx,"g");
return val;
}
function simplifyCDATA(cdata){
var result = getAllMatches(cdata,cdRegx);
var val = "";
for (var i = 0; i < result.length ; i++) {
val+=result[i][1];
function checkForTagType(match){
if(match[4] === "]]>"){
return TagType.CDATA;
}else if(match[10] === "/"){
return TagType.CLOSING;
}else if(typeof match[8] !== "undefined" && match[8].substr(match[8].length-1) === "/"){
return TagType.SELF;
}else{
return TagType.OPENING;
}
return val;
}
function resolveNameSpace(tagname,ignore){
if(ignore){
/** Identity helper: hands its single argument straight back. */
var fakeCall = function (a) {
    return a;
};
/** No-op helper: takes no declared parameters and yields undefined. */
var fakeCallNoReturn = function () {
    return undefined;
};
/**
 * Parse an XML string into a plain JS object in one call.
 *
 * Fills in missing options via buildOptions, builds the traversal tree
 * with getTraversalObj, then flattens it with convertToJson.
 *
 * @param {string} xmlData - XML text to parse
 * @param {object} [options] - parser options
 * @returns {*} whatever convertToJson produces for the tree
 */
var xml2json = function (xmlData, options) {
    options = buildOptions(options);
    var traversalObj = getTraversalObj(xmlData, options);
    return convertToJson(traversalObj, options.textNodeName, options.arrayMode);
};
function resolveNameSpace(tagname,options){
if(options.ignoreNameSpace ){
var tags = tagname.split(":");

@@ -131,44 +146,48 @@ var prefix = tagname.charAt(0) === "/" ? "/" : "";

return tagname;
}
function parseValue(val,conversion,isAttribute){
if(val){
if(!conversion || isNaN(val)){
val = "" + he.decode(val, {isAttributeValue:isAttribute, strict:true});
if(isAttribute) {
val = val.replace(/\r?\n/g, " ");
}
function parseValue(val,shouldParse){
if(shouldParse && typeof val === "string"){
if(val.trim() === "" || isNaN(val)){
val = val === "true" ? true : val === "false" ? false : val;
}else{
if(val.indexOf(".") !== -1){
if(parseFloat){
val = parseFloat(val);
}else{
val = Number.parseFloat(val);
}
val = Number.parseFloat(val);
}else{
if(parseInt){
val = parseInt(val,10);
}else{
val = Number.parseInt(val,10);
}
val = Number.parseInt(val,10);
}
}
return val;
}else{
val = "";
if(util.isExist(val)) return val;
else return "";
}
return val;
}
var attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm");
function buildAttributesArr(attrStr,ignore,prefix,attrNodeName,ignoreNS,conversion){
attrStr = attrStr || attrStr.trim();
if(!ignore && attrStr.length > 3){
var matches = getAllMatches(attrStr,attrsRegx);
//TODO: change regex to capture NS
//var attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm");
var attrsRegx = new RegExp("([^\\s=]+)\\s*(=\\s*(['\"])(.*?)\\3)?","g");
function buildAttributesMap(attrStr,options){
if( !options.ignoreAttributes && typeof attrStr === "string" ){
//attrStr = attrStr || attrStr.trim();
var matches = util.getAllMatches(attrStr,attrsRegx);
var len = matches.length; //don't make it inline
var attrs = {};
for (var i = 0; i < matches.length; i++) {
var attrName = resolveNameSpace(matches[i][1],ignoreNS);
for (var i = 0; i < len ; i++) {
var attrName = resolveNameSpace(matches[i][1],options);
if(attrName.length && attrName !== "xmlns") {
attrs[prefix + attrName] = parseValue(matches[i][3], conversion, true);
if(matches[i][4]){
if(options.trimValues){
matches[i][4] = matches[i][4].trim();
}
if(options.decodeHTMLchar){
matches[i][4] = he.decode(matches[i][4], {isAttributeValue : true});
}
attrs[options.attributeNamePrefix + attrName] = parseValue(matches[i][4],options.parseAttributeValue);
}else if(options.allowBooleanAttributes){
attrs[options.attributeNamePrefix + attrName] = true;
}
}

@@ -179,5 +198,5 @@ }

}
if(attrNodeName){
if(options.attrNodeName){
var attrCollection = {};
attrCollection[attrNodeName] = attrs;
attrCollection[options.attrNodeName] = attrs;
return attrCollection;

@@ -189,25 +208,38 @@ }

var convertToJson = function (node, arrayMode){
var convertToJson = function (node, textNodeName,arrayMode){
var jObj = {};
if(node.val !== undefined && node.val != null || node.val === "") {
return node.val;
//traver through all the children
for (var index = 0; index < node.child.length; index++) {
var prop = node.child[index].tagname;
var obj = convertToJson(node.child[index],textNodeName, arrayMode);
if(typeof jObj[prop] !== "undefined"){
if(!Array.isArray(jObj[prop])){
var swap = jObj[prop];
jObj[prop] = [];
jObj[prop].push(swap);
}
jObj[prop].push(obj);
}else{
jObj[prop] = arrayMode ? [obj] : obj;
}
}
util.merge(jObj,node.attrsMap);
//add attrsMap as new children
if(util.isEmptyObject(jObj)){
return util.isExist(node.val)? node.val : "";
}else{
for (var index = 0; index < node.child.length; index++) {
var prop = node.child[index].tagname;
var obj = convertToJson(node.child[index], arrayMode);
if(jObj[prop] !== undefined){
if(!Array.isArray(jObj[prop])){
var swap = jObj[prop];
jObj[prop] = [];
jObj[prop].push(swap);
}
jObj[prop].push(obj);
}else{
jObj[prop] = arrayMode ? [obj] : obj;
if(util.isExist(node.val)){
if(!(typeof node.val === "string" && node.val === "")){
jObj[textNodeName] = node.val;
}
}
}
//add value
return jObj;
};
exports.parse = xml2json;

@@ -214,0 +246,0 @@ exports.getTraversalObj = getTraversalObj;

@@ -5,4 +5,5 @@ var getAllMatches = function(string, regex) {

while (match) {
var allmatches = [];
for (var index = 0; index < match.length; index++) {
var allmatches = [];
var len = match.length;
for (var index = 0; index < len; index++) {
allmatches.push(match[index]);

@@ -19,3 +20,3 @@ }

var match = regex.exec(string);
if(match === null || match === undefined) return false;
if(match === null || typeof match === "undefined") return false;
else return true;

@@ -28,4 +29,39 @@ }

exports.isExist= function(v){
return typeof v !== "undefined";
}
exports.isEmptyObject= function(obj) {
return Object.keys(obj).length === 0
}
/**
* Copy all the properties of a into b.
* @param {*} target
* @param {*} a
*/
exports.merge =function (target,a){
if(a){
var keys = Object.keys(a) // will return an array of own properties
var len = keys.length; //don't make it inline
for(var i = 0; i < len; i++){
target[keys[i]] = a[keys[i]] ;
}
}
}
/* exports.merge =function (b,a){
return Object.assign(b,a);
} */
exports.getValue = function (v){
if(exports.isExist(v)){
return v;
}else{
return "";
}
}
exports.doesMatch = doesMatch
exports.doesNotMatch = doesNotMatch
exports.getAllMatches = getAllMatches;
var util = require("./util");
var tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g");
exports.validate = function(xmlData){
xmlData = xmlData.replace(/(\r\n|\n|\r)/gm,"");//make it single line
xmlData = xmlData.replace(/(^\s*<\?xml.*?\?>)/g,"");//Remove XML starting tag
xmlData = xmlData.replace(/(<!DOCTYPE[\s\w\"\.\/\-\:]+(\[.*\])*\s*>)/g,"");//Remove DOCTYPE
// Defaults applied to any validator option the caller leaves undefined.
var defaultOptions = {
    // A tag can have attributes without any value
    allowBooleanAttributes: false
};

/**
 * Fill in missing validator options with their defaults.
 *
 * A falsy argument is replaced by a fresh object; otherwise the caller's
 * object is updated in place and returned.
 *
 * @param {object} [options] - validator options supplied by the caller
 * @returns {object} the options object with defaults applied
 */
var buildOptions = function (options) {
    var settings = options ? options : {};
    ["allowBooleanAttributes"].forEach(function (key) {
        if (settings[key] === undefined) {
            settings[key] = defaultOptions[key];
        }
    });
    return settings;
};
//var tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g");
exports.validate = function(xmlData, options){
options = buildOptions(options);
//xmlData = xmlData.replace(/(\r\n|\n|\r)/gm,"");//make it single line
//xmlData = xmlData.replace(/(^\s*<\?xml.*?\?>)/g,"");//Remove XML starting tag
//xmlData = xmlData.replace(/(<!DOCTYPE[\s\w\"\.\/\-\:]+(\[.*\])*\s*>)/g,"");//Remove DOCTYPE
var tags = [];
var tagFound = false;
for (var i = 0; i < xmlData.length; i++) {

@@ -17,4 +36,15 @@

i++;
if(xmlData[i] === "!"){
if(xmlData[i] === "?"){
if(i !== 1){
return {err : { code : "InvalidXml", msg : "XML declaration allowed only at the start of the document."}};
}else{
//read until ?> is found
for(;i<xmlData.length;i++){
if(xmlData[i] == "?" && xmlData[i+1] == ">"){
i++;
break;
}
}
}
}else if(xmlData[i] === "!"){
i = readCommentAndCDATA(xmlData,i);

@@ -41,53 +71,40 @@ continue;

if(tagName[tagName.length-1] === "/"){//self closing tag without attributes
tagName = tagName.substring(0,tagName.length-2);
return validateTagName(tagName);
tagName = tagName.substring(0,tagName.length-1);
continue;
}
if(!validateTagName(tagName)) return false;
if(!validateTagName(tagName))
return { err: { code:"InvalidTag",msg:"Tag " + tagName + " is an invalid name."}};
var attrStr = "";
var startChar = "";
for(;i < xmlData.length ;i++){
if(xmlData[i] === '"' || xmlData[i] === "'"){
if(startChar === ""){
startChar = xmlData[i];
}else{
startChar = "";
}
}else if(xmlData[i] === ">"){
if(startChar === ""){
break;
}
}
attrStr += xmlData[i];
var result = readAttributeStr(xmlData,i);
if(result === false) {
return { err: { code:"InvalidAttr",msg:"Attributes for " + tagName + " have open quote"}};
}
if(startChar !== "") return false;//Unclosed quote
attrStr = attrStr.trim();
//console.log(attrStr, attrStr);
var attrStr = result.value;
i = result.index;
if(attrStr[attrStr.length-1] === "/" ){//self closing tag
attrStr = attrStr.substring(0,attrStr.length-2);
if(!validateAttributeString(attrStr)){
return false;
attrStr = attrStr.substring(0,attrStr.length-1);
var isValid = validateAttributeString(attrStr, options);
if(isValid === true){
tagFound = true;
continue;
}else{
continue;
return isValid;
}
}else if(closingTag){
if(attrStr.length > 0){
return false;
//throw new Error("XML validation error: closing tag should not have any attribute");
if(attrStr.trim().length > 0){
return { err: { code:"InvalidTag",msg:"closing tag " + tagName + " can't have attributes or invalid starting."}};
}else{
var otg = tags.pop();
if(tagName !== otg){
return false;
//throw new Error("XML validation error: no mathicng closing tag");
return { err: { code:"InvalidTag",msg:"closing tag " + otg + " is expected inplace of "+tagName+"."}};
}
}
}else{
if(!validateAttributeString(attrStr)){
return false;
var isValid = validateAttributeString(attrStr, options);
if(isValid !== true ){
return isValid;
}
tags.push(tagName);
tags.push(tagName); tagFound = true;
}

@@ -112,4 +129,4 @@

if(xmlData[i] === " " || xmlData[i] === "\t") continue;
return false;
if(xmlData[i] === " " || xmlData[i] === "\t" || xmlData[i] === "\n" || xmlData[i] === "\r") continue;
return { err: { code:"InvalidChar",msg:"char " + xmlData[i] +" is not expected ."}};
}

@@ -119,5 +136,6 @@ }

if(tags.length > 0){
return false;
//throw new Error("XML validation error");
if(!tagFound){
return {err : { code : "InvalidXml", msg : "Start tag expected."}};
}else if(tags.length > 0){
return { err: { code:"InvalidXml",msg:"Invalid " + JSON.stringify(tags,null,4).replace(/\r?\n/g,"") +" found."}};
}

@@ -136,2 +154,18 @@

}
}else if( xmlData.length > i+8
&& xmlData[i+1] === "D"
&& xmlData[i+2] === "O"
&& xmlData[i+3] === "C"
&& xmlData[i+4] === "T"
&& xmlData[i+5] === "Y"
&& xmlData[i+6] === "P"
&& xmlData[i+7] === "E"){
var angleBracketsCount = 1;
for(i+=8;i<xmlData.length;i++){
if(xmlData[i] == "<") {angleBracketsCount++;}
else if(xmlData[i] == ">") {
angleBracketsCount--;
if(angleBracketsCount === 0) break;
}
}
}else if( xmlData.length > i+9

@@ -156,52 +190,70 @@ && xmlData[i+1] === "["

}
//attr, ="sd", a="amit's", a="sd"b="saf",
function validateAttributeString(attrStr){
var attrNames = [];
for(var i=0; i< attrStr.length; i++){
var startChar = "";
//read attribute name
var attrName = "";
for(;i < attrStr.length && attrStr[i] !== "=" ; i++) {
attrName +=attrStr[i];
/**
* Keep reading xmlData until '<' is found outside the attribute value.
* @param {string} xmlData
* @param {number} i
*/
function readAttributeStr(xmlData,i){
var attrStr = "";
var startChar = "";
for(;i < xmlData.length ;i++){
if(xmlData[i] === '"' || xmlData[i] === "'"){
if(startChar === ""){
startChar = xmlData[i];
}else{
startChar = "";
}
}else if(xmlData[i] === ">"){
if(startChar === ""){
break;
}
}
//validate attrName
attrName = attrName.trim();
attrStr += xmlData[i];
}
if(startChar !== "") return false;
return { value: attrStr, index: i};
}
/**
* Select all the attributes whether valid or invalid.
*/
var validAttrStrRegxp = new RegExp("(\\s*)([^\\s=]+)(\\s*=)?(\\s*(['\"])((.|\\n)*?)\\5)?", "g");
if(!attrNames.hasOwnProperty(attrName)){
attrNames[attrName]=1;
}else{
return false;
}
if(!validateAttrName(attrName)){
return false;
}
i++;
//attr, ="sd", a="amit's", a="sd"b="saf", ab cd=""
//skip whitespaces
for(;i < attrStr.length
&& (attrStr[i] === " "
|| attrStr[i] === "\t") ; i++);
function validateAttributeString(attrStr,options){
//console.log("start:"+attrStr+":end");
//read attribute value
startChar = attrStr[i++];
//if(attrStr.trim().length === 0) return true; //empty string
var attrVal = "";
for(;i < attrStr.length && attrStr[i] !== startChar; i++) {
attrVal +=attrStr[i];
}
var matches = util.getAllMatches(attrStr,validAttrStrRegxp);
var attrNames = [];
var attrObj = {};
//validate attrVal
if(startChar !== ""){
i++;
if(i<attrStr.length && (attrStr[i] !== " " && attrStr[i] !== "\t") ){//when no spce between 2 attributes : a="sd"b="saf"
return false;
}
startChar = "";
for(var i=0;i<matches.length;i++){
//console.log(matches[i]);
if(matches[i][1].length === 0){//nospace before attribute name: a="sd"b="saf"
return { err: { code:"InvalidAttr",msg:"attribute " + matches[i][2] + " has no space in starting."}};
}else if(matches[i][3] === undefined && !options.allowBooleanAttributes){//independent attribute: ab
return { err: { code:"InvalidAttr",msg:"boolean attribute " + matches[i][2] + " is not allowed."}};
}/* else if(matches[i][6] === undefined){//attribute without value: ab=
return { err: { code:"InvalidAttr",msg:"attribute " + matches[i][2] + " has no value assigned."}};
} */
var attrName=matches[i][2];
if(!validateAttrName(attrName)){
return { err: { code:"InvalidAttr",msg:"attribute " + attrName + " is an invalid name."}};
}
if(!attrNames.hasOwnProperty(attrName)){//check for duplicate attribute.
attrNames[attrName]=1;
}else{
return { err: { code:"InvalidAttr",msg:"attribute " + attrName + " is repeated."}};
}
}
return true;
}

@@ -211,2 +263,3 @@

function validateAttrName(attrName){

@@ -213,0 +266,0 @@ return util.doesMatch(attrName,validAttrRegxp);

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc