fast-xml-parser
Advanced tools
Comparing version 2.9.4 to 3.0.0
14
cli.js
@@ -19,6 +19,5 @@ #!/usr/bin/env node | ||
ignoreNameSpace : true, | ||
ignoreNonTextNodeAttr : false, | ||
ignoreTextNodeAttr : false, | ||
textNodeConversion : true, | ||
textAttrConversion : true | ||
ignoreAttributes : false, | ||
parseNodeValue : true, | ||
parseAttributeValue : true | ||
}; | ||
@@ -31,7 +30,6 @@ var fileName = ""; | ||
}else if(process.argv[i] === "-a"){ | ||
options.ignoreNonTextNodeAttr = true; | ||
options.ignoreTextNodeAttr = true; | ||
options.ignoreAttributes = true; | ||
}else if(process.argv[i] === "-c"){ | ||
options.textNodeConversion = false; | ||
options.textAttrConversion = false; | ||
options.parseNodeValue = false; | ||
options.parseAttributeValue = false; | ||
}else if(process.argv[i] === "-o"){ | ||
@@ -38,0 +36,0 @@ outputFileName = process.argv[++i]; |
{ | ||
"name": "fast-xml-parser", | ||
"version": "2.9.4", | ||
"version": "3.0.0", | ||
"description": "Validate XML or Parse XML to JS/JSON very fast without C/C++ based libraries", | ||
@@ -36,3 +36,4 @@ "main": "./src/parser.js", | ||
"assert", | ||
"arrayMode" | ||
"arrayMode", | ||
"big" | ||
], | ||
@@ -69,3 +70,4 @@ "author": "Amit Gupta (https://github.com/amitguptagwl)", | ||
"portfinder": "^1.0.13", | ||
"zombie": "^5.0.7" | ||
"zombie": "^5.0.7", | ||
"xml2js": "^0.4.19" | ||
}, | ||
@@ -72,0 +74,0 @@ "dependencies": { |
# [fast-xml-parser](https://www.npmjs.com/package/fast-xml-parser) | ||
Validate XML or Parse XML to JS/JSON very fast without C/C++ based libraries and no callback | ||
<p style="color:red;"> **Note**: If you are upgrading to v3, your existing code may start failing in both parsing and validation. I apologize for the breaking changes, but the code had to change to support large files and many other options. Please refer to the code example below for more detail.</p> | ||
You can use this library online (press try me button above), or as command from CLI, or in your website, or in npm repo. | ||
@@ -43,3 +44,3 @@ | ||
// when a tag has attributes | ||
/* upto 2.9.x | ||
var options = { | ||
@@ -57,2 +58,16 @@ attrPrefix : "@_", | ||
}; | ||
*/ | ||
//from 3.0.0 | ||
var options = { | ||
attributeNamePrefix : "@_", | ||
attrNodeName: false, | ||
textNodeName : "#text", | ||
ignoreAttributes : true, | ||
ignoreNameSpace : false, | ||
allowBooleanAttributes : false, | ||
parseNodeValue : true, | ||
parseAttributeValue : false, | ||
trimValues: true, | ||
decodeHTMLchar: false, | ||
}; | ||
if(fastXmlParser.validate(xmlData)=== true){//optional | ||
@@ -64,17 +79,17 @@ var jsonObj = fastXmlParser.parse(xmlData,options); | ||
var tObj = fastXmlParser.getTraversalObj(xmlData,options); | ||
var jsonObj = fastXmlParser.convertToJson(tObj); | ||
var jsonObj = fastXmlParser.convertToJson(tObj,options); | ||
``` | ||
**OPTIONS** : | ||
* **attributeNamePrefix** : prepend given string to attribute name for identification | ||
* **attrNodeName**: (Valid name) Group all the attributes as properties of given name. | ||
* **ignoreNonTextNodeAttr** : Ignore attributes of non-text node. | ||
* **ignoreTextNodeAttr** : Ignore attributes for text node | ||
* **ignoreAttributes** : Ignore attributes to be parsed. | ||
* **ignoreNameSpace** : Remove namespace string from tag and attribute names. | ||
* **ignoreRootElement** : Remove root element from parsed JSON. | ||
* **textNodeConversion** : Parse the value of text node to float or integer. | ||
* **textAttrConversion** : Parse the value of an attribute to float or integer. | ||
* **arrayMode** : Put the value(s) of a tag or attribute in an array. | ||
* **allowBooleanAttributes** : a tag can have attributes without any value | ||
* **parseNodeValue** : Parse the value of text node to float, integer, or boolean. | ||
* **parseAttributeValue** : Parse the value of an attribute to float, integer, or boolean. | ||
* **trimValues** : trim string values of an attribute or node | ||
* **decodeHTMLchar** : decodes any named and numerical character HTML references excluding CDATA part. | ||
To use from command line | ||
@@ -94,3 +109,4 @@ ```bash | ||
```js | ||
var isValid = parser.validate(xmlData); | ||
var result = parser.validate(xmlData); | ||
if(result !== true) console.log(result.err); | ||
var jsonObj = parser.parse(xmlData); | ||
@@ -115,29 +131,47 @@ ``` | ||
### Benchmark report | ||
![npm_xml2json_compare](https://cloud.githubusercontent.com/assets/7692328/22402086/7526a3a6-e5e2-11e6-8e6b-301691725c21.png) | ||
Don't forget to check the performance report on [comparejs](https://naturalintelligence.github.io/comparejs/?q=xml2json). | ||
| file size | fxp 3.0 validator (rps) | fxp 3.0 parser (rps) | xml2js 0.4.19 (rps) | | ||
| ---------- | ----------------------- | ------------------- | ------------------- | | ||
| 1.5k | 16581.06758 | 14032.09323 | 4615.930805 | | ||
| 1.5m | 14918.47793 | 13.23366098 | 5.90682005 | | ||
| 13m | 1.834479235 | 1.135582008 | -1 | | ||
| 1.3k with CDATA | 30583.35319 | 43160.52342 | 8398.556349 | | ||
| 1.3m with CDATA | 27.29266471 | 52.68877009 | 7.966000795 | | ||
| 1.6k with cdata,prolog,doctype | 27690.26082 | 41433.98547 | 7872.399268 | | ||
| 98m | 0.08473858148 | 0.2600104004 | -1 | | ||
**validator benchmark: 21000 tps** | ||
* -1 indicates error or incorrect output. | ||
### Limitation | ||
* Parser doesn't check if the XML is valid or not. If the XML is not valid you may get invalid result. So you can call the validator function first to check the structure. | ||
* This is based on the JS regular expression engine. So, due to its limitations, fast-xml-parser faces performance issues when it processes an XML string (data) that is very large, like 10mb or more. (I'll look into this as soon as I get some free time). **UPDATE**: from v2.9.0, I have rewritten the validator code so that the validator can handle large files as well. I have tested it up to a 98mb xml file. I have some more ideas to increase the speed, and I'll work on them whenever I get the time. | ||
![npm_xml2json_compare](static/img/fxpv3-vs-xml2jsv0419_chart.png) | ||
Report an issue or request for a feature [here](https://github.com/NaturalIntelligence/fast-xml-parser/issues) | ||
![npm_xml2json_compare](static/img/fxp-validatorv3.png) | ||
Your contribution in terms of donation, testing, bug fixes, code development etc. can help me to write fast algorithms. | ||
[<img src="https://www.paypalobjects.com/webstatic/en_US/btn/btn_donate_92x26.png" alt="Stubmatic donate button"/>](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=KQJAX48SPUKNC) | ||
# Changes from v3 | ||
**Give me a [star](https://github.com/NaturalIntelligence/fast-xml-parser)**, if you really like this project. | ||
* It can handle big file now (I have tested up to 98mb). Performance report is given above. | ||
* Meaningful error messages from validator | ||
**Fund collected (since the starting of the project)** : $0 | ||
``` | ||
"err": { | ||
"code": "InvalidAttr", | ||
"msg": "Attributes for rootNode have open quote" | ||
} | ||
``` | ||
* Updated options : check snippet above | ||
* Parse boolean values as well. E.g. `"true"` to `true` | ||
* You can set the parser not to *trim* whitespaces from attribute or tag / node values. | ||
* You can set the parser to HTML decode tag / node and attribute values. However, CDATA values will not be HTML decoded. | ||
* Tag / node value will not be parsed if CDATA is present. | ||
* You can set validator and parser to allow boolean values. | ||
* Few validation and parsing bugs are also fixed | ||
Some of my other NPM projects | ||
- [stubmatic](https://github.com/NaturalIntelligence/Stubmatic) : A stub server to mock behaviour of HTTP(s) / REST / SOAP services. Stubbing redis is on the way. | ||
- [compare js](https://github.com/NaturalIntelligence/comparejs) : compare the features of JS code, libraries, and NPM repos. | ||
- [fast-lorem-ipsum](https://github.com/amitguptagwl/fast-lorem-ipsum) : Generate lorem ipsum words, sentences, paragraph very quickly. | ||
- [fast-lorem-ipsum](https://github.com/amitguptagwl/fast-lorem-ipsum) : Generate lorem ipsum words, sentences, paragraph very quickly. | ||
### TODO | ||
* P2: parser online demo with more options | ||
* P2: validating XML stream data | ||
* P2: validator cli | ||
* P2: fast XML prettyfier |
@@ -0,31 +1,26 @@ | ||
var util = require("./util"); | ||
var xmlNode = require("./xmlNode"); | ||
var he = require("he"); | ||
var getAllMatches = require("./util").getAllMatches; | ||
var TagType = {"OPENING":1, "CLOSING":2, "SELF":3, "CDATA": 4}; | ||
var xmlNode = function(tagname,parent,val){ | ||
this.tagname = tagname; | ||
this.parent = parent; | ||
this.child = []; | ||
this.val = val; | ||
this.addChild = function (child){ | ||
this.child.push(child); | ||
}; | ||
}; | ||
//var tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g"); | ||
//var tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g"); | ||
//var tagsRegx = new RegExp("<(\\/?[a-zA-Z0-9_:]+)([^>\\/]*)(\\/?)>([^<]+)?","g"); | ||
//var tagsRegx = new RegExp("<(\\/?[\\w:-]+)([^>]*)>([^<]+)?","g"); | ||
//var cdataRegx = "<!\\[CDATA\\[([^\\]\\]]*)\\]\\]>"; | ||
var cdataRegx = "<!\\[CDATA\\[(.*?)(\\]\\]>)"; | ||
var tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g"); | ||
//treat cdata as a tag | ||
var defaultOptions = { | ||
attrPrefix : "@_", | ||
attributeNamePrefix : "@_", | ||
attrNodeName: false, | ||
textNodeName : "#text", | ||
ignoreNonTextNodeAttr : true, | ||
ignoreTextNodeAttr : true, | ||
ignoreAttributes : true, | ||
ignoreNameSpace : false, | ||
ignoreRootElement : false, | ||
textNodeConversion : true, | ||
textAttrConversion : false, | ||
arrayMode : false | ||
allowBooleanAttributes : false, //a tag can have attributes without any value | ||
//ignoreRootElement : false, | ||
parseNodeValue : true, | ||
parseAttributeValue : false, | ||
arrayMode : false, | ||
trimValues: true, //Trim string values of tag and attributes | ||
decodeHTMLchar: false, | ||
//decodeStrict: false, | ||
}; | ||
@@ -35,5 +30,15 @@ | ||
if(!options) options = {}; | ||
var props = ["attrPrefix","attrNodeName","ignoreNonTextNodeAttr","ignoreTextNodeAttr","ignoreNameSpace","ignoreRootElement","textNodeName","textNodeConversion","textAttrConversion","arrayMode"]; | ||
for (var i = 0; i < props.length; i++) { | ||
if(options[props[i]] === undefined){ | ||
var props = ["attributeNamePrefix", | ||
"attrNodeName", | ||
"ignoreAttributes", | ||
"ignoreNameSpace", | ||
"textNodeName", | ||
"parseNodeValue", | ||
"parseAttributeValue", | ||
"arrayMode", | ||
"trimValues", | ||
]; | ||
var len = props.length; | ||
for (var i = 0; i < len; i++) { | ||
if(typeof options[props[i]] === "undefined"){ | ||
options[props[i]] = defaultOptions[props[i]]; | ||
@@ -47,75 +52,85 @@ } | ||
options = buildOptions(options); | ||
//xmlData = xmlData.replace(/>(\s+)/g, ">");//Remove spaces and make it single line. | ||
xmlData = xmlData.replace(/<!--(.|\n)*?-->/g, "");//Remove single and multiline comments | ||
var tags = getAllMatches(xmlData,tagsRegx); | ||
//console.log(tags); | ||
xmlData = xmlData.replace(/\r?\n/g, " ");//make it single line | ||
xmlData = xmlData.replace(/<!--.*?-->/g, "");//Remove comments | ||
var xmlObj = new xmlNode('!xml'); | ||
var currentNode = xmlObj; | ||
for (var i = 0; i < tags.length ; i++) { | ||
var tag = resolveNameSpace(tags[i][1],options.ignoreNameSpace), | ||
nexttag = i+1 < tags.length ? resolveNameSpace(tags[i+1][1],options.ignoreNameSpace) : undefined, | ||
attrsStr = tags[i][2], attrs, | ||
val = tags[i][4] === undefined ? tags[i][6] : simplifyCDATA(tags[i][0]); | ||
if(tag.indexOf("/") === 0){//ending tag | ||
currentNode = currentNode.parent; | ||
continue; | ||
} | ||
var tagsRegx = new RegExp("<((!\\[CDATA\\[(.*?)(\\]\\]>))|((\\w*:)?([\\w:\\-\\._]+))([^>]*)>|((\\/)((\\w*:)?([\\w:\\-\\._]+))>))([^<]*)","g"); | ||
var tag = tagsRegx.exec(xmlData); | ||
var nextTag = tagsRegx.exec(xmlData); | ||
var previousMatch,nextMatch; | ||
while (tag) { | ||
var tagType = checkForTagType(tag); | ||
var selfClosingTag = attrsStr.charAt(attrsStr.length-1) === '/'; | ||
var childNode = new xmlNode(tag,currentNode); | ||
if(tagType === TagType.CLOSING){ | ||
//add parsed data to parent node | ||
if(currentNode.parent && tag[14]){ | ||
currentNode.parent.val = util.getValue(currentNode.parent.val) + "" + processTagValue(tag[14],options); | ||
} | ||
if(selfClosingTag){ | ||
attrs = buildAttributesArr(attrsStr,options.ignoreTextNodeAttr,options.attrPrefix,options.attrNodeName,options.ignoreNameSpace,options.textAttrConversion); | ||
childNode.val = attrs || ""; | ||
currentNode.addChild(childNode); | ||
}else if( ("/" + tag) === nexttag){ //Text node | ||
attrs = buildAttributesArr(attrsStr,options.ignoreTextNodeAttr,options.attrPrefix,options.attrNodeName,options.ignoreNameSpace,options.textAttrConversion); | ||
val = parseValue(val,options.textNodeConversion); | ||
if(attrs){ | ||
attrs[options.textNodeName] = val; | ||
childNode.val = attrs; | ||
}else{ | ||
childNode.val = val; | ||
currentNode = currentNode.parent; | ||
}else if(tagType === TagType.CDATA){ | ||
//no attribute | ||
//add text to parent node | ||
//add parsed data to parent node | ||
currentNode.val = (currentNode.val || "") + (tag[3] || "") + processTagValue(tag[14],options); | ||
}else if(tagType === TagType.SELF){ | ||
var childNode = new xmlNode( options.ignoreNameSpace ? tag[7] : tag[5],currentNode, ""); | ||
if(tag[8] && tag[8].length > 1){ | ||
tag[8] = tag[8].substr(0,tag[8].length -1); | ||
} | ||
childNode.attrsMap = buildAttributesMap(tag[8],options); | ||
currentNode.addChild(childNode); | ||
i++; | ||
}else if( (nexttag && nexttag.indexOf("/") === -1) && (val !== undefined && val != null && val.trim() !== "" )){ //Text node with sub nodes | ||
val = parseValue(val,options.textNodeConversion); | ||
childNode.addChild(new xmlNode(options.textNodeName,childNode,val)); | ||
}else{//TagType.OPENING | ||
var childNode = new xmlNode( options.ignoreNameSpace ? tag[7] : tag[5],currentNode,processTagValue(tag[14],options)); | ||
childNode.attrsMap = buildAttributesMap(tag[8],options); | ||
currentNode.addChild(childNode); | ||
currentNode = childNode; | ||
}else{//starting tag | ||
attrs = buildAttributesArr(attrsStr,options.ignoreNonTextNodeAttr,options.attrPrefix,options.attrNodeName,options.ignoreNameSpace,options.textAttrConversion); | ||
if(attrs){ | ||
for (var prop in attrs) { | ||
if(attrs.hasOwnProperty(prop)){ | ||
childNode.addChild(new xmlNode(prop,childNode,attrs[prop])); | ||
} | ||
} | ||
} | ||
currentNode.addChild(childNode); | ||
currentNode = childNode; | ||
} | ||
tag = nextTag; | ||
nextTag = tagsRegx.exec(xmlData); | ||
} | ||
return xmlObj; | ||
}; | ||
var xml2json = function (xmlData,options){ | ||
return convertToJson(getTraversalObj(xmlData,options), buildOptions(options).arrayMode); | ||
}; | ||
function processTagValue(val,options){ | ||
if(val){ | ||
if(options.trimValues){ | ||
val = val.trim(); | ||
} | ||
if(options.decodeHTMLchar){ | ||
val = he.decode(val); | ||
} | ||
val = parseValue(val,options.parseNodeValue); | ||
} | ||
var cdRegx = new RegExp(cdataRegx,"g"); | ||
return val; | ||
} | ||
function simplifyCDATA(cdata){ | ||
var result = getAllMatches(cdata,cdRegx); | ||
var val = ""; | ||
for (var i = 0; i < result.length ; i++) { | ||
val+=result[i][1]; | ||
function checkForTagType(match){ | ||
if(match[4] === "]]>"){ | ||
return TagType.CDATA; | ||
}else if(match[10] === "/"){ | ||
return TagType.CLOSING; | ||
}else if(typeof match[8] !== "undefined" && match[8].substr(match[8].length-1) === "/"){ | ||
return TagType.SELF; | ||
}else{ | ||
return TagType.OPENING; | ||
} | ||
return val; | ||
} | ||
function resolveNameSpace(tagname,ignore){ | ||
if(ignore){ | ||
var fakeCall = function(a) {return a;} | ||
var fakeCallNoReturn = function() {} | ||
var xml2json = function (xmlData,options){ | ||
options = buildOptions(options); | ||
return convertToJson(getTraversalObj(xmlData,options), options.textNodeName, options.arrayMode); | ||
}; | ||
function resolveNameSpace(tagname,options){ | ||
if(options.ignoreNameSpace ){ | ||
var tags = tagname.split(":"); | ||
@@ -131,44 +146,48 @@ var prefix = tagname.charAt(0) === "/" ? "/" : ""; | ||
return tagname; | ||
} | ||
function parseValue(val,conversion,isAttribute){ | ||
if(val){ | ||
if(!conversion || isNaN(val)){ | ||
val = "" + he.decode(val, {isAttributeValue:isAttribute, strict:true}); | ||
if(isAttribute) { | ||
val = val.replace(/\r?\n/g, " "); | ||
} | ||
function parseValue(val,shouldParse){ | ||
if(shouldParse && typeof val === "string"){ | ||
if(val.trim() === "" || isNaN(val)){ | ||
val = val === "true" ? true : val === "false" ? false : val; | ||
}else{ | ||
if(val.indexOf(".") !== -1){ | ||
if(parseFloat){ | ||
val = parseFloat(val); | ||
}else{ | ||
val = Number.parseFloat(val); | ||
} | ||
val = Number.parseFloat(val); | ||
}else{ | ||
if(parseInt){ | ||
val = parseInt(val,10); | ||
}else{ | ||
val = Number.parseInt(val,10); | ||
} | ||
val = Number.parseInt(val,10); | ||
} | ||
} | ||
return val; | ||
}else{ | ||
val = ""; | ||
if(util.isExist(val)) return val; | ||
else return ""; | ||
} | ||
return val; | ||
} | ||
var attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm"); | ||
function buildAttributesArr(attrStr,ignore,prefix,attrNodeName,ignoreNS,conversion){ | ||
attrStr = attrStr || attrStr.trim(); | ||
if(!ignore && attrStr.length > 3){ | ||
var matches = getAllMatches(attrStr,attrsRegx); | ||
//TODO: change regex to capture NS | ||
//var attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm"); | ||
var attrsRegx = new RegExp("([^\\s=]+)\\s*(=\\s*(['\"])(.*?)\\3)?","g"); | ||
function buildAttributesMap(attrStr,options){ | ||
if( !options.ignoreAttributes && typeof attrStr === "string" ){ | ||
//attrStr = attrStr || attrStr.trim(); | ||
var matches = util.getAllMatches(attrStr,attrsRegx); | ||
var len = matches.length; //don't make it inline | ||
var attrs = {}; | ||
for (var i = 0; i < matches.length; i++) { | ||
var attrName = resolveNameSpace(matches[i][1],ignoreNS); | ||
for (var i = 0; i < len ; i++) { | ||
var attrName = resolveNameSpace(matches[i][1],options); | ||
if(attrName.length && attrName !== "xmlns") { | ||
attrs[prefix + attrName] = parseValue(matches[i][3], conversion, true); | ||
if(matches[i][4]){ | ||
if(options.trimValues){ | ||
matches[i][4] = matches[i][4].trim(); | ||
} | ||
if(options.decodeHTMLchar){ | ||
matches[i][4] = he.decode(matches[i][4], {isAttributeValue : true}); | ||
} | ||
attrs[options.attributeNamePrefix + attrName] = parseValue(matches[i][4],options.parseAttributeValue); | ||
}else if(options.allowBooleanAttributes){ | ||
attrs[options.attributeNamePrefix + attrName] = true; | ||
} | ||
} | ||
@@ -179,5 +198,5 @@ } | ||
} | ||
if(attrNodeName){ | ||
if(options.attrNodeName){ | ||
var attrCollection = {}; | ||
attrCollection[attrNodeName] = attrs; | ||
attrCollection[options.attrNodeName] = attrs; | ||
return attrCollection; | ||
@@ -189,25 +208,38 @@ } | ||
var convertToJson = function (node, arrayMode){ | ||
var convertToJson = function (node, textNodeName,arrayMode){ | ||
var jObj = {}; | ||
if(node.val !== undefined && node.val != null || node.val === "") { | ||
return node.val; | ||
//traver through all the children | ||
for (var index = 0; index < node.child.length; index++) { | ||
var prop = node.child[index].tagname; | ||
var obj = convertToJson(node.child[index],textNodeName, arrayMode); | ||
if(typeof jObj[prop] !== "undefined"){ | ||
if(!Array.isArray(jObj[prop])){ | ||
var swap = jObj[prop]; | ||
jObj[prop] = []; | ||
jObj[prop].push(swap); | ||
} | ||
jObj[prop].push(obj); | ||
}else{ | ||
jObj[prop] = arrayMode ? [obj] : obj; | ||
} | ||
} | ||
util.merge(jObj,node.attrsMap); | ||
//add attrsMap as new children | ||
if(util.isEmptyObject(jObj)){ | ||
return util.isExist(node.val)? node.val : ""; | ||
}else{ | ||
for (var index = 0; index < node.child.length; index++) { | ||
var prop = node.child[index].tagname; | ||
var obj = convertToJson(node.child[index], arrayMode); | ||
if(jObj[prop] !== undefined){ | ||
if(!Array.isArray(jObj[prop])){ | ||
var swap = jObj[prop]; | ||
jObj[prop] = []; | ||
jObj[prop].push(swap); | ||
} | ||
jObj[prop].push(obj); | ||
}else{ | ||
jObj[prop] = arrayMode ? [obj] : obj; | ||
if(util.isExist(node.val)){ | ||
if(!(typeof node.val === "string" && node.val === "")){ | ||
jObj[textNodeName] = node.val; | ||
} | ||
} | ||
} | ||
//add value | ||
return jObj; | ||
}; | ||
exports.parse = xml2json; | ||
@@ -214,0 +246,0 @@ exports.getTraversalObj = getTraversalObj; |
@@ -5,4 +5,5 @@ var getAllMatches = function(string, regex) { | ||
while (match) { | ||
var allmatches = []; | ||
for (var index = 0; index < match.length; index++) { | ||
var allmatches = []; | ||
var len = match.length; | ||
for (var index = 0; index < len; index++) { | ||
allmatches.push(match[index]); | ||
@@ -19,3 +20,3 @@ } | ||
var match = regex.exec(string); | ||
if(match === null || match === undefined) return false; | ||
if(match === null || typeof match === "undefined") return false; | ||
else return true; | ||
@@ -28,4 +29,39 @@ } | ||
exports.isExist= function(v){ | ||
return typeof v !== "undefined"; | ||
} | ||
exports.isEmptyObject= function(obj) { | ||
return Object.keys(obj).length === 0 | ||
} | ||
/** | ||
* Copy all the properties of a into b. | ||
* @param {*} target | ||
* @param {*} a | ||
*/ | ||
exports.merge =function (target,a){ | ||
if(a){ | ||
var keys = Object.keys(a) // will return an array of own properties | ||
var len = keys.length; //don't make it inline | ||
for(var i = 0; i < len; i++){ | ||
target[keys[i]] = a[keys[i]] ; | ||
} | ||
} | ||
} | ||
/* exports.merge =function (b,a){ | ||
return Object.assign(b,a); | ||
} */ | ||
exports.getValue = function (v){ | ||
if(exports.isExist(v)){ | ||
return v; | ||
}else{ | ||
return ""; | ||
} | ||
} | ||
exports.doesMatch = doesMatch | ||
exports.doesNotMatch = doesNotMatch | ||
exports.getAllMatches = getAllMatches; |
var util = require("./util"); | ||
var tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g"); | ||
exports.validate = function(xmlData){ | ||
xmlData = xmlData.replace(/(\r\n|\n|\r)/gm,"");//make it single line | ||
xmlData = xmlData.replace(/(^\s*<\?xml.*?\?>)/g,"");//Remove XML starting tag | ||
xmlData = xmlData.replace(/(<!DOCTYPE[\s\w\"\.\/\-\:]+(\[.*\])*\s*>)/g,"");//Remove DOCTYPE | ||
var defaultOptions = {
    allowBooleanAttributes : false, //A tag can have attributes without any value
};
/**
 * Resolve validator options against `defaultOptions`.
 *
 * The caller's object is never modified: a fresh object is returned that
 * carries the caller's own enumerable properties plus a default for every
 * known option that the caller left undefined.
 *
 * @param {Object} [options] user supplied options; may be omitted or null
 * @returns {Object} new options object with all known options populated
 */
var buildOptions = function (options){
    var resolved = {};
    var props = ["allowBooleanAttributes"];
    var i, len;
    if(options){
        // keep whatever else the caller passed, without touching their object
        var ownKeys = Object.keys(options);
        for (i = 0, len = ownKeys.length; i < len; i++) {
            resolved[ownKeys[i]] = options[ownKeys[i]];
        }
    }
    for (i = 0, len = props.length; i < len; i++) {
        // read through `options` so inherited values are honoured as before
        var given = options ? options[props[i]] : undefined;
        resolved[props[i]] = given === undefined ? defaultOptions[props[i]] : given;
    }
    return resolved;
};
//var tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g"); | ||
exports.validate = function(xmlData, options){ | ||
options = buildOptions(options); | ||
//xmlData = xmlData.replace(/(\r\n|\n|\r)/gm,"");//make it single line | ||
//xmlData = xmlData.replace(/(^\s*<\?xml.*?\?>)/g,"");//Remove XML starting tag | ||
//xmlData = xmlData.replace(/(<!DOCTYPE[\s\w\"\.\/\-\:]+(\[.*\])*\s*>)/g,"");//Remove DOCTYPE | ||
var tags = []; | ||
var tagFound = false; | ||
for (var i = 0; i < xmlData.length; i++) { | ||
@@ -17,4 +36,15 @@ | ||
i++; | ||
if(xmlData[i] === "!"){ | ||
if(xmlData[i] === "?"){ | ||
if(i !== 1){ | ||
return {err : { code : "InvalidXml", msg : "XML declaration allowed only at the start of the document."}}; | ||
}else{ | ||
//read until ?> is found | ||
for(;i<xmlData.length;i++){ | ||
if(xmlData[i] == "?" && xmlData[i+1] == ">"){ | ||
i++; | ||
break; | ||
} | ||
} | ||
} | ||
}else if(xmlData[i] === "!"){ | ||
i = readCommentAndCDATA(xmlData,i); | ||
@@ -41,53 +71,40 @@ continue; | ||
if(tagName[tagName.length-1] === "/"){//self closing tag without attributes | ||
tagName = tagName.substring(0,tagName.length-2); | ||
return validateTagName(tagName); | ||
tagName = tagName.substring(0,tagName.length-1); | ||
continue; | ||
} | ||
if(!validateTagName(tagName)) return false; | ||
if(!validateTagName(tagName)) | ||
return { err: { code:"InvalidTag",msg:"Tag " + tagName + " is an invalid name."}}; | ||
var attrStr = ""; | ||
var startChar = ""; | ||
for(;i < xmlData.length ;i++){ | ||
if(xmlData[i] === '"' || xmlData[i] === "'"){ | ||
if(startChar === ""){ | ||
startChar = xmlData[i]; | ||
}else{ | ||
startChar = ""; | ||
} | ||
}else if(xmlData[i] === ">"){ | ||
if(startChar === ""){ | ||
break; | ||
} | ||
} | ||
attrStr += xmlData[i]; | ||
var result = readAttributeStr(xmlData,i); | ||
if(result === false) { | ||
return { err: { code:"InvalidAttr",msg:"Attributes for " + tagName + " have open quote"}}; | ||
} | ||
if(startChar !== "") return false;//Unclosed quote | ||
attrStr = attrStr.trim(); | ||
//console.log(attrStr, attrStr); | ||
var attrStr = result.value; | ||
i = result.index; | ||
if(attrStr[attrStr.length-1] === "/" ){//self closing tag | ||
attrStr = attrStr.substring(0,attrStr.length-2); | ||
if(!validateAttributeString(attrStr)){ | ||
return false; | ||
attrStr = attrStr.substring(0,attrStr.length-1); | ||
var isValid = validateAttributeString(attrStr, options); | ||
if(isValid === true){ | ||
tagFound = true; | ||
continue; | ||
}else{ | ||
continue; | ||
return isValid; | ||
} | ||
}else if(closingTag){ | ||
if(attrStr.length > 0){ | ||
return false; | ||
//throw new Error("XML validation error: closing tag should not have any attribute"); | ||
if(attrStr.trim().length > 0){ | ||
return { err: { code:"InvalidTag",msg:"closing tag " + tagName + " can't have attributes or invalid starting."}}; | ||
}else{ | ||
var otg = tags.pop(); | ||
if(tagName !== otg){ | ||
return false; | ||
//throw new Error("XML validation error: no mathicng closing tag"); | ||
return { err: { code:"InvalidTag",msg:"closing tag " + otg + " is expected inplace of "+tagName+"."}}; | ||
} | ||
} | ||
}else{ | ||
if(!validateAttributeString(attrStr)){ | ||
return false; | ||
var isValid = validateAttributeString(attrStr, options); | ||
if(isValid !== true ){ | ||
return isValid; | ||
} | ||
tags.push(tagName); | ||
tags.push(tagName); tagFound = true; | ||
} | ||
@@ -112,4 +129,4 @@ | ||
if(xmlData[i] === " " || xmlData[i] === "\t") continue; | ||
return false; | ||
if(xmlData[i] === " " || xmlData[i] === "\t" || xmlData[i] === "\n" || xmlData[i] === "\r") continue; | ||
return { err: { code:"InvalidChar",msg:"char " + xmlData[i] +" is not expected ."}}; | ||
} | ||
@@ -119,5 +136,6 @@ } | ||
if(tags.length > 0){ | ||
return false; | ||
//throw new Error("XML validation error"); | ||
if(!tagFound){ | ||
return {err : { code : "InvalidXml", msg : "Start tag expected."}}; | ||
}else if(tags.length > 0){ | ||
return { err: { code:"InvalidXml",msg:"Invalid " + JSON.stringify(tags,null,4).replace(/\r?\n/g,"") +" found."}}; | ||
} | ||
@@ -136,2 +154,18 @@ | ||
} | ||
}else if( xmlData.length > i+8 | ||
&& xmlData[i+1] === "D" | ||
&& xmlData[i+2] === "O" | ||
&& xmlData[i+3] === "C" | ||
&& xmlData[i+4] === "T" | ||
&& xmlData[i+5] === "Y" | ||
&& xmlData[i+6] === "P" | ||
&& xmlData[i+7] === "E"){ | ||
var angleBracketsCount = 1; | ||
for(i+=8;i<xmlData.length;i++){ | ||
if(xmlData[i] == "<") {angleBracketsCount++;} | ||
else if(xmlData[i] == ">") { | ||
angleBracketsCount--; | ||
if(angleBracketsCount === 0) break; | ||
} | ||
} | ||
}else if( xmlData.length > i+9 | ||
@@ -156,52 +190,70 @@ && xmlData[i+1] === "[" | ||
} | ||
//attr, ="sd", a="amit's", a="sd"b="saf", | ||
function validateAttributeString(attrStr){ | ||
var attrNames = []; | ||
for(var i=0; i< attrStr.length; i++){ | ||
var startChar = ""; | ||
//read attribute name | ||
var attrName = ""; | ||
for(;i < attrStr.length && attrStr[i] !== "=" ; i++) { | ||
attrName +=attrStr[i]; | ||
/**
 * Read the raw attribute text of a tag.
 *
 * Scans xmlData from index i and collects characters until the first '>'
 * that lies outside a quoted attribute value, or until the end of the data.
 * A value opened with one quote character is closed only by the same
 * character, so `a="amit's"` and `a='say "hi"'` are each read as one value.
 *
 * @param {string} xmlData the XML text being scanned
 * @param {number} i index at which to start reading
 * @returns {{value: string, index: number}|boolean} the collected text and the
 *          index where scanning stopped (the closing '>' or xmlData.length),
 *          or false when a quoted value is still open at the end of the data
 */
function readAttributeStr(xmlData,i){
    var attrStr = "";
    var startChar = ""; // quote character of the value currently open, "" when none
    for(;i < xmlData.length ;i++){
        var ch = xmlData[i];
        if(ch === '"' || ch === "'"){
            if(startChar === ""){
                startChar = ch;
            }else if(startChar === ch){
                startChar = "";
            }
            // the other quote character inside an open value is plain data
        }else if(ch === ">"){
            if(startChar === ""){
                break;
            }
        }
        attrStr += ch;
    }
    if(startChar !== "") return false;
    return { value: attrStr, index: i};
}
/** | ||
* Select all the attributes whether valid or invalid. | ||
*/ | ||
var validAttrStrRegxp = new RegExp("(\\s*)([^\\s=]+)(\\s*=)?(\\s*(['\"])((.|\\n)*?)\\5)?", "g"); | ||
if(!attrNames.hasOwnProperty(attrName)){ | ||
attrNames[attrName]=1; | ||
}else{ | ||
return false; | ||
} | ||
if(!validateAttrName(attrName)){ | ||
return false; | ||
} | ||
i++; | ||
//attr, ="sd", a="amit's", a="sd"b="saf", ab cd="" | ||
//skip whitespaces | ||
for(;i < attrStr.length | ||
&& (attrStr[i] === " " | ||
|| attrStr[i] === "\t") ; i++); | ||
function validateAttributeString(attrStr,options){ | ||
//console.log("start:"+attrStr+":end"); | ||
//read attribute value | ||
startChar = attrStr[i++]; | ||
//if(attrStr.trim().length === 0) return true; //empty string | ||
var attrVal = ""; | ||
for(;i < attrStr.length && attrStr[i] !== startChar; i++) { | ||
attrVal +=attrStr[i]; | ||
} | ||
var matches = util.getAllMatches(attrStr,validAttrStrRegxp); | ||
var attrNames = []; | ||
var attrObj = {}; | ||
//validate attrVal | ||
if(startChar !== ""){ | ||
i++; | ||
if(i<attrStr.length && (attrStr[i] !== " " && attrStr[i] !== "\t") ){//when no spce between 2 attributes : a="sd"b="saf" | ||
return false; | ||
} | ||
startChar = ""; | ||
for(var i=0;i<matches.length;i++){ | ||
//console.log(matches[i]); | ||
if(matches[i][1].length === 0){//nospace before attribute name: a="sd"b="saf" | ||
return { err: { code:"InvalidAttr",msg:"attribute " + matches[i][2] + " has no space in starting."}}; | ||
}else if(matches[i][3] === undefined && !options.allowBooleanAttributes){//independent attribute: ab | ||
return { err: { code:"InvalidAttr",msg:"boolean attribute " + matches[i][2] + " is not allowed."}}; | ||
}/* else if(matches[i][6] === undefined){//attribute without value: ab= | ||
return { err: { code:"InvalidAttr",msg:"attribute " + matches[i][2] + " has no value assigned."}}; | ||
} */ | ||
var attrName=matches[i][2]; | ||
if(!validateAttrName(attrName)){ | ||
return { err: { code:"InvalidAttr",msg:"attribute " + attrName + " is an invalid name."}}; | ||
} | ||
if(!attrNames.hasOwnProperty(attrName)){//check for duplicate attribute. | ||
attrNames[attrName]=1; | ||
}else{ | ||
return { err: { code:"InvalidAttr",msg:"attribute " + attrName + " is repeated."}}; | ||
} | ||
} | ||
return true; | ||
} | ||
@@ -211,2 +263,3 @@ | ||
function validateAttrName(attrName){ | ||
@@ -213,0 +266,0 @@ return util.doesMatch(attrName,validAttrRegxp); |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
200590
15
637
173
0
10