@file-type/xml
Advanced tools
Comparing version 0.2.1 to 0.3.0
278
lib/index.js
@@ -1,42 +0,43 @@ | ||
import sax from 'sax'; | ||
/**
 * Tests whether `array` begins with the exact sequence of values in `prefix`.
 *
 * Elements are compared with strict equality, index by index.
 *
 * @param {ArrayLike<number>} array - Sequence to inspect (e.g. a byte buffer).
 * @param {ArrayLike<number>} prefix - Values expected at the start of `array`.
 * @returns {boolean} `true` when every element of `prefix` equals the element
 *   at the same index of `array`; `false` otherwise, including when `prefix`
 *   is longer than `array`.
 */
function startsWith(array, prefix) {
  // A prefix longer than the data can never match.
  if (prefix.length > array.length) {
    return false;
  }
  for (let i = 0; i < prefix.length; i++) {
    if (array[i] !== prefix[i]) {
      return false;
    }
  }
  return true;
}
/**
 * Checks whether a byte sample starts with the XML declaration opener
 * `<?xml ` (bytes 60, 63, 120, 109, 108, 32), optionally preceded by a
 * byte-order mark, in UTF-8, UTF-16BE or UTF-16LE.
 *
 * @param {ArrayLike<number>} array - Leading bytes of the input.
 * @returns {{xml: true, encoding: string, offset: number} | {xml: false}}
 *   On a match: the detected text encoding and `offset`, the number of
 *   leading byte-order-mark bytes that precede the declaration (0 when there
 *   is no BOM). Otherwise `{ xml: false }`.
 */
function isXml(array) {
  // Signatures are tried in this order; the first matching prefix wins.
  const signatures = [
    // `<?xml ` in UTF-8, no BOM
    { prefix: [60, 63, 120, 109, 108, 32], encoding: 'utf-8', offset: 0 },
    // UTF-8 BOM
    { prefix: [0xEF, 0xBB, 0xBF, 60, 63, 120, 109, 108, 32], encoding: 'utf-8', offset: 3 },
    // UTF-16 big-endian BOM (FE FF)
    { prefix: [0xFE, 0xFF, 0, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32], encoding: 'utf-16be', offset: 2 },
    // UTF-16 little-endian BOM (FF FE)
    { prefix: [0xFF, 0xFE, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32, 0], encoding: 'utf-16le', offset: 2 },
    // UTF-16 big-endian, no BOM
    { prefix: [0, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32], encoding: 'utf-16be', offset: 0 },
    // UTF-16 little-endian, no BOM
    { prefix: [60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32, 0], encoding: 'utf-16le', offset: 0 },
  ];
  for (const { prefix, encoding, offset } of signatures) {
    if (startsWith(array, prefix)) {
      return { xml: true, encoding, offset };
    }
  }
  return { xml: false };
}
/**
 * Splits an element's qualified name into its local name and looks up the
 * namespace declared for it on that same element.
 *
 * Only `xmlns` / `xmlns:<prefix>` attributes present on `node` itself are
 * consulted.
 *
 * @param {{name: string, attributes: Object<string, string>}} node - Opening
 *   tag, with its qualified name and its attributes keyed by attribute name.
 * @returns {{name: string, ns: (string|undefined)} | undefined} The local name
 *   and the matching namespace value (`undefined` when the element does not
 *   declare one), or `undefined` when the name contains more than one colon.
 */
function extractNsElement(node) {
  const parts = node.name.split(':');
  if (parts.length === 1) {
    // Unprefixed name: the default namespace applies.
    return { name: parts[0], ns: node.attributes['xmlns'] };
  }
  if (parts.length === 2) {
    // `prefix:local`: resolve the prefix through `xmlns:prefix`.
    return { name: parts[1], ns: node.attributes[`xmlns:${parts[0]}`] };
  }
  // More than one colon: not a `prefix:local` name; callers must handle this.
  return undefined;
}
/** | ||
@@ -46,20 +47,19 @@ * Maps the root element namespace to corresponding file-type | ||
/**
 * Maps a root element namespace URI to the corresponding file type
 * (`ext` and `mime`).
 *
 * Built on a null prototype because it is indexed with strings taken from the
 * parsed document: on a plain object literal a key such as `constructor` or
 * `__proto__` would resolve to an inherited `Object.prototype` member instead
 * of `undefined`.
 */
const namespaceMapping = Object.assign(Object.create(null), {
  'http://www.w3.org/2000/svg': {
    ext: 'svg',
    mime: 'image/svg+xml',
  },
  'http://www.w3.org/1999/xhtml': {
    ext: 'xhtml',
    mime: 'application/xhtml+xml',
  },
  'http://www.opengis.net/kml/2.2': {
    ext: 'kml',
    mime: 'application/vnd.google-earth.kml+xml',
  },
  'http://www.opengis.net/gml': {
    ext: 'gml',
    mime: 'application/gml+xml',
  },
});
/** | ||
@@ -71,104 +71,86 @@ * Maps the root element name to corresponding file-type. | ||
/**
 * Maps a root element name to the corresponding file type (`ext` and `mime`).
 *
 * Built on a null prototype because it is indexed with element names taken
 * from the parsed document: on a plain object literal a root element named
 * `constructor` would resolve to the inherited `Object` function instead of
 * `undefined`.
 */
const rootNameMapping = Object.assign(Object.create(null), {
  rss: {
    ext: 'rss',
    mime: 'application/rss+xml',
  },
  'score-partwise': {
    ext: 'musicxml',
    mime: 'application/vnd.recordare.musicxml+xml',
  },
  svg: {
    ext: 'svg',
    mime: 'image/svg+xml',
  },
});
/**
 * Returns `table[key]` only when `key` is an own property of `table`.
 * Keys come from the parsed document, so inherited members such as
 * `constructor` must never be reported as a file type.
 */
function lookupOwnEntry(table, key) {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Incremental XML file-type detector.
 *
 * Text is fed in chunks through `write()`. The first opening tag seen is
 * treated as the root element and resolved to a file type: by its namespace
 * when it declares one, otherwise by its element name.
 *
 * Fields read by callers:
 * - `fileType`: the resolved `{ext, mime}` entry, or `undefined`.
 * - `onEnd`: `true` once no further input is needed (type resolved, a parse
 *   error occurred, or `close()` was called).
 */
export class XmlTextDetector {
  /**
   * @param {{fullScan?: boolean}} [options] - When `fullScan` is truthy,
   *   resolving the root element does not set `onEnd`, so the caller keeps
   *   feeding the rest of the document.
   */
  constructor(options) {
    this.options = options ?? {};
    this.firstTag = true;
    this.onEnd = false;
    this.fileType = undefined;
    this.parser = sax.parser(true);
    // Number of currently open elements: +1 per opening tag, -1 per closing tag.
    this.nesting = 0;
    this.parser.onerror = (e) => {
      if (e.message.startsWith('Invalid character entity')) { // Allow entity reference
        // Per the sax documentation the error stays on `parser.error` and
        // blocks further parsing until cleared; resume() clears it so the
        // tolerated error does not make the next write()/close() throw.
        this.parser.resume();
        return;
      }
      // Any other parse error: discard the result and stop asking for input.
      // NOTE(review): the error is left on the parser here, so a later
      // write()/close() presumably rethrows it - confirm this is intended.
      this.fileType = undefined;
      this.onEnd = true;
    };
    this.parser.onopentag = (node) => {
      ++this.nesting;
      // Only the first opening tag (the root element) is inspected.
      if (!this.firstTag || this.onEnd) {
        return;
      }
      this.firstTag = false;
      const nsNode = extractNsElement(node);
      if (nsNode?.ns) {
        // Resolve file-type by root element namespace
        this.fileType = lookupOwnEntry(namespaceMapping, nsNode.ns.toLowerCase());
      } else if (nsNode?.name) {
        // Fall back on element name if there is no namespace
        this.fileType = lookupOwnEntry(rootNameMapping, nsNode.name.toLowerCase());
      }
      if (this.fileType && !this.options.fullScan) {
        this.onEnd = true;
      }
    };
    this.parser.onclosetag = () => {
      --this.nesting;
    };
  }

  /**
   * Feeds the next chunk of decoded text to the parser.
   * @param {string} text
   */
  write(text) {
    this.parser.write(text);
  }

  /**
   * Signals end of input to the parser and marks the detector as finished.
   */
  close() {
    this.parser.close();
    this.onEnd = true;
  }

  /**
   * @returns {boolean} `true` when every opening tag seen so far has been
   *   matched by a closing tag.
   */
  isValid() {
    return this.nesting === 0;
  }
}
/**
 * Detects XML-based file types from a tokenizer.
 *
 * The start of the input is checked for an XML declaration. If one is found,
 * the input is decoded and streamed through an `XmlTextDetector` until the
 * detector reports it is done.
 *
 * @param tokenizer - Source exposing `peekBuffer`, `ignore` and `readBuffer`
 *   (presumably a strtok3 tokenizer - confirm against the caller).
 * @returns {Promise<{ext: string, mime: string} | undefined>} The specific
 *   type resolved from the root element, the generic
 *   `{ext: 'xml', mime: 'application/xml'}` when the root element is not
 *   recognized, or `undefined` when the input has no XML declaration.
 */
export const detectXml = async (tokenizer) => {
  const buffer = new Uint8Array(512);
  // Sample up to 128 bytes of the start of the input for signature matching;
  // the rest of the 512-byte buffer stays zero-filled.
  await tokenizer.peekBuffer(buffer, { length: 128, mayBeLess: true });
  const xmlDetection = isXml(buffer);
  if (!xmlDetection.xml) {
    return undefined;
  }
  // Skip the byte-order mark, if any, so decoding starts at the declaration.
  await tokenizer.ignore(xmlDetection.offset);
  const xmlTextDetector = new XmlTextDetector();
  const textDecoder = new TextDecoder(xmlDetection.encoding);
  do {
    const len = await tokenizer.readBuffer(buffer, { mayBeLess: true });
    const portion = buffer.subarray(0, len);
    // Streaming mode keeps a multi-byte character that straddles two chunks
    // intact instead of decoding each half as a replacement character.
    const text = textDecoder.decode(portion, { stream: true });
    xmlTextDetector.write(text);
    // A short read is treated as end of input.
    if (len < buffer.length) {
      xmlTextDetector.close();
    }
  } while (!xmlTextDetector.onEnd);
  return xmlTextDetector.fileType ?? {
    ext: 'xml',
    mime: 'application/xml',
  };
};
101
package.json
{ | ||
"name": "@file-type/xml", | ||
"version": "0.2.1", | ||
"description": "XML detection plugin", | ||
"type": "module", | ||
"exports": "./lib/index.js", | ||
"scripts": { | ||
"test": "mocha" | ||
}, | ||
"keywords": [ | ||
"file-type", | ||
"detect", | ||
"detection", | ||
"detector", | ||
"XML", | ||
"signature", | ||
"namespace", | ||
"SVG", | ||
"XHTML", | ||
"RSS", | ||
"KML", | ||
"GML", | ||
"MusicXML" | ||
], | ||
"dependencies": { | ||
"sax": "^1.4.1", | ||
"strtok3": "^10.0.1" | ||
}, | ||
"devDependencies": { | ||
"chai": "^5.1.2", | ||
"mocha": "^11.0.1" | ||
}, | ||
"files": [ | ||
"lib/**/*.js", | ||
"lib/**/*.d.ts", | ||
"lib/*.cjs" | ||
], | ||
"author": { | ||
"name": "Borewit", | ||
"url": "https://github.com/Borewit" | ||
}, | ||
"repository": { | ||
"type": "git", | ||
"url": "git+https://github.com/Borewit/file-type-xml.git" | ||
}, | ||
"license": "MIT", | ||
"packageManager": "yarn@4.5.3" | ||
"name": "@file-type/xml", | ||
"version": "0.3.0", | ||
"description": "XML detection plugin", | ||
"type": "module", | ||
"types": "./lib/index.d.ts", | ||
"exports": "./lib/index.js", | ||
"scripts": { | ||
"clean": "del-cli 'lib/**/*.js' 'lib/**/*.js.map' 'lib/**/*.d.ts' 'src/**/*.d.ts'", | ||
"compile-src": "tsc -p lib", | ||
"compile": "yarn run compile-src", | ||
"build": "yarn run clean && yarn compile", | ||
"test": "mocha" | ||
}, | ||
"keywords": [ | ||
"file-type", | ||
"detect", | ||
"detection", | ||
"detector", | ||
"XML", | ||
"signature", | ||
"namespace", | ||
"SVG", | ||
"XHTML", | ||
"RSS", | ||
"KML", | ||
"GML", | ||
"MusicXML" | ||
], | ||
"dependencies": { | ||
"sax": "^1.4.1", | ||
"strtok3": "^10.0.1" | ||
}, | ||
"devDependencies": { | ||
"@types/sax": "^1.2.7", | ||
"chai": "^5.1.2", | ||
"del-cli": "^6.0.0", | ||
"file-type": "^19.6.0", | ||
"mocha": "^11.0.1", | ||
"typescript": "^5.7.2" | ||
}, | ||
"files": [ | ||
"lib/**/*.js", | ||
"lib/**/*.d.ts", | ||
"lib/*.cjs" | ||
], | ||
"author": { | ||
"name": "Borewit", | ||
"url": "https://github.com/Borewit" | ||
}, | ||
"repository": { | ||
"type": "git", | ||
"url": "git+https://github.com/Borewit/file-type-xml.git" | ||
}, | ||
"license": "MIT", | ||
"packageManager": "yarn@4.5.3" | ||
} |
@@ -27,3 +27,3 @@ [![NPM version](https://img.shields.io/npm/v/@file-type/xml.svg)](https://npmjs.org/package/@file-type/xml) | ||
```js | ||
import {XmlTextDetector} from 'index.js'; | ||
import {XmlTextDetector} from '@file-type/xml'; | ||
@@ -30,0 +30,0 @@ xmlTextDetector.write('<svg xmlns="http://www.w3.org/2000/svg"><path fill="#00CD9F"/></svg>'); |
8440
4
180
6