parse5
Advanced tools
Comparing version 4.0.0 to 5.0.0
'use strict'; | ||
var DOCUMENT_MODE = require('./html').DOCUMENT_MODE; | ||
const { DOCUMENT_MODE } = require('./html'); | ||
//Const | ||
var VALID_DOCTYPE_NAME = 'html', | ||
QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd', | ||
QUIRKS_MODE_PUBLIC_ID_PREFIXES = [ | ||
'+//silmaril//dtd html pro v0r11 19970101//en', | ||
'-//advasoft ltd//dtd html 3.0 aswedit + extensions//en', | ||
'-//as//dtd html 3.0 aswedit + extensions//en', | ||
'-//ietf//dtd html 2.0 level 1//en', | ||
'-//ietf//dtd html 2.0 level 2//en', | ||
'-//ietf//dtd html 2.0 strict level 1//en', | ||
'-//ietf//dtd html 2.0 strict level 2//en', | ||
'-//ietf//dtd html 2.0 strict//en', | ||
'-//ietf//dtd html 2.0//en', | ||
'-//ietf//dtd html 2.1e//en', | ||
'-//ietf//dtd html 3.0//en', | ||
'-//ietf//dtd html 3.0//en//', | ||
'-//ietf//dtd html 3.2 final//en', | ||
'-//ietf//dtd html 3.2//en', | ||
'-//ietf//dtd html 3//en', | ||
'-//ietf//dtd html level 0//en', | ||
'-//ietf//dtd html level 0//en//2.0', | ||
'-//ietf//dtd html level 1//en', | ||
'-//ietf//dtd html level 1//en//2.0', | ||
'-//ietf//dtd html level 2//en', | ||
'-//ietf//dtd html level 2//en//2.0', | ||
'-//ietf//dtd html level 3//en', | ||
'-//ietf//dtd html level 3//en//3.0', | ||
'-//ietf//dtd html strict level 0//en', | ||
'-//ietf//dtd html strict level 0//en//2.0', | ||
'-//ietf//dtd html strict level 1//en', | ||
'-//ietf//dtd html strict level 1//en//2.0', | ||
'-//ietf//dtd html strict level 2//en', | ||
'-//ietf//dtd html strict level 2//en//2.0', | ||
'-//ietf//dtd html strict level 3//en', | ||
'-//ietf//dtd html strict level 3//en//3.0', | ||
'-//ietf//dtd html strict//en', | ||
'-//ietf//dtd html strict//en//2.0', | ||
'-//ietf//dtd html strict//en//3.0', | ||
'-//ietf//dtd html//en', | ||
'-//ietf//dtd html//en//2.0', | ||
'-//ietf//dtd html//en//3.0', | ||
'-//metrius//dtd metrius presentational//en', | ||
'-//microsoft//dtd internet explorer 2.0 html strict//en', | ||
'-//microsoft//dtd internet explorer 2.0 html//en', | ||
'-//microsoft//dtd internet explorer 2.0 tables//en', | ||
'-//microsoft//dtd internet explorer 3.0 html strict//en', | ||
'-//microsoft//dtd internet explorer 3.0 html//en', | ||
'-//microsoft//dtd internet explorer 3.0 tables//en', | ||
'-//netscape comm. corp.//dtd html//en', | ||
'-//netscape comm. corp.//dtd strict html//en', | ||
'-//o\'reilly and associates//dtd html 2.0//en', | ||
'-//o\'reilly and associates//dtd html extended 1.0//en', | ||
'-//spyglass//dtd html 2.0 extended//en', | ||
'-//sq//dtd html 2.0 hotmetal + extensions//en', | ||
'-//sun microsystems corp.//dtd hotjava html//en', | ||
'-//sun microsystems corp.//dtd hotjava strict html//en', | ||
'-//w3c//dtd html 3 1995-03-24//en', | ||
'-//w3c//dtd html 3.2 draft//en', | ||
'-//w3c//dtd html 3.2 final//en', | ||
'-//w3c//dtd html 3.2//en', | ||
'-//w3c//dtd html 3.2s draft//en', | ||
'-//w3c//dtd html 4.0 frameset//en', | ||
'-//w3c//dtd html 4.0 transitional//en', | ||
'-//w3c//dtd html experimental 19960712//en', | ||
'-//w3c//dtd html experimental 970421//en', | ||
'-//w3c//dtd w3 html//en', | ||
'-//w3o//dtd w3 html 3.0//en', | ||
'-//w3o//dtd w3 html 3.0//en//', | ||
'-//webtechs//dtd mozilla html 2.0//en', | ||
'-//webtechs//dtd mozilla html//en' | ||
], | ||
QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = QUIRKS_MODE_PUBLIC_ID_PREFIXES.concat([ | ||
'-//w3c//dtd html 4.01 frameset//', | ||
'-//w3c//dtd html 4.01 transitional//' | ||
]), | ||
QUIRKS_MODE_PUBLIC_IDS = [ | ||
'-//w3o//dtd w3 html strict 3.0//en//', | ||
'-/w3c/dtd html 4.0 transitional/en', | ||
'html' | ||
], | ||
LIMITED_QUIRKS_PUBLIC_ID_PREFIXES = [ | ||
'-//W3C//DTD XHTML 1.0 Frameset//', | ||
'-//W3C//DTD XHTML 1.0 Transitional//' | ||
], | ||
LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES = LIMITED_QUIRKS_PUBLIC_ID_PREFIXES.concat([ | ||
'-//W3C//DTD HTML 4.01 Frameset//', | ||
'-//W3C//DTD HTML 4.01 Transitional//' | ||
]); | ||
const VALID_DOCTYPE_NAME = 'html'; | ||
const VALID_SYSTEM_ID = 'about:legacy-compat'; | ||
const QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'; | ||
const QUIRKS_MODE_PUBLIC_ID_PREFIXES = [ | ||
'+//silmaril//dtd html pro v0r11 19970101//en', | ||
'-//advasoft ltd//dtd html 3.0 aswedit + extensions//en', | ||
'-//as//dtd html 3.0 aswedit + extensions//en', | ||
'-//ietf//dtd html 2.0 level 1//en', | ||
'-//ietf//dtd html 2.0 level 2//en', | ||
'-//ietf//dtd html 2.0 strict level 1//en', | ||
'-//ietf//dtd html 2.0 strict level 2//en', | ||
'-//ietf//dtd html 2.0 strict//en', | ||
'-//ietf//dtd html 2.0//en', | ||
'-//ietf//dtd html 2.1e//en', | ||
'-//ietf//dtd html 3.0//en', | ||
'-//ietf//dtd html 3.0//en//', | ||
'-//ietf//dtd html 3.2 final//en', | ||
'-//ietf//dtd html 3.2//en', | ||
'-//ietf//dtd html 3//en', | ||
'-//ietf//dtd html level 0//en', | ||
'-//ietf//dtd html level 0//en//2.0', | ||
'-//ietf//dtd html level 1//en', | ||
'-//ietf//dtd html level 1//en//2.0', | ||
'-//ietf//dtd html level 2//en', | ||
'-//ietf//dtd html level 2//en//2.0', | ||
'-//ietf//dtd html level 3//en', | ||
'-//ietf//dtd html level 3//en//3.0', | ||
'-//ietf//dtd html strict level 0//en', | ||
'-//ietf//dtd html strict level 0//en//2.0', | ||
'-//ietf//dtd html strict level 1//en', | ||
'-//ietf//dtd html strict level 1//en//2.0', | ||
'-//ietf//dtd html strict level 2//en', | ||
'-//ietf//dtd html strict level 2//en//2.0', | ||
'-//ietf//dtd html strict level 3//en', | ||
'-//ietf//dtd html strict level 3//en//3.0', | ||
'-//ietf//dtd html strict//en', | ||
'-//ietf//dtd html strict//en//2.0', | ||
'-//ietf//dtd html strict//en//3.0', | ||
'-//ietf//dtd html//en', | ||
'-//ietf//dtd html//en//2.0', | ||
'-//ietf//dtd html//en//3.0', | ||
'-//metrius//dtd metrius presentational//en', | ||
'-//microsoft//dtd internet explorer 2.0 html strict//en', | ||
'-//microsoft//dtd internet explorer 2.0 html//en', | ||
'-//microsoft//dtd internet explorer 2.0 tables//en', | ||
'-//microsoft//dtd internet explorer 3.0 html strict//en', | ||
'-//microsoft//dtd internet explorer 3.0 html//en', | ||
'-//microsoft//dtd internet explorer 3.0 tables//en', | ||
'-//netscape comm. corp.//dtd html//en', | ||
'-//netscape comm. corp.//dtd strict html//en', | ||
"-//o'reilly and associates//dtd html 2.0//en", | ||
"-//o'reilly and associates//dtd html extended 1.0//en", | ||
'-//spyglass//dtd html 2.0 extended//en', | ||
'-//sq//dtd html 2.0 hotmetal + extensions//en', | ||
'-//sun microsystems corp.//dtd hotjava html//en', | ||
'-//sun microsystems corp.//dtd hotjava strict html//en', | ||
'-//w3c//dtd html 3 1995-03-24//en', | ||
'-//w3c//dtd html 3.2 draft//en', | ||
'-//w3c//dtd html 3.2 final//en', | ||
'-//w3c//dtd html 3.2//en', | ||
'-//w3c//dtd html 3.2s draft//en', | ||
'-//w3c//dtd html 4.0 frameset//en', | ||
'-//w3c//dtd html 4.0 transitional//en', | ||
'-//w3c//dtd html experimental 19960712//en', | ||
'-//w3c//dtd html experimental 970421//en', | ||
'-//w3c//dtd w3 html//en', | ||
'-//w3o//dtd w3 html 3.0//en', | ||
'-//w3o//dtd w3 html 3.0//en//', | ||
'-//webtechs//dtd mozilla html 2.0//en', | ||
'-//webtechs//dtd mozilla html//en' | ||
]; | ||
const QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = QUIRKS_MODE_PUBLIC_ID_PREFIXES.concat([ | ||
'-//w3c//dtd html 4.01 frameset//', | ||
'-//w3c//dtd html 4.01 transitional//' | ||
]); | ||
const QUIRKS_MODE_PUBLIC_IDS = ['-//w3o//dtd w3 html strict 3.0//en//', '-/w3c/dtd html 4.0 transitional/en', 'html']; | ||
const LIMITED_QUIRKS_PUBLIC_ID_PREFIXES = ['-//W3C//DTD XHTML 1.0 Frameset//', '-//W3C//DTD XHTML 1.0 Transitional//']; | ||
const LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES = LIMITED_QUIRKS_PUBLIC_ID_PREFIXES.concat([ | ||
'-//W3C//DTD HTML 4.01 Frameset//', | ||
'-//W3C//DTD HTML 4.01 Transitional//' | ||
]); | ||
//Utils | ||
function enquoteDoctypeId(id) { | ||
var quote = id.indexOf('"') !== -1 ? '\'' : '"'; | ||
const quote = id.indexOf('"') !== -1 ? "'" : '"'; | ||
@@ -103,5 +100,6 @@ return quote + id + quote; | ||
function hasPrefix(publicId, prefixes) { | ||
for (var i = 0; i < prefixes.length; i++) { | ||
if (publicId.indexOf(prefixes[i]) === 0) | ||
for (let i = 0; i < prefixes.length; i++) { | ||
if (publicId.indexOf(prefixes[i]) === 0) { | ||
return true; | ||
} | ||
} | ||
@@ -112,26 +110,43 @@ | ||
//API | ||
exports.isConforming = function(token) { | ||
return ( | ||
token.name === VALID_DOCTYPE_NAME && | ||
token.publicId === null && | ||
(token.systemId === null || token.systemId === VALID_SYSTEM_ID) | ||
); | ||
}; | ||
//API | ||
exports.getDocumentMode = function (name, publicId, systemId) { | ||
if (name !== VALID_DOCTYPE_NAME) | ||
exports.getDocumentMode = function(token) { | ||
if (token.name !== VALID_DOCTYPE_NAME) { | ||
return DOCUMENT_MODE.QUIRKS; | ||
} | ||
if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) | ||
const systemId = token.systemId; | ||
if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) { | ||
return DOCUMENT_MODE.QUIRKS; | ||
} | ||
let publicId = token.publicId; | ||
if (publicId !== null) { | ||
publicId = publicId.toLowerCase(); | ||
if (QUIRKS_MODE_PUBLIC_IDS.indexOf(publicId) > -1) | ||
if (QUIRKS_MODE_PUBLIC_IDS.indexOf(publicId) > -1) { | ||
return DOCUMENT_MODE.QUIRKS; | ||
} | ||
var prefixes = systemId === null ? QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES : QUIRKS_MODE_PUBLIC_ID_PREFIXES; | ||
let prefixes = systemId === null ? QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES : QUIRKS_MODE_PUBLIC_ID_PREFIXES; | ||
if (hasPrefix(publicId, prefixes)) | ||
if (hasPrefix(publicId, prefixes)) { | ||
return DOCUMENT_MODE.QUIRKS; | ||
} | ||
prefixes = systemId === null ? LIMITED_QUIRKS_PUBLIC_ID_PREFIXES : LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES; | ||
prefixes = | ||
systemId === null ? LIMITED_QUIRKS_PUBLIC_ID_PREFIXES : LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES; | ||
if (hasPrefix(publicId, prefixes)) | ||
if (hasPrefix(publicId, prefixes)) { | ||
return DOCUMENT_MODE.LIMITED_QUIRKS; | ||
} | ||
} | ||
@@ -142,18 +157,20 @@ | ||
exports.serializeContent = function (name, publicId, systemId) { | ||
var str = '!DOCTYPE '; | ||
exports.serializeContent = function(name, publicId, systemId) { | ||
let str = '!DOCTYPE '; | ||
if (name) | ||
if (name) { | ||
str += name; | ||
} | ||
if (publicId !== null) | ||
if (publicId) { | ||
str += ' PUBLIC ' + enquoteDoctypeId(publicId); | ||
else if (systemId !== null) | ||
} else if (systemId) { | ||
str += ' SYSTEM'; | ||
} | ||
if (systemId !== null) | ||
if (systemId !== null) { | ||
str += ' ' + enquoteDoctypeId(systemId); | ||
} | ||
return str; | ||
}; |
'use strict'; | ||
var NS = exports.NAMESPACES = { | ||
const NS = (exports.NAMESPACES = { | ||
HTML: 'http://www.w3.org/1999/xhtml', | ||
@@ -10,3 +10,3 @@ MATHML: 'http://www.w3.org/1998/Math/MathML', | ||
XMLNS: 'http://www.w3.org/2000/xmlns/' | ||
}; | ||
}); | ||
@@ -30,3 +30,3 @@ exports.ATTRS = { | ||
var $ = exports.TAG_NAMES = { | ||
const $ = (exports.TAG_NAMES = { | ||
A: 'a', | ||
@@ -108,3 +108,2 @@ ADDRESS: 'address', | ||
MENU: 'menu', | ||
MENUITEM: 'menuitem', | ||
META: 'meta', | ||
@@ -177,100 +176,101 @@ MGLYPH: 'mglyph', | ||
XMP: 'xmp' | ||
}); | ||
exports.SPECIAL_ELEMENTS = { | ||
[NS.HTML]: { | ||
[$.ADDRESS]: true, | ||
[$.APPLET]: true, | ||
[$.AREA]: true, | ||
[$.ARTICLE]: true, | ||
[$.ASIDE]: true, | ||
[$.BASE]: true, | ||
[$.BASEFONT]: true, | ||
[$.BGSOUND]: true, | ||
[$.BLOCKQUOTE]: true, | ||
[$.BODY]: true, | ||
[$.BR]: true, | ||
[$.BUTTON]: true, | ||
[$.CAPTION]: true, | ||
[$.CENTER]: true, | ||
[$.COL]: true, | ||
[$.COLGROUP]: true, | ||
[$.DD]: true, | ||
[$.DETAILS]: true, | ||
[$.DIR]: true, | ||
[$.DIV]: true, | ||
[$.DL]: true, | ||
[$.DT]: true, | ||
[$.EMBED]: true, | ||
[$.FIELDSET]: true, | ||
[$.FIGCAPTION]: true, | ||
[$.FIGURE]: true, | ||
[$.FOOTER]: true, | ||
[$.FORM]: true, | ||
[$.FRAME]: true, | ||
[$.FRAMESET]: true, | ||
[$.H1]: true, | ||
[$.H2]: true, | ||
[$.H3]: true, | ||
[$.H4]: true, | ||
[$.H5]: true, | ||
[$.H6]: true, | ||
[$.HEAD]: true, | ||
[$.HEADER]: true, | ||
[$.HGROUP]: true, | ||
[$.HR]: true, | ||
[$.HTML]: true, | ||
[$.IFRAME]: true, | ||
[$.IMG]: true, | ||
[$.INPUT]: true, | ||
[$.LI]: true, | ||
[$.LINK]: true, | ||
[$.LISTING]: true, | ||
[$.MAIN]: true, | ||
[$.MARQUEE]: true, | ||
[$.MENU]: true, | ||
[$.META]: true, | ||
[$.NAV]: true, | ||
[$.NOEMBED]: true, | ||
[$.NOFRAMES]: true, | ||
[$.NOSCRIPT]: true, | ||
[$.OBJECT]: true, | ||
[$.OL]: true, | ||
[$.P]: true, | ||
[$.PARAM]: true, | ||
[$.PLAINTEXT]: true, | ||
[$.PRE]: true, | ||
[$.SCRIPT]: true, | ||
[$.SECTION]: true, | ||
[$.SELECT]: true, | ||
[$.SOURCE]: true, | ||
[$.STYLE]: true, | ||
[$.SUMMARY]: true, | ||
[$.TABLE]: true, | ||
[$.TBODY]: true, | ||
[$.TD]: true, | ||
[$.TEMPLATE]: true, | ||
[$.TEXTAREA]: true, | ||
[$.TFOOT]: true, | ||
[$.TH]: true, | ||
[$.THEAD]: true, | ||
[$.TITLE]: true, | ||
[$.TR]: true, | ||
[$.TRACK]: true, | ||
[$.UL]: true, | ||
[$.WBR]: true, | ||
[$.XMP]: true | ||
}, | ||
[NS.MATHML]: { | ||
[$.MI]: true, | ||
[$.MO]: true, | ||
[$.MN]: true, | ||
[$.MS]: true, | ||
[$.MTEXT]: true, | ||
[$.ANNOTATION_XML]: true | ||
}, | ||
[NS.SVG]: { | ||
[$.TITLE]: true, | ||
[$.FOREIGN_OBJECT]: true, | ||
[$.DESC]: true | ||
} | ||
}; | ||
var SPECIAL_ELEMENTS = exports.SPECIAL_ELEMENTS = Object.create(null); | ||
SPECIAL_ELEMENTS[NS.HTML] = Object.create(null); | ||
SPECIAL_ELEMENTS[NS.HTML][$.ADDRESS] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.APPLET] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.AREA] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.ARTICLE] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.ASIDE] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.BASE] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.BASEFONT] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.BGSOUND] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.BLOCKQUOTE] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.BODY] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.BR] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.BUTTON] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.CAPTION] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.CENTER] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.COL] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.COLGROUP] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.DD] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.DETAILS] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.DIR] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.DIV] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.DL] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.DT] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.EMBED] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.FIELDSET] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.FIGCAPTION] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.FIGURE] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.FOOTER] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.FORM] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.FRAME] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.FRAMESET] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.H1] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.H2] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.H3] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.H4] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.H5] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.H6] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.HEAD] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.HEADER] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.HGROUP] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.HR] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.HTML] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.IFRAME] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.IMG] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.INPUT] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.LI] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.LINK] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.LISTING] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.MAIN] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.MARQUEE] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.MENU] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.META] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.NAV] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.NOEMBED] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.NOFRAMES] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.NOSCRIPT] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.OBJECT] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.OL] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.P] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.PARAM] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.PLAINTEXT] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.PRE] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.SCRIPT] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.SECTION] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.SELECT] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.SOURCE] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.STYLE] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.SUMMARY] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.TABLE] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.TBODY] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.TD] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.TEMPLATE] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.TEXTAREA] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.TFOOT] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.TH] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.THEAD] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.TITLE] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.TR] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.TRACK] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.UL] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.WBR] = true; | ||
SPECIAL_ELEMENTS[NS.HTML][$.XMP] = true; | ||
SPECIAL_ELEMENTS[NS.MATHML] = Object.create(null); | ||
SPECIAL_ELEMENTS[NS.MATHML][$.MI] = true; | ||
SPECIAL_ELEMENTS[NS.MATHML][$.MO] = true; | ||
SPECIAL_ELEMENTS[NS.MATHML][$.MN] = true; | ||
SPECIAL_ELEMENTS[NS.MATHML][$.MS] = true; | ||
SPECIAL_ELEMENTS[NS.MATHML][$.MTEXT] = true; | ||
SPECIAL_ELEMENTS[NS.MATHML][$.ANNOTATION_XML] = true; | ||
SPECIAL_ELEMENTS[NS.SVG] = Object.create(null); | ||
SPECIAL_ELEMENTS[NS.SVG][$.TITLE] = true; | ||
SPECIAL_ELEMENTS[NS.SVG][$.FOREIGN_OBJECT] = true; | ||
SPECIAL_ELEMENTS[NS.SVG][$.DESC] = true; |
'use strict'; | ||
const UNDEFINED_CODE_POINTS = [ | ||
0xfffe, | ||
0xffff, | ||
0x1fffe, | ||
0x1ffff, | ||
0x2fffe, | ||
0x2ffff, | ||
0x3fffe, | ||
0x3ffff, | ||
0x4fffe, | ||
0x4ffff, | ||
0x5fffe, | ||
0x5ffff, | ||
0x6fffe, | ||
0x6ffff, | ||
0x7fffe, | ||
0x7ffff, | ||
0x8fffe, | ||
0x8ffff, | ||
0x9fffe, | ||
0x9ffff, | ||
0xafffe, | ||
0xaffff, | ||
0xbfffe, | ||
0xbffff, | ||
0xcfffe, | ||
0xcffff, | ||
0xdfffe, | ||
0xdffff, | ||
0xefffe, | ||
0xeffff, | ||
0xffffe, | ||
0xfffff, | ||
0x10fffe, | ||
0x10ffff | ||
]; | ||
exports.REPLACEMENT_CHARACTER = '\uFFFD'; | ||
@@ -9,5 +46,5 @@ | ||
TABULATION: 0x09, | ||
CARRIAGE_RETURN: 0x0D, | ||
LINE_FEED: 0x0A, | ||
FORM_FEED: 0x0C, | ||
CARRIAGE_RETURN: 0x0d, | ||
LINE_FEED: 0x0a, | ||
FORM_FEED: 0x0c, | ||
SPACE: 0x20, | ||
@@ -19,15 +56,16 @@ EXCLAMATION_MARK: 0x21, | ||
APOSTROPHE: 0x27, | ||
HYPHEN_MINUS: 0x2D, | ||
SOLIDUS: 0x2F, | ||
HYPHEN_MINUS: 0x2d, | ||
SOLIDUS: 0x2f, | ||
DIGIT_0: 0x30, | ||
DIGIT_9: 0x39, | ||
SEMICOLON: 0x3B, | ||
LESS_THAN_SIGN: 0x3C, | ||
EQUALS_SIGN: 0x3D, | ||
GREATER_THAN_SIGN: 0x3E, | ||
QUESTION_MARK: 0x3F, | ||
SEMICOLON: 0x3b, | ||
LESS_THAN_SIGN: 0x3c, | ||
EQUALS_SIGN: 0x3d, | ||
GREATER_THAN_SIGN: 0x3e, | ||
QUESTION_MARK: 0x3f, | ||
LATIN_CAPITAL_A: 0x41, | ||
LATIN_CAPITAL_F: 0x46, | ||
LATIN_CAPITAL_X: 0x58, | ||
LATIN_CAPITAL_Z: 0x5A, | ||
LATIN_CAPITAL_Z: 0x5a, | ||
RIGHT_SQUARE_BRACKET: 0x5d, | ||
GRAVE_ACCENT: 0x60, | ||
@@ -37,14 +75,38 @@ LATIN_SMALL_A: 0x61, | ||
LATIN_SMALL_X: 0x78, | ||
LATIN_SMALL_Z: 0x7A, | ||
REPLACEMENT_CHARACTER: 0xFFFD | ||
LATIN_SMALL_Z: 0x7a, | ||
REPLACEMENT_CHARACTER: 0xfffd | ||
}; | ||
exports.CODE_POINT_SEQUENCES = { | ||
DASH_DASH_STRING: [0x2D, 0x2D], //-- | ||
DOCTYPE_STRING: [0x44, 0x4F, 0x43, 0x54, 0x59, 0x50, 0x45], //DOCTYPE | ||
CDATA_START_STRING: [0x5B, 0x43, 0x44, 0x41, 0x54, 0x41, 0x5B], //[CDATA[ | ||
CDATA_END_STRING: [0x5D, 0x5D, 0x3E], //]]> | ||
DASH_DASH_STRING: [0x2d, 0x2d], //-- | ||
DOCTYPE_STRING: [0x44, 0x4f, 0x43, 0x54, 0x59, 0x50, 0x45], //DOCTYPE | ||
CDATA_START_STRING: [0x5b, 0x43, 0x44, 0x41, 0x54, 0x41, 0x5b], //[CDATA[ | ||
SCRIPT_STRING: [0x73, 0x63, 0x72, 0x69, 0x70, 0x74], //script | ||
PUBLIC_STRING: [0x50, 0x55, 0x42, 0x4C, 0x49, 0x43], //PUBLIC | ||
SYSTEM_STRING: [0x53, 0x59, 0x53, 0x54, 0x45, 0x4D] //SYSTEM | ||
PUBLIC_STRING: [0x50, 0x55, 0x42, 0x4c, 0x49, 0x43], //PUBLIC | ||
SYSTEM_STRING: [0x53, 0x59, 0x53, 0x54, 0x45, 0x4d] //SYSTEM | ||
}; | ||
//Surrogates | ||
exports.isSurrogate = function(cp) { | ||
return cp >= 0xd800 && cp <= 0xdfff; | ||
}; | ||
exports.isSurrogatePair = function(cp) { | ||
return cp >= 0xdc00 && cp <= 0xdfff; | ||
}; | ||
exports.getSurrogatePairCodePoint = function(cp1, cp2) { | ||
return (cp1 - 0xd800) * 0x400 + 0x2400 + cp2; | ||
}; | ||
//NOTE: excluding NULL and ASCII whitespace | ||
exports.isControlCodePoint = function(cp) { | ||
return ( | ||
(cp !== 0x20 && cp !== 0x0a && cp !== 0x0d && cp !== 0x09 && cp !== 0x0c && cp >= 0x01 && cp <= 0x1f) || | ||
(cp >= 0x7f && cp <= 0x9f) | ||
); | ||
}; | ||
exports.isUndefinedCodePoint = function(cp) { | ||
return (cp >= 0xfdd0 && cp <= 0xfdef) || UNDEFINED_CODE_POINTS.indexOf(cp) > -1; | ||
}; |
'use strict'; | ||
var Parser = require('./parser'), | ||
Serializer = require('./serializer'); | ||
const Parser = require('./parser'); | ||
const Serializer = require('./serializer'); | ||
// Shorthands | ||
exports.parse = function parse(html, options) { | ||
var parser = new Parser(options); | ||
const parser = new Parser(options); | ||
@@ -21,3 +20,3 @@ return parser.parse(html); | ||
var parser = new Parser(options); | ||
const parser = new Parser(options); | ||
@@ -27,42 +26,6 @@ return parser.parseFragment(html, fragmentContext); | ||
exports.serialize = function (node, options) { | ||
var serializer = new Serializer(node, options); | ||
exports.serialize = function(node, options) { | ||
const serializer = new Serializer(node, options); | ||
return serializer.serialize(); | ||
}; | ||
// Tree adapters | ||
exports.treeAdapters = { | ||
default: require('./tree_adapters/default'), | ||
htmlparser2: require('./tree_adapters/htmlparser2') | ||
}; | ||
// Streaming | ||
// NOTE: streaming API is lazy loadable to enable bundling for platforms | ||
// that are different from Node.js. | ||
// See https://github.com/inikulin/parse5/issues/235. | ||
var streamingAPI = { | ||
ParserStream: './parser/parser_stream', | ||
PlainTextConversionStream: './parser/plain_text_conversion_stream', | ||
SerializerStream: './serializer/serializer_stream', | ||
SAXParser: './sax' | ||
}; | ||
Object.keys(streamingAPI).forEach(function (cls) { | ||
Object.defineProperty(exports, cls, { | ||
get: function () { | ||
try { | ||
return require(streamingAPI[cls]); | ||
} | ||
catch (e) { | ||
throw new Error( | ||
cls + ' is supported only for Node.js.' + | ||
'See https://github.com/inikulin/parse5/issues/235 for the details.' | ||
); | ||
} | ||
} | ||
}); | ||
}); |
'use strict'; | ||
var defaultTreeAdapter = require('../tree_adapters/default'), | ||
mergeOptions = require('../utils/merge_options'), | ||
doctype = require('../common/doctype'), | ||
HTML = require('../common/html'); | ||
const defaultTreeAdapter = require('../tree-adapters/default'); | ||
const mergeOptions = require('../utils/merge-options'); | ||
const doctype = require('../common/doctype'); | ||
const HTML = require('../common/html'); | ||
//Aliases | ||
var $ = HTML.TAG_NAMES, | ||
NS = HTML.NAMESPACES; | ||
const $ = HTML.TAG_NAMES; | ||
const NS = HTML.NAMESPACES; | ||
//Default serializer options | ||
var DEFAULT_OPTIONS = { | ||
const DEFAULT_OPTIONS = { | ||
treeAdapter: defaultTreeAdapter | ||
@@ -18,146 +18,160 @@ }; | ||
//Escaping regexes | ||
var AMP_REGEX = /&/g, | ||
NBSP_REGEX = /\u00a0/g, | ||
DOUBLE_QUOTE_REGEX = /"/g, | ||
LT_REGEX = /</g, | ||
GT_REGEX = />/g; | ||
const AMP_REGEX = /&/g; | ||
const NBSP_REGEX = /\u00a0/g; | ||
const DOUBLE_QUOTE_REGEX = /"/g; | ||
const LT_REGEX = /</g; | ||
const GT_REGEX = />/g; | ||
//Serializer | ||
var Serializer = module.exports = function (node, options) { | ||
this.options = mergeOptions(DEFAULT_OPTIONS, options); | ||
this.treeAdapter = this.options.treeAdapter; | ||
class Serializer { | ||
constructor(node, options) { | ||
this.options = mergeOptions(DEFAULT_OPTIONS, options); | ||
this.treeAdapter = this.options.treeAdapter; | ||
this.html = ''; | ||
this.startNode = node; | ||
}; | ||
this.html = ''; | ||
this.startNode = node; | ||
} | ||
// NOTE: exported as static method for the testing purposes | ||
Serializer.escapeString = function (str, attrMode) { | ||
str = str | ||
.replace(AMP_REGEX, '&') | ||
.replace(NBSP_REGEX, ' '); | ||
//API | ||
serialize() { | ||
this._serializeChildNodes(this.startNode); | ||
if (attrMode) | ||
str = str.replace(DOUBLE_QUOTE_REGEX, '"'); | ||
else { | ||
str = str | ||
.replace(LT_REGEX, '<') | ||
.replace(GT_REGEX, '>'); | ||
return this.html; | ||
} | ||
return str; | ||
}; | ||
//Internals | ||
_serializeChildNodes(parentNode) { | ||
const childNodes = this.treeAdapter.getChildNodes(parentNode); | ||
if (childNodes) { | ||
for (let i = 0, cnLength = childNodes.length; i < cnLength; i++) { | ||
const currentNode = childNodes[i]; | ||
//API | ||
Serializer.prototype.serialize = function () { | ||
this._serializeChildNodes(this.startNode); | ||
return this.html; | ||
}; | ||
//Internals | ||
Serializer.prototype._serializeChildNodes = function (parentNode) { | ||
var childNodes = this.treeAdapter.getChildNodes(parentNode); | ||
if (childNodes) { | ||
for (var i = 0, cnLength = childNodes.length; i < cnLength; i++) { | ||
var currentNode = childNodes[i]; | ||
if (this.treeAdapter.isElementNode(currentNode)) | ||
this._serializeElement(currentNode); | ||
else if (this.treeAdapter.isTextNode(currentNode)) | ||
this._serializeTextNode(currentNode); | ||
else if (this.treeAdapter.isCommentNode(currentNode)) | ||
this._serializeCommentNode(currentNode); | ||
else if (this.treeAdapter.isDocumentTypeNode(currentNode)) | ||
this._serializeDocumentTypeNode(currentNode); | ||
if (this.treeAdapter.isElementNode(currentNode)) { | ||
this._serializeElement(currentNode); | ||
} else if (this.treeAdapter.isTextNode(currentNode)) { | ||
this._serializeTextNode(currentNode); | ||
} else if (this.treeAdapter.isCommentNode(currentNode)) { | ||
this._serializeCommentNode(currentNode); | ||
} else if (this.treeAdapter.isDocumentTypeNode(currentNode)) { | ||
this._serializeDocumentTypeNode(currentNode); | ||
} | ||
} | ||
} | ||
} | ||
}; | ||
Serializer.prototype._serializeElement = function (node) { | ||
var tn = this.treeAdapter.getTagName(node), | ||
ns = this.treeAdapter.getNamespaceURI(node); | ||
_serializeElement(node) { | ||
const tn = this.treeAdapter.getTagName(node); | ||
const ns = this.treeAdapter.getNamespaceURI(node); | ||
this.html += '<' + tn; | ||
this._serializeAttributes(node); | ||
this.html += '>'; | ||
this.html += '<' + tn; | ||
this._serializeAttributes(node); | ||
this.html += '>'; | ||
if (tn !== $.AREA && tn !== $.BASE && tn !== $.BASEFONT && tn !== $.BGSOUND && tn !== $.BR && tn !== $.BR && | ||
tn !== $.COL && tn !== $.EMBED && tn !== $.FRAME && tn !== $.HR && tn !== $.IMG && tn !== $.INPUT && | ||
tn !== $.KEYGEN && tn !== $.LINK && tn !== $.MENUITEM && tn !== $.META && tn !== $.PARAM && tn !== $.SOURCE && | ||
tn !== $.TRACK && tn !== $.WBR) { | ||
if ( | ||
tn !== $.AREA && | ||
tn !== $.BASE && | ||
tn !== $.BASEFONT && | ||
tn !== $.BGSOUND && | ||
tn !== $.BR && | ||
tn !== $.COL && | ||
tn !== $.EMBED && | ||
tn !== $.FRAME && | ||
tn !== $.HR && | ||
tn !== $.IMG && | ||
tn !== $.INPUT && | ||
tn !== $.KEYGEN && | ||
tn !== $.LINK && | ||
tn !== $.META && | ||
tn !== $.PARAM && | ||
tn !== $.SOURCE && | ||
tn !== $.TRACK && | ||
tn !== $.WBR | ||
) { | ||
const childNodesHolder = | ||
tn === $.TEMPLATE && ns === NS.HTML ? this.treeAdapter.getTemplateContent(node) : node; | ||
var childNodesHolder = tn === $.TEMPLATE && ns === NS.HTML ? | ||
this.treeAdapter.getTemplateContent(node) : | ||
node; | ||
this._serializeChildNodes(childNodesHolder); | ||
this.html += '</' + tn + '>'; | ||
this._serializeChildNodes(childNodesHolder); | ||
this.html += '</' + tn + '>'; | ||
} | ||
} | ||
}; | ||
Serializer.prototype._serializeAttributes = function (node) { | ||
var attrs = this.treeAdapter.getAttrList(node); | ||
_serializeAttributes(node) { | ||
const attrs = this.treeAdapter.getAttrList(node); | ||
for (var i = 0, attrsLength = attrs.length; i < attrsLength; i++) { | ||
var attr = attrs[i], | ||
value = Serializer.escapeString(attr.value, true); | ||
for (let i = 0, attrsLength = attrs.length; i < attrsLength; i++) { | ||
const attr = attrs[i]; | ||
const value = Serializer.escapeString(attr.value, true); | ||
this.html += ' '; | ||
this.html += ' '; | ||
if (!attr.namespace) | ||
this.html += attr.name; | ||
if (!attr.namespace) { | ||
this.html += attr.name; | ||
} else if (attr.namespace === NS.XML) { | ||
this.html += 'xml:' + attr.name; | ||
} else if (attr.namespace === NS.XMLNS) { | ||
if (attr.name !== 'xmlns') { | ||
this.html += 'xmlns:'; | ||
} | ||
else if (attr.namespace === NS.XML) | ||
this.html += 'xml:' + attr.name; | ||
this.html += attr.name; | ||
} else if (attr.namespace === NS.XLINK) { | ||
this.html += 'xlink:' + attr.name; | ||
} else { | ||
this.html += attr.namespace + ':' + attr.name; | ||
} | ||
else if (attr.namespace === NS.XMLNS) { | ||
if (attr.name !== 'xmlns') | ||
this.html += 'xmlns:'; | ||
this.html += attr.name; | ||
this.html += '="' + value + '"'; | ||
} | ||
} | ||
else if (attr.namespace === NS.XLINK) | ||
this.html += 'xlink:' + attr.name; | ||
_serializeTextNode(node) { | ||
const content = this.treeAdapter.getTextNodeContent(node); | ||
const parent = this.treeAdapter.getParentNode(node); | ||
let parentTn = void 0; | ||
else | ||
this.html += attr.namespace + ':' + attr.name; | ||
if (parent && this.treeAdapter.isElementNode(parent)) { | ||
parentTn = this.treeAdapter.getTagName(parent); | ||
} | ||
this.html += '="' + value + '"'; | ||
if ( | ||
parentTn === $.STYLE || | ||
parentTn === $.SCRIPT || | ||
parentTn === $.XMP || | ||
parentTn === $.IFRAME || | ||
parentTn === $.NOEMBED || | ||
parentTn === $.NOFRAMES || | ||
parentTn === $.PLAINTEXT || | ||
parentTn === $.NOSCRIPT | ||
) { | ||
this.html += content; | ||
} else { | ||
this.html += Serializer.escapeString(content, false); | ||
} | ||
} | ||
}; | ||
Serializer.prototype._serializeTextNode = function (node) { | ||
var content = this.treeAdapter.getTextNodeContent(node), | ||
parent = this.treeAdapter.getParentNode(node), | ||
parentTn = void 0; | ||
_serializeCommentNode(node) { | ||
this.html += '<!--' + this.treeAdapter.getCommentNodeContent(node) + '-->'; | ||
} | ||
if (parent && this.treeAdapter.isElementNode(parent)) | ||
parentTn = this.treeAdapter.getTagName(parent); | ||
_serializeDocumentTypeNode(node) { | ||
const name = this.treeAdapter.getDocumentTypeNodeName(node); | ||
if (parentTn === $.STYLE || parentTn === $.SCRIPT || parentTn === $.XMP || parentTn === $.IFRAME || | ||
parentTn === $.NOEMBED || parentTn === $.NOFRAMES || parentTn === $.PLAINTEXT || parentTn === $.NOSCRIPT) | ||
this.html += '<' + doctype.serializeContent(name, null, null) + '>'; | ||
} | ||
} | ||
this.html += content; | ||
// NOTE: used in tests and by rewriting stream | ||
Serializer.escapeString = function(str, attrMode) { | ||
str = str.replace(AMP_REGEX, '&').replace(NBSP_REGEX, ' '); | ||
else | ||
this.html += Serializer.escapeString(content, false); | ||
}; | ||
if (attrMode) { | ||
str = str.replace(DOUBLE_QUOTE_REGEX, '"'); | ||
} else { | ||
str = str.replace(LT_REGEX, '<').replace(GT_REGEX, '>'); | ||
} | ||
Serializer.prototype._serializeCommentNode = function (node) { | ||
this.html += '<!--' + this.treeAdapter.getCommentNodeContent(node) + '-->'; | ||
return str; | ||
}; | ||
Serializer.prototype._serializeDocumentTypeNode = function (node) { | ||
var name = this.treeAdapter.getDocumentTypeNodeName(node); | ||
this.html += '<' + doctype.serializeContent(name, null, null) + '>'; | ||
}; | ||
module.exports = Serializer; |
'use strict'; | ||
var UNICODE = require('../common/unicode'); | ||
const unicode = require('../common/unicode'); | ||
const ERR = require('../common/error-codes'); | ||
//Aliases | ||
var $ = UNICODE.CODE_POINTS; | ||
const $ = unicode.CODE_POINTS; | ||
//Utils | ||
//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are located in another module, due to the context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').
/**
 * Tells whether two consecutive UTF-16 code units form a surrogate pair.
 *
 * @param {number} cp1 - First code unit; must lie in the high-surrogate range 0xD800..0xDBFF.
 * @param {number} cp2 - Second code unit; must lie in the low-surrogate range 0xDC00..0xDFFF.
 * @returns {boolean} `true` only when both units are in their respective ranges.
 *     Any comparison against `NaN` (e.g. `charCodeAt` past the end of a string) yields `false`.
 */
function isSurrogatePair(cp1, cp2) {
    return cp1 >= 0xD800 && cp1 <= 0xDBFF && cp2 >= 0xDC00 && cp2 <= 0xDFFF;
}
/**
 * Combines the two code units of a surrogate pair into a single code point.
 *
 * The inputs are not validated here; the result is only meaningful when
 * `cp1` is a high surrogate and `cp2` is a low surrogate.
 *
 * @param {number} cp1 - High (leading) surrogate code unit.
 * @param {number} cp2 - Low (trailing) surrogate code unit.
 * @returns {number} The combined code point.
 */
function getSurrogatePairCodePoint(cp1, cp2) {
    //NOTE: 0x2400 is (0x10000 - 0xDC00) folded into one constant, i.e. this is
    //(cp1 - 0xD800) * 0x400 + (cp2 - 0xDC00) + 0x10000.
    return (cp1 - 0xD800) * 0x400 + 0x2400 + cp2;
}
//Const | ||
var DEFAULT_BUFFER_WATERLINE = 1 << 16; | ||
const DEFAULT_BUFFER_WATERLINE = 1 << 16; | ||
//Preprocessor | ||
//NOTE: HTML input preprocessing | ||
//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream) | ||
var Preprocessor = module.exports = function () { | ||
this.html = null; | ||
class Preprocessor { | ||
constructor() { | ||
this.html = null; | ||
this.pos = -1; | ||
this.lastGapPos = -1; | ||
this.lastCharPos = -1; | ||
this.pos = -1; | ||
this.lastGapPos = -1; | ||
this.lastCharPos = -1; | ||
this.gapStack = []; | ||
this.gapStack = []; | ||
this.skipNextNewLine = false; | ||
this.skipNextNewLine = false; | ||
this.lastChunkWritten = false; | ||
this.endOfChunkHit = false; | ||
this.bufferWaterline = DEFAULT_BUFFER_WATERLINE; | ||
}; | ||
this.lastChunkWritten = false; | ||
this.endOfChunkHit = false; | ||
this.bufferWaterline = DEFAULT_BUFFER_WATERLINE; | ||
} | ||
Preprocessor.prototype.dropParsedChunk = function () { | ||
if (this.pos > this.bufferWaterline) { | ||
this.lastCharPos -= this.pos; | ||
this.html = this.html.substring(this.pos); | ||
this.pos = 0; | ||
this.lastGapPos = -1; | ||
this.gapStack = []; | ||
_err() { | ||
// NOTE: err reporting is noop by default. Enabled by mixin. | ||
} | ||
}; | ||
Preprocessor.prototype._addGap = function () { | ||
this.gapStack.push(this.lastGapPos); | ||
this.lastGapPos = this.pos; | ||
}; | ||
_addGap() { | ||
this.gapStack.push(this.lastGapPos); | ||
this.lastGapPos = this.pos; | ||
} | ||
Preprocessor.prototype._processHighRangeCodePoint = function (cp) { | ||
//NOTE: try to peek a surrogate pair | ||
if (this.pos !== this.lastCharPos) { | ||
var nextCp = this.html.charCodeAt(this.pos + 1); | ||
_processSurrogate(cp) { | ||
//NOTE: try to peek a surrogate pair | ||
if (this.pos !== this.lastCharPos) { | ||
const nextCp = this.html.charCodeAt(this.pos + 1); | ||
if (isSurrogatePair(cp, nextCp)) { | ||
//NOTE: we have a surrogate pair. Peek pair character and recalculate code point. | ||
this.pos++; | ||
cp = getSurrogatePairCodePoint(cp, nextCp); | ||
if (unicode.isSurrogatePair(nextCp)) { | ||
//NOTE: we have a surrogate pair. Peek pair character and recalculate code point. | ||
this.pos++; | ||
//NOTE: add gap that should be avoided during retreat | ||
this._addGap(); | ||
//NOTE: add gap that should be avoided during retreat | ||
this._addGap(); | ||
return unicode.getSurrogatePairCodePoint(cp, nextCp); | ||
} | ||
} | ||
//NOTE: we are at the end of a chunk, therefore we can't infer surrogate pair yet. | ||
else if (!this.lastChunkWritten) { | ||
this.endOfChunkHit = true; | ||
return $.EOF; | ||
} | ||
//NOTE: isolated surrogate | ||
this._err(ERR.surrogateInInputStream); | ||
return cp; | ||
} | ||
// NOTE: we've hit the end of chunk, stop processing at this point | ||
else if (!this.lastChunkWritten) { | ||
this.endOfChunkHit = true; | ||
return $.EOF; | ||
dropParsedChunk() { | ||
if (this.pos > this.bufferWaterline) { | ||
this.lastCharPos -= this.pos; | ||
this.html = this.html.substring(this.pos); | ||
this.pos = 0; | ||
this.lastGapPos = -1; | ||
this.gapStack = []; | ||
} | ||
} | ||
return cp; | ||
}; | ||
write(chunk, isLastChunk) { | ||
if (this.html) { | ||
this.html += chunk; | ||
} else { | ||
this.html = chunk; | ||
} | ||
Preprocessor.prototype.write = function (chunk, isLastChunk) { | ||
if (this.html) | ||
this.html += chunk; | ||
this.lastCharPos = this.html.length - 1; | ||
this.endOfChunkHit = false; | ||
this.lastChunkWritten = isLastChunk; | ||
} | ||
else | ||
this.html = chunk; | ||
insertHtmlAtCurrentPos(chunk) { | ||
this.html = this.html.substring(0, this.pos + 1) + chunk + this.html.substring(this.pos + 1, this.html.length); | ||
this.lastCharPos = this.html.length - 1; | ||
this.endOfChunkHit = false; | ||
this.lastChunkWritten = isLastChunk; | ||
}; | ||
this.lastCharPos = this.html.length - 1; | ||
this.endOfChunkHit = false; | ||
} | ||
Preprocessor.prototype.insertHtmlAtCurrentPos = function (chunk) { | ||
this.html = this.html.substring(0, this.pos + 1) + | ||
chunk + | ||
this.html.substring(this.pos + 1, this.html.length); | ||
advance() { | ||
this.pos++; | ||
this.lastCharPos = this.html.length - 1; | ||
this.endOfChunkHit = false; | ||
}; | ||
if (this.pos > this.lastCharPos) { | ||
this.endOfChunkHit = !this.lastChunkWritten; | ||
return $.EOF; | ||
} | ||
let cp = this.html.charCodeAt(this.pos); | ||
Preprocessor.prototype.advance = function () { | ||
this.pos++; | ||
//NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character | ||
//must be ignored. | ||
if (this.skipNextNewLine && cp === $.LINE_FEED) { | ||
this.skipNextNewLine = false; | ||
this._addGap(); | ||
return this.advance(); | ||
} | ||
if (this.pos > this.lastCharPos) { | ||
if (!this.lastChunkWritten) | ||
this.endOfChunkHit = true; | ||
//NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters | ||
if (cp === $.CARRIAGE_RETURN) { | ||
this.skipNextNewLine = true; | ||
return $.LINE_FEED; | ||
} | ||
return $.EOF; | ||
} | ||
this.skipNextNewLine = false; | ||
var cp = this.html.charCodeAt(this.pos); | ||
if (unicode.isSurrogate(cp)) { | ||
cp = this._processSurrogate(cp); | ||
} | ||
//NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character | ||
//must be ignored. | ||
if (this.skipNextNewLine && cp === $.LINE_FEED) { | ||
this.skipNextNewLine = false; | ||
this._addGap(); | ||
return this.advance(); | ||
//OPTIMIZATION: first check if code point is in the common allowed | ||
//range (ASCII alphanumeric, whitespaces, big chunk of BMP) | ||
//before going into detailed performance cost validation. | ||
const isCommonValidRange = | ||
(cp > 0x1f && cp < 0x7f) || cp === $.LINE_FEED || cp === $.CARRIAGE_RETURN || (cp > 0x9f && cp < 0xfdd0); | ||
if (!isCommonValidRange) { | ||
this._checkForProblematicCharacters(cp); | ||
} | ||
return cp; | ||
} | ||
//NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters | ||
if (cp === $.CARRIAGE_RETURN) { | ||
this.skipNextNewLine = true; | ||
return $.LINE_FEED; | ||
_checkForProblematicCharacters(cp) { | ||
if (unicode.isControlCodePoint(cp)) { | ||
this._err(ERR.controlCharacterInInputStream); | ||
} else if (unicode.isUndefinedCodePoint(cp)) { | ||
this._err(ERR.noncharacterInInputStream); | ||
} | ||
} | ||
this.skipNextNewLine = false; | ||
retreat() { | ||
if (this.pos === this.lastGapPos) { | ||
this.lastGapPos = this.gapStack.pop(); | ||
this.pos--; | ||
} | ||
//OPTIMIZATION: first perform check if the code point in the allowed range that covers most common | ||
//HTML input (e.g. ASCII codes) to avoid performance-cost operations for high-range code points. | ||
return cp >= 0xD800 ? this._processHighRangeCodePoint(cp) : cp; | ||
}; | ||
Preprocessor.prototype.retreat = function () { | ||
if (this.pos === this.lastGapPos) { | ||
this.lastGapPos = this.gapStack.pop(); | ||
this.pos--; | ||
} | ||
} | ||
this.pos--; | ||
}; | ||
module.exports = Preprocessor; |
'use strict'; | ||
var Mixin = module.exports = function (host) { | ||
var originalMethods = {}, | ||
overriddenMethods = this._getOverriddenMethods(this, originalMethods); | ||
class Mixin { | ||
constructor(host) { | ||
const originalMethods = {}; | ||
const overriddenMethods = this._getOverriddenMethods(this, originalMethods); | ||
Object.keys(overriddenMethods).forEach(function (key) { | ||
if (typeof overriddenMethods[key] === 'function') { | ||
originalMethods[key] = host[key]; | ||
host[key] = overriddenMethods[key]; | ||
for (const key of Object.keys(overriddenMethods)) { | ||
if (typeof overriddenMethods[key] === 'function') { | ||
originalMethods[key] = host[key]; | ||
host[key] = overriddenMethods[key]; | ||
} | ||
} | ||
}); | ||
}; | ||
} | ||
Mixin.prototype._getOverriddenMethods = function () { | ||
throw new Error('Not implemented'); | ||
_getOverriddenMethods() { | ||
throw new Error('Not implemented'); | ||
} | ||
} | ||
/**
 * Installs a mixin of type `Ctor` on `host`, creating at most one instance per host.
 *
 * Installed mixin instances are tracked in the `host.__mixins` array, which is
 * created on first use. If an entry whose `constructor` is `Ctor` is already
 * present, that instance is returned and nothing new is constructed.
 *
 * @param {Object} host - Object the mixin is attached to; gains a `__mixins` array.
 * @param {Function} Ctor - Mixin constructor, invoked as `new Ctor(host, opts)`.
 * @param {*} [opts] - Passed through to `Ctor` unchanged.
 * @returns {Object} The already-installed or newly created mixin instance.
 */
Mixin.install = function(host, Ctor, opts) {
    if (!host.__mixins) {
        host.__mixins = [];
    }

    // Reuse the instance from an earlier install call for the same constructor.
    for (const installed of host.__mixins) {
        if (installed.constructor === Ctor) {
            return installed;
        }
    }

    const mixin = new Ctor(host, opts);

    host.__mixins.push(mixin);

    return mixin;
};
module.exports = Mixin; |
{ | ||
"name": "parse5", | ||
"description": "HTML parsing/serialization toolset for Node.js. WHATWG HTML Living Standard (aka HTML5)-compliant.", | ||
"version": "4.0.0", | ||
"author": "Ivan Nikulin <ifaaan@gmail.com> (https://github.com/inikulin)", | ||
"contributors": "https://github.com/inikulin/parse5/graphs/contributors", | ||
"homepage": "https://github.com/inikulin/parse5", | ||
"devDependencies": { | ||
"@types/node": "*", | ||
"del": "^2.0.2", | ||
"gulp": "^3.9.0", | ||
"gulp-benchmark": "^1.1.1", | ||
"gulp-download": "0.0.1", | ||
"gulp-eslint": "^3.0.1", | ||
"gulp-install": "^0.6.0", | ||
"gulp-mocha": "^2.1.3", | ||
"gulp-rename": "^1.2.2", | ||
"gulp-typedoc": "^2.0.0", | ||
"gulp-typescript": "^3.1.2", | ||
"publish-please": "^2.2.0", | ||
"through2": "^2.0.0", | ||
"typedoc": "^0.5.1", | ||
"typescript": "^2.0.6" | ||
}, | ||
"keywords": [ | ||
"html", | ||
"parser", | ||
"html5", | ||
"WHATWG", | ||
"specification", | ||
"fast", | ||
"html parser", | ||
"html5 parser", | ||
"htmlparser", | ||
"parse5", | ||
"serializer", | ||
"html serializer", | ||
"htmlserializer", | ||
"sax", | ||
"simple api", | ||
"parse", | ||
"tokenize", | ||
"serialize", | ||
"tokenizer" | ||
], | ||
"license": "MIT", | ||
"main": "./lib/index.js", | ||
"repository": { | ||
"type": "git", | ||
"url": "git://github.com/inikulin/parse5.git" | ||
}, | ||
"scripts": { | ||
"test": "gulp test", | ||
"publish-please": "publish-please", | ||
"prepublish": "publish-please guard" | ||
}, | ||
"files": [ | ||
"lib" | ||
] | ||
"name": "parse5", | ||
"description": "HTML parser and serializer.", | ||
"version": "5.0.0", | ||
"author": "Ivan Nikulin <ifaaan@gmail.com> (https://github.com/inikulin)", | ||
"contributors": "https://github.com/inikulin/parse5/graphs/contributors", | ||
"homepage": "https://github.com/inikulin/parse5", | ||
"keywords": [ | ||
"html", | ||
"parser", | ||
"html5", | ||
"WHATWG", | ||
"specification", | ||
"fast", | ||
"html parser", | ||
"html5 parser", | ||
"htmlparser", | ||
"parse5", | ||
"serializer", | ||
"html serializer", | ||
"htmlserializer", | ||
"parse", | ||
"serialize" | ||
], | ||
"license": "MIT", | ||
"main": "./lib/index.js", | ||
"repository": { | ||
"type": "git", | ||
"url": "git://github.com/inikulin/parse5.git" | ||
}, | ||
"files": ["lib"] | ||
} |
@@ -7,27 +7,25 @@ <p align="center"> | ||
<p align="center"> | ||
<i>HTML parsing/serialization toolset for Node.js. <a href="https://html.spec.whatwg.org/multipage/">WHATWG HTML Living Standard (aka HTML5)</a>-compliant.</i> | ||
</p> | ||
<div align="center"> | ||
<h1>parse5</h1> | ||
<i><b>HTML parser and serializer.</b></i> | ||
</div> | ||
<br> | ||
<p align="center"> | ||
<a href="https://travis-ci.org/inikulin/parse5"><img alt="Build Status" src="https://api.travis-ci.org/inikulin/parse5.svg"></a> | ||
<a href="https://www.npmjs.com/package/parse5"><img alt="NPM Version" src="https://img.shields.io/npm/v/parse5.svg"></a> | ||
<a href="https://npmjs.org/package/parse5"><img alt="Downloads" src="http://img.shields.io/npm/dm/parse5.svg"></a> | ||
<a href="https://npmjs.org/package/parse5"><img alt="Downloads total" src="http://img.shields.io/npm/dt/parse5.svg"></a> | ||
</p> | ||
<div align="center"> | ||
<code>npm install --save parse5</code> | ||
</div> | ||
<br> | ||
<p align="center"> | ||
<b><i>parse5</i></b> provides nearly everything you may need when dealing with HTML. It's the fastest spec-compliant HTML parser | ||
for Node to date. It parses HTML the way the latest version of your browser does. It has proven itself reliable in such projects | ||
as <a href="https://github.com/tmpvar/jsdom">jsdom</a>, <a href="https://github.com/angular/angular">Angular2</a>, <a href="https://www.polymer-project.org">Polymer</a> and many more. | ||
📖 <a href="https://github.com/inikulin/parse5/tree/master/packages/parse5/docs/index.md"><b>Documentation</b></a> 📖 | ||
</p> | ||
---- | ||
--- | ||
<p align="center"> | ||
<a href="http://inikulin.github.io/parse5">Documentation</a> | ||
<a href="https://github.com/inikulin/parse5/tree/master/docs/list-of-packages.md">List of parse5 toolset packages</a> | ||
</p> | ||
<p align="center"> | ||
<a href="http://inikulin.github.io/parse5#version-history">Version history</a> | ||
<a href="https://github.com/inikulin/parse5">GitHub</a> | ||
</p> | ||
@@ -40,3 +38,3 @@ | ||
<p align="center"> | ||
<a href="https://github.com/inikulin/parse5/issues">Issue tracker</a> | ||
<a href="https://github.com/inikulin/parse5/tree/master/docs/version-history.md">Version history</a> | ||
</p> |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
No tests
Quality: Package does not have any tests. This is a strong signal of a poorly maintained or low-quality package.
Found 1 instance in 1 package
Dynamic require
Supply chain risk: Dynamic require can indicate that the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
0
0
330102
26
6780
1
39