markmap-html-parser
Advanced tools
Comparing version 0.15.9-alpha.0 to 0.15.9-alpha.5
@@ -19,11 +19,15 @@ import { IPureNode } from 'markmap-common'; | ||
level: Levels; | ||
parent: number; | ||
childrenLevel: Levels; | ||
children?: IHtmlNode[]; | ||
parent: number; | ||
comments?: string[]; | ||
} | ||
export interface IHtmlParserOptions { | ||
/** Matches nodes to be displayed on markmap. */ | ||
selector: string; | ||
/** If matches, the whole content will be preserved and no any other nodes will be selected. */ | ||
selectorPreserveContent: string; | ||
/** If matches, the current tag will be preserved along with its content. */ | ||
/** Matches wrapper elements that should be ignored. */ | ||
selectorWrapper: string; | ||
/** Matches elements that can be nested, such as `li`. */ | ||
selectorNesting: string; | ||
/** Matches elements whose tag name should be preserved, such as `<pre>`, `<img>`. */ | ||
selectorPreserveTag: string; | ||
@@ -33,2 +37,3 @@ } | ||
export declare function parseHtml(html: string, opts?: Partial<IHtmlParserOptions>): IHtmlNode; | ||
export declare function convertNode(htmlRoot: IHtmlNode): IPureNode; | ||
export declare function buildTree(html: string, opts?: Partial<IHtmlParserOptions>): IPureNode; |
@@ -18,2 +18,3 @@ "use strict"; | ||
})(Levels || {}); | ||
const MARKMAP_COMMENT_PREFIX = "markmap: "; | ||
const SELECTOR_HEADING = /^h[1-6]$/; | ||
@@ -24,3 +25,4 @@ const SELECTOR_LIST = /^[uo]l$/; | ||
selector: "h1,h2,h3,h4,h5,h6,ul,ol,li,table,pre,p>img:first-child:last-child", | ||
selectorPreserveContent: "table,pre", | ||
selectorWrapper: "div,p", | ||
selectorNesting: "ul,ol,li", | ||
selectorPreserveTag: "table,pre,img" | ||
@@ -43,7 +45,5 @@ }; | ||
const $ = cheerio.load(html); | ||
const $root = $("body"); | ||
let id = 0; | ||
const elMap = { | ||
[id]: $("<div>") | ||
}; | ||
const root = { | ||
const rootNode = { | ||
id, | ||
@@ -53,62 +53,124 @@ tag: "", | ||
level: 0, | ||
parent: 0, | ||
childrenLevel: 0, | ||
children: [], | ||
parent: 0 | ||
children: [] | ||
}; | ||
const nodeMap = { | ||
[id]: root | ||
const elMap = { | ||
[id]: $("<div>") | ||
}; | ||
let cur = root; | ||
$(options.selector).each((_, el) => { | ||
const headingStack = []; | ||
let skippingHeading = 0; | ||
checkNode($root); | ||
markmapCommon.walkTree(rootNode, (node, next) => { | ||
next(); | ||
const $el = elMap[node.id]; | ||
$el.contents().each((_, child) => { | ||
if (child.type === "comment") { | ||
const data = child.data.trim(); | ||
if (data.startsWith(MARKMAP_COMMENT_PREFIX)) { | ||
node.comments || (node.comments = []); | ||
node.comments.push(data.slice(MARKMAP_COMMENT_PREFIX.length).trim()); | ||
$(child).remove(); | ||
} | ||
} | ||
}); | ||
node.html = (($el.is(options.selectorPreserveTag) ? $.html($el) : $el.html()) || "").trimEnd(); | ||
}); | ||
return rootNode; | ||
function addChild($child, parent, nesting) { | ||
const child = $child[0]; | ||
const node = { | ||
id: ++id, | ||
tag: el.tagName, | ||
tag: child.tagName, | ||
html: "", | ||
level: getLevel(el.tagName), | ||
level: getLevel(child.tagName), | ||
childrenLevel: 0, | ||
children: $(el).is(options.selectorPreserveContent) ? void 0 : [], | ||
parent: 0 | ||
children: nesting ? [] : void 0, | ||
parent: parent.id | ||
}; | ||
elMap[node.id] = $(el); | ||
nodeMap[node.id] = node; | ||
if (node.level <= 6) { | ||
while (cur !== root && (cur.level > 6 || cur.tag >= node.tag)) { | ||
cur = nodeMap[cur.parent]; | ||
if (parent.children) { | ||
if (parent.childrenLevel === 0 || parent.childrenLevel > node.level) { | ||
parent.children = []; | ||
parent.childrenLevel = node.level; | ||
} | ||
} else { | ||
while (cur !== root && cur.level > 6 && !elMap[cur.id].find(el).length) { | ||
cur = nodeMap[cur.parent]; | ||
if (parent.childrenLevel === node.level) { | ||
parent.children.push(node); | ||
} | ||
} | ||
if (cur.children) { | ||
node.parent = cur.id; | ||
const level = getLevel(node.tag); | ||
if (!cur.childrenLevel || cur.childrenLevel > level) { | ||
cur.childrenLevel = level; | ||
cur.children = []; | ||
elMap[node.id] = $child; | ||
return node; | ||
} | ||
function getCurrentHeading(level) { | ||
let heading; | ||
while ((heading = headingStack.at(-1)) && heading.level >= level) { | ||
headingStack.pop(); | ||
} | ||
return heading || rootNode; | ||
} | ||
function checkNode($el, node) { | ||
$el.children().each((_, child) => { | ||
const $child = $(child); | ||
if ($child.is(options.selectorWrapper)) { | ||
checkNode($child); | ||
return; | ||
} | ||
if (cur.childrenLevel === level) { | ||
cur.children.push(node); | ||
if (!$child.is(options.selector)) { | ||
const level2 = getLevel(child.tagName); | ||
if (level2 <= 6) { | ||
skippingHeading = level2; | ||
} | ||
return; | ||
} | ||
cur = node; | ||
const level = getLevel(child.tagName); | ||
if (skippingHeading > 0 && level > skippingHeading) { | ||
return; | ||
} | ||
skippingHeading = 0; | ||
if ($child.is(options.selectorNesting)) { | ||
const childNode2 = addChild( | ||
$child, | ||
node || getCurrentHeading(level), | ||
true | ||
); | ||
if (childNode2) { | ||
checkNode($child, childNode2); | ||
$child.remove(); | ||
} | ||
return; | ||
} | ||
const isHeading = level <= 6; | ||
const childNode = addChild( | ||
$child, | ||
node || getCurrentHeading(level), | ||
isHeading | ||
); | ||
if (isHeading) | ||
headingStack.push(childNode); | ||
}); | ||
} | ||
} | ||
function convertNode(htmlRoot) { | ||
return markmapCommon.walkTree(htmlRoot, (htmlNode, next) => { | ||
const node = { | ||
content: htmlNode.html, | ||
children: next() || [] | ||
}; | ||
if (htmlNode.comments) { | ||
if (htmlNode.comments.includes("foldAll")) { | ||
node.payload = { fold: 2 }; | ||
} else if (htmlNode.comments.includes("fold")) { | ||
node.payload = { fold: 1 }; | ||
} | ||
} | ||
return node; | ||
}); | ||
markmapCommon.walkTree(root, (node, next) => { | ||
next(); | ||
const $el = elMap[node.id]; | ||
$el.remove(); | ||
node.html = (($el.is(options.selectorPreserveTag) ? $.html($el) : $el.html()) || "").trimEnd(); | ||
}); | ||
return root; | ||
} | ||
function buildTree(html, opts) { | ||
const htmlRoot = parseHtml(html, opts); | ||
return markmapCommon.walkTree(htmlRoot, (node, next) => ({ | ||
content: node.html, | ||
children: next() || [] | ||
})); | ||
return convertNode(htmlRoot); | ||
} | ||
exports.Levels = Levels; | ||
exports.buildTree = buildTree; | ||
exports.convertNode = convertNode; | ||
exports.defaultOptions = defaultOptions; | ||
exports.parseHtml = parseHtml; |
{ | ||
"name": "markmap-html-parser", | ||
"version": "0.15.9-alpha.0+9cb40e7", | ||
"version": "0.15.9-alpha.5+c64866f", | ||
"description": "Parse HTML into markmap data structure", | ||
@@ -44,3 +44,3 @@ "author": "Gerald <gera2ld@live.com>", | ||
"devDependencies": { | ||
"markmap-common": "0.15.9-alpha.0+9cb40e7" | ||
"markmap-common": "0.15.9-alpha.5+c64866f" | ||
}, | ||
@@ -64,3 +64,3 @@ "dependencies": { | ||
}, | ||
"gitHead": "9cb40e7e9ec0aa1f25a2de6a74912cd0c21f52ed" | ||
"gitHead": "c64866fb4873a99923be18ea70715aa49a6dabef" | ||
} |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Manifest confusion
Supply chain risk: This package has inconsistent metadata. This could be malicious or caused by an error when publishing the package.
Found 1 instance in 1 package
Manifest confusion
Supply chain risk: This package has inconsistent metadata. This could be malicious or caused by an error when publishing the package.
Found 1 instance in 1 package
502392
14308