markmap-html-parser
Advanced tools
Comparing version 0.16.0 to 0.16.1
@@ -0,1 +1,2 @@ | ||
import { AnyNode, Cheerio, CheerioAPI, Element } from 'cheerio'; | ||
import { IPureNode } from 'markmap-common'; | ||
@@ -25,11 +26,20 @@ export declare enum Levels { | ||
} | ||
/**
 * Context object passed to a selector rule (see `IHtmlParserSelectorRules`).
 */
export interface IHtmlParserContext { | ||
/** The element the rule is being evaluated for. */
$node: Cheerio<Element>; | ||
/** The Cheerio API available to the rule. */
$: CheerioAPI; | ||
/**
 * Produces the content for the given nodes: their HTML plus any comments
 * collected from them.
 *
 * NOTE(review): the `getContent` implementation visible in this diff accepts
 * only `$node`; confirm whether `preserveTag` is still honored.
 */
getContent($node: Cheerio<AnyNode>, preserveTag?: boolean): { | ||
html?: string; | ||
comments?: string[]; | ||
}; | ||
} | ||
/**
 * Value returned by a selector rule, describing how the matched element
 * should be handled by the parser.
 */
export interface IHtmlParserResult { | ||
/** HTML content for the resulting node; the parser falls back to an empty string when this is missing. */
html?: string | null; | ||
/** Comments to attach to the resulting node; only stored on the node when non-empty. */
comments?: string[]; | ||
/**
 * Elements the parser should check next. When `nesting` is falsy they are
 * checked in place of the matched element; otherwise they are checked
 * beneath the node created for the matched element.
 */
queue?: Cheerio<Element>; | ||
/** When falsy and `queue` is set, the matched element itself produces no node. */
nesting?: boolean; | ||
} | ||
/**
 * Maps a selector to the rule applied to elements matching it. The parser
 * uses the first entry whose selector matches, so entry order matters.
 */
export type IHtmlParserSelectorRules = Record<string, (context: IHtmlParserContext) => IHtmlParserResult>; | ||
export interface IHtmlParserOptions { | ||
/** Matches nodes to be displayed on markmap. */ | ||
selector: string; | ||
/** Matches wrapper elements that should be ignored. */ | ||
selectorWrapper: string; | ||
/** Matches elements that can be nested, such as `li`. */ | ||
selectorNesting: string; | ||
/** Matches elements whose tag name should be preserved, such as `<pre>`, `<img>`. */ | ||
selectorPreserveTag: string; | ||
/** Rules keyed by selector; the first entry whose selector matches an element decides how it is handled. */
selectorRules: IHtmlParserSelectorRules; | ||
} | ||
@@ -36,0 +46,0 @@ export declare const defaultOptions: IHtmlParserOptions; |
@@ -18,2 +18,39 @@ "use strict"; | ||
})(Levels || {}); | ||
/**
 * Built-in selector rules. Entry order is significant: the parser picks the
 * first entry whose selector matches the element.
 */
const defaultSelectorRules = {
  // Wrappers: nothing of their own, just hand over their child elements.
  "div,p": (context) => {
    const { $node } = context;
    return { queue: $node.children() };
  },
  // Headings: content is built from all child nodes (text and comments included).
  "h1,h2,h3,h4,h5,h6": (context) => {
    const { $node, getContent } = context;
    return Object.assign({}, getContent($node.contents()));
  },
  // Lists: child elements are queued and kept nested.
  "ul,ol": (context) => {
    const { $node } = context;
    return { queue: $node.children(), nesting: true };
  },
  // List items: own content first, nested lists are queued.
  li: (context) => {
    const { $node, getContent } = context;
    const nestedLists = $node.children().filter("ul,ol");
    const startsWithBlock = $node.contents().first().is("div,p");
    let $own;
    if (startsWithBlock) {
      // A leading div/p carries the item's content.
      $own = $node.children().first();
    } else {
      // Otherwise take everything that precedes the first nested list.
      const $all = $node.contents();
      const listAt = $all.index(nestedLists);
      $own = listAt >= 0 ? $all.slice(0, listAt) : $all;
    }
    return Object.assign({ queue: nestedLists, nesting: true }, getContent($own));
  },
  // Tables, code blocks and standalone images: content is the element itself.
  "table,pre,p>img:only-child": (context) => {
    const { $node, getContent } = context;
    return Object.assign({}, getContent($node));
  }
};
// Default parser options: `selector` lists the elements that may become
// nodes, and `selectorRules` points at the built-in rule table.
const defaultOptions = { | ||
selector: "h1,h2,h3,h4,h5,h6,ul,ol,li,table,pre,p>img:only-child", | ||
selectorRules: defaultSelectorRules | ||
}; | ||
const MARKMAP_COMMENT_PREFIX = "markmap: "; | ||
@@ -23,8 +60,2 @@ const SELECTOR_HEADING = /^h[1-6]$/; | ||
const SELECTOR_LIST_ITEM = /^li$/; | ||
const defaultOptions = { | ||
selector: "h1,h2,h3,h4,h5,h6,ul,ol,li,table,pre,p>img:only-child", | ||
selectorWrapper: "div,p", | ||
selectorNesting: "ul,ol,li", | ||
selectorPreserveTag: "table,pre,img" | ||
}; | ||
function getLevel(tagName) { | ||
@@ -56,39 +87,24 @@ if (SELECTOR_HEADING.test(tagName)) | ||
}; | ||
const elMap = { | ||
[id]: $("<div>") | ||
}; | ||
const headingStack = []; | ||
let skippingHeading = 0; | ||
checkNode($root); | ||
markmapCommon.walkTree(rootNode, (node, next) => { | ||
next(); | ||
const $el = elMap[node.id]; | ||
$el.contents().each((_, child) => { | ||
if (child.type === "comment") { | ||
const data2 = child.data.trim(); | ||
if (data2.startsWith(MARKMAP_COMMENT_PREFIX)) { | ||
node.comments || (node.comments = []); | ||
node.comments.push(data2.slice(MARKMAP_COMMENT_PREFIX.length).trim()); | ||
$(child).remove(); | ||
} | ||
} | ||
}); | ||
const data = $el.data(); | ||
if (data && Object.keys(data).length) { | ||
node.data = data; | ||
} | ||
node.html = (($el.is(options.selectorPreserveTag) ? $.html($el) : $el.html()) || "").trimEnd(); | ||
}); | ||
checkNodes($root.children()); | ||
return rootNode; | ||
function addChild($child, parent, nesting) { | ||
const child = $child[0]; | ||
function addChild(props) { | ||
var _a; | ||
const { parent } = props; | ||
const node = { | ||
id: ++id, | ||
tag: child.tagName, | ||
html: "", | ||
level: getLevel(child.tagName), | ||
tag: props.tagName, | ||
level: props.level, | ||
html: props.html, | ||
childrenLevel: 0, | ||
children: nesting ? [] : void 0, | ||
children: props.nesting ? [] : void 0, | ||
parent: parent.id | ||
}; | ||
if ((_a = props.comments) == null ? void 0 : _a.length) { | ||
node.comments = props.comments; | ||
} | ||
if (Object.keys(props.data || {}).length) { | ||
node.data = props.data; | ||
} | ||
if (parent.children) { | ||
@@ -103,3 +119,2 @@ if (parent.childrenLevel === 0 || parent.childrenLevel > node.level) { | ||
} | ||
elMap[node.id] = $child; | ||
return node; | ||
@@ -114,41 +129,60 @@ } | ||
} | ||
function checkNode($el, node) { | ||
$el.children().each((_, child) => { | ||
/**
 * Builds the content for a set of nodes: strips the magic comments out,
 * serializes what is left and trims trailing whitespace from the HTML.
 * Returns `{ comments, html }`.
 */
function getContent($node) {
  const { $node: $kept, comments } = extractMagicComments($node);
  const rendered = $.html($kept);
  // Guard against a nullish serialization result before trimming.
  const html = rendered == null ? undefined : rendered.trimEnd();
  return { comments, html };
}
/**
 * Separates magic comments from a node set.
 * Comment nodes whose trimmed text starts with MARKMAP_COMMENT_PREFIX are
 * dropped from the set and their remaining text (trimmed) is collected.
 * Returns the filtered set as `$node` together with the collected `comments`.
 */
function extractMagicComments($node) {
  const comments = [];
  const $kept = $node.filter((_, child) => {
    if (child.type !== "comment")
      return true;
    const text = child.data.trim();
    if (!text.startsWith(MARKMAP_COMMENT_PREFIX))
      return true;
    comments.push(text.slice(MARKMAP_COMMENT_PREFIX.length).trim());
    return false;
  });
  return { $node: $kept, comments };
}
function checkNodes($els, node) { | ||
$els.each((_, child) => { | ||
var _a; | ||
const $child = $(child); | ||
if ($child.is(options.selectorWrapper)) { | ||
checkNode($child); | ||
const rule = (_a = Object.entries(options.selectorRules).find( | ||
([selector]) => $child.is(selector) | ||
)) == null ? void 0 : _a[1]; | ||
const result = rule == null ? void 0 : rule({ $node: $child, $, getContent }); | ||
if ((result == null ? void 0 : result.queue) && !result.nesting) { | ||
checkNodes(result.queue, node); | ||
return; | ||
} | ||
if (!$child.is(options.selector)) { | ||
const level2 = getLevel(child.tagName); | ||
if (level2 <= 6) { | ||
skippingHeading = level2; | ||
const level = getLevel(child.tagName); | ||
if (!result) { | ||
if (level <= 6) { | ||
skippingHeading = level; | ||
} | ||
return; | ||
} | ||
const level = getLevel(child.tagName); | ||
if (skippingHeading > 0 && level > skippingHeading) { | ||
if (skippingHeading > 0 && level > skippingHeading) | ||
return; | ||
} | ||
if (!$child.is(options.selector)) | ||
return; | ||
skippingHeading = 0; | ||
if ($child.is(options.selectorNesting)) { | ||
const childNode2 = addChild( | ||
$child, | ||
node || getCurrentHeading(level), | ||
true | ||
); | ||
if (childNode2) { | ||
checkNode($child, childNode2); | ||
$child.remove(); | ||
} | ||
return; | ||
} | ||
const isHeading = level <= 6; | ||
const childNode = addChild( | ||
$child, | ||
node || getCurrentHeading(level), | ||
isHeading | ||
); | ||
const childNode = addChild({ | ||
parent: node || getCurrentHeading(level), | ||
nesting: !!result.queue || isHeading, | ||
tagName: child.tagName, | ||
level, | ||
html: result.html || "", | ||
comments: result.comments, | ||
data: $child.data() | ||
}); | ||
if (isHeading) | ||
headingStack.push(childNode); | ||
if (result.queue) | ||
checkNodes(result.queue, childNode); | ||
}); | ||
@@ -155,0 +189,0 @@ } |
{ | ||
"name": "markmap-html-parser", | ||
"version": "0.16.0", | ||
"version": "0.16.1", | ||
"description": "Parse HTML into markmap data structure", | ||
@@ -64,3 +64,3 @@ "author": "Gerald <gera2ld@live.com>", | ||
}, | ||
"gitHead": "caf3f840ad39c240ba522bfea83c57de62c6cac4" | ||
"gitHead": "f10ad62ef03e02a1843484cca51521f48989fab7" | ||
} |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
506653
14448