@mdream/js
Advanced tools
+224
-1
@@ -985,2 +985,201 @@ import { BLOCKQUOTE_SPACING, LIST_ITEM_SPACING, NO_SPACING, TABLE_ROW_SPACING, TagIdMap } from "./const.mjs"; | ||
/** Character code of `]` (U+005D). */
const CLOSE_BRACKET_CHAR = 93;

/**
 * Numeric tag ids that may remain inside an open `<head>` (tag id 1).
 *
 * The start-tag handler checks this set while a head element is open: any
 * incoming tag whose id is NOT listed here causes every open node up to and
 * including the head node to be closed before the new tag is processed.
 *
 * NOTE(review): the ids are presumably inlined `TagIdMap` values from
 * ./const.mjs — confirm the id-to-tag-name mapping there before editing.
 */
const HEAD_CONTENT_TAGS = new Set([4, 5, 54, 56, 53, 52, 84, 78]);
/**
 * Byte lookup table indexed by numeric tag id: `1` marks a start tag that
 * implicitly ends an open paragraph (tag id 35), `0` everything else.
 *
 * The table has 109 slots, which covers the largest id referenced in this
 * module (107). The start-tag handler bounds-checks `tagId < 109` before
 * indexing, so the two numbers must be kept in sync.
 *
 * NOTE(review): ids are presumably inlined `TagIdMap` values — confirm the
 * mapping in ./const.mjs before adding or removing entries.
 */
const CLOSES_P = (() => {
  const table = new Uint8Array(109);
  for (const id of [
    36, 35, 24, 33, 101, 25, 98, 99, 28,
    // Ids 7-12 are the same ids listed in HEADINGS.
    7, 8, 9, 10, 11, 12,
    22, 51, 50, 105, 47, 41, 88, 34, 13, 40, 68,
    102, 106, 100, 104, 45, 2, 3, 75
  ]) table[id] = 1;
  return table;
})();

/**
 * Superset of {@link CLOSES_P}: every start tag for which the start-tag
 * handler runs implied-end-tag recovery at all. Besides the paragraph
 * closers it flags id 26 (anchor recovery via SINGLE_A) and ids 32, 31, 30,
 * 29, 38, 39 (table cell / row / section recovery).
 *
 * Built from a copy (`slice()`), so CLOSES_P itself is never modified.
 */
const NEEDS_IMPLIED_END_RECOVERY = (() => {
  const table = CLOSES_P.slice();
  for (const id of [26, 32, 31, 30, 29, 38, 39]) table[id] = 1;
  return table;
})();
/*
 * Scope boundaries for implied-end-tag recovery.
 *
 * Each set is passed as the `boundary` argument of `closeImpliedTo`: while
 * walking up from the current node looking for the element to close, the
 * walk gives up (and nothing is closed) as soon as it meets an ancestor whose
 * tag id is in the boundary set. This keeps a new tag from closing an element
 * that sits outside its enclosing container.
 *
 * NOTE(review): ids are presumably inlined `TagIdMap` values — confirm the
 * id-to-tag-name mapping in ./const.mjs before editing any of these lists.
 */

/** Boundary used when looking for an open paragraph (target SINGLE_P). */
const P_SCOPE_BOUNDARY = new Set([43, 32, 31, 107, 28, 78, 0, 24, 33, 101, 25]);

/** Boundary used when looking for an open list item (target SINGLE_LI). */
const LI_SCOPE_BOUNDARY = new Set([24, 33, 28, 32, 31, 107, 78, 0]);

/** Boundary used when looking for an open definition term/description (target DT_DD). */
const DL_SCOPE_BOUNDARY = new Set([101, 24, 33, 25, 28, 32, 31, 107, 78, 0]);

/** Boundary used when looking for an open table cell (target TD_TH). */
const CELL_SCOPE_BOUNDARY = new Set([30, 29, 38, 39, 28, 107, 78, 0]);

/** Boundary used when looking for an open anchor (target SINGLE_A). */
const A_SCOPE_BOUNDARY = new Set([
  35, 36, 25, 24, 33, 101, 98, 99, 28, 32, 31, 30, 107,
  22, 51, 50, 105, 47, 41, 88, 104, 40, 68, 102, 43,
  // Ids 7-12 are the same ids listed in HEADINGS.
  7, 8, 9, 10, 11, 12,
  78, 0
]);
/*
 * Target / closeable sets for implied-end-tag recovery.
 *
 * NOTE(review): ids are presumably inlined `TagIdMap` values — confirm the
 * id-to-tag-name mapping in ./const.mjs before editing.
 */

/** Heading tag ids; a new heading closes a heading that is the current node. */
const HEADINGS = new Set([7, 8, 9, 10, 11, 12]);

/** `closeImpliedTo` target: the paragraph tag id. */
const SINGLE_P = new Set([35]);

/** `closeImpliedTo` target: the list-item tag id. */
const SINGLE_LI = new Set([25]);

/** `closeImpliedTo` target: the anchor tag id. */
const SINGLE_A = new Set([26]);

/** `closeImpliedTo` target: either definition-list item id (a new one closes whichever is open). */
const DT_DD = new Set([99, 98]);

/** `closeImpliedTo` target: either table-cell id. */
const TD_TH = new Set([32, 31]);

/** `closeTableContext` closeable set used when a new row (id 30) starts: open cells and the open row. */
const TR_CELLS = new Set([32, 31, 30]);

/**
 * `closeTableContext` closeable set used when a new table section
 * (ids 29, 38, 39) starts: open cells, rows, sections and id 107.
 */
const SECTION_CELLS = new Set([32, 31, 30, 29, 38, 39, 107]);
/**
 * Close the nearest open element whose tag id is in `target`, together with
 * everything nested inside it, provided no scope boundary lies in between.
 *
 * Phase 1 walks from `state.currentNode` up the `parent` chain. The walk
 * succeeds at the first node whose tag id is in `target` and aborts at the
 * first node whose tag id is in `boundary` (target is tested first, so an id
 * present in both sets counts as a match). If no target is in scope the
 * function returns without closing anything.
 *
 * Phase 2 repeatedly closes `state.currentNode` until the node just closed
 * was itself a target.
 *
 * @param {object} state Parser state; only `state.currentNode` is read here.
 * @param {Set<number>} target Tag ids of the element to close.
 * @param {Set<number>} boundary Tag ids that stop the upward search.
 * @param {Function} handleEvent Event sink forwarded unchanged to `closeNode`.
 * @returns {void}
 */
function closeImpliedTo(state, target, boundary, handleEvent) {
  let found = false;
  for (let node = state.currentNode; node; node = node.parent) {
    const id = node.tagId;
    if (id !== void 0 && target.has(id)) {
      found = true;
      break;
    }
    if (id !== void 0 && boundary.has(id)) break;
  }
  if (!found) return;
  // NOTE(review): this loop relies on closeNode advancing state.currentNode
  // (presumably to the closed node's parent) — confirm in closeNode.
  while (state.currentNode) {
    const id = state.currentNode.tagId;
    // Decide before closing: closeNode may replace state.currentNode.
    const isTarget = id !== void 0 && target.has(id);
    closeNode(state.currentNode, state, handleEvent);
    if (isTarget) break;
  }
}
/**
 * Close open nodes from the top of the stack for as long as the current
 * node's tag id is in `closeable`.
 *
 * Unlike `closeImpliedTo` there is no upward search: the loop stops at the
 * first current node that has no tag id or whose id is not closeable, so
 * nothing beyond a non-closeable node is touched.
 *
 * @param {object} state Parser state; only `state.currentNode` is read here.
 * @param {Set<number>} closeable Tag ids that may be closed.
 * @param {Function} handleEvent Event sink forwarded unchanged to `closeNode`.
 * @returns {void}
 */
function closeTableContext(state, closeable, handleEvent) {
  // NOTE(review): relies on closeNode advancing state.currentNode — confirm.
  while (state.currentNode) {
    const id = state.currentNode.tagId;
    if (id === void 0 || !closeable.has(id)) break;
    closeNode(state.currentNode, state, handleEvent);
  }
}
/**
 * Commit end-of-input state after the final `parseHtmlStream` call.
 *
 * 1. If `leftover` is non-empty and does not begin with `LT_CHAR`, it is
 *    flushed through `processTextBuffer` as trailing text. A residual that
 *    begins with `LT_CHAR` (an unfinished tag) is dropped.
 * 2. Every element still open is closed, innermost first, by repeatedly
 *    closing `state.currentNode` until none remains.
 *
 * @param {string} leftover Residual returned by the last `parseHtmlStream` call.
 * @param {object} state Parser state shared with the preceding parse calls.
 * @param {Function} handleEvent Event sink forwarded to the helpers.
 * @returns {void}
 */
function finalizeParse(leftover, state, handleEvent) {
  const hasTrailingText = leftover.length > 0 && leftover.charCodeAt(0) !== LT_CHAR;
  if (hasTrailingText) processTextBuffer(leftover, state, handleEvent);
  // NOTE(review): relies on closeNode advancing state.currentNode — confirm.
  while (state.currentNode) closeNode(state.currentNode, state, handleEvent);
}
/** Frozen, empty object; being frozen, it is safe to share wherever an empty attribute map is needed. */
const EMPTY_ATTRIBUTES = Object.freeze({});
@@ -1383,2 +1582,26 @@ function copyDepthMap(depthMap) { | ||
| }; | ||
| if ((state.depthMap[1] || 0) > 0 && !HEAD_CONTENT_TAGS.has(tagId)) { | ||
| while (state.currentNode && state.currentNode.tagId !== 1) closeNode(state.currentNode, state, handleEvent); | ||
| const headNode = state.currentNode; | ||
| if (headNode && headNode.tagId === 1) closeNode(headNode, state, handleEvent); | ||
| } | ||
| if (tagId >= 0 && tagId < 109 && NEEDS_IMPLIED_END_RECOVERY[tagId] === 1) if (tagId === 26) { | ||
| if ((state.depthMap[26] || 0) > 0) closeImpliedTo(state, SINGLE_A, A_SCOPE_BOUNDARY, handleEvent); | ||
| } else if (tagId === 32 || tagId === 31 || tagId === 30 || tagId === 29 || tagId === 38 || tagId === 39) { | ||
| if ((state.depthMap[28] || 0) > 0) if (tagId === 32 || tagId === 31) { | ||
| if ((state.depthMap[32] || 0) > 0 || (state.depthMap[31] || 0) > 0) closeImpliedTo(state, TD_TH, CELL_SCOPE_BOUNDARY, handleEvent); | ||
| } else if (tagId === 30) { | ||
| if ((state.depthMap[30] || 0) > 0) closeTableContext(state, TR_CELLS, handleEvent); | ||
| } else closeTableContext(state, SECTION_CELLS, handleEvent); | ||
| } else { | ||
| if ((state.depthMap[35] || 0) > 0) closeImpliedTo(state, SINGLE_P, P_SCOPE_BOUNDARY, handleEvent); | ||
| if (HEADINGS.has(tagId)) { | ||
| const top = state.currentNode; | ||
| if (top && top.tagId !== void 0 && HEADINGS.has(top.tagId)) closeNode(top, state, handleEvent); | ||
| } else if (tagId === 25) { | ||
| if ((state.depthMap[25] || 0) > 0) closeImpliedTo(state, SINGLE_LI, LI_SCOPE_BOUNDARY, handleEvent); | ||
| } else if (tagId === 99 || tagId === 98) { | ||
| if ((state.depthMap[99] || 0) > 0 || (state.depthMap[98] || 0) > 0) closeImpliedTo(state, DT_DD, DL_SCOPE_BOUNDARY, handleEvent); | ||
| } | ||
| } | ||
| const currentTagCount = result.attributes && result.attributes.id ? void 0 : state.depthMap[tagId] || 0; | ||
@@ -1559,2 +1782,2 @@ state.depthMap[tagId] = (currentTagCount || 0) + 1; | ||
| } | ||
| export { buildTagOverrideHandlers, parseAttributes, parseHtml, parseHtmlStream }; | ||
| export { buildTagOverrideHandlers, finalizeParse, parseAttributes, parseHtml, parseHtmlStream }; |
| import { DEFAULT_BLOCK_SPACING, NO_SPACING } from "./const.mjs"; | ||
| import { parseHtmlStream } from "./parse.mjs"; | ||
| import { finalizeParse, parseHtmlStream } from "./parse.mjs"; | ||
| import { extractionCollectorPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, tailwindPlugin } from "./plugins.mjs"; | ||
@@ -308,3 +308,3 @@ function processPluginsForEvent(event, plugins, state, processEvent) { | ||
| function processHtml(html) { | ||
| parseHtmlStream(html, { | ||
| const parseState = { | ||
| depthMap: state.depthMap, | ||
@@ -314,5 +314,7 @@ depth: 0, | ||
| tagOverrideHandlers | ||
| }, (event) => { | ||
| }; | ||
| const handleEvent = (event) => { | ||
| processPluginsForEvent(event, resolvedPlugins, state, processEvent); | ||
| }); | ||
| }; | ||
| finalizeParse(parseHtmlStream(html, parseState, handleEvent), parseState, handleEvent); | ||
| } | ||
@@ -319,0 +321,0 @@ function getMarkdown() { |
| import "./const.mjs"; | ||
| import { buildTagOverrideHandlers, parseHtmlStream } from "./parse.mjs"; | ||
| import { buildTagOverrideHandlers, finalizeParse, parseHtmlStream } from "./parse.mjs"; | ||
| import { createMarkdownProcessor, processPluginsForEvent, resolvePlugins } from "./resolve-plugins.mjs"; | ||
@@ -313,5 +313,6 @@ import "./plugins.mjs"; | ||
| } | ||
| if (remainingHtml) parseHtmlStream(remainingHtml, parseState, (event) => { | ||
| const handleEvent = (event) => { | ||
| processPluginsForEvent(event, resolvedPlugins, processor.state, processor.processEvent); | ||
| }); | ||
| }; | ||
| finalizeParse(remainingHtml ? parseHtmlStream(remainingHtml, parseState, handleEvent) : "", parseState, handleEvent); | ||
| const finalChunk = processor.getMarkdownChunk(); | ||
@@ -318,0 +319,0 @@ if (finalChunk) yield finalChunk; |
+15
-1
| import { ElementNode, Node, NodeEvent, TagHandler, TransformPlugin } from "./_chunks/types.mjs"; | ||
| /** | ||
| * Commit end-of-input state: flush trailing buffered text and close any open | ||
| * elements. The streaming parser keeps trailing text and unclosed elements | ||
| * pending (a later chunk might continue them); at true EOF they must be | ||
| * committed so trailing content is not dropped (e.g. `<p>a<p>b`). | ||
| * | ||
| * `leftover` is the residual returned by the final `parseHtmlStream`. Pure | ||
| * trailing text (no leading `<`) is emitted; a residual that is an incomplete | ||
| * start tag (leading `<`) is dropped, matching the browser tokenizer's | ||
| * EOF-in-tag behaviour. The text-buffer flags set while the trailing text was | ||
| * scanned persist on `state`, so `processTextBuffer` commits it as if the next | ||
| * tag had triggered the flush. | ||
| */ | ||
| declare function finalizeParse(leftover: string, state: ParseState, handleEvent: (event: NodeEvent) => void): void; | ||
| interface ParseOptions { | ||
@@ -61,2 +75,2 @@ resolvedPlugins?: TransformPlugin[]; | ||
| declare function parseAttributes(attrStr: string): Record<string, string>; | ||
| export { ParseOptions, ParseResult, ParseState, parseAttributes, parseHtml, parseHtmlStream }; | ||
| export { ParseOptions, ParseResult, ParseState, finalizeParse, parseAttributes, parseHtml, parseHtmlStream }; |
+2
-2
@@ -1,2 +0,2 @@ | ||
| import { parseAttributes, parseHtml, parseHtmlStream } from "./_chunks/parse.mjs"; | ||
| export { parseAttributes, parseHtml, parseHtmlStream }; | ||
| import { finalizeParse, parseAttributes, parseHtml, parseHtmlStream } from "./_chunks/parse.mjs"; | ||
| export { finalizeParse, parseAttributes, parseHtml, parseHtmlStream }; |
+1
-1
| { | ||
| "name": "@mdream/js", | ||
| "type": "module", | ||
| "version": "1.4.0", | ||
| "version": "1.4.1", | ||
| "description": "JavaScript HTML-to-Markdown engine for mdream. Escape hatch for hooks and edge runtimes.", | ||
@@ -6,0 +6,0 @@ "author": { |
172917
3.06%3999
5.99%