@mdream/js
Advanced tools
@@ -88,2 +88,69 @@ import { DEFAULT_BLOCK_SPACING, NO_SPACING } from "./const.mjs"; | ||
| } | ||
/**
 * Report whether prose may be hard-wrapped at the current position.
 *
 * @param {Object<number, number>} depthMap - per-tag-id nesting counters; a
 *   truthy entry means at least one element with that tag id is currently open.
 * @returns {boolean} `false` while any wrap-blocking tag is open, else `true`.
 */
function canWrapHere(depthMap) {
  // Ids 7..12 look like the six heading levels (TAG_H1 is 7); 34, 23, 32 and 31
  // are presumably the pre/code/table family — TODO confirm against the tag constants.
  const blockingTagIds = [34, 23, 32, 31, 7, 8, 9, 10, 11, 12];
  return !blockingTagIds.some((tagId) => depthMap[tagId]);
}
/**
 * Measure how many code points have been emitted on the current output line.
 *
 * Chunks are scanned from the newest to the oldest; counting stops at the
 * most recent "\n", so only text after the last line break contributes.
 *
 * @param {string[]} buffer - output chunks in emission order.
 * @returns {number} code-point count since the last newline, or of the whole
 *   buffer when it contains no newline.
 */
function currentColumn(buffer) {
  let width = 0;
  let idx = buffer.length;
  while (idx-- > 0) {
    const chunk = buffer[idx];
    const breakAt = chunk.lastIndexOf("\n");
    if (breakAt !== -1) {
      // Only the tail after the newest line break belongs to the current line.
      return width + Array.from(chunk.slice(breakAt + 1)).length;
    }
    // Array.from iterates by code point, so astral characters count once.
    width += Array.from(chunk).length;
  }
  return width;
}
/**
 * Build the string that must start every continuation line produced when the
 * text of `node` is wrapped, based on the node's ancestors.
 *
 * Ancestors are visited from the outermost to the innermost. Each ancestor
 * with tag id 22 contributes "> " and each ancestor with tag id 25 contributes
 * a run of spaces. (22/25 are presumably blockquote and list item — TODO
 * confirm against the tag constants.)
 *
 * @param {{listIndentWidths: number[]}} state - conversion state; the indent
 *   width of the n-th enclosing tag-25 ancestor is read from
 *   `listIndentWidths[n]`, defaulting to 2 when that entry is nullish.
 * @param {{parent?: object}} node - node whose `parent` chain is walked.
 * @returns {string} the continuation prefix ("" when no ancestor contributes).
 */
function wrapContinuationPrefix(state, node) {
  // Gather ancestors innermost-first, then flip to outermost-first.
  const ancestors = [];
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    ancestors.push(ancestor);
  }
  ancestors.reverse();

  const pieces = [];
  let listDepth = 0;
  for (const { tagId } of ancestors) {
    if (tagId === 22) {
      pieces.push("> ");
      continue;
    }
    if (tagId !== 25) continue;
    const indent = state.listIndentWidths[listDepth] ?? 2;
    pieces.push(" ".repeat(indent));
    listDepth += 1;
  }
  return pieces.join("");
}
/**
 * Hard-wrap a run of text on space boundaries.
 *
 * Words are the maximal runs between U+0020 characters. Before each word
 * (except a first word with no leading space) either a single space or a line
 * break followed by `prefix` is emitted; the break is chosen when the word
 * would push the line past `width` and the line already holds content beyond
 * the prefix. Runs of spaces collapse to one separator. One leading and one
 * trailing space of `value` are preserved so adjacent inline nodes stay
 * separated.
 *
 * A word that itself contains "\n" is handled line-aware: only the part
 * before its first newline is measured against `width`, and afterwards the
 * column restarts at the length of the part after its last newline. This
 * matches how the column is derived from already-emitted output.
 *
 * @param {string} value - text to wrap.
 * @param {number} col - code points already present on the current line.
 * @param {number} width - maximum line length in code points.
 * @param {string} prefix - text inserted after every line break this function adds.
 * @returns {string} the wrapped text.
 */
function wrapText(value, col, width, prefix) {
  const leading = value.charCodeAt(0) === 32;
  const trailing = value.charCodeAt(value.length - 1) === 32;
  const prefixLen = [...prefix].length;
  const len = value.length;
  let out = "";
  let first = true;
  let i = 0;
  while (i < len) {
    let next = value.indexOf(" ", i);
    if (next === -1) next = len;
    if (next > i) {
      const word = value.slice(i, next);
      const firstBreak = word.indexOf("\n");
      const hasBreak = firstBreak !== -1;
      // Only the part that lands on the current line counts toward overflow.
      const headLen = hasBreak ? [...word.slice(0, firstBreak)].length : [...word].length;
      // The very first word gets a separator only if the text began with a space.
      const needSpace = first ? leading : true;
      if (needSpace && col > prefixLen && col + 1 + headLen > width) {
        // Break instead of emitting the separator; never break an empty line.
        out += `\n${prefix}`;
        col = prefixLen;
      } else if (needSpace) {
        out += " ";
        col += 1;
      }
      out += word;
      // After an embedded newline the column restarts at the word's last line.
      col = hasBreak ? [...word.slice(word.lastIndexOf("\n") + 1)].length : col + headLen;
      first = false;
    }
    i = next + 1;
  }
  if (trailing && out !== "" && !out.endsWith(" ") && !out.endsWith("\n")) out += " ";
  // Whitespace-only input still yields one separating space.
  if (out === "" && (leading || trailing)) out = " ";
  return out;
}
| function calculateNewLineConfig(node) { | ||
@@ -167,4 +234,11 @@ const tagId = node.tagId; | ||
| } | ||
| state.buffer.push(textNode.value); | ||
| state.lastContentCache = textNode.value; | ||
| const wrapWidth = state.options?.wrapWidth; | ||
| if (wrapWidth && canWrapHere(state.depthMap)) { | ||
| const wrapped = wrapText(textNode.value, currentColumn(state.buffer), wrapWidth, wrapContinuationPrefix(state, textNode)); | ||
| state.buffer.push(wrapped); | ||
| state.lastContentCache = wrapped; | ||
| } else { | ||
| state.buffer.push(textNode.value); | ||
| state.lastContentCache = textNode.value; | ||
| } | ||
| } | ||
@@ -171,0 +245,0 @@ state.lastTextNode = textNode; |
@@ -140,2 +140,9 @@ declare const TAG_H1 = 7; | ||
| clean?: boolean | CleanOptions; | ||
| /** | ||
| * Hard-wrap prose at this many characters, breaking on word boundaries. | ||
| * Applied inline during conversion (zero-cost when unset). Code blocks | ||
| * (`<pre>`/`<code>`), tables, and headings are never wrapped. `0` disables | ||
| * wrapping. | ||
| */ | ||
| wrapWidth?: number; | ||
| } | ||
@@ -142,0 +149,0 @@ interface ElementNode extends Node { |
+5
-2
@@ -9,3 +9,6 @@ import { streamHtmlToMarkdown } from "./_chunks/src.mjs"; | ||
| async function streamingConvert(options = {}) { | ||
| let conversionOptions = { origin: options.origin }; | ||
| let conversionOptions = { | ||
| origin: options.origin, | ||
| wrapWidth: options.wrapWidth ? Number(options.wrapWidth) || void 0 : void 0 | ||
| }; | ||
| if (options.preset === "minimal") conversionOptions = withMinimalPreset(conversionOptions); | ||
@@ -18,3 +21,3 @@ const markdownGenerator = streamHtmlToMarkdown(Readable.toWeb(process.stdin), conversionOptions); | ||
| const cli = cac(); | ||
| cli.command("[options]", "Convert HTML from stdin to Markdown on stdout (JS engine)").option("--origin <url>", "Origin URL for resolving relative image paths").option("--preset <preset>", "Conversion presets: minimal").action(async (_, opts) => { | ||
| cli.command("[options]", "Convert HTML from stdin to Markdown on stdout (JS engine)").option("--origin <url>", "Origin URL for resolving relative image paths").option("--preset <preset>", "Conversion presets: minimal").option("--wrap-width <n>", "Hard-wrap prose at <n> characters on word boundaries").action(async (_, opts) => { | ||
| await streamingConvert(opts); | ||
@@ -21,0 +24,0 @@ }); |
+1
-1
| { | ||
| "name": "@mdream/js", | ||
| "type": "module", | ||
| "version": "1.3.0", | ||
| "version": "1.4.0", | ||
| "description": "JavaScript HTML-to-Markdown engine for mdream. Escape hatch for hooks and edge runtimes.", | ||
@@ -6,0 +6,0 @@ "author": { |
167780
1.52%3773
2.11%