@mdream/js
Advanced tools
@@ -127,3 +127,2 @@ const TAG_H1 = 7; | ||
| const TABLE_ROW_SPACING = [0, 1]; | ||
| //#endregion | ||
| export { NO_SPACING as a, TABLE_ROW_SPACING as c, TAG_H3 as d, TAG_H4 as f, TagIdMap as g, TEXT_NODE as h, LIST_ITEM_SPACING as i, TAG_H1 as l, TAG_H6 as m, DEFAULT_BLOCK_SPACING as n, NodeEventEnter as o, TAG_H5 as p, ELEMENT_NODE as r, NodeEventExit as s, BLOCKQUOTE_SPACING as t, TAG_H2 as u }; |
| import { _ as TransformPlugin, c as MdreamOptions } from "./types.mjs"; | ||
| //#region src/pluggable/plugin.d.ts | ||
| /** | ||
@@ -9,7 +7,4 @@ * Create a plugin with type-safe hook definitions. | ||
| declare function createPlugin(plugin: TransformPlugin): TransformPlugin; | ||
| //#endregion | ||
| //#region src/index.d.ts | ||
| declare function htmlToMarkdown(html: string, options?: Partial<MdreamOptions>): string; | ||
| declare function streamHtmlToMarkdown(htmlStream: ReadableStream<Uint8Array | string> | null, options?: Partial<MdreamOptions>): AsyncIterable<string>; | ||
| //#endregion | ||
| export { streamHtmlToMarkdown as n, createPlugin as r, htmlToMarkdown as t }; |
| import { i as EngineOptions } from "./types.mjs"; | ||
| //#region src/preset/minimal.d.ts | ||
| /** | ||
@@ -9,3 +7,2 @@ * Creates a configurable minimal preset with advanced options. | ||
| declare function withMinimalPreset<T extends EngineOptions>(options?: T): T; | ||
| //#endregion | ||
| export { withMinimalPreset as t }; |
| import { a as NO_SPACING, c as TABLE_ROW_SPACING, g as TagIdMap, i as LIST_ITEM_SPACING, t as BLOCKQUOTE_SPACING } from "./const.mjs"; | ||
| //#region src/tags.ts | ||
| function resolveUrl(url, origin) { | ||
@@ -563,7 +562,2 @@ if (!url) return url; | ||
| }; | ||
| /** | ||
| * Build a map of tag name → TagHandler from declarative tagOverrides config. | ||
| * For alias (string value): clone the handler for the aliased tag. | ||
| * For override object: overlay fields onto the base handler (if tag is known). | ||
| */ | ||
| function buildTagOverrideHandlers(overrides) { | ||
@@ -601,4 +595,2 @@ const result = /* @__PURE__ */ new Map(); | ||
| } | ||
| //#endregion | ||
| //#region src/entities.ts | ||
| const HTML_ENTITIES = { | ||
@@ -851,7 +843,2 @@ "á": "á", | ||
| }; | ||
| //#endregion | ||
| //#region src/utils.ts | ||
| /** | ||
| * Decode HTML entities - single pass with O(1) named entity lookup | ||
| */ | ||
| function decodeHTMLEntities(text) { | ||
@@ -907,4 +894,2 @@ let result = ""; | ||
| } | ||
| //#endregion | ||
| //#region src/parse.ts | ||
| const LT_CHAR = 60; | ||
@@ -932,12 +917,5 @@ const GT_CHAR = 62; | ||
| } | ||
| /** | ||
| * Fast whitespace check using direct character code comparison | ||
| */ | ||
| function isWhitespace(charCode) { | ||
| return charCode === SPACE_CHAR || charCode === TAB_CHAR || charCode === NEWLINE_CHAR || charCode === CARRIAGE_RETURN_CHAR; | ||
| } | ||
| /** | ||
| * Pure HTML parser that emits DOM events | ||
| * Completely decoupled from markdown generation | ||
| */ | ||
| function parseHtml(html, options = {}) { | ||
@@ -956,11 +934,5 @@ const events = []; | ||
| } | ||
| /** | ||
| * Streaming HTML parser - calls onEvent for each DOM event | ||
| */ | ||
| function parseHtmlStream(html, state, onEvent) { | ||
| return parseHtmlInternal(html, state, onEvent); | ||
| } | ||
| /** | ||
| * Internal parsing function - extracted from original parseHTML | ||
| */ | ||
| function parseHtmlInternal(htmlChunk, state, handleEvent) { | ||
@@ -1107,5 +1079,2 @@ let textBuffer = ""; | ||
| } | ||
| /** | ||
| * Process accumulated text buffer and create text node event | ||
| */ | ||
| function processTextBuffer(textBuffer, state, handleEvent) { | ||
@@ -1149,5 +1118,2 @@ const containsNonWhitespace = state.textBufferContainsNonWhitespace; | ||
| } | ||
| /** | ||
| * Process HTML closing tag | ||
| */ | ||
| function processClosingTag(htmlChunk, position, state, handleEvent) { | ||
@@ -1193,5 +1159,2 @@ let i = position + 2; | ||
| } | ||
| /** | ||
| * Close a node and emit exit event | ||
| */ | ||
| function closeNode(node, state, handleEvent) { | ||
@@ -1232,5 +1195,2 @@ if (!node) return; | ||
| } | ||
| /** | ||
| * Process HTML comment or doctype | ||
| */ | ||
| function processCommentOrDoctype(htmlChunk, position) { | ||
@@ -1277,5 +1237,2 @@ let i = position; | ||
| } | ||
| /** | ||
| * Process HTML opening tag | ||
| */ | ||
| function processOpeningTag(tagName, tagId, htmlChunk, i, state, handleEvent) { | ||
@@ -1334,5 +1291,2 @@ if (state.currentNode?.tagHandler?.isNonNesting) closeNode(state.currentNode, state, handleEvent); | ||
| } | ||
| /** | ||
| * Extract and process HTML tag attributes | ||
| */ | ||
| function processTagAttributes(htmlChunk, position, tagHandler) { | ||
@@ -1385,5 +1339,2 @@ let i = position; | ||
| } | ||
| /** | ||
| * Parse HTML attributes string into key-value object | ||
| */ | ||
| function parseAttributes(attrStr) { | ||
@@ -1473,3 +1424,2 @@ if (!attrStr) return EMPTY_ATTRIBUTES; | ||
| } | ||
| //#endregion | ||
| export { buildTagOverrideHandlers as i, parseHtml as n, parseHtmlStream as r, parseAttributes as t }; |
+0
-128
| import { g as TagIdMap } from "./const.mjs"; | ||
| //#region src/pluggable/plugin.ts | ||
| /** | ||
| * Create a plugin with type-safe hook definitions. | ||
| * All TransformPlugin fields are optional, so this is a typed identity function. | ||
| */ | ||
| function createPlugin(plugin) { | ||
| return plugin; | ||
| } | ||
| //#endregion | ||
| //#region src/libs/query-selector.ts | ||
| /** | ||
| * Creates a tag selector matcher (e.g., 'div', 'p', 'h1') | ||
| */ | ||
| function createTagSelector(tagName) { | ||
@@ -21,5 +11,2 @@ return { | ||
| } | ||
| /** | ||
| * Creates an ID selector matcher (e.g., '#main', '#content') | ||
| */ | ||
| function createIdSelector(selector) { | ||
@@ -32,5 +19,2 @@ const id = selector.slice(1); | ||
| } | ||
| /** | ||
| * Creates a class selector matcher (e.g., '.container', '.header') | ||
| */ | ||
| function createClassSelector(selector) { | ||
@@ -46,6 +30,2 @@ const className = selector.slice(1); | ||
| } | ||
| /** | ||
| * Parses attribute selectors like [attr], [attr=value], [attr^="value"] | ||
| * Uses a manual parser to avoid polynomial regex backtracking (CodeQL ReDoS). | ||
| */ | ||
| function parseAttributeSelector(selector) { | ||
@@ -77,5 +57,2 @@ if (selector.charCodeAt(0) !== 91) return null; | ||
| } | ||
| /** | ||
| * Creates an attribute selector matcher (e.g., '[data-id]', '[href="https://example.com"]') | ||
| */ | ||
| function createAttributeSelector(selector) { | ||
@@ -108,5 +85,2 @@ const parsed = parseAttributeSelector(selector); | ||
| } | ||
| /** | ||
| * Creates a compound selector that combines multiple selectors (e.g., 'div.container', 'h1#title') | ||
| */ | ||
| function createCompoundSelector(selectors) { | ||
@@ -118,5 +92,2 @@ return { | ||
| } | ||
| /** | ||
| * Parses a CSS selector into a matcher | ||
| */ | ||
| function parseSelector(selector) { | ||
@@ -150,7 +121,2 @@ selector = selector.trim(); | ||
| } | ||
| //#endregion | ||
| //#region src/plugins/extraction.ts | ||
| /** | ||
| * @deprecated Use `plugins.extraction` config for declarative extraction that works with both JS and Rust engines. | ||
| */ | ||
| function extractionPlugin(selectors) { | ||
@@ -192,7 +158,2 @@ const matcherCallbacks = Object.entries(selectors).map(([selector, callback]) => ({ | ||
| } | ||
| /** | ||
| * Extraction collector for `plugins.extraction` config. | ||
| * Collects results during processing; callbacks are called post-conversion | ||
| * to match Rust engine behavior. | ||
| */ | ||
| function extractionCollectorPlugin(config) { | ||
@@ -254,9 +215,2 @@ const matchers = Object.entries(config).map(([selector, callback]) => ({ | ||
| } | ||
| //#endregion | ||
| //#region src/plugins/filter.ts | ||
| /** | ||
| * Compiles a selector (string or TAG_* number) into a fast matcher. | ||
| * String tag names (e.g. 'form') are compiled to TAG_* ID comparisons at creation time, | ||
| * avoiding per-element string comparison. CSS selectors (e.g. '.class', '#id') use parseSelector. | ||
| */ | ||
| function compileSelector(selector) { | ||
@@ -274,14 +228,2 @@ if (typeof selector === "number") return { | ||
| } | ||
| /** | ||
| * Plugin that filters nodes based on CSS selectors. | ||
| * Allows including or excluding nodes based on selectors. | ||
| * | ||
| * @example | ||
| * // Include only heading elements and their children | ||
| * withQuerySelectorPlugin({ include: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] }) | ||
| * | ||
| * @example | ||
| * // Exclude navigation, sidebar, and footer | ||
| * withQuerySelectorPlugin({ exclude: ['nav', '#sidebar', '.footer'] }) | ||
| */ | ||
| function filterPlugin(options = {}) { | ||
@@ -325,11 +267,5 @@ const includeSelectors = options.include?.map((selector) => compileSelector(selector)) || []; | ||
| } | ||
| //#endregion | ||
| //#region src/plugins/frontmatter.ts | ||
| const BACKSLASH_RE = /\\/g; | ||
| const DOUBLE_QUOTE_RE = /"/g; | ||
| const ESCAPED_DOUBLE_QUOTE_RE = /\\"/g; | ||
| /** | ||
| * A plugin that manages frontmatter generation from HTML head elements | ||
| * Extracts metadata from meta tags and title and generates YAML frontmatter | ||
| */ | ||
| function frontmatterPlugin(options = {}) { | ||
@@ -413,5 +349,2 @@ const additionalFields = options.additionalFields || {}; | ||
| return plugin; | ||
| /** | ||
| * Generate YAML frontmatter string from collected metadata | ||
| */ | ||
| function generateFrontmatter() { | ||
@@ -436,33 +369,2 @@ if (Object.keys(frontmatter).length === 0) return ""; | ||
| } | ||
| //#endregion | ||
| //#region src/plugins/isolate-main.ts | ||
| /** | ||
| * Plugin that isolates main content using the following priority order: | ||
| * 1. If an explicit <main> element exists (within 5 depth levels), use its content exclusively | ||
| * 2. Otherwise, find content between the first header tag (h1-h6) and first footer | ||
| * 3. If footer is within 5 levels of nesting from the header, use it as the end boundary | ||
| * 4. Exclude all content before the start marker and after the end marker | ||
| * | ||
| * @example | ||
| * ```html | ||
| * <body> | ||
| * <nav>Navigation (excluded)</nav> | ||
| * <main> | ||
| * <h1>Main Title (included)</h1> | ||
| * <p>Main content (included)</p> | ||
| * </main> | ||
| * <footer>Footer (excluded)</footer> | ||
| * </body> | ||
| * ``` | ||
| * | ||
| * @example | ||
| * ```html | ||
| * <body> | ||
| * <nav>Navigation (excluded)</nav> | ||
| * <h1>Main Title (included)</h1> | ||
| * <p>Main content (included)</p> | ||
| * <footer>Footer (excluded)</footer> | ||
| * </body> | ||
| * ``` | ||
| */ | ||
| function isolateMainPlugin() { | ||
@@ -558,7 +460,2 @@ let mainElement = null; | ||
| } | ||
| //#endregion | ||
| //#region src/plugins/tailwind.ts | ||
| /** | ||
| * Mapping of Tailwind classes to Markdown formatting | ||
| */ | ||
| const TAILWIND_TO_MARKDOWN_MAP = { | ||
@@ -603,5 +500,2 @@ "font-bold": { | ||
| }; | ||
| /** | ||
| * Extract base class name from a responsive breakpoint variant | ||
| */ | ||
| function extractBaseClass(className) { | ||
@@ -623,5 +517,2 @@ for (const bp of [ | ||
| } | ||
| /** | ||
| * Sort classes by breakpoint for mobile-first processing | ||
| */ | ||
| function sortByBreakpoint(classes) { | ||
@@ -642,5 +533,2 @@ const breakpointOrder = { | ||
| } | ||
| /** | ||
| * Group classes by their formatting type to handle overrides | ||
| */ | ||
| function groupByFormattingType(classes) { | ||
@@ -671,5 +559,2 @@ const sorted = sortByBreakpoint(classes); | ||
| } | ||
| /** | ||
| * Fix redundant markdown delimiters without regex | ||
| */ | ||
| function fixRedundantDelimiters(content) { | ||
@@ -684,5 +569,2 @@ content = content.replaceAll("****", "**"); | ||
| } | ||
| /** | ||
| * Normalizes a list of Tailwind classes by processing breakpoints and resolving conflicts | ||
| */ | ||
| function normalizeClasses(classes) { | ||
@@ -696,5 +578,2 @@ const result = []; | ||
| } | ||
| /** | ||
| * Check if a class has a breakpoint prefix | ||
| */ | ||
| function hasBreakpoint(className) { | ||
@@ -704,5 +583,2 @@ const { breakpoint } = extractBaseClass(className); | ||
| } | ||
| /** | ||
| * Process Tailwind classes for an element with mobile-first approach | ||
| */ | ||
| function processTailwindClasses(classes) { | ||
@@ -759,5 +635,2 @@ let prefix = ""; | ||
| } | ||
| /** | ||
| * Creates a plugin that adds Tailwind class processing | ||
| */ | ||
| function tailwindPlugin() { | ||
@@ -812,3 +685,2 @@ return createPlugin({ | ||
| } | ||
| //#endregion | ||
| export { extractionCollectorPlugin as a, filterPlugin as i, isolateMainPlugin as n, extractionPlugin as o, frontmatterPlugin as r, createPlugin as s, tailwindPlugin as t }; |
| import { a as NO_SPACING, n as DEFAULT_BLOCK_SPACING } from "./const.mjs"; | ||
| import { r as parseHtmlStream } from "./parse.mjs"; | ||
| import { a as extractionCollectorPlugin, i as filterPlugin, n as isolateMainPlugin, r as frontmatterPlugin, t as tailwindPlugin } from "./plugins.mjs"; | ||
| //#region src/plugin-processor.ts | ||
| /** | ||
| * Processes plugins for a given node event | ||
| * Shared logic between markdown-processor.ts and stream.ts | ||
| * | ||
| * @param event - The node event to process | ||
| * @param plugins - Array of plugins to apply | ||
| * @param state - The current runtime state | ||
| * @param processEvent - Callback to process the event after plugin processing | ||
| * @returns true if the event should be skipped, false to continue processing | ||
| */ | ||
| function processPluginsForEvent(event, plugins, state, processEvent) { | ||
@@ -50,7 +39,2 @@ if (plugins?.length) { | ||
| } | ||
| //#endregion | ||
| //#region src/markdown-processor.ts | ||
| /** | ||
| * Determines if spacing is needed between two characters | ||
| */ | ||
| function needsSpacing(lastChar, firstChar, state) { | ||
@@ -85,5 +69,2 @@ if (lastChar === " " || lastChar === "\n" || lastChar === " ") return false; | ||
| } | ||
| /** | ||
| * Determines if spacing should be added before text content | ||
| */ | ||
| function shouldAddSpacingBeforeText(lastChar, lastNode, textNode) { | ||
@@ -97,5 +78,2 @@ if (!lastChar || lastChar === "\n" || lastChar === " " || lastChar === "[" || lastChar === ">") return false; | ||
| } | ||
| /** | ||
| * Calculate newline configuration based on tag handler spacing config | ||
| */ | ||
| function calculateNewLineConfig(node) { | ||
@@ -120,5 +98,2 @@ const tagId = node.tagId; | ||
| } | ||
| /** | ||
| * Creates a markdown processor that consumes DOM events and generates markdown | ||
| */ | ||
| function createMarkdownProcessor(options = {}, resolvedPlugins = [], tagOverrideHandlers) { | ||
@@ -131,5 +106,2 @@ const state = { | ||
| let lastYieldedLength = 0; | ||
| /** | ||
| * Process a DOM event and generate markdown | ||
| */ | ||
| function processEvent(event) { | ||
@@ -220,5 +192,2 @@ const { type: eventType, node } = event; | ||
| } | ||
| /** | ||
| * Process HTML string and generate events | ||
| */ | ||
| function processHtml(html) { | ||
@@ -234,5 +203,2 @@ parseHtmlStream(html, { | ||
| } | ||
| /** | ||
| * Get the final markdown output | ||
| */ | ||
| function getMarkdown() { | ||
@@ -243,5 +209,2 @@ const result = state.buffer.join("").trimStart(); | ||
| } | ||
| /** | ||
| * Get new markdown content since the last call (for streaming) | ||
| */ | ||
| function getMarkdownChunk() { | ||
@@ -265,4 +228,2 @@ const currentContent = state.buffer.join("").trimStart(); | ||
| } | ||
| //#endregion | ||
| //#region src/resolve-plugins.ts | ||
| function resolveFrontmatterOpt(opt) { | ||
@@ -279,6 +240,2 @@ if (typeof opt === "function") return { | ||
| } | ||
| /** | ||
| * Resolves declarative BuiltinPlugins config into a flat TransformPlugin array. | ||
| * Optionally appends imperative transform plugins. | ||
| */ | ||
| function resolvePlugins(options, hooks) { | ||
@@ -315,3 +272,2 @@ const plugins = []; | ||
| } | ||
| //#endregion | ||
| export { createMarkdownProcessor as n, processPluginsForEvent as r, resolvePlugins as t }; |
+3
-20
@@ -0,4 +1,6 @@ | ||
| import "./const.mjs"; | ||
| import { i as buildTagOverrideHandlers, r as parseHtmlStream } from "./parse.mjs"; | ||
| import { n as createMarkdownProcessor, r as processPluginsForEvent, t as resolvePlugins } from "./resolve-plugins.mjs"; | ||
| //#region src/clean.ts | ||
| import "./plugins.mjs"; | ||
| import "../preset/minimal.mjs"; | ||
| function resolveClean(clean) { | ||
@@ -17,7 +19,2 @@ if (clean === true) return { | ||
| } | ||
| /** | ||
| * Try to parse a markdown link `[text](url)` starting at position `start` (the `[`). | ||
| * Returns { text, url, end } or null if not a link. | ||
| * Handles balanced parens in URLs (e.g. `javascript:void(0)`). | ||
| */ | ||
| function parseLink(md, start) { | ||
@@ -80,3 +77,2 @@ const len = md.length; | ||
| } | ||
| /** Strip inline markdown formatting from heading text for slug generation */ | ||
| function stripHeadingFormatting(text) { | ||
@@ -278,12 +274,2 @@ let result = ""; | ||
| } | ||
| //#endregion | ||
| //#region src/stream.ts | ||
| /** | ||
| * Creates a markdown stream from an HTML stream | ||
| * @param htmlStream - ReadableStream of HTML content (as Uint8Array or string) | ||
| * @param options - Configuration options for conversion | ||
| * @param resolvedPlugins - Pre-resolved plugin instances | ||
| * @param tagOverrideHandlers - Tag override handlers from declarative config | ||
| * @returns An async generator yielding markdown chunks | ||
| */ | ||
| async function* streamHtmlToMarkdown$1(htmlStream, options = {}, resolvedPlugins = [], tagOverrideHandlers) { | ||
@@ -321,4 +307,2 @@ if (!htmlStream) throw new Error("Invalid HTML stream provided"); | ||
| } | ||
| //#endregion | ||
| //#region src/index.ts | ||
| function resolveHooks(options) { | ||
@@ -347,3 +331,2 @@ return options.hooks?.length ? options.hooks : void 0; | ||
| } | ||
| //#endregion | ||
| export { streamHtmlToMarkdown as n, htmlToMarkdown as t }; |
@@ -1,2 +0,1 @@ | ||
| //#region src/const.d.ts | ||
| declare const TAG_H1 = 7; | ||
@@ -12,4 +11,2 @@ declare const TAG_H2 = 8; | ||
| declare const NodeEventExit$1 = 1; | ||
| //#endregion | ||
| //#region src/types.d.ts | ||
| /** | ||
@@ -390,3 +387,2 @@ * Imperative hook-based transform plugins. **JavaScript engine only.** | ||
| } | ||
| //#endregion | ||
| export { TAG_H3 as C, TEXT_NODE as D, TAG_H6 as E, TAG_H2 as S, TAG_H5 as T, TransformPlugin as _, ExtractedElement as a, NodeEventExit$1 as b, MdreamOptions as c, NodeEvent as d, PluginContext as f, TextNode as g, TagOverride as h, EngineOptions as i, MdreamRuntimeState as l, TagHandler as m, CleanOptions as n, FrontmatterConfig as o, SplitterOptions as p, ElementNode as r, MarkdownChunk as s, BuiltinPlugins as t, Node as u, ELEMENT_NODE as v, TAG_H4 as w, TAG_H1 as x, NodeEventEnter$1 as y }; |
+0
-6
| import { n as streamHtmlToMarkdown } from "./_chunks/src.mjs"; | ||
| import "./_chunks/const.mjs"; | ||
| import "./_chunks/parse.mjs"; | ||
| import "./_chunks/resolve-plugins.mjs"; | ||
| import "./_chunks/plugins.mjs"; | ||
| import { withMinimalPreset } from "./preset/minimal.mjs"; | ||
@@ -12,3 +8,2 @@ import { readFileSync } from "node:fs"; | ||
| import { dirname, join } from "pathe"; | ||
| //#region src/cli.ts | ||
| async function streamingConvert(options = {}) { | ||
@@ -27,3 +22,2 @@ let conversionOptions = { origin: options.origin }; | ||
| cli.help().version(packageJson.version).parse(); | ||
| //#endregion | ||
| export {}; |
+0
-2
| import { n as streamHtmlToMarkdown, t as htmlToMarkdown } from "./_chunks/src.mjs"; | ||
| import { d as TAG_H3, f as TAG_H4, h as TEXT_NODE, l as TAG_H1, m as TAG_H6, o as NodeEventEnter, p as TAG_H5, r as ELEMENT_NODE, s as NodeEventExit, u as TAG_H2 } from "./_chunks/const.mjs"; | ||
| import "./_chunks/parse.mjs"; | ||
| import "./_chunks/resolve-plugins.mjs"; | ||
| import { s as createPlugin } from "./_chunks/plugins.mjs"; | ||
| import { withMinimalPreset } from "./preset/minimal.mjs"; | ||
| export { ELEMENT_NODE, NodeEventEnter, NodeEventExit, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TEXT_NODE, createPlugin, htmlToMarkdown, streamHtmlToMarkdown, withMinimalPreset }; |
@@ -1,2 +0,1 @@ | ||
| //#region src/llms-txt.d.ts | ||
| /** | ||
@@ -88,3 +87,2 @@ * Link in llms.txt section | ||
| declare function createLlmsTxtStream(options: CreateLlmsTxtStreamOptions): WritableStream<ProcessedFile>; | ||
| //#endregion | ||
| export { CreateLlmsTxtStreamOptions, LlmsTxtArtifactsOptions, LlmsTxtArtifactsResult, LlmsTxtLink, LlmsTxtSection, ProcessedFile, createLlmsTxtStream, generateLlmsTxtArtifacts }; |
+0
-39
| import { join, relative } from "pathe"; | ||
| import { mkdir, open } from "node:fs/promises"; | ||
| //#region src/llms-txt.ts | ||
| const FRONTMATTER_RE = /^---\n([\s\S]*?)\n---\n([\s\S]*)$/; | ||
@@ -8,5 +7,2 @@ const ANCHOR_INVALID_CHARS_RE = /[^a-z0-9]/g; | ||
| const TRAILING_SLASH_RE = /\/$/; | ||
| /** | ||
| * Generate llms.txt content | ||
| */ | ||
| function generateLlmsTxtContent(files, options) { | ||
@@ -35,5 +31,2 @@ const { siteName = "Site", description, origin = "", sections, notes } = options; | ||
| } | ||
| /** | ||
| * Parse frontmatter from markdown content | ||
| */ | ||
| function parseFrontmatter(content) { | ||
@@ -61,5 +54,2 @@ const match = content.match(FRONTMATTER_RE); | ||
| } | ||
| /** | ||
| * Serialize frontmatter object to YAML-like format | ||
| */ | ||
| function serializeFrontmatter(data) { | ||
@@ -70,5 +60,2 @@ const lines = []; | ||
| } | ||
| /** | ||
| * Generate llms-full.txt content with complete page content | ||
| */ | ||
| function generateLlmsFullTxtContent(files, options) { | ||
@@ -114,5 +101,2 @@ const { siteName = "Site", description, origin = "", sections, notes } = options; | ||
| } | ||
| /** | ||
| * Generate individual markdown files structure | ||
| */ | ||
| function generateMarkdownFilesContent(files) { | ||
@@ -129,5 +113,2 @@ const markdownFiles = []; | ||
| } | ||
| /** | ||
| * Main function to generate llms.txt artifacts from pre-processed files | ||
| */ | ||
| async function generateLlmsTxtArtifacts(options) { | ||
@@ -147,5 +128,2 @@ const files = options.files; | ||
| } | ||
| /** | ||
| * Format a section with title, description, and links | ||
| */ | ||
| function formatSection(section) { | ||
@@ -166,5 +144,2 @@ let content = `## ${section.title}\n\n`; | ||
| } | ||
| /** | ||
| * Format notes section | ||
| */ | ||
| function formatNotes(notes) { | ||
@@ -176,5 +151,2 @@ const noteLines = Array.isArray(notes) ? notes : [notes]; | ||
| } | ||
| /** | ||
| * Get group prefix for a URL (up to 2 segments) | ||
| */ | ||
| function getGroupPrefix(url, depth) { | ||
@@ -186,6 +158,2 @@ const segments = url.split("/").filter(Boolean); | ||
| } | ||
| /** | ||
| * Sort pages by URL path in hierarchical order (directory tree structure) | ||
| * Groups by up to 2 segments, with root-level pages without nesting grouped together | ||
| */ | ||
| function sortPagesByPath(pages) { | ||
@@ -234,8 +202,2 @@ const twoSegmentCount = /* @__PURE__ */ new Map(); | ||
| } | ||
| /** | ||
| * Create a WritableStream that generates llms.txt artifacts by streaming pages to disk. | ||
| * | ||
| * Writes llms.txt (and optionally llms-full.txt) incrementally as pages are written, | ||
| * never keeping full content in memory. Creates outputDir recursively if needed. | ||
| */ | ||
| function createLlmsTxtStream(options) { | ||
@@ -357,3 +319,2 @@ const { siteName = "Site", description, origin = "", generateFull, outputDir = process.cwd(), sections, notes } = options; | ||
| } | ||
| //#endregion | ||
| export { createLlmsTxtStream, generateLlmsTxtArtifacts }; |
@@ -1,2 +0,1 @@ | ||
| //#region src/negotiate.d.ts | ||
| interface AcceptEntry { | ||
@@ -25,3 +24,2 @@ type: string; | ||
| declare function shouldServeMarkdown(acceptHeader?: string, secFetchDest?: string): boolean; | ||
| //#endregion | ||
| export { parseAcceptHeader, shouldServeMarkdown }; |
+0
-18
@@ -1,6 +0,1 @@ | ||
| //#region src/negotiate.ts | ||
| /** | ||
| * Parse an HTTP Accept header into an ordered list of media types with quality values. | ||
| * Supports quality weights (q=0.9) and preserves original position for tie-breaking. | ||
| */ | ||
| function parseAcceptHeader(accept) { | ||
@@ -36,14 +31,2 @@ if (!accept) return []; | ||
| } | ||
| /** | ||
| * Determine if a client prefers markdown over HTML using proper content negotiation. | ||
| * | ||
| * Uses Accept header quality weights and position ordering: | ||
| * - If text/markdown or text/plain has higher quality than text/html -> markdown | ||
| * - If same quality, earlier position in Accept header wins | ||
| * - Bare wildcard does NOT trigger markdown (prevents breaking OG crawlers) | ||
| * - sec-fetch-dest: document always returns false (browser navigation) | ||
| * | ||
| * @param acceptHeader - The HTTP Accept header value | ||
| * @param secFetchDest - The Sec-Fetch-Dest header value | ||
| */ | ||
| function shouldServeMarkdown(acceptHeader, secFetchDest) { | ||
@@ -92,3 +75,2 @@ if (secFetchDest === "document") return false; | ||
| } | ||
| //#endregion | ||
| export { parseAcceptHeader, shouldServeMarkdown }; |
+0
-3
| import { _ as TransformPlugin, d as NodeEvent, m as TagHandler, r as ElementNode, u as Node } from "./_chunks/types.mjs"; | ||
| //#region src/parse.d.ts | ||
| interface ParseOptions { | ||
@@ -56,3 +54,2 @@ resolvedPlugins?: TransformPlugin[]; | ||
| declare function parseAttributes(attrStr: string): Record<string, string>; | ||
| //#endregion | ||
| export { ParseOptions, ParseResult, ParseState, parseAttributes, parseHtml, parseHtmlStream }; |
+0
-1
@@ -1,3 +0,2 @@ | ||
| import "./_chunks/const.mjs"; | ||
| import { n as parseHtml, r as parseHtmlStream, t as parseAttributes } from "./_chunks/parse.mjs"; | ||
| export { parseAttributes, parseHtml, parseHtmlStream }; |
+0
-10
| import { _ as TransformPlugin, a as ExtractedElement$1, l as MdreamRuntimeState, r as ElementNode } from "./_chunks/types.mjs"; | ||
| import { r as createPlugin } from "./_chunks/index.mjs"; | ||
| //#region src/plugins/extraction.d.ts | ||
| interface ExtractedElement extends ElementNode { | ||
@@ -21,4 +20,2 @@ textContent: string; | ||
| }; | ||
| //#endregion | ||
| //#region src/plugins/filter.d.ts | ||
| /** | ||
@@ -42,4 +39,2 @@ * Plugin that filters nodes based on CSS selectors. | ||
| }): TransformPlugin; | ||
| //#endregion | ||
| //#region src/plugins/frontmatter.d.ts | ||
| interface FrontmatterPluginOptions { | ||
@@ -56,4 +51,2 @@ /** Additional frontmatter fields to include */ | ||
| declare function frontmatterPlugin(options?: FrontmatterPluginOptions): TransformPlugin; | ||
| //#endregion | ||
| //#region src/plugins/isolate-main.d.ts | ||
| /** | ||
@@ -89,4 +82,2 @@ * Plugin that isolates main content using the following priority order: | ||
| declare function isolateMainPlugin(): TransformPlugin; | ||
| //#endregion | ||
| //#region src/plugins/tailwind.d.ts | ||
| /** | ||
@@ -96,3 +87,2 @@ * Creates a plugin that adds Tailwind class processing | ||
| declare function tailwindPlugin(): TransformPlugin; | ||
| //#endregion | ||
| export { type ExtractedElement, createPlugin, extractionCollectorPlugin, extractionPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, tailwindPlugin }; |
+0
-1
@@ -1,3 +0,2 @@ | ||
| import "./_chunks/const.mjs"; | ||
| import { a as extractionCollectorPlugin, i as filterPlugin, n as isolateMainPlugin, o as extractionPlugin, r as frontmatterPlugin, s as createPlugin, t as tailwindPlugin } from "./_chunks/plugins.mjs"; | ||
| export { createPlugin, extractionCollectorPlugin, extractionPlugin, filterPlugin, frontmatterPlugin, isolateMainPlugin, tailwindPlugin }; |
| import "../_chunks/const.mjs"; | ||
| //#region src/preset/minimal.ts | ||
| /** | ||
| * Creates a configurable minimal preset with advanced options. | ||
| * Returns declarative plugin config that works with both JS and Rust engines. | ||
| */ | ||
| function withMinimalPreset(options = {}) { | ||
@@ -33,3 +28,2 @@ return { | ||
| } | ||
| //#endregion | ||
| export { withMinimalPreset }; |
| import { p as SplitterOptions, s as MarkdownChunk } from "./_chunks/types.mjs"; | ||
| //#region src/splitter.d.ts | ||
| /** | ||
@@ -20,3 +18,2 @@ * Convert HTML to Markdown and split into chunks in single pass. | ||
| declare function htmlToMarkdownSplitChunks(html: string, options?: SplitterOptions): MarkdownChunk[]; | ||
| //#endregion | ||
| export { type MarkdownChunk, type SplitterOptions, htmlToMarkdownSplitChunks, htmlToMarkdownSplitChunksStream }; |
+0
-20
| import "./_chunks/const.mjs"; | ||
| import { r as parseHtmlStream } from "./_chunks/parse.mjs"; | ||
| import { n as createMarkdownProcessor, r as processPluginsForEvent, t as resolvePlugins } from "./_chunks/resolve-plugins.mjs"; | ||
| import "./_chunks/plugins.mjs"; | ||
| //#region src/splitter.ts | ||
| const MARKDOWN_HEADER_LINE_RE = /^#{1,6}\s+/; | ||
@@ -36,15 +34,5 @@ const NEWLINE_RE = /\n/g; | ||
| } | ||
| /** | ||
| * Get current markdown content WITHOUT clearing buffers | ||
| */ | ||
| function getCurrentMarkdown(state) { | ||
| return state.buffer.join("").trimStart(); | ||
| } | ||
| /** | ||
| * Convert HTML to Markdown and split into chunks in single pass. | ||
| * Yields chunks during HTML event processing for better memory efficiency. | ||
| * | ||
| * **JavaScript engine only** — uses the JS engine's internal processing pipeline. | ||
| * Not compatible with the Rust engine. | ||
| */ | ||
| function* htmlToMarkdownSplitChunksStream(html, options = {}) { | ||
@@ -181,9 +169,2 @@ const opts = createOptions(options); | ||
| } | ||
| /** | ||
| * Convert HTML to Markdown and split into chunks in single pass. | ||
| * Chunks are created during HTML event processing. | ||
| * | ||
| * **JavaScript engine only** — uses the JS engine's internal processing pipeline. | ||
| * Not compatible with the Rust engine. | ||
| */ | ||
| function htmlToMarkdownSplitChunks(html, options = {}) { | ||
@@ -216,3 +197,2 @@ const opts = createOptions(options); | ||
| } | ||
| //#endregion | ||
| export { htmlToMarkdownSplitChunks, htmlToMarkdownSplitChunksStream }; |
+1
-1
| { | ||
| "name": "@mdream/js", | ||
| "type": "module", | ||
| "version": "1.0.3", | ||
| "version": "1.0.4", | ||
| "description": "JavaScript HTML-to-Markdown engine for mdream. Escape hatch for hooks and edge runtimes.", | ||
@@ -6,0 +6,0 @@ "author": { |
154794
-6.17%3427
-8.86%