streaming-markdown
Advanced tools
Comparing version 0.0.5 to 0.0.6
@@ -37,6 +37,2 @@ /** | ||
/** | ||
* @param {Parser} p | ||
* @returns {void } */ | ||
export function parser_add_paragraph(p: Parser): void; | ||
/** | ||
* Parse and render another chunk of markdown. | ||
@@ -76,37 +72,43 @@ * @param {Parser} p | ||
export * from "./t.js"; | ||
export const ROOT: 1; | ||
export const DOCUMENT: 1; | ||
export const PARAGRAPH: 2; | ||
export const HEADING_1: 4; | ||
export const HEADING_2: 8; | ||
export const HEADING_3: 16; | ||
export const HEADING_4: 32; | ||
export const HEADING_5: 64; | ||
export const HEADING_6: 128; | ||
export const CODE_BLOCK: 256; | ||
export const CODE_FENCE: 512; | ||
export const CODE_INLINE: 1024; | ||
export const ITALIC_AST: 2048; | ||
export const ITALIC_UND: 4096; | ||
export const STRONG_AST: 8192; | ||
export const STRONG_UND: 16384; | ||
export const STRIKE: 32768; | ||
export const LINK: 65536; | ||
export const IMAGE: 131072; | ||
export const LINE_BREAK: 4; | ||
export const HEADING_1: 8; | ||
export const HEADING_2: 16; | ||
export const HEADING_3: 32; | ||
export const HEADING_4: 64; | ||
export const HEADING_5: 128; | ||
export const HEADING_6: 256; | ||
export const CODE_BLOCK: 512; | ||
export const CODE_FENCE: 1024; | ||
export const CODE_INLINE: 2048; | ||
export const ITALIC_AST: 4096; | ||
export const ITALIC_UND: 8192; | ||
export const STRONG_AST: 16384; | ||
export const STRONG_UND: 32768; | ||
export const STRIKE: 65536; | ||
export const LINK: 131072; | ||
export const IMAGE: 262144; | ||
export const BLOCKQUOTE: 524288; | ||
/** `HEADING_1 | HEADING_2 | HEADING_3 | HEADING_4 | HEADING_5 | HEADING_6` */ | ||
export const HEADING: 252; | ||
export const ANY_HEADING: 504; | ||
/** `CODE_BLOCK | CODE_FENCE | CODE_INLINE` */ | ||
export const CODE: 1792; | ||
export const ANY_CODE: 3584; | ||
/** `ITALIC_AST | ITALIC_UND` */ | ||
export const ITALIC: 6144; | ||
export const ANY_ITALIC: 12288; | ||
/** `STRONG_AST | STRONG_UND` */ | ||
export const STRONG: 24576; | ||
export const ANY_STRONG: 49152; | ||
/** `STRONG_AST | ITALIC_AST` */ | ||
export const ASTERISK: 10240; | ||
export const ANY_AST: 20480; | ||
/** `STRONG_UND | ITALIC_UND` */ | ||
export const UNDERSCORE: 20480; | ||
/** `CODE | IMAGE` */ | ||
export const NO_FORMATTING: 132864; | ||
export const ANY_UND: 40960; | ||
/** `ANY_CODE | IMAGE` */ | ||
export const NO_NESTING: 265728; | ||
/** `DOCUMENT | BLOCKQUOTE` */ | ||
export const ANY_ROOT: 524289; | ||
export type Token_Type = (typeof Token_Type)[keyof typeof Token_Type]; | ||
export namespace Token_Type { | ||
export { ROOT as Root }; | ||
export { DOCUMENT as Document }; | ||
export { BLOCKQUOTE as Blockquote }; | ||
export { LINE_BREAK as Line_Break }; | ||
export { PARAGRAPH as Paragraph }; | ||
@@ -113,0 +115,0 @@ export { HEADING_1 as Heading_1 }; |
297
mds/mds.js
@@ -11,38 +11,44 @@ /* | ||
export const | ||
ROOT = 1, // 1 | ||
DOCUMENT = 1, // 1 | ||
PARAGRAPH = 2, // 2 | ||
HEADING_1 = 4, // 3 | ||
HEADING_2 = 8, // 4 | ||
HEADING_3 = 16, // 5 | ||
HEADING_4 = 32, // 6 | ||
HEADING_5 = 64, // 7 | ||
HEADING_6 = 128, // 8 | ||
CODE_BLOCK = 256, // 9 | ||
CODE_FENCE = 512, // 10 | ||
CODE_INLINE = 1024, // 11 | ||
ITALIC_AST = 2048, // 12 | ||
ITALIC_UND = 4096, // 13 | ||
STRONG_AST = 8192, // 14 | ||
STRONG_UND = 16384, // 15 | ||
STRIKE = 32768, // 16 | ||
LINK = 65536, // 17 | ||
IMAGE = 131072, // 18 | ||
LINE_BREAK = 4, // 3 | ||
HEADING_1 = 8, // 4 | ||
HEADING_2 = 16, // 5 | ||
HEADING_3 = 32, // 6 | ||
HEADING_4 = 64, // 7 | ||
HEADING_5 = 128, // 8 | ||
HEADING_6 = 256, // 9 | ||
CODE_BLOCK = 512, // 10 | ||
CODE_FENCE = 1024, // 11 | ||
CODE_INLINE = 2048, // 12 | ||
ITALIC_AST = 4096, // 13 | ||
ITALIC_UND = 8192, // 14 | ||
STRONG_AST = 16384, // 15 | ||
STRONG_UND = 32768, // 16 | ||
STRIKE = 65536, // 17 | ||
LINK = 131072, // 18 | ||
IMAGE = 262144, // 19 | ||
BLOCKQUOTE = 524288, // 20 | ||
/** `HEADING_1 | HEADING_2 | HEADING_3 | HEADING_4 | HEADING_5 | HEADING_6` */ | ||
HEADING = 252, | ||
ANY_HEADING = 504, | ||
/** `CODE_BLOCK | CODE_FENCE | CODE_INLINE` */ | ||
CODE = 1792, | ||
ANY_CODE = 3584, | ||
/** `ITALIC_AST | ITALIC_UND` */ | ||
ITALIC = 6144, | ||
ANY_ITALIC = 12288, | ||
/** `STRONG_AST | STRONG_UND` */ | ||
STRONG = 24576, | ||
ANY_STRONG = 49152, | ||
/** `STRONG_AST | ITALIC_AST` */ | ||
ASTERISK = 10240, | ||
ANY_AST = 20480, | ||
/** `STRONG_UND | ITALIC_UND` */ | ||
UNDERSCORE = 20480, | ||
/** `CODE | IMAGE` */ | ||
NO_FORMATTING = 132864 | ||
ANY_UND = 40960, | ||
/** `ANY_CODE | IMAGE` */ | ||
NO_NESTING = 265728, | ||
/** `DOCUMENT | BLOCKQUOTE` */ | ||
ANY_ROOT = 524289 | ||
/** @enum {(typeof Token_Type)[keyof typeof Token_Type]} */ | ||
export const Token_Type = /** @type {const} */({ | ||
Root: ROOT, | ||
Document: DOCUMENT, | ||
Blockquote: BLOCKQUOTE, | ||
Line_Break: LINE_BREAK, | ||
Paragraph: PARAGRAPH, | ||
@@ -72,4 +78,6 @@ Heading_1: HEADING_1, | ||
switch (type) { | ||
case ROOT: return "Root" | ||
case DOCUMENT: return "Document" | ||
case BLOCKQUOTE: return "Blockquote" | ||
case PARAGRAPH: return "Paragraph" | ||
case LINE_BREAK: return "Line_Break" | ||
case HEADING_1: return "Heading_1" | ||
@@ -131,5 +139,6 @@ case HEADING_2: return "Heading_2" | ||
pending : "", | ||
types : /**@type {*}*/([ROOT,,,,,]), | ||
types : /**@type {*}*/([DOCUMENT,,,,,]), | ||
len : 0, | ||
code_fence: "", | ||
newline_blockquote_idx: 0, | ||
} | ||
@@ -143,5 +152,7 @@ } | ||
export function parser_end(p) { | ||
if (p.len === 0) return | ||
parser_write(p, "\n") | ||
parser_add_text(p) | ||
while (p.len > 0) { | ||
parser_end_token(p) | ||
} | ||
} | ||
@@ -166,3 +177,2 @@ | ||
p.renderer.end_node(p.renderer.data) | ||
p.pending = "" | ||
} | ||
@@ -175,3 +185,2 @@ | ||
export function parser_add_token(p, type) { | ||
p.pending = "" | ||
p.len += 1 | ||
@@ -183,9 +192,2 @@ p.types[p.len] = type | ||
/** | ||
* @param {Parser} p | ||
* @returns {void } */ | ||
export function parser_add_paragraph(p) { | ||
if (p.len === 0) parser_add_token(p, PARAGRAPH) | ||
} | ||
/** | ||
* Parse and render another chunk of markdown. | ||
@@ -196,3 +198,5 @@ * @param {Parser} p | ||
export function parser_write(p, chunk) { | ||
for (const char of chunk) { | ||
chars: | ||
for (let char_i = 0; char_i < chunk.length; char_i += 1) { | ||
const char = chunk[char_i] | ||
const in_token = p.types[p.len] | ||
@@ -205,13 +209,57 @@ const pending_with_char = p.pending + char | ||
switch (in_token) { | ||
case ROOT: { | ||
case LINE_BREAK: | ||
console.assert(p.pending.length === 1, "Pending in line break should be one character") | ||
console.assert(p.text.length === 0, "Text when in line break") | ||
switch (p.pending) { | ||
case " ": | ||
p.pending = char | ||
continue | ||
case ">": | ||
p.pending = char | ||
while (p.newline_blockquote_idx+1 < p.len-1) { | ||
p.newline_blockquote_idx += 1 | ||
if (p.types[p.newline_blockquote_idx] === BLOCKQUOTE) { | ||
continue chars | ||
} | ||
} | ||
p.len -= 1 // remove the line break | ||
while (p.newline_blockquote_idx < p.len) { | ||
parser_end_token(p) | ||
} | ||
p.newline_blockquote_idx += 1 | ||
parser_add_token(p, BLOCKQUOTE) | ||
continue | ||
case "\n": | ||
p.len -= 1 // remove the line break | ||
while (p.newline_blockquote_idx < p.len) { | ||
parser_end_token(p) | ||
} | ||
p.newline_blockquote_idx = 0 | ||
p.pending = char | ||
continue | ||
default: | ||
p.len -= 1 // remove the line break | ||
p.renderer.add_node(p.renderer.data, LINE_BREAK) | ||
p.renderer.end_node(p.renderer.data) | ||
char_i -= 1 // reprocess pending | ||
continue | ||
} | ||
case DOCUMENT: | ||
case BLOCKQUOTE: | ||
console.assert(p.text.length === 0, "Root should not have any text") | ||
switch (pending_with_char) { | ||
case "# ": parser_add_token(p, HEADING_1) ;continue | ||
case "## ": parser_add_token(p, HEADING_2) ;continue | ||
case "### ": parser_add_token(p, HEADING_3) ;continue | ||
case "#### ": parser_add_token(p, HEADING_4) ;continue | ||
case "##### ": parser_add_token(p, HEADING_5) ;continue | ||
case "###### ": parser_add_token(p, HEADING_6) ;continue | ||
case "```": parser_add_token(p, CODE_FENCE) ;continue | ||
case "# ": p.pending=""; parser_add_token(p, HEADING_1) ;continue | ||
case "## ": p.pending=""; parser_add_token(p, HEADING_2) ;continue | ||
case "### ": p.pending=""; parser_add_token(p, HEADING_3) ;continue | ||
case "#### ": p.pending=""; parser_add_token(p, HEADING_4) ;continue | ||
case "##### ": p.pending=""; parser_add_token(p, HEADING_5) ;continue | ||
case "###### ": p.pending=""; parser_add_token(p, HEADING_6) ;continue | ||
case "```": p.pending=""; parser_add_token(p, CODE_FENCE) ;continue | ||
case " ": | ||
@@ -221,3 +269,3 @@ case " \t": | ||
case " \t": | ||
case "\t": parser_add_token(p, CODE_BLOCK) ;continue | ||
case "\t": p.pending=""; parser_add_token(p, CODE_BLOCK) ;continue | ||
case "#": | ||
@@ -239,21 +287,37 @@ case "##": | ||
continue | ||
case "> ": | ||
case ">": | ||
p.pending = "" | ||
while (p.newline_blockquote_idx+1 <= p.len) { | ||
p.newline_blockquote_idx += 1 | ||
if (p.types[p.newline_blockquote_idx] === BLOCKQUOTE) { | ||
continue chars | ||
} | ||
} | ||
p.newline_blockquote_idx += 1 | ||
parser_add_token(p, BLOCKQUOTE) | ||
continue | ||
} | ||
switch (p.pending) { | ||
/* `Code Inline` */ | ||
if ('`' === p.pending && | ||
"\n"!== char && | ||
'`' !== char | ||
) { | ||
case "`": | ||
parser_add_token(p, PARAGRAPH) | ||
parser_add_text(p) | ||
parser_add_token(p, CODE_INLINE) | ||
p.pending = "" | ||
p.text = char | ||
continue | ||
/* Trim leading spaces */ | ||
case " ": | ||
case " ": | ||
case " ": | ||
p.pending = char | ||
continue | ||
default: | ||
parser_add_token(p, PARAGRAPH) | ||
char_i -= 1 | ||
continue | ||
} | ||
p.text = p.pending | ||
parser_add_token(p, PARAGRAPH) | ||
p.pending = char | ||
continue | ||
} | ||
case CODE_BLOCK: | ||
@@ -286,4 +350,2 @@ switch (pending_with_char) { | ||
case CODE_FENCE: { | ||
console.assert(p.len === 1, "Code block is always a top-level token") | ||
switch (p.code_fence) { | ||
@@ -297,2 +359,3 @@ case 1: /* can end */ | ||
parser_end_token(p) | ||
p.pending = "" | ||
continue | ||
@@ -342,2 +405,8 @@ case "\n``": | ||
} | ||
if ('`' === p.pending) { | ||
parser_add_text(p) | ||
parser_end_token(p) | ||
p.pending = "" | ||
continue | ||
} | ||
if ('`' === char) { | ||
@@ -347,2 +416,3 @@ p.text += p.pending | ||
parser_end_token(p) | ||
p.pending = "" | ||
continue | ||
@@ -364,2 +434,3 @@ } | ||
parser_end_token(p) | ||
p.pending = "" | ||
} else { | ||
@@ -397,2 +468,3 @@ parser_add_token(p, italic) | ||
parser_add_token(p, strong) | ||
p.pending = "" | ||
} | ||
@@ -419,2 +491,4 @@ } | ||
p.pending = char | ||
} else { | ||
p.pending = "" | ||
} | ||
@@ -429,2 +503,3 @@ continue | ||
parser_end_token(p) | ||
p.pending = "" | ||
continue | ||
@@ -435,3 +510,3 @@ } | ||
case IMAGE: | ||
if (']' === p.pending) { | ||
if ("]" === p.pending) { | ||
/* | ||
@@ -450,4 +525,4 @@ [Link](url) | ||
} | ||
if (p.pending[0] === "]" && | ||
p.pending[1] === "(") { | ||
if (p.pending[0] === ']' && | ||
p.pending[1] === '(') { | ||
/* | ||
@@ -462,2 +537,3 @@ [Link](url) | ||
parser_end_token(p) | ||
p.pending = "" | ||
} else { | ||
@@ -475,18 +551,5 @@ p.pending += char | ||
switch (p.pending) { | ||
/* Newline */ | ||
case "\n": | ||
parser_add_text(p) | ||
/* Paragraph */ | ||
if ('\n' === char) { | ||
while (p.len > 0) parser_end_token(p) | ||
} | ||
/* Line break */ | ||
else { | ||
p.renderer.add_text(p.renderer.data, '\n') | ||
p.pending = char | ||
} | ||
continue | ||
/* Escape character */ | ||
case "\\": | ||
if (in_token & CODE) break | ||
if (in_token & ANY_CODE) break | ||
@@ -499,3 +562,4 @@ if ('\n' === char) { | ||
p.pending = "" | ||
p.text += (char_code >= 48 && char_code <= 90) || // 0-9 A-Z | ||
p.text += (char_code >= 48 && char_code <= 57) || // 0-9 | ||
(char_code >= 65 && char_code <= 90) || // A-Z | ||
(char_code >= 97 && char_code <= 122) // a-z | ||
@@ -506,6 +570,14 @@ ? pending_with_char | ||
continue | ||
/* Newline */ | ||
case "\n": | ||
/* Add Line_Break temporarily */ | ||
p.len += 1 | ||
p.types[p.len] = LINE_BREAK | ||
p.newline_blockquote_idx = 0 | ||
p.pending = char | ||
parser_add_text(p) | ||
continue | ||
/* `Code Inline` */ | ||
case "`": | ||
if (!(in_token & NO_FORMATTING) && | ||
"\n"!== char && | ||
if (!(in_token & NO_NESTING) && | ||
'`' !== char | ||
@@ -516,2 +588,3 @@ ) { | ||
p.text = char | ||
p.pending = "" | ||
continue | ||
@@ -521,3 +594,3 @@ } | ||
case "*": | ||
if (in_token & (NO_FORMATTING | ASTERISK)) break | ||
if (in_token & (NO_NESTING | ANY_AST)) break | ||
@@ -528,2 +601,3 @@ parser_add_text(p) | ||
parser_add_token(p, STRONG_AST) | ||
p.pending = "" | ||
} | ||
@@ -537,3 +611,3 @@ /* *Em* */ | ||
case "_": | ||
if (in_token & (NO_FORMATTING | UNDERSCORE)) break | ||
if (in_token & (NO_NESTING | ANY_UND)) break | ||
@@ -544,2 +618,3 @@ parser_add_text(p) | ||
parser_add_token(p, STRONG_UND) | ||
p.pending = "" | ||
} | ||
@@ -554,7 +629,8 @@ /* _Em_ */ | ||
case "~": | ||
if (!(in_token & (NO_FORMATTING | STRIKE)) && | ||
"~" === char | ||
if (!(in_token & (NO_NESTING | STRIKE)) && | ||
'~' === char | ||
) { | ||
parser_add_text(p) | ||
parser_add_token(p, STRIKE) | ||
p.pending = "" | ||
continue | ||
@@ -565,5 +641,4 @@ } | ||
case "[": | ||
if (!(in_token & (NO_FORMATTING | LINK)) && | ||
"\n"!== char && | ||
"]" !== char | ||
if (!(in_token & (NO_NESTING | LINK)) && | ||
']' !== char | ||
) { | ||
@@ -578,10 +653,16 @@ parser_add_text(p) | ||
case "!": | ||
if (!(in_token & NO_FORMATTING) && | ||
"[" === char | ||
if (!(in_token & NO_NESTING) && | ||
'[' === char | ||
) { | ||
parser_add_text(p) | ||
parser_add_token(p, IMAGE) | ||
p.pending = "" | ||
continue | ||
} | ||
break | ||
case " ": | ||
if (char === " ") { | ||
continue | ||
} | ||
break | ||
} | ||
@@ -637,18 +718,20 @@ | ||
switch (type) { | ||
case ROOT: return // node is already root | ||
case PARAGRAPH: mount = slot = document.createElement("p") ;break | ||
case HEADING_1: mount = slot = document.createElement("h1") ;break | ||
case HEADING_2: mount = slot = document.createElement("h2") ;break | ||
case HEADING_3: mount = slot = document.createElement("h3") ;break | ||
case HEADING_4: mount = slot = document.createElement("h4") ;break | ||
case HEADING_5: mount = slot = document.createElement("h5") ;break | ||
case HEADING_6: mount = slot = document.createElement("h6") ;break | ||
case DOCUMENT: return // node is already a document | ||
case BLOCKQUOTE: mount = slot = document.createElement("blockquote");break | ||
case PARAGRAPH: mount = slot = document.createElement("p") ;break | ||
case LINE_BREAK: mount = slot = document.createElement("br") ;break | ||
case HEADING_1: mount = slot = document.createElement("h1") ;break | ||
case HEADING_2: mount = slot = document.createElement("h2") ;break | ||
case HEADING_3: mount = slot = document.createElement("h3") ;break | ||
case HEADING_4: mount = slot = document.createElement("h4") ;break | ||
case HEADING_5: mount = slot = document.createElement("h5") ;break | ||
case HEADING_6: mount = slot = document.createElement("h6") ;break | ||
case ITALIC_AST: | ||
case ITALIC_UND: mount = slot = document.createElement("em") ;break | ||
case ITALIC_UND: mount = slot = document.createElement("em") ;break | ||
case STRONG_AST: | ||
case STRONG_UND: mount = slot = document.createElement("strong");break | ||
case STRIKE: mount = slot = document.createElement("s") ;break | ||
case CODE_INLINE:mount = slot = document.createElement("code") ;break | ||
case LINK: mount = slot = document.createElement("a") ;break | ||
case IMAGE: mount = slot = document.createElement("img") ;break | ||
case STRONG_UND: mount = slot = document.createElement("strong") ;break | ||
case STRIKE: mount = slot = document.createElement("s") ;break | ||
case CODE_INLINE:mount = slot = document.createElement("code") ;break | ||
case LINK: mount = slot = document.createElement("a") ;break | ||
case IMAGE: mount = slot = document.createElement("img") ;break | ||
case CODE_BLOCK: | ||
@@ -674,7 +757,3 @@ case CODE_FENCE: | ||
export function default_add_text(data, text) { | ||
switch (text) { | ||
case "" : break | ||
case "\n": data.nodes[data.index].appendChild(document.createElement("br")) ;break | ||
default : data.nodes[data.index].appendChild(document.createTextNode(text)) | ||
} | ||
data.nodes[data.index].appendChild(document.createTextNode(text)) | ||
} | ||
@@ -681,0 +760,0 @@ |
@@ -20,2 +20,4 @@ import {Attr_Type, Token_Type} from './mds.js' | ||
code_fence: string | 0 | 1 | ||
/* For Blockquote parsing */ | ||
newline_blockquote_idx: number | ||
} | ||
@@ -22,0 +24,0 @@ |
{ | ||
"name": "streaming-markdown", | ||
"version": "0.0.5", | ||
"version": "0.0.6", | ||
"author": "Damian Tarnawski <gthetarnav@gmail.com>", | ||
@@ -5,0 +5,0 @@ "description": "Streaming Markdown parser, à la ChatGPT", |
# Streaming *Markdown* | ||
[npm package](https://www.npmjs.com/package/streaming-markdown) | ||
[npm package](https://www.npmjs.com/package/streaming-markdown) [GitHub repository](https://github.com/thetarnav/streaming-markdown) | ||
@@ -9,3 +9,3 @@ **Experiment making a streaming markdown parser *à la ChatGPT.*** | ||
Install [`streaming-markdown` package](https://www.npmjs.com/package/streaming-markdown) from npm. | ||
Install `streaming-markdown` package from npm. | ||
@@ -67,3 +67,3 @@ ```bash | ||
- [x] Escaping line breaks | ||
- [ ] Remove leading root spaces | ||
- [x] Trim unnecessary spaces | ||
- [x] Headers | ||
@@ -90,5 +90,9 @@ - [x] Code Block with indent | ||
- [x] src attr | ||
- [ ] Lists | ||
- [ ] Blockquotes | ||
- [ ] Horizontal rules | ||
- [ ] Unordered lists | ||
- [ ] Ordered lists | ||
- [ ] Check lists | ||
- [ ] Nested lists | ||
- [x] Blockquotes | ||
- [ ] Tables | ||
- [ ] Html tags (e.g. `<div>`, `<span>`, `<a>`, `<img>`, etc.) |
Sorry, the diff of this file is not supported yet
33464
919
95