yeast-markdown-parser
Advanced tools
Comparing version 1.5.1-devengage-2714-link-fixes.543 to 1.5.1-devengage-2714-link-fixes.544
@@ -69,6 +69,6 @@ import { YeastBlockNodeTypes, YeastNodeFactory, YeastInlineNodeTypes, isYeastNodeType, scrapeText, ContentGroupType, isYeastTextNode, isYeastNode, YeastParser } from 'yeast-core'; | ||
const ITALICS_REGEX_UNDERSCORES = /(\\?)(_)([^\s_]|\S.*?\S)(\\?)(_)/gi; | ||
const ITALICS_REGEX_ASTERISKS = /(\\?)(\*)([^\s_]|\S.*?\S)(\\?)(\*)/gi; | ||
const BOLD_REGEX_UNDERSCORES = /(\\?)(__)([^\s_]|\S.*?\S)(\\?)(__)/gi; | ||
const BOLD_REGEX_ASTERISKS = /(\\?)(\*\*)([^\s_]|\S.*?\S)(\\?)(\*\*)/gi; | ||
const ITALICS_REGEX_UNDERSCORES = /(?:^|([^\\]))_(?:(\\_|[^\\\s][^_\s]?)_|((?:\\_|\S)(?:\\_|[^_])*?[^\s\\])_)/gi; | ||
const ITALICS_REGEX_ASTERISKS = /(?:^|([^\\]))\*(?:(\\\*|[^\\\s][^\*\s]?)\*|((?:\\\*|\S)(?:\\\*|[^\*])*?[^\s\\])\*)/gi; | ||
const BOLD_REGEX_UNDERSCORES = /(?:^|([^\\]))__(?:(\\_|[^\\\s][^_\s]?)__|((?:\\_|\S)(?:\\_|[^_])*?[^\s\\])__)/gi; | ||
const BOLD_REGEX_ASTERISKS = /(?:^|([^\\]))\*\*(?:(\\\*|[^\\\s][^\*\s]?)\*\*|((?:\\\*|\S)(?:\\\*|[^\*])*?[^\s\\])\*\*)/gi; | ||
class InlineEmphasisPlugin { | ||
@@ -78,13 +78,9 @@ tokenize(text, parser) { | ||
const parseMatch = (match, nodeType) => { | ||
let node; | ||
if (match.length == 6 && match[1] && match[4]) { | ||
node = YeastNodeFactory.CreateText(); | ||
node.text = `${match[2]}${match[3]}${match[5]}`; | ||
} | ||
else { | ||
node = YeastNodeFactory.Create(nodeType); | ||
node.children = parser.parseInline(match[3]); | ||
} | ||
if (match.length < 4 || (!match[2] && !match[3])) | ||
return; | ||
const startOffset = (match[1] || '').length; | ||
let node = YeastNodeFactory.Create(nodeType); | ||
node.children = parser.parseInline(match[2] || match[3]); | ||
tokens.push({ | ||
start: match.index, | ||
start: match.index + startOffset, | ||
end: match.index + match[0].length, | ||
@@ -107,36 +103,2 @@ from: 'InlineEmphasisPlugin', | ||
} | ||
const textArr = text.split(''); | ||
let index = 0; | ||
while (index < textArr.length) { | ||
if (textArr[index] === '*' && textArr[index + 1] !== '*' && textArr[index + 1] !== ' ') { | ||
if (index >= 0 && textArr[index - 1] !== '*') { | ||
let italizedText = ''; | ||
let startIndex = index; | ||
let isInvalidSyntax = false; | ||
do { | ||
if (textArr[index + 1]) { | ||
italizedText += textArr[++index]; | ||
} | ||
else { | ||
isInvalidSyntax = true; | ||
break; | ||
} | ||
} while (textArr[index + 1] !== '*' && index < textArr.length); | ||
if (isInvalidSyntax) { | ||
index++; | ||
continue; | ||
} | ||
const node = YeastNodeFactory.CreateItalicNode(); | ||
node.children = parser.parseInline(italizedText); | ||
tokens.push({ | ||
start: startIndex, | ||
end: startIndex + italizedText.length + 2, | ||
from: 'ItalicsInlinePlugin', | ||
nodes: [node], | ||
}); | ||
index++; | ||
} | ||
} | ||
index++; | ||
} | ||
return tokens; | ||
@@ -350,3 +312,3 @@ } | ||
const LINK_REGEX = /\[([^\[\]]*(?:\\.[^\[\]]*)*)\]\((.+?)(?:\s["'](.*?)["'])?\)/gi; | ||
const LINK_REGEX = /\[\s*([^\[\]]*?(?:\\.[^\[\]]*?)*?)\s*\]\(\s*(\S+)(?:\s+["']\s*(.*?)\s*["'])?\s*\)/gi; | ||
class InlineLinkPlugin { | ||
@@ -356,3 +318,4 @@ tokenize(text, parser) { | ||
for (const match of text.matchAll(LINK_REGEX)) { | ||
if (text.charAt(match.index - 1) === '!') { | ||
const charBefore = text.charAt(match.index - 1); | ||
if (charBefore === '!' || charBefore === '\\') { | ||
continue; | ||
@@ -359,0 +322,0 @@ } |
{ | ||
"name": "yeast-markdown-parser", | ||
"version": "1.5.1-devengage-2714-link-fixes.543", | ||
"version": "1.5.1-devengage-2714-link-fixes.544", | ||
"description": "Parses markdown to yeAST (Yuri's Empathetic Arbitrary Syntax Tree) documents", | ||
@@ -5,0 +5,0 @@ "exports": { |
Lorem ipsum dolor sit amet, consectetur adipiscing elit. ~Nulla viverra~ tortor non diam , non `func()` fringilla nunc vestibulum. _Donec faucibus_, odio a congue mollis, arcu nunc viverra leo, vitae viverra * turpis erat at quam. Pellentesque *foobar* hendrerit ligula neque, et tempus leo bibendum a.pulvinar, nunc a malesuada dignissim, nisi **vestibulum sapien a rutrum** diam maximus est,``function() ` `` a pharetra lorem tortor ac odio. Phasellus consectetur vestibulum sapien a rutrum. ![Alt Text](path/to/image.png 'Single quote text') Mauris vel ![Alt Text](path/to/image.png "Double quote text")pharetra libero, sollicitudin volutpat erat. ![](path/to/image.png "image without alt text")Nulla pulvinar libero sed vehicula sagittis. Mauris at quam fringilla, feugiat ante eget, dictu`m nibh. Vivamus aliquet, [/Genesys/developercenter](https://developer.genesys.cloud/ 'single quote text') tellus et pretium tempus, odio enim dictum sem, ut ultricies [Api Central](https://apicentral.dev-genesys.cloud/index/ "double quote text") nibh leo a dolor. [](https://apicentral.genesys.cloud/index/ "link without alt text") Phasellus suscipit libero rhoncus, euismod nulla ac, venenatis eros*. https://developer.genesys.cloud/ | ||
This paragraph tests _non_standard touching use cases. Sin_gle_ cha*racters* _i_n *wo*rds are f_o_r i*talic*s. Characters * surrounded * by _ whitespace _ are ignored. Es\_ca\_ped ch\*ar\*acters a\__re\__ n\**ot\** \**parsed out\** \_but\_ \*are\* \__rendered without\__ escape characters. Dou__ble__ cha**racters** __i__n **wo**rds are f__o__r b**ol**d. | ||
This paragraph tests _non_standard touching use cases. Sin_gle_ cha*racters* _i_n *wo*rds are f_o_r i*talic*s. Characters * surrounded * by _ whitespace _ are ignored. Es\_ca\_ped ch\*ar\*acters a\_\_re\_\_ n\*\*ot\*\* \*\*parsed out\*\* \_but\_ \*are\* \_\_rendered without\_\_ escape characters. Dou__ble__ cha**racters** __i__n **wo**rds are f__o__r b**ol**d. | ||
Check **that \_escaped text\_ inside bold text** is reassembed into one text node. | ||
Dangling escaped markers, like \* or \__ or \|, should get unescaped. | ||
Dangling escaped markers, like \* or \_ or \|, should get unescaped. | ||
__bold underscore__ escape use cases 1) __bold text__ 2) __bold\_\_text__ 3) __bold\_text__ 4) \_\_notbold__isbold__ 5) __isbold__notbold\_\_ 6) __b__ __bb__ __bbb__ 7) __\___ | ||
**bold asterisk** escape use cases 1) **bold text** 2) **bold\*\*text** 3) **bold\*text** 4) \*\*notbold**isbold** 5) **isbold**notbold\*\* 6) **b** **bb** **bbb** 7) **\*** | ||
_italic underscore_ escape use cases 1) _italic text_ 2) _italic\_\_text_ 3) _italic\_text_ 4) \_notitalic_isitalic_ 5) _isitalic_notitalic\_ 6) _b_ _bb_ _bbb_ 7) _\__ 8) _ notitalic_ and _notitalic _ and _ _ | ||
*italic asterisk* escape use cases 1) *italic text* 2) *italic\*\*text* 3) *italic\*text* 4) \*notitalic*isitalic* 5) *isitalic*notitalic\* 6) *b* *bb* *bbb* 7) *\** 8) * notitalic* and *notitalic * and * * |
@@ -10,2 +10,8 @@ export const LINK_MARKDOWN = `[/Genesys/developercenter](https://developer.genesys.cloud/ 'single quote text')tellus et pretium tempus, https://developer.genesys.cloud/ | ||
Not parsing escaped [brackets \\[inside\\] links](#asdf). | ||
This is not a link because it's not escaped: \\[brackets \\[inside\\] NOT links](#asdf). why \] ok. | ||
Links with whitespace [ why do you do this ]( #somewhere ). | ||
and alt text [ lots more poor spacing ]( #somewhere " goes to somewhere's special. ' ). | ||
`; | ||
@@ -164,3 +170,56 @@ | ||
}, | ||
{ | ||
type: 'paragraph', | ||
children: [ | ||
{ | ||
text: "This is not a link because it's not escaped: [brackets [inside] NOT links](#asdf). why ] ok.", | ||
}, | ||
], | ||
indentation: 0, | ||
}, | ||
{ | ||
type: 'paragraph', | ||
children: [ | ||
{ | ||
text: 'Links with whitespace ', | ||
}, | ||
{ | ||
type: 'link', | ||
children: [ | ||
{ | ||
text: 'why do you do this', | ||
}, | ||
], | ||
href: '#somewhere', | ||
title: 'Link', | ||
}, | ||
{ | ||
text: '.', | ||
}, | ||
], | ||
indentation: 0, | ||
}, | ||
{ | ||
type: 'paragraph', | ||
children: [ | ||
{ | ||
text: 'and alt text ', | ||
}, | ||
{ | ||
type: 'link', | ||
children: [ | ||
{ | ||
text: 'lots more poor spacing', | ||
}, | ||
], | ||
href: '#somewhere', | ||
title: "goes to somewhere's special.", | ||
}, | ||
{ | ||
text: '.', | ||
}, | ||
], | ||
indentation: 0, | ||
}, | ||
], | ||
}; |
@@ -43,2 +43,3 @@ import * as fs from 'fs'; | ||
import { TABLE_AST } from '../resources/table-data'; | ||
import { EVERYTHING_INLINE_AST } from '../resources/everythinginline'; | ||
@@ -390,47 +391,6 @@ const standardBlockPluginCount = 10; | ||
// Check document | ||
checkAstStructureForDefaultDocument(ast, 4); | ||
checkAstStructureForDefaultDocument(ast, 8); | ||
// Paragraph 1 | ||
expect(ast.children[0].children.length).toBe(26); | ||
expect((ast.children[0].children[1] as StrikethroughNode).type).toBe(YeastInlineNodeTypes.Strikethrough); | ||
expect((ast.children[0].children[3] as InlineCodeNode).type).toBe(YeastInlineNodeTypes.Code); | ||
expect((ast.children[0].children[5] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic); | ||
expect((ast.children[0].children[7] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic); | ||
expect((ast.children[0].children[9] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold); | ||
expect((ast.children[0].children[11] as InlineCodeNode).type).toBe(YeastInlineNodeTypes.Code); | ||
expect((ast.children[0].children[13] as ImageNode).type).toBe(YeastInlineNodeTypes.Image); | ||
expect((ast.children[0].children[15] as ImageNode).type).toBe(YeastInlineNodeTypes.Image); | ||
expect((ast.children[0].children[17] as ImageNode).type).toBe(YeastInlineNodeTypes.Image); | ||
expect((ast.children[0].children[19] as LinkNode).type).toBe(YeastInlineNodeTypes.Link); | ||
expect((ast.children[0].children[21] as LinkNode).type).toBe(YeastInlineNodeTypes.Link); | ||
expect((ast.children[0].children[23] as LinkNode).type).toBe(YeastInlineNodeTypes.Link); | ||
expect((ast.children[0].children[25] as LinkNode).type).toBe(YeastInlineNodeTypes.Link); | ||
// Paragraph 2 - test bold and italic formats and escaping | ||
expect(ast.children[1].children.length).toBe(27); | ||
expect((ast.children[1].children[1] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic); | ||
expect((ast.children[1].children[3] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic); | ||
expect((ast.children[1].children[5] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic); | ||
expect((ast.children[1].children[7] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic); | ||
expect((ast.children[1].children[9] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic); | ||
expect((ast.children[1].children[11] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic); | ||
expect((ast.children[1].children[13] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic); | ||
expect((ast.children[1].children[15] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold); | ||
expect((ast.children[1].children[17] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold); | ||
expect((ast.children[1].children[19] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold); | ||
expect((ast.children[1].children[21] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold); | ||
expect((ast.children[1].children[23] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold); | ||
expect((ast.children[1].children[25] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold); | ||
// Paragraph 3 -- make sure text reassembly works for inline elements (paragraph above tests for blocks) | ||
expect(ast.children[2].children.length).toBe(3); | ||
expect((ast.children[2].children[0] as YeastText).text).toBeTruthy(); | ||
expect((ast.children[2].children[1] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold); | ||
expect((ast.children[2].children[1] as BoldNode).children.length).toBe(1); | ||
expect(((ast.children[2].children[1] as BoldNode).children[0] as YeastText).text).toBeTruthy(); | ||
expect((ast.children[2].children[2] as YeastText).text).toBeTruthy(); | ||
// Paragraph 4 - dangling unescapes | ||
expect(ast.children[3].children.length).toBe(1); | ||
expect((ast.children[3].children[0] as YeastText).text).toBe('Dangling escaped markers, like * or __ or |, should get unescaped.'); | ||
// Validate AST | ||
expect(JSON.stringify(ast)).toBe(JSON.stringify(EVERYTHING_INLINE_AST)); | ||
}); | ||
@@ -437,0 +397,0 @@ |
@@ -11,6 +11,13 @@ import { | ||
const ITALICS_REGEX_UNDERSCORES = /(\\?)(_)([^\s_]|\S.*?\S)(\\?)(_)/gi; | ||
const ITALICS_REGEX_ASTERISKS = /(\\?)(\*)([^\s_]|\S.*?\S)(\\?)(\*)/gi; | ||
const BOLD_REGEX_UNDERSCORES = /(\\?)(__)([^\s_]|\S.*?\S)(\\?)(__)/gi; | ||
const BOLD_REGEX_ASTERISKS = /(\\?)(\*\*)([^\s_]|\S.*?\S)(\\?)(\*\*)/gi; | ||
/** | ||
* These expressions look for text between the various emphasis marker types with the following caveats: | ||
* - the beginning of the match is the beginning of the string or not an escape slash | ||
* - the next character is the opening marker | ||
* - the first character after the opening marker is not whitespace | ||
* - the last character before the closing marker is not whitespace | ||
*/ | ||
const ITALICS_REGEX_UNDERSCORES = /(?:^|([^\\]))_(?:(\\_|[^\\\s][^_\s]?)_|((?:\\_|\S)(?:\\_|[^_])*?[^\s\\])_)/gi; | ||
const ITALICS_REGEX_ASTERISKS = /(?:^|([^\\]))\*(?:(\\\*|[^\\\s][^\*\s]?)\*|((?:\\\*|\S)(?:\\\*|[^\*])*?[^\s\\])\*)/gi; | ||
const BOLD_REGEX_UNDERSCORES = /(?:^|([^\\]))__(?:(\\_|[^\\\s][^_\s]?)__|((?:\\_|\S)(?:\\_|[^_])*?[^\s\\])__)/gi; | ||
const BOLD_REGEX_ASTERISKS = /(?:^|([^\\]))\*\*(?:(\\\*|[^\\\s][^\*\s]?)\*\*|((?:\\\*|\S)(?:\\\*|[^\*])*?[^\s\\])\*\*)/gi; | ||
@@ -25,19 +32,13 @@ /** | ||
/** | ||
* match[1] -> (optional) escape slash | ||
* match[2] -> opening characters | ||
* match[3] -> encased text | ||
* match[4] -> (optional) escape slash | ||
* match[5] -> closing characters | ||
* match[1] -> preceding character | ||
* match[2] -> encased text (1 char) | ||
* match[3] -> encased text (2+ chars) | ||
*/ | ||
const parseMatch = (match: RegExpMatchArray, nodeType: YeastInlineNodeTypes) => { | ||
let node: YeastInlineChild; | ||
if (match.length == 6 && match[1] && match[4]) { | ||
node = YeastNodeFactory.CreateText(); | ||
node.text = `${match[2]}${match[3]}${match[5]}`; | ||
} else { | ||
node = YeastNodeFactory.Create(nodeType) as YeastInlineNode; | ||
node.children = parser.parseInline(match[3]); | ||
} | ||
if (match.length < 4 || (!match[2] && !match[3])) return; | ||
const startOffset = (match[1] || '').length; | ||
let node = YeastNodeFactory.Create(nodeType) as YeastInlineNode; | ||
node.children = parser.parseInline(match[2] || match[3]); | ||
tokens.push({ | ||
start: match.index, | ||
start: match.index + startOffset, | ||
end: match.index + match[0].length, | ||
@@ -65,39 +66,4 @@ from: 'InlineEmphasisPlugin', | ||
//Parse for asterisk syntax | ||
const textArr = text.split(''); | ||
let index = 0; | ||
while (index < textArr.length) { | ||
if (textArr[index] === '*' && textArr[index + 1] !== '*' && textArr[index + 1] !== ' ') { | ||
if (index >= 0 && textArr[index - 1] !== '*') { | ||
let italizedText = ''; | ||
let startIndex = index; | ||
let isInvalidSyntax = false; | ||
do { | ||
if (textArr[index + 1]) { | ||
italizedText += textArr[++index]; | ||
} else { | ||
//There was probably no closing tag | ||
isInvalidSyntax = true; | ||
break; | ||
} | ||
} while (textArr[index + 1] !== '*' && index < textArr.length); | ||
if (isInvalidSyntax) { | ||
index++; | ||
continue; | ||
} | ||
const node = YeastNodeFactory.CreateItalicNode(); | ||
node.children = parser.parseInline(italizedText); | ||
tokens.push({ | ||
start: startIndex, | ||
end: startIndex + italizedText.length + 2, | ||
from: 'ItalicsInlinePlugin', | ||
nodes: [node], | ||
}); | ||
index++; | ||
} | ||
} | ||
index++; | ||
} | ||
return tokens; | ||
} | ||
} |
import { InlineTokenizerPlugin, Token, YeastParser, YeastNodeFactory, isYeastNodeType, YeastInlineNodeTypes, scrapeText } from 'yeast-core'; | ||
const LINK_REGEX = /\[([^\[\]]*(?:\\.[^\[\]]*)*)\]\((.+?)(?:\s["'](.*?)["'])?\)/gi; | ||
const LINK_REGEX = /\[\s*([^\[\]]*?(?:\\.[^\[\]]*?)*?)\s*\]\(\s*(\S+)(?:\s+["']\s*(.*?)\s*["'])?\s*\)/gi; | ||
@@ -9,4 +9,5 @@ export class InlineLinkPlugin implements InlineTokenizerPlugin { | ||
for (const match of text.matchAll(LINK_REGEX)) { | ||
// Don't process if it's an image | ||
if (text.charAt(match.index - 1) === '!') { | ||
// Don't process if it's an image or escaped | ||
const charBefore = text.charAt(match.index - 1); | ||
if (charBefore === '!' || charBefore === '\\') { | ||
continue; | ||
@@ -13,0 +14,0 @@ } |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
209923
103
5569