yeast-markdown-parser
Advanced tools
Comparing version 1.5.1-devengage-2714-link-fixes.542 to 1.5.1-devengage-2714-link-fixes.543
@@ -347,3 +347,3 @@ import { YeastBlockNodeTypes, YeastNodeFactory, YeastInlineNodeTypes, isYeastNodeType, scrapeText, ContentGroupType, isYeastTextNode, isYeastNode, YeastParser } from 'yeast-core'; | ||
const LINK_REGEX = /\[([^\[\]]*)\]\((.+?)(?:\s["'](.*?)["'])?\)/gi; | ||
const LINK_REGEX = /\[([^\[\]]*(?:\\.[^\[\]]*)*)\]\((.+?)(?:\s["'](.*?)["'])?\)/gi; | ||
class InlineLinkPlugin { | ||
@@ -1004,3 +1004,24 @@ tokenize(text, parser) { | ||
const TEXT_LINK_REGEX = /https:\/\/[^ )]+/gi; | ||
/**
 * Plugin that removes leftover escape backslashes from a parsed document.
 *
 * It hands the document's top-level children to `unescapeStuff` and stores
 * the result back on the document.
 */
class UnescapeDanglingEscapes {
    /**
     * @param document Document whose `children` are rewritten.
     * @param parser Not used by this plugin.
     * @returns The same `document` object that was passed in.
     */
    parse(document, parser) {
        const cleanedChildren = unescapeStuff(document.children);
        document.children = cleanedChildren;
        return document;
    }
}
/**
 * Matches a backslash followed by one of the markers `__`, `**`, `_`, `*`,
 * `|`, `[` or `]`. Capture group 1 holds the marker without the backslash.
 */
const ESCAPED_STUFF_REGEX = /\\(__|\*\*|_|\*|\||\[|\])/g;

/**
 * Walks a list of nodes and strips the escaping backslash from escaped
 * markers in every text node, recursing into child lists.
 *
 * The nodes are modified in place.
 *
 * @param nodes List of nodes to process; may be missing.
 * @returns The same `nodes` list, or `undefined` when `nodes` is falsy.
 */
function unescapeStuff(nodes) {
    if (!nodes) {
        return undefined;
    }
    const visit = (current) => {
        if (isYeastTextNode(current)) {
            // The pattern is global, so `replace` rewrites every occurrence.
            current.text = current.text.replace(ESCAPED_STUFF_REGEX, '$1');
        }
        if (isYeastNode(current)) {
            // Reassigned even when the child list is absent: the recursive
            // call returns `undefined` in that case.
            current.children = unescapeStuff(current.children);
        }
    };
    nodes.forEach(visit);
    return nodes;
}
const TEXT_LINK_REGEX = /https:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/gi; | ||
class InlineTextLinkPlugin { | ||
@@ -1010,9 +1031,16 @@ tokenize(text, parser) { | ||
for (const match of text.matchAll(TEXT_LINK_REGEX)) { | ||
let linkText = match[0]; | ||
let offset = 0; | ||
let lastChar = linkText.substring(linkText.length - 1); | ||
while (['.', ',', '!', '?', ';'].includes(lastChar)) { | ||
linkText = linkText.substring(0, linkText.length - 1); | ||
lastChar = linkText.substring(linkText.length - 1); | ||
offset++; | ||
} | ||
const node = YeastNodeFactory.CreateLinkNode(); | ||
const linkText = { text: match[0] }; | ||
node.children = [linkText]; | ||
node.href = match[0]; | ||
node.children = [{ text: linkText }]; | ||
node.href = linkText; | ||
tokens.push({ | ||
start: match.index, | ||
end: match.index + match[0].length, | ||
end: match.index + match[0].length - offset, | ||
from: 'InlineLinkPlugin', | ||
@@ -1077,2 +1105,3 @@ nodes: [node], | ||
this.registerPostProcessorPlugin(new AdjacentTextCombiner()); | ||
this.registerPostProcessorPlugin(new UnescapeDanglingEscapes()); | ||
this.registerBlockPlugin(new HeadingParserPlugin()); | ||
@@ -1079,0 +1108,0 @@ this.registerBlockPlugin(new HorizontalRuleParserPlugin()); |
{ | ||
"name": "yeast-markdown-parser", | ||
"version": "1.5.1-devengage-2714-link-fixes.542", | ||
"version": "1.5.1-devengage-2714-link-fixes.543", | ||
"description": "Parses markdown to yeAST (Yuri's Empathetic Arbitrary Syntax Tree) documents", | ||
@@ -5,0 +5,0 @@ "exports": { |
@@ -6,1 +6,3 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. ~Nulla viverra~ tortor non diam , non `func()` fringilla nunc vestibulum. _Donec faucibus_, odio a congue mollis, arcu nunc viverra leo, vitae viverra * turpis erat at quam. Pellentesque *foobar* hendrerit ligula neque, et tempus leo bibendum a.pulvinar, nunc a malesuada dignissim, nisi **vestibulum sapien a rutrum** diam maximus est,``function() ` `` a pharetra lorem tortor ac odio. Phasellus consectetur vestibulum sapien a rutrum. ![Alt Text](path/to/image.png 'Single quote text') Mauris vel ![Alt Text](path/to/image.png "Double quote text")pharetra libero, sollicitudin volutpat erat. ![](path/to/image.png "image without alt text")Nulla pulvinar libero sed vehicula sagittis. Mauris at quam fringilla, feugiat ante eget, dictu`m nibh. Vivamus aliquet, [/Genesys/developercenter](https://developer.genesys.cloud/ 'single quote text') tellus et pretium tempus, odio enim dictum sem, ut ultricies [Api Central](https://apicentral.dev-genesys.cloud/index/ "double quote text") nibh leo a dolor. [](https://apicentral.genesys.cloud/index/ "link without alt text") Phasellus suscipit libero rhoncus, euismod nulla ac, venenatis eros*. https://developer.genesys.cloud/ | ||
Check **that \_escaped text\_ inside bold text** is reassembled into one text node. | ||
Dangling escaped markers, like \* or \__ or \|, should get unescaped. |
export const LINK_MARKDOWN = `[/Genesys/developercenter](https://developer.genesys.cloud/ 'single quote text')tellus et pretium tempus, https://developer.genesys.cloud/ | ||
Here's a recursive link: [https://genesys.com](https://genesys.com)`; | ||
Here's a recursive link: [https://genesys.com](https://genesys.com) | ||
Parsing a URL as a link https://genesys.com mid-text. | ||
URL with a trailing period: https://genesys.com/asdf. Or https://genesys.com/asdf/. Or even https://genesys.com/asdf.html. Or even this nonsense: https://a.bc.?;,! | ||
Not parsing escaped [brackets \\[inside\\] links](#asdf). | ||
`; | ||
export const LINK_AST = { | ||
@@ -56,3 +63,103 @@ type: 'document', | ||
}, | ||
{ | ||
type: 'paragraph', | ||
children: [ | ||
{ | ||
text: 'Parsing a URL as a link ', | ||
}, | ||
{ | ||
type: 'link', | ||
children: [ | ||
{ | ||
text: 'https://genesys.com', | ||
}, | ||
], | ||
href: 'https://genesys.com', | ||
}, | ||
{ | ||
text: ' mid-text.', | ||
}, | ||
], | ||
indentation: 0, | ||
}, | ||
{ | ||
type: 'paragraph', | ||
children: [ | ||
{ | ||
text: 'URL with a trailing period: ', | ||
}, | ||
{ | ||
type: 'link', | ||
children: [ | ||
{ | ||
text: 'https://genesys.com/asdf', | ||
}, | ||
], | ||
href: 'https://genesys.com/asdf', | ||
}, | ||
{ | ||
text: '. Or ', | ||
}, | ||
{ | ||
type: 'link', | ||
children: [ | ||
{ | ||
text: 'https://genesys.com/asdf/', | ||
}, | ||
], | ||
href: 'https://genesys.com/asdf/', | ||
}, | ||
{ | ||
text: '. Or even ', | ||
}, | ||
{ | ||
type: 'link', | ||
children: [ | ||
{ | ||
text: 'https://genesys.com/asdf.html', | ||
}, | ||
], | ||
href: 'https://genesys.com/asdf.html', | ||
}, | ||
{ | ||
text: '. Or even this nonsense: ', | ||
}, | ||
{ | ||
type: 'link', | ||
children: [ | ||
{ | ||
text: 'https://a.bc', | ||
}, | ||
], | ||
href: 'https://a.bc', | ||
}, | ||
{ | ||
text: '.?;,!', | ||
}, | ||
], | ||
indentation: 0, | ||
}, | ||
{ | ||
type: 'paragraph', | ||
children: [ | ||
{ | ||
text: 'Not parsing escaped ', | ||
}, | ||
{ | ||
type: 'link', | ||
children: [ | ||
{ | ||
text: 'brackets [inside] links', | ||
}, | ||
], | ||
href: '#asdf', | ||
title: 'Link', | ||
}, | ||
{ | ||
text: '.', | ||
}, | ||
], | ||
indentation: 0, | ||
}, | ||
], | ||
}; |
@@ -159,3 +159,3 @@ import * as fs from 'fs'; | ||
const ast = parser.parse(documentText); | ||
debugAST('tables', ast); | ||
// debugAST('tables', ast); | ||
@@ -387,5 +387,6 @@ // Validate AST | ||
const ast = parser.parse(documentText); | ||
// debugAST('everythinginline', ast); | ||
// Check document | ||
checkAstStructureForDefaultDocument(ast, 3); | ||
checkAstStructureForDefaultDocument(ast, 4); | ||
@@ -431,2 +432,6 @@ // Paragraph 1 | ||
expect((ast.children[2].children[2] as YeastText).text).toBeTruthy(); | ||
// Paragraph 4 - dangling escapes get unescaped | ||
expect(ast.children[3].children.length).toBe(1); | ||
expect((ast.children[3].children[0] as YeastText).text).toBe('Dangling escaped markers, like * or __ or |, should get unescaped.'); | ||
}); | ||
@@ -511,2 +516,3 @@ | ||
const ast = parser.parse(LINK_MARKDOWN); | ||
// debugAST('links', ast); | ||
@@ -513,0 +519,0 @@ expect(JSON.stringify(ast)).toBe(JSON.stringify(LINK_AST)); |
@@ -21,2 +21,3 @@ import { YeastParser } from 'yeast-core'; | ||
import { AdjacentTextCombiner } from './plugins/post/AdjacentTextCombiner'; | ||
import { UnescapeDanglingEscapes } from './plugins/post/UnescapeDanglingEscapes'; | ||
import { InlineTextLinkPlugin } from './plugins/inline/InlineTextLinkPlugin'; | ||
@@ -35,2 +36,3 @@ import { InlineImagePlugin } from './plugins/inline/InlineImagePlugin'; | ||
this.registerPostProcessorPlugin(new AdjacentTextCombiner()); | ||
this.registerPostProcessorPlugin(new UnescapeDanglingEscapes()); | ||
@@ -37,0 +39,0 @@ this.registerBlockPlugin(new HeadingParserPlugin()); |
@@ -1,13 +0,4 @@ | ||
import { | ||
InlineTokenizerPlugin, | ||
Token, | ||
YeastParser, | ||
YeastNodeFactory, | ||
YeastInlineNode, | ||
isYeastNodeType, | ||
YeastInlineNodeTypes, | ||
scrapeText, | ||
} from 'yeast-core'; | ||
import { InlineTokenizerPlugin, Token, YeastParser, YeastNodeFactory, isYeastNodeType, YeastInlineNodeTypes, scrapeText } from 'yeast-core'; | ||
const LINK_REGEX = /\[([^\[\]]*)\]\((.+?)(?:\s["'](.*?)["'])?\)/gi; | ||
const LINK_REGEX = /\[([^\[\]]*(?:\\.[^\[\]]*)*)\]\((.+?)(?:\s["'](.*?)["'])?\)/gi; | ||
@@ -18,3 +9,3 @@ export class InlineLinkPlugin implements InlineTokenizerPlugin { | ||
for (const match of text.matchAll(LINK_REGEX)) { | ||
//Don't process if it's an image | ||
// Don't process if it's an image | ||
if (text.charAt(match.index - 1) === '!') { | ||
@@ -24,3 +15,3 @@ continue; | ||
const node = YeastNodeFactory.CreateLinkNode(); | ||
//if alt text is empty use target as alt text | ||
// If alt text is empty use target as alt text | ||
if (match[1].length > 0) { | ||
@@ -27,0 +18,0 @@ node.children = parser.parseInline(match[1]); |
import { InlineTokenizerPlugin, Token, YeastParser, YeastNodeFactory } from 'yeast-core'; | ||
const TEXT_LINK_REGEX = /https:\/\/[^ )]+/gi; | ||
const TEXT_LINK_REGEX = /https:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/gi; | ||
export class InlineTextLinkPlugin implements InlineTokenizerPlugin { | ||
@@ -10,10 +11,20 @@ tokenize(text: string, parser: YeastParser): void | Token[] { | ||
for (const match of text.matchAll(TEXT_LINK_REGEX)) { | ||
let linkText = match[0]; | ||
let offset = 0; | ||
// Strip off trailing punctuation characters; they're probably not supposed to be part of the link | ||
let lastChar = linkText.substring(linkText.length - 1); | ||
while (['.', ',', '!', '?', ';'].includes(lastChar)) { | ||
linkText = linkText.substring(0, linkText.length - 1); | ||
lastChar = linkText.substring(linkText.length - 1); | ||
offset++; | ||
} | ||
// Create link node | ||
const node = YeastNodeFactory.CreateLinkNode(); | ||
const linkText = { text: match[0] }; | ||
node.children = [linkText]; | ||
node.href = match[0]; | ||
node.children = [{ text: linkText }]; | ||
node.href = linkText; | ||
tokens.push({ | ||
start: match.index, | ||
end: match.index + match[0].length, | ||
end: match.index + match[0].length - offset, | ||
from: 'InlineLinkPlugin', | ||
@@ -20,0 +31,0 @@ nodes: [node], |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
201904
102
4825