Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

yeast-markdown-parser

Package Overview
Dependencies
Maintainers
0
Versions
64
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

yeast-markdown-parser - npm Package Compare versions

Comparing version 1.5.1-devengage-2714-link-fixes.543 to 1.5.1-devengage-2714-link-fixes.544

src/__tests__/resources/everythinginline.ts

63

dist/index.js

@@ -69,6 +69,6 @@ import { YeastBlockNodeTypes, YeastNodeFactory, YeastInlineNodeTypes, isYeastNodeType, scrapeText, ContentGroupType, isYeastTextNode, isYeastNode, YeastParser } from 'yeast-core';

const ITALICS_REGEX_UNDERSCORES = /(\\?)(_)([^\s_]|\S.*?\S)(\\?)(_)/gi;
const ITALICS_REGEX_ASTERISKS = /(\\?)(\*)([^\s_]|\S.*?\S)(\\?)(\*)/gi;
const BOLD_REGEX_UNDERSCORES = /(\\?)(__)([^\s_]|\S.*?\S)(\\?)(__)/gi;
const BOLD_REGEX_ASTERISKS = /(\\?)(\*\*)([^\s_]|\S.*?\S)(\\?)(\*\*)/gi;
const ITALICS_REGEX_UNDERSCORES = /(?:^|([^\\]))_(?:(\\_|[^\\\s][^_\s]?)_|((?:\\_|\S)(?:\\_|[^_])*?[^\s\\])_)/gi;
const ITALICS_REGEX_ASTERISKS = /(?:^|([^\\]))\*(?:(\\\*|[^\\\s][^\*\s]?)\*|((?:\\\*|\S)(?:\\\*|[^\*])*?[^\s\\])\*)/gi;
const BOLD_REGEX_UNDERSCORES = /(?:^|([^\\]))__(?:(\\_|[^\\\s][^_\s]?)__|((?:\\_|\S)(?:\\_|[^_])*?[^\s\\])__)/gi;
const BOLD_REGEX_ASTERISKS = /(?:^|([^\\]))\*\*(?:(\\\*|[^\\\s][^\*\s]?)\*\*|((?:\\\*|\S)(?:\\\*|[^\*])*?[^\s\\])\*\*)/gi;
class InlineEmphasisPlugin {

@@ -78,13 +78,9 @@ tokenize(text, parser) {

const parseMatch = (match, nodeType) => {
let node;
if (match.length == 6 && match[1] && match[4]) {
node = YeastNodeFactory.CreateText();
node.text = `${match[2]}${match[3]}${match[5]}`;
}
else {
node = YeastNodeFactory.Create(nodeType);
node.children = parser.parseInline(match[3]);
}
if (match.length < 4 || (!match[2] && !match[3]))
return;
const startOffset = (match[1] || '').length;
let node = YeastNodeFactory.Create(nodeType);
node.children = parser.parseInline(match[2] || match[3]);
tokens.push({
start: match.index,
start: match.index + startOffset,
end: match.index + match[0].length,

@@ -107,36 +103,2 @@ from: 'InlineEmphasisPlugin',

}
const textArr = text.split('');
let index = 0;
while (index < textArr.length) {
if (textArr[index] === '*' && textArr[index + 1] !== '*' && textArr[index + 1] !== ' ') {
if (index >= 0 && textArr[index - 1] !== '*') {
let italizedText = '';
let startIndex = index;
let isInvalidSyntax = false;
do {
if (textArr[index + 1]) {
italizedText += textArr[++index];
}
else {
isInvalidSyntax = true;
break;
}
} while (textArr[index + 1] !== '*' && index < textArr.length);
if (isInvalidSyntax) {
index++;
continue;
}
const node = YeastNodeFactory.CreateItalicNode();
node.children = parser.parseInline(italizedText);
tokens.push({
start: startIndex,
end: startIndex + italizedText.length + 2,
from: 'ItalicsInlinePlugin',
nodes: [node],
});
index++;
}
}
index++;
}
return tokens;

@@ -350,3 +312,3 @@ }

const LINK_REGEX = /\[([^\[\]]*(?:\\.[^\[\]]*)*)\]\((.+?)(?:\s["'](.*?)["'])?\)/gi;
const LINK_REGEX = /\[\s*([^\[\]]*?(?:\\.[^\[\]]*?)*?)\s*\]\(\s*(\S+)(?:\s+["']\s*(.*?)\s*["'])?\s*\)/gi;
class InlineLinkPlugin {

@@ -356,3 +318,4 @@ tokenize(text, parser) {

for (const match of text.matchAll(LINK_REGEX)) {
if (text.charAt(match.index - 1) === '!') {
const charBefore = text.charAt(match.index - 1);
if (charBefore === '!' || charBefore === '\\') {
continue;

@@ -359,0 +322,0 @@ }

{
"name": "yeast-markdown-parser",
"version": "1.5.1-devengage-2714-link-fixes.543",
"version": "1.5.1-devengage-2714-link-fixes.544",
"description": "Parses markdown to yeAST (Yuri's Empathetic Arbitrary Syntax Tree) documents",

@@ -5,0 +5,0 @@ "exports": {

Lorem ipsum dolor sit amet, consectetur adipiscing elit. ~Nulla viverra~ tortor non diam , non `func()` fringilla nunc vestibulum. _Donec faucibus_, odio a congue mollis, arcu nunc viverra leo, vitae viverra * turpis erat at quam. Pellentesque *foobar* hendrerit ligula neque, et tempus leo bibendum a.pulvinar, nunc a malesuada dignissim, nisi **vestibulum sapien a rutrum** diam maximus est,``function() ` `` a pharetra lorem tortor ac odio. Phasellus consectetur vestibulum sapien a rutrum. ![Alt Text](path/to/image.png 'Single quote text') Mauris vel ![Alt Text](path/to/image.png "Double quote text")pharetra libero, sollicitudin volutpat erat. ![](path/to/image.png "image without alt text")Nulla pulvinar libero sed vehicula sagittis. Mauris at quam fringilla, feugiat ante eget, dictu`m nibh. Vivamus aliquet, [/Genesys/developercenter](https://developer.genesys.cloud/ 'single quote text') tellus et pretium tempus, odio enim dictum sem, ut ultricies [Api Central](https://apicentral.dev-genesys.cloud/index/ "double quote text") nibh leo a dolor. [](https://apicentral.genesys.cloud/index/ "link without alt text") Phasellus suscipit libero rhoncus, euismod nulla ac, venenatis eros*. https://developer.genesys.cloud/
This paragraph tests _non_standard touching use cases. Sin_gle_ cha*racters* _i_n *wo*rds are f_o_r i*talic*s. Characters * surrounded * by _ whitespace _ are ignored. Es\_ca\_ped ch\*ar\*acters a\__re\__ n\**ot\** \**parsed out\** \_but\_ \*are\* \__rendered without\__ escape characters. Dou__ble__ cha**racters** __i__n **wo**rds are f__o__r b**ol**d.
This paragraph tests _non_standard touching use cases. Sin_gle_ cha*racters* _i_n *wo*rds are f_o_r i*talic*s. Characters * surrounded * by _ whitespace _ are ignored. Es\_ca\_ped ch\*ar\*acters a\_\_re\_\_ n\*\*ot\*\* \*\*parsed out\*\* \_but\_ \*are\* \_\_rendered without\_\_ escape characters. Dou__ble__ cha**racters** __i__n **wo**rds are f__o__r b**ol**d.
Check **that \_escaped text\_ inside bold text** is reassembed into one text node.
Dangling escaped markers, like \* or \__ or \|, should get unescaped.
Dangling escaped markers, like \* or \_ or \|, should get unescaped.
__bold underscore__ escape use cases 1) __bold text__ 2) __bold\_\_text__ 3) __bold\_text__ 4) \_\_notbold__isbold__ 5) __isbold__notbold\_\_ 6) __b__ __bb__ __bbb__ 7) __\___
**bold asterisk** escape use cases 1) **bold text** 2) **bold\*\*text** 3) **bold\*text** 4) \*\*notbold**isbold** 5) **isbold**notbold\*\* 6) **b** **bb** **bbb** 7) **\***
_italic underscore_ escape use cases 1) _italic text_ 2) _italic\_\_text_ 3) _italic\_text_ 4) \_notitalic_isitalic_ 5) _isitalic_notitalic\_ 6) _b_ _bb_ _bbb_ 7) _\__ 8) _ notitalic_ and _notitalic _ and _ _
*italic asterisk* escape use cases 1) *italic text* 2) *italic\*\*text* 3) *italic\*text* 4) \*notitalic*isitalic* 5) *isitalic*notitalic\* 6) *b* *bb* *bbb* 7) *\** 8) * notitalic* and *notitalic * and * *

@@ -10,2 +10,8 @@ export const LINK_MARKDOWN = `[/Genesys/developercenter](https://developer.genesys.cloud/ 'single quote text')tellus et pretium tempus, https://developer.genesys.cloud/

Not parsing escaped [brackets \\[inside\\] links](#asdf).
This is not a link because it's not escaped: \\[brackets \\[inside\\] NOT links](#asdf). why \] ok.
Links with whitespace [ why do you do this ]( #somewhere ).
and alt text [ lots more poor spacing ]( #somewhere " goes to somewhere's special. ' ).
`;

@@ -164,3 +170,56 @@

},
{
type: 'paragraph',
children: [
{
text: "This is not a link because it's not escaped: [brackets [inside] NOT links](#asdf). why ] ok.",
},
],
indentation: 0,
},
{
type: 'paragraph',
children: [
{
text: 'Links with whitespace ',
},
{
type: 'link',
children: [
{
text: 'why do you do this',
},
],
href: '#somewhere',
title: 'Link',
},
{
text: '.',
},
],
indentation: 0,
},
{
type: 'paragraph',
children: [
{
text: 'and alt text ',
},
{
type: 'link',
children: [
{
text: 'lots more poor spacing',
},
],
href: '#somewhere',
title: "goes to somewhere's special.",
},
{
text: '.',
},
],
indentation: 0,
},
],
};

@@ -43,2 +43,3 @@ import * as fs from 'fs';

import { TABLE_AST } from '../resources/table-data';
import { EVERYTHING_INLINE_AST } from '../resources/everythinginline';

@@ -390,47 +391,6 @@ const standardBlockPluginCount = 10;

// Check document
checkAstStructureForDefaultDocument(ast, 4);
checkAstStructureForDefaultDocument(ast, 8);
// Paragraph 1
expect(ast.children[0].children.length).toBe(26);
expect((ast.children[0].children[1] as StrikethroughNode).type).toBe(YeastInlineNodeTypes.Strikethrough);
expect((ast.children[0].children[3] as InlineCodeNode).type).toBe(YeastInlineNodeTypes.Code);
expect((ast.children[0].children[5] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic);
expect((ast.children[0].children[7] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic);
expect((ast.children[0].children[9] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold);
expect((ast.children[0].children[11] as InlineCodeNode).type).toBe(YeastInlineNodeTypes.Code);
expect((ast.children[0].children[13] as ImageNode).type).toBe(YeastInlineNodeTypes.Image);
expect((ast.children[0].children[15] as ImageNode).type).toBe(YeastInlineNodeTypes.Image);
expect((ast.children[0].children[17] as ImageNode).type).toBe(YeastInlineNodeTypes.Image);
expect((ast.children[0].children[19] as LinkNode).type).toBe(YeastInlineNodeTypes.Link);
expect((ast.children[0].children[21] as LinkNode).type).toBe(YeastInlineNodeTypes.Link);
expect((ast.children[0].children[23] as LinkNode).type).toBe(YeastInlineNodeTypes.Link);
expect((ast.children[0].children[25] as LinkNode).type).toBe(YeastInlineNodeTypes.Link);
// Paragraph 2 - test bold and italic formats and escaping
expect(ast.children[1].children.length).toBe(27);
expect((ast.children[1].children[1] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic);
expect((ast.children[1].children[3] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic);
expect((ast.children[1].children[5] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic);
expect((ast.children[1].children[7] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic);
expect((ast.children[1].children[9] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic);
expect((ast.children[1].children[11] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic);
expect((ast.children[1].children[13] as ItalicNode).type).toBe(YeastInlineNodeTypes.Italic);
expect((ast.children[1].children[15] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold);
expect((ast.children[1].children[17] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold);
expect((ast.children[1].children[19] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold);
expect((ast.children[1].children[21] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold);
expect((ast.children[1].children[23] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold);
expect((ast.children[1].children[25] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold);
// Paragraph 3 -- make sure text reassembly works for inline elements (paragraph above tests for blocks)
expect(ast.children[2].children.length).toBe(3);
expect((ast.children[2].children[0] as YeastText).text).toBeTruthy();
expect((ast.children[2].children[1] as BoldNode).type).toBe(YeastInlineNodeTypes.Bold);
expect((ast.children[2].children[1] as BoldNode).children.length).toBe(1);
expect(((ast.children[2].children[1] as BoldNode).children[0] as YeastText).text).toBeTruthy();
expect((ast.children[2].children[2] as YeastText).text).toBeTruthy();
// Paragraph 4 - dangling unescapes
expect(ast.children[3].children.length).toBe(1);
expect((ast.children[3].children[0] as YeastText).text).toBe('Dangling escaped markers, like * or __ or |, should get unescaped.');
// Validate AST
expect(JSON.stringify(ast)).toBe(JSON.stringify(EVERYTHING_INLINE_AST));
});

@@ -437,0 +397,0 @@

@@ -11,6 +11,13 @@ import {

const ITALICS_REGEX_UNDERSCORES = /(\\?)(_)([^\s_]|\S.*?\S)(\\?)(_)/gi;
const ITALICS_REGEX_ASTERISKS = /(\\?)(\*)([^\s_]|\S.*?\S)(\\?)(\*)/gi;
const BOLD_REGEX_UNDERSCORES = /(\\?)(__)([^\s_]|\S.*?\S)(\\?)(__)/gi;
const BOLD_REGEX_ASTERISKS = /(\\?)(\*\*)([^\s_]|\S.*?\S)(\\?)(\*\*)/gi;
/**
* These expressions match text enclosed by each emphasis marker type, subject to the following rules:
* - the match begins either at the start of the string or with one character that is not an escape backslash (captured as match[1])
* - the opening marker comes next
* - the first character after the opening marker is not whitespace
* - the last character before the closing marker is not whitespace
*/
const ITALICS_REGEX_UNDERSCORES = /(?:^|([^\\]))_(?:(\\_|[^\\\s][^_\s]?)_|((?:\\_|\S)(?:\\_|[^_])*?[^\s\\])_)/gi;
const ITALICS_REGEX_ASTERISKS = /(?:^|([^\\]))\*(?:(\\\*|[^\\\s][^\*\s]?)\*|((?:\\\*|\S)(?:\\\*|[^\*])*?[^\s\\])\*)/gi;
const BOLD_REGEX_UNDERSCORES = /(?:^|([^\\]))__(?:(\\_|[^\\\s][^_\s]?)__|((?:\\_|\S)(?:\\_|[^_])*?[^\s\\])__)/gi;
const BOLD_REGEX_ASTERISKS = /(?:^|([^\\]))\*\*(?:(\\\*|[^\\\s][^\*\s]?)\*\*|((?:\\\*|\S)(?:\\\*|[^\*])*?[^\s\\])\*\*)/gi;

@@ -25,19 +32,13 @@ /**

/**
* match[1] -> (optional) escape slash
* match[2] -> opening characters
* match[3] -> encased text
* match[4] -> (optional) escape slash
* match[5] -> closing characters
* match[1] -> preceding character
* match[2] -> encased text (1 char)
* match[3] -> encased text (2+ chars)
*/
const parseMatch = (match: RegExpMatchArray, nodeType: YeastInlineNodeTypes) => {
let node: YeastInlineChild;
if (match.length == 6 && match[1] && match[4]) {
node = YeastNodeFactory.CreateText();
node.text = `${match[2]}${match[3]}${match[5]}`;
} else {
node = YeastNodeFactory.Create(nodeType) as YeastInlineNode;
node.children = parser.parseInline(match[3]);
}
if (match.length < 4 || (!match[2] && !match[3])) return;
const startOffset = (match[1] || '').length;
let node = YeastNodeFactory.Create(nodeType) as YeastInlineNode;
node.children = parser.parseInline(match[2] || match[3]);
tokens.push({
start: match.index,
start: match.index + startOffset,
end: match.index + match[0].length,

@@ -65,39 +66,4 @@ from: 'InlineEmphasisPlugin',

//Parse for asterisk syntax
const textArr = text.split('');
let index = 0;
while (index < textArr.length) {
if (textArr[index] === '*' && textArr[index + 1] !== '*' && textArr[index + 1] !== ' ') {
if (index >= 0 && textArr[index - 1] !== '*') {
let italizedText = '';
let startIndex = index;
let isInvalidSyntax = false;
do {
if (textArr[index + 1]) {
italizedText += textArr[++index];
} else {
//There was probably no closing tag
isInvalidSyntax = true;
break;
}
} while (textArr[index + 1] !== '*' && index < textArr.length);
if (isInvalidSyntax) {
index++;
continue;
}
const node = YeastNodeFactory.CreateItalicNode();
node.children = parser.parseInline(italizedText);
tokens.push({
start: startIndex,
end: startIndex + italizedText.length + 2,
from: 'ItalicsInlinePlugin',
nodes: [node],
});
index++;
}
}
index++;
}
return tokens;
}
}
import { InlineTokenizerPlugin, Token, YeastParser, YeastNodeFactory, isYeastNodeType, YeastInlineNodeTypes, scrapeText } from 'yeast-core';
const LINK_REGEX = /\[([^\[\]]*(?:\\.[^\[\]]*)*)\]\((.+?)(?:\s["'](.*?)["'])?\)/gi;
const LINK_REGEX = /\[\s*([^\[\]]*?(?:\\.[^\[\]]*?)*?)\s*\]\(\s*(\S+)(?:\s+["']\s*(.*?)\s*["'])?\s*\)/gi;

@@ -9,4 +9,5 @@ export class InlineLinkPlugin implements InlineTokenizerPlugin {

for (const match of text.matchAll(LINK_REGEX)) {
// Don't process if it's an image
if (text.charAt(match.index - 1) === '!') {
// Don't process if it's an image or escaped
const charBefore = text.charAt(match.index - 1);
if (charBefore === '!' || charBefore === '\\') {
continue;

@@ -13,0 +14,0 @@ }

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc