remove-markdown
Advanced tools
Comparing version 0.3.0 to 0.5.0
55
index.js
@@ -7,2 +7,5 @@ module.exports = function(md, options) { | ||
options.useImgAltText = options.hasOwnProperty('useImgAltText') ? options.useImgAltText : true; | ||
options.abbr = options.hasOwnProperty('abbr') ? options.abbr : false; | ||
options.replaceLinksWithURL = options.hasOwnProperty('replaceLinksWithURL') ? options.replaceLinksWithURL : false; | ||
options.htmlTagsToSkip = options.hasOwnProperty('htmlTagsToSkip') ? options.htmlTagsToSkip : []; | ||
@@ -12,3 +15,3 @@ var output = md || ''; | ||
// Remove horizontal rules (stripListHeaders conflict with this rule, which is why it has been moved to the top) | ||
output = output.replace(/^(-\s*?|\*\s*?|_\s*?){3,}\s*$/gm, ''); | ||
output = output.replace(/^(-\s*?|\*\s*?|_\s*?){3,}\s*/gm, ''); | ||
@@ -24,3 +27,3 @@ try { | ||
output = output | ||
// Header | ||
// Header | ||
.replace(/\n={2,}/g, '\n') | ||
@@ -34,5 +37,27 @@ // Fenced codeblocks | ||
} | ||
if (options.abbr) { | ||
// Remove abbreviations | ||
output = output.replace(/\*\[.*\]:.*\n/, ''); | ||
} | ||
output = output | ||
// Remove HTML tags | ||
.replace(/<[^>]*>/g, '') | ||
var htmlReplaceRegex = new RegExp('<[^>]*>', 'g'); | ||
if (options.htmlTagsToSkip.length > 0) { | ||
// Using negative lookahead. Eg. (?!sup|sub) will not match 'sup' and 'sub' tags. | ||
var joinedHtmlTagsToSkip = '(?!' + options.htmlTagsToSkip.join("|") + ')'; | ||
// Adding the lookahead literal with the default regex for html. Eg./<(?!sup|sub)[^>]*>/ig | ||
htmlReplaceRegex = new RegExp( | ||
'<' + | ||
joinedHtmlTagsToSkip + | ||
'[^>]*>', | ||
'ig' | ||
); | ||
} | ||
output = output | ||
// Remove HTML tags | ||
.replace(/<[^>]*>/g, '') | ||
.replace(htmlReplaceRegex, '') | ||
// Remove setext-style headers | ||
@@ -46,12 +71,16 @@ .replace(/^[=\-]{2,}\s*$/g, '') | ||
// Remove inline links | ||
.replace(/\[(.*?)\][\[\(].*?[\]\)]/g, '$1') | ||
.replace(/\[([^\]]*?)\][\[\(].*?[\]\)]/g, options.replaceLinksWithURL ? '$2' : '$1') | ||
// Remove blockquotes | ||
.replace(/^\s{0,3}>\s?/g, '') | ||
.replace(/^\s{0,3}>\s?/gm, '') | ||
// .replace(/(^|\n)\s{0,3}>\s?/g, '\n\n') | ||
// Remove reference-style links? | ||
.replace(/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/g, '') | ||
// Remove atx-style headers | ||
.replace(/^(\n)?\s{0,}#{1,6}\s+| {0,}(\n)?\s{0,}#{0,} {0,}(\n)?\s{0,}$/gm, '$1$2$3') | ||
// Remove emphasis (repeat the line to remove double emphasis) | ||
.replace(/([\*_]{1,3})(\S.*?\S{0,1})\1/g, '$2') | ||
.replace(/([\*_]{1,3})(\S.*?\S{0,1})\1/g, '$2') | ||
.replace(/^(\n)?\s{0,}#{1,6}\s+| {0,}(\n)?\s{0,}#{0,} #{0,}(\n)?\s{0,}$/gm, '$1$2$3') | ||
// Remove * emphasis | ||
.replace(/([\*]+)(\S)(.*?\S)??\1/g, '$2$3') | ||
// Remove _ emphasis. Unlike *, _ emphasis gets rendered only if | ||
// 1. Either there is a whitespace character before opening _ and after closing _. | ||
// 2. Or _ is at the start/end of the string. | ||
.replace(/(^|\W)([_]+)(\S)(.*?\S)??\2($|\W)/g, '$1$3$4$5') | ||
// Remove code blocks | ||
@@ -61,4 +90,8 @@ .replace(/(`{3,})(.*?)\1/gm, '$2') | ||
.replace(/`(.+?)`/g, '$1') | ||
// Replace two or more newlines with exactly two? Not entirely sure this belongs here... | ||
.replace(/\n{2,}/g, '\n\n'); | ||
// // Replace two or more newlines with exactly two? Not entirely sure this belongs here... | ||
// .replace(/\n{2,}/g, '\n\n') | ||
// // Remove newlines in a paragraph | ||
// .replace(/(\S+)\n\s*(\S+)/g, '$1 $2') | ||
// Replace strike through | ||
.replace(/~(.*?)~/g, '$1'); | ||
} catch(e) { | ||
@@ -65,0 +98,0 @@ console.error(e); |
{ | ||
"name": "remove-markdown", | ||
"version": "0.3.0", | ||
"version": "0.5.0", | ||
"description": "Remove Markdown formatting from text", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -72,2 +72,14 @@ 'use strict'; | ||
it('should remove emphasis only if there is no space between word and emphasis characters.', function () { | ||
const string = 'There should be no _space_, *before* *closing * _ephasis character _.'; | ||
const expected = 'There should be no space, before *closing * _ephasis character _.'; | ||
expect(removeMd(string)).to.equal(expected); | ||
}); | ||
it('should remove "_" emphasis only if there is space before opening and after closing emphasis characters.', function () { | ||
const string = '._Spaces_ _ before_ and _after _ emphasised character results in no emphasis.'; | ||
const expected = '.Spaces _ before_ and _after _ emphasised character results in no emphasis.'; | ||
expect(removeMd(string)).to.equal(expected); | ||
}); | ||
it('should remove double emphasis', function () { | ||
@@ -113,14 +125,27 @@ const string = '**this sentence has __double styling__**'; | ||
}); | ||
it('should remove blockquotes over multiple lines', function () { | ||
const string = '> I am a blockquote firstline \n>I am a blockquote secondline'; | ||
const expected = 'I am a blockquote firstline\nI am a blockquote secondline'; | ||
expect(removeMd(string)).to.equal(expected); | ||
}); | ||
// it('should remove blockquotes following other content', function () { | ||
// const string = '## A headline\n\nA paragraph of text\n\n> I am a blockquote'; | ||
// const expected = 'A headline\n\nA paragraph of text\n\nI am a blockquote'; | ||
// expect(removeMd(string)).to.equal(expected); | ||
// }); | ||
it('should not remove greater than signs', function () { | ||
var tests = [ | ||
{ string: '100 > 0', expected: '100 > 0' }, | ||
{ string: '100 >= 0', expected: '100 >= 0' }, | ||
{ string: '100>0', expected: '100>0' }, | ||
{ string: '> 100 > 0', expected: '100 > 0' }, | ||
{ string: '1 < 100', expected: '1 < 100' }, | ||
{ string: '1 <= 100', expected: '1 <= 100' }, | ||
{ string: '100 > 0', expected: '100 > 0' }, | ||
{ string: '100 >= 0', expected: '100 >= 0' }, | ||
{ string: '100>0', expected: '100>0' }, | ||
{ string: '> 100 > 0', expected: '100 > 0' }, | ||
{ string: '1 < 100', expected: '1 < 100' }, | ||
{ string: '1 <= 100', expected: '1 <= 100' }, | ||
]; | ||
tests.forEach(function (test) { | ||
expect(removeMd(test.string)).to.equal(test.expected); | ||
expect(removeMd(test.string)).to.equal(test.expected); | ||
}); | ||
@@ -146,3 +171,20 @@ }); | ||
}); | ||
it('should not strip paragraphs without content', function() { | ||
const paragraph = '\n#This paragraph\n##This paragraph#'; | ||
const expected = paragraph; | ||
expect(removeMd(paragraph)).to.equal(expected); | ||
}); | ||
it('should not trigger ReDoS with atx-headers', function () { | ||
const start = Date.now(); | ||
const paragraph = '\n## This is a long "'+' '.repeat(200)+'" heading ##\n'; | ||
const expected = /\nThis is a long " {200}" heading\n/; | ||
expect(removeMd(paragraph)).to.match(expected); | ||
const duration = Date.now()-start; | ||
expect(duration).to.be.lt(500); | ||
}); | ||
}); | ||
}); |
20675
247
6