Comparing version 0.1.1 to 0.1.2
@@ -14,3 +14,3 @@ /** | ||
newline: /^\n+/, | ||
block: /^ {4,}[^\n]*(?:\n {4,}[^\n]*|\n)*(?=\n| *$)/, | ||
code: /^ {4,}[^\n]*(?:\n {4,}[^\n]*|\n)*(?=\n| *$)/, | ||
hr: /^( *[\-*_]){3,} *\n/, | ||
@@ -25,16 +25,4 @@ heading: /^ *(#{1,6}) *([^\0]+?) *#* *\n+/, | ||
block.keys = [ | ||
'newline', | ||
'block', | ||
'heading', | ||
'lheading', | ||
'hr', | ||
'blockquote', | ||
'list', | ||
'html', | ||
'text' | ||
]; | ||
/** | ||
* Lexer | ||
* Block Lexer | ||
*/ | ||
@@ -66,110 +54,145 @@ | ||
block.token = function(str, tokens) { | ||
str = str.replace(/^ +$/gm, ''); | ||
var rules = block | ||
, keys = block.keys | ||
, len = keys.length | ||
, key | ||
var str = str.replace(/^ +$/gm, '') | ||
, loose | ||
, cap | ||
, loose; | ||
, item | ||
, space | ||
, i | ||
, l; | ||
var scan = function() { | ||
if (!str) return; | ||
for (var i = 0; i < len; i++) { | ||
key = keys[i]; | ||
if (cap = rules[key].exec(str)) { | ||
str = str.substring(cap[0].length); | ||
return true; | ||
while (str) { | ||
// newline | ||
if (cap = block.newline.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
if (cap[0].length > 1) { | ||
tokens.push({ | ||
type: 'space' | ||
}); | ||
} | ||
continue; | ||
} | ||
}; | ||
while (scan()) { | ||
switch (key) { | ||
case 'newline': | ||
if (cap[0].length > 1) { | ||
tokens.push({ | ||
type: 'space' | ||
}); | ||
// code | ||
if (cap = block.code.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
cap = cap[0].replace(/^ {4}/gm, ''); | ||
tokens.push({ | ||
type: 'code', | ||
text: cap | ||
}); | ||
continue; | ||
} | ||
// heading | ||
if (cap = block.heading.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
tokens.push({ | ||
type: 'heading', | ||
depth: cap[1].length, | ||
text: cap[2] | ||
}); | ||
continue; | ||
} | ||
// lheading | ||
if (cap = block.lheading.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
tokens.push({ | ||
type: 'heading', | ||
depth: cap[2] === '=' ? 1 : 2, | ||
text: cap[1] | ||
}); | ||
continue; | ||
} | ||
// hr | ||
if (cap = block.hr.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
tokens.push({ | ||
type: 'hr' | ||
}); | ||
continue; | ||
} | ||
// blockquote | ||
if (cap = block.blockquote.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
tokens.push({ | ||
type: 'blockquote_start' | ||
}); | ||
cap = cap[0].replace(/^ *>/gm, ''); | ||
block.token(cap, tokens); | ||
tokens.push({ | ||
type: 'blockquote_end' | ||
}); | ||
continue; | ||
} | ||
// list | ||
if (cap = block.list.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
tokens.push({ | ||
type: 'list_start', | ||
ordered: isFinite(cap[2]) | ||
}); | ||
loose = /\n *\n *(?:[*+-]|\d+\.)/.test(cap[0]); | ||
// get each top-level item | ||
cap = cap[0].match( | ||
/^( *)([*+-]|\d+\.)[^\n]*(?:\n(?!\1(?:\2|\d+\.))[^\n]*)*/gm | ||
); | ||
i = 0; | ||
l = cap.length; | ||
for (; i < l; i++) { | ||
// remove the list items sigil | ||
// so its seen as the next token | ||
item = cap[i].replace(/^ *([*+-]|\d+\.) */, ''); | ||
// outdent whatever the | ||
// list item contains, hacky | ||
space = /\n( +)/.exec(item); | ||
if (space) { | ||
space = new RegExp('^' + space[1], 'gm'); | ||
item = item.replace(space, ''); | ||
} | ||
break; | ||
case 'hr': | ||
tokens.push({ | ||
type: 'hr' | ||
type: loose | ||
? 'loose_item_start' | ||
: 'list_item_start' | ||
}); | ||
break; | ||
case 'lheading': | ||
block.token(item, tokens); | ||
tokens.push({ | ||
type: 'heading', | ||
depth: cap[2] === '=' ? 1 : 2, | ||
text: cap[1] | ||
type: 'list_item_end' | ||
}); | ||
break; | ||
case 'heading': | ||
tokens.push({ | ||
type: 'heading', | ||
depth: cap[1].length, | ||
text: cap[2] | ||
}); | ||
break; | ||
case 'block': | ||
cap = cap[0].replace(/^ {4}/gm, ''); | ||
tokens.push({ | ||
type: 'block', | ||
text: cap | ||
}); | ||
break; | ||
case 'list': | ||
tokens.push({ | ||
type: 'list_start', | ||
ordered: isFinite(cap[2]) | ||
}); | ||
loose = /\n *\n *(?:[*+-]|\d+\.)/.test(cap[0]); | ||
// get each top-level item | ||
cap = cap[0].match( | ||
/^( *)([*+-]|\d+\.)[^\n]*(?:\n(?!\1(?:\2|\d+\.))[^\n]*)*/gm | ||
); | ||
each(cap, function(item) { | ||
// remove the list items sigil | ||
// so its seen as the next token | ||
item = item.replace(/^ *([*+-]|\d+\.) */, ''); | ||
// outdent whatever the | ||
// list item contains, hacky | ||
var space = /\n( +)/.exec(item); | ||
if (space) { | ||
space = new RegExp('^' + space[1], 'gm'); | ||
item = item.replace(space, ''); | ||
} | ||
tokens.push({ | ||
type: loose | ||
? 'loose_item_start' | ||
: 'list_item_start' | ||
}); | ||
block.token(item, tokens); | ||
tokens.push({ | ||
type: 'list_item_end' | ||
}); | ||
}); | ||
tokens.push({ | ||
type: 'list_end' | ||
}); | ||
break; | ||
case 'html': | ||
case 'text': | ||
tokens.push({ | ||
type: key, | ||
text: cap[0] | ||
}); | ||
break; | ||
case 'blockquote': | ||
tokens.push({ | ||
type: 'blockquote_start' | ||
}); | ||
cap = cap[0].replace(/^ *>/gm, ''); | ||
block.token(cap, tokens); | ||
tokens.push({ | ||
type: 'blockquote_end' | ||
}); | ||
break; | ||
} | ||
tokens.push({ | ||
type: 'list_end' | ||
}); | ||
continue; | ||
} | ||
// html | ||
if (cap = block.html.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
tokens.push({ | ||
type: 'html', | ||
text: cap[0] | ||
}); | ||
continue; | ||
} | ||
// text | ||
if (cap = block.text.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
tokens.push({ | ||
type: 'text', | ||
text: cap[0] | ||
}); | ||
continue; | ||
} | ||
} | ||
@@ -198,16 +221,2 @@ | ||
inline.keys = [ | ||
'escape', | ||
'autolink', | ||
'tag', | ||
'link', | ||
'reflink', | ||
'nolink', | ||
'strong', | ||
'em', | ||
'code', | ||
'br', | ||
'text' | ||
]; | ||
// hacky, but performant | ||
@@ -243,109 +252,107 @@ inline.text = (function() { | ||
, text | ||
, href; | ||
var rules = inline | ||
, keys = inline.keys | ||
, len = keys.length | ||
, key | ||
, href | ||
, cap; | ||
var scan = function() { | ||
if (!str) return; | ||
for (var i = 0; i < len; i++) { | ||
key = keys[i]; | ||
if (cap = rules[key].exec(str)) { | ||
str = str.substring(cap[0].length); | ||
return true; | ||
while (str) { | ||
// escape | ||
if (cap = inline.escape.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
out += cap[1]; | ||
continue; | ||
} | ||
// autolink | ||
if (cap = inline.autolink.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
if (cap[2] === '@') { | ||
text = cap[1][6] === ':' | ||
? mangle(cap[1].substring(7)) | ||
: mangle(cap[1]); | ||
href = mangle('mailto:') + text; | ||
} else { | ||
text = escape(cap[1]); | ||
href = text; | ||
} | ||
out += '<a href="' | ||
+ href | ||
+ '">' | ||
+ text | ||
+ '</a>'; | ||
continue; | ||
} | ||
}; | ||
while (scan()) { | ||
switch (key) { | ||
case 'escape': | ||
out += cap[1]; | ||
break; | ||
case 'tag': | ||
out += cap[0]; | ||
break; | ||
case 'link': | ||
case 'reflink': | ||
case 'nolink': | ||
if (key !== 'link') { | ||
link = (cap[2] || cap[1]).replace(/\s+/g, ' '); | ||
link = links[link]; | ||
if (!link) { | ||
out += cap[0][0]; | ||
str = cap[0].substring(1) + str; | ||
break; | ||
} | ||
} else { | ||
text = /^\s*<?([^\s]*?)>?(?:\s+"([^\n]+)")?\s*$/.exec(cap[2]); | ||
link = { | ||
href: text[1], | ||
title: text[2] | ||
}; | ||
} | ||
if (cap[0][0] !== '!') { | ||
out += '<a href="' | ||
+ escape(link.href) | ||
+ '"' | ||
+ (link.title | ||
? ' title="' | ||
+ escape(link.title) | ||
+ '"' | ||
: '') | ||
+ '>' | ||
+ inline.lexer(cap[1]) | ||
+ '</a>'; | ||
} else { | ||
out += '<img src="' | ||
+ escape(link.href) | ||
+ '" alt="' | ||
+ escape(cap[1]) | ||
+ '"' | ||
+ (link.title | ||
? ' title="' | ||
+ escape(link.title) | ||
+ '"' | ||
: '') | ||
+ '>'; | ||
} | ||
break; | ||
case 'autolink': | ||
if (cap[2] === '@') { | ||
text = cap[1][6] === ':' | ||
? mangle(cap[1].substring(7)) | ||
: mangle(cap[1]); | ||
href = mangle('mailto:') + text; | ||
} else { | ||
text = escape(cap[1]); | ||
href = text; | ||
} | ||
out += '<a href="' + href + '">' | ||
+ text | ||
+ '</a>'; | ||
break; | ||
case 'strong': | ||
out += '<strong>' | ||
+ inline.lexer(cap[2] || cap[1]) | ||
+ '</strong>'; | ||
break; | ||
case 'em': | ||
out += '<em>' | ||
+ inline.lexer(cap[2] || cap[1]) | ||
+ '</em>'; | ||
break; | ||
case 'code': | ||
out += '<code>' | ||
+ escape(cap[2] || cap[1]) | ||
+ '</code>'; | ||
break; | ||
case 'br': | ||
out += '<br>'; | ||
break; | ||
case 'text': | ||
out += escape(cap[0]); | ||
break; | ||
// tag | ||
if (cap = inline.tag.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
out += cap[0]; | ||
continue; | ||
} | ||
// link | ||
if (cap = inline.link.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
text = /^\s*<?([^\s]*?)>?(?:\s+"([^\n]+)")?\s*$/.exec(cap[2]); | ||
link = { | ||
href: text[1], | ||
title: text[2] | ||
}; | ||
out += mlink(cap, link); | ||
continue; | ||
} | ||
// reflink, nolink | ||
if ((cap = inline.reflink.exec(str)) | ||
|| (cap = inline.nolink.exec(str))) { | ||
str = str.substring(cap[0].length); | ||
link = (cap[2] || cap[1]).replace(/\s+/g, ' '); | ||
link = links[link]; | ||
if (!link) { | ||
out += cap[0][0]; | ||
str = cap[0].substring(1) + str; | ||
continue; | ||
} | ||
out += mlink(cap, link); | ||
continue; | ||
} | ||
// strong | ||
if (cap = inline.strong.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
out += '<strong>' | ||
+ inline.lexer(cap[2] || cap[1]) | ||
+ '</strong>'; | ||
continue; | ||
} | ||
// em | ||
if (cap = inline.em.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
out += '<em>' | ||
+ inline.lexer(cap[2] || cap[1]) | ||
+ '</em>'; | ||
continue; | ||
} | ||
// code | ||
if (cap = inline.code.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
out += '<code>' | ||
+ escape(cap[2] || cap[1]) | ||
+ '</code>'; | ||
continue; | ||
} | ||
// br | ||
if (cap = inline.br.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
out += '<br>'; | ||
continue; | ||
} | ||
// text | ||
if (cap = inline.text.exec(str)) { | ||
str = str.substring(cap[0].length); | ||
out += escape(cap[0]); | ||
continue; | ||
} | ||
} | ||
@@ -356,2 +363,30 @@ | ||
// Build the HTML string for a matched inline link or image.
//   cap  - regex match array; cap[0] is the full match and cap[1] is the
//          text used as the link body or the image alt text.
//   link - object carrying `href` and an optional `title`.
// Returns an `<a>` element unless the match starts with '!', in which
// case an `<img>` element is returned. `href`, `title` and the alt text
// are passed through `escape` (defined elsewhere in the file).
var mlink = function(cap, link) { | ||
if (cap[0][0] !== '!') { | ||
// anchor: the link body is run through the inline lexer again
return '<a href="' | ||
+ escape(link.href) | ||
+ '"' | ||
+ (link.title | ||
? ' title="' | ||
+ escape(link.title) | ||
+ '"' | ||
: '') | ||
+ '>' | ||
+ inline.lexer(cap[1]) | ||
+ '</a>'; | ||
// image: the alt text is only escaped, not lexed
} else { | ||
return '<img src="' | ||
+ escape(link.href) | ||
+ '" alt="' | ||
+ escape(cap[1]) | ||
+ '"' | ||
+ (link.title | ||
? ' title="' | ||
+ escape(link.title) | ||
+ '"' | ||
: '') | ||
+ '>'; | ||
} | ||
}; | ||
/** | ||
@@ -370,33 +405,52 @@ * Parsing | ||
switch (token.type) { | ||
case 'space': | ||
case 'space': { | ||
return ''; | ||
case 'hr': | ||
} | ||
case 'hr': { | ||
return '<hr>'; | ||
case 'heading': | ||
return '<h' + token.depth + '>' | ||
} | ||
case 'heading': { | ||
return '<h' | ||
+ token.depth | ||
+ '>' | ||
+ inline.lexer(token.text) | ||
+ '</h' + token.depth + '>'; | ||
case 'block': | ||
+ '</h' | ||
+ token.depth | ||
+ '>'; | ||
} | ||
case 'code': { | ||
return '<pre><code>' | ||
+ escape(token.text) | ||
+ '</code></pre>'; | ||
case 'blockquote_start': | ||
} | ||
case 'blockquote_start': { | ||
var body = []; | ||
while (next().type !== 'blockquote_end') { | ||
body.push(tok()); | ||
} | ||
return '<blockquote>' | ||
+ body.join('') | ||
+ '</blockquote>'; | ||
case 'list_start': | ||
} | ||
case 'list_start': { | ||
var type = token.ordered ? 'ol' : 'ul' | ||
, body = []; | ||
while (next().type !== 'list_end') { | ||
body.push(tok()); | ||
} | ||
return '<' + type + '>' | ||
return '<' | ||
+ type | ||
+ '>' | ||
+ body.join('') | ||
+ '</' + type + '>'; | ||
case 'list_item_start': | ||
+ '</' | ||
+ type | ||
+ '>'; | ||
} | ||
case 'list_item_start': { | ||
var body = []; | ||
while (next().type !== 'list_item_end') { | ||
@@ -407,18 +461,27 @@ body.push(token.type === 'text' | ||
} | ||
return '<li>' | ||
+ body.join(' ') | ||
+ '</li>'; | ||
case 'loose_item_start': | ||
} | ||
case 'loose_item_start': { | ||
var body = []; | ||
while (next().type !== 'list_item_end') { | ||
body.push(tok()); | ||
} | ||
return '<li>' | ||
+ body.join(' ') | ||
+ '</li>'; | ||
case 'html': | ||
} | ||
case 'html': { | ||
return inline.lexer(token.text); | ||
case 'text': | ||
return '<p>' + text() + '</p>'; | ||
} | ||
} | ||
case 'text': { | ||
return '<p>' | ||
+ text() | ||
+ '</p>'; | ||
} | ||
} | ||
}; | ||
@@ -482,7 +545,2 @@ | ||
// Call `func` once for every element of the array-like `obj`, in index
// order, passing only the element. `obj.length` is read once up front.
// The return value of `func` is ignored and nothing is returned.
// NOTE(review): the hunk header (-482,7 +545,2) suggests this helper is
// dropped in 0.1.2 - confirm against the released file.
var each = function(obj, func) { | ||
var i = 0, l = obj.length; | ||
for (; i < l; i++) func(obj[i]); | ||
}; | ||
/** | ||
@@ -489,0 +547,0 @@ * Expose |
@@ -5,3 +5,3 @@ { | ||
"author": "Christopher Jeffrey", | ||
"version": "0.1.1", | ||
"version": "0.1.2", | ||
"main": "./lib/marked.js", | ||
@@ -8,0 +8,0 @@ "bin": { "marked": "./bin/marked" }, |
# marked | ||
A full-featured markdown parser and compiler implemented in ~430 lines of JS. | ||
A full-featured markdown parser and compiler implemented in ~430 lines of JS. | ||
Built for speed. | ||
@@ -10,5 +10,6 @@ | ||
$ node test --bench | ||
marked completed in 15051ms. | ||
showdown completed in 28267ms. | ||
markdown-js completed in 70732ms. | ||
marked completed in 12071ms. | ||
showdown (reuse converter) completed in 27387ms. | ||
showdown (new converter) completed in 75617ms. | ||
markdown-js completed in 70069ms. | ||
``` | ||
@@ -24,15 +25,15 @@ | ||
The point of marked was to create a markdown compiler where it was possible to | ||
frequently parse huge chunks of markdown without having to worry about | ||
The point of marked was to create a markdown compiler where it was possible to | ||
frequently parse huge chunks of markdown without having to worry about | ||
caching the compiled output somehow...or blocking for an unnecessarily long time. | ||
marked lingers around 430 (may vary) lines long and still implements all | ||
marked lingers around 430 (may vary) lines long and still implements all | ||
markdown features. It is also now fully compatible with the client-side. | ||
marked more or less passes the official markdown test suite in its | ||
entirety. This is important because a surprising number of markdown compilers | ||
cannot pass more than a few tests. It was very difficult to get marked as | ||
compliant as it is. It could have cut corners in several areas for the sake | ||
of performance, but did not in order to be exactly what you expect in terms | ||
of a markdown rendering. In fact, this is why marked could be considered at a | ||
marked more or less passes the official markdown test suite in its | ||
entirety. This is important because a surprising number of markdown compilers | ||
cannot pass more than a few tests. It was very difficult to get marked as | ||
compliant as it is. It could have cut corners in several areas for the sake | ||
of performance, but did not in order to be exactly what you expect in terms | ||
of a markdown rendering. In fact, this is why marked could be considered at a | ||
disadvantage in the benchmarks above. | ||
@@ -66,6 +67,6 @@ | ||
- Implement GFM features. | ||
- Possibly add some | ||
[ReMarkable](http://camendesign.com/code/remarkable/documentation.html) | ||
- Possibly add some | ||
[ReMarkable](http://camendesign.com/code/remarkable/documentation.html) | ||
features while remaining backwardly compatible with all markdown syntax. | ||
- Optimize the lexer to return an iterator instead of a collection of tokens. | ||
- Add an explicit pretty printing and minification feature. |
@@ -51,3 +51,3 @@ #!/usr/bin/env node | ||
main: | ||
main: | ||
for (; i_ < l_; i_++) { | ||
@@ -57,10 +57,10 @@ filename = keys[i_]; | ||
// this was messing with | ||
// this was messing with | ||
// `node test | less` on sakura | ||
try { | ||
try { | ||
text = marked(file.text).replace(/\s/g, ''); | ||
html = file.html.replace(/\s/g, ''); | ||
} catch(e) { | ||
console.log('%s failed.', filename); | ||
throw e; | ||
} catch(e) { | ||
console.log('%s failed.', filename); | ||
throw e; | ||
} | ||
@@ -74,13 +74,13 @@ | ||
text = text.substring( | ||
Math.max(i - 30, 0), | ||
Math.max(i - 30, 0), | ||
Math.min(i + 30, text.length)); | ||
html = html.substring( | ||
Math.max(i - 30, 0), | ||
Math.max(i - 30, 0), | ||
Math.min(i + 30, html.length)); | ||
console.log( | ||
'\n#%d. %s failed at offset %d. Near: "%s".\n', | ||
'\n#%d. %s failed at offset %d. Near: "%s".\n', | ||
i_ + 1, filename, i, text); | ||
console.log('\nGot:\n%s\n', | ||
console.log('\nGot:\n%s\n', | ||
pretty(text).trim() || text); | ||
console.log('\nExpected:\n%s\n', | ||
console.log('\nExpected:\n%s\n', | ||
pretty(html).trim() || html); | ||
@@ -130,12 +130,2 @@ if (breakOnError) { | ||
/** | ||
* There are two ways to benchmark showdown here. | ||
* The first way is to create a new converter | ||
* every time; this will renew any closured | ||
* variables. It is the "proper" way of using | ||
* showdown. However, for this benchmark, | ||
* I will use the completely improper method | ||
* which is much faster, just to be fair. | ||
*/ | ||
var showdown = (function() { | ||
@@ -148,4 +138,13 @@ var Showdown = require('showdown').Showdown; | ||
})(); | ||
main.bench('showdown', showdown); | ||
main.bench('showdown (reuse converter)', showdown); | ||
// Showdown benchmark target that builds a fresh converter on every call.
// The module is required once when the wrapper is created; the returned
// function takes the markdown `text` and returns `makeHtml`'s result.
var showdown_slow = (function() { | ||
var Showdown = require('showdown').Showdown; | ||
return function(text) { | ||
// a new converter per invocation, as opposed to reusing one instance
var convert = new Showdown.converter(); | ||
return convert.makeHtml(text); | ||
}; | ||
})(); | ||
main.bench('showdown (new converter)', showdown_slow); | ||
var markdownjs = require('markdown-js'); | ||
@@ -157,2 +156,7 @@ main.bench('markdown-js', function(text) { | ||
// Benchmark marked on its own: load it from the parent directory ('../')
// and pass it to `main.bench` under the label 'marked'.
var time = function() { | ||
var marked = require('../'); | ||
main.bench('marked', marked); | ||
}; | ||
var old_bench = function() { | ||
@@ -252,3 +256,3 @@ var text = fs.readFileSync(__dirname + '/main.md', 'utf8'); | ||
// temporarily remove elements before | ||
// temporarily remove elements before | ||
// processing, also remove whitespace | ||
@@ -259,3 +263,3 @@ str = str.replace(remove, function(element, name) { | ||
.replace(/[\r\n]/g, ''); | ||
return '<!' + (hash.push(element) - 1) | ||
return '<!' + (hash.push(element) - 1) | ||
+ (Array(element.length - 3).join('%')) + '/>'; | ||
@@ -282,5 +286,5 @@ }); | ||
out.push(indent(depth) + full); | ||
if (!closing[name] | ||
&& name[0] !== '!' | ||
&& name[0] !== '?' | ||
if (!closing[name] | ||
&& name[0] !== '!' | ||
&& name[0] !== '?' | ||
&& tag[tag.length-1] !== '/') { | ||
@@ -296,3 +300,3 @@ depth++; | ||
// restore the elements to | ||
// restore the elements to | ||
// their original locations | ||
@@ -324,2 +328,4 @@ str = str.replace(replace, function($0, $1) { | ||
bench(); | ||
} else if (~process.argv.indexOf('--time')) { | ||
time(); | ||
} else if (~process.argv.indexOf('--old_bench')) { | ||
@@ -326,0 +332,0 @@ old_bench(); |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
216350
849
70