Comparing version 2.8.0 to 2.8.1
@@ -1,7 +0,3 @@ | ||
module.exports = asciiAlpha | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
function asciiAlpha(code) { | ||
return /[A-Za-z]/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/[A-Za-z]/) |
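The new `../util/regex-check` module is not itself shown in this diff; a minimal sketch of what it presumably looks like, given how `check(regex)` replaces the removed per-module functions above (the path and names come from the requires, the body is an assumption):

module.exports = regexCheck
var fromCharCode = require('../constant/from-char-code')
// Create a check function that tests whether the character for a given code
// matches the bound regular expression.
function regexCheck(regex) {
  return check
  function check(code) {
    return regex.test(fromCharCode(code))
  }
}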
@@ -1,7 +0,3 @@ | ||
module.exports = asciiAlphanumeric | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
function asciiAlphanumeric(code) { | ||
return /[\dA-Za-z]/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/[\dA-Za-z]/) |
@@ -1,8 +0,3 @@ | ||
module.exports = asciiAtext | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
// Also includes dot. | ||
function asciiAtext(code) { | ||
return /[#-'*+\--9=?A-Z^-~]/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/[#-'*+\--9=?A-Z^-~]/) |
@@ -1,7 +0,3 @@ | ||
module.exports = asciiDigit | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
function asciiDigit(code) { | ||
return /\d/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/\d/) |
@@ -1,7 +0,3 @@ | ||
module.exports = asciiHexDigit | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
function asciiHexDigit(code) { | ||
return /[\dA-Fa-f]/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/[\dA-Fa-f]/) |
@@ -1,7 +0,3 @@ | ||
module.exports = asciiPunctuation | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
function asciiPunctuation(code) { | ||
return /[!-/:-@[-`{-~]/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/[!-/:-@[-`{-~]/) |
// This module is compiled away! | ||
// | ||
// micromark works based on character codes. | ||
// This module contains constants for the ASCII block and the replacement | ||
// character. | ||
// A couple of them are handled in a special way, such as the line endings | ||
// (CR, LF, and CR+LF, commonly known as end-of-line: EOLs), the tab (horizontal | ||
// tab) and its expansion based on what column it’s at (virtual space), | ||
// and the end-of-file (eof) character. | ||
// As values are preprocessed before handling them, the actual characters LF,
// CR, HT, and NUL (which is represented by the replacement character) are
// guaranteed not to exist.
exports.carriageReturn = -5 | ||
@@ -3,0 +14,0 @@ exports.lineFeed = -4 |
module.exports = markdownLineEndingOrSpace | ||
function markdownLineEndingOrSpace(code) { | ||
return ( | ||
code === -5 || | ||
code === -4 || | ||
code === -3 || | ||
code === -2 || | ||
code === -1 || | ||
code === 32 | ||
) | ||
return code < 0 || code === 32 | ||
} |
module.exports = markdownLineEnding | ||
function markdownLineEnding(code) { | ||
return code === -5 || code === -4 || code === -3 | ||
return code < -2 | ||
} |
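Both rewrites rely on the preprocessed character codes documented above (CR is -5 and LF is -4 per the constants shown, and presumably CR+LF is -3, HT is -2, virtual space is -1, and EOF is null): every code below -2 is a line ending, and every negative code, as well as space (32), is a line ending or space. A few spot checks of the new predicates:

markdownLineEnding(-3) // => true (CR+LF)
markdownLineEnding(-2) // => false (HT is not a line ending)
markdownLineEnding(null) // => false (EOF; `null < -2` is false)
markdownLineEndingOrSpace(-1) // => true (virtual space)
markdownLineEndingOrSpace(32) // => true (space)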
@@ -1,10 +0,6 @@ | ||
module.exports = punctuation | ||
var unicodePunctuation = require('../constant/unicode-punctuation-regex') | ||
var fromCharCode = require('../constant/from-char-code') | ||
var check = require('../util/regex-check') | ||
// Size note: removing ASCII from the regex and using `ascii-punctuation` here
// in fact adds to the bundle size.
function punctuation(code) { | ||
return unicodePunctuation.test(fromCharCode(code)) | ||
} | ||
module.exports = check(unicodePunctuation) |
@@ -1,7 +0,3 @@ | ||
module.exports = whitespace | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
function whitespace(code) { | ||
return /\s/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/\s/) |
// This module is compiled away! | ||
// | ||
// While micromark works based on character codes, this module includes the | ||
// string versions of ’em. | ||
// The C0 block, except for LF, CR, and HT, and w/ the replacement character
// added, is available here.
exports.ht = '\t' | ||
@@ -3,0 +8,0 @@ exports.lf = '\n' |
@@ -0,1 +1,13 @@ | ||
// While micromark is a lexer/tokenizer, the common case of going from markdown | ||
// to html is currently built in as this module, even though the parts can be | ||
// used separately to build ASTs, CSTs, or many other output formats. | ||
// | ||
// Having an HTML compiler built in is useful because it allows us to check for
// compliance with CommonMark, the de facto norm of markdown, specified in
// roughly 600 input/output cases.
// | ||
// This module has an interface that accepts lists of events instead of the
// whole document at once.
// However, because markdown can’t truly be streamed, we buffer events before
// processing them and outputting the final result.
module.exports = compileHtml | ||
@@ -9,2 +21,3 @@ | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var normalizeUri = require('../util/normalize-uri') | ||
@@ -14,5 +27,14 @@ var normalizeIdentifier = require('../util/normalize-identifier') | ||
var combineExtensions = require('../util/combine-html-extensions') | ||
var miniflat = require('../util/miniflat') | ||
// These two ensure that certain characters that have special meaning in HTML
// are dealt with.
// Technically, we can skip `>` and `"` in many cases, but CM includes them.
var characterReferences = {'"': 'quot', '&': 'amp', '<': 'lt', '>': 'gt'} | ||
var characterReferencesExpression = /["&<>]/g | ||
// These two are allowlists of essentially safe protocols for full URLs in | ||
// respectively the `href` (on `<a>`) and `src` (on `<img>`) attributes. | ||
// They are based on what is allowed on GitHub, | ||
// <https://github.com/syntax-tree/hast-util-sanitize/blob/9275b21/lib/github.json#L31> | ||
var protocolHref = /^(https?|ircs?|mailto|xmpp)$/i | ||
@@ -22,100 +44,138 @@ var protocolSrc = /^https?$/i | ||
function compileHtml(options) { | ||
// Configuration. | ||
// Includes `htmlExtensions` (an array of extensions), `defaultLineEnding` (a
// preferred EOL), `allowDangerousProtocol` (whether to allow potentially
// dangerous protocols), and `allowDangerousHtml` (whether to allow potentially
// dangerous HTML).
var settings = options || {} | ||
// `tags` is needed because, according to markdown, links, emphasis, and
// whatnot can exist in images.
// However, as HTML doesn’t allow content in images, the tags are ignored in
// the `alt` attribute, but the content remains.
var tags = true | ||
// An object to track identifiers to media (URLs and titles) defined with | ||
// definitions. | ||
var definitions = {} | ||
// A lot of the handlers need to capture some of the output data, modify it | ||
// somehow, and then deal with it. | ||
// We do that by tracking a stack of buffers that can be opened (with
// `buffer`) and closed (with `resume`) to access them.
var buffers = [[]] | ||
// As definitions can come after references, we need to figure out the media
// (URLs and titles) defined by them before handling the references.
// So, we do sort of what HTML does: put metadata at the start (in `head`), and
// put content after it (in `body`).
var head = [] | ||
var body = [] | ||
// As events for tokens can come in parts, we need to store them somewhere | ||
// before we handle everything. | ||
// We do that in `events`. | ||
var events = [] | ||
// As we can have links in images and the other way around, where the deepest | ||
// ones are closed first, we need to track which one we’re in. | ||
var mediaStack = [] | ||
// Same for tightness, which is specific to lists. | ||
// We need to track if we’re currently in a tight or loose container. | ||
var tightStack = [] | ||
var defaultHandlers = { | ||
enter: { | ||
blockQuote: onenterblockquote, | ||
codeFenced: onentercodefenced, | ||
codeFencedFenceInfo: buffer, | ||
codeFencedFenceMeta: buffer, | ||
codeIndented: onentercodeindented, | ||
codeText: onentercodetext, | ||
content: onentercontent, | ||
definition: onenterdefinition, | ||
definitionDestinationString: onenterdefinitiondestinationstring, | ||
definitionLabelString: buffer, | ||
definitionTitleString: buffer, | ||
emphasis: onenteremphasis, | ||
htmlFlow: onenterhtmlflow, | ||
htmlText: onenterhtml, | ||
image: onenterimage, | ||
label: buffer, | ||
link: onenterlink, | ||
listItemMarker: onenterlistitemmarker, | ||
listItemValue: onenterlistitemvalue, | ||
listOrdered: onenterlistordered, | ||
listUnordered: onenterlistunordered, | ||
paragraph: onenterparagraph, | ||
reference: buffer, | ||
resource: onenterresource, | ||
resourceDestinationString: onenterresourcedestinationstring, | ||
resourceTitleString: buffer, | ||
setextHeading: onentersetextheading, | ||
strong: onenterstrong | ||
}, | ||
exit: { | ||
atxHeading: onexitatxheading, | ||
atxHeadingSequence: onexitatxheadingsequence, | ||
autolinkEmail: onexitautolinkemail, | ||
autolinkProtocol: onexitautolinkprotocol, | ||
blockQuote: onexitblockquote, | ||
characterEscapeValue: onexitdata, | ||
characterReferenceMarkerHexadecimal: onexitcharacterreferencemarker, | ||
characterReferenceMarkerNumeric: onexitcharacterreferencemarker, | ||
characterReferenceValue: onexitcharacterreferencevalue, | ||
codeFenced: onexitflowcode, | ||
codeFencedFence: onexitcodefencedfence, | ||
codeFencedFenceInfo: onexitcodefencedfenceinfo, | ||
codeFencedFenceMeta: resume, | ||
codeFlowValue: onexitcodeflowvalue, | ||
codeIndented: onexitflowcode, | ||
codeText: onexitcodetext, | ||
codeTextData: onexitdata, | ||
data: onexitdata, | ||
definition: onexitdefinition, | ||
definitionDestinationString: onexitdefinitiondestinationstring, | ||
definitionLabelString: onexitdefinitionlabelstring, | ||
definitionTitleString: onexitdefinitiontitlestring, | ||
emphasis: onexitemphasis, | ||
hardBreakEscape: onexithardbreak, | ||
hardBreakTrailing: onexithardbreak, | ||
htmlFlow: onexithtml, | ||
htmlFlowData: onexitdata, | ||
htmlText: onexithtml, | ||
htmlTextData: onexitdata, | ||
image: onexitmedia, | ||
label: onexitlabel, | ||
labelText: onexitlabeltext, | ||
lineEnding: onexitlineending, | ||
link: onexitmedia, | ||
listOrdered: onexitlistordered, | ||
listUnordered: onexitlistunordered, | ||
paragraph: onexitparagraph, | ||
reference: resume, | ||
referenceString: onexitreferencestring, | ||
resource: resume, | ||
resourceDestinationString: onexitresourcedestinationstring, | ||
resourceTitleString: onexitresourcetitlestring, | ||
setextHeading: onexitsetextheading, | ||
setextHeadingLineSequence: onexitsetextheadinglinesequence, | ||
setextHeadingText: onexitsetextheadingtext, | ||
strong: onexitstrong, | ||
thematicBreak: onexitthematicbreak | ||
} | ||
} | ||
// Combine the HTML extensions with the default handlers. | ||
// An HTML extension is an object whose fields are either `enter` or `exit` | ||
// (reflecting whether a token is entered or exited). | ||
// The values of those fields are objects mapping token names to handlers.
// Handlers are called, respectively when a token is entered or exited, with
// that token, and a context as `this`.
var handlers = combineExtensions( | ||
[ | ||
{ | ||
enter: { | ||
blockQuote: onenterblockquote, | ||
codeFenced: onentercodefenced, | ||
codeFencedFenceInfo: buffer, | ||
codeFencedFenceMeta: buffer, | ||
codeIndented: onentercodeindented, | ||
codeText: onentercodetext, | ||
content: onentercontent, | ||
definition: onenterdefinition, | ||
definitionDestinationString: onenterdefinitiondestinationstring, | ||
definitionLabelString: buffer, | ||
definitionTitleString: buffer, | ||
emphasis: onenteremphasis, | ||
htmlFlow: onenterhtmlflow, | ||
htmlText: onenterhtml, | ||
image: onenterimage, | ||
label: buffer, | ||
link: onentermedia, | ||
listItemMarker: onenterlistitemmarker, | ||
listItemValue: onenterlistitemvalue, | ||
listOrdered: onenterlistordered, | ||
listUnordered: onenterlistunordered, | ||
paragraph: onenterparagraph, | ||
reference: onenterreference, | ||
resource: onenterresource, | ||
resourceDestinationString: onenterresourcedestinationstring, | ||
resourceTitleString: buffer, | ||
setextHeading: onentersetextheading, | ||
strong: onenterstrong | ||
}, | ||
exit: { | ||
atxHeading: onexitatxheading, | ||
atxHeadingSequence: onexitatxheadingsequence, | ||
autolinkEmail: onexitautolinkemail, | ||
autolinkProtocol: onexitautolinkprotocol, | ||
blockQuote: onexitblockquote, | ||
characterEscapeValue: onexitcharacterescapevalue, | ||
characterReferenceMarkerHexadecimal: onexitcharacterreferencemarker, | ||
characterReferenceMarkerNumeric: onexitcharacterreferencemarker, | ||
characterReferenceValue: onexitcharacterreferencevalue, | ||
codeFenced: onexitflowcode, | ||
codeFencedFence: onexitcodefencedfence, | ||
codeFencedFenceInfo: onexitcodefencedfenceinfo, | ||
codeFencedFenceMeta: resume, | ||
codeFlowValue: onexitcodeflowvalue, | ||
codeIndented: onexitflowcode, | ||
codeText: onexitcodetext, | ||
codeTextData: onexitdata, | ||
data: onexitdata, | ||
definition: onexitdefinition, | ||
definitionDestinationString: onexitdefinitiondestinationstring, | ||
definitionLabelString: onexitdefinitionlabelstring, | ||
definitionTitleString: onexitdefinitiontitlestring, | ||
emphasis: onexitemphasis, | ||
hardBreakEscape: onexithardbreak, | ||
hardBreakTrailing: onexithardbreak, | ||
htmlFlow: onexithtml, | ||
htmlFlowData: onexitdata, | ||
htmlText: onexithtml, | ||
htmlTextData: onexitdata, | ||
image: onexitmedia, | ||
label: onexitlabel, | ||
labelText: onexitlabeltext, | ||
lineEnding: onexitlineending, | ||
link: onexitmedia, | ||
listOrdered: onexitlistordered, | ||
listUnordered: onexitlistunordered, | ||
paragraph: onexitparagraph, | ||
reference: onexitreference, | ||
referenceString: onexitreferencestring, | ||
resource: resume, | ||
resourceDestinationString: onexitresourcedestinationstring, | ||
resourceTitleString: onexitresourcetitlestring, | ||
setextHeading: onexitsetextheading, | ||
setextHeadingLineSequence: onexitsetextheadinglinesequence, | ||
setextHeadingText: onexitsetextheadingtext, | ||
strong: onexitstrong, | ||
thematicBreak: onexitthematicbreak | ||
} | ||
} | ||
].concat(settings.htmlExtensions || []) | ||
[defaultHandlers].concat(miniflat(settings.htmlExtensions)) | ||
) | ||
// Handlers often need to keep track of some state.
// That state is provided here as a key-value store (an object). | ||
var data = {tightStack: tightStack} | ||
// The context for handlers references a couple of useful functions. | ||
// In handlers from extensions, those can be accessed at `this`. | ||
// For the handlers here, they can be accessed directly. | ||
var context = { | ||
@@ -133,51 +193,65 @@ lineEndingIfNeeded: lineEndingIfNeeded, | ||
// Generally, micromark copies line endings (`'\r'`, `'\n'`, `'\r\n'`) in the | ||
// markdown document over to the compiled HTML. | ||
// In some cases, such as `> a`, CommonMark requires that extra line endings | ||
// are added: `<blockquote>\n<p>a</p>\n</blockquote>`. | ||
// This variable holds the default line ending when given (or `undefined`);
// in the latter case it is updated to the first line ending found, if there
// is one.
var lineEndingStyle = settings.defaultLineEnding | ||
var media | ||
// Return the function that handles a slice of events. | ||
return compile | ||
// Deal w/ a slice of events. | ||
// Return either the empty string if there’s nothing of note to return, or the | ||
// result when done. | ||
function compile(slice) { | ||
events = events.concat(slice) | ||
return slice[slice.length - 1] === null ? done() : '' | ||
} | ||
function done() { | ||
var length = events.length - 1 | ||
var index = -1 | ||
var start = 0 | ||
var listStack = [] | ||
var length | ||
var index | ||
var start | ||
var listStack | ||
var handler | ||
var result | ||
var event | ||
chunkedSplice(events, events.length, 0, slice) | ||
if (events[events.length - 1] !== null) { | ||
return '' | ||
} | ||
length = events.length - 1 | ||
index = -1 | ||
start = 0 | ||
listStack = [] | ||
while (++index < length) { | ||
event = events[index] | ||
// Figure out the line ending style used in the document. | ||
if ( | ||
!lineEndingStyle && | ||
(event[1].type === 'lineEnding' || event[1].type === 'lineEndingBlank') | ||
(events[index][1].type === 'lineEnding' || | ||
events[index][1].type === 'lineEndingBlank') | ||
) { | ||
lineEndingStyle = event[2].sliceSerialize(event[1]) | ||
lineEndingStyle = events[index][2].sliceSerialize(events[index][1]) | ||
} | ||
// We preprocess lists to clean up a couple of line endings, and to infer | ||
// whether the list is loose or not. | ||
// Preprocess lists to infer whether the list is loose or not. | ||
if ( | ||
event[1].type === 'listOrdered' || | ||
event[1].type === 'listUnordered' | ||
events[index][1].type === 'listOrdered' || | ||
events[index][1].type === 'listUnordered' | ||
) { | ||
if (event[0] === 'enter') { | ||
if (events[index][0] === 'enter') { | ||
listStack.push(index) | ||
} else { | ||
prepareList(events.slice(listStack.pop(index), index)) | ||
prepareList(events.slice(listStack.pop(), index)) | ||
} | ||
} | ||
// We detect definitions here, and move them to the front. | ||
if (event[1].type === 'definition') { | ||
if (event[0] === 'enter') { | ||
body = body.concat(events.slice(start, index)) | ||
// Move definitions to the front. | ||
if (events[index][1].type === 'definition') { | ||
if (events[index][0] === 'enter') { | ||
chunkedSplice(body, body.length, 0, events.slice(start, index)) | ||
start = index | ||
} else { | ||
head = head.concat(events.slice(start, index + 1)) | ||
chunkedSplice(head, head.length, 0, events.slice(start, index + 1)) | ||
start = index + 1 | ||
@@ -188,6 +262,8 @@ } | ||
result = head.concat(body, events.slice(start, length)) | ||
chunkedSplice(head, head.length, 0, body) | ||
chunkedSplice(head, head.length, 0, events.slice(start, length)) | ||
result = head | ||
index = -1 | ||
// Handle the start of the document, if defined. | ||
if (handlers.enter.null) { | ||
@@ -197,2 +273,3 @@ handlers.enter.null.call(context) | ||
// Handle all events. | ||
while (++index < length) { | ||
@@ -209,2 +286,3 @@ handler = handlers[result[index][0]] | ||
// Handle the end of the document, if defined. | ||
if (handlers.exit.null) { | ||
@@ -217,2 +295,3 @@ handlers.exit.null.call(context) | ||
// Figure out whether lists are loose or not. | ||
function prepareList(events) { | ||
@@ -229,8 +308,4 @@ var length = events.length - 1 // Skip close. | ||
if ( | ||
event[1].type === 'listUnordered' || | ||
event[1].type === 'listOrdered' || | ||
event[1].type === 'blockQuote' | ||
) { | ||
atMarker = false | ||
if (event[1]._container) { | ||
atMarker = undefined | ||
@@ -251,3 +326,3 @@ if (event[0] === 'enter') { | ||
if (atMarker) { | ||
atMarker = false | ||
atMarker = undefined | ||
} else { | ||
@@ -258,3 +333,3 @@ loose = true | ||
} else { | ||
atMarker = false | ||
atMarker = undefined | ||
} | ||
@@ -266,2 +341,3 @@ } | ||
// Set data into the key-value store. | ||
function setData(key, value) { | ||
@@ -271,2 +347,3 @@ data[key] = value | ||
// Get data from the key-value store. | ||
function getData(key) { | ||
@@ -276,2 +353,3 @@ return data[key] | ||
// Capture some of the output data. | ||
function buffer() { | ||
@@ -281,2 +359,3 @@ buffers.push([]) | ||
// Stop capturing and access the output data. | ||
function resume() { | ||
@@ -286,2 +365,3 @@ return buffers.pop().join('') | ||
// Output (parts of) HTML tags. | ||
function tag(value) { | ||
@@ -293,2 +373,3 @@ if (!tags) return | ||
// Output raw data. | ||
function raw(value) { | ||
@@ -299,2 +380,3 @@ setData('lastWasTag') | ||
// Output an extra line ending. | ||
function lineEnding() { | ||
@@ -304,2 +386,3 @@ raw(lineEndingStyle || '\n') | ||
// Output an extra line ending if the previous value wasn’t EOF/EOL. | ||
function lineEndingIfNeeded() { | ||
@@ -317,2 +400,3 @@ var buffer = buffers[buffers.length - 1] | ||
// Make a value safe for injection in HTML (except w/ `ignoreEncode`). | ||
function encode(value) { | ||
@@ -327,2 +411,8 @@ return getData('ignoreEncode') | ||
// Make a value safe for injection as a URL. | ||
// This does encode unsafe characters with percent-encoding, skipping already | ||
// encoded sequences (`normalizeUri`). | ||
// Further unsafe characters are encoded as character references (`encode`). | ||
// Finally, if the URL includes an unknown protocol (such as the dangerous
// `javascript:`), the value is ignored.
function url(url, protocol) { | ||
@@ -391,5 +481,5 @@ var value = encode(normalizeUri(url || '')) | ||
tag('<li>') | ||
setData('expectFirstItem') | ||
// “Hack” to prevent a line ending from showing up if the item is empty. | ||
setData('lastWasTag') | ||
setData('expectFirstItem') | ||
} | ||
@@ -474,3 +564,3 @@ | ||
function onexitflowcode() { | ||
// Send an extra line feed, if we saw data, and the code didn’t end in one. | ||
// Send an extra line feed, if we saw data. | ||
if (getData('flowCodeSeenData')) lineEndingIfNeeded() | ||
@@ -483,36 +573,26 @@ tag('</code></pre>') | ||
function onenterimage(token) { | ||
onentermedia(token) | ||
tags = undefined | ||
function onenterimage() { | ||
mediaStack.push({image: true}) | ||
tags = undefined // Disallow tags. | ||
} | ||
function onentermedia(token) { | ||
media = {type: token.type, label: ''} | ||
mediaStack.push(media) | ||
function onenterlink() { | ||
mediaStack.push({}) | ||
} | ||
function onexitlabeltext(token) { | ||
media.labelId = normalizeIdentifier(this.sliceSerialize(token)) | ||
mediaStack[mediaStack.length - 1].labelId = this.sliceSerialize(token) | ||
} | ||
function onexitlabel() { | ||
media.label = resume() | ||
mediaStack[mediaStack.length - 1].label = resume() | ||
} | ||
function onenterreference() { | ||
buffer() | ||
media.reference = '' | ||
} | ||
function onexitreferencestring(token) { | ||
media.referenceId = normalizeIdentifier(this.sliceSerialize(token)) | ||
mediaStack[mediaStack.length - 1].referenceId = this.sliceSerialize(token) | ||
} | ||
function onexitreference() { | ||
media.reference = resume() | ||
} | ||
function onenterresource() { | ||
buffer() // We can have line endings in the resource, ignore them. | ||
media.destination = '' | ||
mediaStack[mediaStack.length - 1].destination = '' | ||
} | ||
@@ -522,2 +602,4 @@ | ||
buffer() | ||
// Ignore encoding the result, as we’ll first percent-encode the URL and
// encode manually after.
setData('ignoreEncode', true) | ||
@@ -527,3 +609,3 @@ } | ||
function onexitresourcedestinationstring() { | ||
media.destination = resume() | ||
mediaStack[mediaStack.length - 1].destination = resume() | ||
setData('ignoreEncode') | ||
@@ -533,3 +615,3 @@ } | ||
function onexitresourcetitlestring() { | ||
media.title = resume() | ||
mediaStack[mediaStack.length - 1].title = resume() | ||
} | ||
@@ -539,8 +621,6 @@ | ||
var index = mediaStack.length - 1 | ||
var context | ||
var title | ||
context = | ||
var media = mediaStack[index] | ||
var context = | ||
media.destination === undefined | ||
? definitions[media.referenceId || media.labelId] | ||
? definitions[normalizeIdentifier(media.referenceId || media.labelId)] | ||
: media | ||
@@ -551,3 +631,3 @@ | ||
while (index--) { | ||
if (mediaStack[index].type === 'image') { | ||
if (mediaStack[index].image) { | ||
tags = undefined | ||
@@ -558,5 +638,3 @@ break | ||
title = context.title | ||
if (media.type === 'image') { | ||
if (media.image) { | ||
tag('<img src="' + url(context.destination, protocolSrc) + '" alt="') | ||
@@ -569,5 +647,5 @@ raw(media.label) | ||
tag(title ? ' title="' + title + '"' : '') | ||
tag(context.title ? ' title="' + context.title + '"' : '') | ||
if (media.type === 'image') { | ||
if (media.image) { | ||
tag(' />') | ||
@@ -581,3 +659,2 @@ } else { | ||
mediaStack.pop() | ||
media = mediaStack[mediaStack.length - 1] | ||
} | ||
@@ -587,4 +664,3 @@ | ||
buffer() | ||
media = {} | ||
mediaStack.push(media) | ||
mediaStack.push({}) | ||
} | ||
@@ -595,3 +671,3 @@ | ||
resume() | ||
media.labelId = normalizeIdentifier(this.sliceSerialize(token)) | ||
mediaStack[mediaStack.length - 1].labelId = this.sliceSerialize(token) | ||
} | ||
@@ -605,3 +681,3 @@ | ||
function onexitdefinitiondestinationstring() { | ||
media.destination = resume() | ||
mediaStack[mediaStack.length - 1].destination = resume() | ||
setData('ignoreEncode') | ||
@@ -611,7 +687,7 @@ } | ||
function onexitdefinitiontitlestring() { | ||
media.title = resume() | ||
mediaStack[mediaStack.length - 1].title = resume() | ||
} | ||
function onexitdefinition() { | ||
var id = media.labelId | ||
var id = normalizeIdentifier(mediaStack[mediaStack.length - 1].labelId) | ||
@@ -621,7 +697,6 @@ resume() | ||
if (!own.call(definitions, id)) { | ||
definitions[id] = media | ||
definitions[id] = mediaStack[mediaStack.length - 1] | ||
} | ||
mediaStack.pop() | ||
media = mediaStack[mediaStack.length - 1] | ||
} | ||
@@ -749,6 +824,2 @@ | ||
function onexitcharacterescapevalue(token) { | ||
raw(encode(this.sliceSerialize(token))) | ||
} | ||
function onexitcharacterreferencemarker(token) { | ||
@@ -755,0 +826,0 @@ setData('characterReferenceType', token.type) |
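A hedged usage sketch of the buffering interface described at the top of this file (`eventsA` and `eventsB` stand for event slices produced by the tokenizer; they are placeholders, not part of the API):

var compile = compileHtml({allowDangerousHtml: false})
// Partial slices are buffered; the compiler returns the empty string.
compile(eventsA) // => ''
// A slice whose last event is `null` (the EOF marker) triggers `done()` and
// returns the final HTML.
compile(eventsB.concat([null])) // => e.g. '<p>hello</p>'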
// This module is compiled away! | ||
// | ||
// Parsing markdown comes with a couple of constants, such as minimum or maximum | ||
// sizes of certain sequences. | ||
// Additionally, there are a couple of symbols used inside micromark.
// These are all defined here, but compiled away by scripts. | ||
exports.asciiAlphaCaseDifference = 32 // The shift between lower- and uppercase is `0x20`. | ||
@@ -35,1 +40,2 @@ exports.attentionSideBefore = 1 // Symbol to mark an attention sequence as before content: `*a` | ||
exports.thematicBreakMarkerCountMin = 3 // At least 3 asterisks, dashes, or underscores are needed. | ||
exports.v8MaxSafeChunkSize = 10000 // V8 (and potentially others) have problems injecting giant arrays into other arrays, hence we operate in chunks. |
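The `chunked-splice` utility used throughout this release is not itself shown in the diff; a minimal sketch of what it presumably does with this constant (an assumption based on the call sites, not the verbatim source):

var v8MaxSafeChunkSize = 10000
var splice = [].splice
// Mutate `list` like `list.splice(start, remove, ...items)`, but inject the
// items in chunks, so `splice.apply` never receives a giant argument list.
function chunkedSplice(list, start, remove, items) {
  var chunkStart = 0
  var parameters
  splice.call(list, start, remove)
  while (chunkStart < items.length) {
    parameters = items.slice(chunkStart, chunkStart + v8MaxSafeChunkSize)
    parameters.unshift(start + chunkStart, 0)
    splice.apply(list, parameters)
    chunkStart += parameters.length - 2
  }
}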
// This module is compiled away! | ||
// | ||
// Here is the list of all types of tokens exposed by micromark, with a short | ||
// explanation of what they include and where they are found. | ||
// In picking names, generally, the rule is to be as explicit as possible | ||
// instead of reusing names. | ||
// For example, there is a `definitionDestination` and a `resourceDestination`, | ||
// instead of one shared name. | ||
@@ -3,0 +10,0 @@ // Generic type for data, such as in a title, a destination, etc. |
// This module is generated by `script/`. | ||
// | ||
// CommonMark handles attention (emphasis, strong) markers based on what comes | ||
// before or after them. | ||
// One such distinction is whether those characters are Unicode punctuation.
// This script is generated from the Unicode data. | ||
module.exports = /[!-\/:-@\[-`\{-~\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u2E52\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/ |
@@ -21,21 +21,14 @@ var attention = require('./tokenize/attention') | ||
var whitespace = require('./tokenize/whitespace') | ||
var resolveText = require('./initialize/text').resolver | ||
var document = {} | ||
var contentInitial = {} | ||
var flowInitial = {} | ||
var flow = {} | ||
var string = {} | ||
var text = {} | ||
var insideSpan = {} | ||
var document = (exports.document = {}) | ||
var contentInitial = (exports.contentInitial = {}) | ||
var flowInitial = (exports.flowInitial = {}) | ||
var flow = (exports.flow = {}) | ||
var string = (exports.string = {}) | ||
var text = (exports.text = {}) | ||
var insideSpan = (exports.insideSpan = {}) | ||
exports.document = document | ||
exports.contentInitial = contentInitial | ||
exports.flowInitial = flowInitial | ||
exports.flow = flow | ||
exports.string = string | ||
exports.text = text | ||
exports.insideSpan = insideSpan | ||
insideSpan[null] = [attention, resolveText] | ||
insideSpan[null] = attention | ||
document[42] = list | ||
@@ -42,0 +35,0 @@ document[43] = list |
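The numeric keys in these hooks are character codes: 42 is `*` and 43 is `+`, so both map to the list construct at the document level; the change itself is that the seven maps are now also exported (e.g., as `exports.document`) rather than kept module-local.

'*'.charCodeAt(0) // => 42 (document[42] = list)
'+'.charCodeAt(0) // => 43 (document[43] = list)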
@@ -35,5 +35,6 @@ exports.tokenize = initializeContent | ||
function lineStart(code) { | ||
var token = effects.enter('chunkText') | ||
token.contentType = 'text' | ||
token.previous = previous | ||
var token = effects.enter('chunkText', { | ||
contentType: 'text', | ||
previous: previous | ||
}) | ||
@@ -40,0 +41,0 @@ if (previous) { |
@@ -120,4 +120,3 @@ exports.tokenize = initializeDocument | ||
childFlow.write(null) | ||
childFlow = undefined | ||
childToken = undefined | ||
childToken = childFlow = undefined | ||
} | ||
@@ -211,4 +210,3 @@ | ||
inspectResult.continued = continued | ||
self.containerState = undefined | ||
self.interrupt = undefined | ||
self.interrupt = self.containerState = undefined | ||
return ok(code) | ||
@@ -215,0 +213,0 @@ } |
@@ -80,8 +80,6 @@ exports.tokenize = initializeFlow | ||
function startContent(code) { | ||
var token | ||
effects.enter('content') | ||
token = effects.enter('chunkContent') | ||
token.contentType = 'content' | ||
previous = token | ||
previous = effects.enter('chunkContent', { | ||
contentType: 'content' | ||
}) | ||
@@ -120,7 +118,8 @@ return data(code) | ||
effects.exit('chunkContent')._break = true | ||
token = effects.enter('chunkContent') | ||
token.contentType = 'content' | ||
token.previous = previous | ||
previous.next = token | ||
previous = token | ||
token = effects.enter('chunkContent', { | ||
contentType: 'content', | ||
previous: previous | ||
}) | ||
previous = previous.next = token | ||
return data | ||
@@ -127,0 +126,0 @@ } |
exports.text = initializeFactory('text') | ||
exports.string = initializeFactory('string') | ||
exports.resolver = {resolveAll: resolveAllText} | ||
@@ -7,3 +8,3 @@ var own = require('../constant/has-own-property') | ||
function initializeFactory(field) { | ||
return {tokenize: initializeText} | ||
return {tokenize: initializeText, resolveAll: resolveAllText} | ||
@@ -52,1 +53,29 @@ function initializeText(effects) { | ||
} | ||
function resolveAllText(events) { | ||
var index = -1 | ||
var dataEnter | ||
var event | ||
while (++index <= events.length) { | ||
event = events[index] | ||
if (dataEnter === undefined) { | ||
if (event && event[0] === 'enter' && event[1].type === 'data') { | ||
dataEnter = index | ||
index++ | ||
} | ||
} else if (!event || event[1].type !== 'data') { | ||
// Don’t do anything if there is one data token. | ||
if (index !== dataEnter + 2) { | ||
events[dataEnter][1].end = events[index - 1][1].end | ||
events.splice(dataEnter + 2, index - dataEnter - 2) | ||
index = dataEnter + 2 | ||
} | ||
dataEnter = undefined | ||
} | ||
} | ||
return events | ||
} |
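A worked example of the new resolver, on a hypothetical stream with two adjacent data tokens (offsets abbreviated):

// Before: [enter A(0-1)] [exit A(0-1)] [enter B(1-2)] [exit B(1-2)]
// After:  [enter A(0-2)] [exit A(0-2)]
// A’s `end` is copied from the last data exit and B’s events are spliced out;
// a lone data token (`index === dataEnter + 2`) is left untouched.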
@@ -8,14 +8,12 @@ module.exports = createParser | ||
var constructs = require('./constructs') | ||
var createTokenizer = require('./util/create-tokenizer') | ||
var combineExtensions = require('./util/combine-extensions') | ||
var miniflat = require('./util/miniflat') | ||
function createParser(options) { | ||
var settings = options || {} | ||
var parser | ||
parser = { | ||
var parser = { | ||
defined: [], | ||
constructs: combineExtensions( | ||
[constructs].concat(settings.extensions || []) | ||
[constructs].concat(miniflat(settings.extensions)) | ||
), | ||
@@ -22,0 +20,0 @@ |
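`miniflat` is new in this release and not shown in the diff; a minimal sketch of what it presumably does, judging from call sites such as `miniflat(settings.extensions)` (an assumption, not the verbatim source):

module.exports = miniflat
// Normalize a missing value, a single item, or a list into an array.
function miniflat(value) {
  return value === null || value === undefined
    ? []
    : 'length' in value
    ? value
    : [value]
}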
@@ -19,4 +19,3 @@ module.exports = stream | ||
emitter.readable = true | ||
emitter.writable = true | ||
emitter.writable = emitter.readable = true | ||
emitter.write = write | ||
@@ -23,0 +22,0 @@ emitter.end = end |
@@ -5,5 +5,6 @@ exports.tokenize = tokenizeAttention | ||
var shallow = require('../util/shallow') | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var classifyCharacter = require('../util/classify-character') | ||
var movePoint = require('../util/move-point') | ||
var resolveAll = require('../util/resolve-all') | ||
var movePoint = require('../util/move-point') | ||
@@ -15,3 +16,2 @@ // Internal type for markers that could turn into emphasis or strong sequences. | ||
function resolveAllAttention(events, context) { | ||
var length = events.length | ||
var index = -1 | ||
@@ -23,9 +23,9 @@ var attention | ||
var indexOpen | ||
var eventsUpTo | ||
var use | ||
var openingSequence | ||
var closingSequence | ||
var nextEvents | ||
// Walk through all events. | ||
while (++index < length) { | ||
while (++index < events.length) { | ||
closing = events[index][1] | ||
@@ -106,21 +106,25 @@ | ||
eventsUpTo = [].concat( | ||
// Before. | ||
events.slice(0, indexOpen - 1), | ||
// If there are more markers in the opening, add them before. | ||
opening.end.offset - opening.start.offset | ||
? [ | ||
['enter', opening, context], | ||
['exit', opening, context] | ||
] | ||
: [], | ||
// Opening. | ||
[ | ||
['enter', attention, context], | ||
['enter', openingSequence, context], | ||
['exit', openingSequence, context], | ||
['enter', text, context] | ||
], | ||
nextEvents = [] | ||
// Between. | ||
// If there are more markers in the opening, add them before. | ||
if (opening.end.offset - opening.start.offset) { | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [ | ||
['enter', opening, context], | ||
['exit', opening, context] | ||
]) | ||
} | ||
// Opening. | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [ | ||
['enter', attention, context], | ||
['enter', openingSequence, context], | ||
['exit', openingSequence, context], | ||
['enter', text, context] | ||
]) | ||
// Between. | ||
chunkedSplice( | ||
nextEvents, | ||
nextEvents.length, | ||
0, | ||
resolveAll( | ||
@@ -130,27 +134,33 @@ context.parser.constructs.insideSpan.null, | ||
context | ||
), | ||
// Closing. | ||
[ | ||
['exit', text, context], | ||
['enter', closingSequence, context], | ||
['exit', closingSequence, context], | ||
['exit', attention, context] | ||
] | ||
) | ||
) | ||
// After. | ||
events = eventsUpTo.concat( | ||
// If there are more markers in the closing, add them after. | ||
closing.end.offset - closing.start.offset | ||
? [ | ||
['enter', closing, context], | ||
['exit', closing, context] | ||
] | ||
: [], | ||
events.slice(index + 2) | ||
// Closing. | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [ | ||
['exit', text, context], | ||
['enter', closingSequence, context], | ||
['exit', closingSequence, context], | ||
['exit', attention, context] | ||
]) | ||
// If there are more markers in the closing, add them after. | ||
if (closing.end.offset - closing.start.offset) { | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [ | ||
['enter', closing, context], | ||
['exit', closing, context] | ||
]) | ||
} | ||
chunkedSplice( | ||
events, | ||
indexOpen - 1, | ||
index - indexOpen + 3, | ||
nextEvents | ||
) | ||
length = events.length | ||
index = eventsUpTo.length - 1 | ||
index = | ||
indexOpen + | ||
nextEvents.length - | ||
(closing.end.offset - closing.start.offset ? 4 : 2) | ||
break | ||
@@ -196,3 +206,2 @@ } | ||
function more(code) { | ||
var token | ||
var after | ||
@@ -208,3 +217,2 @@ var open | ||
token = effects.exit(attentionSequence) | ||
after = classifyCharacter(code) | ||
@@ -220,4 +228,3 @@ open = !after || (before && after === 2) | ||
token._open = open | ||
token._close = close | ||
effects.exit(attentionSequence, {_open: open, _close: close}) | ||
@@ -224,0 +231,0 @@ return ok(code) |
@@ -9,3 +9,2 @@ exports.tokenize = tokenizeAutolink | ||
function tokenizeAutolink(effects, ok, nok) { | ||
var token | ||
var size | ||
@@ -25,3 +24,3 @@ | ||
effects.exit('autolinkMarker') | ||
token = effects.enter('autolinkProtocol') | ||
effects.enter('autolinkProtocol') | ||
return open | ||
@@ -106,4 +105,4 @@ } | ||
if (code === 62) { | ||
token.type = 'autolinkEmail' | ||
effects.exit('autolinkEmail') | ||
// Exit, then change the type. | ||
effects.exit('autolinkProtocol').type = 'autolinkEmail' | ||
return end(code) | ||
@@ -110,0 +109,0 @@ } |
@@ -21,3 +21,3 @@ exports.tokenize = tokenizeBlockQuoteStart | ||
if (!self.containerState.started) { | ||
effects.enter('blockQuote')._container = true | ||
effects.enter('blockQuote', {_container: true}) | ||
self.containerState.started = true | ||
@@ -24,0 +24,0 @@ } |
@@ -8,2 +8,3 @@ exports.tokenize = tokenizeCodeFenced | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var prefixSize = require('../util/prefix-size') | ||
@@ -29,7 +30,7 @@ var createSpaceTokenizer = require('./partial-space') | ||
if (fences < 2 && events[length - 3][1].type === 'lineEnding') { | ||
return [].concat( | ||
events.slice(0, -3), | ||
[events[length - 1]], | ||
events.slice(-3, -1) | ||
) | ||
chunkedSplice(events, length - 3, 3, [ | ||
events[length - 1], | ||
events[length - 3], | ||
events[length - 2] | ||
]) | ||
} | ||
@@ -42,4 +43,4 @@ | ||
var self = this | ||
var closingFence = {tokenize: tokenizeClosingFence, partial: true} | ||
var initialPrefix = prefixSize(this.events) | ||
var closingFence = {tokenize: tokenizeClosingFence, partial: true} | ||
var sizeOpen = 0 | ||
@@ -84,3 +85,3 @@ var marker | ||
effects.enter('codeFencedFenceInfo') | ||
effects.enter('chunkString').contentType = 'string' | ||
effects.enter('chunkString', {contentType: 'string'}) | ||
return info(code) | ||
@@ -113,3 +114,3 @@ } | ||
effects.enter('codeFencedFenceMeta') | ||
effects.enter('chunkString').contentType = 'string' | ||
effects.enter('chunkString', {contentType: 'string'}) | ||
return meta(code) | ||
@@ -116,0 +117,0 @@ } |
@@ -7,2 +7,3 @@ exports.tokenize = tokenizeCodeIndented | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var prefixSize = require('../util/prefix-size') | ||
@@ -43,8 +44,6 @@ var createSpaceTokenizer = require('./partial-space') | ||
return [].concat( | ||
[['enter', code, context]], | ||
events.slice(0, index + 1), | ||
[['exit', code, context]], | ||
events.slice(index + 1) | ||
) | ||
chunkedSplice(events, 0, 0, [['enter', code, context]]) | ||
chunkedSplice(events, index + 2, 0, [['exit', code, context]]) | ||
return events | ||
} | ||
@@ -92,3 +91,3 @@ | ||
return effects.check(continuedIndent, continued, end) | ||
return effects.check(continuedIndent, continued, end)(code) | ||
} | ||
@@ -95,0 +94,0 @@ |
@@ -96,3 +96,3 @@ exports.tokenize = tokenizeDefinition | ||
effects.enter('chunkString').contentType = 'string' | ||
effects.enter('chunkString', {contentType: 'string'}) | ||
return label(code) | ||
@@ -165,3 +165,3 @@ } | ||
effects.enter('definitionDestinationString') | ||
effects.enter('chunkString').contentType = 'string' | ||
effects.enter('chunkString', {contentType: 'string'}) | ||
effects.consume(code) | ||
@@ -186,3 +186,3 @@ return code === 92 ? destinationRawEscape : destinationRaw | ||
effects.enter('definitionDestinationString') | ||
effects.enter('chunkString').contentType = 'string' | ||
effects.enter('chunkString', {contentType: 'string'}) | ||
effects.consume(code) | ||
@@ -324,3 +324,3 @@ return code === 92 ? destinationEnclosedEscape : destinationEnclosed | ||
effects.enter('chunkString').contentType = 'string' | ||
effects.enter('chunkString', {contentType: 'string'}) | ||
effects.consume(code) | ||
@@ -327,0 +327,0 @@ return code === 92 ? escape : data |
@@ -8,2 +8,3 @@ exports.tokenize = tokenizeAtxHeading | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var createSpaceTokenizer = require('./partial-space') | ||
@@ -16,3 +17,2 @@ | ||
var text | ||
var result | ||
@@ -41,4 +41,2 @@ // Prefix whitespace, part of the opening. | ||
result = events.slice(0, contentStart) | ||
if (contentEnd > contentStart) { | ||
@@ -58,11 +56,11 @@ content = { | ||
result = result.concat( | ||
[['enter', content, context]], | ||
[['enter', text, context]], | ||
[['exit', text, context]], | ||
[['exit', content, context]] | ||
) | ||
chunkedSplice(events, contentStart, contentEnd - contentStart + 1, [ | ||
['enter', content, context], | ||
['enter', text, context], | ||
['exit', text, context], | ||
['exit', content, context] | ||
]) | ||
} | ||
return result.concat(events.slice(contentEnd + 1)) | ||
return events | ||
} | ||
@@ -69,0 +67,0 @@ |
@@ -150,5 +150,2 @@ exports.tokenize = tokenizeHtml | ||
function tagName(code) { | ||
var raw | ||
var basic | ||
if ( | ||
@@ -160,6 +157,3 @@ code === null || | ||
) { | ||
raw = raws.indexOf(buffer) > -1 | ||
basic = basics.indexOf(buffer) > -1 | ||
if (raw && startTag && code !== 47) { | ||
if (raws.indexOf(buffer) > -1 && startTag && code !== 47) { | ||
kind = 1 | ||
@@ -169,3 +163,3 @@ return self.interrupt ? ok(code) : continuation(code) | ||
if (basic) { | ||
if (basics.indexOf(buffer) > -1) { | ||
kind = 6 | ||
@@ -380,3 +374,3 @@ | ||
continuationAtLineEnding | ||
) | ||
)(code) | ||
} | ||
@@ -383,0 +377,0 @@ |
@@ -11,4 +11,5 @@ exports.tokenize = tokenizeLabelEnd | ||
var normalizeIdentifier = require('../util/normalize-identifier') | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var resolveAll = require('../util/resolve-all') | ||
var shallow = require('../util/shallow') | ||
var resolveAll = require('../util/resolve-all') | ||
var spaceOrLineEnding = require('./partial-space-or-line-ending') | ||
@@ -22,7 +23,6 @@ var createSpaceTokenizer = require('./partial-space') | ||
function resolveAllLabelEnd(events) { | ||
var length = events.length | ||
var index = -1 | ||
var token | ||
while (++index < length) { | ||
while (++index < events.length) { | ||
token = events[index][1] | ||
@@ -32,12 +32,10 @@ | ||
events[index][0] === 'enter' && | ||
((!token._used && | ||
(token.type === 'labelImage' || | ||
token.type === 'labelLink' || | ||
token.type === 'labelEnd')) || | ||
(token.type === 'data' && token._wasLabel)) | ||
!token._used && | ||
(token.type === 'labelImage' || | ||
token.type === 'labelLink' || | ||
token.type === 'labelEnd') | ||
) { | ||
// Remove the marker. | ||
events.splice(index + 1, token.type === 'labelImage' ? 4 : 2) | ||
token.type = 'data' | ||
// Remove the marker. | ||
events.splice(index + 1, 2) | ||
length -= 2 | ||
} | ||
@@ -59,2 +57,3 @@ } | ||
var offset | ||
var nextEvents | ||
@@ -68,4 +67,11 @@ // Find an opening. | ||
if (openIndex) { | ||
// Mark other link openings as data, as we can’t have links in links. | ||
// Mark other link openings as inactive, as we can’t have links in | ||
// links. | ||
if (type === 'link' && token.type === 'labelLink') { | ||
// Already marked as inactive by a previous call, we can stop | ||
// iterating. | ||
if (token._inactive) { | ||
break | ||
} | ||
token._inactive = true | ||
@@ -75,19 +81,16 @@ } | ||
// Find where the link or image starts. | ||
else { | ||
if ( | ||
(token.type === 'labelImage' || token.type === 'labelLink') && | ||
!token._inactive && | ||
!token._used | ||
) { | ||
openIndex = index | ||
type = token.type === 'labelLink' ? 'link' : 'image' | ||
offset = token.type === 'labelLink' ? 0 : 2 | ||
} | ||
else if ( | ||
(token.type === 'labelImage' || token.type === 'labelLink') && | ||
!token._balanced && | ||
!token._inactive && | ||
!token._used | ||
) { | ||
openIndex = index | ||
type = token.type === 'labelLink' ? 'link' : 'image' | ||
offset = token.type === 'labelLink' ? 0 : 2 | ||
} | ||
} | ||
// Exit. | ||
else { | ||
if (!closeIndex && !token._used && token.type === 'labelEnd') { | ||
closeIndex = index | ||
} | ||
else if (!closeIndex && !token._used && token.type === 'labelEnd') { | ||
closeIndex = index | ||
} | ||
@@ -114,12 +117,23 @@ } | ||
return [].concat( | ||
events.slice(0, openIndex), | ||
[ | ||
['enter', group, context], | ||
['enter', label, context] | ||
], | ||
nextEvents = [ | ||
['enter', group, context], | ||
['enter', label, context] | ||
] | ||
// Opening marker. | ||
events.slice(openIndex + 1, openIndex + offset + 3), | ||
[['enter', text, context]], | ||
// Opening marker. | ||
chunkedSplice( | ||
nextEvents, | ||
nextEvents.length, | ||
0, | ||
events.slice(openIndex + 1, openIndex + offset + 3) | ||
) | ||
// Text open. | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [['enter', text, context]]) | ||
// Between. | ||
chunkedSplice( | ||
nextEvents, | ||
nextEvents.length, | ||
0, | ||
resolveAll( | ||
@@ -129,11 +143,22 @@ context.parser.constructs.insideSpan.null, | ||
context | ||
), | ||
) | ||
) | ||
[['exit', text, context]], | ||
// Closing marker. | ||
events.slice(closeIndex - 2, closeIndex), | ||
[['exit', label, context]], | ||
events.slice(closeIndex + 1), | ||
[['exit', group, context]] | ||
) | ||
// Text close, marker close, label close. | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [ | ||
['exit', text, context], | ||
events[closeIndex - 2], | ||
events[closeIndex - 1], | ||
['exit', label, context] | ||
]) | ||
// Reference, resource, or so. | ||
chunkedSplice(nextEvents, nextEvents.length, 0, events.slice(closeIndex + 1)) | ||
// Media close. | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [['exit', group, context]]) | ||
chunkedSplice(events, openIndex, events.length, nextEvents) | ||
return events | ||
} | ||
@@ -150,3 +175,2 @@ | ||
var index = self.events.length | ||
var labelEnd | ||
@@ -157,2 +181,3 @@ // Find an opening. | ||
!self.events[index][1]._used && | ||
!self.events[index][1]._balanced && | ||
(self.events[index][1].type === 'labelImage' || | ||
@@ -176,3 +201,7 @@ self.events[index][1].type === 'labelLink') | ||
labelEnd = effects.enter('labelEnd') | ||
labelIdentifier = normalizeIdentifier( | ||
self.sliceSerialize({start: labelStart.end, end: self.now()}) | ||
) | ||
effects.enter('labelEnd') | ||
effects.enter('labelMarker') | ||
@@ -182,6 +211,2 @@ effects.consume(code) | ||
effects.exit('labelEnd') | ||
labelIdentifier = normalizeIdentifier( | ||
self.sliceSerialize({start: labelStart.end, end: labelEnd.start}) | ||
) | ||
return afterLabelEnd | ||
@@ -218,4 +243,3 @@ } | ||
function balancedButNok(code) { | ||
labelStart.type = 'data' | ||
labelStart._wasLabel = true | ||
labelStart._balanced = true | ||
return nok(code) | ||
@@ -259,3 +283,3 @@ } | ||
effects.enter('resourceDestinationString') | ||
effects.enter('chunkString').contentType = 'string' | ||
effects.enter('chunkString', {contentType: 'string'}) | ||
return destinationRaw(code) | ||
@@ -270,3 +294,3 @@ } | ||
effects.enter('resourceDestinationString') | ||
effects.enter('chunkString').contentType = 'string' | ||
effects.enter('chunkString', {contentType: 'string'}) | ||
return destinationEnclosed(code) | ||
@@ -405,3 +429,3 @@ } | ||
effects.enter('chunkString').contentType = 'string' | ||
effects.enter('chunkString', {contentType: 'string'}) | ||
return title(code) | ||
@@ -466,3 +490,3 @@ } | ||
effects.enter('referenceString') | ||
effects.enter('chunkString').contentType = 'string' | ||
effects.enter('chunkString', {contentType: 'string'}) | ||
return atStart | ||
@@ -469,0 +493,0 @@ } |
@@ -55,7 +55,6 @@ exports.tokenize = tokenizeListStart | ||
self.containerState.type = 'listUnordered' | ||
effects.enter(self.containerState.type)._container = true | ||
effects.enter(self.containerState.type, {_container: true}) | ||
} | ||
token = effects.enter('listItemPrefix') | ||
token._size = 0 | ||
token = effects.enter('listItemPrefix', {_size: 0}) | ||
return atMarker(code) | ||
@@ -71,9 +70,8 @@ } | ||
self.containerState.type = 'listOrdered' | ||
effects.enter(self.containerState.type)._container = true | ||
effects.enter(self.containerState.type, {_container: true}) | ||
} | ||
token = effects.enter('listItemPrefix') | ||
token = effects.enter('listItemPrefix', {_size: 1}) | ||
effects.enter('listItemValue') | ||
effects.consume(code) | ||
token._size = 1 | ||
return self.interrupt ? afterValue : inside | ||
@@ -129,3 +127,3 @@ } | ||
if (markdownSpace(code)) { | ||
effects.enter('listItemPrefixWhitespace')._size = 1 | ||
effects.enter('listItemPrefixWhitespace', {_size: 1}) | ||
effects.consume(code) | ||
@@ -171,9 +169,7 @@ effects.exit('listItemPrefixWhitespace') | ||
if (self.containerState.furtherBlankLines || !markdownSpace(code)) { | ||
self.containerState.initialBlankLine = undefined | ||
self.containerState.furtherBlankLines = undefined | ||
self.containerState.furtherBlankLines = self.containerState.initialBlankLine = undefined | ||
return effects.attempt(nextItem, onItem, nok)(code) | ||
} | ||
self.containerState.initialBlankLine = undefined | ||
self.containerState.furtherBlankLines = undefined | ||
self.containerState.furtherBlankLines = self.containerState.initialBlankLine = undefined | ||
return effects.attempt( | ||
@@ -180,0 +176,0 @@ indent, |
@@ -21,4 +21,3 @@ exports.tokenize = tokenizeSpaceOrLineEnding | ||
if (markdownSpace(code)) { | ||
token = effects.enter('whitespace') | ||
token._size = 0 | ||
token = effects.enter('whitespace', {_size: 0}) | ||
return whitespace(code) | ||
@@ -25,0 +24,0 @@ } |
@@ -17,4 +17,3 @@ module.exports = createSpaceTokenizer | ||
if (markdownSpace(code)) { | ||
token = effects.enter(type) | ||
token._size = 0 | ||
token = effects.enter(type, {_size: 0}) | ||
return prefix(code) | ||
@@ -28,4 +27,4 @@ } | ||
if (token._size < limit && markdownSpace(code)) { | ||
effects.consume(code) | ||
token._size++ | ||
effects.consume(code) | ||
return prefix | ||
@@ -32,0 +31,0 @@ } |
@@ -7,2 +7,3 @@ exports.tokenize = tokenizeWhitespace | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var shallow = require('../util/shallow') | ||
@@ -30,8 +31,4 @@ var createSpaceTokenizer = require('./partial-space') | ||
result = [].concat( | ||
[['enter', token, context]], | ||
events.slice(0, 2), | ||
[['exit', token, context]], | ||
events.slice(2) | ||
) | ||
chunkedSplice(events, 0, 0, [['enter', token, context]]) | ||
chunkedSplice(events, 3, 0, [['exit', token, context]]) | ||
} else { | ||
@@ -52,4 +49,3 @@ head.type = 'lineSuffix' | ||
if (markdownSpace(code)) { | ||
token = effects.enter('whitespace') | ||
token._size = 0 | ||
token = effects.enter('whitespace', {_size: 0}) | ||
return whitespace(code) | ||
@@ -93,6 +89,5 @@ } | ||
// Mark as normal data. | ||
token.type = 'data' | ||
effects.exit('data') | ||
effects.exit('whitespace').type = 'data' | ||
return ok(code) | ||
} | ||
} |
@@ -7,2 +7,6 @@ module.exports = classifyCharacter | ||
// Classify whether a character is unicode whitespace, unicode punctuation, or | ||
// anything else. | ||
// Used for attention (emphasis, strong), whose sequences can open or close | ||
// based on the class of surrounding characters. | ||
function classifyCharacter(code) { | ||
@@ -9,0 +13,0 @@ if ( |
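Filling in the body this hunk cuts off, a sketch of the classifier consistent with the comment above and with the `after === 2` punctuation check in the attention tokenizer (the requires and exact return values are assumptions):

var markdownLineEndingOrSpace = require('../character/markdown-line-ending-or-space')
var unicodeWhitespace = require('../character/unicode-whitespace')
var unicodePunctuation = require('../character/unicode-punctuation')
function classifyCharacter(code) {
  if (code === null || markdownLineEndingOrSpace(code) || unicodeWhitespace(code)) {
    return 1 // whitespace
  }
  if (unicodePunctuation(code)) {
    return 2 // punctuation
  }
  // Anything else: `undefined`.
}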
module.exports = combineExtensions | ||
var own = require('../constant/has-own-property') | ||
var miniflat = require('./miniflat') | ||
var chunkedSplice = require('./chunked-splice') | ||
// Combine several syntax extensions into one. | ||
function combineExtensions(extensions) { | ||
@@ -22,3 +25,2 @@ var all = {} | ||
var code | ||
var constructs | ||
@@ -30,25 +32,21 @@ for (hook in extension) { | ||
for (code in right) { | ||
constructs = mergeConstructs( | ||
[].concat(right[code] || []), | ||
left[code] = constructs( | ||
miniflat(right[code]), | ||
own.call(left, code) ? left[code] : [] | ||
) | ||
left[code] = constructs.length === 1 ? constructs[0] : constructs | ||
} | ||
} | ||
} | ||
function mergeConstructs(constructs, between) { | ||
var length = constructs.length | ||
var index = -1 | ||
var before = [] | ||
var after = [] | ||
var list | ||
function constructs(list, existing) { | ||
var length = list.length | ||
var index = -1 | ||
var before = [] | ||
while (++index < length) { | ||
list = constructs[index].add === 'after' ? after : before | ||
list.push(constructs[index]) | ||
} | ||
while (++index < length) { | ||
;(list[index].add === 'after' ? existing : before).push(list[index]) | ||
} | ||
return before.concat(between, after) | ||
} | ||
chunkedSplice(existing, 0, 0, before) | ||
return existing | ||
} |
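A hedged usage sketch: merging the default constructs with one syntax extension, where a construct marked `add: 'after'` ends up after the existing constructs for that code and everything else goes before them (the require path and the placeholder `myTokenizer` are illustrative):

var combined = combineExtensions([
  require('./constructs'),
  {text: {42: {tokenize: myTokenizer, add: 'after'}}}
])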
@@ -5,2 +5,3 @@ module.exports = combineExtensions | ||
// Combine several HTML extensions into one. | ||
function combineExtensions(extensions) { | ||
@@ -7,0 +8,0 @@ var handlers = {} |
@@ -6,2 +6,3 @@ module.exports = createTokenizer | ||
var markdownLineEnding = require('../character/markdown-line-ending') | ||
var chunkedSplice = require('./chunked-splice') | ||
var shallow = require('./shallow') | ||
@@ -11,6 +12,14 @@ var serializeChunks = require('./serialize-chunks') | ||
var resolveAll = require('./resolve-all') | ||
var miniflat = require('./miniflat') | ||
// Create a tokenizer. | ||
// Tokenizers deal with one type of data (e.g., containers, flow, text). | ||
// The parser is the object dealing with it all. | ||
// `initialize` works like other constructs, except that only its `tokenize` | ||
// function is used, in which case it doesn’t receive an `ok` or `nok`. | ||
// `from` can be given to set the point before the first character; when
// further lines are indented, though, they must be set with `defineSkip`.
function createTokenizer(parser, initialize, from) { | ||
var point = from ? assign({}, from) : {line: 1, column: 1, offset: 0} | ||
var columnStart = [] | ||
var columnStart = {} | ||
var resolveAllConstructs = [] | ||
@@ -36,3 +45,3 @@ var chunks = [] | ||
// State and tools for resolving, serializing. | ||
// State and tools for resolving and serializing. | ||
var context = { | ||
@@ -49,6 +58,13 @@ previous: null, | ||
// The state function. | ||
var state = initialize.tokenize.call(context, effects) | ||
if (initialize.resolveAll) { | ||
resolveAllConstructs.push(initialize) | ||
} | ||
// Track which character we expect to be consumed, to catch bugs. | ||
var expectedCode | ||
// Store where we are in the input stream. | ||
point._index = index | ||
@@ -59,4 +75,4 @@ point._bufferIndex = bufferIndex | ||
function write(value) { | ||
chunks.push(value) | ||
function write(chunk) { | ||
chunks.push(chunk) | ||
@@ -66,3 +82,3 @@ main() | ||
// Exit if we’re not done, resolve might change stuff. | ||
if (value !== null) { | ||
if (chunk !== null) { | ||
return [] | ||
@@ -77,3 +93,3 @@ } | ||
// Add EOF token. | ||
context.events.push(value) | ||
context.events.push(chunk) | ||
return context.events | ||
@@ -87,3 +103,3 @@ } | ||
function sliceSerialize(token) { | ||
return serializeChunks(sliceChunks(chunks, token)) | ||
return serializeChunks(sliceStream(token)) | ||
} | ||
@@ -101,8 +117,3 @@ | ||
columnStart[value.line] = value.column | ||
// If we’re currently at that point: | ||
if (point.column === 1 && point.line === value.line) { | ||
point.column = value.column | ||
point.offset += value.column - 1 | ||
} | ||
accountForPotentialSkip() | ||
} | ||
@@ -115,2 +126,6 @@ | ||
// Main loop (note that `index` and `bufferIndex` are modified by `consume`). | ||
// Here is where we walk through the chunks, which are either strings of
// several characters or numerical character codes.
// The reason to do this in a loop instead of a call is so the stack can | ||
// drain. | ||
function main() { | ||
@@ -123,2 +138,3 @@ var chunk | ||
// If we’re in a buffer chunk, loop through it. | ||
if (typeof chunk === 'string') { | ||
@@ -139,4 +155,4 @@ chunkIndex = index | ||
expectedCode = chunk | ||
consumed = undefined | ||
state = state(expectedCode) | ||
@@ -153,12 +169,8 @@ } | ||
point.line++ | ||
if (point.line in columnStart) { | ||
point.column = columnStart[point.line] | ||
point.offset += columnStart[point.line] - 1 | ||
} else { | ||
point.column = 1 | ||
} | ||
} | ||
// Anything else. | ||
else if (code !== -1) { | ||
point.column = 1 | ||
point.offset += code === -3 ? 2 : 1 | ||
accountForPotentialSkip() | ||
} else if (code !== -1) { | ||
point.column++ | ||
point.offset++ | ||
} | ||
@@ -179,9 +191,6 @@ | ||
if (code !== -1) { | ||
point.offset += code === -3 ? 2 : 1 | ||
} | ||
point._bufferIndex = bufferIndex | ||
point._index = index | ||
// Expose the previous character. | ||
context.previous = code | ||
@@ -191,4 +200,6 @@ } | ||
// Start a token. | ||
function enter(type) { | ||
var token = {type: type, start: now()} | ||
function enter(type, fields) { | ||
var token = fields || {} | ||
token.type = type | ||
token.start = now() | ||
@@ -203,6 +214,8 @@ context.events.push(['enter', token, context]) | ||
// Stop a token. | ||
function exit(type) { | ||
function exit(type, fields) { | ||
var token = stack.pop() | ||
token.end = now() | ||
if (fields) assign(token, fields) | ||
context.events.push(['exit', token, context]) | ||
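// With the new optional `fields` parameter, the common pattern of mutating a
// token right after entering it collapses into one call (as used throughout
// this diff):
//
//   // Before:
//   token = effects.enter(types.chunkString)
//   token.contentType = constants.contentTypeString
//   // After:
//   effects.enter(types.chunkString, {contentType: constants.contentTypeString})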
@@ -223,5 +236,8 @@ | ||
function constructFactory(onreturn, props) { | ||
// Factory to attempt/check/interrupt. | ||
function constructFactory(onreturn, fields) { | ||
return hook | ||
// Handle either an object mapping codes to constructs, a list of | ||
// constructs, or a single construct. | ||
function hook(constructs, returnState, bogusState) { | ||
@@ -239,8 +255,5 @@ var listOfConstructs | ||
function handleMapOfConstructs(code) { | ||
var list = | ||
code === null | ||
? [] | ||
: [].concat(constructs[code] || [], constructs.null || []) | ||
var list = miniflat(constructs[code]).concat(miniflat(constructs.null)) | ||
if (list.length) { | ||
if (code !== null && list.length) { | ||
return handleListOfConstructs(list)(code) | ||
@@ -266,3 +279,3 @@ } | ||
function start(code) { | ||
info = store(construct) | ||
info = store() | ||
@@ -274,3 +287,3 @@ if (!construct.partial) { | ||
return construct.tokenize.call( | ||
assign({}, context, props), | ||
assign({}, context, fields), | ||
effects, | ||
@@ -309,5 +322,8 @@ ok, | ||
if (construct.resolve) { | ||
context.events = context.events | ||
.slice(0, from) | ||
.concat(construct.resolve(context.events.slice(from), context)) | ||
chunkedSplice( | ||
context.events, | ||
from, | ||
context.events.length - from, | ||
construct.resolve(context.events.slice(from), context) | ||
) | ||
} | ||
@@ -335,10 +351,2 @@ | ||
index = startIndex | ||
// Sometimes, we reset to directly after a line ending. | ||
// Make sure to indent. | ||
if (point.line in columnStart && point.column === 1) { | ||
point.column = columnStart[point.line] | ||
point.offset += columnStart[point.line] - 1 | ||
} | ||
bufferIndex = startBufferIndex | ||
@@ -349,4 +357,12 @@ context.previous = startPrevious | ||
stack = startStack | ||
accountForPotentialSkip() | ||
} | ||
} | ||
function accountForPotentialSkip() { | ||
if (point.line in columnStart && point.column === 1) { | ||
point.column = columnStart[point.line] | ||
point.offset += columnStart[point.line] - 1 | ||
} | ||
} | ||
} |
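// A hedged usage sketch of the tokenizer above (the `parser` and `initialize`
// values come from the surrounding package, not this diff): chunks are written
// one at a time, and the eof chunk (`codes.eof`, which is `null`) flushes the
// final events.
//
//   var tokenizer = createTokenizer(parser, initialize)
//   tokenizer.write('# hi')    // → [] (not done yet; resolve may change things)
//   tokenizer.write(codes.eof) // → all events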
module.exports = flatMap | ||
var chunkedSplice = require('./chunked-splice') | ||
// Note: `a` could be given here, which is then passed to the map function. | ||
@@ -11,3 +13,3 @@ // It functions as a rest/spread, but smaller. | ||
while (++index < length) { | ||
result = result.concat(map(array[index], a)) | ||
chunkedSplice(result, result.length, 0, map(array[index], a)) | ||
} | ||
@@ -14,0 +16,0 @@ |
module.exports = lowercase | ||
// Lowercase a character code. | ||
function lowercase(code) { | ||
return code > 64 && code < 91 ? code + 32 : code | ||
} |
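// E.g.: `lowercase(72)` (`H`, between 64 and 91 exclusive) → `104` (`h`);
// codes outside the ASCII uppercase range pass through unchanged.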
module.exports = resolveAll | ||
var miniflat = require('./miniflat') | ||
function resolveAll(oneOrMoreConstructs, events, context) { | ||
var constructs = [].concat(oneOrMoreConstructs) | ||
var constructs = miniflat(oneOrMoreConstructs) | ||
var called = [] | ||
@@ -6,0 +8,0 @@ var length = constructs.length |
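// `miniflat` itself is not shown in this diff; judging from its uses (here, in
// `create-tokenizer`, and in the HTML compiler), it plausibly normalizes
// “nothing, one, or many” into an array, roughly:
//
//   function miniflat(value) {
//     return value === null || value === undefined
//       ? []
//       : 'length' in value
//       ? value
//       : [value]
//   }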
module.exports = subtokenize | ||
var splice = require('../constant/splice') | ||
var chunkedSplice = require('./chunked-splice') | ||
@@ -17,2 +17,3 @@ var flatMap = require('./flat-map') | ||
var otherEvent | ||
var slice | ||
@@ -34,3 +35,3 @@ while (++index < events.length) { | ||
if (subevents) { | ||
splice.apply(events, [index, 2].concat(subevents)) | ||
chunkedSplice(events, index, 2, subevents) | ||
index += subevents.length - 1 | ||
@@ -105,9 +106,5 @@ event[1]._subevents = undefined | ||
// Switch container exit w/ line endings. | ||
splice.apply( | ||
events, | ||
[lineIndex, index - lineIndex + 1, event].concat( | ||
events.slice(lineIndex, index) | ||
) | ||
) | ||
slice = events.slice(lineIndex, index) | ||
slice.unshift(event) | ||
chunkedSplice(events, lineIndex, index - lineIndex + 1, slice) | ||
index = lineIndex | ||
@@ -121,3 +118,3 @@ } | ||
function unravelLinkedTokens(token, context) { | ||
function unravelLinkedTokenImplementation(token, context) { | ||
var hasGfmTaskListHack = token._gfmTasklistFirstContentOfListItem | ||
@@ -140,3 +137,3 @@ var tokenizer | ||
if (!token.next) { | ||
stream = stream.concat(null) | ||
stream.push(null) | ||
} | ||
@@ -156,9 +153,15 @@ | ||
} | ||
} | ||
if (token.next) { | ||
function unravelLinkedTokens(token, context) { | ||
while (token) { | ||
// Loop over the tokens, because a recursive function would cause a stack overflow. | ||
unravelLinkedTokenImplementation(token, context) | ||
if (!token.next) { | ||
// Done! | ||
divideTokens(token) | ||
} | ||
// Unravel the next token. | ||
unravelLinkedTokens(token.next, context) | ||
} else { | ||
// Done! | ||
divideTokens(token) | ||
token = token.next | ||
} | ||
@@ -165,0 +168,0 @@ } |
@@ -1,7 +0,3 @@ | ||
module.exports = asciiAlpha | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
function asciiAlpha(code) { | ||
return /[A-Za-z]/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/[A-Za-z]/) |
@@ -1,7 +0,3 @@ | ||
module.exports = asciiAlphanumeric | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
function asciiAlphanumeric(code) { | ||
return /[\dA-Za-z]/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/[\dA-Za-z]/) |
@@ -1,8 +0,3 @@ | ||
module.exports = asciiAtext | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
// Also includes dot. | ||
function asciiAtext(code) { | ||
return /[#-'*+\--9=?A-Z^-~]/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/[#-'*+\--9=?A-Z^-~]/) |
@@ -1,7 +0,3 @@ | ||
module.exports = asciiDigit | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
function asciiDigit(code) { | ||
return /\d/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/\d/) |
@@ -1,7 +0,3 @@ | ||
module.exports = asciiHexDigit | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
function asciiHexDigit(code) { | ||
return /[\dA-Fa-f]/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/[\dA-Fa-f]/) |
@@ -1,7 +0,3 @@ | ||
module.exports = asciiPunctuation | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
function asciiPunctuation(code) { | ||
return /[!-/:-@[-`{-~]/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/[!-/:-@[-`{-~]/) |
// This module is compiled away! | ||
// | ||
// micromark works based on character codes. | ||
// This module contains constants for the ASCII block and the replacement | ||
// character. | ||
// A couple of them are handled in a special way, such as the line endings | ||
// (CR, LF, and CR+LF, commonly known as end-of-line: EOLs), the tab (horizontal | ||
// tab) and its expansion based on what column it’s at (virtual space), | ||
// and the end-of-file (eof) character. | ||
// As values are preprocessed before handling them, the actual characters LF, | ||
// CR, HT, and NUL (which is present as the replacement character) are | ||
// guaranteed not to exist. | ||
exports.carriageReturn = -5 | ||
@@ -3,0 +14,0 @@ exports.lineFeed = -4 |
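// For reference, the negative codes behind the range checks throughout this
// diff (CR and LF are shown above; the rest are inferred from the comparisons
// in `create-tokenizer` and the `markdown-line-ending*` checks below):
//
//   exports.carriageReturn = -5
//   exports.lineFeed = -4
//   exports.carriageReturnLineFeed = -3 // Counts as two characters for `offset`.
//   exports.horizontalTab = -2
//   exports.virtualSpace = -1
//
// This ordering is what makes the compiled checks work: `code < -2` matches
// exactly the three line endings, and `code < 0` matches every special
// whitespace code.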
@@ -6,10 +6,3 @@ module.exports = markdownLineEndingOrSpace | ||
function markdownLineEndingOrSpace(code) { | ||
return ( | ||
code === codes.carriageReturn || | ||
code === codes.lineFeed || | ||
code === codes.carriageReturnLineFeed || | ||
code === codes.horizontalTab || | ||
code === codes.virtualSpace || | ||
code === codes.space | ||
) | ||
return code < codes.nul || code === codes.space | ||
} |
@@ -6,7 +6,3 @@ module.exports = markdownLineEnding | ||
function markdownLineEnding(code) { | ||
return ( | ||
code === codes.carriageReturn || | ||
code === codes.lineFeed || | ||
code === codes.carriageReturnLineFeed | ||
) | ||
return code < codes.horizontalTab | ||
} |
@@ -1,10 +0,6 @@ | ||
module.exports = punctuation | ||
var unicodePunctuation = require('../constant/unicode-punctuation-regex') | ||
var fromCharCode = require('../constant/from-char-code') | ||
var check = require('../util/regex-check') | ||
// Size note: removing ASCII from the regex and using `ascii-punctuation` here | ||
// In fact adds to the bundle size. | ||
function punctuation(code) { | ||
return unicodePunctuation.test(fromCharCode(code)) | ||
} | ||
module.exports = check(unicodePunctuation) |
@@ -1,7 +0,3 @@ | ||
module.exports = whitespace | ||
var check = require('../util/regex-check') | ||
var fromCharCode = require('../constant/from-char-code') | ||
function whitespace(code) { | ||
return /\s/.test(fromCharCode(code)) | ||
} | ||
module.exports = check(/\s/) |
// This module is compiled away! | ||
// | ||
// While micromark works based on character codes, this module includes the | ||
// string versions of ’em. | ||
// The C0 block, except for LF, CR, and HT, and w/ the replacement character | ||
// added, is available here. | ||
exports.ht = '\t' | ||
@@ -3,0 +8,0 @@ exports.lf = '\n' |
@@ -0,4 +1,15 @@ | ||
// While micromark is a lexer/tokenizer, the common case of going from markdown | ||
// to html is currently built in as this module, even though the parts can be | ||
// used separately to build ASTs, CSTs, or many other output formats. | ||
// | ||
// Having an HTML compiler built in is useful because it allows us to check for | ||
// compliance with CommonMark, the de facto norm of markdown, specified in | ||
// roughly 600 input/output cases. | ||
// | ||
// This module has an interface that accepts lists of events instead of the | ||
// whole document at once. | ||
// However, because markdown can’t truly be streamed, we buffer events before | ||
// processing them and outputting the final result. | ||
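// A hedged usage sketch of that interface (the event chunks are assumptions,
// not part of this diff):
//
//   var compile = compileHtml(options)
//   compile(someEvents)   // → '' (buffered; last event is not the eof chunk)
//   compile(restOfEvents) // → the final HTML, once `codes.eof` comes through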
module.exports = compileHtml | ||
var assert = require('assert') | ||
var decode = require('parse-entities/decode-entity') | ||
@@ -10,2 +21,3 @@ var codes = require('../character/codes') | ||
var types = require('../constant/types') | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var normalizeUri = require('../util/normalize-uri') | ||
@@ -15,5 +27,14 @@ var normalizeIdentifier = require('../util/normalize-identifier') | ||
var combineExtensions = require('../util/combine-html-extensions') | ||
var miniflat = require('../util/miniflat') | ||
// These two ensure that certain characters which have special meaning in HTML | ||
// are dealt with. | ||
// Technically, we can skip `>` and `"` in many cases, but CM includes them. | ||
var characterReferences = {'"': 'quot', '&': 'amp', '<': 'lt', '>': 'gt'} | ||
var characterReferencesExpression = /["&<>]/g | ||
// These two are allowlists of essentially safe protocols for full URLs in, | ||
// respectively, the `href` (on `<a>`) and `src` (on `<img>`) attributes. | ||
// They are based on what is allowed on GitHub, | ||
// <https://github.com/syntax-tree/hast-util-sanitize/blob/9275b21/lib/github.json#L31> | ||
var protocolHref = /^(https?|ircs?|mailto|xmpp)$/i | ||
@@ -23,99 +44,137 @@ var protocolSrc = /^https?$/i | ||
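// E.g.: `mailto:` or `irc:` URLs are allowed in `href`, but only `http:` and
// `https:` in `src`; anything else, such as `javascript:`, is ignored by the
// `url` helper below:
//
//   protocolHref.test('mailto') // → true
//   protocolSrc.test('mailto')  // → false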
function compileHtml(options) { | ||
// Configuration. | ||
// Includes `htmlExtensions` (an array of extensions), `defaultLineEnding` (a | ||
// preferred EOL), `allowDangerousProtocol` (whether to allow potential | ||
// dangerous protocols), and `allowDangerousHtml` (whether to allow potential | ||
// dangerous HTML). | ||
var settings = options || {} | ||
// `tags` is needed because, according to markdown, links, emphasis, and | ||
// whatnot can exist in images; however, as HTML doesn’t allow content in | ||
// images, the tags are ignored in the `alt` attribute, but the content | ||
// remains. | ||
var tags = true | ||
// An object mapping identifiers to media (URLs and titles) defined with | ||
// definitions. | ||
var definitions = {} | ||
// A lot of the handlers need to capture some of the output data, modify it | ||
// somehow, and then deal with it. | ||
// We do that by tracking a stack of buffers, that can be opened (with | ||
// `buffer`) and closed (with `resume`) to access them. | ||
var buffers = [[]] | ||
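// Sketch of that mechanism as used below: `buffer()` pushes a fresh array,
// `raw`/`tag` append to whichever array is on top, and `resume()` pops the top
// array and joins it into a string:
//
//   buffer()
//   raw('a')
//   raw('b')
//   resume() // → 'ab'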
// As definitions can come after references, we need to figure out the media | ||
// (URLs and titles) defined by them before handling the references. | ||
// So, we do sort of what HTML does: put metadata at the start (in head), and | ||
// then put content after (`body`). | ||
var head = [] | ||
var body = [] | ||
// As events for tokens can come in parts, we need to store them somewhere | ||
// before we handle everything. | ||
// We do that in `events`. | ||
var events = [] | ||
// As we can have links in images and the other way around, where the deepest | ||
// ones are closed first, we need to track which one we’re in. | ||
var mediaStack = [] | ||
// Same for tightness, which is specific to lists. | ||
// We need to track if we’re currently in a tight or loose container. | ||
var tightStack = [] | ||
var defaultHandlers = { | ||
enter: { | ||
blockQuote: onenterblockquote, | ||
codeFenced: onentercodefenced, | ||
codeFencedFenceInfo: buffer, | ||
codeFencedFenceMeta: buffer, | ||
codeIndented: onentercodeindented, | ||
codeText: onentercodetext, | ||
content: onentercontent, | ||
definition: onenterdefinition, | ||
definitionDestinationString: onenterdefinitiondestinationstring, | ||
definitionLabelString: buffer, | ||
definitionTitleString: buffer, | ||
emphasis: onenteremphasis, | ||
htmlFlow: onenterhtmlflow, | ||
htmlText: onenterhtml, | ||
image: onenterimage, | ||
label: buffer, | ||
link: onenterlink, | ||
listItemMarker: onenterlistitemmarker, | ||
listItemValue: onenterlistitemvalue, | ||
listOrdered: onenterlistordered, | ||
listUnordered: onenterlistunordered, | ||
paragraph: onenterparagraph, | ||
reference: buffer, | ||
resource: onenterresource, | ||
resourceDestinationString: onenterresourcedestinationstring, | ||
resourceTitleString: buffer, | ||
setextHeading: onentersetextheading, | ||
strong: onenterstrong | ||
}, | ||
exit: { | ||
atxHeading: onexitatxheading, | ||
atxHeadingSequence: onexitatxheadingsequence, | ||
autolinkEmail: onexitautolinkemail, | ||
autolinkProtocol: onexitautolinkprotocol, | ||
blockQuote: onexitblockquote, | ||
characterEscapeValue: onexitdata, | ||
characterReferenceMarkerHexadecimal: onexitcharacterreferencemarker, | ||
characterReferenceMarkerNumeric: onexitcharacterreferencemarker, | ||
characterReferenceValue: onexitcharacterreferencevalue, | ||
codeFenced: onexitflowcode, | ||
codeFencedFence: onexitcodefencedfence, | ||
codeFencedFenceInfo: onexitcodefencedfenceinfo, | ||
codeFencedFenceMeta: resume, | ||
codeFlowValue: onexitcodeflowvalue, | ||
codeIndented: onexitflowcode, | ||
codeText: onexitcodetext, | ||
codeTextData: onexitdata, | ||
data: onexitdata, | ||
definition: onexitdefinition, | ||
definitionDestinationString: onexitdefinitiondestinationstring, | ||
definitionLabelString: onexitdefinitionlabelstring, | ||
definitionTitleString: onexitdefinitiontitlestring, | ||
emphasis: onexitemphasis, | ||
hardBreakEscape: onexithardbreak, | ||
hardBreakTrailing: onexithardbreak, | ||
htmlFlow: onexithtml, | ||
htmlFlowData: onexitdata, | ||
htmlText: onexithtml, | ||
htmlTextData: onexitdata, | ||
image: onexitmedia, | ||
label: onexitlabel, | ||
labelText: onexitlabeltext, | ||
lineEnding: onexitlineending, | ||
link: onexitmedia, | ||
listOrdered: onexitlistordered, | ||
listUnordered: onexitlistunordered, | ||
paragraph: onexitparagraph, | ||
reference: resume, | ||
referenceString: onexitreferencestring, | ||
resource: resume, | ||
resourceDestinationString: onexitresourcedestinationstring, | ||
resourceTitleString: onexitresourcetitlestring, | ||
setextHeading: onexitsetextheading, | ||
setextHeadingLineSequence: onexitsetextheadinglinesequence, | ||
setextHeadingText: onexitsetextheadingtext, | ||
strong: onexitstrong, | ||
thematicBreak: onexitthematicbreak | ||
} | ||
} | ||
// Combine the HTML extensions with the default handlers. | ||
// An HTML extension is an object whose fields are either `enter` or `exit` | ||
// (reflecting whether a token is entered or exited). | ||
// The values at such fields are objects mapping names of tokens to handlers. | ||
// Handlers are called, respectively, when a token is opened or closed, with | ||
// that token, and a context as `this`. | ||
var handlers = combineExtensions( | ||
[ | ||
{ | ||
enter: { | ||
blockQuote: onenterblockquote, | ||
codeFenced: onentercodefenced, | ||
codeFencedFenceInfo: buffer, | ||
codeFencedFenceMeta: buffer, | ||
codeIndented: onentercodeindented, | ||
codeText: onentercodetext, | ||
content: onentercontent, | ||
definition: onenterdefinition, | ||
definitionDestinationString: onenterdefinitiondestinationstring, | ||
definitionLabelString: buffer, | ||
definitionTitleString: buffer, | ||
emphasis: onenteremphasis, | ||
htmlFlow: onenterhtmlflow, | ||
htmlText: onenterhtml, | ||
image: onenterimage, | ||
label: buffer, | ||
link: onentermedia, | ||
listItemMarker: onenterlistitemmarker, | ||
listItemValue: onenterlistitemvalue, | ||
listOrdered: onenterlistordered, | ||
listUnordered: onenterlistunordered, | ||
paragraph: onenterparagraph, | ||
reference: onenterreference, | ||
resource: onenterresource, | ||
resourceDestinationString: onenterresourcedestinationstring, | ||
resourceTitleString: buffer, | ||
setextHeading: onentersetextheading, | ||
strong: onenterstrong | ||
}, | ||
exit: { | ||
atxHeading: onexitatxheading, | ||
atxHeadingSequence: onexitatxheadingsequence, | ||
autolinkEmail: onexitautolinkemail, | ||
autolinkProtocol: onexitautolinkprotocol, | ||
blockQuote: onexitblockquote, | ||
characterEscapeValue: onexitcharacterescapevalue, | ||
characterReferenceMarkerHexadecimal: onexitcharacterreferencemarker, | ||
characterReferenceMarkerNumeric: onexitcharacterreferencemarker, | ||
characterReferenceValue: onexitcharacterreferencevalue, | ||
codeFenced: onexitflowcode, | ||
codeFencedFence: onexitcodefencedfence, | ||
codeFencedFenceInfo: onexitcodefencedfenceinfo, | ||
codeFencedFenceMeta: resume, | ||
codeFlowValue: onexitcodeflowvalue, | ||
codeIndented: onexitflowcode, | ||
codeText: onexitcodetext, | ||
codeTextData: onexitdata, | ||
data: onexitdata, | ||
definition: onexitdefinition, | ||
definitionDestinationString: onexitdefinitiondestinationstring, | ||
definitionLabelString: onexitdefinitionlabelstring, | ||
definitionTitleString: onexitdefinitiontitlestring, | ||
emphasis: onexitemphasis, | ||
hardBreakEscape: onexithardbreak, | ||
hardBreakTrailing: onexithardbreak, | ||
htmlFlow: onexithtml, | ||
htmlFlowData: onexitdata, | ||
htmlText: onexithtml, | ||
htmlTextData: onexitdata, | ||
image: onexitmedia, | ||
label: onexitlabel, | ||
labelText: onexitlabeltext, | ||
lineEnding: onexitlineending, | ||
link: onexitmedia, | ||
listOrdered: onexitlistordered, | ||
listUnordered: onexitlistunordered, | ||
paragraph: onexitparagraph, | ||
reference: onexitreference, | ||
referenceString: onexitreferencestring, | ||
resource: resume, | ||
resourceDestinationString: onexitresourcedestinationstring, | ||
resourceTitleString: onexitresourcetitlestring, | ||
setextHeading: onexitsetextheading, | ||
setextHeadingLineSequence: onexitsetextheadinglinesequence, | ||
setextHeadingText: onexitsetextheadingtext, | ||
strong: onexitstrong, | ||
thematicBreak: onexitthematicbreak | ||
} | ||
} | ||
].concat(settings.htmlExtensions || []) | ||
[defaultHandlers].concat(miniflat(settings.htmlExtensions)) | ||
) | ||
// Handlers often need to keep track of some state. | ||
// That state is provided here as a key-value store (an object). | ||
var data = {tightStack: tightStack} | ||
// The context for handlers references a couple of useful functions. | ||
// In handlers from extensions, those can be accessed at `this`. | ||
// For the handlers here, they can be accessed directly. | ||
var context = { | ||
@@ -132,52 +191,66 @@ lineEndingIfNeeded: lineEndingIfNeeded, | ||
} | ||
// Generally, micromark copies line endings (`'\r'`, `'\n'`, `'\r\n'`) in the | ||
// markdown document over to the compiled HTML. | ||
// In some cases, such as `> a`, CommonMark requires that extra line endings | ||
// are added: `<blockquote>\n<p>a</p>\n</blockquote>`. | ||
// This variable holds the default line ending when given (or `undefined`), | ||
// and in the latter case will be updated to the first found line ending if | ||
// there is one. | ||
var lineEndingStyle = settings.defaultLineEnding | ||
var media | ||
// Return the function that handles a slice of events. | ||
return compile | ||
// Deal w/ a slice of events. | ||
// Return either the empty string if there’s nothing of note to return, or the | ||
// result when done. | ||
function compile(slice) { | ||
events = events.concat(slice) | ||
return slice[slice.length - 1] === codes.eof ? done() : '' | ||
} | ||
function done() { | ||
var length = events.length - 1 | ||
var index = -1 | ||
var start = 0 | ||
var listStack = [] | ||
var length | ||
var index | ||
var start | ||
var listStack | ||
var handler | ||
var result | ||
var event | ||
chunkedSplice(events, events.length, 0, slice) | ||
if (events[events.length - 1] !== codes.eof) { | ||
return '' | ||
} | ||
length = events.length - 1 | ||
index = -1 | ||
start = 0 | ||
listStack = [] | ||
while (++index < length) { | ||
event = events[index] | ||
// Figure out the line ending style used in the document. | ||
if ( | ||
!lineEndingStyle && | ||
(event[1].type === types.lineEnding || | ||
event[1].type === types.lineEndingBlank) | ||
(events[index][1].type === types.lineEnding || | ||
events[index][1].type === types.lineEndingBlank) | ||
) { | ||
lineEndingStyle = event[2].sliceSerialize(event[1]) | ||
lineEndingStyle = events[index][2].sliceSerialize(events[index][1]) | ||
} | ||
// We preprocess lists to clean up a couple of line endings, and to infer | ||
// whether the list is loose or not. | ||
// Preprocess lists to infer whether the list is loose or not. | ||
if ( | ||
event[1].type === types.listOrdered || | ||
event[1].type === types.listUnordered | ||
events[index][1].type === types.listOrdered || | ||
events[index][1].type === types.listUnordered | ||
) { | ||
if (event[0] === 'enter') { | ||
if (events[index][0] === 'enter') { | ||
listStack.push(index) | ||
} else { | ||
prepareList(events.slice(listStack.pop(index), index)) | ||
prepareList(events.slice(listStack.pop(), index)) | ||
} | ||
} | ||
// We detect definitions here, and move them to the front. | ||
if (event[1].type === types.definition) { | ||
if (event[0] === 'enter') { | ||
body = body.concat(events.slice(start, index)) | ||
// Move definitions to the front. | ||
if (events[index][1].type === types.definition) { | ||
if (events[index][0] === 'enter') { | ||
chunkedSplice(body, body.length, 0, events.slice(start, index)) | ||
start = index | ||
} else { | ||
head = head.concat(events.slice(start, index + 1)) | ||
chunkedSplice(head, head.length, 0, events.slice(start, index + 1)) | ||
start = index + 1 | ||
@@ -188,6 +261,8 @@ } | ||
result = head.concat(body, events.slice(start, length)) | ||
chunkedSplice(head, head.length, 0, body) | ||
chunkedSplice(head, head.length, 0, events.slice(start, length)) | ||
result = head | ||
index = -1 | ||
// Handle the start of the document, if defined. | ||
if (handlers.enter.null) { | ||
@@ -197,2 +272,3 @@ handlers.enter.null.call(context) | ||
// Handle all events. | ||
while (++index < length) { | ||
@@ -209,2 +285,3 @@ handler = handlers[result[index][0]] | ||
// Handle the end of the document, if defined. | ||
if (handlers.exit.null) { | ||
@@ -217,2 +294,3 @@ handlers.exit.null.call(context) | ||
// Figure out whether lists are loose or not. | ||
function prepareList(events) { | ||
@@ -229,8 +307,4 @@ var length = events.length - 1 // Skip close. | ||
if ( | ||
event[1].type === types.listUnordered || | ||
event[1].type === types.listOrdered || | ||
event[1].type === types.blockQuote | ||
) { | ||
atMarker = false | ||
if (event[1]._container) { | ||
atMarker = undefined | ||
@@ -251,3 +325,3 @@ if (event[0] === 'enter') { | ||
if (atMarker) { | ||
atMarker = false | ||
atMarker = undefined | ||
} else { | ||
@@ -258,3 +332,3 @@ loose = true | ||
} else { | ||
atMarker = false | ||
atMarker = undefined | ||
} | ||
@@ -266,2 +340,3 @@ } | ||
// Set data into the key-value store. | ||
function setData(key, value) { | ||
@@ -271,2 +346,3 @@ data[key] = value | ||
// Get data from the key-value store. | ||
function getData(key) { | ||
@@ -276,2 +352,3 @@ return data[key] | ||
// Capture some of the output data. | ||
function buffer() { | ||
@@ -281,2 +358,3 @@ buffers.push([]) | ||
// Stop capturing and access the output data. | ||
function resume() { | ||
@@ -286,2 +364,3 @@ return buffers.pop().join('') | ||
// Output (parts of) HTML tags. | ||
function tag(value) { | ||
@@ -293,2 +372,3 @@ if (!tags) return | ||
// Output raw data. | ||
function raw(value) { | ||
@@ -299,2 +379,3 @@ setData('lastWasTag') | ||
// Output an extra line ending. | ||
function lineEnding() { | ||
@@ -304,2 +385,3 @@ raw(lineEndingStyle || '\n') | ||
// Output an extra line ending if the previous value wasn’t EOF/EOL. | ||
function lineEndingIfNeeded() { | ||
@@ -321,2 +403,3 @@ var buffer = buffers[buffers.length - 1] | ||
// Make a value safe for injection in HTML (except w/ `ignoreEncode`). | ||
function encode(value) { | ||
@@ -331,2 +414,8 @@ return getData('ignoreEncode') | ||
// Make a value safe for injection as a URL. | ||
// This does encode unsafe characters with percent-encoding, skipping already | ||
// encoded sequences (`normalizeUri`). | ||
// Further unsafe characters are encoded as character references (`encode`). | ||
// Finally, if the URL includes an unknown protocol (such as the dangerous | ||
// `javascript:`), the value is ignored. | ||
function url(url, protocol) { | ||
@@ -395,5 +484,5 @@ var value = encode(normalizeUri(url || '')) | ||
tag('<li>') | ||
setData('expectFirstItem') | ||
// “Hack” to prevent a line ending from showing up if the item is empty. | ||
setData('lastWasTag') | ||
setData('expectFirstItem') | ||
} | ||
@@ -478,3 +567,3 @@ | ||
function onexitflowcode() { | ||
// Send an extra line feed, if we saw data, and the code didn’t end in one. | ||
// Send an extra line feed, if we saw data. | ||
if (getData('flowCodeSeenData')) lineEndingIfNeeded() | ||
@@ -487,36 +576,26 @@ tag('</code></pre>') | ||
function onenterimage(token) { | ||
onentermedia(token) | ||
tags = undefined | ||
function onenterimage() { | ||
mediaStack.push({image: true}) | ||
tags = undefined // Disallow tags. | ||
} | ||
function onentermedia(token) { | ||
media = {type: token.type, label: ''} | ||
mediaStack.push(media) | ||
function onenterlink() { | ||
mediaStack.push({}) | ||
} | ||
function onexitlabeltext(token) { | ||
media.labelId = normalizeIdentifier(this.sliceSerialize(token)) | ||
mediaStack[mediaStack.length - 1].labelId = this.sliceSerialize(token) | ||
} | ||
function onexitlabel() { | ||
media.label = resume() | ||
mediaStack[mediaStack.length - 1].label = resume() | ||
} | ||
function onenterreference() { | ||
buffer() | ||
media.reference = '' | ||
} | ||
function onexitreferencestring(token) { | ||
media.referenceId = normalizeIdentifier(this.sliceSerialize(token)) | ||
mediaStack[mediaStack.length - 1].referenceId = this.sliceSerialize(token) | ||
} | ||
function onexitreference() { | ||
media.reference = resume() | ||
} | ||
function onenterresource() { | ||
buffer() // We can have line endings in the resource, ignore them. | ||
media.destination = '' | ||
mediaStack[mediaStack.length - 1].destination = '' | ||
} | ||
@@ -526,2 +605,4 @@ | ||
buffer() | ||
// Ignore encoding the result, as we’ll first percent-encode the URL and | ||
// encode it manually after. | ||
setData('ignoreEncode', true) | ||
@@ -531,3 +612,3 @@ } | ||
function onexitresourcedestinationstring() { | ||
media.destination = resume() | ||
mediaStack[mediaStack.length - 1].destination = resume() | ||
setData('ignoreEncode') | ||
@@ -537,3 +618,3 @@ } | ||
function onexitresourcetitlestring() { | ||
media.title = resume() | ||
mediaStack[mediaStack.length - 1].title = resume() | ||
} | ||
@@ -543,8 +624,6 @@ | ||
var index = mediaStack.length - 1 | ||
var context | ||
var title | ||
context = | ||
var media = mediaStack[index] | ||
var context = | ||
media.destination === undefined | ||
? definitions[media.referenceId || media.labelId] | ||
? definitions[normalizeIdentifier(media.referenceId || media.labelId)] | ||
: media | ||
@@ -555,3 +634,3 @@ | ||
while (index--) { | ||
if (mediaStack[index].type === 'image') { | ||
if (mediaStack[index].image) { | ||
tags = undefined | ||
@@ -562,6 +641,3 @@ break | ||
assert(context, 'expected a context media object to be defined') | ||
title = context.title | ||
if (media.type === 'image') { | ||
if (media.image) { | ||
tag('<img src="' + url(context.destination, protocolSrc) + '" alt="') | ||
@@ -574,5 +650,5 @@ raw(media.label) | ||
tag(title ? ' title="' + title + '"' : '') | ||
tag(context.title ? ' title="' + context.title + '"' : '') | ||
if (media.type === 'image') { | ||
if (media.image) { | ||
tag(' />') | ||
@@ -586,3 +662,2 @@ } else { | ||
mediaStack.pop() | ||
media = mediaStack[mediaStack.length - 1] | ||
} | ||
@@ -592,4 +667,3 @@ | ||
buffer() | ||
media = {} | ||
mediaStack.push(media) | ||
mediaStack.push({}) | ||
} | ||
@@ -600,3 +674,3 @@ | ||
resume() | ||
media.labelId = normalizeIdentifier(this.sliceSerialize(token)) | ||
mediaStack[mediaStack.length - 1].labelId = this.sliceSerialize(token) | ||
} | ||
@@ -610,3 +684,3 @@ | ||
function onexitdefinitiondestinationstring() { | ||
media.destination = resume() | ||
mediaStack[mediaStack.length - 1].destination = resume() | ||
setData('ignoreEncode') | ||
@@ -616,7 +690,7 @@ } | ||
function onexitdefinitiontitlestring() { | ||
media.title = resume() | ||
mediaStack[mediaStack.length - 1].title = resume() | ||
} | ||
function onexitdefinition() { | ||
var id = media.labelId | ||
var id = normalizeIdentifier(mediaStack[mediaStack.length - 1].labelId) | ||
@@ -626,7 +700,6 @@ resume() | ||
if (!own.call(definitions, id)) { | ||
definitions[id] = media | ||
definitions[id] = mediaStack[mediaStack.length - 1] | ||
} | ||
mediaStack.pop() | ||
media = mediaStack[mediaStack.length - 1] | ||
} | ||
@@ -754,6 +827,2 @@ | ||
function onexitcharacterescapevalue(token) { | ||
raw(encode(this.sliceSerialize(token))) | ||
} | ||
function onexitcharacterreferencemarker(token) { | ||
@@ -760,0 +829,0 @@ setData('characterReferenceType', token.type) |
// This module is compiled away! | ||
// | ||
// Parsing markdown comes with a couple of constants, such as minimum or maximum | ||
// sizes of certain sequences. | ||
// Additionally, there are a couple symbols used inside micromark. | ||
// These are all defined here, but compiled away by scripts. | ||
exports.asciiAlphaCaseDifference = 32 // The shift between lower- and uppercase is `0x20`. | ||
@@ -35,1 +40,2 @@ exports.attentionSideBefore = 1 // Symbol to mark an attention sequence as before content: `*a` | ||
exports.thematicBreakMarkerCountMin = 3 // At least 3 asterisks, dashes, or underscores are needed. | ||
exports.v8MaxSafeChunkSize = 10000 // V8 (and potentially other engines) has problems injecting giant arrays into other arrays, hence we operate in chunks. |
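// A plausible reconstruction of the `chunked-splice` helper used throughout
// this diff (the real module is not shown here; this sketch only illustrates
// how `v8MaxSafeChunkSize` would be applied):
//
//   function chunkedSplice(list, start, remove, items) {
//     var end = list.length
//     var chunkStart = 0
//     var parameters
//     // Support negative `start`, like `Array#splice` does.
//     if (start < 0) start = end + start > 0 ? end + start : 0
//     else start = start > end ? end : start
//     list.splice(start, remove)
//     // Insert in chunks, so `splice.apply` never receives a giant argument
//     // list (which crashes V8).
//     while (chunkStart < items.length) {
//       parameters = items.slice(chunkStart, chunkStart + 10000)
//       parameters.unshift(start + chunkStart, 0)
//       ;[].splice.apply(list, parameters)
//       chunkStart += 10000
//     }
//   }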
// This module is compiled away! | ||
// | ||
// Here is the list of all types of tokens exposed by micromark, with a short | ||
// explanation of what they include and where they are found. | ||
// In picking names, generally, the rule is to be as explicit as possible | ||
// instead of reusing names. | ||
// For example, there is a `definitionDestination` and a `resourceDestination`, | ||
// instead of one shared name. | ||
@@ -3,0 +10,0 @@ // Generic type for data, such as in a title, a destination, etc. |
// This module is generated by `script/`. | ||
// | ||
// CommonMark handles attention (emphasis, strong) markers based on what comes | ||
// before or after them. | ||
// One such distinction is whether those characters are Unicode punctuation. | ||
// The regex below is generated from the Unicode data. | ||
module.exports = /[!-/:-@[-`{-~\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u2E52\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/ |
@@ -22,21 +22,14 @@ var codes = require('./character/codes') | ||
var whitespace = require('./tokenize/whitespace') | ||
var resolveText = require('./initialize/text').resolver | ||
var document = {} | ||
var contentInitial = {} | ||
var flowInitial = {} | ||
var flow = {} | ||
var string = {} | ||
var text = {} | ||
var insideSpan = {} | ||
var document = (exports.document = {}) | ||
var contentInitial = (exports.contentInitial = {}) | ||
var flowInitial = (exports.flowInitial = {}) | ||
var flow = (exports.flow = {}) | ||
var string = (exports.string = {}) | ||
var text = (exports.text = {}) | ||
var insideSpan = (exports.insideSpan = {}) | ||
exports.document = document | ||
exports.contentInitial = contentInitial | ||
exports.flowInitial = flowInitial | ||
exports.flow = flow | ||
exports.string = string | ||
exports.text = text | ||
exports.insideSpan = insideSpan | ||
insideSpan[codes.eof] = [attention, resolveText] | ||
insideSpan[codes.eof] = attention | ||
document[codes.asterisk] = list | ||
@@ -43,0 +36,0 @@ document[codes.plusSign] = list |
@@ -43,5 +43,6 @@ exports.tokenize = initializeContent | ||
function lineStart(code) { | ||
var token = effects.enter(types.chunkText) | ||
token.contentType = constants.contentTypeText | ||
token.previous = previous | ||
var token = effects.enter(types.chunkText, { | ||
contentType: constants.contentTypeText, | ||
previous: previous | ||
}) | ||
@@ -48,0 +49,0 @@ if (previous) { |
@@ -124,4 +124,3 @@ exports.tokenize = initializeDocument | ||
childFlow.write(codes.eof) | ||
childFlow = undefined | ||
childToken = undefined | ||
childToken = childFlow = undefined | ||
} | ||
@@ -215,4 +214,3 @@ | ||
inspectResult.continued = continued | ||
self.containerState = undefined | ||
self.interrupt = undefined | ||
self.interrupt = self.containerState = undefined | ||
return ok(code) | ||
@@ -219,0 +217,0 @@ } |
@@ -83,4 +83,2 @@ exports.tokenize = initializeFlow | ||
function startContent(code) { | ||
var token | ||
assert( | ||
@@ -92,5 +90,5 @@ code !== codes.eof && !markdownLineEnding(code), | ||
effects.enter(types.content) | ||
token = effects.enter(types.chunkContent) | ||
token.contentType = constants.contentTypeContent | ||
previous = token | ||
previous = effects.enter(types.chunkContent, { | ||
contentType: constants.contentTypeContent | ||
}) | ||
@@ -130,7 +128,7 @@ return data(code) | ||
effects.exit(types.chunkContent)._break = true | ||
token = effects.enter(types.chunkContent) | ||
token.contentType = constants.contentTypeContent | ||
token.previous = previous | ||
previous.next = token | ||
previous = token | ||
token = effects.enter(types.chunkContent, { | ||
contentType: constants.contentTypeContent, | ||
previous: previous | ||
}) | ||
previous = previous.next = token | ||
return data | ||
@@ -137,0 +135,0 @@ } |
exports.text = initializeFactory('text') | ||
exports.string = initializeFactory('string') | ||
exports.resolver = {resolveAll: resolveAllText} | ||
@@ -9,3 +10,3 @@ var codes = require('../character/codes') | ||
function initializeFactory(field) { | ||
return {tokenize: initializeText} | ||
return {tokenize: initializeText, resolveAll: resolveAllText} | ||
@@ -54,1 +55,29 @@ function initializeText(effects) { | ||
} | ||
function resolveAllText(events) { | ||
var index = -1 | ||
var dataEnter | ||
var event | ||
while (++index <= events.length) { | ||
event = events[index] | ||
if (dataEnter === undefined) { | ||
if (event && event[0] === 'enter' && event[1].type === types.data) { | ||
dataEnter = index | ||
index++ | ||
} | ||
} else if (!event || event[1].type !== types.data) { | ||
// Don’t do anything if there is one data token. | ||
if (index !== dataEnter + 2) { | ||
events[dataEnter][1].end = events[index - 1][1].end | ||
events.splice(dataEnter + 2, index - dataEnter - 2) | ||
index = dataEnter + 2 | ||
} | ||
dataEnter = undefined | ||
} | ||
} | ||
return events | ||
} |
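// E.g.: three adjacent `data` tokens
//
//   [enter d1, exit d1, enter d2, exit d2, enter d3, exit d3]
//
// collapse into one span: `d1.end` is set to `d3.end` and the four inner
// events are spliced out, leaving `[enter d1, exit d1]`.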
@@ -8,14 +8,12 @@ module.exports = createParser | ||
var constructs = require('./constructs') | ||
var createTokenizer = require('./util/create-tokenizer') | ||
var combineExtensions = require('./util/combine-extensions') | ||
var miniflat = require('./util/miniflat') | ||
function createParser(options) { | ||
var settings = options || {} | ||
var parser | ||
parser = { | ||
var parser = { | ||
defined: [], | ||
constructs: combineExtensions( | ||
[constructs].concat(settings.extensions || []) | ||
[constructs].concat(miniflat(settings.extensions)) | ||
), | ||
@@ -22,0 +20,0 @@ content: create(initializeContent), |
@@ -19,4 +19,3 @@ module.exports = stream | ||
emitter.readable = true | ||
emitter.writable = true | ||
emitter.writable = emitter.readable = true | ||
emitter.write = write | ||
@@ -23,0 +22,0 @@ emitter.end = end |
@@ -8,5 +8,6 @@ exports.tokenize = tokenizeAttention | ||
var shallow = require('../util/shallow') | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var classifyCharacter = require('../util/classify-character') | ||
var movePoint = require('../util/move-point') | ||
var resolveAll = require('../util/resolve-all') | ||
var movePoint = require('../util/move-point') | ||
@@ -18,3 +19,2 @@ // Internal type for markers that could turn into emphasis or strong sequences. | ||
function resolveAllAttention(events, context) { | ||
var length = events.length | ||
var index = -1 | ||
@@ -26,9 +26,9 @@ var attention | ||
var indexOpen | ||
var eventsUpTo | ||
var use | ||
var openingSequence | ||
var closingSequence | ||
var nextEvents | ||
// Walk through all events. | ||
while (++index < length) { | ||
while (++index < events.length) { | ||
closing = events[index][1] | ||
@@ -106,20 +106,25 @@ | ||
eventsUpTo = [].concat( | ||
// Before. | ||
events.slice(0, indexOpen - 1), | ||
// If there are more markers in the opening, add them before. | ||
opening.end.offset - opening.start.offset | ||
? [ | ||
['enter', opening, context], | ||
['exit', opening, context] | ||
] | ||
: [], | ||
// Opening. | ||
[ | ||
['enter', attention, context], | ||
['enter', openingSequence, context], | ||
['exit', openingSequence, context], | ||
['enter', text, context] | ||
], | ||
// Between. | ||
nextEvents = [] | ||
// If there are more markers in the opening, add them before. | ||
if (opening.end.offset - opening.start.offset) { | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [ | ||
['enter', opening, context], | ||
['exit', opening, context] | ||
]) | ||
} | ||
// Opening. | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [ | ||
['enter', attention, context], | ||
['enter', openingSequence, context], | ||
['exit', openingSequence, context], | ||
['enter', text, context] | ||
]) | ||
// Between. | ||
chunkedSplice( | ||
nextEvents, | ||
nextEvents.length, | ||
0, | ||
resolveAll( | ||
@@ -129,26 +134,33 @@ context.parser.constructs.insideSpan.null, | ||
context | ||
), | ||
// Closing. | ||
[ | ||
['exit', text, context], | ||
['enter', closingSequence, context], | ||
['exit', closingSequence, context], | ||
['exit', attention, context] | ||
] | ||
) | ||
) | ||
// After. | ||
events = eventsUpTo.concat( | ||
// If there are more markers in the closing, add them after. | ||
closing.end.offset - closing.start.offset | ||
? [ | ||
['enter', closing, context], | ||
['exit', closing, context] | ||
] | ||
: [], | ||
events.slice(index + 2) | ||
// Closing. | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [ | ||
['exit', text, context], | ||
['enter', closingSequence, context], | ||
['exit', closingSequence, context], | ||
['exit', attention, context] | ||
]) | ||
// If there are more markers in the closing, add them after. | ||
if (closing.end.offset - closing.start.offset) { | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [ | ||
['enter', closing, context], | ||
['exit', closing, context] | ||
]) | ||
} | ||
chunkedSplice( | ||
events, | ||
indexOpen - 1, | ||
index - indexOpen + 3, | ||
nextEvents | ||
) | ||
length = events.length | ||
index = eventsUpTo.length - 1 | ||
index = | ||
indexOpen + | ||
nextEvents.length - | ||
(closing.end.offset - closing.start.offset ? 4 : 2) | ||
break | ||
@@ -194,3 +206,2 @@ } | ||
function more(code) { | ||
var token | ||
var after | ||
@@ -206,3 +217,2 @@ var open | ||
token = effects.exit(attentionSequence) | ||
after = classifyCharacter(code) | ||
@@ -218,4 +228,3 @@ open = !after || (before && after === constants.characterGroupPunctuation) | ||
token._open = open | ||
token._close = close | ||
effects.exit(attentionSequence, {_open: open, _close: close}) | ||
@@ -222,0 +231,0 @@ return ok(code) |
@@ -13,3 +13,2 @@ exports.tokenize = tokenizeAutolink | ||
function tokenizeAutolink(effects, ok, nok) { | ||
var token | ||
var size | ||
@@ -29,3 +28,3 @@ | ||
effects.exit(types.autolinkMarker) | ||
token = effects.enter(types.autolinkProtocol) | ||
effects.enter(types.autolinkProtocol) | ||
return open | ||
@@ -122,4 +121,4 @@ } | ||
if (code === codes.greaterThan) { | ||
token.type = types.autolinkEmail | ||
effects.exit(types.autolinkEmail) | ||
// Exit, then change the type. | ||
effects.exit(types.autolinkProtocol).type = types.autolinkEmail | ||
return end(code) | ||
@@ -126,0 +125,0 @@ } |
@@ -23,3 +23,3 @@ exports.tokenize = tokenizeBlockQuoteStart | ||
if (!self.containerState.started) { | ||
effects.enter(types.blockQuote)._container = true | ||
effects.enter(types.blockQuote, {_container: true}) | ||
self.containerState.started = true | ||
@@ -26,0 +26,0 @@ } |
@@ -11,2 +11,3 @@ exports.tokenize = tokenizeCodeFenced | ||
var types = require('../constant/types') | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var prefixSize = require('../util/prefix-size') | ||
@@ -32,7 +33,7 @@ var createSpaceTokenizer = require('./partial-space') | ||
if (fences < 2 && events[length - 3][1].type === types.lineEnding) { | ||
return [].concat( | ||
events.slice(0, -3), | ||
[events[length - 1]], | ||
events.slice(-3, -1) | ||
) | ||
chunkedSplice(events, length - 3, 3, [ | ||
events[length - 1], | ||
events[length - 3], | ||
events[length - 2] | ||
]) | ||
} | ||
@@ -45,4 +46,4 @@ | ||
var self = this | ||
var closingFence = {tokenize: tokenizeClosingFence, partial: true} | ||
var initialPrefix = prefixSize(this.events) | ||
var closingFence = {tokenize: tokenizeClosingFence, partial: true} | ||
var sizeOpen = 0 | ||
@@ -90,3 +91,3 @@ var marker | ||
effects.enter(types.codeFencedFenceInfo) | ||
effects.enter(types.chunkString).contentType = constants.contentTypeString | ||
effects.enter(types.chunkString, {contentType: constants.contentTypeString}) | ||
return info(code) | ||
@@ -119,3 +120,3 @@ } | ||
effects.enter(types.codeFencedFenceMeta) | ||
effects.enter(types.chunkString).contentType = constants.contentTypeString | ||
effects.enter(types.chunkString, {contentType: constants.contentTypeString}) | ||
return meta(code) | ||
@@ -122,0 +123,0 @@ } |
@@ -10,2 +10,3 @@ exports.tokenize = tokenizeCodeIndented | ||
var types = require('../constant/types') | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var prefixSize = require('../util/prefix-size') | ||
@@ -46,8 +47,6 @@ var createSpaceTokenizer = require('./partial-space') | ||
return [].concat( | ||
[['enter', code, context]], | ||
events.slice(0, index + 1), | ||
[['exit', code, context]], | ||
events.slice(index + 1) | ||
) | ||
chunkedSplice(events, 0, 0, [['enter', code, context]]) | ||
chunkedSplice(events, index + 2, 0, [['exit', code, context]]) | ||
return events | ||
} | ||
@@ -98,3 +97,3 @@ | ||
return effects.check(continuedIndent, continued, end) | ||
return effects.check(continuedIndent, continued, end)(code) | ||
} | ||
@@ -101,0 +100,0 @@ |
@@ -105,3 +105,3 @@ exports.tokenize = tokenizeDefinition | ||
effects.enter(types.chunkString).contentType = constants.contentTypeString | ||
effects.enter(types.chunkString, {contentType: constants.contentTypeString}) | ||
return label(code) | ||
@@ -178,3 +178,3 @@ } | ||
effects.enter(types.definitionDestinationString) | ||
effects.enter(types.chunkString).contentType = constants.contentTypeString | ||
effects.enter(types.chunkString, {contentType: constants.contentTypeString}) | ||
effects.consume(code) | ||
@@ -199,3 +199,3 @@ return code === codes.backslash ? destinationRawEscape : destinationRaw | ||
effects.enter(types.definitionDestinationString) | ||
effects.enter(types.chunkString).contentType = constants.contentTypeString | ||
effects.enter(types.chunkString, {contentType: constants.contentTypeString}) | ||
effects.consume(code) | ||
@@ -353,3 +353,3 @@ return code === codes.backslash | ||
effects.enter(types.chunkString).contentType = constants.contentTypeString | ||
effects.enter(types.chunkString, {contentType: constants.contentTypeString}) | ||
effects.consume(code) | ||
@@ -356,0 +356,0 @@ return code === codes.backslash ? escape : data |
@@ -10,2 +10,3 @@ exports.tokenize = tokenizeAtxHeading | ||
var types = require('../constant/types') | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var createSpaceTokenizer = require('./partial-space') | ||
@@ -18,3 +19,2 @@ | ||
var text | ||
var result | ||
@@ -43,4 +43,2 @@ // Prefix whitespace, part of the opening. | ||
result = events.slice(0, contentStart) | ||
if (contentEnd > contentStart) { | ||
@@ -59,11 +57,11 @@ content = { | ||
result = result.concat( | ||
[['enter', content, context]], | ||
[['enter', text, context]], | ||
[['exit', text, context]], | ||
[['exit', content, context]] | ||
) | ||
chunkedSplice(events, contentStart, contentEnd - contentStart + 1, [ | ||
['enter', content, context], | ||
['enter', text, context], | ||
['exit', text, context], | ||
['exit', content, context] | ||
]) | ||
} | ||
return result.concat(events.slice(contentEnd + 1)) | ||
return events | ||
} | ||
@@ -70,0 +68,0 @@ |
@@ -154,5 +154,2 @@ exports.tokenize = tokenizeHtml | ||
function tagName(code) { | ||
var raw | ||
var basic | ||
if ( | ||
@@ -164,6 +161,3 @@ code === codes.eof || | ||
) { | ||
raw = raws.indexOf(buffer) > -1 | ||
basic = basics.indexOf(buffer) > -1 | ||
if (raw && startTag && code !== codes.slash) { | ||
if (raws.indexOf(buffer) > -1 && startTag && code !== codes.slash) { | ||
kind = constants.htmlRaw | ||
@@ -173,3 +167,3 @@ return self.interrupt ? ok(code) : continuation(code) | ||
if (basic) { | ||
if (basics.indexOf(buffer) > -1) { | ||
kind = constants.htmlBasic | ||
@@ -395,3 +389,3 @@ | ||
continuationAtLineEnding | ||
) | ||
)(code) | ||
} | ||
@@ -398,0 +392,0 @@ |
@@ -13,4 +13,5 @@ exports.tokenize = tokenizeLabelEnd | ||
var normalizeIdentifier = require('../util/normalize-identifier') | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var resolveAll = require('../util/resolve-all') | ||
var shallow = require('../util/shallow') | ||
var resolveAll = require('../util/resolve-all') | ||
var spaceOrLineEnding = require('./partial-space-or-line-ending') | ||
@@ -24,7 +25,6 @@ var createSpaceTokenizer = require('./partial-space') | ||
function resolveAllLabelEnd(events) { | ||
var length = events.length | ||
var index = -1 | ||
var token | ||
while (++index < length) { | ||
while (++index < events.length) { | ||
token = events[index][1] | ||
@@ -34,12 +34,10 @@ | ||
events[index][0] === 'enter' && | ||
((!token._used && | ||
(token.type === types.labelImage || | ||
token.type === types.labelLink || | ||
token.type === types.labelEnd)) || | ||
(token.type === types.data && token._wasLabel)) | ||
!token._used && | ||
(token.type === types.labelImage || | ||
token.type === types.labelLink || | ||
token.type === types.labelEnd) | ||
) { | ||
// Remove the marker. | ||
events.splice(index + 1, token.type === types.labelImage ? 4 : 2) | ||
token.type = types.data | ||
// Remove the marker. | ||
events.splice(index + 1, 2) | ||
length -= 2 | ||
} | ||
@@ -61,2 +59,3 @@ } | ||
var offset | ||
var nextEvents | ||
@@ -70,4 +69,11 @@ // Find an opening. | ||
if (openIndex) { | ||
// Mark other link openings as data, as we can’t have links in links. | ||
// Mark other link openings as inactive, as we can’t have links in | ||
// links. | ||
if (type === types.link && token.type === types.labelLink) { | ||
// Already marked as inactive by a previous call, we can stop | ||
// iterating. | ||
if (token._inactive) { | ||
break | ||
} | ||
token._inactive = true | ||
@@ -77,19 +83,16 @@ } | ||
// Find where the link or image starts. | ||
else { | ||
if ( | ||
(token.type === types.labelImage || token.type === types.labelLink) && | ||
!token._inactive && | ||
!token._used | ||
) { | ||
openIndex = index | ||
type = token.type === types.labelLink ? types.link : types.image | ||
offset = token.type === types.labelLink ? 0 : 2 | ||
} | ||
else if ( | ||
(token.type === types.labelImage || token.type === types.labelLink) && | ||
!token._balanced && | ||
!token._inactive && | ||
!token._used | ||
) { | ||
openIndex = index | ||
type = token.type === types.labelLink ? types.link : types.image | ||
offset = token.type === types.labelLink ? 0 : 2 | ||
} | ||
} | ||
// Exit. | ||
else { | ||
if (!closeIndex && !token._used && token.type === types.labelEnd) { | ||
closeIndex = index | ||
} | ||
else if (!closeIndex && !token._used && token.type === types.labelEnd) { | ||
closeIndex = index | ||
} | ||
@@ -116,11 +119,23 @@ } | ||
return [].concat( | ||
events.slice(0, openIndex), | ||
[ | ||
['enter', group, context], | ||
['enter', label, context] | ||
], | ||
// Opening marker. | ||
events.slice(openIndex + 1, openIndex + offset + 3), | ||
[['enter', text, context]], | ||
nextEvents = [ | ||
['enter', group, context], | ||
['enter', label, context] | ||
] | ||
// Opening marker. | ||
chunkedSplice( | ||
nextEvents, | ||
nextEvents.length, | ||
0, | ||
events.slice(openIndex + 1, openIndex + offset + 3) | ||
) | ||
// Text open. | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [['enter', text, context]]) | ||
// Between. | ||
chunkedSplice( | ||
nextEvents, | ||
nextEvents.length, | ||
0, | ||
resolveAll( | ||
@@ -130,10 +145,22 @@ context.parser.constructs.insideSpan.null, | ||
context | ||
), | ||
[['exit', text, context]], | ||
// Closing marker. | ||
events.slice(closeIndex - 2, closeIndex), | ||
[['exit', label, context]], | ||
events.slice(closeIndex + 1), | ||
[['exit', group, context]] | ||
) | ||
) | ||
// Text close, marker close, label close. | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [ | ||
['exit', text, context], | ||
events[closeIndex - 2], | ||
events[closeIndex - 1], | ||
['exit', label, context] | ||
]) | ||
// Reference, resource, or so. | ||
chunkedSplice(nextEvents, nextEvents.length, 0, events.slice(closeIndex + 1)) | ||
// Media close. | ||
chunkedSplice(nextEvents, nextEvents.length, 0, [['exit', group, context]]) | ||
chunkedSplice(events, openIndex, events.length, nextEvents) | ||
return events | ||
} | ||
@@ -150,3 +177,2 @@ | ||
var index = self.events.length | ||
var labelEnd | ||
@@ -157,2 +183,3 @@ // Find an opening. | ||
!self.events[index][1]._used && | ||
!self.events[index][1]._balanced && | ||
(self.events[index][1].type === types.labelImage || | ||
@@ -176,3 +203,6 @@ self.events[index][1].type === types.labelLink) | ||
labelEnd = effects.enter(types.labelEnd) | ||
labelIdentifier = normalizeIdentifier( | ||
self.sliceSerialize({start: labelStart.end, end: self.now()}) | ||
) | ||
effects.enter(types.labelEnd) | ||
effects.enter(types.labelMarker) | ||
@@ -182,5 +212,2 @@ effects.consume(code) | ||
effects.exit(types.labelEnd) | ||
labelIdentifier = normalizeIdentifier( | ||
self.sliceSerialize({start: labelStart.end, end: labelEnd.start}) | ||
) | ||
return afterLabelEnd | ||
@@ -217,4 +244,3 @@ } | ||
function balancedButNok(code) { | ||
labelStart.type = types.data | ||
labelStart._wasLabel = true | ||
labelStart._balanced = true | ||
return nok(code) | ||
@@ -258,3 +284,3 @@ } | ||
effects.enter(types.resourceDestinationString) | ||
effects.enter(types.chunkString).contentType = constants.contentTypeString | ||
effects.enter(types.chunkString, {contentType: constants.contentTypeString}) | ||
return destinationRaw(code) | ||
@@ -269,3 +295,3 @@ } | ||
effects.enter(types.resourceDestinationString) | ||
effects.enter(types.chunkString).contentType = constants.contentTypeString | ||
effects.enter(types.chunkString, {contentType: constants.contentTypeString}) | ||
return destinationEnclosed(code) | ||
@@ -423,3 +449,3 @@ } | ||
effects.enter(types.chunkString).contentType = constants.contentTypeString | ||
effects.enter(types.chunkString, {contentType: constants.contentTypeString}) | ||
return title(code) | ||
@@ -485,3 +511,3 @@ } | ||
effects.enter(types.referenceString) | ||
effects.enter(types.chunkString).contentType = constants.contentTypeString | ||
effects.enter(types.chunkString, {contentType: constants.contentTypeString}) | ||
return atStart | ||
@@ -488,0 +514,0 @@ } |
@@ -59,7 +59,6 @@ exports.tokenize = tokenizeListStart | ||
self.containerState.type = types.listUnordered | ||
effects.enter(self.containerState.type)._container = true | ||
effects.enter(self.containerState.type, {_container: true}) | ||
} | ||
token = effects.enter(types.listItemPrefix) | ||
token._size = 0 | ||
token = effects.enter(types.listItemPrefix, {_size: 0}) | ||
return atMarker(code) | ||
@@ -79,9 +78,8 @@ } | ||
self.containerState.type = types.listOrdered | ||
effects.enter(self.containerState.type)._container = true | ||
effects.enter(self.containerState.type, {_container: true}) | ||
} | ||
token = effects.enter(types.listItemPrefix) | ||
token = effects.enter(types.listItemPrefix, {_size: 1}) | ||
effects.enter(types.listItemValue) | ||
effects.consume(code) | ||
token._size = 1 | ||
return self.interrupt ? afterValue : inside | ||
@@ -148,3 +146,3 @@ } | ||
if (markdownSpace(code)) { | ||
effects.enter(types.listItemPrefixWhitespace)._size = 1 | ||
effects.enter(types.listItemPrefixWhitespace, {_size: 1}) | ||
effects.consume(code) | ||
@@ -190,9 +188,7 @@ effects.exit(types.listItemPrefixWhitespace) | ||
if (self.containerState.furtherBlankLines || !markdownSpace(code)) { | ||
self.containerState.initialBlankLine = undefined | ||
self.containerState.furtherBlankLines = undefined | ||
self.containerState.furtherBlankLines = self.containerState.initialBlankLine = undefined | ||
return effects.attempt(nextItem, onItem, nok)(code) | ||
} | ||
self.containerState.initialBlankLine = undefined | ||
self.containerState.furtherBlankLines = undefined | ||
self.containerState.furtherBlankLines = self.containerState.initialBlankLine = undefined | ||
return effects.attempt( | ||
@@ -199,0 +195,0 @@ indent, |
@@ -22,4 +22,3 @@ exports.tokenize = tokenizeSpaceOrLineEnding | ||
if (markdownSpace(code)) { | ||
token = effects.enter(types.whitespace) | ||
token._size = 0 | ||
token = effects.enter(types.whitespace, {_size: 0}) | ||
return whitespace(code) | ||
@@ -26,0 +25,0 @@ } |
@@ -17,4 +17,3 @@ module.exports = createSpaceTokenizer | ||
if (markdownSpace(code)) { | ||
token = effects.enter(type) | ||
token._size = 0 | ||
token = effects.enter(type, {_size: 0}) | ||
return prefix(code) | ||
@@ -28,4 +27,4 @@ } | ||
if (token._size < limit && markdownSpace(code)) { | ||
effects.consume(code) | ||
token._size++ | ||
effects.consume(code) | ||
return prefix | ||
@@ -32,0 +31,0 @@ } |
@@ -9,2 +9,3 @@ exports.tokenize = tokenizeWhitespace | ||
var types = require('../constant/types') | ||
var chunkedSplice = require('../util/chunked-splice') | ||
var shallow = require('../util/shallow') | ||
@@ -32,8 +33,4 @@ var createSpaceTokenizer = require('./partial-space') | ||
result = [].concat( | ||
[['enter', token, context]], | ||
events.slice(0, 2), | ||
[['exit', token, context]], | ||
events.slice(2) | ||
) | ||
chunkedSplice(events, 0, 0, [['enter', token, context]]) | ||
chunkedSplice(events, 3, 0, [['exit', token, context]]) | ||
} else { | ||
@@ -54,4 +51,3 @@ head.type = types.lineSuffix | ||
if (markdownSpace(code)) { | ||
token = effects.enter(types.whitespace) | ||
token._size = 0 | ||
token = effects.enter(types.whitespace, {_size: 0}) | ||
return whitespace(code) | ||
@@ -95,6 +91,5 @@ } | ||
// Mark as normal data. | ||
token.type = types.data | ||
effects.exit(types.data) | ||
effects.exit(types.whitespace).type = types.data | ||
return ok(code) | ||
} | ||
} |
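Several of these files now route event-array mutation through a new `chunked-splice` utility. Its implementation is not part of this diff, but every call site has the shape `chunkedSplice(list, start, remove, items)`, mirroring `Array#splice` while inserting `items` in bounded chunks so that `splice.apply` never receives more arguments than the engine permits on very large arrays. A plausible sketch under those assumptions (the chunk size here is invented, and `start` is assumed to be in range):

// Sketch: like list.splice(start, remove, ...items), but caps how many
// arguments reach `splice` per call, so huge `items` arrays cannot
// overflow the engine's argument-count limit.
var chunkSize = 10000 // assumption; the real constant is not in this diff

function chunkedSplice(list, start, remove, items) {
  var splice = Array.prototype.splice
  var end = Math.min(items.length, chunkSize)
  var chunkStart

  // First call removes `remove` items and inserts the first chunk.
  splice.apply(list, [start, remove].concat(items.slice(0, end)))

  // Later calls only insert, each placed after the previous chunk.
  while (end < items.length) {
    chunkStart = end
    end = Math.min(items.length, end + chunkSize)
    splice.apply(
      list,
      [start + chunkStart, 0].concat(items.slice(chunkStart, end))
    )
  }

  return list
}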
@@ -9,2 +9,6 @@ module.exports = classifyCharacter | ||
// Classify whether a character is unicode whitespace, unicode punctuation, or | ||
// anything else. | ||
// Used for attention (emphasis, strong), whose sequences can open or close | ||
// based on the class of surrounding characters. | ||
function classifyCharacter(code) { | ||
@@ -11,0 +15,0 @@ if ( |
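Only the new comment and the opening of `classifyCharacter` are visible in this hunk. For context, a hedged sketch of what such a classifier looks like; the group constants and the two helpers below are assumptions for illustration, not confirmed by this diff:

// Sketch: whitespace-like codes get one group, unicode punctuation
// another, and everything else `undefined`. Attention (emphasis,
// strong) uses the groups of the characters around a run of `*`/`_`.
var groupWhitespace = 1 // assumed constants
var groupPunctuation = 2

function isWhitespaceRough(code) {
  return /\s/.test(String.fromCharCode(code)) // rough stand-in
}

function isPunctuationRough(code) {
  return /[!-/:-@[-`{-~]/.test(String.fromCharCode(code)) // ASCII subset only
}

function classifyCharacterSketch(code) {
  // EOF (null) and the negative internal codes (EOLs, tabs) read as whitespace.
  if (code === null || code < 0 || isWhitespaceRough(code)) {
    return groupWhitespace
  }
  if (isPunctuationRough(code)) {
    return groupPunctuation
  }
}

console.log(classifyCharacterSketch(32)) // => 1 (space)
console.log(classifyCharacterSketch(42)) // => 2 ('*')
console.log(classifyCharacterSketch(97)) // => undefined ('a')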
module.exports = combineExtensions | ||
var own = require('../constant/has-own-property') | ||
var miniflat = require('./miniflat') | ||
var chunkedSplice = require('./chunked-splice') | ||
// Combine several syntax extensions into one. | ||
function combineExtensions(extensions) { | ||
@@ -22,3 +25,2 @@ var all = {} | ||
var code | ||
var constructs | ||
@@ -30,25 +32,21 @@ for (hook in extension) { | ||
for (code in right) { | ||
constructs = mergeConstructs( | ||
[].concat(right[code] || []), | ||
left[code] = constructs( | ||
miniflat(right[code]), | ||
own.call(left, code) ? left[code] : [] | ||
) | ||
left[code] = constructs.length === 1 ? constructs[0] : constructs | ||
} | ||
} | ||
} | ||
function mergeConstructs(constructs, between) { | ||
var length = constructs.length | ||
var index = -1 | ||
var before = [] | ||
var after = [] | ||
var list | ||
function constructs(list, existing) { | ||
var length = list.length | ||
var index = -1 | ||
var before = [] | ||
while (++index < length) { | ||
list = constructs[index].add === 'after' ? after : before | ||
list.push(constructs[index]) | ||
} | ||
while (++index < length) { | ||
;(list[index].add === 'after' ? existing : before).push(list[index]) | ||
} | ||
return before.concat(between, after) | ||
} | ||
chunkedSplice(existing, 0, 0, before) | ||
return existing | ||
} |
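`combine-extensions` (and `resolve-all`, below) also lean on a new `miniflat` helper, replacing the `[].concat(value || [])` idiom. Its source is not in this diff; from its call sites it normalizes "one construct, a list of constructs, or nothing" into a list. A minimal sketch:

// Sketch: wrap a single value in an array; pass arrays (and other
// array-likes) through untouched; map null/undefined to [].
// Assumes non-null inputs are objects, as constructs are.
function miniflat(value) {
  return value === null || value === undefined
    ? []
    : 'length' in value
    ? value
    : [value]
}

function noop() {}
console.log(miniflat(undefined)) // => []
console.log(miniflat({tokenize: noop})) // => [{tokenize: noop}]
console.log(miniflat([1, 2])) // => [1, 2] (same reference, no copy)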
@@ -5,2 +5,3 @@ module.exports = combineExtensions | ||
// Combine several HTML extensions into one. | ||
function combineExtensions(extensions) { | ||
@@ -7,0 +8,0 @@ var handlers = {} |
@@ -8,2 +8,3 @@ module.exports = createTokenizer | ||
var markdownLineEnding = require('../character/markdown-line-ending') | ||
var chunkedSplice = require('./chunked-splice') | ||
var shallow = require('./shallow') | ||
@@ -13,6 +14,14 @@ var serializeChunks = require('./serialize-chunks') | ||
var resolveAll = require('./resolve-all') | ||
var miniflat = require('./miniflat') | ||
// Create a tokenizer. | ||
// Tokenizers deal with one type of data (e.g., containers, flow, text). | ||
// The parser is the object dealing with it all. | ||
// `initialize` works like other constructs, except that only its `tokenize` | ||
// function is used, in which case it doesn’t receive an `ok` or `nok`. | ||
// `from` can be given to set the point before the first character, although | ||
// when further lines are indented, they must be set with `defineSkip`. | ||
function createTokenizer(parser, initialize, from) { | ||
var point = from ? assign({}, from) : {line: 1, column: 1, offset: 0} | ||
var columnStart = [] | ||
var columnStart = {} | ||
var resolveAllConstructs = [] | ||
@@ -38,3 +47,3 @@ var chunks = [] | ||
// State and tools for resolving, serializing. | ||
// State and tools for resolving and serializing. | ||
var context = { | ||
@@ -51,6 +60,13 @@ previous: codes.eof, | ||
// The state function. | ||
var state = initialize.tokenize.call(context, effects) | ||
if (initialize.resolveAll) { | ||
resolveAllConstructs.push(initialize) | ||
} | ||
// Track which character we expect to be consumed, to catch bugs. | ||
var expectedCode | ||
// Store where we are in the input stream. | ||
point._index = index | ||
@@ -61,4 +77,4 @@ point._bufferIndex = bufferIndex | ||
function write(value) { | ||
chunks.push(value) | ||
function write(chunk) { | ||
chunks.push(chunk) | ||
@@ -68,3 +84,3 @@ main() | ||
// Exit if we’re not done, resolve might change stuff. | ||
if (value !== codes.eof) { | ||
if (chunk !== codes.eof) { | ||
return [] | ||
@@ -79,3 +95,3 @@ } | ||
// Add EOF token. | ||
context.events.push(value) | ||
context.events.push(chunk) | ||
return context.events | ||
@@ -89,3 +105,3 @@ } | ||
function sliceSerialize(token) { | ||
return serializeChunks(sliceChunks(chunks, token)) | ||
return serializeChunks(sliceStream(token)) | ||
} | ||
@@ -103,8 +119,4 @@ | ||
columnStart[value.line] = value.column | ||
// If we’re currently at that point: | ||
if (point.column === 1 && point.line === value.line) { | ||
point.column = value.column | ||
point.offset += value.column - 1 | ||
} | ||
accountForPotentialSkip() | ||
debug('position: define skip: `%j`', point) | ||
} | ||
@@ -117,2 +129,6 @@ | ||
// Main loop (note that `index` and `bufferIndex` are modified by `consume`). | ||
// Here is where we walk through the chunks, which either include strings of | ||
// several characters, or numerical character codes. | ||
// The reason to do this in a loop instead of a call is so the stack can | ||
// drain. | ||
function main() { | ||
@@ -125,2 +141,3 @@ var chunk | ||
// If we’re in a buffer chunk, loop through it. | ||
if (typeof chunk === 'string') { | ||
@@ -141,5 +158,6 @@ chunkIndex = index | ||
} else { | ||
assert.equal(consumed, true, 'expected character to be consumed') | ||
expectedCode = chunk | ||
debug('main: passing `%s` to %s (chunk)', expectedCode, state.name) | ||
consumed = undefined | ||
debug('main: passing `%s` to %s (chunk)', expectedCode, state.name) | ||
state = state(expectedCode) | ||
@@ -155,3 +173,3 @@ } | ||
expectedCode, | ||
'expected given code to equal consumed code' | ||
'expected given code to equal expected code' | ||
) | ||
@@ -173,12 +191,9 @@ | ||
point.line++ | ||
if (point.line in columnStart) { | ||
point.column = columnStart[point.line] | ||
point.offset += columnStart[point.line] - 1 | ||
} else { | ||
point.column = 1 | ||
} | ||
} | ||
// Anything else. | ||
else if (code !== codes.virtualSpace) { | ||
point.column = 1 | ||
point.offset += code === codes.carriageReturnLineFeed ? 2 : 1 | ||
accountForPotentialSkip() | ||
debug('position: after eol: `%j`', point) | ||
} else if (code !== codes.virtualSpace) { | ||
point.column++ | ||
point.offset++ | ||
} | ||
@@ -199,9 +214,6 @@ | ||
if (code !== codes.virtualSpace) { | ||
point.offset += code === codes.carriageReturnLineFeed ? 2 : 1 | ||
} | ||
point._bufferIndex = bufferIndex | ||
point._index = index | ||
// Expose the previous character. | ||
context.previous = code | ||
@@ -211,4 +223,6 @@ } | ||
// Start a token. | ||
function enter(type) { | ||
var token = {type: type, start: now()} | ||
function enter(type, fields) { | ||
var token = fields || {} | ||
token.type = type | ||
token.start = now() | ||
@@ -227,3 +241,3 @@ assert.equal(typeof type, 'string', 'expected string type') | ||
// Stop a token. | ||
function exit(type) { | ||
function exit(type, fields) { | ||
assert.equal(typeof type, 'string', 'expected string type') | ||
@@ -236,2 +250,4 @@ assert.notEqual(type.length, 0, 'expected non-empty string') | ||
if (fields) assign(token, fields) | ||
assert.equal(type, token.type, 'expected exit token to match current token') | ||
@@ -263,5 +279,8 @@ | ||
function constructFactory(onreturn, props) { | ||
// Factory to attempt/check/interrupt. | ||
function constructFactory(onreturn, fields) { | ||
return hook | ||
// Handle either an object mapping codes to constructs, a list of | ||
// constructs, or a single construct. | ||
function hook(constructs, returnState, bogusState) { | ||
@@ -279,8 +298,5 @@ var listOfConstructs | ||
function handleMapOfConstructs(code) { | ||
var list = | ||
code === null | ||
? [] | ||
: [].concat(constructs[code] || [], constructs.null || []) | ||
var list = miniflat(constructs[code]).concat(miniflat(constructs.null)) | ||
if (list.length) { | ||
if (code !== codes.eof && list.length) { | ||
return handleListOfConstructs(list)(code) | ||
@@ -306,3 +322,3 @@ } | ||
function start(code) { | ||
info = store(construct) | ||
info = store() | ||
@@ -314,3 +330,3 @@ if (!construct.partial) { | ||
return construct.tokenize.call( | ||
assign({}, context, props), | ||
assign({}, context, fields), | ||
effects, | ||
@@ -351,5 +367,8 @@ ok, | ||
if (construct.resolve) { | ||
context.events = context.events | ||
.slice(0, from) | ||
.concat(construct.resolve(context.events.slice(from), context)) | ||
chunkedSplice( | ||
context.events, | ||
from, | ||
context.events.length - from, | ||
construct.resolve(context.events.slice(from), context) | ||
) | ||
} | ||
@@ -383,10 +402,2 @@ | ||
index = startIndex | ||
// Sometimes, we reset to directly after a line ending. | ||
// Make sure to indent. | ||
if (point.line in columnStart && point.column === 1) { | ||
point.column = columnStart[point.line] | ||
point.offset += columnStart[point.line] - 1 | ||
} | ||
bufferIndex = startBufferIndex | ||
@@ -397,4 +408,13 @@ context.previous = startPrevious | ||
stack = startStack | ||
accountForPotentialSkip() | ||
debug('position: restore: `%j`', point) | ||
} | ||
} | ||
function accountForPotentialSkip() { | ||
if (point.line in columnStart && point.column === 1) { | ||
point.column = columnStart[point.line] | ||
point.offset += columnStart[point.line] - 1 | ||
} | ||
} | ||
} |
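The skip logic that was previously duplicated across `defineSkip`, the end-of-line branch of position tracking, and the restore path is now a single `accountForPotentialSkip` helper, with `columnStart` switched from an array to a plain object keyed by line number. A small self-contained sketch of the behaviour it centralizes:

// Sketch: a 'skip' says that content on a given line really starts at
// some column (indented container content). Whenever the point sits at
// column 1 of such a line, it jumps ahead and the offset is adjusted.
var columnStart = {} // line number -> column where content starts
var point = {line: 2, column: 1, offset: 10}

function defineSkip(value) {
  columnStart[value.line] = value.column
  accountForPotentialSkip()
}

function accountForPotentialSkip() {
  if (point.line in columnStart && point.column === 1) {
    point.column = columnStart[point.line]
    point.offset += columnStart[point.line] - 1
  }
}

defineSkip({line: 2, column: 5})
console.log(point) // => {line: 2, column: 5, offset: 14}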
module.exports = flatMap | ||
var chunkedSplice = require('./chunked-splice') | ||
// Note: `a` could be given here, which is then passed to the map function. | ||
@@ -11,3 +13,3 @@ // It functions as a rest/spread, but smaller. | ||
while (++index < length) { | ||
result = result.concat(map(array[index], a)) | ||
chunkedSplice(result, result.length, 0, map(array[index], a)) | ||
} | ||
@@ -14,0 +16,0 @@ |
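A usage note on the `flat-map` change: `chunkedSplice(result, result.length, 0, map(array[index], a))` appends the mapped items to `result` in place, whereas the old `result = result.concat(…)` allocated a fresh array on every iteration. With the `chunkedSplice` sketch from above:

// Appending in place: start at `result.length`, remove nothing.
var result = [1, 2]
chunkedSplice(result, result.length, 0, [3, 4]) // mutates `result`
console.log(result) // => [1, 2, 3, 4]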
@@ -6,2 +6,3 @@ module.exports = lowercase | ||
// Lowercase a character code. | ||
function lowercase(code) { | ||
@@ -8,0 +9,0 @@ return code > codes.atSign && code < codes.leftSquareBracket |
module.exports = resolveAll | ||
var miniflat = require('./miniflat') | ||
function resolveAll(oneOrMoreConstructs, events, context) { | ||
var constructs = [].concat(oneOrMoreConstructs) | ||
var constructs = miniflat(oneOrMoreConstructs) | ||
var called = [] | ||
@@ -6,0 +8,0 @@ var length = constructs.length |
@@ -5,3 +5,3 @@ module.exports = subtokenize | ||
var codes = require('../character/codes') | ||
var splice = require('../constant/splice') | ||
var chunkedSplice = require('./chunked-splice') | ||
var types = require('../constant/types') | ||
@@ -20,2 +20,3 @@ var flatMap = require('./flat-map') | ||
var otherEvent | ||
var slice | ||
@@ -43,3 +44,3 @@ while (++index < events.length) { | ||
splice.apply(events, [index, 2].concat(subevents)) | ||
chunkedSplice(events, index, 2, subevents) | ||
index += subevents.length - 1 | ||
@@ -114,8 +115,5 @@ event[1]._subevents = undefined | ||
// Switch container exit w/ line endings. | ||
splice.apply( | ||
events, | ||
[lineIndex, index - lineIndex + 1, event].concat( | ||
events.slice(lineIndex, index) | ||
) | ||
) | ||
slice = events.slice(lineIndex, index) | ||
slice.unshift(event) | ||
chunkedSplice(events, lineIndex, index - lineIndex + 1, slice) | ||
index = lineIndex | ||
@@ -129,3 +127,3 @@ } | ||
function unravelLinkedTokens(token, context) { | ||
function unravelLinkedTokenImplementation(token, context) { | ||
var hasGfmTaskListHack = token._gfmTasklistFirstContentOfListItem | ||
@@ -148,3 +146,3 @@ var tokenizer | ||
if (!token.next) { | ||
stream = stream.concat(codes.eof) | ||
stream.push(codes.eof) | ||
} | ||
@@ -164,9 +162,15 @@ | ||
} | ||
} | ||
if (token.next) { | ||
function unravelLinkedTokens(token, context) { | ||
while (token) { | ||
// Loop over the tokens because a recursive function would cause a stack overflow | ||
unravelLinkedTokenImplementation(token, context) | ||
if (!token.next) { | ||
// Done! | ||
divideTokens(token) | ||
} | ||
// Unravel the next token. | ||
unravelLinkedTokens(token.next, context) | ||
} else { | ||
// Done! | ||
divideTokens(token) | ||
token = token.next | ||
} | ||
@@ -173,0 +177,0 @@ } |
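The `subtokenize` change above is the other stack-safety fix in this release: the tail-recursive `unravelLinkedTokens` becomes a `while` loop inside a small wrapper, because chunk tokens form a linked list with one node per chunk, and a long enough document could otherwise exhaust the call stack. The shape of the transformation, on a generic linked list:

// Sketch: the recursive walk...
function walkRecursive(node, visit) {
  visit(node)
  if (node.next) walkRecursive(node.next, visit) // one stack frame per node
}

// ...rewritten as iteration, which uses constant stack space.
function walkIterative(node, visit) {
  while (node) {
    visit(node)
    node = node.next
  }
}

// Demo list: a -> b -> c
var c = {value: 'c', next: null}
var b = {value: 'b', next: c}
var a = {value: 'a', next: b}
walkIterative(a, function (node) {
  console.log(node.value) // a, b, c
})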
{ | ||
"name": "micromark", | ||
"version": "2.8.0", | ||
"version": "2.8.1", | ||
"description": "small commonmark compliant markdown parser with positional info and concrete tokens", | ||
@@ -67,2 +67,5 @@ "license": "MIT", | ||
"concat-stream": "^2.0.0", | ||
"eslint-plugin-es": "^3.0.0", | ||
"eslint-plugin-security": "^1.0.0", | ||
"eslint-plugin-sonarjs": "^0.5.0", | ||
"gzip-size-cli": "^3.0.0", | ||
@@ -107,18 +110,45 @@ "ms": "^2.0.0", | ||
"esnext": false, | ||
"prettier": true, | ||
"envs": [ | ||
"shared-node-browser" | ||
], | ||
"overrides": [ | ||
{ | ||
"files": [ | ||
"lib/**/*.js" | ||
], | ||
"plugin": [ | ||
"es" | ||
], | ||
"extends": [ | ||
"plugin:es/no-2015", | ||
"plugin:security/recommended", | ||
"plugin:sonarjs/recommended" | ||
], | ||
"rules": { | ||
"complexity": "off", | ||
"es/no-object-assign": "off", | ||
"guard-for-in": "off", | ||
"max-depth": "off", | ||
"no-multi-assign": "off", | ||
"no-unmodified-loop-condition": "off", | ||
"security/detect-object-injection": "off", | ||
"sonarjs/cognitive-complexity": "off", | ||
"unicorn/explicit-length-check": "off", | ||
"unicorn/prefer-includes": "off", | ||
"unicorn/prefer-number-properties": "off" | ||
} | ||
}, | ||
{ | ||
"files": [ | ||
"test/**/*.js" | ||
], | ||
"rules": { | ||
"import/no-unassigned-import": "off" | ||
} | ||
} | ||
], | ||
"ignores": [ | ||
"dist/" | ||
], | ||
"prettier": true, | ||
"rules": { | ||
"complexity": "off", | ||
"max-depth": "off", | ||
"guard-for-in": "off", | ||
"import/no-unassigned-import": "off", | ||
"no-lonely-if": "off", | ||
"no-unmodified-loop-condition": "off", | ||
"unicorn/explicit-length-check": "off", | ||
"unicorn/prefer-includes": "off", | ||
"unicorn/prefer-number-properties": "off", | ||
"unicorn/prefer-type-error": "off" | ||
} | ||
] | ||
}, | ||
@@ -125,0 +155,0 @@ "remarkConfig": { |