micromark-extension-gfm-autolink-literal
Advanced tools
Comparing version 1.0.3 to 1.0.4
@@ -1,6 +0,11 @@ | ||
/** @type {HtmlExtension} */ | ||
/** | ||
* Extension for `micromark` that can be passed in `htmlExtensions` to support | ||
* GFM autolink literals when serializing to HTML. | ||
* | ||
* @type {HtmlExtension} | ||
*/ | ||
export const gfmAutolinkLiteralHtml: HtmlExtension | ||
export type CompileContext = import('micromark-util-types').CompileContext | ||
export type Handle = import('micromark-util-types').Handle | ||
export type HtmlExtension = import('micromark-util-types').HtmlExtension | ||
export type Handle = import('micromark-util-types').Handle | ||
export type CompileContext = import('micromark-util-types').CompileContext | ||
export type Token = import('micromark-util-types').Token |
/** | ||
* @typedef {import('micromark-util-types').CompileContext} CompileContext | ||
* @typedef {import('micromark-util-types').Handle} Handle | ||
* @typedef {import('micromark-util-types').HtmlExtension} HtmlExtension | ||
* @typedef {import('micromark-util-types').Handle} Handle | ||
* @typedef {import('micromark-util-types').CompileContext} CompileContext | ||
* @typedef {import('micromark-util-types').Token} Token | ||
@@ -10,3 +10,10 @@ */ | ||
/** @type {HtmlExtension} */ | ||
// To do: next major: expose functions that yield the extension. | ||
/** | ||
* Extension for `micromark` that can be passed in `htmlExtensions` to support | ||
* GFM autolink literals when serializing to HTML. | ||
* | ||
* @type {HtmlExtension} | ||
*/ | ||
export const gfmAutolinkLiteralHtml = { | ||
@@ -16,3 +23,6 @@ exit: {literalAutolinkEmail, literalAutolinkHttp, literalAutolinkWww} | ||
/** @type {Handle} */ | ||
/** | ||
* @this {CompileContext} | ||
* @type {Handle} | ||
*/ | ||
function literalAutolinkWww(token) { | ||
@@ -22,3 +32,6 @@ anchorFromToken.call(this, token, 'http://') | ||
/** @type {Handle} */ | ||
/** | ||
* @this {CompileContext} | ||
* @type {Handle} | ||
*/ | ||
function literalAutolinkEmail(token) { | ||
@@ -28,3 +41,6 @@ anchorFromToken.call(this, token, 'mailto:') | ||
/** @type {Handle} */ | ||
/** | ||
* @this {CompileContext} | ||
* @type {Handle} | ||
*/ | ||
function literalAutolinkHttp(token) { | ||
@@ -37,3 +53,3 @@ anchorFromToken.call(this, token) | ||
* @param {Token} token | ||
* @param {string} [protocol] | ||
* @param {string | null | undefined} [protocol] | ||
* @returns {void} | ||
@@ -40,0 +56,0 @@ */ |
@@ -1,9 +0,15 @@ | ||
/** @type {Extension} */ | ||
/** | ||
* Extension for `micromark` that can be passed in `extensions` to enable GFM | ||
* autolink literal syntax. | ||
* | ||
* @type {Extension} | ||
*/ | ||
export const gfmAutolinkLiteral: Extension | ||
export type Code = import('micromark-util-types').Code | ||
export type ConstructRecord = import('micromark-util-types').ConstructRecord | ||
export type Event = import('micromark-util-types').Event | ||
export type Extension = import('micromark-util-types').Extension | ||
export type ConstructRecord = import('micromark-util-types').ConstructRecord | ||
export type Tokenizer = import('micromark-util-types').Tokenizer | ||
export type Previous = import('micromark-util-types').Previous | ||
export type State = import('micromark-util-types').State | ||
export type Event = import('micromark-util-types').Event | ||
export type Code = import('micromark-util-types').Code | ||
export type TokenizeContext = import('micromark-util-types').TokenizeContext | ||
export type Tokenizer = import('micromark-util-types').Tokenizer |
/** | ||
* @typedef {import('micromark-util-types').Code} Code | ||
* @typedef {import('micromark-util-types').ConstructRecord} ConstructRecord | ||
* @typedef {import('micromark-util-types').Event} Event | ||
* @typedef {import('micromark-util-types').Extension} Extension | ||
* @typedef {import('micromark-util-types').ConstructRecord} ConstructRecord | ||
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer | ||
* @typedef {import('micromark-util-types').Previous} Previous | ||
* @typedef {import('micromark-util-types').State} State | ||
* @typedef {import('micromark-util-types').Event} Event | ||
* @typedef {import('micromark-util-types').Code} Code | ||
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext | ||
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer | ||
*/ | ||
import {ok as assert} from 'uvu/assert' | ||
import { | ||
@@ -16,5 +16,3 @@ asciiAlpha, | ||
asciiControl, | ||
asciiDigit, | ||
markdownLineEndingOrSpace, | ||
markdownLineEnding, | ||
unicodePunctuation, | ||
@@ -25,8 +23,8 @@ unicodeWhitespace | ||
const www = {tokenize: tokenizeWww, partial: true} | ||
const wwwPrefix = {tokenize: tokenizeWwwPrefix, partial: true} | ||
const domain = {tokenize: tokenizeDomain, partial: true} | ||
const path = {tokenize: tokenizePath, partial: true} | ||
const punctuation = {tokenize: tokenizePunctuation, partial: true} | ||
const namedCharacterReference = { | ||
tokenize: tokenizeNamedCharacterReference, | ||
const trail = {tokenize: tokenizeTrail, partial: true} | ||
const emailDomainDotTrail = { | ||
tokenize: tokenizeEmailDomainDotTrail, | ||
partial: true | ||
@@ -36,3 +34,6 @@ } | ||
const wwwAutolink = {tokenize: tokenizeWwwAutolink, previous: previousWww} | ||
const httpAutolink = {tokenize: tokenizeHttpAutolink, previous: previousHttp} | ||
const protocolAutolink = { | ||
tokenize: tokenizeProtocolAutolink, | ||
previous: previousProtocol | ||
} | ||
const emailAutolink = {tokenize: tokenizeEmailAutolink, previous: previousEmail} | ||
@@ -43,3 +44,10 @@ | ||
/** @type {Extension} */ | ||
// To do: next major: expose functions that yield the extension. | ||
/** | ||
* Extension for `micromark` that can be passed in `extensions` to enable GFM | ||
* autolink literal syntax. | ||
* | ||
* @type {Extension} | ||
*/ | ||
export const gfmAutolinkLiteral = {text} | ||
@@ -61,22 +69,47 @@ | ||
text[codes.underscore] = emailAutolink | ||
text[codes.uppercaseH] = [emailAutolink, httpAutolink] | ||
text[codes.lowercaseH] = [emailAutolink, httpAutolink] | ||
text[codes.uppercaseH] = [emailAutolink, protocolAutolink] | ||
text[codes.lowercaseH] = [emailAutolink, protocolAutolink] | ||
text[codes.uppercaseW] = [emailAutolink, wwwAutolink] | ||
text[codes.lowercaseW] = [emailAutolink, wwwAutolink] | ||
/** @type {Tokenizer} */ | ||
// To do: perform email autolink literals on events, afterwards. | ||
// That’s where `markdown-rs` and `cmark-gfm` perform it. | ||
// It should look for `@`, then for atext backwards, and then for a label | ||
// forwards. | ||
// To do: `mailto:`, `xmpp:` protocol as prefix. | ||
/** | ||
* Email autolink literal. | ||
* | ||
* ```markdown | ||
* > | a contact@example.org b | ||
* ^^^^^^^^^^^^^^^^^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeEmailAutolink(effects, ok, nok) { | ||
const self = this | ||
/** @type {boolean | undefined} */ | ||
let dot | ||
/** @type {boolean} */ | ||
let hasDot | ||
/** @type {boolean|undefined} */ | ||
let hasDigitInLastSegment | ||
let data | ||
return start | ||
/** @type {State} */ | ||
/** | ||
* Start of email autolink literal. | ||
* | ||
* ```markdown | ||
* > | a contact@example.org b | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function start(code) { | ||
if ( | ||
!gfmAtext(code) || | ||
!previousEmail(self.previous) || | ||
!previousEmail.call(self, self.previous) || | ||
previousUnbalanced(self.events) | ||
@@ -92,3 +125,12 @@ ) { | ||
/** @type {State} */ | ||
/** | ||
* In email atext. | ||
* | ||
* ```markdown | ||
* > | a contact@example.org b | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function atext(code) { | ||
@@ -102,3 +144,3 @@ if (gfmAtext(code)) { | ||
effects.consume(code) | ||
return label | ||
return emailDomain | ||
} | ||
@@ -109,50 +151,76 @@ | ||
/** @type {State} */ | ||
function label(code) { | ||
/** | ||
* In email domain. | ||
* | ||
* The reference code is a bit overly complex as it handles the `@`, of which | ||
* there may be just one. | ||
* Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L318> | ||
* | ||
* ```markdown | ||
* > | a contact@example.org b | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function emailDomain(code) { | ||
// Dot followed by alphanumerical (not `-` or `_`). | ||
if (code === codes.dot) { | ||
return effects.check(punctuation, done, dotContinuation)(code) | ||
return effects.check( | ||
emailDomainDotTrail, | ||
emailDomainAfter, | ||
emailDomainDot | ||
)(code) | ||
} | ||
if (code === codes.dash || code === codes.underscore) { | ||
return effects.check(punctuation, nok, dashOrUnderscoreContinuation)(code) | ||
} | ||
if (asciiAlphanumeric(code)) { | ||
if (!hasDigitInLastSegment && asciiDigit(code)) { | ||
hasDigitInLastSegment = true | ||
} | ||
// Alphanumerical, `-`, and `_`. | ||
if ( | ||
code === codes.dash || | ||
code === codes.underscore || | ||
asciiAlphanumeric(code) | ||
) { | ||
data = true | ||
effects.consume(code) | ||
return label | ||
return emailDomain | ||
} | ||
return done(code) | ||
} | ||
// To do: `/` if xmpp. | ||
/** @type {State} */ | ||
function dotContinuation(code) { | ||
effects.consume(code) | ||
hasDot = true | ||
hasDigitInLastSegment = undefined | ||
return label | ||
// Note: normally we’d truncate trailing punctuation from the link. | ||
// However, email autolink literals cannot contain any of those markers, | ||
// except for `.`, but that can only occur if it isn’t trailing. | ||
// So we can ignore truncating! | ||
return emailDomainAfter(code) | ||
} | ||
/** @type {State} */ | ||
function dashOrUnderscoreContinuation(code) { | ||
/** | ||
* In email domain, on dot that is not a trail. | ||
* | ||
* ```markdown | ||
* > | a contact@example.org b | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function emailDomainDot(code) { | ||
effects.consume(code) | ||
return afterDashOrUnderscore | ||
dot = true | ||
return emailDomain | ||
} | ||
/** @type {State} */ | ||
function afterDashOrUnderscore(code) { | ||
if (code === codes.dot) { | ||
return effects.check(punctuation, nok, dotContinuation)(code) | ||
} | ||
return label(code) | ||
} | ||
/** @type {State} */ | ||
function done(code) { | ||
if (hasDot && !hasDigitInLastSegment) { | ||
/** | ||
* After email domain. | ||
* | ||
* ```markdown | ||
* > | a contact@example.org b | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function emailDomainAfter(code) { | ||
// Domain must not be empty, must include a dot, and must end in alphabetical. | ||
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L332>. | ||
if (data && dot && asciiAlpha(self.previous)) { | ||
effects.exit('literalAutolinkEmail') | ||
@@ -167,13 +235,32 @@ effects.exit('literalAutolink') | ||
/** @type {Tokenizer} */ | ||
/** | ||
* `www` autolink literal. | ||
* | ||
* ```markdown | ||
* > | a www.example.org b | ||
* ^^^^^^^^^^^^^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeWwwAutolink(effects, ok, nok) { | ||
const self = this | ||
return start | ||
return wwwStart | ||
/** @type {State} */ | ||
function start(code) { | ||
/** | ||
* Start of www autolink literal. | ||
* | ||
* ```markdown | ||
* > | www.example.com/a?b#c | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function wwwStart(code) { | ||
if ( | ||
(code !== codes.uppercaseW && code !== codes.lowercaseW) || | ||
!previousWww(self.previous) || | ||
!previousWww.call(self, self.previous) || | ||
previousUnbalanced(self.events) | ||
@@ -186,8 +273,7 @@ ) { | ||
effects.enter('literalAutolinkWww') | ||
// For `www.` we check instead of attempt, because when it matches, GH | ||
// treats it as part of a domain (yes, it says a valid domain must come | ||
// after `www.`, but that’s not how it’s implemented by them). | ||
// Note: we *check*, so we can discard the `www.` we parsed. | ||
// If it worked, we consider it as a part of the domain. | ||
return effects.check( | ||
www, | ||
effects.attempt(domain, effects.attempt(path, done), nok), | ||
wwwPrefix, | ||
effects.attempt(domain, effects.attempt(path, wwwAfter), nok), | ||
nok | ||
@@ -197,4 +283,13 @@ )(code) | ||
/** @type {State} */ | ||
function done(code) { | ||
/** | ||
* After a www autolink literal. | ||
* | ||
* ```markdown | ||
* > | www.example.com/a?b#c | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function wwwAfter(code) { | ||
effects.exit('literalAutolinkWww') | ||
@@ -206,29 +301,41 @@ effects.exit('literalAutolink') | ||
/** @type {Tokenizer} */ | ||
function tokenizeHttpAutolink(effects, ok, nok) { | ||
/** | ||
* Protocol autolink literal. | ||
* | ||
* ```markdown | ||
* > | a https://example.org b | ||
* ^^^^^^^^^^^^^^^^^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeProtocolAutolink(effects, ok, nok) { | ||
const self = this | ||
let buffer = '' | ||
let seen = false | ||
return start | ||
return protocolStart | ||
/** @type {State} */ | ||
function start(code) { | ||
/** | ||
* Start of protocol autolink literal. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a?b#c | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function protocolStart(code) { | ||
if ( | ||
(code !== codes.uppercaseH && code !== codes.lowercaseH) || | ||
!previousHttp(self.previous) || | ||
previousUnbalanced(self.events) | ||
(code === codes.uppercaseH || code === codes.lowercaseH) && | ||
previousProtocol.call(self, self.previous) && | ||
!previousUnbalanced(self.events) | ||
) { | ||
return nok(code) | ||
} | ||
effects.enter('literalAutolink') | ||
effects.enter('literalAutolinkHttp') | ||
effects.consume(code) | ||
return t1 | ||
} | ||
/** @type {State} */ | ||
function t1(code) { | ||
if (code === codes.uppercaseT || code === codes.lowercaseT) { | ||
effects.enter('literalAutolink') | ||
effects.enter('literalAutolinkHttp') | ||
buffer += String.fromCodePoint(code) | ||
effects.consume(code) | ||
return t2 | ||
return protocolPrefixInside | ||
} | ||
@@ -239,17 +346,27 @@ | ||
/** @type {State} */ | ||
function t2(code) { | ||
if (code === codes.uppercaseT || code === codes.lowercaseT) { | ||
/** | ||
* In protocol. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a?b#c | ||
* ^^^^^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function protocolPrefixInside(code) { | ||
// `5` is size of `https` | ||
if (asciiAlpha(code) && buffer.length < 5) { | ||
buffer += String.fromCodePoint(code) | ||
effects.consume(code) | ||
return p | ||
return protocolPrefixInside | ||
} | ||
return nok(code) | ||
} | ||
if (code === codes.colon) { | ||
const protocol = buffer.toLowerCase() | ||
/** @type {State} */ | ||
function p(code) { | ||
if (code === codes.uppercaseP || code === codes.lowercaseP) { | ||
effects.consume(code) | ||
return s | ||
if (protocol === 'http' || protocol === 'https') { | ||
effects.consume(code) | ||
return protocolSlashesInside | ||
} | ||
} | ||
@@ -260,37 +377,22 @@ | ||
/** @type {State} */ | ||
function s(code) { | ||
if (code === codes.uppercaseS || code === codes.lowercaseS) { | ||
effects.consume(code) | ||
return colon | ||
} | ||
return colon(code) | ||
} | ||
/** @type {State} */ | ||
function colon(code) { | ||
if (code === codes.colon) { | ||
effects.consume(code) | ||
return slash1 | ||
} | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function slash1(code) { | ||
/** | ||
* In slashes. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a?b#c | ||
* ^^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function protocolSlashesInside(code) { | ||
if (code === codes.slash) { | ||
effects.consume(code) | ||
return slash2 | ||
} | ||
return nok(code) | ||
} | ||
if (seen) { | ||
return afterProtocol | ||
} | ||
/** @type {State} */ | ||
function slash2(code) { | ||
if (code === codes.slash) { | ||
effects.consume(code) | ||
return after | ||
seen = true | ||
return protocolSlashesInside | ||
} | ||
@@ -301,14 +403,35 @@ | ||
/** @type {State} */ | ||
function after(code) { | ||
/** | ||
* After protocol, before domain. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a?b#c | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function afterProtocol(code) { | ||
// To do: this is different from `markdown-rs`: | ||
// https://github.com/wooorm/markdown-rs/blob/b3a921c761309ae00a51fe348d8a43adbc54b518/src/construct/gfm_autolink_literal.rs#L172-L182 | ||
return code === codes.eof || | ||
asciiControl(code) || | ||
markdownLineEndingOrSpace(code) || | ||
unicodeWhitespace(code) || | ||
unicodePunctuation(code) | ||
? nok(code) | ||
: effects.attempt(domain, effects.attempt(path, done), nok)(code) | ||
: effects.attempt(domain, effects.attempt(path, protocolAfter), nok)(code) | ||
} | ||
/** @type {State} */ | ||
function done(code) { | ||
/** | ||
* After a protocol autolink literal. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a?b#c | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function protocolAfter(code) { | ||
effects.exit('literalAutolinkHttp') | ||
@@ -320,31 +443,38 @@ effects.exit('literalAutolink') | ||
/** @type {Tokenizer} */ | ||
function tokenizeWww(effects, ok, nok) { | ||
return start | ||
/** | ||
* `www` prefix. | ||
* | ||
* ```markdown | ||
* > | a www.example.org b | ||
* ^^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeWwwPrefix(effects, ok, nok) { | ||
let size = 0 | ||
/** @type {State} */ | ||
function start(code) { | ||
assert( | ||
code === codes.uppercaseW || code === codes.lowercaseW, | ||
'expected `w`' | ||
) | ||
effects.consume(code) | ||
return w2 | ||
} | ||
return wwwPrefixInside | ||
/** @type {State} */ | ||
function w2(code) { | ||
if (code === codes.uppercaseW || code === codes.lowercaseW) { | ||
/** | ||
* In www prefix. | ||
* | ||
* ```markdown | ||
* > | www.example.com | ||
* ^^^^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function wwwPrefixInside(code) { | ||
if ((code === codes.uppercaseW || code === codes.lowercaseW) && size < 3) { | ||
size++ | ||
effects.consume(code) | ||
return w3 | ||
return wwwPrefixInside | ||
} | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function w3(code) { | ||
if (code === codes.uppercaseW || code === codes.lowercaseW) { | ||
if (code === codes.dot && size === 3) { | ||
effects.consume(code) | ||
return dot | ||
return wwwPrefixAfter | ||
} | ||
@@ -355,39 +485,55 @@ | ||
/** @type {State} */ | ||
function dot(code) { | ||
if (code === codes.dot) { | ||
effects.consume(code) | ||
return after | ||
} | ||
return nok(code) | ||
/** | ||
* After www prefix. | ||
* | ||
* ```markdown | ||
* > | www.example.com | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function wwwPrefixAfter(code) { | ||
// If there is *anything*, we can link. | ||
return code === codes.eof ? nok(code) : ok(code) | ||
} | ||
/** @type {State} */ | ||
function after(code) { | ||
return code === codes.eof || markdownLineEnding(code) ? nok(code) : ok(code) | ||
} | ||
} | ||
/** @type {Tokenizer} */ | ||
/** | ||
* Domain. | ||
* | ||
* ```markdown | ||
* > | a https://example.org b | ||
* ^^^^^^^^^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeDomain(effects, ok, nok) { | ||
/** @type {boolean|undefined} */ | ||
let hasUnderscoreInLastSegment | ||
/** @type {boolean|undefined} */ | ||
let hasUnderscoreInLastLastSegment | ||
/** @type {boolean | undefined} */ | ||
let underscoreInLastSegment | ||
/** @type {boolean | undefined} */ | ||
let underscoreInLastLastSegment | ||
/** @type {boolean | undefined} */ | ||
let seen | ||
return domain | ||
return domainInside | ||
/** @type {State} */ | ||
function domain(code) { | ||
if (code === codes.ampersand) { | ||
return effects.check( | ||
namedCharacterReference, | ||
done, | ||
punctuationContinuation | ||
)(code) | ||
} | ||
/** | ||
* In domain. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a | ||
* ^^^^^^^^^^^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function domainInside(code) { | ||
// Check whether this marker, which is a trailing punctuation | ||
// marker, is optionally followed by more trailing markers, and then | ||
// followed by an end. | ||
if (code === codes.dot || code === codes.underscore) { | ||
return effects.check(punctuation, done, punctuationContinuation)(code) | ||
return effects.check(trail, domainAfter, domainAtPunctuation)(code) | ||
} | ||
@@ -400,137 +546,339 @@ | ||
// already has that for Unicode punctuation and whitespace, so use those. | ||
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L12>. | ||
if ( | ||
code === codes.eof || | ||
asciiControl(code) || | ||
markdownLineEndingOrSpace(code) || | ||
unicodeWhitespace(code) || | ||
(code !== codes.dash && unicodePunctuation(code)) | ||
) { | ||
return done(code) | ||
return domainAfter(code) | ||
} | ||
seen = true | ||
effects.consume(code) | ||
return domain | ||
return domainInside | ||
} | ||
/** @type {State} */ | ||
function punctuationContinuation(code) { | ||
if (code === codes.dot) { | ||
hasUnderscoreInLastLastSegment = hasUnderscoreInLastSegment | ||
hasUnderscoreInLastSegment = undefined | ||
effects.consume(code) | ||
return domain | ||
/** | ||
* In domain, at potential trailing punctuation, that was not trailing. | ||
* | ||
* ```markdown | ||
* > | https://example.com | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function domainAtPunctuation(code) { | ||
// There is an underscore in the last segment of the domain | ||
if (code === codes.underscore) { | ||
underscoreInLastSegment = true | ||
} | ||
// Otherwise, it’s a `.`: save the last segment underscore in the | ||
// penultimate segment slot. | ||
else { | ||
underscoreInLastLastSegment = underscoreInLastSegment | ||
underscoreInLastSegment = undefined | ||
} | ||
if (code === codes.underscore) hasUnderscoreInLastSegment = true | ||
effects.consume(code) | ||
return domain | ||
return domainInside | ||
} | ||
/** @type {State} */ | ||
function done(code) { | ||
if (!hasUnderscoreInLastLastSegment && !hasUnderscoreInLastSegment) { | ||
return ok(code) | ||
/** | ||
* After domain. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} */ | ||
function domainAfter(code) { | ||
// Note: GH says a dot is needed, but that’s not true: | ||
// <https://github.com/github/cmark-gfm/issues/279> | ||
if (underscoreInLastLastSegment || underscoreInLastSegment || !seen) { | ||
return nok(code) | ||
} | ||
return nok(code) | ||
return ok(code) | ||
} | ||
} | ||
/** @type {Tokenizer} */ | ||
/** | ||
* Path. | ||
* | ||
* ```markdown | ||
* > | a https://example.org/stuff b | ||
* ^^^^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizePath(effects, ok) { | ||
let balance = 0 | ||
let sizeOpen = 0 | ||
let sizeClose = 0 | ||
return inPath | ||
return pathInside | ||
/** @type {State} */ | ||
function inPath(code) { | ||
if (code === codes.ampersand) { | ||
return effects.check( | ||
namedCharacterReference, | ||
ok, | ||
continuedPunctuation | ||
)(code) | ||
/** | ||
* In path. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a | ||
* ^^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function pathInside(code) { | ||
if (code === codes.leftParenthesis) { | ||
sizeOpen++ | ||
effects.consume(code) | ||
return pathInside | ||
} | ||
if (code === codes.leftParenthesis) { | ||
balance++ | ||
// To do: `markdown-rs` also needs this. | ||
// If this is a paren, and there are fewer closings than openings, | ||
// we don’t check for a trail. | ||
if (code === codes.rightParenthesis && sizeClose < sizeOpen) { | ||
return pathAtPunctuation(code) | ||
} | ||
if (code === codes.rightParenthesis) { | ||
return effects.check( | ||
punctuation, | ||
parenAtPathEnd, | ||
continuedPunctuation | ||
)(code) | ||
// Check whether this trailing punctuation marker is optionally | ||
// followed by more trailing markers, and then followed | ||
// by an end. | ||
if ( | ||
code === codes.exclamationMark || | ||
code === codes.quotationMark || | ||
code === codes.ampersand || | ||
code === codes.apostrophe || | ||
code === codes.rightParenthesis || | ||
code === codes.asterisk || | ||
code === codes.comma || | ||
code === codes.dot || | ||
code === codes.colon || | ||
code === codes.semicolon || | ||
code === codes.lessThan || | ||
code === codes.questionMark || | ||
code === codes.rightSquareBracket || | ||
code === codes.underscore || | ||
code === codes.tilde | ||
) { | ||
return effects.check(trail, ok, pathAtPunctuation)(code) | ||
} | ||
if (pathEnd(code)) { | ||
if ( | ||
code === codes.eof || | ||
markdownLineEndingOrSpace(code) || | ||
unicodeWhitespace(code) | ||
) { | ||
return ok(code) | ||
} | ||
if (trailingPunctuation(code)) { | ||
return effects.check(punctuation, ok, continuedPunctuation)(code) | ||
} | ||
effects.consume(code) | ||
return inPath | ||
return pathInside | ||
} | ||
/** @type {State} */ | ||
function continuedPunctuation(code) { | ||
/** | ||
* In path, at potential trailing punctuation, that was not trailing. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a"b | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function pathAtPunctuation(code) { | ||
// Count closing parens. | ||
if (code === codes.rightParenthesis) { | ||
sizeClose++ | ||
} | ||
effects.consume(code) | ||
return inPath | ||
return pathInside | ||
} | ||
} | ||
/** @type {State} */ | ||
function parenAtPathEnd(code) { | ||
balance-- | ||
return balance < 0 ? ok(code) : continuedPunctuation(code) | ||
/** | ||
* Trail. | ||
* | ||
* This calls `ok` if this *is* the trail, followed by an end, which means | ||
* the entire trail is not part of the link. | ||
* It calls `nok` if this *is* part of the link. | ||
* | ||
* ```markdown | ||
* > | https://example.com"). | ||
* ^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeTrail(effects, ok, nok) { | ||
return trail | ||
/** | ||
* In trail of domain or path. | ||
* | ||
* ```markdown | ||
* > | https://example.com"). | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function trail(code) { | ||
// Regular trailing punctuation. | ||
if ( | ||
code === codes.exclamationMark || | ||
code === codes.quotationMark || | ||
code === codes.apostrophe || | ||
code === codes.rightParenthesis || | ||
code === codes.asterisk || | ||
code === codes.comma || | ||
code === codes.dot || | ||
code === codes.colon || | ||
code === codes.semicolon || | ||
code === codes.questionMark || | ||
code === codes.underscore || | ||
code === codes.tilde | ||
) { | ||
effects.consume(code) | ||
return trail | ||
} | ||
// `&` followed by one or more alphabeticals and then a `;`, is | ||
// as a whole considered as trailing punctuation. | ||
// In all other cases, it is considered as continuation of the URL. | ||
if (code === codes.ampersand) { | ||
effects.consume(code) | ||
return trailCharRefStart | ||
} | ||
// Needed because we allow literals after `[`, as we fix: | ||
// <https://github.com/github/cmark-gfm/issues/278>. | ||
// Check that it is not followed by `(` or `[`. | ||
if (code === codes.rightSquareBracket) { | ||
effects.consume(code) | ||
return trailBracketAfter | ||
} | ||
if ( | ||
// `<` is an end. | ||
code === codes.lessThan || | ||
// So is whitespace. | ||
code === codes.eof || | ||
markdownLineEndingOrSpace(code) || | ||
unicodeWhitespace(code) | ||
) { | ||
return ok(code) | ||
} | ||
return nok(code) | ||
} | ||
} | ||
/** @type {Tokenizer} */ | ||
function tokenizeNamedCharacterReference(effects, ok, nok) { | ||
return start | ||
/** | ||
* In trail, after `]`. | ||
* | ||
* > 👉 **Note**: this deviates from `cmark-gfm` to fix a bug. | ||
* > See end of <https://github.com/github/cmark-gfm/issues/278> for more. | ||
* | ||
* ```markdown | ||
* > | https://example.com]( | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function trailBracketAfter(code) { | ||
// Whitespace or something that could start a resource or reference is the end. | ||
// Switch back to trail otherwise. | ||
if ( | ||
code === codes.eof || | ||
code === codes.leftParenthesis || | ||
code === codes.leftSquareBracket || | ||
markdownLineEndingOrSpace(code) || | ||
unicodeWhitespace(code) | ||
) { | ||
return ok(code) | ||
} | ||
/** @type {State} */ | ||
function start(code) { | ||
assert(code === codes.ampersand, 'expected `&`') | ||
effects.consume(code) | ||
return inside | ||
return trail(code) | ||
} | ||
/** @type {State} */ | ||
function inside(code) { | ||
if (asciiAlpha(code)) { | ||
/** | ||
* In character-reference like trail, after `&`. | ||
* | ||
* ```markdown | ||
* > | https://example.com&). | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function trailCharRefStart(code) { | ||
// When non-alpha, it’s not a trail. | ||
return asciiAlpha(code) ? trailCharRefInside(code) : nok(code) | ||
} | ||
/** | ||
* In character-reference like trail. | ||
* | ||
* ```markdown | ||
* > | https://example.com&). | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function trailCharRefInside(code) { | ||
// Switch back to trail if this is well-formed. | ||
if (code === codes.semicolon) { | ||
effects.consume(code) | ||
return inside | ||
return trail | ||
} | ||
if (code === codes.semicolon) { | ||
if (asciiAlpha(code)) { | ||
effects.consume(code) | ||
return after | ||
return trailCharRefInside | ||
} | ||
// It’s not a trail. | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function after(code) { | ||
// If the named character reference is followed by the end of the path, it’s | ||
// not continued punctuation. | ||
return pathEnd(code) ? ok(code) : nok(code) | ||
} | ||
} | ||
/** @type {Tokenizer} */ | ||
function tokenizePunctuation(effects, ok, nok) { | ||
/** | ||
* Dot in email domain trail. | ||
* | ||
* This calls `ok` if this *is* the trail, followed by an end, which means | ||
* the trail is not part of the link. | ||
* It calls `nok` if this *is* part of the link. | ||
* | ||
* ```markdown | ||
* > | contact@example.org. | ||
* ^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeEmailDomainDotTrail(effects, ok, nok) { | ||
return start | ||
/** @type {State} */ | ||
/** | ||
* Dot. | ||
* | ||
* ```markdown | ||
* > | contact@example.org. | ||
* ^ ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function start(code) { | ||
assert( | ||
code === codes.dash || trailingPunctuation(code), | ||
'expected punctuation' | ||
) | ||
// Must be dot. | ||
effects.consume(code) | ||
@@ -540,13 +888,15 @@ return after | ||
/** @type {State} */ | ||
/** | ||
* After dot. | ||
* | ||
* ```markdown | ||
* > | contact@example.org. | ||
* ^ ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function after(code) { | ||
// Check the next. | ||
if (trailingPunctuation(code)) { | ||
effects.consume(code) | ||
return after | ||
} | ||
// If the punctuation marker is followed by the end of the path, it’s not | ||
// continued punctuation. | ||
return pathEnd(code) ? ok(code) : nok(code) | ||
// Not a trail if alphanumeric. | ||
return asciiAlphanumeric(code) ? nok(code) : ok(code) | ||
} | ||
@@ -556,20 +906,17 @@ } | ||
/** | ||
* @param {Code} code | ||
* @returns {boolean} | ||
* See: | ||
* <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L156>. | ||
* | ||
* @type {Previous} | ||
*/ | ||
function trailingPunctuation(code) { | ||
function previousWww(code) { | ||
return ( | ||
code === codes.exclamationMark || | ||
code === codes.quotationMark || | ||
code === codes.apostrophe || | ||
code === codes.rightParenthesis || | ||
code === codes.eof || | ||
code === codes.leftParenthesis || | ||
code === codes.asterisk || | ||
code === codes.comma || | ||
code === codes.dot || | ||
code === codes.colon || | ||
code === codes.semicolon || | ||
code === codes.lessThan || | ||
code === codes.questionMark || | ||
code === codes.underscore || | ||
code === codes.tilde | ||
code === codes.leftSquareBracket || | ||
code === codes.rightSquareBracket || | ||
code === codes.tilde || | ||
markdownLineEndingOrSpace(code) | ||
) | ||
@@ -579,14 +926,24 @@ } | ||
/** | ||
* @param {Code} code | ||
* @returns {boolean} | ||
* See: | ||
* <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L214>. | ||
* | ||
* @type {Previous} | ||
*/ | ||
function pathEnd(code) { | ||
return ( | ||
code === codes.eof || | ||
code === codes.lessThan || | ||
markdownLineEndingOrSpace(code) | ||
) | ||
function previousProtocol(code) { | ||
return !asciiAlpha(code) | ||
} | ||
/** | ||
* @this {TokenizeContext} | ||
* @type {Previous} | ||
*/ | ||
function previousEmail(code) { | ||
// Do not allow a slash “inside” atext. | ||
// The reference code is a bit weird, but that’s what it results in. | ||
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L307>. | ||
// Other than slash, every preceding character is allowed. | ||
return !(code === codes.slash || gfmAtext(code)) | ||
} | ||
/** | ||
* @param {Code} code | ||
@@ -605,24 +962,2 @@ * @returns {boolean} | ||
/** @type {Previous} */ | ||
function previousWww(code) { | ||
return ( | ||
code === codes.eof || | ||
code === codes.leftParenthesis || | ||
code === codes.asterisk || | ||
code === codes.underscore || | ||
code === codes.tilde || | ||
markdownLineEndingOrSpace(code) | ||
) | ||
} | ||
/** @type {Previous} */ | ||
function previousHttp(code) { | ||
return code === codes.eof || !asciiAlpha(code) | ||
} | ||
/** @type {Previous} */ | ||
function previousEmail(code) { | ||
return code !== codes.slash && previousHttp(code) | ||
} | ||
/** | ||
@@ -629,0 +964,0 @@ * @param {Array<Event>} events |
@@ -1,6 +0,11 @@ | ||
/** @type {HtmlExtension} */ | ||
/** | ||
* Extension for `micromark` that can be passed in `htmlExtensions` to support | ||
* GFM autolink literals when serializing to HTML. | ||
* | ||
* @type {HtmlExtension} | ||
*/ | ||
export const gfmAutolinkLiteralHtml: HtmlExtension | ||
export type CompileContext = import('micromark-util-types').CompileContext | ||
export type Handle = import('micromark-util-types').Handle | ||
export type HtmlExtension = import('micromark-util-types').HtmlExtension | ||
export type Handle = import('micromark-util-types').Handle | ||
export type CompileContext = import('micromark-util-types').CompileContext | ||
export type Token = import('micromark-util-types').Token |
/** | ||
* @typedef {import('micromark-util-types').CompileContext} CompileContext | ||
* @typedef {import('micromark-util-types').Handle} Handle | ||
* @typedef {import('micromark-util-types').HtmlExtension} HtmlExtension | ||
* @typedef {import('micromark-util-types').Handle} Handle | ||
* @typedef {import('micromark-util-types').CompileContext} CompileContext | ||
* @typedef {import('micromark-util-types').Token} Token | ||
*/ | ||
import {sanitizeUri} from 'micromark-util-sanitize-uri' | ||
/** @type {HtmlExtension} */ | ||
// To do: next major: expose functions that yields extension. | ||
/** | ||
* Extension for `micromark` that can be passed in `htmlExtensions` to support | ||
* GFM autolink literals when serializing to HTML. | ||
* | ||
* @type {HtmlExtension} | ||
*/ | ||
export const gfmAutolinkLiteralHtml = { | ||
@@ -17,24 +25,33 @@ exit: { | ||
} | ||
/** @type {Handle} */ | ||
/** | ||
* @this {CompileContext} | ||
* @type {Handle} | ||
*/ | ||
function literalAutolinkWww(token) { | ||
anchorFromToken.call(this, token, 'http://') | ||
} | ||
/** @type {Handle} */ | ||
/** | ||
* @this {CompileContext} | ||
* @type {Handle} | ||
*/ | ||
function literalAutolinkEmail(token) { | ||
anchorFromToken.call(this, token, 'mailto:') | ||
} | ||
/** @type {Handle} */ | ||
/** | ||
* @this {CompileContext} | ||
* @type {Handle} | ||
*/ | ||
function literalAutolinkHttp(token) { | ||
anchorFromToken.call(this, token) | ||
} | ||
/** | ||
* @this CompileContext | ||
* @param {Token} token | ||
* @param {string} [protocol] | ||
* @param {string | null | undefined} [protocol] | ||
* @returns {void} | ||
*/ | ||
function anchorFromToken(token, protocol) { | ||
@@ -41,0 +58,0 @@ const url = this.sliceSerialize(token) |
@@ -1,9 +0,15 @@ | ||
/** @type {Extension} */ | ||
/** | ||
* Extension for `micromark` that can be passed in `extensions` to enable GFM | ||
* autolink literal syntax. | ||
* | ||
* @type {Extension} | ||
*/ | ||
export const gfmAutolinkLiteral: Extension | ||
export type Code = import('micromark-util-types').Code | ||
export type ConstructRecord = import('micromark-util-types').ConstructRecord | ||
export type Event = import('micromark-util-types').Event | ||
export type Extension = import('micromark-util-types').Extension | ||
export type ConstructRecord = import('micromark-util-types').ConstructRecord | ||
export type Tokenizer = import('micromark-util-types').Tokenizer | ||
export type Previous = import('micromark-util-types').Previous | ||
export type State = import('micromark-util-types').State | ||
export type Event = import('micromark-util-types').Event | ||
export type Code = import('micromark-util-types').Code | ||
export type TokenizeContext = import('micromark-util-types').TokenizeContext | ||
export type Tokenizer = import('micromark-util-types').Tokenizer |
1013
lib/syntax.js
/** | ||
* @typedef {import('micromark-util-types').Code} Code | ||
* @typedef {import('micromark-util-types').ConstructRecord} ConstructRecord | ||
* @typedef {import('micromark-util-types').Event} Event | ||
* @typedef {import('micromark-util-types').Extension} Extension | ||
* @typedef {import('micromark-util-types').ConstructRecord} ConstructRecord | ||
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer | ||
* @typedef {import('micromark-util-types').Previous} Previous | ||
* @typedef {import('micromark-util-types').State} State | ||
* @typedef {import('micromark-util-types').Event} Event | ||
* @typedef {import('micromark-util-types').Code} Code | ||
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext | ||
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer | ||
*/ | ||
import { | ||
@@ -14,10 +16,8 @@ asciiAlpha, | ||
asciiControl, | ||
asciiDigit, | ||
markdownLineEndingOrSpace, | ||
markdownLineEnding, | ||
unicodePunctuation, | ||
unicodeWhitespace | ||
} from 'micromark-util-character' | ||
const www = { | ||
tokenize: tokenizeWww, | ||
const wwwPrefix = { | ||
tokenize: tokenizeWwwPrefix, | ||
partial: true | ||
@@ -33,8 +33,8 @@ } | ||
} | ||
const punctuation = { | ||
tokenize: tokenizePunctuation, | ||
const trail = { | ||
tokenize: tokenizeTrail, | ||
partial: true | ||
} | ||
const namedCharacterReference = { | ||
tokenize: tokenizeNamedCharacterReference, | ||
const emailDomainDotTrail = { | ||
tokenize: tokenizeEmailDomainDotTrail, | ||
partial: true | ||
@@ -46,5 +46,5 @@ } | ||
} | ||
const httpAutolink = { | ||
tokenize: tokenizeHttpAutolink, | ||
previous: previousHttp | ||
const protocolAutolink = { | ||
tokenize: tokenizeProtocolAutolink, | ||
previous: previousProtocol | ||
} | ||
@@ -55,12 +55,20 @@ const emailAutolink = { | ||
} | ||
/** @type {ConstructRecord} */ | ||
const text = {} | ||
/** @type {Extension} */ | ||
// To do: next major: expose functions that yields extension. | ||
/** | ||
* Extension for `micromark` that can be passed in `extensions` to enable GFM | ||
* autolink literal syntax. | ||
* | ||
* @type {Extension} | ||
*/ | ||
export const gfmAutolinkLiteral = { | ||
text | ||
} | ||
let code = 48 // Add alphanumerics. | ||
let code = 48 | ||
// Add alphanumerics. | ||
while (code < 123) { | ||
@@ -72,3 +80,2 @@ text[code] = emailAutolink | ||
} | ||
text[43] = emailAutolink | ||
@@ -78,23 +85,46 @@ text[45] = emailAutolink | ||
text[95] = emailAutolink | ||
text[72] = [emailAutolink, httpAutolink] | ||
text[104] = [emailAutolink, httpAutolink] | ||
text[72] = [emailAutolink, protocolAutolink] | ||
text[104] = [emailAutolink, protocolAutolink] | ||
text[87] = [emailAutolink, wwwAutolink] | ||
text[119] = [emailAutolink, wwwAutolink] | ||
/** @type {Tokenizer} */ | ||
// To do: perform email autolink literals on events, afterwards. | ||
// That’s where `markdown-rs` and `cmark-gfm` perform it. | ||
// It should look for `@`, then for atext backwards, and then for a label | ||
// forwards. | ||
// To do: `mailto:`, `xmpp:` protocol as prefix. | ||
/** | ||
* Email autolink literal. | ||
* | ||
* ```markdown | ||
* > | a contact@example.org b | ||
* ^^^^^^^^^^^^^^^^^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeEmailAutolink(effects, ok, nok) { | ||
const self = this | ||
/** @type {boolean | undefined} */ | ||
let dot | ||
/** @type {boolean} */ | ||
let hasDot | ||
/** @type {boolean|undefined} */ | ||
let hasDigitInLastSegment | ||
let data | ||
return start | ||
/** @type {State} */ | ||
/** | ||
* Start of email autolink literal. | ||
* | ||
* ```markdown | ||
* > | a contact@example.org b | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function start(code) { | ||
if ( | ||
!gfmAtext(code) || | ||
!previousEmail(self.previous) || | ||
!previousEmail.call(self, self.previous) || | ||
previousUnbalanced(self.events) | ||
@@ -104,3 +134,2 @@ ) { | ||
} | ||
effects.enter('literalAutolink') | ||
@@ -110,4 +139,13 @@ effects.enter('literalAutolinkEmail') | ||
} | ||
/** @type {State} */ | ||
/** | ||
* In email atext. | ||
* | ||
* ```markdown | ||
* > | a contact@example.org b | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function atext(code) { | ||
@@ -118,59 +156,79 @@ if (gfmAtext(code)) { | ||
} | ||
if (code === 64) { | ||
effects.consume(code) | ||
return label | ||
return emailDomain | ||
} | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function label(code) { | ||
/** | ||
* In email domain. | ||
* | ||
* The reference code is a bit overly complex as it handles the `@`, of which | ||
* there may be just one. | ||
* Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L318> | ||
* | ||
* ```markdown | ||
* > | a contact@example.org b | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function emailDomain(code) { | ||
// Dot followed by alphanumerical (not `-` or `_`). | ||
if (code === 46) { | ||
return effects.check(punctuation, done, dotContinuation)(code) | ||
return effects.check( | ||
emailDomainDotTrail, | ||
emailDomainAfter, | ||
emailDomainDot | ||
)(code) | ||
} | ||
if (code === 45 || code === 95) { | ||
return effects.check(punctuation, nok, dashOrUnderscoreContinuation)(code) | ||
} | ||
if (asciiAlphanumeric(code)) { | ||
if (!hasDigitInLastSegment && asciiDigit(code)) { | ||
hasDigitInLastSegment = true | ||
} | ||
// Alphanumerical, `-`, and `_`. | ||
if (code === 45 || code === 95 || asciiAlphanumeric(code)) { | ||
data = true | ||
effects.consume(code) | ||
return label | ||
return emailDomain | ||
} | ||
return done(code) | ||
} | ||
/** @type {State} */ | ||
// To do: `/` if xmpp. | ||
function dotContinuation(code) { | ||
effects.consume(code) | ||
hasDot = true | ||
hasDigitInLastSegment = undefined | ||
return label | ||
// Note: normally we’d truncate trailing punctuation from the link. | ||
// However, email autolink literals cannot contain any of those markers, | ||
// except for `.`, but that can only occur if it isn’t trailing. | ||
// So we can ignore truncating! | ||
return emailDomainAfter(code) | ||
} | ||
/** @type {State} */ | ||
function dashOrUnderscoreContinuation(code) { | ||
/** | ||
* In email domain, on dot that is not a trail. | ||
* | ||
* ```markdown | ||
* > | a contact@example.org b | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function emailDomainDot(code) { | ||
effects.consume(code) | ||
return afterDashOrUnderscore | ||
dot = true | ||
return emailDomain | ||
} | ||
/** @type {State} */ | ||
function afterDashOrUnderscore(code) { | ||
if (code === 46) { | ||
return effects.check(punctuation, nok, dotContinuation)(code) | ||
} | ||
return label(code) | ||
} | ||
/** @type {State} */ | ||
function done(code) { | ||
if (hasDot && !hasDigitInLastSegment) { | ||
/** | ||
* After email domain. | ||
* | ||
* ```markdown | ||
* > | a contact@example.org b | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function emailDomainAfter(code) { | ||
// Domain must not be empty, must include a dot, and must end in alphabetical. | ||
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L332>. | ||
if (data && dot && asciiAlpha(self.previous)) { | ||
effects.exit('literalAutolinkEmail') | ||
@@ -180,17 +238,35 @@ effects.exit('literalAutolink') | ||
} | ||
return nok(code) | ||
} | ||
} | ||
/** @type {Tokenizer} */ | ||
/** | ||
* `www` autolink literal. | ||
* | ||
* ```markdown | ||
* > | a www.example.org b | ||
* ^^^^^^^^^^^^^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeWwwAutolink(effects, ok, nok) { | ||
const self = this | ||
return start | ||
/** @type {State} */ | ||
return wwwStart | ||
function start(code) { | ||
/** | ||
* Start of www autolink literal. | ||
* | ||
* ```markdown | ||
* > | www.example.com/a?b#c | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function wwwStart(code) { | ||
if ( | ||
(code !== 87 && code !== 119) || | ||
!previousWww(self.previous) || | ||
!previousWww.call(self, self.previous) || | ||
previousUnbalanced(self.events) | ||
@@ -200,17 +276,24 @@ ) { | ||
} | ||
effects.enter('literalAutolink') | ||
effects.enter('literalAutolinkWww') // For `www.` we check instead of attempt, because when it matches, GH | ||
// treats it as part of a domain (yes, it says a valid domain must come | ||
// after `www.`, but that’s not how it’s implemented by them). | ||
effects.enter('literalAutolinkWww') | ||
// Note: we *check*, so we can discard the `www.` we parsed. | ||
// If it worked, we consider it as a part of the domain. | ||
return effects.check( | ||
www, | ||
effects.attempt(domain, effects.attempt(path, done), nok), | ||
wwwPrefix, | ||
effects.attempt(domain, effects.attempt(path, wwwAfter), nok), | ||
nok | ||
)(code) | ||
} | ||
/** @type {State} */ | ||
function done(code) { | ||
/** | ||
* After a www autolink literal. | ||
* | ||
* ```markdown | ||
* > | www.example.com/a?b#c | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function wwwAfter(code) { | ||
effects.exit('literalAutolinkWww') | ||
@@ -221,106 +304,127 @@ effects.exit('literalAutolink') | ||
} | ||
/** @type {Tokenizer} */ | ||
function tokenizeHttpAutolink(effects, ok, nok) { | ||
/** | ||
* Protocol autolink literal. | ||
* | ||
* ```markdown | ||
* > | a https://example.org b | ||
* ^^^^^^^^^^^^^^^^^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeProtocolAutolink(effects, ok, nok) { | ||
const self = this | ||
return start | ||
/** @type {State} */ | ||
let buffer = '' | ||
let seen = false | ||
return protocolStart | ||
function start(code) { | ||
/** | ||
* Start of protocol autolink literal. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a?b#c | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function protocolStart(code) { | ||
if ( | ||
(code !== 72 && code !== 104) || | ||
!previousHttp(self.previous) || | ||
previousUnbalanced(self.events) | ||
(code === 72 || code === 104) && | ||
previousProtocol.call(self, self.previous) && | ||
!previousUnbalanced(self.events) | ||
) { | ||
return nok(code) | ||
} | ||
effects.enter('literalAutolink') | ||
effects.enter('literalAutolinkHttp') | ||
effects.consume(code) | ||
return t1 | ||
} | ||
/** @type {State} */ | ||
function t1(code) { | ||
if (code === 84 || code === 116) { | ||
effects.enter('literalAutolink') | ||
effects.enter('literalAutolinkHttp') | ||
buffer += String.fromCodePoint(code) | ||
effects.consume(code) | ||
return t2 | ||
return protocolPrefixInside | ||
} | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function t2(code) { | ||
if (code === 84 || code === 116) { | ||
/** | ||
* In protocol. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a?b#c | ||
* ^^^^^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function protocolPrefixInside(code) { | ||
// `5` is size of `https` | ||
if (asciiAlpha(code) && buffer.length < 5) { | ||
buffer += String.fromCodePoint(code) | ||
effects.consume(code) | ||
return p | ||
return protocolPrefixInside | ||
} | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function p(code) { | ||
if (code === 80 || code === 112) { | ||
effects.consume(code) | ||
return s | ||
} | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function s(code) { | ||
if (code === 83 || code === 115) { | ||
effects.consume(code) | ||
return colon | ||
} | ||
return colon(code) | ||
} | ||
/** @type {State} */ | ||
function colon(code) { | ||
if (code === 58) { | ||
effects.consume(code) | ||
return slash1 | ||
const protocol = buffer.toLowerCase() | ||
if (protocol === 'http' || protocol === 'https') { | ||
effects.consume(code) | ||
return protocolSlashesInside | ||
} | ||
} | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function slash1(code) { | ||
/** | ||
* In slashes. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a?b#c | ||
* ^^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function protocolSlashesInside(code) { | ||
if (code === 47) { | ||
effects.consume(code) | ||
return slash2 | ||
if (seen) { | ||
return afterProtocol | ||
} | ||
seen = true | ||
return protocolSlashesInside | ||
} | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function slash2(code) { | ||
if (code === 47) { | ||
effects.consume(code) | ||
return after | ||
} | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function after(code) { | ||
/** | ||
* After protocol, before domain. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a?b#c | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function afterProtocol(code) { | ||
// To do: this is different from `markdown-rs`: | ||
// https://github.com/wooorm/markdown-rs/blob/b3a921c761309ae00a51fe348d8a43adbc54b518/src/construct/gfm_autolink_literal.rs#L172-L182 | ||
return code === null || | ||
asciiControl(code) || | ||
markdownLineEndingOrSpace(code) || | ||
unicodeWhitespace(code) || | ||
unicodePunctuation(code) | ||
? nok(code) | ||
: effects.attempt(domain, effects.attempt(path, done), nok)(code) | ||
: effects.attempt(domain, effects.attempt(path, protocolAfter), nok)(code) | ||
} | ||
/** @type {State} */ | ||
function done(code) { | ||
/** | ||
* After a protocol autolink literal. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a?b#c | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function protocolAfter(code) { | ||
effects.exit('literalAutolinkHttp') | ||
@@ -331,71 +435,96 @@ effects.exit('literalAutolink') | ||
} | ||
/** @type {Tokenizer} */ | ||
function tokenizeWww(effects, ok, nok) { | ||
return start | ||
/** @type {State} */ | ||
/** | ||
* `www` prefix. | ||
* | ||
* ```markdown | ||
* > | a www.example.org b | ||
* ^^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeWwwPrefix(effects, ok, nok) { | ||
let size = 0 | ||
return wwwPrefixInside | ||
function start(code) { | ||
effects.consume(code) | ||
return w2 | ||
} | ||
/** @type {State} */ | ||
function w2(code) { | ||
if (code === 87 || code === 119) { | ||
/** | ||
* In www prefix. | ||
* | ||
* ```markdown | ||
* > | www.example.com | ||
* ^^^^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function wwwPrefixInside(code) { | ||
if ((code === 87 || code === 119) && size < 3) { | ||
size++ | ||
effects.consume(code) | ||
return w3 | ||
return wwwPrefixInside | ||
} | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function w3(code) { | ||
if (code === 87 || code === 119) { | ||
if (code === 46 && size === 3) { | ||
effects.consume(code) | ||
return dot | ||
return wwwPrefixAfter | ||
} | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function dot(code) { | ||
if (code === 46) { | ||
effects.consume(code) | ||
return after | ||
} | ||
return nok(code) | ||
/** | ||
* After www prefix. | ||
* | ||
* ```markdown | ||
* > | www.example.com | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function wwwPrefixAfter(code) { | ||
// If there is *anything*, we can link. | ||
return code === null ? nok(code) : ok(code) | ||
} | ||
/** @type {State} */ | ||
function after(code) { | ||
return code === null || markdownLineEnding(code) ? nok(code) : ok(code) | ||
} | ||
} | ||
/** @type {Tokenizer} */ | ||
/** | ||
* Domain. | ||
* | ||
* ```markdown | ||
* > | a https://example.org b | ||
* ^^^^^^^^^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeDomain(effects, ok, nok) { | ||
/** @type {boolean|undefined} */ | ||
let hasUnderscoreInLastSegment | ||
/** @type {boolean|undefined} */ | ||
/** @type {boolean | undefined} */ | ||
let underscoreInLastSegment | ||
/** @type {boolean | undefined} */ | ||
let underscoreInLastLastSegment | ||
/** @type {boolean | undefined} */ | ||
let seen | ||
return domainInside | ||
let hasUnderscoreInLastLastSegment | ||
return domain | ||
/** @type {State} */ | ||
function domain(code) { | ||
if (code === 38) { | ||
return effects.check( | ||
namedCharacterReference, | ||
done, | ||
punctuationContinuation | ||
)(code) | ||
/** | ||
* In domain. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a | ||
* ^^^^^^^^^^^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function domainInside(code) { | ||
// Check whether this marker, which is a trailing punctuation | ||
// marker, optionally followed by more trailing markers, and then | ||
// followed by an end. | ||
if (code === 46 || code === 95) { | ||
return effects.check(trail, domainAfter, domainAtPunctuation)(code) | ||
} | ||
if (code === 46 || code === 95) { | ||
return effects.check(punctuation, done, punctuationContinuation)(code) | ||
} // GH documents that only alphanumerics (other than `-`, `.`, and `_`) can | ||
// GH documents that only alphanumerics (other than `-`, `.`, and `_`) can | ||
// occur, which sounds like ASCII only, but they also support `www.點看.com`, | ||
@@ -405,177 +534,389 @@ // so that’s Unicode. | ||
// already has that for Unicode punctuation and whitespace, so use those. | ||
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L12>. | ||
if ( | ||
code === null || | ||
asciiControl(code) || | ||
markdownLineEndingOrSpace(code) || | ||
unicodeWhitespace(code) || | ||
(code !== 45 && unicodePunctuation(code)) | ||
) { | ||
return done(code) | ||
return domainAfter(code) | ||
} | ||
seen = true | ||
effects.consume(code) | ||
return domain | ||
return domainInside | ||
} | ||
/** @type {State} */ | ||
function punctuationContinuation(code) { | ||
if (code === 46) { | ||
hasUnderscoreInLastLastSegment = hasUnderscoreInLastSegment | ||
hasUnderscoreInLastSegment = undefined | ||
effects.consume(code) | ||
return domain | ||
/** | ||
* In domain, at potential trailing punctuation, that was not trailing. | ||
* | ||
* ```markdown | ||
* > | https://example.com | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function domainAtPunctuation(code) { | ||
// There is an underscore in the last segment of the domain | ||
if (code === 95) { | ||
underscoreInLastSegment = true | ||
} | ||
if (code === 95) hasUnderscoreInLastSegment = true | ||
// Otherwise, it’s a `.`: save the last segment underscore in the | ||
// penultimate segment slot. | ||
else { | ||
underscoreInLastLastSegment = underscoreInLastSegment | ||
underscoreInLastSegment = undefined | ||
} | ||
effects.consume(code) | ||
return domain | ||
return domainInside | ||
} | ||
/** @type {State} */ | ||
function done(code) { | ||
if (!hasUnderscoreInLastLastSegment && !hasUnderscoreInLastSegment) { | ||
return ok(code) | ||
/** | ||
* After domain. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} */ | ||
function domainAfter(code) { | ||
// Note: that’s GH says a dot is needed, but it’s not true: | ||
// <https://github.com/github/cmark-gfm/issues/279> | ||
if (underscoreInLastLastSegment || underscoreInLastSegment || !seen) { | ||
return nok(code) | ||
} | ||
return nok(code) | ||
return ok(code) | ||
} | ||
} | ||
/** @type {Tokenizer} */ | ||
/** | ||
* Path. | ||
* | ||
* ```markdown | ||
* > | a https://example.org/stuff b | ||
* ^^^^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizePath(effects, ok) { | ||
let balance = 0 | ||
return inPath | ||
/** @type {State} */ | ||
let sizeOpen = 0 | ||
let sizeClose = 0 | ||
return pathInside | ||
function inPath(code) { | ||
if (code === 38) { | ||
return effects.check( | ||
namedCharacterReference, | ||
ok, | ||
continuedPunctuation | ||
)(code) | ||
/** | ||
* In path. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a | ||
* ^^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function pathInside(code) { | ||
if (code === 40) { | ||
sizeOpen++ | ||
effects.consume(code) | ||
return pathInside | ||
} | ||
if (code === 40) { | ||
balance++ | ||
// To do: `markdown-rs` also needs this. | ||
// If this is a paren, and there are less closings than openings, | ||
// we don’t check for a trail. | ||
if (code === 41 && sizeClose < sizeOpen) { | ||
return pathAtPunctuation(code) | ||
} | ||
if (code === 41) { | ||
return effects.check( | ||
punctuation, | ||
parenAtPathEnd, | ||
continuedPunctuation | ||
)(code) | ||
// Check whether this trailing punctuation marker is optionally | ||
// followed by more trailing markers, and then followed | ||
// by an end. | ||
if ( | ||
code === 33 || | ||
code === 34 || | ||
code === 38 || | ||
code === 39 || | ||
code === 41 || | ||
code === 42 || | ||
code === 44 || | ||
code === 46 || | ||
code === 58 || | ||
code === 59 || | ||
code === 60 || | ||
code === 63 || | ||
code === 93 || | ||
code === 95 || | ||
code === 126 | ||
) { | ||
return effects.check(trail, ok, pathAtPunctuation)(code) | ||
} | ||
if (pathEnd(code)) { | ||
if ( | ||
code === null || | ||
markdownLineEndingOrSpace(code) || | ||
unicodeWhitespace(code) | ||
) { | ||
return ok(code) | ||
} | ||
if (trailingPunctuation(code)) { | ||
return effects.check(punctuation, ok, continuedPunctuation)(code) | ||
} | ||
effects.consume(code) | ||
return inPath | ||
return pathInside | ||
} | ||
/** @type {State} */ | ||
function continuedPunctuation(code) { | ||
/** | ||
* In path, at potential trailing punctuation, that was not trailing. | ||
* | ||
* ```markdown | ||
* > | https://example.com/a"b | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function pathAtPunctuation(code) { | ||
// Count closing parens. | ||
if (code === 41) { | ||
sizeClose++ | ||
} | ||
effects.consume(code) | ||
return inPath | ||
return pathInside | ||
} | ||
/** @type {State} */ | ||
function parenAtPathEnd(code) { | ||
balance-- | ||
return balance < 0 ? ok(code) : continuedPunctuation(code) | ||
} | ||
} | ||
/** @type {Tokenizer} */ | ||
function tokenizeNamedCharacterReference(effects, ok, nok) { | ||
return start | ||
/** @type {State} */ | ||
/** | ||
* Trail. | ||
* | ||
* This calls `ok` if this *is* the trail, followed by an end, which means | ||
* the entire trail is not part of the link. | ||
* It calls `nok` if this *is* part of the link. | ||
* | ||
* ```markdown | ||
* > | https://example.com"). | ||
* ^^^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeTrail(effects, ok, nok) { | ||
return trail | ||
function start(code) { | ||
effects.consume(code) | ||
return inside | ||
} | ||
/** @type {State} */ | ||
/** | ||
* In trail of domain or path. | ||
* | ||
* ```markdown | ||
* > | https://example.com"). | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function trail(code) { | ||
// Regular trailing punctuation. | ||
if ( | ||
code === 33 || | ||
code === 34 || | ||
code === 39 || | ||
code === 41 || | ||
code === 42 || | ||
code === 44 || | ||
code === 46 || | ||
code === 58 || | ||
code === 59 || | ||
code === 63 || | ||
code === 95 || | ||
code === 126 | ||
) { | ||
effects.consume(code) | ||
return trail | ||
} | ||
function inside(code) { | ||
if (asciiAlpha(code)) { | ||
// `&` followed by one or more alphabeticals and then a `;`, is | ||
// as a whole considered as trailing punctuation. | ||
// In all other cases, it is considered as continuation of the URL. | ||
if (code === 38) { | ||
effects.consume(code) | ||
return inside | ||
return trailCharRefStart | ||
} | ||
// Needed because we allow literals after `[`, as we fix: | ||
// <https://github.com/github/cmark-gfm/issues/278>. | ||
// Check that it is not followed by `(` or `[`. | ||
if (code === 93) { | ||
effects.consume(code) | ||
return trailBracketAfter | ||
} | ||
if ( | ||
// `<` is an end. | ||
code === 60 || | ||
// So is whitespace. | ||
code === null || | ||
markdownLineEndingOrSpace(code) || | ||
unicodeWhitespace(code) | ||
) { | ||
return ok(code) | ||
} | ||
return nok(code) | ||
} | ||
/** | ||
* In trail, after `]`. | ||
* | ||
* > 👉 **Note**: this deviates from `cmark-gfm` to fix a bug. | ||
* > See end of <https://github.com/github/cmark-gfm/issues/278> for more. | ||
* | ||
* ```markdown | ||
* > | https://example.com]( | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function trailBracketAfter(code) { | ||
// Whitespace or something that could start a resource or reference is the end. | ||
// Switch back to trail otherwise. | ||
if ( | ||
code === null || | ||
code === 40 || | ||
code === 91 || | ||
markdownLineEndingOrSpace(code) || | ||
unicodeWhitespace(code) | ||
) { | ||
return ok(code) | ||
} | ||
return trail(code) | ||
} | ||
/** | ||
* In character-reference like trail, after `&`. | ||
* | ||
* ```markdown | ||
* > | https://example.com&). | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function trailCharRefStart(code) { | ||
// When non-alpha, it’s not a trail. | ||
return asciiAlpha(code) ? trailCharRefInside(code) : nok(code) | ||
} | ||
/** | ||
* In character-reference like trail. | ||
* | ||
* ```markdown | ||
* > | https://example.com&). | ||
* ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function trailCharRefInside(code) { | ||
// Switch back to trail if this is well-formed. | ||
if (code === 59) { | ||
effects.consume(code) | ||
return after | ||
return trail | ||
} | ||
if (asciiAlpha(code)) { | ||
effects.consume(code) | ||
return trailCharRefInside | ||
} | ||
// It’s not a trail. | ||
return nok(code) | ||
} | ||
/** @type {State} */ | ||
function after(code) { | ||
// If the named character reference is followed by the end of the path, it’s | ||
// not continued punctuation. | ||
return pathEnd(code) ? ok(code) : nok(code) | ||
} | ||
} | ||
/** @type {Tokenizer} */ | ||
function tokenizePunctuation(effects, ok, nok) { | ||
/** | ||
* Dot in email domain trail. | ||
* | ||
* This calls `ok` if this *is* the trail, followed by an end, which means | ||
* the trail is not part of the link. | ||
* It calls `nok` if this *is* part of the link. | ||
* | ||
* ```markdown | ||
* > | contact@example.org. | ||
* ^ | ||
* ``` | ||
* | ||
* @this {TokenizeContext} | ||
* @type {Tokenizer} | ||
*/ | ||
function tokenizeEmailDomainDotTrail(effects, ok, nok) { | ||
return start | ||
/** @type {State} */ | ||
/** | ||
* Dot. | ||
* | ||
* ```markdown | ||
* > | contact@example.org. | ||
* ^ ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function start(code) { | ||
// Must be dot. | ||
effects.consume(code) | ||
return after | ||
} | ||
/** @type {State} */ | ||
/** | ||
* After dot. | ||
* | ||
* ```markdown | ||
* > | contact@example.org. | ||
* ^ ^ | ||
* ``` | ||
* | ||
* @type {State} | ||
*/ | ||
function after(code) { | ||
// Check the next. | ||
if (trailingPunctuation(code)) { | ||
effects.consume(code) | ||
return after | ||
} // If the punctuation marker is followed by the end of the path, it’s not | ||
// continued punctuation. | ||
return pathEnd(code) ? ok(code) : nok(code) | ||
// Not a trail if alphanumeric. | ||
return asciiAlphanumeric(code) ? nok(code) : ok(code) | ||
} | ||
} | ||
/** | ||
* @param {Code} code | ||
* @returns {boolean} | ||
* See: | ||
* <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L156>. | ||
* | ||
* @type {Previous} | ||
*/ | ||
function trailingPunctuation(code) { | ||
function previousWww(code) { | ||
return ( | ||
code === 33 || | ||
code === 34 || | ||
code === 39 || | ||
code === 41 || | ||
code === null || | ||
code === 40 || | ||
code === 42 || | ||
code === 44 || | ||
code === 46 || | ||
code === 58 || | ||
code === 59 || | ||
code === 60 || | ||
code === 63 || | ||
code === 95 || | ||
code === 126 | ||
code === 91 || | ||
code === 93 || | ||
code === 126 || | ||
markdownLineEndingOrSpace(code) | ||
) | ||
} | ||
/** | ||
* @param {Code} code | ||
* @returns {boolean} | ||
* See: | ||
* <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L214>. | ||
* | ||
* @type {Previous} | ||
*/ | ||
function previousProtocol(code) { | ||
// Any preceding code is accepted unless `asciiAlpha` reports it as a letter. | ||
return !asciiAlpha(code) | ||
} | ||
function pathEnd(code) { | ||
return code === null || code === 60 || markdownLineEndingOrSpace(code) | ||
/** | ||
* @this {TokenizeContext} | ||
* @type {Previous} | ||
*/ | ||
function previousEmail(code) { | ||
// Do not allow a slash “inside” atext. | ||
// The reference code is a bit weird, but that’s what it results in. | ||
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L307>. | ||
// Other than slash, every preceding character is allowed. | ||
// Character code 47 is `/`; a preceding code that `gfmAtext` accepts is | ||
// rejected as well. | ||
return !(code === 47 || gfmAtext(code)) | ||
} | ||
/** | ||
@@ -585,3 +926,2 @@ * @param {Code} code | ||
*/ | ||
function gfmAtext(code) { | ||
@@ -596,24 +936,3 @@ return ( | ||
} | ||
/** @type {Previous} */ | ||
function previousWww(code) { | ||
return ( | ||
code === null || | ||
code === 40 || | ||
code === 42 || | ||
code === 95 || | ||
code === 126 || | ||
markdownLineEndingOrSpace(code) | ||
) | ||
} | ||
/** @type {Previous} */ | ||
function previousHttp(code) { | ||
return code === null || !asciiAlpha(code) | ||
} | ||
/** @type {Previous} */ | ||
function previousEmail(code) { | ||
return code !== 47 && previousHttp(code) | ||
} | ||
/** | ||
@@ -623,10 +942,7 @@ * @param {Array<Event>} events | ||
*/ | ||
function previousUnbalanced(events) { | ||
let index = events.length | ||
let result = false | ||
while (index--) { | ||
const token = events[index][1] | ||
if ( | ||
@@ -638,5 +954,6 @@ (token.type === 'labelLink' || token.type === 'labelImage') && | ||
break | ||
} // @ts-expect-error If we’ve seen this token, and it was marked as not | ||
} | ||
// @ts-expect-error If we’ve seen this token, and it was marked as not | ||
// having any unbalanced bracket before it, we can exit. | ||
if (token._gfmAutolinkLiteralWalkedInto) { | ||
@@ -647,3 +964,2 @@ result = false | ||
} | ||
if (events.length > 0 && !result) { | ||
@@ -654,4 +970,3 @@ // @ts-expect-error Mark the last token as “walked into” w/o finding | ||
} | ||
return result | ||
} |
{ | ||
"name": "micromark-extension-gfm-autolink-literal", | ||
"version": "1.0.3", | ||
"version": "1.0.4", | ||
"description": "micromark extension to support GFM autolink literals", | ||
@@ -46,7 +46,6 @@ "license": "MIT", | ||
"micromark-util-symbol": "^1.0.0", | ||
"micromark-util-types": "^1.0.0", | ||
"uvu": "^0.5.0" | ||
"micromark-util-types": "^1.0.0" | ||
}, | ||
"devDependencies": { | ||
"@types/tape": "^4.0.0", | ||
"@types/node": "^18.0.0", | ||
"c8": "^7.0.0", | ||
@@ -58,15 +57,16 @@ "create-gfm-fixtures": "^1.0.0", | ||
"rehype": "^12.0.0", | ||
"remark-cli": "^10.0.0", | ||
"remark-cli": "^11.0.0", | ||
"remark-preset-wooorm": "^9.0.0", | ||
"rimraf": "^3.0.0", | ||
"tape": "^5.0.0", | ||
"type-coverage": "^2.0.0", | ||
"typescript": "^4.0.0", | ||
"xo": "^0.47.0" | ||
"typescript": "^5.0.0", | ||
"xo": "^0.54.0" | ||
}, | ||
"scripts": { | ||
"build": "rimraf \"dev/**/*.d.ts\" \"test/**/*.d.ts\" && tsc && type-coverage && micromark-build", | ||
"prepack": "npm run build && npm run format", | ||
"build": "tsc --build --clean && tsc --build && type-coverage && micromark-build", | ||
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix", | ||
"test-api": "node --conditions development test/index.js", | ||
"test-coverage": "c8 --check-coverage --branches 100 --functions 100 --lines 100 --statements 100 --reporter lcov node --conditions development test/index.js", | ||
"test-api-prod": "node --conditions production test/index.js", | ||
"test-api-dev": "node --conditions development test/index.js", | ||
"test-api": "npm run test-api-dev && npm run test-api-prod", | ||
"test-coverage": "c8 --100 --reporter lcov npm run test-api", | ||
"test": "npm run build && npm run format && npm run test-coverage" | ||
@@ -85,12 +85,20 @@ }, | ||
"rules": { | ||
"no-await-in-loop": "off", | ||
"no-control-regex": "off", | ||
"node/file-extension-in-import": "off", | ||
"unicorn/no-this-assignment": "off", | ||
"unicorn/prefer-node-protocol": "off" | ||
} | ||
"complexity": "off", | ||
"n/file-extension-in-import": "off", | ||
"unicorn/no-this-assignment": "off" | ||
}, | ||
"overrides": [ | ||
{ | ||
"files": [ | ||
"test/**/*.js" | ||
], | ||
"rules": { | ||
"no-await-in-loop": 0 | ||
} | ||
} | ||
] | ||
}, | ||
"remarkConfig": { | ||
"plugins": [ | ||
"preset-wooorm" | ||
"remark-preset-wooorm" | ||
] | ||
@@ -97,0 +105,0 @@ }, |
282
readme.md
@@ -11,8 +11,29 @@ # micromark-extension-gfm-autolink-literal | ||
**[micromark][]** extension to support GitHub flavored markdown (GFM) [literal | ||
autolinks][]. | ||
[micromark][] extensions to support GFM [literal autolinks][spec]. | ||
## Contents | ||
* [What is this?](#what-is-this) | ||
* [When to use this](#when-to-use-this) | ||
* [Install](#install) | ||
* [Use](#use) | ||
* [API](#api) | ||
* [`gfmAutolinkLiteral`](#gfmautolinkliteral) | ||
* [`gfmAutolinkLiteralHtml`](#gfmautolinkliteralhtml) | ||
* [Bugs](#bugs) | ||
* [Authoring](#authoring) | ||
* [HTML](#html) | ||
* [CSS](#css) | ||
* [Syntax](#syntax) | ||
* [Types](#types) | ||
* [Compatibility](#compatibility) | ||
* [Security](#security) | ||
* [Related](#related) | ||
* [Contribute](#contribute) | ||
* [License](#license) | ||
## What is this? | ||
This package is a micromark extension to add support for GFM literal autolinks. | ||
This package contains extensions that add support for the extra autolink syntax | ||
enabled by GFM to [`micromark`][micromark]. | ||
@@ -28,24 +49,28 @@ GitHub employs different algorithms to autolink: one at parse time and one at | ||
The second algorithm is performed by | ||
[`mdast-util-gfm-autolink-literal`][mdast-autolink-literal]. | ||
[`mdast-util-gfm-autolink-literal`][mdast-util-gfm-autolink-literal]. | ||
The `html` part of this micromark extension does not operate on an AST and hence | ||
can’t perform the second algorithm. | ||
The implementation of autolink literal on github.com is currently buggy. | ||
The bugs have been reported on [`cmark-gfm`][cmark-gfm]. | ||
This micromark extension matches github.com except for its bugs. | ||
## When to use this | ||
In many cases, when working with micromark, you’d want to use | ||
[`micromark-extension-gfm`][micromark-extension-gfm] instead, which combines | ||
this package with other GFM features. | ||
This project is useful when you want to support autolink literals in markdown. | ||
When working with syntax trees, you’d want to combine this package with | ||
[`mdast-util-gfm-autolink-literal`][mdast-autolink-literal] (or | ||
[`mdast-util-gfm`][mdast-util-gfm] when using `micromark-extension-gfm`). | ||
You can use these extensions when you are working with [`micromark`][micromark]. | ||
To support all GFM features, use | ||
[`micromark-extension-gfm`][micromark-extension-gfm] instead. | ||
These tools are all rather low-level. | ||
In most cases, you’d instead want to use [`remark-gfm`][remark-gfm] with | ||
[remark][]. | ||
When you need a syntax tree, combine this package with | ||
[`mdast-util-gfm-autolink-literal`][mdast-util-gfm-autolink-literal]. | ||
All these packages are used in [`remark-gfm`][remark-gfm], which focusses on | ||
making it easier to transform content by abstracting these internals away. | ||
## Install | ||
This package is [ESM only][esm]. | ||
In Node.js (version 12.20+, 14.14+, or 16.0+), install with [npm][]: | ||
In Node.js (version 16+), install with [npm][]: | ||
@@ -56,13 +81,13 @@ ```sh | ||
In Deno with [Skypack][]: | ||
In Deno with [`esm.sh`][esmsh]: | ||
```js | ||
import {gfmAutolinkLiteral, gfmAutolinkLiteralHtml} from 'https://cdn.skypack.dev/micromark-extension-gfm-autolink-literal@1?dts' | ||
import {gfmAutolinkLiteral, gfmAutolinkLiteralHtml} from 'https://esm.sh/micromark-extension-gfm-autolink-literal@1' | ||
``` | ||
In browsers with [Skypack][]: | ||
In browsers with [`esm.sh`][esmsh]: | ||
```html | ||
<script type="module"> | ||
import {gfmAutolinkLiteral, gfmAutolinkLiteralHtml} from 'https://cdn.skypack.dev/micromark-extension-gfm-autolink-literal@1?min' | ||
import {gfmAutolinkLiteral, gfmAutolinkLiteralHtml} from 'https://esm.sh/micromark-extension-gfm-autolink-literal@1?bundle' | ||
</script> | ||
@@ -96,8 +121,8 @@ ``` | ||
This package exports the following identifiers: `gfmAutolinkLiteral`, | ||
`gfmAutolinkLiteralHtml`. | ||
This package exports the identifiers | ||
[`gfmAutolinkLiteral`][api-gfm-autolink-literal] and | ||
[`gfmAutolinkLiteralHtml`][api-gfm-autolink-literal-html]. | ||
There is no default export. | ||
The export map supports the endorsed | ||
[`development` condition](https://nodejs.org/api/packages.html#packages_resolving_user_conditions). | ||
The export map supports the [`development` condition][development]. | ||
Run `node --conditions development module.js` to get instrumented dev code. | ||
@@ -108,32 +133,193 @@ Without this condition, production code is loaded. | ||
An extension for micromark to parse GFM autolink literals (can be passed in | ||
`extensions`). | ||
Extension for `micromark` that can be passed in `extensions` to enable GFM | ||
autolink literal syntax ([`Extension`][micromark-extension]). | ||
### `gfmAutolinkLiteralHtml` | ||
An extension to compile them to HTML (can be passed in `htmlExtensions`). | ||
Extension for `micromark` that can be passed in `htmlExtensions` to support | ||
GFM autolink literals when serializing to HTML | ||
([`HtmlExtension`][micromark-html-extension]). | ||
## Bugs | ||
GitHub’s own algorithm to parse autolink literals contains three bugs. | ||
A smaller bug is left unfixed in this project for consistency. | ||
Two main bugs are not present in this project. | ||
The issues relating to autolink literals are: | ||
* [GFM autolink extension (`www.`, `https?://` parts): links don’t work when | ||
after bracket](https://github.com/github/cmark-gfm/issues/278)\ | ||
fixed here ✅ | ||
* [GFM autolink extension (`www.` part): uppercase does not match on | ||
issues/PRs/comments](https://github.com/github/cmark-gfm/issues/280)\ | ||
fixed here ✅ | ||
* [GFM autolink extension (`www.` part): the word `www` | ||
matches](https://github.com/github/cmark-gfm/issues/279)\ | ||
present here for consistency | ||
## Authoring | ||
It is recommended to use labels, either with a resource or a definition, | ||
instead of autolink literals, as those allow relative URLs and descriptive | ||
text to explain the URL in prose. | ||
## HTML | ||
GFM autolink literals relate to the `<a>` element in HTML. | ||
See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info. | ||
When an email autolink is used, the string `mailto:` is prepended when | ||
generating the `href` attribute of the hyperlink. | ||
When a www autolink is used, the string `http://` is prepended. | ||
## CSS | ||
As hyperlinks are the fundamental thing that makes the web, you will most | ||
definitely have CSS for `a` elements already. | ||
The same CSS can be used for autolink literals, too. | ||
GitHub itself does not apply interesting CSS to autolink literals. | ||
For any link, it currently (June 2022) [uses][css]: | ||
```css | ||
a { | ||
background-color: transparent; | ||
color: #58a6ff; | ||
text-decoration: none; | ||
} | ||
a:active, | ||
a:hover { | ||
outline-width: 0; | ||
} | ||
a:hover { | ||
text-decoration: underline; | ||
} | ||
a:not([href]) { | ||
color: inherit; | ||
text-decoration: none; | ||
} | ||
``` | ||
## Syntax | ||
Autolink literals form with, roughly, the following BNF: | ||
```bnf | ||
gfm_autolink_literal ::= http_autolink | www_autolink | email_autolink | ||
; Restriction: the code before must be `www_autolink_before`. | ||
; Restriction: the code after `.` must not be eof. | ||
www_autolink ::= 3('w' | 'W') '.' [domain [path]] | ||
www_autolink_before ::= eof | eol | space_or_tab | '(' | '*' | '_' | '[' | ']' | '~' | ||
; Restriction: the code before must be `http_autolink_before`. | ||
; Restriction: the code after the protocol must be `http_autolink_protocol_after`. | ||
http_autolink ::= ('h' | 'H') 2('t' | 'T') ('p' | 'P') ['s' | 'S'] ':' 2'/' domain [path] | ||
http_autolink_before ::= byte - ascii_alpha | ||
http_autolink_protocol_after ::= byte - eof - eol - ascii_control - unicode_whitespace - unicode_punctuation | ||
; Restriction: the code before must be `email_autolink_before`. | ||
; Restriction: `ascii_digit` may not occur in the last label part of the label. | ||
email_autolink ::= 1*('+' | '-' | '.' | '_' | ascii_alphanumeric) '@' 1*(1*label_segment label_dot_cont) 1*label_segment | ||
email_autolink_before ::= byte - ascii_alpha - '/' | ||
; Restriction: `_` may not occur in the last two domain parts. | ||
domain ::= 1*(url_ampt_cont | domain_punct_cont | '-' | byte - eof - ascii_control - unicode_whitespace - unicode_punctuation) | ||
; Restriction: must not be followed by `punct`. | ||
domain_punct_cont ::= '.' | '_' | ||
; Restriction: must not be followed by `char_ref`. | ||
url_ampt_cont ::= '&' | ||
; Restriction: a counter `balance = 0` is increased for every `(`, and decreased for every `)`. | ||
; Restriction: `)` must not be `paren_at_end`. | ||
path ::= 1*(url_ampt_cont | path_punctuation_cont | '(' | ')' | byte - eof - eol - space_or_tab) | ||
; Restriction: must not be followed by `punct`. | ||
path_punctuation_cont ::= trailing_punctuation - '<' | ||
; Restriction: must be followed by `punct` and `balance` must be less than `0`. | ||
paren_at_end ::= ')' | ||
label_segment ::= label_dash_underscore_cont | ascii_alpha | ascii_digit | ||
; Restriction: if followed by `punct`, the whole email autolink is invalid. | ||
label_dash_underscore_cont ::= '-' | '_' | ||
; Restriction: must not be followed by `punct`. | ||
label_dot_cont ::= '.' | ||
punct ::= *trailing_punctuation ( byte - eof - eol - space_or_tab - '<' ) | ||
char_ref ::= *ascii_alpha ';' path_end | ||
trailing_punctuation ::= '!' | '"' | '\'' | ')' | '*' | ',' | '.' | ':' | ';' | '<' | '?' | '_' | '~' | ||
``` | ||
The grammar for GFM autolink literal is very relaxed: basically anything | ||
except for whitespace is allowed after a prefix. | ||
To use whitespace and otherwise impossible characters in URLs, you can use | ||
percent encoding: | ||
```markdown | ||
https://example.com/alpha%20bravo | ||
``` | ||
Yields: | ||
```html | ||
<p><a href="https://example.com/alpha%20bravo">https://example.com/alpha%20bravo</a></p> | ||
``` | ||
There are several cases where incorrect encoding of URLs would, in other | ||
languages, result in a parse error. | ||
In markdown, there are no errors, and URLs are normalized. | ||
In addition, many characters are percent encoded | ||
([`sanitizeUri`][micromark-util-sanitize-uri]). | ||
For example: | ||
```markdown | ||
www.a👍b% | ||
``` | ||
Yields: | ||
```html | ||
<p><a href="http://www.a%F0%9F%91%8Db%25">www.a👍b%</a></p> | ||
``` | ||
There is a big difference between how www and protocol literals work | ||
compared to how email literals work. | ||
The first two are done when parsing, and work like anything else in | ||
markdown. | ||
But email literals are handled afterwards: when everything is parsed, we | ||
look back at the events to figure out if there were email addresses. | ||
This particularly affects how they interleave with character escapes and | ||
character references. | ||
## Types | ||
This package is fully typed with [TypeScript][]. | ||
There are no additional exported types. | ||
It exports no additional types. | ||
## Compatibility | ||
This package is at least compatible with all maintained versions of Node.js. | ||
As of now, that is Node.js 12.20+, 14.14+, and 16.0+. | ||
It also works in Deno and modern browsers. | ||
Projects maintained by the unified collective are compatible with all maintained | ||
versions of Node.js. | ||
As of now, that is Node.js 16+. | ||
Our projects sometimes work with older versions, but this is not guaranteed. | ||
These extensions work with `micromark` version 3+. | ||
## Security | ||
This package is safe. | ||
Unlike other links in CommonMark, which allow arbitrary protocols, this | ||
construct always produces safe links. | ||
## Related | ||
* [`syntax-tree/mdast-util-gfm-autolink-literal`][mdast-autolink-literal] | ||
— support GFM autolink literals in mdast | ||
* [`syntax-tree/mdast-util-gfm`][mdast-util-gfm] | ||
— support GFM in mdast | ||
* [`remarkjs/remark-gfm`][remark-gfm] | ||
— support GFM in remark | ||
* [`micromark-extension-gfm`][micromark-extension-gfm] | ||
— support all of GFM | ||
* [`mdast-util-gfm-autolink-literal`][mdast-util-gfm-autolink-literal] | ||
— support GFM autolink literals in mdast | ||
* [`mdast-util-gfm`][mdast-util-gfm] | ||
— support all of GFM in mdast | ||
* [`remark-gfm`][remark-gfm] | ||
— support all of GFM in remark | ||
@@ -184,3 +370,3 @@ ## Contribute | ||
[skypack]: https://www.skypack.dev | ||
[esmsh]: https://esm.sh | ||
@@ -201,14 +387,30 @@ [license]: license | ||
[development]: https://nodejs.org/api/packages.html#packages_resolving_user_conditions | ||
[micromark]: https://github.com/micromark/micromark | ||
[remark]: https://github.com/remarkjs/remark | ||
[micromark-extension-gfm]: https://github.com/micromark/micromark-extension-gfm | ||
[mdast-autolink-literal]: https://github.com/syntax-tree/mdast-util-gfm-autolink-literal | ||
[micromark-util-sanitize-uri]: https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri | ||
[micromark-extension]: https://github.com/micromark/micromark#syntaxextension | ||
[micromark-html-extension]: https://github.com/micromark/micromark#htmlextension | ||
[mdast-util-gfm]: https://github.com/syntax-tree/mdast-util-gfm | ||
[mdast-util-gfm-autolink-literal]: https://github.com/syntax-tree/mdast-util-gfm-autolink-literal | ||
[remark-gfm]: https://github.com/remarkjs/remark-gfm | ||
[literal autolinks]: https://github.github.com/gfm/#autolinks-extension- | ||
[spec]: https://github.github.com/gfm/#autolinks-extension- | ||
[html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element | ||
[css]: https://github.com/sindresorhus/github-markdown-css | ||
[cmark-gfm]: https://github.com/github/cmark-gfm | ||
[api-gfm-autolink-literal]: #gfmautolinkliteral | ||
[api-gfm-autolink-literal-html]: #gfmautolinkliteralhtml |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
65812
4
12
1945
410
1
- Removed uvu@^0.5.0
- Removed dequal@2.0.3 (transitive)
- Removed diff@5.2.0 (transitive)
- Removed kleur@4.1.5 (transitive)
- Removed mri@1.2.0 (transitive)
- Removed sade@1.8.1 (transitive)
- Removed uvu@0.5.6 (transitive)