tent-markdown
Advanced tools
Comparing version
@@ -83,2 +83,121 @@ /* | ||
/**
 * Appends one regex character-class fragment covering [start, end] to
 * `charClass`: a single character when both bounds are equal, otherwise a
 * "first-last" range.
 *
 * Each bound is emitted as ONE UTF-16 code unit. String.fromCharCode keeps
 * only the low 16 bits of its argument, so a code point above U+FFFF used to
 * be folded silently into an unrelated BMP range (0x2A700 became 0xA700).
 * Ranges that start above U+FFFF are now skipped, and ranges that straddle
 * the limit are clamped to U+FFFF.
 *
 * @param {string[]} charClass - fragment list to extend (mutated in place)
 * @param {number} start - first code point of the range
 * @param {number} end - last code point of the range (inclusive)
 * @returns {string[]} the same `charClass` array
 */
function addCharsToCharClass(charClass, start, end) {
  var MAX_CODE_UNIT = 0xffff;

  if (start > MAX_CODE_UNIT) {
    // The whole range is outside the BMP: nothing representable to add.
    return charClass;
  }

  // Never let the upper bound wrap around through 16-bit truncation.
  var last = Math.min(end, MAX_CODE_UNIT);

  var s = String.fromCharCode(start);
  if (last !== start) {
    s += "-" + String.fromCharCode(last);
  }
  charClass.push(s);
  return charClass;
}
// Expose the range helper on the twttr.txt namespace.
twttr.txt.addCharsToCharClass = addCharsToCharClass; | ||
// Character-class fragments for the non-Latin scripts accepted inside a
// hashtag. Each call below appends one range (or one single character when
// both bounds are equal); the fragments are joined and registered as
// regexen.nonLatinHashtagChars at the end of this table.
var nonLatinHashtagChars = []; | ||
// Cyrillic | ||
addCharsToCharClass(nonLatinHashtagChars, 0x0400, 0x04ff); // Cyrillic | ||
addCharsToCharClass(nonLatinHashtagChars, 0x0500, 0x0527); // Cyrillic Supplement | ||
addCharsToCharClass(nonLatinHashtagChars, 0x2de0, 0x2dff); // Cyrillic Extended A | ||
addCharsToCharClass(nonLatinHashtagChars, 0xa640, 0xa69f); // Cyrillic Extended B | ||
// Hebrew | ||
addCharsToCharClass(nonLatinHashtagChars, 0x0591, 0x05bf); // Hebrew | ||
addCharsToCharClass(nonLatinHashtagChars, 0x05c1, 0x05c2); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x05c4, 0x05c5); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x05c7, 0x05c7); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x05d0, 0x05ea); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x05f0, 0x05f4); | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfb12, 0xfb28); // Hebrew Presentation Forms | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfb2a, 0xfb36); | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfb38, 0xfb3c); | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfb3e, 0xfb3e); | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfb40, 0xfb41); | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfb43, 0xfb44); | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfb46, 0xfb4f); | ||
// Arabic | ||
addCharsToCharClass(nonLatinHashtagChars, 0x0610, 0x061a); // Arabic | ||
addCharsToCharClass(nonLatinHashtagChars, 0x0620, 0x065f); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x066e, 0x06d3); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x06d5, 0x06dc); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x06de, 0x06e8); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x06ea, 0x06ef); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x06fa, 0x06fc); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x06ff, 0x06ff); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x0750, 0x077f); // Arabic Supplement | ||
addCharsToCharClass(nonLatinHashtagChars, 0x08a0, 0x08a0); // Arabic Extended A | ||
addCharsToCharClass(nonLatinHashtagChars, 0x08a2, 0x08ac); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x08e4, 0x08fe); | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfb50, 0xfbb1); // Arabic Pres. Forms A | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfbd3, 0xfd3d); | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfd50, 0xfd8f); | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfd92, 0xfdc7); | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfdf0, 0xfdfb); | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfe70, 0xfe74); // Arabic Pres. Forms B | ||
addCharsToCharClass(nonLatinHashtagChars, 0xfe76, 0xfefc); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x200c, 0x200c); // Zero-Width Non-Joiner | ||
// Thai | ||
addCharsToCharClass(nonLatinHashtagChars, 0x0e01, 0x0e3a); | ||
addCharsToCharClass(nonLatinHashtagChars, 0x0e40, 0x0e4e); | ||
// Hangul (Korean) | ||
addCharsToCharClass(nonLatinHashtagChars, 0x1100, 0x11ff); // Hangul Jamo | ||
addCharsToCharClass(nonLatinHashtagChars, 0x3130, 0x3185); // Hangul Compatibility Jamo | ||
addCharsToCharClass(nonLatinHashtagChars, 0xA960, 0xA97F); // Hangul Jamo Extended-A | ||
addCharsToCharClass(nonLatinHashtagChars, 0xAC00, 0xD7AF); // Hangul Syllables | ||
addCharsToCharClass(nonLatinHashtagChars, 0xD7B0, 0xD7FF); // Hangul Jamo Extended-B | ||
addCharsToCharClass(nonLatinHashtagChars, 0xFFA1, 0xFFDC); // half-width Hangul | ||
// Japanese and Chinese | ||
addCharsToCharClass(nonLatinHashtagChars, 0x30A1, 0x30FA); // Katakana (full-width) | ||
addCharsToCharClass(nonLatinHashtagChars, 0x30FC, 0x30FE); // Katakana Chouon and iteration marks (full-width) | ||
addCharsToCharClass(nonLatinHashtagChars, 0xFF66, 0xFF9F); // Katakana (half-width) | ||
addCharsToCharClass(nonLatinHashtagChars, 0xFF70, 0xFF70); // Katakana Chouon (half-width) | ||
addCharsToCharClass(nonLatinHashtagChars, 0xFF10, 0xFF19); // \ | ||
addCharsToCharClass(nonLatinHashtagChars, 0xFF21, 0xFF3A); // - Latin (full-width) | ||
addCharsToCharClass(nonLatinHashtagChars, 0xFF41, 0xFF5A); // / | ||
addCharsToCharClass(nonLatinHashtagChars, 0x3041, 0x3096); // Hiragana | ||
addCharsToCharClass(nonLatinHashtagChars, 0x3099, 0x309E); // Hiragana voicing and iteration mark | ||
addCharsToCharClass(nonLatinHashtagChars, 0x3400, 0x4DBF); // Kanji (CJK Extension A) | ||
addCharsToCharClass(nonLatinHashtagChars, 0x4E00, 0x9FFF); // Kanji (Unified) | ||
// NOTE(review): every range from 0x20000 upward lies beyond U+FFFF and so
// does not fit in a single UTF-16 code unit. Confirm how addCharsToCharClass
// encodes such bounds before relying on the three enabled entries below.
// -- Disabled as it breaks the Regex. | ||
//addCharsToCharClass(nonLatinHashtagChars, 0x20000, 0x2A6DF); // Kanji (CJK Extension B) | ||
addCharsToCharClass(nonLatinHashtagChars, 0x2A700, 0x2B73F); // Kanji (CJK Extension C) | ||
addCharsToCharClass(nonLatinHashtagChars, 0x2B740, 0x2B81F); // Kanji (CJK Extension D) | ||
addCharsToCharClass(nonLatinHashtagChars, 0x2F800, 0x2FA1F); // Kanji (CJK supplement) | ||
addCharsToCharClass(nonLatinHashtagChars, 0x3003, 0x3003); // Kanji iteration mark | ||
addCharsToCharClass(nonLatinHashtagChars, 0x3005, 0x3005); // Kanji iteration mark | ||
addCharsToCharClass(nonLatinHashtagChars, 0x303B, 0x303B); // Han iteration mark | ||
// Join all fragments into one string and register it for #{nonLatinHashtagChars}
// interpolation (regexSupplant is defined outside this view).
twttr.txt.regexen.nonLatinHashtagChars = regexSupplant(nonLatinHashtagChars.join("")); | ||
// Character-class fragments for accented / extended Latin letters accepted
// inside a hashtag. Built the same way as nonLatinHashtagChars: one
// addCharsToCharClass call per range, joined and registered at the end.
var latinAccentChars = []; | ||
// Latin accented characters (subtracted 0xD7 from the range, it's a confusable multiplication sign. Looks like "x") | ||
addCharsToCharClass(latinAccentChars, 0x00c0, 0x00d6); | ||
addCharsToCharClass(latinAccentChars, 0x00d8, 0x00f6); | ||
addCharsToCharClass(latinAccentChars, 0x00f8, 0x00ff); | ||
// Latin Extended A and B | ||
addCharsToCharClass(latinAccentChars, 0x0100, 0x024f); | ||
// assorted IPA Extensions | ||
addCharsToCharClass(latinAccentChars, 0x0253, 0x0254); | ||
addCharsToCharClass(latinAccentChars, 0x0256, 0x0257); | ||
addCharsToCharClass(latinAccentChars, 0x0259, 0x0259); | ||
addCharsToCharClass(latinAccentChars, 0x025b, 0x025b); | ||
addCharsToCharClass(latinAccentChars, 0x0263, 0x0263); | ||
addCharsToCharClass(latinAccentChars, 0x0268, 0x0268); | ||
addCharsToCharClass(latinAccentChars, 0x026f, 0x026f); | ||
addCharsToCharClass(latinAccentChars, 0x0272, 0x0272); | ||
addCharsToCharClass(latinAccentChars, 0x0289, 0x0289); | ||
addCharsToCharClass(latinAccentChars, 0x028b, 0x028b); | ||
// Okina for Hawaiian (it *is* a letter character) | ||
addCharsToCharClass(latinAccentChars, 0x02bb, 0x02bb); | ||
// Combining diacritics | ||
addCharsToCharClass(latinAccentChars, 0x0300, 0x036f); | ||
// Latin Extended Additional | ||
addCharsToCharClass(latinAccentChars, 0x1e00, 0x1eff); | ||
// Join all fragments into one string and register it for #{latinAccentChars}
// interpolation (regexSupplant is defined outside this view).
twttr.txt.regexen.latinAccentChars = regexSupplant(latinAccentChars.join("")); | ||
// A hashtag must contain characters, numbers and underscores, but not all numbers.
//
// The patterns below are composed through regexSupplant, which is defined
// outside this view; presumably it replaces each #{name} placeholder with the
// matching twttr.txt.regexen entry -- confirm against its definition.

// Either hash sign may introduce a hashtag: ASCII '#' or full-width U+FF03.
// The class previously listed '#' twice, which looks like the full-width sign
// having been folded to ASCII; it is written as an escape so it survives
// re-encoding.
twttr.txt.regexen.hashSigns = /[#\uFF03]/;
// One character that makes a hashtag "not all numbers": letter or underscore.
twttr.txt.regexen.hashtagAlpha = regexSupplant(/[a-z_#{latinAccentChars}#{nonLatinHashtagChars}]/i);
// Any character allowed inside a hashtag body.
twttr.txt.regexen.hashtagAlphaNumeric = regexSupplant(/[a-z0-9_#{latinAccentChars}#{nonLatinHashtagChars}]/i);
// Text directly after a candidate that disqualifies it: another hash sign or "://".
twttr.txt.regexen.endHashtagMatch = regexSupplant(/^(?:#{hashSigns}|:\/\/)/);
// What may precede the hash sign: start/end of input, or any character that is
// neither '&' nor a hashtag-body character.
twttr.txt.regexen.hashtagBoundary = regexSupplant(/(?:^|$|[^&a-z0-9_#{latinAccentChars}#{nonLatinHashtagChars}])/);
// Capture groups: 1 = boundary, 2 = hash sign, 3 = hashtag text (at least one
// hashtagAlpha character somewhere in it).
twttr.txt.regexen.validHashtag = regexSupplant(/(#{hashtagBoundary})(#{hashSigns})(#{hashtagAlphaNumeric}*#{hashtagAlpha}#{hashtagAlphaNumeric}*)/gi);
// URL related regex collection | ||
@@ -134,2 +253,24 @@ twttr.txt.regexen.validUrlPrecedingChars = regexSupplant(/(?:[^A-Za-z0-9@@$###{invalid_chars_group}]|^)/); | ||
/**
 * Collects every hashtag found in `text` together with its position.
 *
 * @param {string} text - text to scan
 * @param {Object} [options] - accepted but not read by this function
 * @returns {Array<{hashtag: string, indices: number[]}>} one entry per
 *   hashtag; `hashtag` excludes the hash sign, `indices` is [start, end) with
 *   `start` pointing at the hash sign. Empty when `text` is falsy or contains
 *   no hash sign.
 */
twttr.extractHashtagsWithIndices = function(text, options) {
  var found = [];

  // Cheap pre-check before running the full hashtag pattern.
  if (!text || !text.match(twttr.txt.regexen.hashSigns)) {
    return found;
  }

  // replace() serves purely as an iterator over all matches; its result is discarded.
  text.replace(twttr.txt.regexen.validHashtag, function(whole, boundary, sign, name, at, source) {
    var rest = source.slice(at + whole.length);

    // Keep the candidate unless it is followed by another hash sign or "://".
    if (!rest.match(twttr.txt.regexen.endHashtagMatch)) {
      // Skip the captured boundary so the span starts at the hash sign.
      var from = at + boundary.length;
      found.push({
        hashtag: name,
        // + 1 accounts for the hash sign itself
        indices: [from, from + name.length + 1]
      });
    }
  });

  return found;
};
twttr.extractUrlsWithIndices = function(text, options) { | ||
@@ -206,2 +347,3 @@ if (!options) { | ||
expose.extractUrlsWithIndices = twttr.extractUrlsWithIndices; | ||
expose.extractHashtagsWithIndices = twttr.extractHashtagsWithIndices; | ||
@@ -208,0 +350,0 @@ })((function() { |
@@ -33,3 +33,3 @@ // Released under BSD license | ||
if ( (_m = block.match(/\[[^\]]+\]\([^\)]+\)$/)) && (_m.index < item.indices[0]) && (_m.index + _m[0].length > item.indices[0]) ) { | ||
if ( (_m = block.match(/\[[^\]]+\]\([^\)]+\)/)) && (_m.index < item.indices[0]) && (_m.index + _m[0].length > item.indices[0]) ) { | ||
// markdown link syntax, don't autolink | ||
@@ -113,2 +113,100 @@ continue; | ||
hashtags: function (block, next) { | ||
hashtags = expose.extractHashtagsWithIndices(block); | ||
if (!hashtags.length) { | ||
// no hashtags here, moving along | ||
return; | ||
} | ||
var autolink_items = []; | ||
var item; | ||
for (var i = 0; i < hashtags.length; i++) { | ||
item = hashtags[i]; | ||
if ( block.slice(0, item.indices[1] + 1).match(/\[[^\]]+\]\([^\)]+\)$/) ) { | ||
// markdown link syntax, don't autolink | ||
continue; | ||
} | ||
if ( (_m = block.match(/\[[^\]]+\]\([^\)]+\)/)) && (_m.index < item.indices[0]) && (_m.index + _m[0].length > item.indices[0]) ) { | ||
// markdown link syntax, don't autolink | ||
continue; | ||
} | ||
if ( block.slice(item.indices[0] - 1, block.length).match(/^\[[^\]]+\]\([^\)]+\)/) ) { | ||
// hashtag inside markdown link display text, don't autolink | ||
continue; | ||
} | ||
if ( block.match('`') ) { | ||
// check if the hashtag is inside code backticks | ||
var _indices = [], | ||
_regex = /`/g, | ||
m = null; | ||
while ( m = _regex.exec(block) ) { | ||
_indices.push(m.index); | ||
} | ||
var skip = false, | ||
_last_index = null; | ||
if ( _indices.length && (_indices.length % 2 === 0) ) { | ||
for (var j = 0; j < _indices.length; j += 2) { | ||
if ( (_indices[j] < item.indices[0]) && (_indices[j+1] >= item.indices[1]) ) { | ||
// matched hashtag is inside code backticks, ignore | ||
_last_index = _indices[j+1]; | ||
skip = true; | ||
} | ||
} | ||
} | ||
if (skip === true) { | ||
// don't autolink | ||
continue; | ||
} | ||
} | ||
// we're good to process this hashtag | ||
autolink_items.push(item) | ||
} | ||
if (!autolink_items.length) { | ||
// there's nothing to autolink | ||
return; | ||
} | ||
// wrap matched hashtags in links | ||
var jsonml = ["para"], | ||
_block = block, | ||
item = null, | ||
index_offset = 0, | ||
before = null; | ||
for (var i = 0; i < autolink_items.length; i++) { | ||
item = autolink_items[i]; | ||
// process text before hashtag | ||
before = _block.slice(0, item.indices[0] + index_offset); | ||
if (before.length) { | ||
jsonml = jsonml.concat( this.processInline(before) ); | ||
} | ||
// linkify hashtag | ||
jsonml.push(["link", { href: '#' + item.hashtag, rel: "hashtag" }, '#' + item.hashtag]); | ||
// discard processed text | ||
// and update index offset | ||
_block = _block.slice(item.indices[1] + index_offset, _block.length) | ||
index_offset -= before.length + (item.indices[1] - item.indices[0]) | ||
} | ||
// process remaining text | ||
jsonml = jsonml.concat( this.processInline(_block) ); | ||
return [jsonml]; | ||
}, | ||
// Taken from Markdown.dialects.Gruber.block.para | ||
@@ -354,3 +452,4 @@ para: function para( block, next ) { | ||
this.footnotes = options.footnotes || []; | ||
this.preprocessors = [this.expandFootnoteLinkHrefs].concat(options.preprocessors || []); | ||
this.hashtagURITemplate = options.hashtagURITemplate || '?hashtag={hashtag}'; | ||
this.preprocessors = [this.expandFootnoteLinkHrefs, this.expandHashtagHrefs].concat(options.preprocessors || []); | ||
} | ||
@@ -377,2 +476,17 @@ | ||
/**
 * Rewrites the href of a hashtag link node using `this.hashtagURITemplate`.
 *
 * Only nodes whose tag is 'link' and whose attributes carry rel === 'hashtag'
 * are changed; every other node is returned as received.
 *
 * @param {Array} jsonml - JsonML node; for links, index 1 holds the attributes
 * @returns {Array} the same node (its href is updated in place)
 */
Preprocesser.prototype.expandHashtagHrefs = function ( jsonml ) {
  var isHashtagLink = (jsonml[0] === 'link') && (jsonml[1].rel === 'hashtag');
  if (!isHashtagLink) {
    return jsonml;
  }

  var attrs = jsonml[1];

  // The href starts with a one-character prefix ('#'); drop it to get the tag.
  var tag = decodeURIComponent(attrs.href).substr(1);

  // Fill the {hashtag} placeholder with the URI-encoded tag.
  attrs.href = this.hashtagURITemplate.replace('{hashtag}', encodeURIComponent(tag));

  return jsonml;
};
Preprocesser.prototype.preprocessTreeNode = function ( jsonml, references ) { | ||
@@ -406,2 +520,3 @@ for (var i=0, _len = this.preprocessors.length; i < _len; i++) { | ||
window.markdown.extractUrlsWithIndices = window.twttr.extractUrlsWithIndices; | ||
window.markdown.extractHashtagsWithIndices = window.twttr.extractHashtagsWithIndices; | ||
return window.markdown; | ||
@@ -412,2 +527,3 @@ } | ||
exports.markdown.extractUrlsWithIndices = require('./link-matcher').extractUrlsWithIndices; | ||
exports.markdown.extractHashtagsWithIndices = require('./link-matcher').extractHashtagsWithIndices; | ||
@@ -414,0 +530,0 @@ return exports.markdown; |
{ | ||
"name": "tent-markdown", | ||
"version": "0.1.2", | ||
"version": "0.1.3", | ||
"description": "Tent dialect for markdown lib", | ||
@@ -5,0 +5,0 @@ "keywords": [ |
@@ -7,4 +7,5 @@ ["html", | ||
"Have you seen http://example.com? It's great" | ||
] | ||
], | ||
" isn't it?" | ||
] | ||
] |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
74711
23.85%59
7.27%1840
18.94%88
41.94%