Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

tent-markdown

Package Overview
Dependencies
Maintainers
1
Versions
13
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

tent-markdown - npm Package Compare versions

Comparing version 0.1.2 to 0.1.3

test/features/links/hashtag_in_markdown_link.json

142

lib/link-matcher.js

@@ -83,2 +83,121 @@ /*

/**
 * Appends one regex character-class fragment covering the inclusive code
 * range [start, end] to `charClass`.
 *
 * A single-character range is emitted as that character; anything wider is
 * emitted as "first-last". The fragments are meant to be joined and placed
 * inside a `[...]` character class.
 *
 * Only the Basic Multilingual Plane (U+0000 - U+FFFF) can be represented:
 * String.fromCharCode keeps just the low 16 bits of its argument, so a code
 * point such as 0x2A700 would silently turn into U+A700 and add an unrelated
 * range to the class. Ranges that start above U+FFFF are therefore skipped,
 * and ranges that merely end above it are clamped to U+FFFF.
 *
 * @param {string[]} charClass - fragment list to append to (mutated in place)
 * @param {number} start - first code unit of the range
 * @param {number} end - last code unit of the range (inclusive)
 * @returns {string[]} the same `charClass` array, to allow chaining
 */
function addCharsToCharClass(charClass, start, end) {
  var MAX_BMP_CODE_UNIT = 0xffff;
  if (start > MAX_BMP_CODE_UNIT) {
    // Entirely outside the BMP: nothing representable to add.
    return charClass;
  }
  var last = end > MAX_BMP_CODE_UNIT ? MAX_BMP_CODE_UNIT : end;
  var s = String.fromCharCode(start);
  if (last !== start) {
    s += "-" + String.fromCharCode(last);
  }
  charClass.push(s);
  return charClass;
}
// Publish the range helper on the twttr.txt namespace.
twttr.txt.addCharsToCharClass = addCharsToCharClass;

// Character-class fragments for the non-Latin scripts allowed inside a hashtag.
var nonLatinHashtagChars = [];

// Inclusive [first, last] ranges, listed in the order they are appended.
var nonLatinHashtagRanges = [
  // Cyrillic
  [0x0400, 0x04ff], // Cyrillic
  [0x0500, 0x0527], // Cyrillic Supplement
  [0x2de0, 0x2dff], // Cyrillic Extended A
  [0xa640, 0xa69f], // Cyrillic Extended B

  // Hebrew
  [0x0591, 0x05bf], // Hebrew
  [0x05c1, 0x05c2],
  [0x05c4, 0x05c5],
  [0x05c7, 0x05c7],
  [0x05d0, 0x05ea],
  [0x05f0, 0x05f4],
  [0xfb12, 0xfb28], // Hebrew Presentation Forms
  [0xfb2a, 0xfb36],
  [0xfb38, 0xfb3c],
  [0xfb3e, 0xfb3e],
  [0xfb40, 0xfb41],
  [0xfb43, 0xfb44],
  [0xfb46, 0xfb4f],

  // Arabic
  [0x0610, 0x061a], // Arabic
  [0x0620, 0x065f],
  [0x066e, 0x06d3],
  [0x06d5, 0x06dc],
  [0x06de, 0x06e8],
  [0x06ea, 0x06ef],
  [0x06fa, 0x06fc],
  [0x06ff, 0x06ff],
  [0x0750, 0x077f], // Arabic Supplement
  [0x08a0, 0x08a0], // Arabic Extended A
  [0x08a2, 0x08ac],
  [0x08e4, 0x08fe],
  [0xfb50, 0xfbb1], // Arabic Pres. Forms A
  [0xfbd3, 0xfd3d],
  [0xfd50, 0xfd8f],
  [0xfd92, 0xfdc7],
  [0xfdf0, 0xfdfb],
  [0xfe70, 0xfe74], // Arabic Pres. Forms B
  [0xfe76, 0xfefc],
  [0x200c, 0x200c], // Zero-Width Non-Joiner

  // Thai
  [0x0e01, 0x0e3a],
  [0x0e40, 0x0e4e],

  // Hangul (Korean)
  [0x1100, 0x11ff], // Hangul Jamo
  [0x3130, 0x3185], // Hangul Compatibility Jamo
  [0xA960, 0xA97F], // Hangul Jamo Extended-A
  [0xAC00, 0xD7AF], // Hangul Syllables
  [0xD7B0, 0xD7FF], // Hangul Jamo Extended-B
  [0xFFA1, 0xFFDC], // half-width Hangul

  // Japanese and Chinese
  [0x30A1, 0x30FA], // Katakana (full-width)
  [0x30FC, 0x30FE], // Katakana Chouon and iteration marks (full-width)
  [0xFF66, 0xFF9F], // Katakana (half-width)
  [0xFF70, 0xFF70], // Katakana Chouon (half-width)
  [0xFF10, 0xFF19], // \
  [0xFF21, 0xFF3A], // - Latin (full-width)
  [0xFF41, 0xFF5A], // /
  [0x3041, 0x3096], // Hiragana
  [0x3099, 0x309E], // Hiragana voicing and iteration mark
  [0x3400, 0x4DBF], // Kanji (CJK Extension A)
  [0x4E00, 0x9FFF], // Kanji (Unified)
  // Kanji (CJK Extension B), 0x20000 - 0x2A6DF, is deliberately left out:
  // it was disabled because it breaks the regex.
  // NOTE(review): the next three ranges are also above 0xFFFF; confirm that
  // addCharsToCharClass handles them as intended.
  [0x2A700, 0x2B73F], // Kanji (CJK Extension C)
  [0x2B740, 0x2B81F], // Kanji (CJK Extension D)
  [0x2F800, 0x2FA1F], // Kanji (CJK supplement)
  [0x3003, 0x3003], // Kanji iteration mark
  [0x3005, 0x3005], // Kanji iteration mark
  [0x303B, 0x303B]  // Han iteration mark
];

for (var nonLatinRangeIndex = 0; nonLatinRangeIndex < nonLatinHashtagRanges.length; nonLatinRangeIndex++) {
  addCharsToCharClass(
    nonLatinHashtagChars,
    nonLatinHashtagRanges[nonLatinRangeIndex][0],
    nonLatinHashtagRanges[nonLatinRangeIndex][1]
  );
}

// Register the joined fragments so other patterns can refer to them by name.
twttr.txt.regexen.nonLatinHashtagChars = regexSupplant(nonLatinHashtagChars.join(""));
// Character-class fragments for accented / extended Latin letters.
var latinAccentChars = [];

// Inclusive [first, last] ranges, listed in the order they are appended.
var latinAccentRanges = [
  // Latin accented characters. 0xD7 is excluded on purpose: it is the
  // multiplication sign, which is confusable with the letter "x".
  [0x00c0, 0x00d6],
  [0x00d8, 0x00f6],
  [0x00f8, 0x00ff],
  // Latin Extended A and B
  [0x0100, 0x024f],
  // assorted IPA Extensions
  [0x0253, 0x0254],
  [0x0256, 0x0257],
  [0x0259, 0x0259],
  [0x025b, 0x025b],
  [0x0263, 0x0263],
  [0x0268, 0x0268],
  [0x026f, 0x026f],
  [0x0272, 0x0272],
  [0x0289, 0x0289],
  [0x028b, 0x028b],
  // Okina for Hawaiian (it *is* a letter character)
  [0x02bb, 0x02bb],
  // Combining diacritics
  [0x0300, 0x036f],
  // Latin Extended Additional
  [0x1e00, 0x1eff]
];

for (var latinRangeIndex = 0; latinRangeIndex < latinAccentRanges.length; latinRangeIndex++) {
  addCharsToCharClass(
    latinAccentChars,
    latinAccentRanges[latinRangeIndex][0],
    latinAccentRanges[latinRangeIndex][1]
  );
}

// Register the joined fragments so other patterns can refer to them by name.
twttr.txt.regexen.latinAccentChars = regexSupplant(latinAccentChars.join(""));
// A hashtag must contain characters, numbers and underscores, but not all numbers.

// Characters that introduce a hashtag: the ASCII number sign "#" and the
// full-width number sign (U+FF03). The full-width form is written as an
// escape so it cannot be flattened into a second "#" by text normalization.
twttr.txt.regexen.hashSigns = /[#\uFF03]/;
// A character that makes a hashtag "not all numbers": letter or underscore.
twttr.txt.regexen.hashtagAlpha = regexSupplant(/[a-z_#{latinAccentChars}#{nonLatinHashtagChars}]/i);
// Any character permitted in the hashtag text.
twttr.txt.regexen.hashtagAlphaNumeric = regexSupplant(/[a-z0-9_#{latinAccentChars}#{nonLatinHashtagChars}]/i);
// Tested against the text right after a candidate: a following hash sign or
// "://" means the candidate is not a real hashtag.
twttr.txt.regexen.endHashtagMatch = regexSupplant(/^(?:#{hashSigns}|:\/\/)/);
// What may precede the hash sign: start/end of input, or a character that is
// neither "&" nor a hashtag character.
twttr.txt.regexen.hashtagBoundary = regexSupplant(/(?:^|$|[^&a-z0-9_#{latinAccentChars}#{nonLatinHashtagChars}])/);
// Capture groups: 1 = boundary, 2 = hash sign, 3 = hashtag text (which must
// contain at least one hashtagAlpha character).
twttr.txt.regexen.validHashtag = regexSupplant(/(#{hashtagBoundary})(#{hashSigns})(#{hashtagAlphaNumeric}*#{hashtagAlpha}#{hashtagAlphaNumeric}*)/gi);
// URL related regex collection

@@ -134,2 +253,24 @@ twttr.txt.regexen.validUrlPrecedingChars = regexSupplant(/(?:[^A-Za-z0-9@@$###{invalid_chars_group}]|^)/);

/**
 * Finds every hashtag in `text` and reports where it sits.
 *
 * Each result is `{ hashtag, indices: [start, end] }`, where `hashtag` is the
 * tag text without its hash sign, `start` is the offset of the hash sign and
 * `end` is `start + hashtag.length + 1`.
 *
 * A candidate is dropped when the text immediately following it matches
 * `endHashtagMatch`.
 *
 * @param {string} text - text to scan
 * @param {Object} [options] - accepted but not used by this function
 * @returns {Array} list of matches; empty when `text` is falsy or contains
 *   no hash sign
 */
twttr.extractHashtagsWithIndices = function(text, options) {
  if (!text) {
    return [];
  }
  if (!text.match(twttr.txt.regexen.hashSigns)) {
    return [];
  }

  var regexen = twttr.txt.regexen;
  var found = [];

  // String#replace is used only as an "iterate over all matches" primitive;
  // the string it builds is thrown away.
  text.replace(regexen.validHashtag, function(whole, boundary, sign, name, position, input) {
    var trailing = input.slice(position + whole.length);
    if (!trailing.match(regexen.endHashtagMatch)) {
      // `whole` includes the boundary character, so skip past it.
      var from = position + boundary.length;
      found.push({
        hashtag: name,
        indices: [from, from + name.length + 1]
      });
    }
  });

  return found;
};
twttr.extractUrlsWithIndices = function(text, options) {

@@ -206,2 +347,3 @@ if (!options) {

expose.extractUrlsWithIndices = twttr.extractUrlsWithIndices;
expose.extractHashtagsWithIndices = twttr.extractHashtagsWithIndices;

@@ -208,0 +350,0 @@ })((function() {

120

lib/tent-markdown.js

@@ -33,3 +33,3 @@ // Released under BSD license

if ( (_m = block.match(/\[[^\]]+\]\([^\)]+\)$/)) && (_m.index < item.indices[0]) && (_m.index + _m[0].length > item.indices[0]) ) {
if ( (_m = block.match(/\[[^\]]+\]\([^\)]+\)/)) && (_m.index < item.indices[0]) && (_m.index + _m[0].length > item.indices[0]) ) {
// markdown link syntax, don't autolink

@@ -113,2 +113,100 @@ continue;

hashtags: function (block, next) {
hashtags = expose.extractHashtagsWithIndices(block);
if (!hashtags.length) {
// no hashtags here, moving along
return;
}
var autolink_items = [];
var item;
for (var i = 0; i < hashtags.length; i++) {
item = hashtags[i];
if ( block.slice(0, item.indices[1] + 1).match(/\[[^\]]+\]\([^\)]+\)$/) ) {
// markdown link syntax, don't autolink
continue;
}
if ( (_m = block.match(/\[[^\]]+\]\([^\)]+\)/)) && (_m.index < item.indices[0]) && (_m.index + _m[0].length > item.indices[0]) ) {
// markdown link syntax, don't autolink
continue;
}
if ( block.slice(item.indices[0] - 1, block.length).match(/^\[[^\]]+\]\([^\)]+\)/) ) {
// hashtag inside markdown link display text, don't autolink
continue;
}
if ( block.match('`') ) {
// check if the hashtag is inside code backticks
var _indices = [],
_regex = /`/g,
m = null;
while ( m = _regex.exec(block) ) {
_indices.push(m.index);
}
var skip = false,
_last_index = null;
if ( _indices.length && (_indices.length % 2 === 0) ) {
for (var j = 0; j < _indices.length; j += 2) {
if ( (_indices[j] < item.indices[0]) && (_indices[j+1] >= item.indices[1]) ) {
// matched hashtag is inside code backticks, ignore
_last_index = _indices[j+1];
skip = true;
}
}
}
if (skip === true) {
// don't autolink
continue;
}
}
// we're good to process this hashtag
autolink_items.push(item)
}
if (!autolink_items.length) {
// there's nothing to autolink
return;
}
// wrap matched hashtags in links
var jsonml = ["para"],
_block = block,
item = null,
index_offset = 0,
before = null;
for (var i = 0; i < autolink_items.length; i++) {
item = autolink_items[i];
// process text before hashtag
before = _block.slice(0, item.indices[0] + index_offset);
if (before.length) {
jsonml = jsonml.concat( this.processInline(before) );
}
// linkify hashtag
jsonml.push(["link", { href: '#' + item.hashtag, rel: "hashtag" }, '#' + item.hashtag]);
// discard processed text
// and update index offset
_block = _block.slice(item.indices[1] + index_offset, _block.length)
index_offset -= before.length + (item.indices[1] - item.indices[0])
}
// process remaining text
jsonml = jsonml.concat( this.processInline(_block) );
return [jsonml];
},
// Taken from Markdown.dialects.Gruber.block.para

@@ -354,3 +452,4 @@ para: function para( block, next ) {

this.footnotes = options.footnotes || [];
this.preprocessors = [this.expandFootnoteLinkHrefs].concat(options.preprocessors || []);
this.hashtagURITemplate = options.hashtagURITemplate || '?hashtag={hashtag}';
this.preprocessors = [this.expandFootnoteLinkHrefs, this.expandHashtagHrefs].concat(options.preprocessors || []);
}

@@ -377,2 +476,17 @@

/**
 * Rewrites the href of a hashtag link using `this.hashtagURITemplate`.
 *
 * Only nodes of the form ["link", { rel: "hashtag", href: "#tag" }, ...] are
 * touched; every other node is returned unchanged. The leading character of
 * the decoded href is dropped and the rest is URI-encoded into the first
 * "{hashtag}" placeholder of the template.
 *
 * @param jsonml - JsonML node to inspect (its attributes are mutated in place)
 * @returns the same `jsonml` node
 */
Preprocesser.prototype.expandHashtagHrefs = function ( jsonml ) {
  // Anything that is not a link flagged as a hashtag passes straight through.
  var isHashtagLink = jsonml[0] === 'link' && jsonml[1].rel === 'hashtag';
  if (!isHashtagLink) {
    return jsonml;
  }

  var attributes = jsonml[1];
  // remove # prefix
  var tagName = decodeURIComponent(attributes.href).slice(1);
  attributes.href = this.hashtagURITemplate.replace('{hashtag}', encodeURIComponent(tagName));
  return jsonml;
};
Preprocesser.prototype.preprocessTreeNode = function ( jsonml, references ) {

@@ -406,2 +520,3 @@ for (var i=0, _len = this.preprocessors.length; i < _len; i++) {

window.markdown.extractUrlsWithIndices = window.twttr.extractUrlsWithIndices;
window.markdown.extractHashtagsWithIndices = window.twttr.extractHashtagsWithIndices;
return window.markdown;

@@ -412,2 +527,3 @@ }

exports.markdown.extractUrlsWithIndices = require('./link-matcher').extractUrlsWithIndices;
exports.markdown.extractHashtagsWithIndices = require('./link-matcher').extractHashtagsWithIndices;

@@ -414,0 +530,0 @@ return exports.markdown;

2

package.json
{
"name": "tent-markdown",
"version": "0.1.2",
"version": "0.1.3",
"description": "Tent dialect for markdown lib",

@@ -5,0 +5,0 @@ "keywords": [

@@ -7,4 +7,5 @@ ["html",

"Have you seen http://example.com? It's great"
]
],
" isn't it?"
]
]

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc