* Autolinker.js
* 0.17.0
* 0.17.1

@@ -10,2 +10,2 @@ * Copyright(c) 2015 Gregory Jacobs <>

a=/[0-9a-zA-Z][0-9a-zA-Z:]*/,b=/[^\s\0"'>\/=\x01-\x1F\x7F]+/,c=/(?:"[^"]*?"|'[^']*?'|[^'"=<>`\s]+)/,d=b.source+"(?:\\s*=\\s*"+c.source+")?";return new RegExp(["(?:","<(!DOCTYPE)","(?:","\\s+","(?:",d,"|",c.source+")",")*",">",")","|","(?:","<(/)?","("+a.source+")","(?:","\\s+",d,")*","\\s*/?",">",")"].join(""),"gi")}(),htmlCharacterEntitiesRegex:/(&nbsp;|&#160;|&lt;|&#60;|&gt;|&#62;|&quot;|&#34;|&#39;)/gi,parse:function(a){for(var b,c,d=this.htmlRegex,e=0,f=[];null!==(b=d.exec(a));){var g=b[0],h=b[1]||b[3],i=!!b[2],j=a.substring(e,b.index);j&&(c=this.parseTextAndEntityNodes(j),f.push.apply(f,c)),f.push(this.createElementNode(g,h,i)),e=b.index+g.length}if(e<a.length){var k=a.substring(e);k&&(c=this.parseTextAndEntityNodes(k),f.push.apply(f,c))}return f},parseTextAndEntityNodes:function(b){for(var c=[],d=a.Util.splitAndCapture(b,this.htmlCharacterEntitiesRegex),e=0,f=d.length;f>e;e+=2){var g=d[e],h=d[e+1];g&&c.push(this.createTextNode(g)),h&&c.push(this.createEntityNode(h))}return c},createElementNode:function(b,c,d){return new a.htmlParser.ElementNode({text:b,tagName:c.toLowerCase(),closing:d})},createEntityNode:function(b){return new a.htmlParser.EntityNode({text:b})},createTextNode:function(b){return new a.htmlParser.TextNode({text:b})}}),a.htmlParser.HtmlNode=a.Util.extend(Object,{text:"",constructor:function(b){a.Util.assign(this,b)},getType:a.Util.abstractMethod,getText:function(){return this.text}}),a.htmlParser.ElementNode=a.Util.extend(a.htmlParser.HtmlNode,{tagName:"",closing:!1,getType:function(){return"element"},getTagName:function(){return this.tagName},isClosing:function(){return this.closing}}),a.htmlParser.EntityNode=a.Util.extend(a.htmlParser.HtmlNode,{getType:function(){return"entity"}}),a.htmlParser.TextNode=a.Util.extend(a.htmlParser.HtmlNode,{getType:function(){return"text"}}),a.matchParser.MatchParser=a.Util.extend(Object,{urls:!0,email:!0,twitter:!0,phone:!0,hashtag:!1,stripPrefix:!0,matcherRegex:function(){var 
a=/(^|[^\w])@(\w{1,15})/,b=/(^|[^\w])#(\w{1,15})/,c=/(?:[\-;:&=\+\$,\w\.]+@)/,d=/(?:\+?\d{1,3}[-\s.])?\(?\d{3}\)?[-\s.]?\d{3}[-\s.]\d{4}/,e=/(?:[A-Za-z][-.+A-Za-z0-9]+:(?![A-Za-z][-.+A-Za-z0-9]+:\/\/)(?!\d+\/?)(?:\/\/)?)/,f=/(?:www\.)/,g=/[A-Za-z0-9\.\-]*[A-Za-z0-9\-]/,h=/\.(?:international|construction|contractors|enterprises|photography|productions|foundation|immobilien|industries|management|properties|technology|christmas|community|directory|education|equipment|institute|marketing|solutions|vacations|bargains|boutique|builders|catering|cleaning|clothing|computer|democrat|diamonds|graphics|holdings|lighting|partners|plumbing|supplies|training|ventures|academy|careers|company|cruises|domains|exposed|flights|florist|gallery|guitars|holiday|kitchen|neustar|okinawa|recipes|rentals|reviews|shiksha|singles|support|systems|agency|berlin|camera|center|coffee|condos|dating|estate|events|expert|futbol|kaufen|luxury|maison|monash|museum|nagoya|photos|repair|report|social|supply|tattoo|tienda|travel|viajes|villas|vision|voting|voyage|actor|build|cards|cheap|codes|dance|email|glass|house|mango|ninja|parts|photo|shoes|solar|today|tokyo|tools|watch|works|aero|arpa|asia|best|bike|blue|buzz|camp|club|cool|coop|farm|fish|gift|guru|info|jobs|kiwi|kred|land|limo|link|menu|mobi|moda|name|pics|pink|post|qpon|rich|ruhr|sexy|tips|vote|voto|wang|wien|wiki|zone|bar|bid|biz|cab|cat|ceo|com|edu|gov|int|kim|mil|net|onl|org|pro|pub|red|tel|uno|wed|xxx|xyz|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt
|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)\b/,i=/[\-A-Za-z0-9+&@#\/%=~_()|'$*\[\]?!:,.;]*[\-A-Za-z0-9+&@#\/%=~_()|'$*\[\]]/;return new RegExp(["(",a.source,")","|","(",c.source,g.source,h.source,")","|","(","(?:","(",e.source,g.source,")","|","(?:","(.?//)?",f.source,g.source,")","|","(?:","(.?//)?",g.source,h.source,")",")","(?:"+i.source+")?",")","|","(",d.source,")","|","(",b.source,")"].join(""),"gi")}(),charBeforeProtocolRelMatchRegex:/^(.)?\/\//,constructor:function(b){a.Util.assign(this,b),this.matchValidator=new a.MatchValidator},replace:function(a,b,c){var d=this;return a.replace(this.matcherRegex,function(a,e,f,g,h,i,j,k,l,m,n,o,p){var q=d.processCandidateMatch(a,e,f,g,h,i,j,k,l,m,n,o,p);if(q){var,q.match);return q.prefixStr+r+q.suffixStr}return a})},processCandidateMatch:function(b,c,d,e,f,g,h,i,j,k,l,m,n){var o,p=i||j,q="",r="";if(g&&!this.urls||f&&!||k&&!||c&&!this.twitter||l&&!this.hashtag||!this.matchValidator.isValidMatch(g,h,p))return null;if(this.matchHasUnbalancedClosingParen(b)&&(b=b.substr(0,b.length-1),r=")"),f)o=new a.match.Email({matchedText:b,email:f});else if(c)d&&(q=d,b=b.slice(1)),o=new a.match.Twitter({matchedText:b,twitterHandle:e});else if(k){var s=b.replace(/\D/g,"");o=new a.match.Phone({matchedText:b,number:s})}else if(l)m&&(q=m,b=b.slice(1)),o=new a.match.Hashtag({matchedText:b,serviceName:this.hashtag,hashtag:n});else{if(p){var t=p.match(this.charBeforeProtocolRelMatchRegex)[1]||"";t&&(q=t,b=b.slice(1))}o=new a.match.Url({matchedText:b,url:b,protocolUrlMatch:!!h,protocolRelativeMatch:!!p,stripPrefix:this.stripPrefix})}return{prefixStr:q,suffixStr:r,match:o}},matchHasUnbalancedClosingParen:function(a){var b=a.charAt(a.length-1);if(")"===b){var 
c=a.match(/\(/g),d=a.match(/\)/g),e=c&&c.length||0,f=d&&d.length||0;if(f>e)return!0}return!1}}),a.MatchValidator=a.Util.extend(Object,{invalidProtocolRelMatchRegex:/^[\w]\/\//,hasFullProtocolRegex:/^[A-Za-z][-.+A-Za-z0-9]+:\/\//,uriSchemeRegex:/^[A-Za-z][-.+A-Za-z0-9]+:/,hasWordCharAfterProtocolRegex:/:[^\s]*?[A-Za-z]/,isValidMatch:function(a,b,c){return b&&!this.isValidUriScheme(b)||this.urlMatchDoesNotHaveProtocolOrDot(a,b)||this.urlMatchDoesNotHaveAtLeastOneWordChar(a,b)||this.isInvalidProtocolRelativeMatch(c)?!1:!0},isValidUriScheme:function(a){var b=a.match(this.uriSchemeRegex)[0].toLowerCase();return"javascript:"!==b&&"vbscript:"!==b},urlMatchDoesNotHaveProtocolOrDot:function(a,b){return!(!a||b&&this.hasFullProtocolRegex.test(b)||-1!==a.indexOf("."))},urlMatchDoesNotHaveAtLeastOneWordChar:function(a,b){return a&&b?!this.hasWordCharAfterProtocolRegex.test(a):!1},isInvalidProtocolRelativeMatch:function(a){return!!a&&this.invalidProtocolRelMatchRegex.test(a)}}),a.match.Match=a.Util.extend(Object,{constructor:function(b){a.Util.assign(this,b)},getType:a.Util.abstractMethod,getMatchedText:function(){return this.matchedText},getAnchorHref:a.Util.abstractMethod,getAnchorText:a.Util.abstractMethod}),a.match.Email=a.Util.extend(a.match.Match,{getType:function(){return"email"},getEmail:function(){return},getAnchorHref:function(){return"mailto:"},getAnchorText:function(){return}}),a.match.Hashtag=a.Util.extend(a.match.Match,{getType:function(){return"hashtag"},getHashtag:function(){return this.hashtag},getAnchorHref:function(){var a=this.serviceName,b=this.hashtag;switch(a){case"twitter":return""+b;case"facebook":return""+b;default:throw new Error("Unknown service name to point hashtag to: ",a)}},getAnchorText:function(){return"#"+this.hashtag}}),a.match.Phone=a.Util.extend(a.match.Match,{getType:function(){return"phone"},getNumber:function(){return this.number},getAnchorHref:function(){return"tel:"+this.number},getAnchorText:function(){return 
this.matchedText}}),a.match.Twitter=a.Util.extend(a.match.Match,{getType:function(){return"twitter"},getTwitterHandle:function(){return @@ -61,2 +61,3 @@ /*global require, module */


@@ -63,0 +64,0 @@ 'src/htmlParser/EntityNode.js',

"name": "autolinker",
"version": "0.17.0",
"version": "0.17.1",
"description": "Utility to automatically link the URLs, email addresses, and Twitter handles in a given block of text/HTML",

@@ -5,0 +5,0 @@ "main": "dist/Autolinker.js",

@@ -263,3 +263,3 @@ /**

* (depending on if the {@link #urls}, {@link #email}, {@link #phone},
* {@link #twitter}, and {@link #hashtags} options are enabled).
* {@link #twitter}, and {@link #hashtag} options are enabled).
* @return {String} The HTML, with matches automatically linked.

@@ -289,4 +289,4 @@ */

} else if( nodeType === 'entity' ) {
resultHtml.push( nodeText ); // append HTML entity nodes (such as '&nbsp;') verbatim
} else if( nodeType === 'entity' || nodeType === 'comment' ) {
resultHtml.push( nodeText ); // append HTML entity nodes (such as '&nbsp;') or HTML comments (such as '<!-- Comment -->') verbatim

@@ -312,4 +312,5 @@ } else {

* Process the text that lies in between HTML tags, performing the anchor tag replacements for
* the matches, and returns the string with the replacements made.
* Process the text that lies in between HTML tags, performing the anchor
* tag replacements for the matches, and returns the string with the
* replacements made.

@@ -328,3 +329,4 @@ * This method does the actual wrapping of matches with anchor tags.

* Creates the return string value for a given match in the input string, for the {@link #processTextNode} method.
* Creates the return string value for a given match in the input string,
* for the {@link #linkifyStr} method.

@@ -448,4 +450,4 @@ * This method handles the {@link #replaceFn}, if one was provided.

* For instance, if given the text: `You should go to`, then the result
* will be `You should go to &lt;a href=""&gt;;/a&gt;`
* For instance, if given the text: `You should go to`,
* then the result will be `You should go to &lt;a href=""&gt;;/a&gt;`

@@ -458,6 +460,8 @@ * Example:

* @static
* @param {String} textOrHtml The HTML or text to find matches within (depending on if
* the {@link #urls}, {@link #email}, {@link #phone}, {@link #twitter}, and {@link #hashtags} options are enabled).
* @param {Object} [options] Any of the configuration options for the Autolinker class, specified in an Object (map).
* See the class description for an example call.
* @param {String} textOrHtml The HTML or text to find matches within (depending
* on if the {@link #urls}, {@link #email}, {@link #phone}, {@link #twitter},
* and {@link #hashtag} options are enabled).
* @param {Object} [options] Any of the configuration options for the Autolinker
* class, specified in an Object (map). See the class description for an
* example call.
* @return {String} The HTML text, with matches automatically linked.

@@ -464,0 +468,0 @@ */

@@ -6,6 +6,6 @@ /*global Autolinker */

* @extends Object
* An HTML parser implementation which simply walks an HTML string and returns an array of
* An HTML parser implementation which simply walks an HTML string and returns an array of
* {@link Autolinker.htmlParser.HtmlNode HtmlNodes} that represent the basic HTML structure of the input string.
* Autolinker uses this to only link URLs/emails/Twitter handles within text nodes, effectively ignoring / "walking

@@ -15,34 +15,37 @@ * around" HTML tags.

Autolinker.htmlParser.HtmlParser = Autolinker.Util.extend( Object, {
* @private
* @property {RegExp} htmlRegex
* The regular expression used to pull out HTML tags from a string. Handles namespaced HTML tags and
* attribute names, as specified by
* Capturing groups:
* 1. The "!DOCTYPE" tag name, if a tag is a &lt;!DOCTYPE&gt; tag.
* 2. If it is an end tag, this group will have the '/'.
* 3. The tag name for all tags (other than the &lt;!DOCTYPE&gt; tag)
* 3. If it is a comment tag, this group will hold the comment text (i.e.
* the text inside the `&lt;!--` and `--&gt;`).
* 4. The tag name for all tags (other than the &lt;!DOCTYPE&gt; tag)
htmlRegex : (function() {
var tagNameRegex = /[0-9a-zA-Z][0-9a-zA-Z:]*/,
var commentTagRegex = /!--([\s\S]+?)--/,
tagNameRegex = /[0-9a-zA-Z][0-9a-zA-Z:]*/,
attrNameRegex = /[^\s\0"'>\/=\x01-\x1F\x7F]+/, // the unicode range accounts for excluding control chars, and the delete char
attrValueRegex = /(?:"[^"]*?"|'[^']*?'|[^'"=<>`\s]+)/, // double quoted, single quoted, or unquoted attribute values
nameEqualsValueRegex = attrNameRegex.source + '(?:\\s*=\\s*' + attrValueRegex.source + ')?'; // optional '=[value]'
return new RegExp( [
// for <!DOCTYPE> tag. Ex: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "">)
// for <!DOCTYPE> tag. Ex: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "">)
'<(!DOCTYPE)', // *** Capturing Group 1 - If it's a doctype tag
// Zero or more attributes following the tag name
'\\s+', // one or more whitespace chars before an attribute
// Either:
// A. attr="value", or
// B. "value" alone (To cover example doctype tag: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "">)
// A. attr="value", or
// B. "value" alone (To cover example doctype tag: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "">)
'(?:', nameEqualsValueRegex, '|', attrValueRegex.source + ')',

@@ -52,20 +55,30 @@ ')*',

// All other HTML tags (i.e. tags that are not <!DOCTYPE>)
'<(/)?', // Beginning of a tag. Either '<' for a start tag, or '</' for an end tag.
'<(/)?', // Beginning of a tag or comment. Either '<' for a start tag, or '</' for an end tag.
// *** Capturing Group 2: The slash or an empty string. Slash ('/') for end tag, empty string for start or self-closing tag.
// *** Capturing Group 3 - The tag name
'(' + tagNameRegex.source + ')',
// Zero or more attributes following the tag name
'\\s+', // one or more whitespace chars before an attribute
nameEqualsValueRegex, // attr="value" (with optional ="value" part)
'\\s*/?', // any trailing spaces and optional '/' before the closing '>'
commentTagRegex.source, // *** Capturing Group 3 - A Comment Tag's Text
// *** Capturing Group 4 - The tag name
'(' + tagNameRegex.source + ')',
// Zero or more attributes following the tag name
'\\s+', // one or more whitespace chars before an attribute
nameEqualsValueRegex, // attr="value" (with optional ="value" part)
'\\s*/?', // any trailing spaces and optional '/' before the closing '>'

@@ -75,3 +88,3 @@ ')'

} )(),

@@ -82,12 +95,12 @@ * @private

* The regular expression that matches common HTML character entities.
* Ignoring &amp; as it could be part of a query string -- handling it separately.
htmlCharacterEntitiesRegex: /(&nbsp;|&#160;|&lt;|&#60;|&gt;|&#62;|&quot;|&#34;|&#39;)/gi,
* Parses an HTML string and returns a simple array of {@link Autolinker.htmlParser.HtmlNode HtmlNodes} to represent
* the HTML structure of the input string.
* Parses an HTML string and returns a simple array of {@link Autolinker.htmlParser.HtmlNode HtmlNodes}
* to represent the HTML structure of the input string.
* @param {String} html The HTML to parse.

@@ -102,9 +115,10 @@ * @return {Autolinker.htmlParser.HtmlNode[]}

nodes = []; // will be the result of the method
while( ( currentResult = htmlRegex.exec( html ) ) !== null ) {
var tagText = currentResult[ 0 ],
tagName = currentResult[ 1 ] || currentResult[ 3 ], // The <!DOCTYPE> tag (ex: "!DOCTYPE"), or another tag (ex: "a" or "img")
commentText = currentResult[ 3 ], // if we've matched a comment
tagName = currentResult[ 1 ] || currentResult[ 4 ], // The <!DOCTYPE> tag (ex: "!DOCTYPE"), or another tag (ex: "a" or "img")
isClosingTag = !!currentResult[ 2 ],
inBetweenTagsText = html.substring( lastIndex, currentResult.index );
// Push TextNodes and EntityNodes for any text found between tags

@@ -115,13 +129,17 @@ if( inBetweenTagsText ) {

// Push the ElementNode
nodes.push( this.createElementNode( tagText, tagName, isClosingTag ) );
// Push the CommentNode or ElementNode
if( commentText ) {
nodes.push( this.createCommentNode( tagText, commentText ) );
} else {
nodes.push( this.createElementNode( tagText, tagName, isClosingTag ) );
lastIndex = currentResult.index + tagText.length;
// Process any remaining text after the last HTML element. Will process all of the text if there were no HTML elements.
if( lastIndex < html.length ) {
var text = html.substring( lastIndex );
// Push TextNodes and EntityNodes for any text found between tags

@@ -133,15 +151,16 @@ if( text ) {

return nodes;
* Parses text and HTML entity nodes from a given string. The input string should not have any HTML tags (elements)
* within it.
* Parses text and HTML entity nodes from a given string. The input string
* should not have any HTML tags (elements) within it.
* @private
* @param {String} text The text to parse.
* @return {Autolinker.htmlParser.HtmlNode[]} An array of HtmlNodes to represent the
* {@link Autolinker.htmlParser.TextNode TextNodes} and {@link Autolinker.htmlParser.EntityNode EntityNodes} found.
* @return {Autolinker.htmlParser.HtmlNode[]} An array of HtmlNodes to
* represent the {@link Autolinker.htmlParser.TextNode TextNodes} and
* {@link Autolinker.htmlParser.EntityNode EntityNodes} found.

@@ -151,5 +170,5 @@ parseTextAndEntityNodes : function( text ) {

textAndEntityTokens = Autolinker.Util.splitAndCapture( text, this.htmlCharacterEntitiesRegex ); // split at HTML entities, but include the HTML entities in the results array
// Every even numbered token is a TextNode, and every odd numbered token is an EntityNode
// For example: an input `text` of "Test &quot;this&quot; today" would turn into the
// For example: an input `text` of "Test &quot;this&quot; today" would turn into the
// `textAndEntityTokens`: [ 'Test ', '&quot;', 'this', '&quot;', ' today' ]

@@ -159,3 +178,3 @@ for( var i = 0, len = textAndEntityTokens.length; i < len; i += 2 ) {

entityToken = textAndEntityTokens[ i + 1 ];
if( textToken ) nodes.push( this.createTextNode( textToken ) );

@@ -166,11 +185,30 @@ if( entityToken ) nodes.push( this.createEntityNode( entityToken ) );

* Factory method to create an {@link Autolinker.htmlParser.CommentNode CommentNode}.
* @private
* @param {String} tagText The full text of the tag (comment) that was
* matched, including its &lt;!-- and --&gt;.
* @param {String} commentText The text found inside the comment, i.e. between
* the &lt;!-- and --&gt; delimiters.
createCommentNode : function( tagText, commentText ) {
return new Autolinker.htmlParser.CommentNode( {
text: tagText,
comment: Autolinker.Util.trim( commentText )
} );
* Factory method to create an {@link Autolinker.htmlParser.ElementNode ElementNode}.
* @private
* @param {String} tagText The full text of the tag (element) that was matched, including its attributes.
* @param {String} tagName The name of the tag. Ex: An &lt;img&gt; tag would be passed to this method as "img".
* @param {Boolean} isClosingTag `true` if it's a closing tag, false otherwise.
* @param {String} tagText The full text of the tag (element) that was
* matched, including its attributes.
* @param {String} tagName The name of the tag. Ex: An &lt;img&gt; tag would
* be passed to this method as "img".
* @param {Boolean} isClosingTag `true` if it's a closing tag, false
* otherwise.
* @return {Autolinker.htmlParser.ElementNode}

@@ -185,9 +223,10 @@ */

* Factory method to create a {@link Autolinker.htmlParser.EntityNode EntityNode}.
* @private
* @param {String} text The text that was matched for the HTML entity (such as '&amp;nbsp;').
* @param {String} text The text that was matched for the HTML entity (such
* as '&amp;nbsp;').
* @return {Autolinker.htmlParser.EntityNode}

@@ -198,7 +237,7 @@ */

* Factory method to create a {@link Autolinker.htmlParser.TextNode TextNode}.
* @private

@@ -211,3 +250,3 @@ * @param {String} text The text that was matched.

} );

@@ -10,5 +10,5 @@ /*global Autolinker */

* The MatchParser is fed a non-HTML string in order to search for matches.
* Autolinker first uses the {@link HtmlParser} to "walk around" HTML tags,
* and then the text around the HTML tags is passed into the MatchParser in
* order to find the actual matches.
* Autolinker first uses the {@link Autolinker.htmlParser.HtmlParser} to "walk
* around" HTML tags, and then the text around the HTML tags is passed into the
* MatchParser in order to find the actual matches.

@@ -185,7 +185,9 @@ Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {

* The regular expression used to retrieve the character before a protocol-relative URL match.
* The regular expression used to retrieve the character before a
* protocol-relative URL match.
* This is used in conjunction with the {@link #matcherRegex}, which needs to grab the character before a protocol-relative
* '//' due to the lack of a negative look-behind in JavaScript regular expressions. The character before the match is stripped
* from the URL.
* This is used in conjunction with the {@link #matcherRegex}, which needs
* to grab the character before a protocol-relative '//' due to the lack of
* a negative look-behind in JavaScript regular expressions. The character
* before the match is stripped from the URL.

@@ -198,4 +200,4 @@ charBeforeProtocolRelMatchRegex : /^(.)?\/\//,

* The MatchValidator object, used to filter out any false positives from the {@link #matcherRegex}. See
* {@link Autolinker.MatchValidator} for details.
* The MatchValidator object, used to filter out any false positives from
* the {@link #matcherRegex}. See {@link Autolinker.MatchValidator} for details.

@@ -206,3 +208,4 @@

* @constructor
* @param {Object} [cfg] The configuration options for the AnchorTagBuilder instance, specified in an Object (map).
* @param {Object} [cfg] The configuration options for the MatchParser
* instance, specified in an Object (map).

@@ -218,3 +221,4 @@ constructor : function( cfg ) {

* Parses the input `text` to search for matches, and calls the `replaceFn`
* to allow replacements of the matches. Returns the `text` with matches replaced.
* to allow replacements of the matches. Returns the `text` with matches
* replaced.

@@ -321,4 +325,6 @@ * @param {String} text The text to search and replace matches in.

// Return out with `null` for match types that are disabled (url, email, twitter, hashtag), or for matches that are
// invalid (false positives from the matcherRegex, which can't use look-behinds since they are unavailable in JS).
// Return out with `null` for match types that are disabled (url, email,
// twitter, hashtag), or for matches that are invalid (false positives
// from the matcherRegex, which can't use look-behinds since they are
// unavailable in JS).

@@ -335,3 +341,4 @@ ( urlMatch && !this.urls ) ||

// Handle a closing parenthesis at the end of the match, and exclude it if there is not a matching open parenthesis
// Handle a closing parenthesis at the end of the match, and exclude it
// if there is not a matching open parenthesis
// in the match itself.

@@ -347,4 +354,5 @@ if( this.matchHasUnbalancedClosingParen( matchStr ) ) {

} else if( twitterMatch ) {
// fix up the `matchStr` if there was a preceding whitespace char, which was needed to determine the match
// itself (since there are no look-behinds in JS regexes)
// fix up the `matchStr` if there was a preceding whitespace char,
// which was needed to determine the match itself (since there are
// no look-behinds in JS regexes)
if( twitterHandlePrefixWhitespaceChar ) {

@@ -362,4 +370,5 @@ prefixStr = twitterHandlePrefixWhitespaceChar;

} else if( hashtagMatch ) {
// fix up the `matchStr` if there was a preceding whitespace char, which was needed to determine the match
// itself (since there are no look-behinds in JS regexes)
// fix up the `matchStr` if there was a preceding whitespace char,
// which was needed to determine the match itself (since there are
// no look-behinds in JS regexes)
if( hashtagPrefixWhitespaceChar ) {

@@ -372,4 +381,6 @@ prefixStr = hashtagPrefixWhitespaceChar;

} else { // url match
// If it's a protocol-relative '//' match, remove the character before the '//' (which the matcherRegex needed
// to match due to the lack of a negative look-behind in JavaScript regular expressions)
// If it's a protocol-relative '//' match, remove the character
// before the '//' (which the matcherRegex needed to match due to
// the lack of a negative look-behind in JavaScript regular
// expressions)
if( protocolRelativeMatch ) {

@@ -402,15 +413,19 @@ var charBeforeMatch = protocolRelativeMatch.match( this.charBeforeProtocolRelMatchRegex )[ 1 ] || "";

* Determines if a match found has an unmatched closing parenthesis. If so, this parenthesis will be removed
* from the match itself, and appended after the generated anchor tag in {@link #processTextNode}.
* Determines if a match found has an unmatched closing parenthesis. If so,
* this parenthesis will be removed from the match itself, and appended
* after the generated anchor tag in {@link #processCandidateMatch}.
* A match may have an extra closing parenthesis at the end of the match because the regular expression must include parenthesis
* for URLs such as "", which should be auto-linked.
* A match may have an extra closing parenthesis at the end of the match
* because the regular expression must include parentheses for URLs such as
* "", which should be auto-linked.
* However, an extra parenthesis *will* be included when the URL itself is wrapped in parenthesis, such as in the case of
* "(". In this case, the last closing parenthesis should *not* be part of the URL
* itself, and this method will return `true`.
* However, an extra parenthesis *will* be included when the URL itself is
* wrapped in parentheses, such as in the case of "(".
* In this case, the last closing parenthesis should *not* be part of the
* URL itself, and this method will return `true`.
* @private
* @param {String} matchStr The full match string from the {@link #matcherRegex}.
* @return {Boolean} `true` if there is an unbalanced closing parenthesis at the end of the `matchStr`, `false` otherwise.
* @return {Boolean} `true` if there is an unbalanced closing parenthesis at
* the end of the `matchStr`, `false` otherwise.

@@ -417,0 +432,0 @@ matchHasUnbalancedClosingParen : function( matchStr ) {

@@ -7,30 +7,35 @@ /*global Autolinker */

* @extends Object
* Used by Autolinker to filter out false positives from the {@link Autolinker#matcherRegex}.
* Due to the limitations of regular expressions (including the missing feature of look-behinds in JS regular expressions),
* we cannot always determine the validity of a given match. This class applies a bit of additional logic to filter out any
* false positives that have been matched by the {@link Autolinker#matcherRegex}.
* Used by Autolinker to filter out false positives from the
* {@link Autolinker.matchParser.MatchParser#matcherRegex}.
* Due to the limitations of regular expressions (including the missing feature
* of look-behinds in JS regular expressions), we cannot always determine the
* validity of a given match. This class applies a bit of additional logic to
* filter out any false positives that have been matched by the
* {@link Autolinker.matchParser.MatchParser#matcherRegex}.
Autolinker.MatchValidator = Autolinker.Util.extend( Object, {
* @private
* @property {RegExp} invalidProtocolRelMatchRegex
* The regular expression used to check a potential protocol-relative URL match, coming from the
* {@link Autolinker#matcherRegex}. A protocol-relative URL is, for example, "//"
* This regular expression checks to see if there is a word character before the '//' match in order to determine if
* we should actually autolink a protocol-relative URL. This is needed because there is no negative look-behind in
* JavaScript regular expressions.
* For instance, we want to autolink something like "Go to: //", but we don't want to autolink something
* like "abc//"
* The regular expression used to check a potential protocol-relative URL
* match, coming from the {@link Autolinker.matchParser.MatchParser#matcherRegex}.
* A protocol-relative URL is, for example, "//"
* This regular expression checks to see if there is a word character before
* the '//' match in order to determine if we should actually autolink a
* protocol-relative URL. This is needed because there is no negative
* look-behind in JavaScript regular expressions.
* For instance, we want to autolink something like "Go to: //",
* but we don't want to autolink something like "abc//"
invalidProtocolRelMatchRegex : /^[\w]\/\//,
* Regex to test for a full protocol, with the two trailing slashes. Ex: 'http://'
* @private

@@ -40,8 +45,8 @@ * @property {RegExp} hasFullProtocolRegex

hasFullProtocolRegex : /^[A-Za-z][-.+A-Za-z0-9]+:\/\//,
* Regex to find the URI scheme, such as 'mailto:'.
* This is used to filter out 'javascript:' and 'vbscript:' schemes.
* @private

@@ -51,6 +56,6 @@ * @property {RegExp} uriSchemeRegex

uriSchemeRegex : /^[A-Za-z][-.+A-Za-z0-9]+:/,
* Regex to determine if at least one word char exists after the protocol (i.e. after the ':')
* @private

@@ -60,24 +65,32 @@ * @property {RegExp} hasWordCharAfterProtocolRegex

hasWordCharAfterProtocolRegex : /:[^\s]*?[A-Za-z]/,
* Determines if a given match found by {@link Autolinker#processTextNode} is valid. Will return `false` for:
* 1) URL matches which do not have at least one period ('.') in the domain name (effectively skipping over
* matches like "abc:def"). However, URL matches with a protocol will be allowed (ex: 'http://localhost')
* 2) URL matches which do not have at least one word character in the domain name (effectively skipping over
* matches like "git:1.0").
* 3) A protocol-relative url match (a URL beginning with '//') whose previous character is a word character
* (effectively skipping over strings like "abc//")
* Determines if a given match found by the {@link Autolinker.matchParser.MatchParser}
* is valid. Will return `false` for:
* 1) URL matches which do not have at least one period ('.') in the
* domain name (effectively skipping over matches like "abc:def").
* However, URL matches with a protocol will be allowed (ex: 'http://localhost')
* 2) URL matches which do not have at least one word character in the
* domain name (effectively skipping over matches like "git:1.0").
* 3) A protocol-relative url match (a URL beginning with '//') whose
* previous character is a word character (effectively skipping over
* strings like "abc//")
* Otherwise, returns `true`.
* @param {String} urlMatch The matched URL, if there was one. Will be an empty string if the match is not a URL match.
* @param {String} protocolUrlMatch The match URL string for a protocol match. Ex: ''. This is used to match
* something like 'http://localhost', where we won't double check that the domain name has at least one '.' in it.
* @param {String} protocolRelativeMatch The protocol-relative string for a URL match (i.e. '//'), possibly with a preceding
* character (ex, a space, such as: ' //', or a letter, such as: 'a//'). The match is invalid if there is a word character
* preceding the '//'.
* @return {Boolean} `true` if the match given is valid and should be processed, or `false` if the match is invalid and/or
* should just not be processed.
* @param {String} urlMatch The matched URL, if there was one. Will be an
* empty string if the match is not a URL match.
* @param {String} protocolUrlMatch The match URL string for a protocol
* match. Ex: ''. This is used to match something like
* 'http://localhost', where we won't double check that the domain name
* has at least one '.' in it.
* @param {String} protocolRelativeMatch The protocol-relative string for a
* URL match (i.e. '//'), possibly with a preceding character (ex, a
* space, such as: ' //', or a letter, such as: 'a//'). The match is
* invalid if there is a word character preceding the '//'.
* @return {Boolean} `true` if the match given is valid and should be
* processed, or `false` if the match is invalid and/or should just not be
* processed.

@@ -93,14 +106,14 @@ isValidMatch : function( urlMatch, protocolUrlMatch, protocolRelativeMatch ) {

return true;
* Determines if the URI scheme is a valid scheme to be autolinked. Returns `false` if the scheme is
* 'javascript:' or 'vbscript:'
* Determines if the URI scheme is a valid scheme to be autolinked. Returns
* `false` if the scheme is 'javascript:' or 'vbscript:'
* @private
* @param {String} uriSchemeMatch The match URL string for a full URI scheme match. Ex: 'http://yahoo.com'
* or 'mailto:a@a.com'.
* @param {String} uriSchemeMatch The match URL string for a full URI scheme
* match. Ex: 'http://yahoo.com' or 'mailto:a@a.com'.
* @return {Boolean} `true` if the scheme is a valid one, `false` otherwise.

@@ -110,22 +123,27 @@ */

var uriScheme = uriSchemeMatch.match( this.uriSchemeRegex )[ 0 ].toLowerCase();
return ( uriScheme !== 'javascript:' && uriScheme !== 'vbscript:' );
* Determines if a URL match does not have either:
* a) a full protocol (i.e. 'http://'), or
* b) at least one dot ('.') in the domain name (for a non-full-protocol match).
* Either situation is considered an invalid URL (ex: 'git:d' does not have either the '://' part, or at least one dot
* in the domain name. If the match was 'git:abc.com', we would consider this valid.)
* b) at least one dot ('.') in the domain name (for a non-full-protocol
* match).
* Either situation is considered an invalid URL (ex: 'git:d' does not have
* either the '://' part, or at least one dot in the domain name. If the
* match was 'git:abc.com', we would consider this valid.)
* @private
* @param {String} urlMatch The matched URL, if there was one. Will be an empty string if the match is not a URL match.
* @param {String} protocolUrlMatch The match URL string for a protocol match. Ex: 'http://yahoo.com'. This is used to match
* something like 'http://localhost', where we won't double check that the domain name has at least one '.' in it.
* @return {Boolean} `true` if the URL match does not have a full protocol, or at least one dot ('.') in a non-full-protocol
* match.
* @param {String} urlMatch The matched URL, if there was one. Will be an
* empty string if the match is not a URL match.
* @param {String} protocolUrlMatch The match URL string for a protocol
* match. Ex: 'http://yahoo.com'. This is used to match something like
* 'http://localhost', where we won't double check that the domain name
* has at least one '.' in it.
* @return {Boolean} `true` if the URL match does not have a full protocol,
* or at least one dot ('.') in a non-full-protocol match.

@@ -135,17 +153,20 @@ urlMatchDoesNotHaveProtocolOrDot : function( urlMatch, protocolUrlMatch ) {

* Determines if a URL match does not have at least one word character after the protocol (i.e. in the domain name).
* At least one letter character must exist in the domain name after a protocol match. Ex: skip over something
* like "git:1.0"
* Determines if a URL match does not have at least one word character after
* the protocol (i.e. in the domain name).
* At least one letter character must exist in the domain name after a
* protocol match. Ex: skip over something like "git:1.0"
* @private
* @param {String} urlMatch The matched URL, if there was one. Will be an empty string if the match is not a URL match.
* @param {String} protocolUrlMatch The match URL string for a protocol match. Ex: 'http://yahoo.com'. This is used to
* know whether or not we have a protocol in the URL string, in order to check for a word character after the protocol
* separator (':').
* @return {Boolean} `true` if the URL match does not have at least one word character in it after the protocol, `false`
* otherwise.
* @param {String} urlMatch The matched URL, if there was one. Will be an
* empty string if the match is not a URL match.
* @param {String} protocolUrlMatch The match URL string for a protocol
* match. Ex: 'http://yahoo.com'. This is used to know whether or not we
* have a protocol in the URL string, in order to check for a word
* character after the protocol separator (':').
* @return {Boolean} `true` if the URL match does not have at least one word
* character in it after the protocol, `false` otherwise.

@@ -159,14 +180,17 @@ urlMatchDoesNotHaveAtLeastOneWordChar : function( urlMatch, protocolUrlMatch ) {

* Determines if a protocol-relative match is an invalid one. This method returns `true` if there is a `protocolRelativeMatch`,
* and that match contains a word character before the '//' (i.e. it must contain whitespace or nothing before the '//' in
* order to be considered valid).
* Determines if a protocol-relative match is an invalid one. This method
* returns `true` if there is a `protocolRelativeMatch`, and that match
* contains a word character before the '//' (i.e. it must contain
* whitespace or nothing before the '//' in order to be considered valid).
* @private
* @param {String} protocolRelativeMatch The protocol-relative string for a URL match (i.e. '//'), possibly with a preceding
* character (ex, a space, such as: ' //', or a letter, such as: 'a//'). The match is invalid if there is a word character
* preceding the '//'.
* @return {Boolean} `true` if it is an invalid protocol-relative match, `false` otherwise.
* @param {String} protocolRelativeMatch The protocol-relative string for a
* URL match (i.e. '//'), possibly with a preceding character (ex, a
* space, such as: ' //', or a letter, such as: 'a//'). The match is
* invalid if there is a word character preceding the '//'.
* @return {Boolean} `true` if it is an invalid protocol-relative match,
* `false` otherwise.

@@ -173,0 +197,0 @@ isInvalidProtocolRelativeMatch : function( protocolRelativeMatch ) {

@@ -6,18 +6,28 @@ /*global Autolinker */

* @singleton
* A few utility methods for Autolinker.
Autolinker.Util = {
* @property {Function} abstractMethod
* A function object which represents an abstract method.
abstractMethod : function() { throw "abstract"; },
* @private
* @property {RegExp} trimRegex
* The regular expression used to trim the leading and trailing whitespace
* from a string.
trimRegex : /^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,
* Assigns (shallow copies) the properties of `src` onto `dest`.
* @param {Object} dest The destination object.

@@ -33,10 +43,10 @@ * @param {Object} src The source object.

return dest;
* Extends `superclass` to create a new subclass, adding the `protoProps` to the new subclass's prototype.
* @param {Function} superclass The constructor function for the superclass.

@@ -49,6 +59,6 @@ * @param {Object} protoProps The methods/properties to add to the subclass's prototype. This may contain the

var superclassProto = superclass.prototype;
var F = function() {};
F.prototype = superclassProto;
var subclass;

@@ -60,19 +70,19 @@ if( protoProps.hasOwnProperty( 'constructor' ) ) {

var subclassProto = subclass.prototype = new F(); // set up prototype chain
subclassProto.constructor = subclass; // fix constructor property
subclassProto.superclass = superclassProto;
delete protoProps.constructor; // don't re-assign constructor property to the prototype, since a new function may have been created (`subclass`), which is now already there
Autolinker.Util.assign( subclassProto, protoProps );
return subclass;
* Truncates the `str` at `len - ellipsisChars.length`, and adds the `ellipsisChars` to the
* end of the string (by default, two periods: '..'). If the `str` length does not exceed
* end of the string (by default, two periods: '..'). If the `str` length does not exceed
* `len`, the string will be returned unchanged.
* @param {String} str The string to truncate and add an ellipsis to.

@@ -90,7 +100,7 @@ * @param {Number} truncateLen The length to truncate the string at.

* Supports `Array.prototype.indexOf()` functionality for old IE (IE8 and below).
* @param {Array} arr The array to find an element of.

@@ -103,3 +113,3 @@ * @param {*} element The element to find in the array, and return the index of.

return arr.indexOf( element );
} else {

@@ -112,25 +122,25 @@ for( var i = 0, len = arr.length; i < len; i++ ) {

* Performs the functionality of what modern browsers do when `String.prototype.split()` is called
* with a regular expression that contains capturing parentheses.
* For example:
* // Modern browsers:
* // Modern browsers:
* "a,b,c".split( /(,)/ ); // --> [ 'a', ',', 'b', ',', 'c' ]
* // Old IE (including IE8):
* "a,b,c".split( /(,)/ ); // --> [ 'a', 'b', 'c' ]
* This method emulates the functionality of modern browsers for the old IE case.
* @param {String} str The string to split.
* @param {RegExp} splitRegex The regular expression to split the input `str` on. The splitting
* character(s) will be spliced into the array, as in the "modern browsers" example in the
* description of this method.
* character(s) will be spliced into the array, as in the "modern browsers" example in the
* description of this method.
* Note #1: the supplied regular expression **must** have the 'g' flag specified.
* Note #2: for simplicity's sake, the regular expression does not need
* Note #2: for simplicity's sake, the regular expression does not need
* to contain capturing parentheses - it will be assumed that any match has them.

@@ -141,18 +151,29 @@ * @return {String[]} The split array of strings, with the splitting character(s) included.

if( ! ) throw new Error( "`splitRegex` must have the 'g' flag set" );
var result = [],
lastIdx = 0,
while( match = splitRegex.exec( str ) ) {
result.push( str.substring( lastIdx, match.index ) );
result.push( match[ 0 ] ); // push the splitting char(s)
lastIdx = match.index + match[ 0 ].length;
result.push( str.substring( lastIdx ) );
return result;
* Trims the leading and trailing whitespace from a string.
* @param {String} str The string to trim.
* @return {String}
trim : function( str ) {
return str.replace( this.trimRegex, '' );
/*global Autolinker, _, describe, beforeEach, afterEach, it, expect, jasmine */
describe( "Autolinker.htmlParser.HtmlParser", function() {
var HtmlParser = Autolinker.htmlParser.HtmlParser,
CommentNode = Autolinker.htmlParser.CommentNode,
ElementNode = Autolinker.htmlParser.ElementNode,

@@ -8,9 +9,15 @@ EntityNode = Autolinker.htmlParser.EntityNode,

// Give every spec its own freshly constructed HtmlParser instance.
beforeEach( function createHtmlParser() {
    htmlParser = new HtmlParser();
} );
function expectCommentNode( node, text, comment ) {
expect( node ).toEqual( jasmine.any( CommentNode ) );
expect( node.getText() ).toBe( text );
expect( node.getComment() ).toBe( comment );
function expectElementNode( node, tagText, tagName, isClosingTag ) {

@@ -22,3 +29,3 @@ expect( node ).toEqual( jasmine.any( ElementNode ) );

function expectEntityNode( node, text ) {

@@ -28,3 +35,3 @@ expect( node ).toEqual( jasmine.any( EntityNode ) );

function expectTextNode( node, text ) {

@@ -34,142 +41,233 @@ expect( node ).toEqual( jasmine.any( TextNode ) );

// Edge case: parsing an empty string is expected to yield no nodes at all.
it( "should return an empty array for an empty input string", function() {
    var parsed = htmlParser.parse( "" );

    expect( parsed ).toEqual( [] );
} );
it( "should be able to reproduce the input string based on the text that was provided to each returned `HtmlNode`", function() {
var inputStr = 'Joe went to <a href=""></a> today,&nbsp;and bought <b>big</b> items',
nodes = htmlParser.parse( inputStr ),
result = [];
for( var i = 0, len = nodes.length; i < len; i++ ) {
result.push( nodes[ i ].getText() );
expect( result.length ).toBe( 11 );
expect( result.join( "" ) ).toBe( inputStr );
describe( 'text node handling', function() {

    // Input without any markup or entities is expected back as one text node.
    it( "should return a single text node if there are no HTML nodes in it", function() {
        var parsed = htmlParser.parse( "Testing 123" ),
            nodeCount = parsed.length;

        expect( nodeCount ).toBe( 1 );
        expectTextNode( parsed[ 0 ], 'Testing 123' );
    } );

} );
it( "should properly create `HtmlNode` instances for each text/entity/element node encountered, with the proper data filled in on each node", function() {
var inputStr = '&quot;Joe went to &quot;<a href=""></a>&quot; today,&nbsp;and bought <b>big</b> items&quot;',
nodes = htmlParser.parse( inputStr );
expect( nodes.length ).toBe( 15 );
var i = -1;
expectEntityNode ( nodes[ ++i ], '&quot;' );
expectTextNode ( nodes[ ++i ], 'Joe went to ' );
expectEntityNode ( nodes[ ++i ], '&quot;' );
expectElementNode( nodes[ ++i ], '<a href="">', 'a', false );
expectTextNode ( nodes[ ++i ], '' );
expectElementNode( nodes[ ++i ], '</a>', 'a', true );
expectEntityNode ( nodes[ ++i ], '&quot;' );
expectTextNode ( nodes[ ++i ], ' today,' );
expectEntityNode ( nodes[ ++i ], '&nbsp;' );
expectTextNode ( nodes[ ++i ], 'and bought ' );
expectElementNode( nodes[ ++i ], '<b>', 'b', false );
expectTextNode ( nodes[ ++i ], 'big' );
expectElementNode( nodes[ ++i ], '</b>', 'b', true );
expectTextNode ( nodes[ ++i ], ' items' );
expectEntityNode ( nodes[ ++i ], '&quot;' );
describe( 'HTML comment node handling', function() {

    // A lone comment: the node text is the full comment markup, and the
    // comment value is the inner text.
    it( "should return a single comment node if there is only an HTML comment node in it", function() {
        var input = "<!-- Testing 123 -->",
            parsed = htmlParser.parse( input );

        expect( parsed.length ).toBe( 1 );
        expectCommentNode( parsed[ 0 ], input, "Testing 123" );
    } );


    // Surrounding whitespace (including newlines and tabs) stays in the node
    // text but is expected to be trimmed from the comment value.
    it( "should handle a multi-line comment, and trim any amount of whitespace in the comment for the comment's text", function() {
        var input = "<!-- \n \t\n Testing 123 \n\t \n\n -->",
            parsed = htmlParser.parse( input );

        expect( parsed.length ).toBe( 1 );
        expectCommentNode( parsed[ 0 ], input, "Testing 123" );
    } );


    // Comment surrounded by text on both sides.
    it( "should produce 3 nodes for a text node, comment, then text node", function() {
        var parsed = htmlParser.parse( "Test <!-- Comment --> Test" ),
            leading = parsed[ 0 ],
            comment = parsed[ 1 ],
            trailing = parsed[ 2 ];

        expect( parsed.length ).toBe( 3 );
        expectTextNode( leading, 'Test ' );
        expectCommentNode( comment, '<!-- Comment -->', 'Comment' );
        expectTextNode( trailing, ' Test' );
    } );


    // Two comments, with the second one ending the input.
    it( "should produce 4 nodes for a text node, comment, text node, comment", function() {
        var parsed = htmlParser.parse( "Test <!-- Comment --> Test <!-- Comment 2 -->" ),
            idx = 0;

        expect( parsed.length ).toBe( 4 );
        expectTextNode( parsed[ idx++ ], 'Test ' );
        expectCommentNode( parsed[ idx++ ], '<!-- Comment -->', 'Comment' );
        expectTextNode( parsed[ idx++ ], ' Test ' );
        expectCommentNode( parsed[ idx++ ], '<!-- Comment 2 -->', 'Comment 2' );
    } );

} );
it( 'should match tags of both upper and lower case', function() {
var inputStr = 'Joe <!DOCTYPE html> went <!doctype "blah" "blah blah"> to <a href=""></a> today,&nbsp;and <A href="">purchased</A> <b>big</b> <B>items</B>',
nodes = htmlParser.parse( inputStr );
expect( nodes.length ).toBe( 22 );
var i = -1;
expectTextNode ( nodes[ ++i ], 'Joe ' );
expectElementNode( nodes[ ++i ], '<!DOCTYPE html>', '!doctype', false );
expectTextNode ( nodes[ ++i ], ' went ' );
expectElementNode( nodes[ ++i ], '<!doctype "blah" "blah blah">', '!doctype', false );
expectTextNode ( nodes[ ++i ], ' to ' );
expectElementNode( nodes[ ++i ], '<a href="">', 'a', false );
expectTextNode ( nodes[ ++i ], '' );
expectElementNode( nodes[ ++i ], '</a>', 'a', true );
expectTextNode ( nodes[ ++i ], ' today,' );
expectEntityNode ( nodes[ ++i ], '&nbsp;' );
expectTextNode ( nodes[ ++i ], 'and ' );
expectElementNode( nodes[ ++i ], '<A href="">', 'a', false );
expectTextNode ( nodes[ ++i ], 'purchased' );
expectElementNode( nodes[ ++i ], '</A>', 'a', true );
expectTextNode ( nodes[ ++i ], ' ' );
expectElementNode( nodes[ ++i ], '<b>', 'b', false );
expectTextNode ( nodes[ ++i ], 'big' );
expectElementNode( nodes[ ++i ], '</b>', 'b', true );
expectTextNode ( nodes[ ++i ], ' ' );
expectElementNode( nodes[ ++i ], '<B>', 'b', false );
expectTextNode ( nodes[ ++i ], 'items' );
expectElementNode( nodes[ ++i ], '</B>', 'b', true );
describe( 'HTML element node handling', function() {
// A lone opening tag is expected back as exactly one (non-closing) element node.
it( "should return a single element node if there is only an HTML element node in it", function() {
    var input = "<div>",
        parsed = htmlParser.parse( input );

    expect( parsed.length ).toBe( 1 );
    expectElementNode( parsed[ 0 ], '<div>', 'div', false );
} );
// Element surrounded by text on both sides.
it( "should produce 3 nodes for a text node, element, then text node", function() {
    var parsed = htmlParser.parse( "Test <div> Test" ),
        leading = parsed[ 0 ],
        element = parsed[ 1 ],
        trailing = parsed[ 2 ];

    expect( parsed.length ).toBe( 3 );
    expectTextNode( leading, 'Test ' );
    expectElementNode( element, '<div>', 'div', false );
    expectTextNode( trailing, ' Test' );
} );
it( "should be able to reproduce the input string based on the text that was provided to each returned `HtmlNode`", function() {
var inputStr = 'Joe went to <a href=""></a> today,&nbsp;and bought <b>big</b> items',
nodes = htmlParser.parse( inputStr ),
result = [];
for( var i = 0, len = nodes.length; i < len; i++ ) {
result.push( nodes[ i ].getText() );
expect( result.length ).toBe( 11 );
expect( result.join( "" ) ).toBe( inputStr );
} );
} );
it( "should *not* match the &amp; HTML entity, as this may be part of a query string", function() {
var nodes = htmlParser.parse( 'Me&amp;You' );
expect( nodes.length ).toBe( 1 );
expectTextNode( nodes[ 0 ], 'Me&amp;You' );
describe( 'HTML entity handling', function() {

    // `&amp;` is deliberately left inside the text node (see the spec title).
    it( "should *not* match the &amp; HTML entity, as this may be part of a query string", function() {
        var parsed = htmlParser.parse( 'Me&amp;You' );

        expect( parsed.length ).toBe( 1 );
        expectTextNode( parsed[ 0 ], 'Me&amp;You' );
    } );


    // Entity at the very start of the input.
    it( "should properly parse a string that begins with an HTML entity node", function() {
        var parsed = htmlParser.parse( '&quot;Test' ),
            idx = 0;

        expect( parsed.length ).toBe( 2 );
        expectEntityNode( parsed[ idx++ ], '&quot;' );
        expectTextNode( parsed[ idx++ ], 'Test' );
    } );


    // Entity at the very end of the input.
    it( "should properly parse a string that ends with an HTML entity node", function() {
        var parsed = htmlParser.parse( 'Test&quot;' ),
            idx = 0;

        expect( parsed.length ).toBe( 2 );
        expectTextNode( parsed[ idx++ ], 'Test' );
        expectEntityNode( parsed[ idx++ ], '&quot;' );
    } );


    // Entities on both ends of the input.
    it( "should properly parse a string that begins and ends with an HTML entity node", function() {
        var parsed = htmlParser.parse( '&quot;Test&quot;' ),
            idx = 0;

        expect( parsed.length ).toBe( 3 );
        expectEntityNode( parsed[ idx++ ], '&quot;' );
        expectTextNode( parsed[ idx++ ], 'Test' );
        expectEntityNode( parsed[ idx++ ], '&quot;' );
    } );


    // Entity splitting the text into two text nodes.
    it( "should properly parse a string that has an HTML entity node in the middle", function() {
        var parsed = htmlParser.parse( 'Test&quot;Test' ),
            idx = 0;

        expect( parsed.length ).toBe( 3 );
        expectTextNode( parsed[ idx++ ], 'Test' );
        expectEntityNode( parsed[ idx++ ], '&quot;' );
        expectTextNode( parsed[ idx++ ], 'Test' );
    } );


    // Input that consists of nothing but a single entity.
    it( "should properly parse a string that only has an HTML entity node", function() {
        var parsed = htmlParser.parse( '&quot;' );

        expect( parsed.length ).toBe( 1 );
        expectEntityNode( parsed[ 0 ], '&quot;' );
    } );

} );
it( "should properly parse a string that begins with an HTML entity node", function() {
var nodes = htmlParser.parse( '&quot;Test' );
expect( nodes.length ).toBe( 2 );
expectEntityNode( nodes[ 0 ], '&quot;' );
expectTextNode( nodes[ 1 ], 'Test' );
describe( 'combination examples', function() {

    // Mixes all four node kinds (text, entity, comment, element) in one input
    // and checks every returned node in order.
    it( "should properly create `HtmlNode` instances for each text/entity/comment/element node encountered, with the proper data filled in on each node", function() {
        var inputStr =
            '&quot;Joe went to &quot;' +
            '<a href=""></a>&quot; ' +
            'today,&nbsp;and <!-- stuff -->bought <b>big</b> items&quot;';

        var parsed = htmlParser.parse( inputStr ),
            idx = 0;  // position of the next node to verify

        expect( parsed.length ).toBe( 17 );

        expectEntityNode ( parsed[ idx++ ], '&quot;' );
        expectTextNode   ( parsed[ idx++ ], 'Joe went to ' );
        expectEntityNode ( parsed[ idx++ ], '&quot;' );
        expectElementNode( parsed[ idx++ ], '<a href="">', 'a', false );
        expectTextNode   ( parsed[ idx++ ], '' );
        expectElementNode( parsed[ idx++ ], '</a>', 'a', true );
        expectEntityNode ( parsed[ idx++ ], '&quot;' );
        expectTextNode   ( parsed[ idx++ ], ' today,' );
        expectEntityNode ( parsed[ idx++ ], '&nbsp;' );
        expectTextNode   ( parsed[ idx++ ], 'and ' );
        expectCommentNode( parsed[ idx++ ], '<!-- stuff -->', 'stuff' );
        expectTextNode   ( parsed[ idx++ ], 'bought ' );
        expectElementNode( parsed[ idx++ ], '<b>', 'b', false );
        expectTextNode   ( parsed[ idx++ ], 'big' );
        expectElementNode( parsed[ idx++ ], '</b>', 'b', true );
        expectTextNode   ( parsed[ idx++ ], ' items' );
        expectEntityNode ( parsed[ idx++ ], '&quot;' );
    } );


    // Tag names are expected lower-cased on the node regardless of the case
    // used in the markup, while the node text keeps the original casing.
    it( 'should match tags of both upper and lower case', function() {
        var inputStr = 'Joe <!DOCTYPE html><!-- Comment -->went <!doctype "blah" "blah blah"> to <a href=""></a> today,&nbsp;and <A href="">purchased</A> <b>big</b> <B><!-- Comment 2 -->items</B>';

        var parsed = htmlParser.parse( inputStr ),
            idx = 0;  // position of the next node to verify

        expect( parsed.length ).toBe( 24 );

        expectTextNode   ( parsed[ idx++ ], 'Joe ' );
        expectElementNode( parsed[ idx++ ], '<!DOCTYPE html>', '!doctype', false );
        expectCommentNode( parsed[ idx++ ], '<!-- Comment -->', 'Comment' );
        expectTextNode   ( parsed[ idx++ ], 'went ' );
        expectElementNode( parsed[ idx++ ], '<!doctype "blah" "blah blah">', '!doctype', false );
        expectTextNode   ( parsed[ idx++ ], ' to ' );
        expectElementNode( parsed[ idx++ ], '<a href="">', 'a', false );
        expectTextNode   ( parsed[ idx++ ], '' );
        expectElementNode( parsed[ idx++ ], '</a>', 'a', true );
        expectTextNode   ( parsed[ idx++ ], ' today,' );
        expectEntityNode ( parsed[ idx++ ], '&nbsp;' );
        expectTextNode   ( parsed[ idx++ ], 'and ' );
        expectElementNode( parsed[ idx++ ], '<A href="">', 'a', false );
        expectTextNode   ( parsed[ idx++ ], 'purchased' );
        expectElementNode( parsed[ idx++ ], '</A>', 'a', true );
        expectTextNode   ( parsed[ idx++ ], ' ' );
        expectElementNode( parsed[ idx++ ], '<b>', 'b', false );
        expectTextNode   ( parsed[ idx++ ], 'big' );
        expectElementNode( parsed[ idx++ ], '</b>', 'b', true );
        expectTextNode   ( parsed[ idx++ ], ' ' );
        expectElementNode( parsed[ idx++ ], '<B>', 'b', false );
        expectCommentNode( parsed[ idx++ ], '<!-- Comment 2 -->', 'Comment 2' );
        expectTextNode   ( parsed[ idx++ ], 'items' );
        expectElementNode( parsed[ idx++ ], '</B>', 'b', true );
    } );

} );
// Entity at the very end of the input.
it( "should properly parse a string that ends with an HTML entity node", function() {
    var parsed = htmlParser.parse( 'Test&quot;' ),
        idx = 0;

    expect( parsed.length ).toBe( 2 );
    expectTextNode( parsed[ idx++ ], 'Test' );
    expectEntityNode( parsed[ idx++ ], '&quot;' );
} );
// Entities on both ends of the input.
it( "should properly parse a string that begins and ends with an HTML entity node", function() {
    var parsed = htmlParser.parse( '&quot;Test&quot;' ),
        idx = 0;

    expect( parsed.length ).toBe( 3 );
    expectEntityNode( parsed[ idx++ ], '&quot;' );
    expectTextNode( parsed[ idx++ ], 'Test' );
    expectEntityNode( parsed[ idx++ ], '&quot;' );
} );
// Entity splitting the text into two text nodes.
it( "should properly parse a string that has an HTML entity node in the middle", function() {
    var parsed = htmlParser.parse( 'Test&quot;Test' ),
        idx = 0;

    expect( parsed.length ).toBe( 3 );
    expectTextNode( parsed[ idx++ ], 'Test' );
    expectEntityNode( parsed[ idx++ ], '&quot;' );
    expectTextNode( parsed[ idx++ ], 'Test' );
} );
// Input that consists of nothing but a single entity.
it( "should properly parse a string that only has an HTML entity node", function() {
    var input = '&quot;',
        parsed = htmlParser.parse( input );

    expect( parsed.length ).toBe( 1 );
    expectEntityNode( parsed[ 0 ], '&quot;' );
} );
// Regression spec for issue #54: the input contains a stray '<' ("<3") that
// never becomes a tag, so the whole string is expected back as one text node.
it( "should not freeze up the regular expression engine when presented with the input string in issue #54", function() {
    var inputStr = "Shai ist endlich in Deutschland! Und wir haben gute Nachrichten! <3 Alle, die den Shai-Rasierer kostenlos probieren, machen am Gewinnspiel eines Jahresvorrates Klingen mit. Den Rasierer bekommst Du kostenlos durch diesen Link:, und dann machst Du am Gewinnspiel mit! 'Gefallt mir' klicken, wenn Du gern einen Jahresvorrat Shai haben mochtest. (Y)";

    var parsed = htmlParser.parse( inputStr ),
        onlyNode = parsed[ 0 ];

    expect( parsed.length ).toBe( 1 );
    expectTextNode( onlyNode, inputStr );
} );
} );

