himalaya
Advanced tools
Comparing version 1.0.1 to 1.1.0
@@ -80,13 +80,15 @@ (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.himalaya = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){ | ||
function format(nodes) { | ||
function format(nodes, options) { | ||
return nodes.map(function (node) { | ||
var type = node.type; | ||
if (type === 'element') { | ||
var tagName = node.tagName.toLowerCase(); | ||
var attributes = formatAttributes(node.attributes); | ||
var children = format(node.children); | ||
return { type: type, tagName: tagName, attributes: attributes, children: children }; | ||
var outputNode = type === 'element' ? { | ||
type: type, | ||
tagName: node.tagName.toLowerCase(), | ||
attributes: formatAttributes(node.attributes), | ||
children: format(node.children, options) | ||
} : { type: type, content: node.content }; | ||
if (options.includePositions) { | ||
outputNode.position = node.position; | ||
} | ||
return { type: type, content: node.content }; | ||
return outputNode; | ||
}); | ||
@@ -134,3 +136,4 @@ } | ||
childlessTags: _tags.childlessTags, | ||
closingTagAncestorBreakers: _tags.closingTagAncestorBreakers | ||
closingTagAncestorBreakers: _tags.closingTagAncestorBreakers, | ||
includePositions: false | ||
}; | ||
@@ -158,2 +161,6 @@ | ||
}); | ||
exports.feedPosition = feedPosition; | ||
exports.jumpPosition = jumpPosition; | ||
exports.makeInitialPosition = makeInitialPosition; | ||
exports.copyPosition = copyPosition; | ||
exports.default = lexer; | ||
@@ -172,6 +179,44 @@ exports.lex = lex; | ||
function _toConsumableArray(arr) { if (Array.isArray(arr)) { for (var i = 0, arr2 = Array(arr.length); i < arr.length; i++) { arr2[i] = arr[i]; } return arr2; } else { return Array.from(arr); } } | ||
/**
 * Advances `position` over the next `len` characters of `str`, mutating it
 * in place.
 *
 * Every '\n' consumed increments `position.line` and resets
 * `position.column` to 0; any other character increments `position.column`.
 *
 * @param {{index: number, line: number, column: number}} position - Cursor to advance (mutated).
 * @param {string} str - Text being scanned.
 * @param {number} len - Number of characters to consume.
 */
function feedPosition(position, str, len) {
  var start = position.index;
  // Stop at the end of the input: `charAt` yields '' past the last character,
  // which would otherwise be counted as a column and leave `index` pointing
  // beyond the string (e.g. when a tag is never terminated).
  var end = Math.min(start + len, Math.max(start, str.length));
  position.index = end;
  for (var i = start; i < end; i++) {
    if (str.charAt(i) === '\n') {
      position.line++;
      position.column = 0;
    } else {
      position.column++;
    }
  }
}
/**
 * Advances `position` to the absolute index `end` by feeding it the
 * `end - position.index` characters in between.
 *
 * @param {{index: number, line: number, column: number}} position - Cursor to advance (mutated).
 * @param {string} str - Text being scanned.
 * @param {number} end - Absolute index in `str` to move to.
 * @returns {*} Whatever `feedPosition` returns.
 */
function jumpPosition(position, str, end) {
  var len = end - position.index;
  return feedPosition(position, str, len);
}
/**
 * Creates a fresh position object with `index`, `column` and `line` all 0.
 *
 * @returns {{index: number, column: number, line: number}} A new object on every call.
 */
function makeInitialPosition() {
  return {
    index: 0,
    column: 0,
    line: 0
  };
}
/**
 * Returns a new object holding the `index`, `line` and `column` of
 * `position`, so later mutation of the original does not affect the copy.
 *
 * @param {{index: number, line: number, column: number}} position - Position to snapshot.
 * @returns {{index: number, line: number, column: number}} Independent copy.
 */
function copyPosition(position) {
  return {
    index: position.index,
    line: position.line,
    column: position.column
  };
}
function lexer(str, options) { | ||
var state = { str: str, options: options, cursor: 0, tokens: [] }; | ||
var state = { | ||
str: str, | ||
options: options, | ||
position: makeInitialPosition(), | ||
tokens: [] | ||
}; | ||
lex(state); | ||
@@ -182,10 +227,11 @@ return state.tokens; | ||
function lex(state) { | ||
var str = state.str; | ||
var str = state.str, | ||
childlessTags = state.options.childlessTags; | ||
var len = str.length; | ||
while (state.cursor < len) { | ||
var start = state.cursor; | ||
while (state.position.index < len) { | ||
var start = state.position.index; | ||
lexText(state); | ||
if (state.cursor === start) { | ||
var isComment = (0, _compat.startsWith)(str, '!--', state.cursor + 1); | ||
if (state.position.index === start) { | ||
var isComment = (0, _compat.startsWith)(str, '!--', start + 1); | ||
if (isComment) { | ||
@@ -196,4 +242,2 @@ lexComment(state); | ||
var safeTag = tagName.toLowerCase(); | ||
var childlessTags = state.options.childlessTags; | ||
if ((0, _compat.arrayIncludes)(childlessTags, safeTag)) { | ||
@@ -225,48 +269,51 @@ lexSkipTag(tagName, state); | ||
var str = state.str, | ||
cursor = state.cursor; | ||
position = state.position; | ||
var textEnd = findTextEnd(str, cursor); | ||
var textEnd = findTextEnd(str, position.index); | ||
if (textEnd === position.index) return; | ||
if (textEnd === -1) { | ||
// there is only text left | ||
var _content = str.slice(cursor); | ||
state.cursor = str.length; | ||
state.tokens.push({ type: type, content: _content }); | ||
return; | ||
textEnd = str.length; | ||
} | ||
if (textEnd === cursor) return; | ||
var content = str.slice(cursor, textEnd); | ||
state.cursor = textEnd; | ||
state.tokens.push({ type: type, content: content }); | ||
var start = copyPosition(position); | ||
var content = str.slice(position.index, textEnd); | ||
jumpPosition(position, str, textEnd); | ||
var end = copyPosition(position); | ||
state.tokens.push({ type: type, content: content, position: { start: start, end: end } }); | ||
} | ||
function lexComment(state) { | ||
state.cursor += 4; // "<!--".length | ||
var str = state.str, | ||
cursor = state.cursor; | ||
position = state.position; | ||
var commentEnd = str.indexOf('-->', cursor); | ||
var type = 'comment'; | ||
if (commentEnd === -1) { | ||
// there is only the comment left | ||
var _content2 = str.slice(cursor); | ||
state.cursor = str.length; | ||
state.tokens.push({ type: type, content: _content2 }); | ||
return; | ||
var start = copyPosition(position); | ||
feedPosition(position, str, 4); // "<!--".length | ||
var contentEnd = str.indexOf('-->', position.index); | ||
var commentEnd = contentEnd + 3; // "-->".length | ||
if (contentEnd === -1) { | ||
contentEnd = commentEnd = str.length; | ||
} | ||
var content = str.slice(cursor, commentEnd); | ||
state.cursor = commentEnd + 3; // "-->".length | ||
state.tokens.push({ type: type, content: content }); | ||
var content = str.slice(position.index, contentEnd); | ||
jumpPosition(position, str, commentEnd); | ||
state.tokens.push({ | ||
type: 'comment', | ||
content: content, | ||
position: { | ||
start: start, | ||
end: copyPosition(position) | ||
} | ||
}); | ||
} | ||
function lexTag(state) { | ||
var str = state.str; | ||
var str = state.str, | ||
position = state.position; | ||
{ | ||
var secondChar = str.charAt(state.cursor + 1); | ||
var secondChar = str.charAt(position.index + 1); | ||
var close = secondChar === '/'; | ||
state.tokens.push({ type: 'tag-start', close: close }); | ||
state.cursor += close ? 2 : 1; | ||
var start = copyPosition(position); | ||
feedPosition(position, str, close ? 2 : 1); | ||
state.tokens.push({ type: 'tag-start', close: close, position: { start: start } }); | ||
} | ||
@@ -276,6 +323,7 @@ var tagName = lexTagName(state); | ||
{ | ||
var firstChar = str.charAt(state.cursor); | ||
var firstChar = str.charAt(position.index); | ||
var _close = firstChar === '/'; | ||
state.tokens.push({ type: 'tag-end', close: _close }); | ||
state.cursor += _close ? 2 : 1; | ||
feedPosition(position, str, _close ? 2 : 1); | ||
var end = copyPosition(position); | ||
state.tokens.push({ type: 'tag-end', close: _close, position: { end: end } }); | ||
} | ||
@@ -293,6 +341,6 @@ return tagName; | ||
var str = state.str, | ||
cursor = state.cursor; | ||
position = state.position; | ||
var len = str.length; | ||
var start = cursor; | ||
var start = position.index; | ||
while (start < len) { | ||
@@ -313,5 +361,8 @@ var char = str.charAt(start); | ||
state.cursor = end; | ||
jumpPosition(position, str, end); | ||
var tagName = str.slice(start, end); | ||
state.tokens.push({ type: 'tag', content: tagName }); | ||
state.tokens.push({ | ||
type: 'tag', | ||
content: tagName | ||
}); | ||
return tagName; | ||
@@ -322,5 +373,6 @@ } | ||
var str = state.str, | ||
position = state.position, | ||
tokens = state.tokens; | ||
var cursor = state.cursor; | ||
var cursor = position.index; | ||
var quote = null; // null, single-, or double-quote | ||
@@ -368,3 +420,3 @@ var wordBegin = cursor; // index of word start | ||
} | ||
state.cursor = cursor; | ||
jumpPosition(position, str, cursor); | ||
@@ -413,9 +465,12 @@ var wLen = words.length; | ||
var push = [].push; | ||
function lexSkipTag(tagName, state) { | ||
var str = state.str, | ||
cursor = state.cursor, | ||
position = state.position, | ||
tokens = state.tokens; | ||
var safeTagName = tagName.toLowerCase(); | ||
var len = str.length; | ||
var index = cursor; | ||
var index = position.index; | ||
while (index < len) { | ||
@@ -428,17 +483,26 @@ var nextTag = str.indexOf('</', index); | ||
var tagState = { str: str, cursor: nextTag + 2, tokens: [] }; | ||
var name = lexTagName(tagState); | ||
var safeTagName = tagName.toLowerCase(); | ||
var tagStartPosition = copyPosition(position); | ||
jumpPosition(tagStartPosition, str, nextTag); | ||
var tagState = { str: str, position: tagStartPosition, tokens: [] }; | ||
var name = lexTag(tagState); | ||
if (safeTagName !== name.toLowerCase()) { | ||
index = tagState.cursor; | ||
index = tagState.position.index; | ||
continue; | ||
} | ||
var content = str.slice(cursor, nextTag); | ||
tokens.push({ type: 'text', content: content }); | ||
var openTag = { type: 'tag-start', close: true }; | ||
var closeTag = { type: 'tag-end', close: false }; | ||
lexTagAttributes(tagState); | ||
tokens.push.apply(tokens, [openTag].concat(_toConsumableArray(tagState.tokens), [closeTag])); | ||
state.cursor = tagState.cursor + 1; | ||
if (nextTag !== position.index) { | ||
var textStart = copyPosition(position); | ||
jumpPosition(position, str, nextTag); | ||
tokens.push({ | ||
type: 'text', | ||
content: str.slice(textStart.index, nextTag), | ||
position: { | ||
start: textStart, | ||
end: copyPosition(position) | ||
} | ||
}); | ||
} | ||
push.apply(tokens, tagState.tokens); | ||
jumpPosition(position, str, tagState.position.index); | ||
break; | ||
@@ -456,2 +520,3 @@ } | ||
exports.hasTerminalParent = hasTerminalParent; | ||
exports.rewindStack = rewindStack; | ||
exports.parse = parse; | ||
@@ -486,2 +551,10 @@ | ||
/**
 * Closes the stack entries from `newLength` upward and truncates `stack`
 * in place to `newLength` entries.
 *
 * The entry at `newLength` receives `endPosition` as its `position.end`;
 * every entry above it receives `childrenEndPosition`.
 *
 * @param {Array<{position: {end: *}}>} stack - Open-element stack (mutated).
 * @param {number} newLength - Index of the shallowest entry to close; also the resulting length.
 * @param {*} childrenEndPosition - End position assigned to entries above `newLength`.
 * @param {*} endPosition - End position assigned to the entry at `newLength`.
 */
function rewindStack(stack, newLength, childrenEndPosition, endPosition) {
  stack[newLength].position.end = endPosition;
  for (var i = newLength + 1, len = stack.length; i < len; i++) {
    stack[i].position.end = childrenEndPosition;
  }
  stack.splice(newLength);
}
function parse(state) { | ||
@@ -509,7 +582,6 @@ var tokens = state.tokens, | ||
var index = stack.length; | ||
var didRewind = false; | ||
var shouldRewind = false; | ||
while (--index > -1) { | ||
if (stack[index].tagName === tagName) { | ||
stack.splice(index); | ||
didRewind = true; | ||
shouldRewind = true; | ||
break; | ||
@@ -523,3 +595,4 @@ } | ||
} | ||
if (didRewind) { | ||
if (shouldRewind) { | ||
rewindStack(stack, index, token.position.start, tokens[cursor - 1].position.end); | ||
break; | ||
@@ -545,3 +618,3 @@ } else { | ||
if (tagName === stack[currentIndex].tagName) { | ||
stack = stack.slice(0, currentIndex); | ||
rewindStack(stack, currentIndex, token.position.start, token.position.start); | ||
var previousIndex = currentIndex - 1; | ||
@@ -566,15 +639,25 @@ nodes = stack[previousIndex].children; | ||
var children = []; | ||
nodes.push({ | ||
var position = { | ||
start: token.position.start, | ||
end: attrToken.position.end | ||
}; | ||
var elementNode = { | ||
type: 'element', | ||
tagName: tagToken.content, | ||
attributes: attributes, | ||
children: children | ||
}); | ||
children: children, | ||
position: position | ||
}; | ||
nodes.push(elementNode); | ||
var hasChildren = !(attrToken.close || (0, _compat.arrayIncludes)(options.voidTags, tagName)); | ||
if (hasChildren) { | ||
stack.push({ tagName: tagName, children: children }); | ||
var size = stack.push({ tagName: tagName, children: children, position: position }); | ||
var innerState = { tokens: tokens, options: options, cursor: cursor, stack: stack }; | ||
parse(innerState); | ||
cursor = innerState.cursor; | ||
var rewoundInElement = stack.length === size; | ||
if (rewoundInElement) { | ||
elementNode.position.end = tokens[cursor - 1].position.end; | ||
} | ||
} | ||
@@ -581,0 +664,0 @@ } |
@@ -26,13 +26,15 @@ 'use strict'; | ||
function format(nodes) { | ||
function format(nodes, options) { | ||
return nodes.map(function (node) { | ||
var type = node.type; | ||
if (type === 'element') { | ||
var tagName = node.tagName.toLowerCase(); | ||
var attributes = formatAttributes(node.attributes); | ||
var children = format(node.children); | ||
return { type: type, tagName: tagName, attributes: attributes, children: children }; | ||
var outputNode = type === 'element' ? { | ||
type: type, | ||
tagName: node.tagName.toLowerCase(), | ||
attributes: formatAttributes(node.attributes), | ||
children: format(node.children, options) | ||
} : { type: type, content: node.content }; | ||
if (options.includePositions) { | ||
outputNode.position = node.position; | ||
} | ||
return { type: type, content: node.content }; | ||
return outputNode; | ||
}); | ||
@@ -39,0 +41,0 @@ } |
@@ -30,3 +30,4 @@ 'use strict'; | ||
childlessTags: _tags.childlessTags, | ||
closingTagAncestorBreakers: _tags.closingTagAncestorBreakers | ||
closingTagAncestorBreakers: _tags.closingTagAncestorBreakers, | ||
includePositions: false | ||
}; | ||
@@ -33,0 +34,0 @@ |
181
lib/lexer.js
@@ -6,2 +6,6 @@ 'use strict'; | ||
}); | ||
exports.feedPosition = feedPosition; | ||
exports.jumpPosition = jumpPosition; | ||
exports.makeInitialPosition = makeInitialPosition; | ||
exports.copyPosition = copyPosition; | ||
exports.default = lexer; | ||
@@ -20,6 +24,44 @@ exports.lex = lex; | ||
function _toConsumableArray(arr) { if (Array.isArray(arr)) { for (var i = 0, arr2 = Array(arr.length); i < arr.length; i++) { arr2[i] = arr[i]; } return arr2; } else { return Array.from(arr); } } | ||
/**
 * Advances `position` over the next `len` characters of `str`, mutating it
 * in place.
 *
 * Every '\n' consumed increments `position.line` and resets
 * `position.column` to 0; any other character increments `position.column`.
 *
 * @param {{index: number, line: number, column: number}} position - Cursor to advance (mutated).
 * @param {string} str - Text being scanned.
 * @param {number} len - Number of characters to consume.
 */
function feedPosition(position, str, len) {
  var start = position.index;
  // Stop at the end of the input: `charAt` yields '' past the last character,
  // which would otherwise be counted as a column and leave `index` pointing
  // beyond the string (e.g. when a tag is never terminated).
  var end = Math.min(start + len, Math.max(start, str.length));
  position.index = end;
  for (var i = start; i < end; i++) {
    if (str.charAt(i) === '\n') {
      position.line++;
      position.column = 0;
    } else {
      position.column++;
    }
  }
}
/**
 * Advances `position` to the absolute index `end` by feeding it the
 * `end - position.index` characters in between.
 *
 * @param {{index: number, line: number, column: number}} position - Cursor to advance (mutated).
 * @param {string} str - Text being scanned.
 * @param {number} end - Absolute index in `str` to move to.
 * @returns {*} Whatever `feedPosition` returns.
 */
function jumpPosition(position, str, end) {
  var len = end - position.index;
  return feedPosition(position, str, len);
}
/**
 * Creates a fresh position object with `index`, `column` and `line` all 0.
 *
 * @returns {{index: number, column: number, line: number}} A new object on every call.
 */
function makeInitialPosition() {
  return {
    index: 0,
    column: 0,
    line: 0
  };
}
/**
 * Returns a new object holding the `index`, `line` and `column` of
 * `position`, so later mutation of the original does not affect the copy.
 *
 * @param {{index: number, line: number, column: number}} position - Position to snapshot.
 * @returns {{index: number, line: number, column: number}} Independent copy.
 */
function copyPosition(position) {
  return {
    index: position.index,
    line: position.line,
    column: position.column
  };
}
function lexer(str, options) { | ||
var state = { str: str, options: options, cursor: 0, tokens: [] }; | ||
var state = { | ||
str: str, | ||
options: options, | ||
position: makeInitialPosition(), | ||
tokens: [] | ||
}; | ||
lex(state); | ||
@@ -30,10 +72,11 @@ return state.tokens; | ||
function lex(state) { | ||
var str = state.str; | ||
var str = state.str, | ||
childlessTags = state.options.childlessTags; | ||
var len = str.length; | ||
while (state.cursor < len) { | ||
var start = state.cursor; | ||
while (state.position.index < len) { | ||
var start = state.position.index; | ||
lexText(state); | ||
if (state.cursor === start) { | ||
var isComment = (0, _compat.startsWith)(str, '!--', state.cursor + 1); | ||
if (state.position.index === start) { | ||
var isComment = (0, _compat.startsWith)(str, '!--', start + 1); | ||
if (isComment) { | ||
@@ -44,4 +87,2 @@ lexComment(state); | ||
var safeTag = tagName.toLowerCase(); | ||
var childlessTags = state.options.childlessTags; | ||
if ((0, _compat.arrayIncludes)(childlessTags, safeTag)) { | ||
@@ -73,48 +114,51 @@ lexSkipTag(tagName, state); | ||
var str = state.str, | ||
cursor = state.cursor; | ||
position = state.position; | ||
var textEnd = findTextEnd(str, cursor); | ||
var textEnd = findTextEnd(str, position.index); | ||
if (textEnd === position.index) return; | ||
if (textEnd === -1) { | ||
// there is only text left | ||
var _content = str.slice(cursor); | ||
state.cursor = str.length; | ||
state.tokens.push({ type: type, content: _content }); | ||
return; | ||
textEnd = str.length; | ||
} | ||
if (textEnd === cursor) return; | ||
var content = str.slice(cursor, textEnd); | ||
state.cursor = textEnd; | ||
state.tokens.push({ type: type, content: content }); | ||
var start = copyPosition(position); | ||
var content = str.slice(position.index, textEnd); | ||
jumpPosition(position, str, textEnd); | ||
var end = copyPosition(position); | ||
state.tokens.push({ type: type, content: content, position: { start: start, end: end } }); | ||
} | ||
function lexComment(state) { | ||
state.cursor += 4; // "<!--".length | ||
var str = state.str, | ||
cursor = state.cursor; | ||
position = state.position; | ||
var commentEnd = str.indexOf('-->', cursor); | ||
var type = 'comment'; | ||
if (commentEnd === -1) { | ||
// there is only the comment left | ||
var _content2 = str.slice(cursor); | ||
state.cursor = str.length; | ||
state.tokens.push({ type: type, content: _content2 }); | ||
return; | ||
var start = copyPosition(position); | ||
feedPosition(position, str, 4); // "<!--".length | ||
var contentEnd = str.indexOf('-->', position.index); | ||
var commentEnd = contentEnd + 3; // "-->".length | ||
if (contentEnd === -1) { | ||
contentEnd = commentEnd = str.length; | ||
} | ||
var content = str.slice(cursor, commentEnd); | ||
state.cursor = commentEnd + 3; // "-->".length | ||
state.tokens.push({ type: type, content: content }); | ||
var content = str.slice(position.index, contentEnd); | ||
jumpPosition(position, str, commentEnd); | ||
state.tokens.push({ | ||
type: 'comment', | ||
content: content, | ||
position: { | ||
start: start, | ||
end: copyPosition(position) | ||
} | ||
}); | ||
} | ||
function lexTag(state) { | ||
var str = state.str; | ||
var str = state.str, | ||
position = state.position; | ||
{ | ||
var secondChar = str.charAt(state.cursor + 1); | ||
var secondChar = str.charAt(position.index + 1); | ||
var close = secondChar === '/'; | ||
state.tokens.push({ type: 'tag-start', close: close }); | ||
state.cursor += close ? 2 : 1; | ||
var start = copyPosition(position); | ||
feedPosition(position, str, close ? 2 : 1); | ||
state.tokens.push({ type: 'tag-start', close: close, position: { start: start } }); | ||
} | ||
@@ -124,6 +168,7 @@ var tagName = lexTagName(state); | ||
{ | ||
var firstChar = str.charAt(state.cursor); | ||
var firstChar = str.charAt(position.index); | ||
var _close = firstChar === '/'; | ||
state.tokens.push({ type: 'tag-end', close: _close }); | ||
state.cursor += _close ? 2 : 1; | ||
feedPosition(position, str, _close ? 2 : 1); | ||
var end = copyPosition(position); | ||
state.tokens.push({ type: 'tag-end', close: _close, position: { end: end } }); | ||
} | ||
@@ -141,6 +186,6 @@ return tagName; | ||
var str = state.str, | ||
cursor = state.cursor; | ||
position = state.position; | ||
var len = str.length; | ||
var start = cursor; | ||
var start = position.index; | ||
while (start < len) { | ||
@@ -161,5 +206,8 @@ var char = str.charAt(start); | ||
state.cursor = end; | ||
jumpPosition(position, str, end); | ||
var tagName = str.slice(start, end); | ||
state.tokens.push({ type: 'tag', content: tagName }); | ||
state.tokens.push({ | ||
type: 'tag', | ||
content: tagName | ||
}); | ||
return tagName; | ||
@@ -170,5 +218,6 @@ } | ||
var str = state.str, | ||
position = state.position, | ||
tokens = state.tokens; | ||
var cursor = state.cursor; | ||
var cursor = position.index; | ||
var quote = null; // null, single-, or double-quote | ||
@@ -216,3 +265,3 @@ var wordBegin = cursor; // index of word start | ||
} | ||
state.cursor = cursor; | ||
jumpPosition(position, str, cursor); | ||
@@ -261,9 +310,12 @@ var wLen = words.length; | ||
var push = [].push; | ||
function lexSkipTag(tagName, state) { | ||
var str = state.str, | ||
cursor = state.cursor, | ||
position = state.position, | ||
tokens = state.tokens; | ||
var safeTagName = tagName.toLowerCase(); | ||
var len = str.length; | ||
var index = cursor; | ||
var index = position.index; | ||
while (index < len) { | ||
@@ -276,17 +328,26 @@ var nextTag = str.indexOf('</', index); | ||
var tagState = { str: str, cursor: nextTag + 2, tokens: [] }; | ||
var name = lexTagName(tagState); | ||
var safeTagName = tagName.toLowerCase(); | ||
var tagStartPosition = copyPosition(position); | ||
jumpPosition(tagStartPosition, str, nextTag); | ||
var tagState = { str: str, position: tagStartPosition, tokens: [] }; | ||
var name = lexTag(tagState); | ||
if (safeTagName !== name.toLowerCase()) { | ||
index = tagState.cursor; | ||
index = tagState.position.index; | ||
continue; | ||
} | ||
var content = str.slice(cursor, nextTag); | ||
tokens.push({ type: 'text', content: content }); | ||
var openTag = { type: 'tag-start', close: true }; | ||
var closeTag = { type: 'tag-end', close: false }; | ||
lexTagAttributes(tagState); | ||
tokens.push.apply(tokens, [openTag].concat(_toConsumableArray(tagState.tokens), [closeTag])); | ||
state.cursor = tagState.cursor + 1; | ||
if (nextTag !== position.index) { | ||
var textStart = copyPosition(position); | ||
jumpPosition(position, str, nextTag); | ||
tokens.push({ | ||
type: 'text', | ||
content: str.slice(textStart.index, nextTag), | ||
position: { | ||
start: textStart, | ||
end: copyPosition(position) | ||
} | ||
}); | ||
} | ||
push.apply(tokens, tagState.tokens); | ||
jumpPosition(position, str, tagState.position.index); | ||
break; | ||
@@ -293,0 +354,0 @@ } |
@@ -8,2 +8,3 @@ 'use strict'; | ||
exports.hasTerminalParent = hasTerminalParent; | ||
exports.rewindStack = rewindStack; | ||
exports.parse = parse; | ||
@@ -38,2 +39,10 @@ | ||
/**
 * Closes the stack entries from `newLength` upward and truncates `stack`
 * in place to `newLength` entries.
 *
 * The entry at `newLength` receives `endPosition` as its `position.end`;
 * every entry above it receives `childrenEndPosition`.
 *
 * @param {Array<{position: {end: *}}>} stack - Open-element stack (mutated).
 * @param {number} newLength - Index of the shallowest entry to close; also the resulting length.
 * @param {*} childrenEndPosition - End position assigned to entries above `newLength`.
 * @param {*} endPosition - End position assigned to the entry at `newLength`.
 */
function rewindStack(stack, newLength, childrenEndPosition, endPosition) {
  stack[newLength].position.end = endPosition;
  for (var i = newLength + 1, len = stack.length; i < len; i++) {
    stack[i].position.end = childrenEndPosition;
  }
  stack.splice(newLength);
}
function parse(state) { | ||
@@ -61,7 +70,6 @@ var tokens = state.tokens, | ||
var index = stack.length; | ||
var didRewind = false; | ||
var shouldRewind = false; | ||
while (--index > -1) { | ||
if (stack[index].tagName === tagName) { | ||
stack.splice(index); | ||
didRewind = true; | ||
shouldRewind = true; | ||
break; | ||
@@ -75,3 +83,4 @@ } | ||
} | ||
if (didRewind) { | ||
if (shouldRewind) { | ||
rewindStack(stack, index, token.position.start, tokens[cursor - 1].position.end); | ||
break; | ||
@@ -97,3 +106,3 @@ } else { | ||
if (tagName === stack[currentIndex].tagName) { | ||
stack = stack.slice(0, currentIndex); | ||
rewindStack(stack, currentIndex, token.position.start, token.position.start); | ||
var previousIndex = currentIndex - 1; | ||
@@ -118,15 +127,25 @@ nodes = stack[previousIndex].children; | ||
var children = []; | ||
nodes.push({ | ||
var position = { | ||
start: token.position.start, | ||
end: attrToken.position.end | ||
}; | ||
var elementNode = { | ||
type: 'element', | ||
tagName: tagToken.content, | ||
attributes: attributes, | ||
children: children | ||
}); | ||
children: children, | ||
position: position | ||
}; | ||
nodes.push(elementNode); | ||
var hasChildren = !(attrToken.close || (0, _compat.arrayIncludes)(options.voidTags, tagName)); | ||
if (hasChildren) { | ||
stack.push({ tagName: tagName, children: children }); | ||
var size = stack.push({ tagName: tagName, children: children, position: position }); | ||
var innerState = { tokens: tokens, options: options, cursor: cursor, stack: stack }; | ||
parse(innerState); | ||
cursor = innerState.cursor; | ||
var rewoundInElement = stack.length === size; | ||
if (rewoundInElement) { | ||
elementNode.position.end = tokens[cursor - 1].position.end; | ||
} | ||
} | ||
@@ -133,0 +152,0 @@ } |
{ | ||
"name": "himalaya", | ||
"description": "HTML to JSON parser", | ||
"version": "1.0.1", | ||
"version": "1.1.0", | ||
"author": "Chris Andrejewski <christopher.andrejewski@gmail.com>", | ||
@@ -47,3 +47,3 @@ "ava": { | ||
"source-map-support": "^0.5.0", | ||
"standard": "^10.0.1", | ||
"standard": "^11.0.0", | ||
"vinyl-buffer": "^1.0.1", | ||
@@ -50,0 +50,0 @@ "vinyl-source-stream": "^2.0.0" |
@@ -91,2 +91,33 @@ # Himalaya | ||
### Line, column, and index positions | ||
Himalaya can include the start and end positions of nodes in the parse output. | ||
To enable this, you can pass `parse` the `parseDefaults` extended with `includePositions: true`: | ||
```js | ||
import { parse, parseDefaults } from 'himalaya' | ||
parse('<img>', { ...parseDefaults, includePositions: true }) | ||
/* => | ||
[ | ||
{ | ||
"type": "element", | ||
"tagName": "img", | ||
"attributes": [], | ||
"children": [], | ||
"position": { | ||
"start": { | ||
"index": 0, | ||
"line": 0, | ||
"column": 0 | ||
}, | ||
"end": { | ||
"index": 5, | ||
"line": 0, | ||
"column": 5 | ||
} | ||
} | ||
} | ||
] | ||
*/ | ||
``` | ||
## Going back to HTML | ||
@@ -93,0 +124,0 @@ Himalaya provides a `stringify` method. The following example parses the HTML to JSON then parses the JSON back into HTML. |
@@ -17,13 +17,17 @@ export function splitHead (str, sep) { | ||
export function format (nodes) { | ||
export function format (nodes, options) { | ||
return nodes.map(node => { | ||
const type = node.type | ||
if (type === 'element') { | ||
const tagName = node.tagName.toLowerCase() | ||
const attributes = formatAttributes(node.attributes) | ||
const children = format(node.children) | ||
return {type, tagName, attributes, children} | ||
const outputNode = type === 'element' | ||
? { | ||
type, | ||
tagName: node.tagName.toLowerCase(), | ||
attributes: formatAttributes(node.attributes), | ||
children: format(node.children, options) | ||
} | ||
: { type, content: node.content } | ||
if (options.includePositions) { | ||
outputNode.position = node.position | ||
} | ||
return {type, content: node.content} | ||
return outputNode | ||
}) | ||
@@ -30,0 +34,0 @@ } |
@@ -16,3 +16,4 @@ import lexer from './lexer' | ||
childlessTags, | ||
closingTagAncestorBreakers | ||
closingTagAncestorBreakers, | ||
includePositions: false | ||
} | ||
@@ -19,0 +20,0 @@ |
175
src/lexer.js
@@ -8,4 +8,44 @@ import { | ||
/**
 * Advances `position` over the next `len` characters of `str`, mutating it
 * in place.
 *
 * Every '\n' consumed increments `position.line` and resets
 * `position.column` to 0; any other character increments `position.column`.
 *
 * @param {{index: number, line: number, column: number}} position - Cursor to advance (mutated).
 * @param {string} str - Text being scanned.
 * @param {number} len - Number of characters to consume.
 */
export function feedPosition (position, str, len) {
  const start = position.index
  // Stop at the end of the input: `charAt` yields '' past the last character,
  // which would otherwise be counted as a column and leave `index` pointing
  // beyond the string (e.g. when a tag is never terminated).
  const end = Math.min(start + len, Math.max(start, str.length))
  position.index = end
  for (let i = start; i < end; i++) {
    if (str.charAt(i) === '\n') {
      position.line++
      position.column = 0
    } else {
      position.column++
    }
  }
}
/**
 * Advances `position` to the absolute index `end` by feeding it the
 * `end - position.index` characters in between.
 *
 * @param {{index: number, line: number, column: number}} position - Cursor to advance (mutated).
 * @param {string} str - Text being scanned.
 * @param {number} end - Absolute index in `str` to move to.
 * @returns {*} Whatever `feedPosition` returns.
 */
export function jumpPosition (position, str, end) {
  const len = end - position.index
  return feedPosition(position, str, len)
}
/**
 * Creates a fresh position object with `index`, `column` and `line` all 0.
 *
 * @returns {{index: number, column: number, line: number}} A new object on every call.
 */
export function makeInitialPosition () {
  return {
    index: 0,
    column: 0,
    line: 0
  }
}
/**
 * Returns a new object holding the `index`, `line` and `column` of
 * `position`, so later mutation of the original does not affect the copy.
 *
 * @param {{index: number, line: number, column: number}} position - Position to snapshot.
 * @returns {{index: number, line: number, column: number}} Independent copy.
 */
export function copyPosition (position) {
  return {
    index: position.index,
    line: position.line,
    column: position.column
  }
}
export default function lexer (str, options) { | ||
const state = {str, options, cursor: 0, tokens: []} | ||
const state = { | ||
str, | ||
options, | ||
position: makeInitialPosition(), | ||
tokens: [] | ||
} | ||
lex(state) | ||
@@ -16,9 +56,9 @@ return state.tokens | ||
export function lex (state) { | ||
const {str} = state | ||
const {str, options: {childlessTags}} = state | ||
const len = str.length | ||
while (state.cursor < len) { | ||
const start = state.cursor | ||
while (state.position.index < len) { | ||
const start = state.position.index | ||
lexText(state) | ||
if (state.cursor === start) { | ||
const isComment = startsWith(str, '!--', state.cursor + 1) | ||
if (state.position.index === start) { | ||
const isComment = startsWith(str, '!--', start + 1) | ||
if (isComment) { | ||
@@ -29,3 +69,2 @@ lexComment(state) | ||
const safeTag = tagName.toLowerCase() | ||
const {childlessTags} = state.options | ||
if (arrayIncludes(childlessTags, safeTag)) { | ||
@@ -56,44 +95,46 @@ lexSkipTag(tagName, state) | ||
const type = 'text' | ||
const {str, cursor} = state | ||
const textEnd = findTextEnd(str, cursor) | ||
const {str, position} = state | ||
let textEnd = findTextEnd(str, position.index) | ||
if (textEnd === position.index) return | ||
if (textEnd === -1) { | ||
// there is only text left | ||
const content = str.slice(cursor) | ||
state.cursor = str.length | ||
state.tokens.push({type, content}) | ||
return | ||
textEnd = str.length | ||
} | ||
if (textEnd === cursor) return | ||
const content = str.slice(cursor, textEnd) | ||
state.cursor = textEnd | ||
state.tokens.push({type, content}) | ||
const start = copyPosition(position) | ||
const content = str.slice(position.index, textEnd) | ||
jumpPosition(position, str, textEnd) | ||
const end = copyPosition(position) | ||
state.tokens.push({type, content, position: {start, end}}) | ||
} | ||
export function lexComment (state) { | ||
state.cursor += 4 // "<!--".length | ||
const {str, cursor} = state | ||
const commentEnd = str.indexOf('-->', cursor) | ||
const type = 'comment' | ||
if (commentEnd === -1) { | ||
// there is only the comment left | ||
const content = str.slice(cursor) | ||
state.cursor = str.length | ||
state.tokens.push({type, content}) | ||
return | ||
const {str, position} = state | ||
const start = copyPosition(position) | ||
feedPosition(position, str, 4) // "<!--".length | ||
let contentEnd = str.indexOf('-->', position.index) | ||
let commentEnd = contentEnd + 3 // "-->".length | ||
if (contentEnd === -1) { | ||
contentEnd = commentEnd = str.length | ||
} | ||
const content = str.slice(cursor, commentEnd) | ||
state.cursor = commentEnd + 3 // "-->".length | ||
state.tokens.push({type, content}) | ||
const content = str.slice(position.index, contentEnd) | ||
jumpPosition(position, str, commentEnd) | ||
state.tokens.push({ | ||
type: 'comment', | ||
content, | ||
position: { | ||
start, | ||
end: copyPosition(position) | ||
} | ||
}) | ||
} | ||
export function lexTag (state) { | ||
const {str} = state | ||
const {str, position} = state | ||
{ | ||
const secondChar = str.charAt(state.cursor + 1) | ||
const secondChar = str.charAt(position.index + 1) | ||
const close = secondChar === '/' | ||
state.tokens.push({type: 'tag-start', close}) | ||
state.cursor += close ? 2 : 1 | ||
const start = copyPosition(position) | ||
feedPosition(position, str, close ? 2 : 1) | ||
state.tokens.push({type: 'tag-start', close, position: {start}}) | ||
} | ||
@@ -103,6 +144,7 @@ const tagName = lexTagName(state) | ||
{ | ||
const firstChar = str.charAt(state.cursor) | ||
const firstChar = str.charAt(position.index) | ||
const close = firstChar === '/' | ||
state.tokens.push({type: 'tag-end', close}) | ||
state.cursor += close ? 2 : 1 | ||
feedPosition(position, str, close ? 2 : 1) | ||
const end = copyPosition(position) | ||
state.tokens.push({type: 'tag-end', close, position: {end}}) | ||
} | ||
@@ -119,5 +161,5 @@ return tagName | ||
export function lexTagName (state) { | ||
const {str, cursor} = state | ||
const {str, position} = state | ||
const len = str.length | ||
let start = cursor | ||
let start = position.index | ||
while (start < len) { | ||
@@ -138,5 +180,8 @@ const char = str.charAt(start) | ||
state.cursor = end | ||
jumpPosition(position, str, end) | ||
const tagName = str.slice(start, end) | ||
state.tokens.push({type: 'tag', content: tagName}) | ||
state.tokens.push({ | ||
type: 'tag', | ||
content: tagName | ||
}) | ||
return tagName | ||
@@ -146,4 +191,4 @@ } | ||
export function lexTagAttributes (state) { | ||
const {str, tokens} = state | ||
let cursor = state.cursor | ||
const {str, position, tokens} = state | ||
let cursor = position.index | ||
let quote = null // null, single-, or double-quote | ||
@@ -191,3 +236,3 @@ let wordBegin = cursor // index of word start | ||
} | ||
state.cursor = cursor | ||
jumpPosition(position, str, cursor) | ||
@@ -236,6 +281,9 @@ const wLen = words.length | ||
const push = [].push | ||
export function lexSkipTag (tagName, state) { | ||
const {str, cursor, tokens} = state | ||
const {str, position, tokens} = state | ||
const safeTagName = tagName.toLowerCase() | ||
const len = str.length | ||
let index = cursor | ||
let index = position.index | ||
while (index < len) { | ||
@@ -248,19 +296,28 @@ const nextTag = str.indexOf('</', index) | ||
const tagState = {str, cursor: nextTag + 2, tokens: []} | ||
const name = lexTagName(tagState) | ||
const safeTagName = tagName.toLowerCase() | ||
const tagStartPosition = copyPosition(position) | ||
jumpPosition(tagStartPosition, str, nextTag) | ||
const tagState = {str, position: tagStartPosition, tokens: []} | ||
const name = lexTag(tagState) | ||
if (safeTagName !== name.toLowerCase()) { | ||
index = tagState.cursor | ||
index = tagState.position.index | ||
continue | ||
} | ||
const content = str.slice(cursor, nextTag) | ||
tokens.push({type: 'text', content}) | ||
const openTag = {type: 'tag-start', close: true} | ||
const closeTag = {type: 'tag-end', close: false} | ||
lexTagAttributes(tagState) | ||
tokens.push(openTag, ...tagState.tokens, closeTag) | ||
state.cursor = tagState.cursor + 1 | ||
if (nextTag !== position.index) { | ||
const textStart = copyPosition(position) | ||
jumpPosition(position, str, nextTag) | ||
tokens.push({ | ||
type: 'text', | ||
content: str.slice(textStart.index, nextTag), | ||
position: { | ||
start: textStart, | ||
end: copyPosition(position) | ||
} | ||
}) | ||
} | ||
push.apply(tokens, tagState.tokens) | ||
jumpPosition(position, str, tagState.position.index) | ||
break | ||
} | ||
} |
@@ -28,2 +28,10 @@ import {arrayIncludes} from './compat' | ||
/**
 * Closes the stack frame at `newLength` together with every frame nested
 * above it, then truncates the stack so that it holds `newLength` entries.
 *
 * The stack is mutated in place. Only the `position` objects are written to,
 * so anything else holding a reference to the same `position` object sees the
 * updated `end`.
 *
 * @param {Array<{position: {end: *}}>} stack - open frames, outermost first.
 * @param {number} newLength - index of the frame being closed; also the
 *   length of `stack` once the call returns.
 * @param {*} childrenEndPosition - `end` assigned to every frame above
 *   `newLength`.
 * @param {*} endPosition - `end` assigned to the frame at `newLength`.
 */
export function rewindStack (stack, newLength, childrenEndPosition, endPosition) {
  stack[newLength].position.end = endPosition
  for (let i = newLength + 1, len = stack.length; i < len; i++) {
    stack[i].position.end = childrenEndPosition
  }
  // Drop the closed frame and everything nested above it.
  stack.splice(newLength)
}
export function parse (state) { | ||
@@ -48,7 +56,6 @@ const {tokens, options} = state | ||
let index = stack.length | ||
let didRewind = false | ||
let shouldRewind = false | ||
while (--index > -1) { | ||
if (stack[index].tagName === tagName) { | ||
stack.splice(index) | ||
didRewind = true | ||
shouldRewind = true | ||
break | ||
@@ -62,3 +69,4 @@ } | ||
} | ||
if (didRewind) { | ||
if (shouldRewind) { | ||
rewindStack(stack, index, token.position.start, tokens[cursor - 1].position.end) | ||
break | ||
@@ -83,3 +91,3 @@ } else { | ||
if (tagName === stack[currentIndex].tagName) { | ||
stack = stack.slice(0, currentIndex) | ||
rewindStack(stack, currentIndex, token.position.start, token.position.start) | ||
const previousIndex = currentIndex - 1 | ||
@@ -104,15 +112,25 @@ nodes = stack[previousIndex].children | ||
const children = [] | ||
nodes.push({ | ||
const position = { | ||
start: token.position.start, | ||
end: attrToken.position.end | ||
} | ||
const elementNode = { | ||
type: 'element', | ||
tagName: tagToken.content, | ||
attributes, | ||
children | ||
}) | ||
children, | ||
position | ||
} | ||
nodes.push(elementNode) | ||
const hasChildren = !(attrToken.close || arrayIncludes(options.voidTags, tagName)) | ||
if (hasChildren) { | ||
stack.push({tagName, children}) | ||
const size = stack.push({tagName, children, position}) | ||
const innerState = {tokens, options, cursor, stack} | ||
parse(innerState) | ||
cursor = innerState.cursor | ||
const rewoundInElement = stack.length === size | ||
if (rewoundInElement) { | ||
elementNode.position.end = tokens[cursor - 1].position.end | ||
} | ||
} | ||
@@ -119,0 +137,0 @@ } |
import test from 'ava' | ||
import {parse, parseDefaults} from '../' | ||
import {parse, parseDefaults} from '../lib' | ||
import {formatAttributes} from '../lib/format' | ||
@@ -18,2 +18,45 @@ | ||
// With `includePositions: true` every formatted node carries a `position`
// whose `start` and `end` each hold a zero-based index, line and column.
test('parse() should emit positions if includePositions is true', t => {
  t.deepEqual(
    parse('<h1>Hello world</h1>', Object.assign({}, parseDefaults, { includePositions: true })),
    [
      {
        type: 'element',
        tagName: 'h1',
        attributes: [],
        children: [
          {
            type: 'text',
            content: 'Hello world',
            position: {
              start: {
                index: 4,
                line: 0,
                column: 4
              },
              end: {
                index: 15,
                line: 0,
                column: 15
              }
            }
          }
        ],
        position: {
          start: {
            index: 0,
            line: 0,
            column: 0
          },
          end: {
            index: 20,
            line: 0,
            column: 20
          }
        }
      }
    ]
  )
})
/* | ||
@@ -20,0 +63,0 @@ These tests ensure the parser and v1 formatting align. |
@@ -13,2 +13,6 @@ import test from 'ava' | ||
/**
 * Test helper: builds a position object for an offset on line 0, where the
 * column is the same number as the index.
 *
 * @param {number} index - offset into the input string.
 * @returns {{index: number, line: number, column: number}}
 */
function ps (index) {
  return { index, line: 0, column: index }
}
test('lexer should return tokens', t => { | ||
@@ -19,9 +23,9 @@ const str = '<h1>Test case</h1>' | ||
t.deepEqual(tokens, [ | ||
{type: 'tag-start', close: false}, | ||
{type: 'tag-start', close: false, position: {start: ps(0)}}, | ||
{type: 'tag', content: 'h1'}, | ||
{type: 'tag-end', close: false}, | ||
{type: 'text', content: 'Test case'}, | ||
{type: 'tag-start', close: true}, | ||
{type: 'tag-end', close: false, position: {end: ps(4)}}, | ||
{type: 'text', content: 'Test case', position: {start: ps(4), end: ps(13)}}, | ||
{type: 'tag-start', close: true, position: {start: ps(13)}}, | ||
{type: 'tag', content: 'h1'}, | ||
{type: 'tag-end', close: false} | ||
{type: 'tag-end', close: false, position: {end: ps(str.length)}} | ||
]) | ||
@@ -36,3 +40,3 @@ }) | ||
t.deepEqual(tokens, [ | ||
{type: 'text', content: '2 <= 4 >'} | ||
{type: 'text', content: '2 <= 4 >', position: {start: ps(0), end: ps(str.length)}} | ||
]) | ||
@@ -46,7 +50,7 @@ } | ||
t.deepEqual(tokens, [ | ||
{type: 'text', content: '2 '}, | ||
{type: 'tag-start', close: false}, | ||
{type: 'text', content: '2 ', position: {start: ps(0), end: ps(2)}}, | ||
{type: 'tag-start', close: false, position: {start: ps(2)}}, | ||
{type: 'tag', content: 'a'}, | ||
{type: 'attribute', content: '4'}, | ||
{type: 'tag-end', close: false} | ||
{type: 'tag-end', close: false, position: {end: ps(str.length)}} | ||
]) | ||
@@ -61,9 +65,9 @@ } | ||
t.deepEqual(tokens, [ | ||
{type: 'tag-start', close: false}, | ||
{type: 'tag-start', close: false, position: {start: ps(0)}}, | ||
{type: 'tag', content: 'template'}, | ||
{type: 'tag-end', close: false}, | ||
{type: 'text', content: 'Hello <img/>'}, | ||
{type: 'tag-start', close: true}, | ||
{type: 'tag-end', close: false, position: {end: ps(10)}}, | ||
{type: 'text', content: 'Hello <img/>', position: {start: ps(10), end: ps(22)}}, | ||
{type: 'tag-start', close: true, position: {start: ps(22)}}, | ||
{type: 'tag', content: 'template'}, | ||
{type: 'tag-end', close: false} | ||
{type: 'tag-end', close: false, position: {end: ps(str.length)}} | ||
]) | ||
@@ -83,25 +87,33 @@ }) | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexText(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
const token = state.tokens[0] | ||
t.deepEqual(token, { | ||
type: 'text', | ||
content: 'text that ends' | ||
content: 'text that ends', | ||
position: { | ||
start: ps(0), | ||
end: ps(14) | ||
} | ||
}) | ||
}) | ||
test('lexText should tokenize from the cursor', t => { | ||
test('lexText should tokenize from the current position', t => { | ||
const str = 'abcdtext that ends<x>' | ||
const finish = str.indexOf('<') | ||
const state = {str, cursor: 4, tokens: []} | ||
const state = {str, position: ps(4), tokens: []} | ||
lexText(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
const token = state.tokens[0] | ||
t.deepEqual(token, { | ||
type: 'text', | ||
content: 'text that ends' | ||
content: 'text that ends', | ||
position: { | ||
start: ps(4), | ||
end: ps(18) | ||
} | ||
}) | ||
@@ -114,10 +126,14 @@ }) | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexText(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
const token = state.tokens[0] | ||
t.deepEqual(token, { | ||
type: 'text', | ||
content: 'text that does not end' | ||
content: 'text that does not end', | ||
position: { | ||
start: ps(0), | ||
end: ps(str.length) | ||
} | ||
}) | ||
@@ -131,6 +147,6 @@ }) | ||
const state = {str, cursor: start, tokens: []} | ||
const state = {str, position: ps(start), tokens: []} | ||
lexText(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.is(state.tokens.length, 0) | ||
@@ -142,9 +158,13 @@ }) | ||
const finish = str.indexOf('abcd') | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexComment(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens[0], { | ||
type: 'comment', | ||
content: ' this is a comment ' | ||
content: ' this is a comment ', | ||
position: { | ||
start: ps(0), | ||
end: ps(finish) | ||
} | ||
}) | ||
@@ -156,22 +176,30 @@ }) | ||
const finish = str.length | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexComment(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens[0], { | ||
type: 'comment', | ||
content: ' this is a comment' | ||
content: ' this is a comment', | ||
position: { | ||
start: ps(0), | ||
end: ps(finish) | ||
} | ||
}) | ||
}) | ||
test('lexComment should tokenize from cursor', t => { | ||
test('lexComment should tokenize from current position', t => { | ||
const str = 'abcd<!-- comment text --><x>' | ||
const finish = str.indexOf('<x>') | ||
const state = {str, cursor: 4, tokens: []} | ||
const state = {str, position: ps(4), tokens: []} | ||
lexComment(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens[0], { | ||
type: 'comment', | ||
content: ' comment text ' | ||
content: ' comment text ', | ||
position: { | ||
start: ps(4), | ||
end: ps(finish) | ||
} | ||
}) | ||
@@ -183,8 +211,12 @@ }) | ||
const finish = str.length | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexComment(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens[0], { | ||
type: 'comment', | ||
content: '' | ||
content: '', | ||
position: { | ||
start: ps(0), | ||
end: ps(finish) | ||
} | ||
}) | ||
@@ -196,9 +228,9 @@ }) | ||
const finish = str.indexOf('abcd') | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexTag(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens, [ | ||
{type: 'tag-start', close: false}, | ||
{type: 'tag-start', close: false, position: {start: ps(0)}}, | ||
{type: 'tag', content: 'img'}, // not a part of this test | ||
{type: 'tag-end', close: true} | ||
{type: 'tag-end', close: true, position: {end: ps(finish)}} | ||
]) | ||
@@ -210,5 +242,5 @@ }) | ||
const finish = 2 | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexTagName(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens[0], { | ||
@@ -222,5 +254,5 @@ type: 'tag', | ||
const str = '>/ div' | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexTagName(state) | ||
t.is(state.cursor, str.length) | ||
t.is(state.position.index, str.length) | ||
t.deepEqual(state.tokens[0], { | ||
@@ -235,5 +267,5 @@ type: 'tag', | ||
const finish = str.indexOf('>abcd') | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexTagAttributes(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens, [ | ||
@@ -249,5 +281,5 @@ {type: 'attribute', content: 'yes="no"'}, | ||
const finish = str.indexOf('>abcd') | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexTagAttributes(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens, [ | ||
@@ -263,5 +295,5 @@ {type: 'attribute', content: 'yes="no"'}, | ||
const str = '<div foo= bar="baz"></div>' | ||
const state = {str, cursor: 4, tokens: []} | ||
const state = {str, position: ps(4), tokens: []} | ||
lexTagAttributes(state) | ||
t.is(state.cursor, str.indexOf('></div>')) | ||
t.is(state.position.index, str.indexOf('></div>')) | ||
t.deepEqual(state.tokens, [ | ||
@@ -275,5 +307,5 @@ {type: 'attribute', content: 'foo'}, | ||
const str = '<div foo="bar"\nbaz="bat"></div>' | ||
const state = {str, cursor: 4, tokens: []} | ||
const state = {str, position: ps(4), tokens: []} | ||
lexTagAttributes(state) | ||
t.is(state.cursor, str.indexOf('></div>')) | ||
t.is(state.position.index, str.indexOf('></div>')) | ||
t.deepEqual(state.tokens, [ | ||
@@ -287,5 +319,5 @@ {type: 'attribute', content: 'foo="bar"'}, | ||
const str = '<div foo="bar"\tbaz="bat"></div>' | ||
const state = {str, cursor: 4, tokens: []} | ||
const state = {str, position: ps(4), tokens: []} | ||
lexTagAttributes(state) | ||
t.is(state.cursor, str.indexOf('></div>')) | ||
t.is(state.position.index, str.indexOf('></div>')) | ||
t.deepEqual(state.tokens, [ | ||
@@ -300,5 +332,5 @@ {type: 'attribute', content: 'foo="bar"'}, | ||
const finish = str.indexOf('>abcd') | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexTagAttributes(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens, [ | ||
@@ -312,5 +344,5 @@ {type: 'attribute', content: 'yes="no"'} | ||
const finish = str.indexOf('>abcd') | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexTagAttributes(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens, [ | ||
@@ -325,5 +357,5 @@ {type: 'attribute', content: 'num=8'}, | ||
const finish = str.indexOf('>abcd') | ||
const state = {str, cursor: 0, tokens: []} | ||
const state = {str, position: ps(0), tokens: []} | ||
lexTagAttributes(state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens, [ | ||
@@ -337,10 +369,10 @@ {type: 'attribute', content: 'x'} | ||
const finish = str.indexOf('<x>') | ||
const state = {str, cursor: 10, tokens: []} | ||
const state = {str, position: ps(10), tokens: []} | ||
lexSkipTag('test', state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens, [ | ||
{type: 'text', content: '<h1>Test case</h1>'}, | ||
{type: 'tag-start', close: true}, | ||
{type: 'text', content: '<h1>Test case</h1>', position: {start: ps(10), end: ps(28)}}, | ||
{type: 'tag-start', close: true, position: {start: ps(28)}}, | ||
{type: 'tag', content: 'test'}, | ||
{type: 'tag-end', close: false} | ||
{type: 'tag-end', close: false, position: {end: ps(finish)}} | ||
]) | ||
@@ -352,10 +384,10 @@ }) | ||
const finish = str.indexOf('<x>') | ||
const state = {str, cursor: 6, tokens: []} | ||
const state = {str, position: ps(6), tokens: []} | ||
lexSkipTag('tEsT', state) | ||
t.is(state.cursor, finish) | ||
t.is(state.position.index, finish) | ||
t.deepEqual(state.tokens, [ | ||
{type: 'text', content: 'proving <???> the point'}, | ||
{type: 'tag-start', close: true}, | ||
{type: 'text', content: 'proving <???> the point', position: {start: ps(6), end: ps(29)}}, | ||
{type: 'tag-start', close: true, position: {start: ps(29)}}, | ||
{type: 'tag', content: 'TeSt'}, | ||
{type: 'tag-end', close: false} | ||
{type: 'tag-end', close: false, position: {end: ps(finish)}} | ||
]) | ||
@@ -366,7 +398,7 @@ }) | ||
const str = '<script>This never ends' | ||
const state = {str, cursor: 8, tokens: []} | ||
const state = {str, position: ps(8), tokens: []} | ||
lexSkipTag('script', state) | ||
t.is(state.cursor, str.length) | ||
t.is(state.position.index, str.length) | ||
t.deepEqual(state.tokens, [ | ||
{type: 'text', content: 'This never ends'} | ||
{type: 'text', content: 'This never ends', position: {start: ps(8), end: ps(str.length)}} | ||
]) | ||
@@ -377,13 +409,25 @@ }) | ||
const str = '<script>proving </nothing></script>' | ||
const state = {str, cursor: 8, tokens: []} | ||
const state = {str, position: ps(8), tokens: []} | ||
lexSkipTag('script', state) | ||
t.is(state.cursor, str.length) | ||
t.is(state.position.index, str.length) | ||
t.deepEqual(state.tokens, [ | ||
{type: 'text', content: 'proving </nothing>'}, | ||
{type: 'tag-start', close: true}, | ||
{type: 'text', content: 'proving </nothing>', position: {start: ps(8), end: ps(26)}}, | ||
{type: 'tag-start', close: true, position: {start: ps(26)}}, | ||
{type: 'tag', content: 'script'}, | ||
{type: 'tag-end', close: false} | ||
{type: 'tag-end', close: false, position: {end: ps(str.length)}} | ||
]) | ||
}) | ||
// The closing tag starts exactly at the current position, so only the three
// closing-tag tokens are expected and no zero-length text token.
test('lexSkipTag should not add an empty inner text node', t => {
  const str = '<script></script>'
  const state = {str, position: ps(8), tokens: []}
  lexSkipTag('script', state)
  t.is(state.position.index, str.length)
  t.deepEqual(state.tokens, [
    {type: 'tag-start', close: true, position: {start: ps(8)}},
    {type: 'tag', content: 'script'},
    {type: 'tag-end', close: false, position: {end: ps(str.length)}}
  ])
})
test('isWhitespace should work', t => { | ||
@@ -390,0 +434,0 @@ t.is(isWhitespaceChar(' '), true) |
import test from 'ava' | ||
import parser from '../src/parser' | ||
import lexer from '../src/lexer' | ||
import parser from '../lib/parser' | ||
import lexer from '../lib/lexer' | ||
/**
 * Test helper: builds a position object for an offset on line 0, where the
 * column is the same number as the index.
 *
 * @param {number} index - offset into the input string.
 * @returns {{index: number, line: number, column: number}}
 */
function ps (index) {
  return { index, line: 0, column: index }
}
const lexerOptions = { childlessTags: [] } | ||
@@ -24,5 +28,13 @@ const parserOptions = { | ||
type: 'text', | ||
content: 'Hello world' | ||
content: 'Hello world', | ||
position: { | ||
start: ps(4), | ||
end: ps(15) | ||
} | ||
} | ||
] | ||
], | ||
position: { | ||
start: ps(0), | ||
end: ps(str.length) | ||
} | ||
} | ||
@@ -44,3 +56,7 @@ ]) | ||
type: 'text', | ||
content: 'abc' | ||
content: 'abc', | ||
position: { | ||
start: ps(5), | ||
end: ps(8) | ||
} | ||
}, | ||
@@ -51,9 +67,21 @@ { | ||
attributes: [], | ||
children: [] | ||
children: [], | ||
position: { | ||
start: ps(8), | ||
end: ps(14) | ||
} | ||
}, | ||
{ | ||
type: 'text', | ||
content: 'def' | ||
content: 'def', | ||
position: { | ||
start: ps(14), | ||
end: ps(17) | ||
} | ||
} | ||
] | ||
], | ||
position: { | ||
start: ps(0), | ||
end: ps(str.length) | ||
} | ||
} | ||
@@ -81,5 +109,13 @@ ]) | ||
type: 'text', | ||
content: 'This is one' | ||
content: 'This is one', | ||
position: { | ||
start: ps(3), | ||
end: ps(14) | ||
} | ||
} | ||
] | ||
], | ||
position: { | ||
start: ps(0), | ||
end: ps(14) | ||
} | ||
}, | ||
@@ -93,5 +129,13 @@ { | ||
type: 'text', | ||
content: 'This is two' | ||
content: 'This is two', | ||
position: { | ||
start: ps(17), | ||
end: ps(28) | ||
} | ||
} | ||
] | ||
], | ||
position: { | ||
start: ps(14), | ||
end: ps(str.length) | ||
} | ||
} | ||
@@ -118,3 +162,7 @@ ]) | ||
type: 'text', | ||
content: 'This is one ' | ||
content: 'This is one ', | ||
position: { | ||
start: ps(3), | ||
end: ps(15) | ||
} | ||
}, | ||
@@ -128,7 +176,19 @@ { | ||
type: 'text', | ||
content: 'okay' | ||
content: 'okay', | ||
position: { | ||
start: ps(21), | ||
end: ps(25) | ||
} | ||
} | ||
] | ||
], | ||
position: { | ||
start: ps(15), | ||
end: ps(25) | ||
} | ||
} | ||
] | ||
], | ||
position: { | ||
start: ps(0), | ||
end: ps(25) | ||
} | ||
}, | ||
@@ -142,5 +202,13 @@ { | ||
type: 'text', | ||
content: 'This is two' | ||
content: 'This is two', | ||
position: { | ||
start: ps(28), | ||
end: ps(39) | ||
} | ||
} | ||
] | ||
], | ||
position: { | ||
start: ps(25), | ||
end: ps(43) | ||
} | ||
} | ||
@@ -165,6 +233,14 @@ ]) | ||
attributes: [], | ||
position: { | ||
start: ps(0), | ||
end: ps(36) | ||
}, | ||
children: [ | ||
{ | ||
type: 'text', | ||
content: 'This is ' | ||
content: 'This is ', | ||
position: { | ||
start: ps(5), | ||
end: ps(13) | ||
} | ||
}, | ||
@@ -175,6 +251,14 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(13), | ||
end: ps(30) | ||
}, | ||
children: [ | ||
{ | ||
type: 'text', | ||
content: 'one ' | ||
content: 'one ', | ||
position: { | ||
start: ps(16), | ||
end: ps(20) | ||
} | ||
}, | ||
@@ -185,6 +269,14 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(20), | ||
end: ps(30) | ||
}, | ||
children: [ | ||
{ | ||
type: 'text', | ||
content: 'okay' | ||
content: 'okay', | ||
position: { | ||
start: ps(26), | ||
end: ps(30) | ||
} | ||
} | ||
@@ -215,2 +307,6 @@ ] | ||
attributes: ['class="cake"', 'data-key="abc"', 'disabled'], | ||
position: { | ||
start: ps(0), | ||
end: ps(48) | ||
}, | ||
children: [] | ||
@@ -230,6 +326,14 @@ } | ||
attributes: [], | ||
position: { | ||
start: ps(0), | ||
end: ps(str.length) | ||
}, | ||
children: [ | ||
{ | ||
type: 'text', | ||
content: 'abc' | ||
content: 'abc', | ||
position: { | ||
start: ps(5), | ||
end: ps(str.length) | ||
} | ||
} | ||
@@ -250,2 +354,6 @@ ] | ||
attributes: [], | ||
position: { | ||
start: ps(0), | ||
end: ps(str.length) | ||
}, | ||
children: [] | ||
@@ -265,6 +373,14 @@ } | ||
attributes: [], | ||
position: { | ||
start: ps(0), | ||
end: ps(14) | ||
}, | ||
children: [ | ||
{ | ||
type: 'text', | ||
content: 'abc' | ||
content: 'abc', | ||
position: { | ||
start: ps(5), | ||
end: ps(8) | ||
} | ||
} | ||
@@ -275,3 +391,7 @@ ] | ||
type: 'text', | ||
content: 'def' | ||
content: 'def', | ||
position: { | ||
start: ps(14), | ||
end: ps(17) | ||
} | ||
} | ||
@@ -308,2 +428,6 @@ ]) | ||
attributes: [], | ||
position: { | ||
start: ps(0), | ||
end: ps(42) | ||
}, | ||
children: [ | ||
@@ -314,6 +438,14 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(4), | ||
end: ps(37) | ||
}, | ||
children: [ | ||
{ | ||
type: 'text', | ||
content: 'abc' | ||
content: 'abc', | ||
position: { | ||
start: ps(8), | ||
end: ps(11) | ||
} | ||
}, | ||
@@ -324,2 +456,6 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(11), | ||
end: ps(32) | ||
}, | ||
children: [ | ||
@@ -330,6 +466,14 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(15), | ||
end: ps(27) | ||
}, | ||
children: [ | ||
{ | ||
type: 'text', | ||
content: 'def' | ||
content: 'def', | ||
position: { | ||
start: ps(19), | ||
end: ps(22) | ||
} | ||
} | ||
@@ -363,2 +507,6 @@ ] | ||
attributes: [], | ||
position: { | ||
start: ps(0), | ||
end: ps(55) | ||
}, | ||
children: [ | ||
@@ -369,6 +517,14 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(4), | ||
end: ps(50) | ||
}, | ||
children: [ | ||
{ | ||
type: 'text', | ||
content: 'abc' | ||
content: 'abc', | ||
position: { | ||
start: ps(8), | ||
end: ps(11) | ||
} | ||
}, | ||
@@ -379,2 +535,6 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(11), | ||
end: ps(45) | ||
}, | ||
children: [ | ||
@@ -385,2 +545,6 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(15), | ||
end: ps(40) | ||
}, | ||
children: [ | ||
@@ -391,6 +555,14 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(21), | ||
end: ps(33) | ||
}, | ||
children: [ | ||
{ | ||
type: 'text', | ||
content: 'def' | ||
content: 'def', | ||
position: { | ||
start: ps(25), | ||
end: ps(28) | ||
} | ||
} | ||
@@ -426,2 +598,6 @@ ] | ||
attributes: [], | ||
position: { | ||
start: ps(0), | ||
end: ps(49) | ||
}, | ||
children: [ | ||
@@ -432,6 +608,14 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(4), | ||
end: ps(44) | ||
}, | ||
children: [ | ||
{ | ||
type: 'text', | ||
content: 'abc' | ||
content: 'abc', | ||
position: { | ||
start: ps(8), | ||
end: ps(11) | ||
} | ||
}, | ||
@@ -442,2 +626,6 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(11), | ||
end: ps(39) | ||
}, | ||
children: [ | ||
@@ -448,6 +636,14 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(15), | ||
end: ps(22) | ||
}, | ||
children: [ | ||
{ | ||
type: 'text', | ||
content: 'def' | ||
content: 'def', | ||
position: { | ||
start: ps(19), | ||
end: ps(22) | ||
} | ||
} | ||
@@ -460,6 +656,14 @@ ] | ||
attributes: [], | ||
position: { | ||
start: ps(22), | ||
end: ps(34) | ||
}, | ||
children: [ | ||
{ | ||
type: 'text', | ||
content: 'ghi' | ||
content: 'ghi', | ||
position: { | ||
start: ps(26), | ||
end: ps(29) | ||
} | ||
} | ||
@@ -497,2 +701,6 @@ ] | ||
attributes: [], | ||
position: { | ||
start: ps(0), | ||
end: ps(96) | ||
}, | ||
children: [ | ||
@@ -503,2 +711,6 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(7), | ||
end: ps(88) | ||
}, | ||
children: [ | ||
@@ -509,2 +721,6 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(14), | ||
end: ps(80) | ||
}, | ||
children: [ | ||
@@ -515,2 +731,6 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(18), | ||
end: ps(75) | ||
}, | ||
children: [ | ||
@@ -521,2 +741,6 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(22), | ||
end: ps(70) | ||
}, | ||
children: [ | ||
@@ -527,2 +751,6 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(29), | ||
end: ps(62) | ||
}, | ||
children: [ | ||
@@ -533,2 +761,6 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(36), | ||
end: ps(54) | ||
}, | ||
children: [ | ||
@@ -539,2 +771,6 @@ { | ||
attributes: [], | ||
position: { | ||
start: ps(40), | ||
end: ps(49) | ||
}, | ||
children: [] | ||
@@ -569,6 +805,10 @@ } | ||
{ | ||
'type': 'text', | ||
'content': 'x' | ||
type: 'text', | ||
content: 'x', | ||
position: { | ||
start: ps(4), | ||
end: ps(str.length) | ||
} | ||
} | ||
]) | ||
}) |
@@ -72,1 +72,21 @@ # Himalaya AST Specification - Version 1 | ||
A `text` node. | ||
## Positions

The parser can be configured to emit index, line, and column numbers for nodes.
Passing the `includePositions: true` parse option adds a `position` field to each node.
The `index`, `line`, and `column` values are all zero-based.

```ts
interface Position {
  index: number;
  line: number;
  column: number;
}

interface Node {
  type: string;
  position: {
    start: Position;
    end: Position;
  }
}
```
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
383439
3338
139