@bbob/parser
Advanced tools
Comparing version 1.0.5 to 1.0.7
@@ -14,2 +14,3 @@ const N = '\n'.charCodeAt(0); | ||
const SLASH = '/'.charCodeAt(0); | ||
const BACKSLASH = '\\'.charCodeAt(0); | ||
@@ -35,2 +36,3 @@ const PLACEHOLDER_SPACE_TAB = ' '; | ||
PLACEHOLDER_SPACE, | ||
BACKSLASH, | ||
}; |
@@ -15,10 +15,6 @@ const { | ||
const { | ||
SLASH, | ||
getChar, | ||
} = require('./char'); | ||
const Tokenizer = require('./Tokenizer'); | ||
const TagNode = require('./TagNode'); | ||
const createTagNode = (tag, attrs = {}, content = []) => ({ tag, attrs, content }); | ||
const createTagNode = (tag, attrs = {}, content = []) => new TagNode(tag, attrs, content); | ||
@@ -37,7 +33,3 @@ /** | ||
constructor(input, options = {}) { | ||
this.tokenizer = new Tokenizer(input, { | ||
onToken: (token) => { | ||
this.parseToken(token); | ||
}, | ||
}); | ||
this.createTokenizer(input); | ||
@@ -48,13 +40,24 @@ this.options = options; | ||
this.nestedNodes = []; | ||
this.curTags = []; | ||
this.curTagsAttrName = []; | ||
this.tagNodes = []; | ||
this.tagNodesAttrName = []; | ||
} | ||
isNestedTag(token) { | ||
createTokenizer(input) { | ||
this.tokenizer = new Tokenizer(input, { | ||
onToken: (token) => { | ||
this.parseToken(token); | ||
}, | ||
}); | ||
} | ||
isTagNested(token) { | ||
return this.tokenizer.isTokenNested(token); | ||
} | ||
getCurTag() { | ||
if (this.curTags.length) { | ||
return this.curTags[this.curTags.length - 1]; | ||
/** | ||
* @return {TagNode} | ||
*/ | ||
getTagNode() { | ||
if (this.tagNodes.length) { | ||
return this.tagNodes[this.tagNodes.length - 1]; | ||
} | ||
@@ -65,29 +68,29 @@ | ||
createCurTag(token) { | ||
this.curTags.push(createTagNode(getTokenValue(token))); | ||
createTagNode(token) { | ||
this.tagNodes.push(createTagNode(getTokenValue(token))); | ||
} | ||
createCurTagAttrName(token) { | ||
this.curTagsAttrName.push(getTokenValue(token)); | ||
createTagNodeAttrName(token) { | ||
this.tagNodesAttrName.push(getTokenValue(token)); | ||
} | ||
getCurTagAttrName() { | ||
if (this.curTagsAttrName.length) { | ||
return this.curTagsAttrName[this.curTagsAttrName.length - 1]; | ||
getTagNodeAttrName() { | ||
if (this.tagNodesAttrName.length) { | ||
return this.tagNodesAttrName[this.tagNodesAttrName.length - 1]; | ||
} | ||
return this.getCurTag().tag; | ||
return this.getTagNode().tag; | ||
} | ||
clearCurTagAttrName() { | ||
if (this.curTagsAttrName.length) { | ||
this.curTagsAttrName.pop(); | ||
clearTagNodeAttrName() { | ||
if (this.tagNodesAttrName.length) { | ||
this.tagNodesAttrName.pop(); | ||
} | ||
} | ||
clearCurTag() { | ||
if (this.curTags.length) { | ||
this.curTags.pop(); | ||
clearTagNode() { | ||
if (this.tagNodes.length) { | ||
this.tagNodes.pop(); | ||
this.clearCurTagAttrName(); | ||
this.clearTagNodeAttrName(); | ||
} | ||
@@ -111,9 +114,9 @@ } | ||
if (isTagStart(token)) { | ||
this.createCurTag(token); | ||
this.createTagNode(token); | ||
if (this.isNestedTag(token)) { | ||
this.nestedNodes.push(this.getCurTag()); | ||
if (this.isTagNested(token)) { | ||
this.nestedNodes.push(this.getTagNode()); | ||
} else { | ||
this.appendNode(this.getCurTag()); | ||
this.clearCurTag(); | ||
this.appendNode(this.getTagNode()); | ||
this.clearTagNode(); | ||
} | ||
@@ -125,3 +128,3 @@ } | ||
if (isTagEnd(token)) { | ||
this.clearCurTag(); | ||
this.clearTagNode(); | ||
@@ -153,12 +156,14 @@ const lastNestedNode = this.nestedNodes.pop(); | ||
handleCurTag(token) { | ||
if (this.getCurTag()) { | ||
handleTagNode(token) { | ||
const tagNode = this.getTagNode(); | ||
if (tagNode) { | ||
if (isAttrNameToken(token)) { | ||
this.createCurTagAttrName(token); | ||
this.getCurTag().attrs[this.getCurTagAttrName()] = null; | ||
this.createTagNodeAttrName(token); | ||
tagNode.attr(this.getTagNodeAttrName(), null); | ||
} else if (isAttrValueToken(token)) { | ||
this.getCurTag().attrs[this.getCurTagAttrName()] = getTokenValue(token); | ||
this.clearCurTagAttrName(); | ||
tagNode.attr(this.getTagNodeAttrName(), getTokenValue(token)); | ||
this.clearTagNodeAttrName(); | ||
} else if (isTextToken(token)) { | ||
this.getCurTag().content.push(getTokenValue(token)); | ||
tagNode.append(getTokenValue(token)); | ||
} | ||
@@ -172,3 +177,3 @@ } else if (isTextToken(token)) { | ||
this.handleTagToken(token); | ||
this.handleCurTag(token); | ||
this.handleTagNode(token); | ||
} | ||
@@ -195,20 +200,2 @@ | ||
findNestedTags() { | ||
const tags = (this.tokens || []).filter(isTagToken).reduce((acc, token) => { | ||
acc[getTokenValue(token)] = true; | ||
return acc; | ||
}, {}); | ||
const closeChar = getChar(SLASH); | ||
return Object.keys(tags).reduce((arr, key) => { | ||
if (tags[key] && tags[closeChar + key]) { | ||
arr.push(key); | ||
} | ||
return arr; | ||
}, []); | ||
} | ||
isAllowedTag(value) { | ||
@@ -223,5 +210,3 @@ if (this.options.onlyAllowTags && this.options.onlyAllowTags.length) { | ||
new Parser('[Verse 2]').parse(); | ||
module.exports = Parser; | ||
module.exports.createTagNode = createTagNode; |
@@ -25,8 +25,7 @@ const { | ||
const isTextToken = (token) => { | ||
const type = token[TOKEN_TYPE_ID]; | ||
const isTextToken = token => | ||
token[TOKEN_TYPE_ID] === TOKEN_TYPE_SPACE || | ||
token[TOKEN_TYPE_ID] === TOKEN_TYPE_NEW_LINE || | ||
token[TOKEN_TYPE_ID] === TOKEN_TYPE_WORD; | ||
return type === TOKEN_TYPE_SPACE || type === TOKEN_TYPE_NEW_LINE || type === TOKEN_TYPE_WORD; | ||
}; | ||
const isTagToken = token => token[TOKEN_TYPE_ID] === TOKEN_TYPE_TAG; | ||
@@ -65,2 +64,6 @@ const isTagEnd = token => getTokenValue(token).charCodeAt(0) === SLASH; | ||
isEmpty() { | ||
return !!this.type; | ||
} | ||
isText() { | ||
@@ -67,0 +70,0 @@ return isTextToken(this); |
@@ -7,2 +7,3 @@ const { | ||
SLASH, | ||
BACKSLASH, | ||
} = require('./char'); | ||
@@ -18,7 +19,8 @@ const Token = require('./Token'); | ||
this.rowPos = 0; | ||
this.index = 0; | ||
// eslint-disable-next-line no-bitwise | ||
this.index = 2 ** 32; | ||
this.tokenIndex = -1; | ||
this.tokens = new Array(Math.floor(this.buffer.length)); | ||
this.dummyToken = createTokenOfType('', '', '', ''); | ||
this.dummyToken = null; // createTokenOfType('', '', '', ''); | ||
@@ -32,2 +34,14 @@ this.wordToken = this.dummyToken; | ||
this.options = options; | ||
this.charMap = { | ||
[TAB]: this.charSPACE.bind(this), | ||
[SPACE]: this.charSPACE.bind(this), | ||
[N]: this.charN.bind(this), | ||
[OPEN_BRAKET]: this.charOPENBRAKET.bind(this), | ||
[CLOSE_BRAKET]: this.charCLOSEBRAKET.bind(this), | ||
[EQ]: this.charEQ.bind(this), | ||
[QUOTEMARK]: this.charQUOTEMARK.bind(this), | ||
[BACKSLASH]: this.charBACKSLASH.bind(this), | ||
default: this.charWORD.bind(this), | ||
}; | ||
} | ||
@@ -47,2 +61,11 @@ | ||
skipChar(num) { | ||
this.index += num; | ||
this.colPos += num; | ||
} | ||
seekChar(num) { | ||
return this.buffer.charCodeAt(this.index + num); | ||
} | ||
nextCol() { | ||
@@ -57,3 +80,3 @@ this.colPos += 1; | ||
flushWord() { | ||
if (this.wordToken[Token.TYPE_ID] && this.wordToken[Token.VALUE_ID]) { | ||
if (this.inWord() && this.wordToken[Token.VALUE_ID]) { | ||
this.appendToken(this.wordToken); | ||
@@ -65,3 +88,3 @@ this.wordToken = this.createWordToken(''); | ||
createWord(value, line, row) { | ||
if (this.wordToken[Token.TYPE_ID] === '') { | ||
if (!this.inWord()) { | ||
this.wordToken = this.createWordToken(value, line, row); | ||
@@ -72,6 +95,6 @@ } | ||
flushTag() { | ||
if (this.tagToken[Token.TYPE_ID]) { | ||
if (this.inTag()) { | ||
// [] and [=] tag case | ||
if (this.tagToken[Token.VALUE_ID] === '') { | ||
const value = this.attrValueToken[Token.TYPE_ID] ? getChar(EQ) : ''; | ||
const value = this.inAttrValue() ? getChar(EQ) : ''; | ||
const word = getChar(OPEN_BRAKET) + value + getChar(CLOSE_BRAKET); | ||
@@ -84,3 +107,3 @@ | ||
if (this.attrValueToken[Token.TYPE_ID]) { | ||
if (this.inAttrValue()) { | ||
this.attrValueToken = this.dummyToken; | ||
@@ -92,3 +115,3 @@ } | ||
if (this.attrNameToken[Token.TYPE_ID] && !this.attrValueToken[Token.TYPE_ID]) { | ||
if (this.inAttrName() && !this.inAttrValue()) { | ||
this.tagToken[Token.VALUE_ID] += PLACEHOLDER_SPACE + this.attrNameToken[Token.VALUE_ID]; | ||
@@ -104,4 +127,4 @@ this.attrNameToken = this.dummyToken; | ||
flushUnclosedTag() { | ||
if (this.tagToken[Token.TYPE_ID]) { | ||
const value = this.tagToken[Token.VALUE_ID] + (this.attrValueToken[Token.VALUE_ID] ? getChar(EQ) : ''); | ||
if (this.inTag()) { | ||
const value = this.tagToken[Token.VALUE_ID] + (this.attrValueToken && this.attrValueToken[Token.VALUE_ID] ? getChar(EQ) : ''); | ||
@@ -115,3 +138,3 @@ this.tagToken[Token.TYPE_ID] = Token.TYPE_WORD; | ||
if (this.attrValueToken[Token.TYPE_ID]) { | ||
if (this.inAttrValue()) { | ||
this.attrValueToken = this.dummyToken; | ||
@@ -123,3 +146,3 @@ } | ||
flushAttrNames() { | ||
if (this.attrNameToken[Token.TYPE_ID]) { | ||
if (this.inAttrName()) { | ||
this.attrTokens.push(this.attrNameToken); | ||
@@ -129,3 +152,4 @@ this.attrNameToken = this.dummyToken; | ||
if (this.attrValueToken[Token.TYPE_ID]) { | ||
if (this.inAttrValue()) { | ||
this.attrValueToken.quoted = undefined; | ||
this.attrTokens.push(this.attrValueToken); | ||
@@ -144,9 +168,14 @@ this.attrValueToken = this.dummyToken; | ||
charSPACE(charCode) { | ||
const spaceCode = charCode === TAB ? PLACEHOLDER_SPACE_TAB : PLACEHOLDER_SPACE; | ||
this.flushWord(); | ||
if (this.tagToken[Token.TYPE_ID]) { | ||
this.attrNameToken = this.createAttrNameToken(''); | ||
if (this.inTag()) { | ||
if (this.inAttrValue() && this.attrValueToken.quoted) { | ||
this.attrValueToken[Token.VALUE_ID] += spaceCode; | ||
} else { | ||
this.flushAttrNames(); | ||
this.attrNameToken = this.createAttrNameToken(''); | ||
} | ||
} else { | ||
const spaceCode = charCode === TAB ? PLACEHOLDER_SPACE_TAB : PLACEHOLDER_SPACE; | ||
this.appendToken(this.createSpaceToken(spaceCode)); | ||
@@ -173,12 +202,19 @@ } | ||
charCLOSEBRAKET() { | ||
this.nextCol(); | ||
this.flushTag(); | ||
this.flushAttrNames(); | ||
this.flushAttrs(); | ||
this.nextCol(); | ||
} | ||
charEQ(charCode) { | ||
if (this.tagToken[Token.TYPE_ID]) { | ||
const nextCharCode = this.seekChar(1); | ||
const isNextQuotemark = nextCharCode === QUOTEMARK; | ||
if (this.inTag()) { | ||
this.attrValueToken = this.createAttrValueToken(''); | ||
if (isNextQuotemark) { | ||
this.attrValueToken.quoted = true; | ||
this.skipChar(1); | ||
} | ||
} else { | ||
@@ -192,5 +228,11 @@ this.wordToken[Token.VALUE_ID] += getChar(charCode); | ||
charQUOTEMARK(charCode) { | ||
if (this.attrValueToken[Token.TYPE_ID] && this.attrValueToken[Token.VALUE_ID] > 0) { | ||
const prevCharCode = this.seekChar(-1); | ||
const isPrevBackslash = prevCharCode === BACKSLASH; | ||
if (this.inAttrValue() && | ||
this.attrValueToken[Token.VALUE_ID] && | ||
this.attrValueToken.quoted && | ||
!isPrevBackslash) { | ||
this.flushAttrNames(); | ||
} else if (this.tagToken[Token.TYPE_ID] === '') { | ||
} else if (!this.inTag()) { | ||
this.wordToken[Token.VALUE_ID] += getChar(charCode); | ||
@@ -202,9 +244,27 @@ } | ||
charBACKSLASH() { | ||
const nextCharCode = this.seekChar(1); | ||
const isNextQuotemark = nextCharCode === QUOTEMARK; | ||
if (this.inAttrValue() && | ||
this.attrValueToken[Token.VALUE_ID] && | ||
this.attrValueToken.quoted && | ||
isNextQuotemark | ||
) { | ||
this.attrValueToken[Token.VALUE_ID] += getChar(nextCharCode); | ||
this.skipChar(1); | ||
} | ||
this.nextCol(); | ||
} | ||
charWORD(charCode) { | ||
if (this.tagToken[Token.TYPE_ID] && this.attrValueToken[Token.TYPE_ID]) { | ||
this.attrValueToken[Token.VALUE_ID] += getChar(charCode); | ||
} else if (this.tagToken[Token.TYPE_ID] && this.attrNameToken[Token.TYPE_ID]) { | ||
this.attrNameToken[Token.VALUE_ID] += getChar(charCode); | ||
} else if (this.tagToken[Token.TYPE_ID]) { | ||
this.tagToken[Token.VALUE_ID] += getChar(charCode); | ||
if (this.inTag()) { | ||
if (this.inAttrValue()) { | ||
this.attrValueToken[Token.VALUE_ID] += getChar(charCode); | ||
} else if (this.inAttrName()) { | ||
this.attrNameToken[Token.VALUE_ID] += getChar(charCode); | ||
} else { | ||
this.tagToken[Token.VALUE_ID] += getChar(charCode); | ||
} | ||
} else { | ||
@@ -220,37 +280,10 @@ this.createWord(); | ||
tokenize() { | ||
this.index = 0; | ||
while (this.index < this.buffer.length) { | ||
const charCode = this.buffer.charCodeAt(this.index); | ||
switch (charCode) { | ||
case TAB: | ||
case SPACE: | ||
this.charSPACE(charCode); | ||
break; | ||
(this.charMap[charCode] || this.charMap.default)(charCode); | ||
case N: | ||
this.charN(charCode); | ||
break; | ||
case OPEN_BRAKET: | ||
this.charOPENBRAKET(); | ||
break; | ||
case CLOSE_BRAKET: | ||
this.charCLOSEBRAKET(); | ||
break; | ||
case EQ: | ||
this.charEQ(charCode); | ||
break; | ||
case QUOTEMARK: | ||
this.charQUOTEMARK(charCode); | ||
break; | ||
default: | ||
this.charWORD(charCode); | ||
break; | ||
} | ||
this.index += 1; | ||
// eslint-disable-next-line no-plusplus | ||
++this.index; | ||
} | ||
@@ -266,2 +299,18 @@ | ||
inWord() { | ||
return this.wordToken && this.wordToken[Token.TYPE_ID]; | ||
} | ||
inTag() { | ||
return this.tagToken && this.tagToken[Token.TYPE_ID]; | ||
} | ||
inAttrValue() { | ||
return this.attrValueToken && this.attrValueToken[Token.TYPE_ID]; | ||
} | ||
inAttrName() { | ||
return this.attrNameToken && this.attrNameToken[Token.TYPE_ID]; | ||
} | ||
createWordToken(value = '', line = this.colPos, row = this.rowPos) { | ||
@@ -297,5 +346,2 @@ return createTokenOfType(Token.TYPE_WORD, value, line, row); | ||
// warm up tokenizer to elimitate code branches that never execute | ||
new Tokenizer('[b param="hello"]Sample text[/b]\n\t[Chorus 2] x html([a. title][, alt][, classes]) x [=] [/y]').tokenize(); | ||
module.exports = Tokenizer; | ||
@@ -302,0 +348,0 @@ module.exports.createTokenOfType = createTokenOfType; |
{ | ||
"name": "@bbob/parser", | ||
"version": "1.0.5", | ||
"version": "1.0.7", | ||
"description": "Fast, flexible, and lean implementation of BBcode parser", | ||
@@ -21,3 +21,4 @@ "homepage": "https://github.com/JiLiZART/bbob", | ||
], | ||
"main": "./lib/index.js", | ||
"main": "lib/index.js", | ||
"browser": "dist/umd.js", | ||
"repository": { | ||
@@ -28,2 +29,4 @@ "type": "git", | ||
"scripts": { | ||
"build": "../../node_modules/.bin/rollup -c", | ||
"dev": "../../node_modules/.bin/rollup -c -w", | ||
"test": "../../node_modules/.bin/jest --", | ||
@@ -38,3 +41,5 @@ "cover": "../../node_modules/.bin/jest --coverage", | ||
}, | ||
"files": ["lib"] | ||
"files": [ | ||
"lib" | ||
] | ||
} |
@@ -1,2 +0,15 @@ | ||
# bbob-parser | ||
Fast BB Code parser written in pure javascript, no dependencies | ||
# @bbob/parser | ||
Parses BBCode and returns AST Tree looks like | ||
```json | ||
[ | ||
{ | ||
tag: 'url', | ||
attrs: { | ||
url: 'https://github.com/JiLiZART/bbob/tree/master/packages/bbob-parser' | ||
}, | ||
content: ['hello', ' ', 'world!'] | ||
} | ||
] | ||
``` |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: The package has recently undergone a major refactor. It may be unstable, or the refactor may indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
40554
10
1292
16
1