@webkitty/searchbox
Advanced tools
Comparing version 0.0.3 to 0.0.4
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
const MooLexer = require("moo"); | ||
const TOKEN_OP = 'OP'; | ||
const TOKEN_KEY = 'KEY'; | ||
const TOKEN_VALUE = 'VALUE'; | ||
const TOKEN_FULLTEXT = 'FULLTEXT'; | ||
const TYPE_OP = 'OP'; | ||
const TYPE_SEP = 'SEP'; | ||
const TYPE_KEY = 'KEY'; | ||
const TYPE_WORD = 'WORD'; | ||
const TYPE_CHAIN_PAIR = [TYPE_KEY, TYPE_SEP, TYPE_WORD].join('|'); | ||
const TYPE_CHAIN_OP_PAIR = [TYPE_OP, TYPE_KEY, TYPE_SEP, TYPE_WORD].join('|'); | ||
function parse(input, opts) { | ||
const keywords = opts ? (opts.keywords || []) : []; | ||
const parser = new Parser(); | ||
const lexer = new Lexer(keywords, parser.parse.bind(parser)); | ||
const lexer = new Lexer(keywords, parser.addToken.bind(parser)); | ||
lexer.lex(input); | ||
return parser.getFormula(); | ||
return parser.parse(); | ||
} | ||
@@ -41,27 +43,40 @@ exports.parse = parse; | ||
constructor() { | ||
this.query = new Formula(); | ||
this.formula = new Formula(); | ||
this.tokens = []; | ||
} | ||
getFormula() { | ||
return this.query; | ||
addToken(token) { | ||
this.tokens.push(token); | ||
} | ||
parse(token) { | ||
switch (token.type) { | ||
case TOKEN_OP: | ||
this.lastOp = token.value; | ||
break; | ||
case TOKEN_KEY: | ||
this.lastKey = token.value; | ||
break; | ||
case TOKEN_VALUE: | ||
this.append(this.lastKey, token.value, this.lastOp); | ||
this.lastOp = undefined; | ||
this.lastKey = undefined; | ||
break; | ||
case TOKEN_FULLTEXT: | ||
this.append('fulltext', token.value); | ||
break; | ||
parse() { | ||
let tupleSize; | ||
for (let i = 0; i < this.tokens.length; i += tupleSize) { | ||
const t1 = this.tokens[i]; | ||
const t2 = i + 1 < this.tokens.length ? this.tokens[i + 1] : undefined; | ||
const t3 = i + 2 < this.tokens.length ? this.tokens[i + 2] : undefined; | ||
const t4 = i + 3 < this.tokens.length ? this.tokens[i + 3] : undefined; | ||
tupleSize = this.parseTuple(t1, t2, t3, t4); | ||
} | ||
return this.formula; | ||
} | ||
parseTuple(t1, t2, t3, t4) { | ||
const t1Type = t1.type; | ||
const t2Type = t2 ? t2.type : undefined; | ||
const t3Type = t3 ? t3.type : undefined; | ||
const t4Type = t4 ? t4.type : undefined; | ||
const typeChain = [t1Type, t2Type, t3Type, t4Type].join('|'); | ||
if (typeChain.startsWith(TYPE_CHAIN_PAIR)) { | ||
this.append(t1.value, t3.value); | ||
return 3; | ||
} | ||
else if (typeChain.startsWith(TYPE_CHAIN_OP_PAIR)) { | ||
this.append(t2.value, t4.value, t1.value); | ||
return 4; | ||
} | ||
else { | ||
this.append('fulltext', t1.value); | ||
return 1; | ||
} | ||
} | ||
append(key, value, op) { | ||
this.query.append(key, value, op); | ||
this.formula.append(key, value, op); | ||
} | ||
@@ -78,3 +93,2 @@ } | ||
const SEP = /:/u; | ||
const KEY = new RegExp(keywords.join('|'), 'u'); | ||
const WORD = new RegExp("(?<=^|[ \f\n\r\t\v.,'\"+\\-!?:]+)(?:.+?)(?=$|[ \f\n\r\t\v.,'\"\+\\-!?:;]+)", "u"); | ||
@@ -84,26 +98,12 @@ const WORDS_SQ = /'.*?'/u; | ||
const NON_WORD = /\W+?/u; | ||
this.lexer = MooLexer.states({ | ||
init: { | ||
WS: WS, | ||
[TOKEN_OP]: { match: OP }, | ||
[TOKEN_KEY]: { | ||
match: KEY, | ||
type: MooLexer.keywords({ KEY: keywords }), | ||
push: 'pair' | ||
}, | ||
[TOKEN_FULLTEXT]: [ | ||
{ match: WORDS_SQ, value: x => x.slice(1, -1) }, | ||
{ match: WORDS_DQ, value: x => x.slice(1, -1) }, | ||
{ match: WORD, lineBreaks: true }, | ||
], | ||
NON_WORD: { match: NON_WORD, lineBreaks: true }, | ||
}, | ||
pair: { | ||
SEP: { match: SEP }, | ||
[TOKEN_VALUE]: [ | ||
{ match: WORDS_SQ, value: x => x.slice(1, -1), pop: 1 }, | ||
{ match: WORDS_DQ, value: x => x.slice(1, -1), pop: 1 }, | ||
{ match: WORD, lineBreaks: true, pop: 1 }, | ||
], | ||
}, | ||
this.lexer = MooLexer.compile({ | ||
WS: WS, | ||
[TYPE_OP]: { match: OP }, | ||
[TYPE_SEP]: { match: SEP }, | ||
[TYPE_WORD]: [ | ||
{ match: WORDS_SQ, value: x => x.slice(1, -1) }, | ||
{ match: WORDS_DQ, value: x => x.slice(1, -1) }, | ||
{ match: WORD, type: MooLexer.keywords({ [TYPE_KEY]: keywords }) }, | ||
], | ||
NON_WORD: { match: NON_WORD, lineBreaks: true }, | ||
}); | ||
@@ -116,6 +116,6 @@ } | ||
switch (token.type) { | ||
case TOKEN_OP: | ||
case TOKEN_KEY: | ||
case TOKEN_VALUE: | ||
case TOKEN_FULLTEXT: | ||
case TYPE_OP: | ||
case TYPE_SEP: | ||
case TYPE_KEY: | ||
case TYPE_WORD: | ||
this.emitToken(token.type, token.value); | ||
@@ -122,0 +122,0 @@ break; |
{ | ||
"name": "@webkitty/searchbox", | ||
"version": "0.0.3", | ||
"version": "0.0.4", | ||
"author": { | ||
@@ -5,0 +5,0 @@ "name": "stuchl4n3k", |
@@ -44,1 +44,3 @@ <h1 align=center>Searchbox</h1> | ||
This project is [MIT](LICENSE) licensed. | ||
[FOSSA license scan report](https://app.fossa.io/projects/git%2Bgithub.com%2Fwebkitty%2Fsearchbox?ref=badge_large) |
125
src/index.ts
import MooLexer = require('moo'); | ||
const TOKEN_OP = 'OP'; | ||
const TOKEN_KEY = 'KEY'; | ||
const TOKEN_VALUE = 'VALUE'; | ||
const TOKEN_FULLTEXT = 'FULLTEXT'; | ||
const TYPE_OP = 'OP'; | ||
const TYPE_SEP = 'SEP'; | ||
const TYPE_KEY = 'KEY'; | ||
const TYPE_WORD = 'WORD'; | ||
const TYPE_CHAIN_PAIR = [TYPE_KEY, TYPE_SEP, TYPE_WORD].join('|'); | ||
const TYPE_CHAIN_OP_PAIR = [TYPE_OP, TYPE_KEY, TYPE_SEP, TYPE_WORD].join('|'); | ||
export {parse} | ||
@@ -20,6 +23,6 @@ | ||
const parser = new Parser(); | ||
const lexer = new Lexer(keywords, parser.parse.bind(parser)); | ||
const lexer = new Lexer(keywords, parser.addToken.bind(parser)); | ||
lexer.lex(input); | ||
return parser.getFormula(); | ||
return parser.parse(); | ||
} | ||
@@ -63,30 +66,42 @@ | ||
private query: Formula = new Formula(); | ||
private lastOp?: string; | ||
private lastKey?: string; | ||
private formula: Formula = new Formula(); | ||
private tokens: LexerToken[] = []; | ||
public getFormula(): Formula { | ||
return this.query; | ||
public addToken(token: LexerToken): void { | ||
// console.debug(`Parsing token #${token.type}# : #${token.value}#`); | ||
this.tokens.push(token); | ||
} | ||
public parse(token: LexerToken): void { | ||
// console.debug(`Parsing token #${token.type}# : #${token.value}#`); | ||
switch (token.type) { | ||
case TOKEN_OP: | ||
this.lastOp = token.value; | ||
break; | ||
public parse(): Formula { | ||
let tupleSize: number; | ||
case TOKEN_KEY: | ||
this.lastKey = token.value; | ||
break; | ||
for (let i = 0; i < this.tokens.length; i += tupleSize) { | ||
const t1 = this.tokens[i]; | ||
const t2 = i + 1 < this.tokens.length ? this.tokens[i + 1] : undefined; | ||
const t3 = i + 2 < this.tokens.length ? this.tokens[i + 2] : undefined; | ||
const t4 = i + 3 < this.tokens.length ? this.tokens[i + 3] : undefined; | ||
case TOKEN_VALUE: | ||
this.append(this.lastKey!, token.value, this.lastOp); | ||
this.lastOp = undefined; | ||
this.lastKey = undefined; | ||
break; | ||
tupleSize = this.parseTuple(t1, t2, t3, t4); | ||
} | ||
case TOKEN_FULLTEXT: | ||
this.append('fulltext', token.value); | ||
break; | ||
return this.formula; | ||
} | ||
private parseTuple(t1: LexerToken, t2?: LexerToken, t3?: LexerToken, t4?: LexerToken): number { | ||
const t1Type = t1.type; | ||
const t2Type = t2 ? t2.type : undefined; | ||
const t3Type = t3 ? t3.type : undefined; | ||
const t4Type = t4 ? t4.type : undefined; | ||
const typeChain = [t1Type, t2Type, t3Type, t4Type].join('|'); | ||
if (typeChain.startsWith(TYPE_CHAIN_PAIR)) { | ||
this.append(t1.value, t3!.value); | ||
return 3; | ||
} else if (typeChain.startsWith(TYPE_CHAIN_OP_PAIR)) { | ||
this.append(t2!.value, t4!.value, t1.value); | ||
return 4; | ||
} else { | ||
this.append('fulltext', t1.value); | ||
return 1; | ||
} | ||
@@ -96,3 +111,3 @@ } | ||
private append(key: string, value: string, op?: string) { | ||
this.query.append(key, value, op); | ||
this.formula.append(key, value, op); | ||
} | ||
@@ -118,8 +133,7 @@ } | ||
// Define our (unicode) grammar. | ||
// Define our (unicode) vocabulary. | ||
const WS = /[ \t]+/u; | ||
const OP = new RegExp(`${operators.join('|')}(?=(?:${keywords.join('|')}))`, 'u'); | ||
const SEP = /:/u; | ||
const KEY = new RegExp(keywords.join('|'), 'u'); | ||
const WORD = new RegExp("(?<=^|[ \f\n\r\t\v.,'\"+\\-!?:]+)(?:.+?)(?=$|[ \f\n\r\t\v.,'\"\+\\-!?:;]+)", "u") | ||
const WORD = new RegExp("(?<=^|[ \f\n\r\t\v.,'\"+\\-!?:]+)(?:.+?)(?=$|[ \f\n\r\t\v.,'\"\+\\-!?:;]+)", "u"); | ||
const WORDS_SQ = /'.*?'/u; | ||
@@ -129,30 +143,13 @@ const WORDS_DQ = /".*?"/u; | ||
// Define stateful grammar rules. | ||
this.lexer = MooLexer.states({ | ||
// Initial state: primarily we match keywords and operators here. The rest is fulltext. | ||
init: { | ||
WS: WS, | ||
[TOKEN_OP]: {match: OP}, | ||
[TOKEN_KEY]: { | ||
match: KEY, | ||
type: MooLexer.keywords({KEY: keywords}), | ||
push: 'pair' | ||
}, | ||
[TOKEN_FULLTEXT]: [ | ||
{match: WORDS_SQ, value: x => x.slice(1, -1)}, | ||
{match: WORDS_DQ, value: x => x.slice(1, -1)}, | ||
{match: WORD, lineBreaks: true}, | ||
], | ||
NON_WORD: {match: NON_WORD, lineBreaks: true}, | ||
}, | ||
// Pair state: If a keyword was matched, look for the value and move back to init. | ||
pair: { | ||
SEP: {match: SEP}, | ||
[TOKEN_VALUE]: [ | ||
{match: WORDS_SQ, value: x => x.slice(1, -1), pop: 1}, | ||
{match: WORDS_DQ, value: x => x.slice(1, -1), pop: 1}, | ||
{match: WORD, lineBreaks: true, pop: 1}, | ||
], | ||
}, | ||
// Define grammar rules. | ||
this.lexer = MooLexer.compile({ | ||
WS: WS, | ||
[TYPE_OP]: {match: OP}, | ||
[TYPE_SEP]: {match: SEP}, | ||
[TYPE_WORD]: [ | ||
{match: WORDS_SQ, value: x => x.slice(1, -1)}, | ||
{match: WORDS_DQ, value: x => x.slice(1, -1)}, | ||
{match: WORD, type: MooLexer.keywords({[TYPE_KEY]: keywords})}, | ||
], | ||
NON_WORD: {match: NON_WORD, lineBreaks: true}, | ||
}); | ||
@@ -167,6 +164,6 @@ } | ||
switch (token.type) { | ||
case TOKEN_OP: | ||
case TOKEN_KEY: | ||
case TOKEN_VALUE: | ||
case TOKEN_FULLTEXT: | ||
case TYPE_OP: | ||
case TYPE_SEP: | ||
case TYPE_KEY: | ||
case TYPE_WORD: | ||
this.emitToken(token.type, token.value); | ||
@@ -173,0 +170,0 @@ break; |
@@ -41,5 +41,5 @@ const searchbox = require('./../../dist'); | ||
expect(literals[0].op).toBe(undefined); | ||
expect(literals[0].values).toHaveLength(5); | ||
expect(literals[0].values).toHaveLength(6); | ||
expect(literals[0].values).toStrictEqual(expect.arrayContaining([ | ||
"Foo", "bar", "perex", "naughty:naughty:monkey", "-trailer" | ||
"Foo", "bar", "perex", ":", "naughty:naughty:monkey", "-trailer" | ||
])); | ||
@@ -68,2 +68,47 @@ | ||
// With no keywords configured, a single bare word becomes the only fulltext value.
test('parse one word', () => {
    const parsed = searchbox.parse('skedaddle', {keywords: []});
    const found = parsed.getLiterals();
    expect(found).toHaveLength(1);

    const fulltext = found[0];
    expect(fulltext.key).toBe('fulltext');
    expect(fulltext.op).toBe(undefined);
    expect(fulltext.values).toHaveLength(1);
    expect(fulltext.values).toStrictEqual(expect.arrayContaining(["skedaddle"]));
});
// A configured keyword with no separator or value after it is reported as fulltext.
test('parse one keyword', () => {
    const parsed = searchbox.parse('skedaddle', {keywords: ['skedaddle']});
    const found = parsed.getLiterals();
    expect(found).toHaveLength(1);

    const fulltext = found[0];
    expect(fulltext.key).toBe('fulltext');
    expect(fulltext.op).toBe(undefined);
    expect(fulltext.values).toHaveLength(1);
    expect(fulltext.values).toStrictEqual(expect.arrayContaining(["skedaddle"]));
});
// A keyword followed by a dangling separator (no value) yields two fulltext
// values: the keyword text and the separator itself.
test('parse one keyword with separator', () => {
    const parsed = searchbox.parse('skedaddle:', {keywords: ['skedaddle']});
    const found = parsed.getLiterals();
    expect(found).toHaveLength(1);

    const fulltext = found[0];
    expect(fulltext.key).toBe('fulltext');
    expect(fulltext.op).toBe(undefined);
    expect(fulltext.values).toHaveLength(2);
    expect(fulltext.values).toStrictEqual(expect.arrayContaining(["skedaddle", ":"]));
});
test('parse query without keywords', () => { | ||
@@ -79,9 +124,34 @@ const inputText = 'Foo bar title:hello author:joe -text:"foo bar" text:baz perex:\'naughty:naughty:monkey\' -trailer'; | ||
expect(literals[0].op).toBe(undefined); | ||
expect(literals[0].values).toHaveLength(13); | ||
expect(literals[0].values).toHaveLength(18); | ||
expect(literals[0].values).toStrictEqual(expect.arrayContaining([ | ||
"Foo", "bar", "title", "hello", "author", "joe", "-text", "foo bar", | ||
"text", "baz", "perex", "naughty:naughty:monkey", "-trailer" | ||
"Foo", "bar", "title", ":", "hello", "author", ":", "joe", "-text", ":", | ||
"foo bar", "text", ":", "baz", "perex", ":", "naughty:naughty:monkey", | ||
"-trailer" | ||
])); | ||
}); | ||
// Only the exact `class:` pair is treated as a keyword literal; words that merely
// resemble the keyword ("Class", "classname", "nameclass") stay in fulltext.
test('parse query with keyword-like words', () => {
    const parsed = searchbox.parse('Class is not class:and not classname nor nameclass', {keywords: ['class']});
    const found = parsed.getLiterals();
    expect(found).toHaveLength(2);

    const fulltext = found[0];
    expect(fulltext.key).toBe('fulltext');
    expect(fulltext.op).toBe(undefined);
    expect(fulltext.values).toHaveLength(7);
    expect(fulltext.values).toStrictEqual(expect.arrayContaining([
        "Class", "is", "not", "not", "classname", "nor", "nameclass"
    ]));

    const classLiteral = found[1];
    expect(classLiteral.key).toBe('class');
    expect(classLiteral.op).toBe(undefined);
    expect(classLiteral.values).toHaveLength(1);
    expect(classLiteral.values).toStrictEqual(expect.arrayContaining([
        "and"
    ]));
});
test('parse query with repeated keyword', () => { | ||
@@ -224,2 +294,2 @@ const inputText = 'title:\'Never take\' title:"raisins from rabbits"'; | ||
])); | ||
}); | ||
}); |
31468
601
46