Comparing version 0.3.0 to 0.4.0
@@ -0,1 +1,17 @@ | ||
## 0.4.0 (2019-09-10) | ||
### Bug fixes | ||
Don't rely on additional data stored in the parse table during recovery (shrinking the parse tables). | ||
Fix a crash that could occur when starting a nested parse when there were multiple active stacks. | ||
Fix an issue where error nodes would sometimes not be merged. | ||
Don't reuse cached tokens for states that have a different token group. | ||
### Breaking changes | ||
The on-disk parse table format changed again. | ||
## 0.3.0 (2019-08-22) | ||
@@ -2,0 +18,0 @@ |
@@ -30,8 +30,6 @@ export declare const enum Action { | ||
Skip = 2, | ||
Recover = 3, | ||
TokenizerMask = 4, | ||
DefaultReduce = 5, | ||
ForcedReduce = 6, | ||
Size = 8, | ||
Shift = 3 | ||
TokenizerMask = 3, | ||
DefaultReduce = 4, | ||
ForcedReduce = 5, | ||
Size = 6 | ||
} | ||
@@ -38,0 +36,0 @@ export declare const enum Encode { |
@@ -40,5 +40,17 @@ 'use strict'; | ||
Badness[Badness["Unit"] = 100] = "Unit"; | ||
// Limits in between which stacks are less agressively pruned | ||
Badness[Badness["Stabilizing"] = 50] = "Stabilizing"; | ||
Badness[Badness["Wild"] = 150] = "Wild"; | ||
// Badness at which we disallow adding a stack if another stack | ||
// shares its top state and position. | ||
Badness[Badness["Deduplicate"] = 200] = "Deduplicate"; | ||
// The maximum amount of active stacks at which recovery actions are | ||
// applied | ||
Badness[Badness["MaxRecoverStacks"] = 25] = "MaxRecoverStacks"; | ||
// If badness reaches this level (and there are sibling stacks), | ||
// don't recover. | ||
Badness[Badness["TooBadToRecover"] = 500] = "TooBadToRecover"; | ||
// If the best sibling is this amount better than the current stack, | ||
// don't apply recovery. | ||
Badness[Badness["RecoverSiblingFactor"] = 3] = "RecoverSiblingFactor"; | ||
// Constant used to prune stacks that run error-free alongside each | ||
// other for too long | ||
Badness[Badness["MaxParallelBufferLength"] = 800] = "MaxParallelBufferLength"; | ||
})(Badness || (Badness = {})); | ||
@@ -183,3 +195,3 @@ // Badness is a measure of how off-the-rails a given parse is. It is | ||
if (isReduce === void 0) { isReduce = false; } | ||
if (term == 0 /* Err */) { // Try to omit superfluous error nodes | ||
if (term == 0 /* Err */) { // Try to omit/merge adjacent error nodes | ||
var cur = this, top = this.buffer.length; | ||
@@ -190,5 +202,10 @@ if (top == 0 && cur.parent) { | ||
} | ||
if (top > 0 && cur.buffer[top - 4] == 0 /* Err */ && cur.buffer[top - 1] > -1 && | ||
(start == end || cur.buffer[top - 2] >= start)) | ||
return; | ||
if (top > 0 && cur.buffer[top - 4] == 0 /* Err */ && cur.buffer[top - 1] > -1) { | ||
if (start == end) | ||
return; | ||
if (cur.buffer[top - 2] >= start) { | ||
cur.buffer[top - 2] = end; | ||
return; | ||
} | ||
} | ||
} | ||
@@ -208,3 +225,4 @@ if (!isReduce || this.pos == end) { // Simple case, just append | ||
index -= 4; | ||
size -= 4; | ||
if (size > 4) | ||
size -= 4; | ||
} | ||
@@ -290,3 +308,3 @@ this.buffer[index] = term; | ||
this.storeNode(0 /* Err */, this.pos, nextEnd, isNode ? 8 : 4); | ||
this.pos = nextEnd; | ||
this.pos = this.reducePos = nextEnd; | ||
this.badness += 100 /* Unit */; | ||
@@ -300,3 +318,3 @@ }; | ||
for (var sim = new SimulatedStack(this);;) { | ||
var action = this.cx.parser.stateSlot(sim.top, 5 /* DefaultReduce */) || this.cx.parser.hasAction(sim.top, term); | ||
var action = this.cx.parser.stateSlot(sim.top, 4 /* DefaultReduce */) || this.cx.parser.hasAction(sim.top, term); | ||
if ((action & 65536 /* ReduceFlag */) == 0) | ||
@@ -312,3 +330,3 @@ return true; | ||
get: function () { | ||
var force = this.cx.parser.stateSlot(this.state, 6 /* ForcedReduce */); | ||
var force = this.cx.parser.stateSlot(this.state, 5 /* ForcedReduce */); | ||
if (!(force & 65536 /* ReduceFlag */)) | ||
@@ -340,3 +358,3 @@ return 0; | ||
for (var frame = this.stack.length - 3; frame >= 0; frame -= 3) { | ||
var force = this.cx.parser.stateSlot(this.stack[frame], 6 /* ForcedReduce */); | ||
var force = this.cx.parser.stateSlot(this.stack[frame], 5 /* ForcedReduce */); | ||
if (types.includes(force & 65535 /* ValueMask */)) { | ||
@@ -349,50 +367,26 @@ var base = frame - (3 * (force >> 19 /* ReduceDepthShift */)); | ||
}; | ||
// Scan for a state that has either a direct action or a recovery | ||
// action for next, without actually building up a new stack | ||
// Apply up to Recover.MaxNext recovery actions that conceptually | ||
// inserts some missing token or rule. | ||
/// @internal | ||
Stack.prototype.canRecover = function (next) { | ||
var visited = null, parser = this.cx.parser; | ||
for (var sim = new SimulatedStack(this), i = 0;; i++) { | ||
if (parser.hasAction(sim.top, next) || parser.getRecover(sim.top, next) != 0) | ||
return true; | ||
// Find a way to reduce from here | ||
var reduce = parser.anyReduce(sim.top); | ||
if (reduce == 0 && ((reduce = this.cx.parser.stateSlot(sim.top, 6 /* ForcedReduce */)) & 65536 /* ReduceFlag */) == 0) | ||
return false; | ||
sim.reduce(reduce); | ||
if (i > 10) { | ||
// Guard against getting stuck in a cycle | ||
if (!visited) | ||
visited = []; | ||
else if (i == 100 || visited.includes(sim.top)) | ||
return false; | ||
visited.push(sim.top); | ||
} | ||
} | ||
}; | ||
// Try to apply a recovery action that conceptually | ||
// inserts some missing content and syncs back to a state that will | ||
// match `next`. If it finds one, it'll return a new stack with the | ||
// insertion applied. If not, it'll return null. | ||
/// @internal | ||
Stack.prototype.recoverByInsert = function (next) { | ||
if (!this.canRecover(next)) | ||
return null; | ||
// Now that we know there's a recovery to be found, run the | ||
// reduces again, the expensive way, updating the stack | ||
var result = this.split(), parser = this.cx.parser; | ||
result.reducePos = result.pos; | ||
result.badness += 100 /* Unit */; | ||
for (;;) { | ||
for (;;) { | ||
if (parser.hasAction(result.state, next)) | ||
return result; | ||
var recover = parser.getRecover(result.state, next); | ||
if (!recover) | ||
break; | ||
result.pushState(recover, result.pos); | ||
result.storeNode(0 /* Err */, result.reducePos, result.reducePos, 4, true); | ||
} | ||
result.forceReduce(false); | ||
var _this = this; | ||
var nextStates = this.cx.parser.nextStates(this.state); | ||
if (nextStates.length > 4 /* MaxNext */) { | ||
var best = nextStates.filter(function (s) { return s != _this.state && _this.cx.parser.hasAction(s, next); }); | ||
for (var i = 0; best.length < 4 /* MaxNext */ && i < nextStates.length; i++) | ||
if (!best.includes(nextStates[i])) | ||
best.push(nextStates[i]); | ||
nextStates = best; | ||
} | ||
var result = []; | ||
for (var i = 0; i < nextStates.length && result.length < 4 /* MaxNext */; i++) { | ||
if (nextStates[i] == this.state) | ||
continue; | ||
var stack = this.split(); | ||
stack.storeNode(0 /* Err */, stack.pos, stack.pos, 4, true); | ||
stack.pushState(nextStates[i], this.pos); | ||
stack.badness += 100 /* Unit */; | ||
result.push(stack); | ||
} | ||
return result; | ||
}; | ||
@@ -402,6 +396,6 @@ // Force a reduce, if possible. Return false if that can't | ||
/// @internal | ||
Stack.prototype.forceReduce = function (countBadness) { | ||
Stack.prototype.forceReduce = function () { | ||
var reduce = this.cx.parser.anyReduce(this.state); | ||
if (reduce == 0) { | ||
reduce = this.cx.parser.stateSlot(this.state, 6 /* ForcedReduce */); | ||
if ((reduce >> 19 /* ReduceDepthShift */) == 0) { // Don't use 0 or a zero-depth reduction | ||
reduce = this.cx.parser.stateSlot(this.state, 5 /* ForcedReduce */); | ||
if ((reduce & 65536 /* ReduceFlag */) == 0) | ||
@@ -429,2 +423,6 @@ return false; | ||
}()); | ||
var Recover; | ||
(function (Recover) { | ||
Recover[Recover["MaxNext"] = 4] = "MaxNext"; | ||
})(Recover || (Recover = {})); | ||
// Used to cheaply run some reductions to scan ahead without mutating | ||
@@ -706,2 +704,3 @@ // an entire stack | ||
_this.extended = -1; | ||
_this.mask = 0; | ||
return _this; | ||
@@ -727,3 +726,3 @@ } | ||
for (var i = 0; i < tokenizers.length; i++) { | ||
if (((1 << i) & parser.stateSlot(stack.state, 4 /* TokenizerMask */)) == 0) | ||
if (((1 << i) & parser.stateSlot(stack.state, 3 /* TokenizerMask */)) == 0) | ||
continue; | ||
@@ -740,4 +739,7 @@ var tokenizer = tokenizers[i], token = void 0; | ||
this.tokens.push(token = new CachedToken(tokenizer)); | ||
if (tokenizer.contextual || token.start != stack.pos) | ||
var mask = parser.stateSlot(stack.state, 3 /* TokenizerMask */); | ||
if (tokenizer.contextual || token.start != stack.pos || token.mask != mask) { | ||
this.updateCachedToken(token, stack, input); | ||
token.mask = mask; | ||
} | ||
var startIndex = actionIndex; | ||
@@ -837,9 +839,10 @@ if (token.extended > -1) | ||
} | ||
ParseContext.prototype.takeStack = function () { | ||
ParseContext.prototype.takeStack = function (at) { | ||
if (at === void 0) { at = 0; } | ||
// Binary heap pop | ||
var stacks = this.stacks, elt = stacks[0], replacement = stacks.pop(); | ||
var stacks = this.stacks, elt = stacks[at], replacement = stacks.pop(); | ||
if (stacks.length == 0) | ||
return elt; | ||
stacks[0] = replacement; | ||
for (var index = 0;;) { | ||
stacks[at] = replacement; | ||
for (var index = at;;) { | ||
var childIndex = (index << 1) + 1; | ||
@@ -861,25 +864,34 @@ if (childIndex >= stacks.length) | ||
}; | ||
ParseContext.prototype.putStack = function (stack, strict) { | ||
if (strict === void 0) { strict = stack.badness < 50 /* Stabilizing */ || stack.badness > 150 /* Wild */; } | ||
var stacks = this.stacks; | ||
for (var i = 0; i < stacks.length; i++) { | ||
var other = stacks[i]; | ||
if ((strict || other.state == stack.state) && other.pos == stack.pos) { | ||
var diff = stack.badness - other.badness || (stack.badness < 50 /* Stabilizing */ ? 0 : stack.stack.length - other.stack.length); | ||
if (diff < 0) { | ||
stacks[i] = stack; | ||
return true; | ||
ParseContext.prototype.putStack = function (stack) { | ||
if (stack.badness >= 200 /* Deduplicate */) { | ||
for (var i = 0; i < this.stacks.length; i++) { | ||
var other = this.stacks[i]; | ||
if (other.state == stack.state && other.pos == stack.pos) { | ||
var diff = stack.badness - other.badness || stack.stack.length - other.stack.length; | ||
if (diff < 0) { | ||
this.stacks[i] = stack; | ||
return true; | ||
} | ||
else if (diff >= 0) | ||
return false; | ||
} | ||
else if (diff > 0) | ||
return false; | ||
} | ||
} | ||
else if (stack.badness == 0 && this.stacks.length && stack.buffer.length > 800 /* MaxParallelBufferLength */) { | ||
// If a stack looks error-free, but isn't the only active one | ||
// _and_ has a buffer that is long but not the longest, prune | ||
// it, since this might be a situation where two stacks can | ||
// continue indefinitely. | ||
var maxOther = this.stacks.reduce(function (m, s) { return Math.max(m, s.buffer.length); }, 0); | ||
if (maxOther > stack.buffer.length) | ||
return false; | ||
} | ||
// Binary heap add | ||
var index = stacks.push(stack) - 1; | ||
var index = this.stacks.push(stack) - 1; | ||
while (index > 0) { | ||
var parentIndex = index >> 1, parent = stacks[parentIndex]; | ||
var parentIndex = index >> 1, parent = this.stacks[parentIndex]; | ||
if (stack.compare(parent) >= 0) | ||
break; | ||
stacks[index] = parent; | ||
stacks[parentIndex] = stack; | ||
this.stacks[index] = parent; | ||
this.stacks[parentIndex] = stack; | ||
index = parentIndex; | ||
@@ -903,2 +915,3 @@ } | ||
var stack = this.takeStack(), start = stack.pos, _a = stack.cx, input = _a.input, parser = _a.parser; | ||
var base = verbose ? stack + " -> " : ""; | ||
if (this.cache) { | ||
@@ -910,3 +923,3 @@ for (var cached = this.cache.nodeAt(start); cached;) { | ||
if (verbose) | ||
console.log(stack + (" (via reuse of " + parser.getName(cached.type.id) + ")")); | ||
console.log(base + stack + (" (via reuse of " + parser.getName(cached.type.id) + ")")); | ||
this.putStack(stack); | ||
@@ -951,3 +964,3 @@ return null; | ||
if (verbose) | ||
console.log(newStack + " (nested)"); | ||
console.log(base + newStack + " (nested)"); | ||
this.putStack(newStack); | ||
@@ -957,3 +970,3 @@ } | ||
} | ||
var defaultReduce = parser.stateSlot(stack.state, 5 /* DefaultReduce */); | ||
var defaultReduce = parser.stateSlot(stack.state, 4 /* DefaultReduce */); | ||
if (defaultReduce > 0) { | ||
@@ -963,3 +976,3 @@ stack.reduce(defaultReduce); | ||
if (verbose) | ||
console.log(stack + (" (via always-reduce " + parser.getName(defaultReduce & 65535 /* ValueMask */) + ")")); | ||
console.log(base + stack + (" (via always-reduce " + parser.getName(defaultReduce & 65535 /* ValueMask */) + ")")); | ||
return null; | ||
@@ -973,5 +986,5 @@ } | ||
if (verbose) | ||
console.log(localStack + (" (via " + ((action & 65536 /* ReduceFlag */) == 0 ? "shift" | ||
console.log(base + localStack + (" (via " + ((action & 65536 /* ReduceFlag */) == 0 ? "shift" | ||
: "reduce of " + parser.getName(action & 65535 /* ValueMask */)) + " for " + parser.getName(term_1) + " @ " + start + (localStack == stack ? "" : ", split") + ")")); | ||
this.putStack(localStack, (action & 65536 /* ReduceFlag */) != 0); | ||
this.putStack(localStack); | ||
} | ||
@@ -981,6 +994,6 @@ if (actions.length > 0) | ||
// If we're here, the stack failed to advance normally | ||
if (start == input.length) { | ||
if (start == input.length) { // End of file | ||
if (!parser.stateFlag(stack.state, 2 /* Accepting */) && stack.forceReduce()) { | ||
if (verbose) | ||
console.log(stack + " (via forced reduction at eof)"); | ||
console.log(base + stack + " (via forced reduction at eof)"); | ||
this.putStack(stack); | ||
@@ -1000,26 +1013,39 @@ return null; | ||
} | ||
// Not end of file. See if we should recover. | ||
var minBad = this.stacks.reduce(function (m, s) { return Math.min(m, s.badness); }, 1e9); | ||
// If this is not the best stack and its badness is above the | ||
// TooBadToRecover ceiling or RecoverToSibling times the best | ||
// stack, don't continue it. | ||
if (minBad <= stack.badness && | ||
(this.stacks.length >= 25 /* MaxRecoverStacks */ || | ||
stack.badness > Math.min(500 /* TooBadToRecover */, minBad * 3 /* RecoverSiblingFactor */))) | ||
return null; | ||
var _c = stack.cx.tokens.mainToken, end = _c.end, term = _c.value; | ||
if (!this.strict && | ||
!(stack.badness > 150 /* Wild */ && this.stacks.some(function (s) { return s.pos >= stack.pos && s.badness <= stack.badness; }))) { | ||
var inserted = stack.recoverByInsert(term); | ||
if (inserted) { | ||
if (verbose) | ||
console.log(inserted + " (via recover-insert)"); | ||
this.putStack(inserted); | ||
} | ||
if (end == start) { | ||
if (start == input.length) | ||
return null; | ||
end++; | ||
term = 0 /* Err */; | ||
} | ||
stack.recoverByDelete(term, end); | ||
if (this.strict) { | ||
if (this.stacks.length) | ||
return null; | ||
throw new SyntaxError("No parse at " + start + " with " + parser.getName(term) + " (stack is " + stack + ")"); | ||
} | ||
for (var _i = 0, _d = stack.recoverByInsert(term); _i < _d.length; _i++) { | ||
var insert = _d[_i]; | ||
if (verbose) | ||
console.log(stack + (" (via recover-delete " + parser.getName(term) + ")")); | ||
this.putStack(stack); | ||
console.log(base + insert + " (via recover-insert)"); | ||
this.putStack(insert); | ||
} | ||
else if (!this.stacks.length) { | ||
// Only happens in strict mode | ||
throw new SyntaxError("No parse at " + start + " with " + parser.getName(term) + " (stack is " + stack + ")"); | ||
var reduce = stack.split(); | ||
if (reduce.forceReduce()) { | ||
if (verbose) | ||
console.log(base + reduce + " (via force-reduce)"); | ||
this.putStack(reduce); | ||
} | ||
if (end == start) { | ||
if (start == input.length) | ||
return null; | ||
end++; | ||
term = 0 /* Err */; | ||
} | ||
stack.recoverByDelete(term, end); | ||
if (verbose) | ||
console.log(base + stack + (" (via recover-delete " + parser.getName(term) + ")")); | ||
this.putStack(stack); | ||
return null; | ||
@@ -1060,2 +1086,9 @@ }; | ||
console.log(parent + (" (via unnest " + (stack.cx.wrapType > -1 ? parentParser.getName(stack.cx.wrapType) : tree.type.name) + ")")); | ||
// Drop any other stack that has the same parent | ||
for (var i = 0; i < this.stacks.length;) { | ||
if (this.stacks[i].cx.parent == parent) | ||
this.takeStack(i); | ||
else | ||
i++; | ||
} | ||
return parent; | ||
@@ -1110,3 +1143,6 @@ }; | ||
this.termNames = termNames; | ||
this.nextStateCache = []; | ||
this.maxNode = this.group.types.length - 1; | ||
for (var i = 0, l = this.states.length / 6 /* Size */; i < l; i++) | ||
this.nextStateCache[i] = null; | ||
} | ||
@@ -1157,14 +1193,5 @@ /// Parse a given string or stream. | ||
}; | ||
// Get a recovery action for a given state and terminal, or 0 when | ||
// none | ||
///@internal | ||
Parser.prototype.getRecover = function (state, terminal) { | ||
for (var i = this.stateSlot(state, 3 /* Recover */), next = void 0; (next = this.data[i]) != 65535 /* End */; i += 2) | ||
if (next == terminal) | ||
return this.data[i + 1]; | ||
return 0; | ||
}; | ||
/// @internal | ||
Parser.prototype.stateSlot = function (state, slot) { | ||
return this.states[(state << 3 /* Shift */) + slot]; | ||
return this.states[(state * 6 /* Size */) + slot]; | ||
}; | ||
@@ -1182,3 +1209,3 @@ /// @internal | ||
Parser.prototype.anyReduce = function (state) { | ||
var defaultReduce = this.stateSlot(state, 5 /* DefaultReduce */); | ||
var defaultReduce = this.stateSlot(state, 4 /* DefaultReduce */); | ||
if (defaultReduce > 0) | ||
@@ -1189,7 +1216,31 @@ return defaultReduce; | ||
return 0; | ||
var isReduce = this.data[i + 2]; | ||
if (isReduce) | ||
return this.data[i + 1] | (isReduce << 16); | ||
var top = this.data[i + 2]; | ||
if (top & (65536 /* ReduceFlag */ >> 16)) | ||
return this.data[i + 1] | (top << 16); | ||
} | ||
}; | ||
/// Get the states that can follow this one through shift actions or | ||
/// goto jumps. @internal | ||
Parser.prototype.nextStates = function (state) { | ||
var cached = this.nextStateCache[state]; | ||
if (cached) | ||
return cached; | ||
var result = []; | ||
for (var i = this.stateSlot(state, 1 /* Actions */); this.data[i] != 65535 /* End */; i += 3) { | ||
if ((this.data[i + 2] & (65536 /* ReduceFlag */ >> 16)) == 0 && !result.includes(this.data[i + 1])) | ||
result.push(this.data[i + 1]); | ||
} | ||
var table = this.goto, max = table[0]; | ||
for (var term = 0; term < max; term++) { | ||
for (var pos = table[term + 1];;) { | ||
var groupTag = table[pos++], target = table[pos++]; | ||
for (var end = pos + (groupTag >> 1); pos < end; pos++) | ||
if (table[pos] == state && !result.includes(target)) | ||
result.push(target); | ||
if (groupTag & 1) | ||
break; | ||
} | ||
} | ||
return this.nextStateCache[state] = result; | ||
}; | ||
/// @internal | ||
@@ -1291,12 +1342,16 @@ Parser.prototype.overrides = function (token, prev) { | ||
if (!fragile) | ||
node.iterate(0, node.length, function (type) { | ||
return doneStart || (type.id == 0 /* Err */ ? fragile = doneStart = true : undefined); | ||
}, function (type) { | ||
doneStart = true; | ||
node.iterate({ | ||
enter: function (type) { | ||
return doneStart || (type.id == 0 /* Err */ ? fragile = doneStart = true : undefined); | ||
}, | ||
leave: function (type) { doneStart = true; } | ||
}); | ||
if (!fragile) | ||
node.iterate(node.length, 0, function (type) { | ||
return doneEnd || (type.id == 0 /* Err */ ? fragile = doneEnd = true : undefined); | ||
}, function (type) { | ||
doneEnd = true; | ||
node.iterate({ | ||
from: node.length, | ||
to: 0, | ||
enter: function (type) { | ||
return doneEnd || (type.id == 0 /* Err */ ? fragile = doneEnd = true : undefined); | ||
}, | ||
leave: function (type) { doneEnd = true; } | ||
}); | ||
@@ -1303,0 +1358,0 @@ return fragile; |
@@ -23,2 +23,3 @@ import { Stack } from "./stack"; | ||
extended: number; | ||
mask: number; | ||
constructor(tokenizer: Tokenizer); | ||
@@ -85,2 +86,3 @@ clear(start: number): void; | ||
maxNode: number; | ||
private nextStateCache; | ||
constructor(states: Readonly<Uint32Array>, data: Readonly<Uint16Array>, goto: Readonly<Uint16Array>, group: NodeGroup, tokenizers: readonly Tokenizer[], nested: readonly { | ||
@@ -100,3 +102,2 @@ name: string; | ||
hasAction(state: number, terminal: number): number; | ||
getRecover(state: number, terminal: number): number; | ||
stateSlot(state: number, slot: number): number; | ||
@@ -106,2 +107,3 @@ stateFlag(state: number, flag: number): boolean; | ||
anyReduce(state: number): number; | ||
nextStates(state: number): readonly number[]; | ||
overrides(token: number, prev: number): boolean; | ||
@@ -108,0 +110,0 @@ withNested(spec: { |
@@ -5,4 +5,7 @@ import { StackContext } from "./parse"; | ||
Unit = 100, | ||
Stabilizing = 50, | ||
Wild = 150 | ||
Deduplicate = 200, | ||
MaxRecoverStacks = 25, | ||
TooBadToRecover = 500, | ||
RecoverSiblingFactor = 3, | ||
MaxParallelBufferLength = 800 | ||
} | ||
@@ -33,7 +36,6 @@ export declare class Stack { | ||
startOf(types: readonly number[]): number; | ||
canRecover(next: number): boolean; | ||
recoverByInsert(next: number): Stack | null; | ||
forceReduce(countBadness?: boolean): boolean; | ||
recoverByInsert(next: number): Stack[]; | ||
forceReduce(): boolean; | ||
compare(other: Stack): number; | ||
toTree(): Tree; | ||
} |
{ | ||
"name": "lezer", | ||
"version": "0.3.0", | ||
"version": "0.4.0", | ||
"description": "Incremental parser", | ||
@@ -17,3 +17,3 @@ "main": "dist/index.js", | ||
"dependencies": { | ||
"lezer-tree": "^0.3.0" | ||
"lezer-tree": "^0.4.0" | ||
}, | ||
@@ -20,0 +20,0 @@ "scripts": { |
@@ -89,9 +89,7 @@ // This file defines some constants that are needed both in this | ||
Skip = 2, | ||
Recover = 3, | ||
TokenizerMask = 4, | ||
DefaultReduce = 5, | ||
ForcedReduce = 6, | ||
TokenizerMask = 3, | ||
DefaultReduce = 4, | ||
ForcedReduce = 5, | ||
// Total size of a state record | ||
Size = 8, | ||
Shift = 3 | ||
Size = 6 | ||
} | ||
@@ -98,0 +96,0 @@ |
174
src/parse.ts
@@ -86,2 +86,3 @@ import {Stack, Badness} from "./stack" | ||
extended = -1 | ||
mask = 0 | ||
@@ -114,3 +115,7 @@ constructor(readonly tokenizer: Tokenizer) { super() } | ||
if (!token) this.tokens.push(token = new CachedToken(tokenizer)) | ||
if (tokenizer.contextual || token.start != stack.pos) this.updateCachedToken(token, stack, input) | ||
let mask = parser.stateSlot(stack.state, ParseState.TokenizerMask) | ||
if (tokenizer.contextual || token.start != stack.pos || token.mask != mask) { | ||
this.updateCachedToken(token, stack, input) | ||
token.mask = mask | ||
} | ||
@@ -227,8 +232,8 @@ let startIndex = actionIndex | ||
private takeStack() { | ||
private takeStack(at = 0) { | ||
// Binary heap pop | ||
let {stacks} = this, elt = stacks[0], replacement = stacks.pop()! | ||
let {stacks} = this, elt = stacks[at], replacement = stacks.pop()! | ||
if (stacks.length == 0) return elt | ||
stacks[0] = replacement | ||
for (let index = 0;;) { | ||
stacks[at] = replacement | ||
for (let index = at;;) { | ||
let childIndex = (index << 1) + 1 | ||
@@ -249,20 +254,28 @@ if (childIndex >= stacks.length) break | ||
private putStack(stack: Stack, strict = stack.badness < Badness.Stabilizing || stack.badness > Badness.Wild): boolean { | ||
let stacks = this.stacks | ||
for (let i = 0; i < stacks.length; i++) { | ||
let other = stacks[i] | ||
if ((strict || other.state == stack.state) && other.pos == stack.pos) { | ||
let diff = stack.badness - other.badness || (stack.badness < Badness.Stabilizing ? 0 : stack.stack.length - other.stack.length) | ||
if (diff < 0) { stacks[i] = stack; return true } | ||
else if (diff > 0) return false | ||
private putStack(stack: Stack): boolean { | ||
if (stack.badness >= Badness.Deduplicate) { | ||
for (let i = 0; i < this.stacks.length; i++) { | ||
let other = this.stacks[i] | ||
if (other.state == stack.state && other.pos == stack.pos) { | ||
let diff = stack.badness - other.badness || stack.stack.length - other.stack.length | ||
if (diff < 0) { this.stacks[i] = stack; return true } | ||
else if (diff >= 0) return false | ||
} | ||
} | ||
} else if (stack.badness == 0 && this.stacks.length && stack.buffer.length > Badness.MaxParallelBufferLength) { | ||
// If a stack looks error-free, but isn't the only active one | ||
// _and_ has a buffer that is long but not the longest, prune | ||
// it, since this might be a situation where two stacks can | ||
// continue indefinitely. | ||
let maxOther = this.stacks.reduce((m, s) => Math.max(m, s.buffer.length), 0) | ||
if (maxOther > stack.buffer.length) return false | ||
} | ||
// Binary heap add | ||
let index = stacks.push(stack) - 1 | ||
let index = this.stacks.push(stack) - 1 | ||
while (index > 0) { | ||
let parentIndex = index >> 1, parent = stacks[parentIndex] | ||
let parentIndex = index >> 1, parent = this.stacks[parentIndex] | ||
if (stack.compare(parent) >= 0) break | ||
stacks[index] = parent | ||
stacks[parentIndex] = stack | ||
this.stacks[index] = parent | ||
this.stacks[parentIndex] = stack | ||
index = parentIndex | ||
@@ -287,2 +300,3 @@ } | ||
let stack = this.takeStack(), start = stack.pos, {input, parser} = stack.cx | ||
let base = verbose ? stack + " -> " : "" | ||
@@ -294,3 +308,3 @@ if (this.cache) { | ||
stack.useNode(cached, match) | ||
if (verbose) console.log(stack + ` (via reuse of ${parser.getName(cached.type.id)})`) | ||
if (verbose) console.log(base + stack + ` (via reuse of ${parser.getName(cached.type.id)})`) | ||
this.putStack(stack) | ||
@@ -327,3 +341,3 @@ return null | ||
let newStack = Stack.start(new StackContext(nested, stack.cx.maxBufferLength, clippedInput, stack, wrapType), stack.pos) | ||
if (verbose) console.log(newStack + ` (nested)`) | ||
if (verbose) console.log(base + newStack + ` (nested)`) | ||
this.putStack(newStack) | ||
@@ -338,3 +352,3 @@ } | ||
this.putStack(stack) | ||
if (verbose) console.log(stack + ` (via always-reduce ${parser.getName(defaultReduce & Action.ValueMask)})`) | ||
if (verbose) console.log(base + stack + ` (via always-reduce ${parser.getName(defaultReduce & Action.ValueMask)})`) | ||
return null | ||
@@ -349,6 +363,6 @@ } | ||
if (verbose) | ||
console.log(localStack + ` (via ${(action & Action.ReduceFlag) == 0 ? "shift" | ||
console.log(base + localStack + ` (via ${(action & Action.ReduceFlag) == 0 ? "shift" | ||
: `reduce of ${parser.getName(action & Action.ValueMask)}`} for ${ | ||
parser.getName(term)} @ ${start}${localStack == stack ? "" : ", split"})`) | ||
this.putStack(localStack, (action & Action.ReduceFlag) != 0) | ||
this.putStack(localStack) | ||
} | ||
@@ -359,5 +373,5 @@ if (actions.length > 0) return null | ||
if (start == input.length) { | ||
if (start == input.length) { // End of file | ||
if (!parser.stateFlag(stack.state, StateFlag.Accepting) && stack.forceReduce()) { | ||
if (verbose) console.log(stack + " (via forced reduction at eof)") | ||
if (verbose) console.log(base + stack + " (via forced reduction at eof)") | ||
this.putStack(stack) | ||
@@ -377,23 +391,35 @@ return null | ||
// Not end of file. See if we should recover. | ||
let minBad = this.stacks.reduce((m, s) => Math.min(m, s.badness), 1e9) | ||
// If this is not the best stack and its badness is above the | ||
// TooBadToRecover ceiling or RecoverToSibling times the best | ||
// stack, don't continue it. | ||
if (minBad <= stack.badness && | ||
(this.stacks.length >= Badness.MaxRecoverStacks || | ||
stack.badness > Math.min(Badness.TooBadToRecover, minBad * Badness.RecoverSiblingFactor))) | ||
return null | ||
let {end, value: term} = stack.cx.tokens.mainToken | ||
if (!this.strict && | ||
!(stack.badness > Badness.Wild && this.stacks.some(s => s.pos >= stack.pos && s.badness <= stack.badness))) { | ||
let inserted = stack.recoverByInsert(term) | ||
if (inserted) { | ||
if (verbose) console.log(inserted + " (via recover-insert)") | ||
this.putStack(inserted) | ||
} | ||
if (end == start) { | ||
if (start == input.length) return null | ||
end++ | ||
term = Term.Err | ||
} | ||
stack.recoverByDelete(term, end) | ||
if (verbose) console.log(stack + ` (via recover-delete ${parser.getName(term)})`) | ||
this.putStack(stack) | ||
} else if (!this.stacks.length) { | ||
// Only happens in strict mode | ||
if (this.strict) { | ||
if (this.stacks.length) return null | ||
throw new SyntaxError("No parse at " + start + " with " + parser.getName(term) + " (stack is " + stack + ")") | ||
} | ||
for (let insert of stack.recoverByInsert(term)) { | ||
if (verbose) console.log(base + insert + " (via recover-insert)") | ||
this.putStack(insert) | ||
} | ||
let reduce = stack.split() | ||
if (reduce.forceReduce()) { | ||
if (verbose) console.log(base + reduce + " (via force-reduce)") | ||
this.putStack(reduce) | ||
} | ||
if (end == start) { | ||
if (start == input.length) return null | ||
end++ | ||
term = Term.Err | ||
} | ||
stack.recoverByDelete(term, end) | ||
if (verbose) console.log(base + stack + ` (via recover-delete ${parser.getName(term)})`) | ||
this.putStack(stack) | ||
return null | ||
@@ -431,2 +457,7 @@ } | ||
if (verbose) console.log(parent + ` (via unnest ${stack.cx.wrapType > -1 ? parentParser.getName(stack.cx.wrapType) : tree.type.name})`) | ||
// Drop any other stack that has the same parent | ||
for (let i = 0; i < this.stacks.length;) { | ||
if (this.stacks[i].cx.parent == parent) this.takeStack(i) | ||
else i++ | ||
} | ||
return parent | ||
@@ -441,2 +472,3 @@ } | ||
maxNode: number | ||
private nextStateCache: (readonly number[] | null)[] = [] | ||
@@ -486,2 +518,3 @@ /// @internal | ||
this.maxNode = this.group.types.length - 1 | ||
for (let i = 0, l = this.states.length / ParseState.Size; i < l; i++) this.nextStateCache[i] = null | ||
} | ||
@@ -530,14 +563,5 @@ | ||
// Get a recovery action for a given state and terminal, or 0 when | ||
// none | ||
///@internal | ||
getRecover(state: number, terminal: number) { | ||
for (let i = this.stateSlot(state, ParseState.Recover), next; (next = this.data[i]) != Seq.End; i += 2) | ||
if (next == terminal) return this.data[i + 1] | ||
return 0 | ||
} | ||
/// @internal | ||
stateSlot(state: number, slot: number) { | ||
return this.states[(state << ParseState.Shift) + slot] | ||
return this.states[(state * ParseState.Size) + slot] | ||
} | ||
@@ -562,7 +586,29 @@ | ||
if (this.data[i] == Seq.End) return 0 | ||
let isReduce = this.data[i + 2] | ||
if (isReduce) return this.data[i + 1] | (isReduce << 16) | ||
let top = this.data[i + 2] | ||
if (top & (Action.ReduceFlag >> 16)) return this.data[i + 1] | (top << 16) | ||
} | ||
} | ||
/// Get the states that can follow this one through shift actions or | ||
/// goto jumps. @internal | ||
nextStates(state: number): readonly number[] { | ||
let cached = this.nextStateCache[state] | ||
if (cached) return cached | ||
let result: number[] = [] | ||
for (let i = this.stateSlot(state, ParseState.Actions); this.data[i] != Seq.End; i += 3) { | ||
if ((this.data[i + 2] & (Action.ReduceFlag >> 16)) == 0 && !result.includes(this.data[i + 1])) | ||
result.push(this.data[i + 1]) | ||
} | ||
let table = this.goto, max = table[0] | ||
for (let term = 0; term < max; term++) { | ||
for (let pos = table[term + 1];;) { | ||
let groupTag = table[pos++], target = table[pos++] | ||
for (let end = pos + (groupTag >> 1); pos < end; pos++) | ||
if (table[pos] == state && !result.includes(target)) result.push(target) | ||
if (groupTag & 1) break | ||
} | ||
} | ||
return this.nextStateCache[state] = result | ||
} | ||
/// @internal | ||
@@ -672,13 +718,17 @@ overrides(token: number, prev: number) { | ||
let doneStart = false, doneEnd = false, fragile = node.type.id == Term.Err | ||
if (!fragile) node.iterate(0, node.length, type => { | ||
return doneStart || (type.id == Term.Err ? fragile = doneStart = true : undefined) | ||
}, type => { | ||
doneStart = true | ||
if (!fragile) node.iterate({ | ||
enter(type) { | ||
return doneStart || (type.id == Term.Err ? fragile = doneStart = true : undefined) | ||
}, | ||
leave(type) { doneStart = true } | ||
}) | ||
if (!fragile) node.iterate(node.length, 0, type => { | ||
return doneEnd || (type.id == Term.Err ? fragile = doneEnd = true : undefined) | ||
}, type => { | ||
doneEnd = true | ||
if (!fragile) node.iterate({ | ||
from: node.length, | ||
to: 0, | ||
enter(type) { | ||
return doneEnd || (type.id == Term.Err ? fragile = doneEnd = true : undefined) | ||
}, | ||
leave(type) { doneEnd = true } | ||
}) | ||
return fragile | ||
} |
102
src/stack.ts
@@ -8,5 +8,22 @@ import {Action, Term, StateFlag, ParseState} from "./constants" | ||
Unit = 100, | ||
// Limits in between which stacks are less agressively pruned | ||
Stabilizing = 50, | ||
Wild = 150 | ||
// Badness at which we disallow adding a stack if another stack | ||
// shares its top state and position. | ||
Deduplicate = 200, | ||
// The maximum amount of active stacks at which recovery actions are | ||
// applied | ||
MaxRecoverStacks = 25, | ||
// If badness reaches this level (and there are sibling stacks), | ||
// don't recover. | ||
TooBadToRecover = 500, | ||
// If the best sibling is this amount better than the current stack, | ||
// don't apply recovery. | ||
RecoverSiblingFactor = 3, | ||
// Constant used to prune stacks that run error-free alongside each | ||
// other for too long | ||
MaxParallelBufferLength = 800 | ||
} | ||
@@ -144,3 +161,3 @@ | ||
storeNode(term: number, start: number, end: number, size = 4, isReduce = false) { | ||
if (term == Term.Err) { // Try to omit superfluous error nodes | ||
if (term == Term.Err) { // Try to omit/merge adjacent error nodes | ||
let cur: Stack | null = this, top = this.buffer.length | ||
@@ -151,4 +168,6 @@ if (top == 0 && cur.parent) { | ||
} | ||
if (top > 0 && cur.buffer[top - 4] == Term.Err && cur.buffer[top - 1] > -1 && | ||
(start == end || cur.buffer[top - 2] >= start)) return | ||
if (top > 0 && cur.buffer[top - 4] == Term.Err && cur.buffer[top - 1] > -1) { | ||
if (start == end) return | ||
if (cur.buffer[top - 2] >= start) { cur.buffer[top - 2] = end; return } | ||
} | ||
} | ||
@@ -167,3 +186,3 @@ | ||
index -= 4 | ||
size -= 4 | ||
if (size > 4) size -= 4 | ||
} | ||
@@ -245,3 +264,3 @@ this.buffer[index] = term | ||
this.storeNode(Term.Err, this.pos, nextEnd, isNode ? 8 : 4) | ||
this.pos = nextEnd | ||
this.pos = this.reducePos = nextEnd | ||
this.badness += Badness.Unit | ||
@@ -298,46 +317,23 @@ } | ||
// Scan for a state that has either a direct action or a recovery | ||
// action for next, without actually building up a new stack | ||
// Apply up to Recover.MaxNext recovery actions that conceptually | ||
// inserts some missing token or rule. | ||
/// @internal | ||
canRecover(next: number) { | ||
let visited: number[] | null = null, parser = this.cx.parser | ||
for (let sim = new SimulatedStack(this), i = 0;; i++) { | ||
if (parser.hasAction(sim.top, next) || parser.getRecover(sim.top, next) != 0) return true | ||
// Find a way to reduce from here | ||
let reduce = parser.anyReduce(sim.top) | ||
if (reduce == 0 && ((reduce = this.cx.parser.stateSlot(sim.top, ParseState.ForcedReduce)) & Action.ReduceFlag) == 0) return false | ||
sim.reduce(reduce) | ||
if (i > 10) { | ||
// Guard against getting stuck in a cycle | ||
if (!visited) visited = [] | ||
else if (i == 100 || visited.includes(sim.top)) return false | ||
visited.push(sim.top) | ||
} | ||
recoverByInsert(next: number): Stack[] { | ||
let nextStates = this.cx.parser.nextStates(this.state) | ||
if (nextStates.length > Recover.MaxNext) { | ||
let best = nextStates.filter(s => s != this.state && this.cx.parser.hasAction(s, next)) | ||
for (let i = 0; best.length < Recover.MaxNext && i < nextStates.length; i++) | ||
if (!best.includes(nextStates[i])) best.push(nextStates[i]) | ||
nextStates = best | ||
} | ||
} | ||
// Try to apply a recovery action that conceptually | ||
// inserts some missing content and syncs back to a state that will | ||
// match `next`. If it finds one, it'll return a new stack with the | ||
// insertion applied. If not, it'll return null. | ||
/// @internal | ||
recoverByInsert(next: number): Stack | null { | ||
if (!this.canRecover(next)) return null | ||
// Now that we know there's a recovery to be found, run the | ||
// reduces again, the expensive way, updating the stack | ||
let result = this.split(), parser = this.cx.parser | ||
result.reducePos = result.pos | ||
result.badness += Badness.Unit | ||
for (;;) { | ||
for (;;) { | ||
if (parser.hasAction(result.state, next)) return result | ||
let recover = parser.getRecover(result.state, next) | ||
if (!recover) break | ||
result.pushState(recover, result.pos) | ||
result.storeNode(Term.Err, result.reducePos, result.reducePos, 4, true) | ||
} | ||
result.forceReduce(false) | ||
let result: Stack[] = [] | ||
for (let i = 0; i < nextStates.length && result.length < Recover.MaxNext; i++) { | ||
if (nextStates[i] == this.state) continue | ||
let stack = this.split() | ||
stack.storeNode(Term.Err, stack.pos, stack.pos, 4, true) | ||
stack.pushState(nextStates[i], this.pos) | ||
stack.badness += Badness.Unit | ||
result.push(stack) | ||
} | ||
return result | ||
} | ||
@@ -348,5 +344,5 @@ | ||
/// @internal | ||
forceReduce(countBadness = true) { | ||
forceReduce() { | ||
let reduce = this.cx.parser.anyReduce(this.state) | ||
if (reduce == 0) { | ||
if ((reduce >> Action.ReduceDepthShift) == 0) { // Don't use 0 or a zero-depth reduction | ||
reduce = this.cx.parser.stateSlot(this.state, ParseState.ForcedReduce) | ||
@@ -376,2 +372,6 @@ if ((reduce & Action.ReduceFlag) == 0) return false | ||
const enum Recover { | ||
MaxNext = 4 | ||
} | ||
// Used to cheaply run some reductions to scan ahead without mutating | ||
@@ -378,0 +378,0 @@ // an entire stack |
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
1974310
227
3007
+ Addedlezer-tree@0.4.0(transitive)
- Removedlezer-tree@0.3.0(transitive)
Updatedlezer-tree@^0.4.0