Big News: Socket raises $60M Series C at a $1B valuation to secure software supply chains for AI-driven development.Announcement
Sign In

@amritk/yaml

Package Overview
Dependencies
Maintainers
1
Versions
4
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@amritk/yaml - npm Package Compare versions

Comparing version
0.1.1
to
0.2.0
+0
-1
dist/guards.d.ts

@@ -12,2 +12,1 @@ import type { YamlAlias, YamlMap, YamlPair, YamlScalar, YamlSeq } from './types.js';

export declare const isPair: (node: unknown) => node is YamlPair;
//# sourceMappingURL=guards.d.ts.map
+1
-2

@@ -5,4 +5,3 @@ export { isAlias, isMap, isPair, isScalar, isSeq } from './guards.js';

export { parse } from './parse.js';
export { parseDocument } from './parse-document.js';
export { parseAllDocuments, parseDocument } from './parse-document.js';
export type { ParseOptions, ScalarStyle, YamlAlias, YamlDocument, YamlError, YamlErrorKind, YamlMap, YamlNode, YamlPair, YamlScalar, YamlSeq, } from './types.js';
//# sourceMappingURL=index.d.ts.map

@@ -5,2 +5,2 @@ export { isAlias, isMap, isPair, isScalar, isSeq } from './guards.js';

export { parse } from './parse.js';
export { parseDocument } from './parse-document.js';
export { parseAllDocuments, parseDocument } from './parse-document.js';

@@ -19,2 +19,1 @@ /** A resolved source position. Lines and columns are 1-based, the YAML convention. */

export declare const lineCounter: (source: string) => LineCounter;
//# sourceMappingURL=line-counter.d.ts.map

@@ -14,2 +14,1 @@ import type { YamlNode } from './types.js';

export declare const nodeAtPath: (root: YamlNode | null, path: NodePath, closest?: boolean) => YamlNode | undefined;
//# sourceMappingURL=node-at-path.d.ts.map
import type { ParseOptions, YamlDocument } from './types.js';
/**
 * Parses a YAML document into a node tree with source ranges, collected
 * problems, and a lazy `toJS` projection. Only the first document of a stream is
 * read; use {@link parseAllDocuments} for multi-document (`---`-separated) input.
 *
 * @param source - The YAML text to parse.
 * @param options - Optional parse settings; see {@link ParseOptions}.
 * @returns The parsed {@link YamlDocument}.
 */
export declare const parseDocument: (source: string, options?: ParseOptions) => YamlDocument;
//# sourceMappingURL=parse-document.d.ts.map
/**
 * Parses a multi-document YAML stream into one {@link YamlDocument} per
 * `---`-separated document. Each document gets its own anchors and problem
 * lists. An empty stream yields an empty array; an explicit bare `---` yields
 * one null-contents document.
 *
 * The single-document hot path is untouched: this is a thin outer loop that only
 * does extra work once a real document boundary appears.
 *
 * @param source - The YAML text of the whole stream.
 * @param options - Optional parse settings; see {@link ParseOptions}.
 * @returns The documents of the stream, in source order.
 */
export declare const parseAllDocuments: (source: string, options?: ParseOptions) => YamlDocument[];

@@ -34,2 +34,5 @@ import { resolveDoubleQuoted, resolvePlainValue, resolveSingleQuoted } from './resolve-scalar.js';

const GT = 62; // >
const QUESTION = 63; // ?
const DOT = 46; // .
const PERCENT = 37; // %
// The common case is a value with no anchor/tag — share one frozen object so

@@ -39,2 +42,13 @@ // `scanProps` allocates nothing on the hot path.

const isSpace = (c) => c === SPACE || c === TAB;
/**
 * Reports whether offset `after` is a valid end for a `?` / `:` introducer:
 * either the offset is at or past the end of input, or the character there is
 * a space, a tab, or a line break. An ordinary scalar that merely begins with
 * `?` or `:` has some other character at that offset and is rejected.
 *
 * @param src - The full source text.
 * @param after - Offset of the character right after the introducer.
 * @param len - Length of `src`.
 * @returns `true` when the introducer is properly terminated.
 */
const introducerBoundary = (src, after, len) => {
    if (after < len) {
        switch (src.charCodeAt(after)) {
            case SPACE:
            case TAB:
            case NL:
            case CR:
                return true;
            default:
                return false;
        }
    }
    // Running off the end of the input also terminates the introducer.
    return true;
};
/** Offset just past the next line break (or end of input). */

@@ -48,2 +62,15 @@ const nextLineStart = (src, from, len) => {

/**
 * Tests for a stand-alone document marker (`---` or `...`) at offset `i`: three
 * identical dash or dot characters followed by a space, a tab, a line break, or
 * the end of input. The character at `i` selects the marker, so callers that
 * already looked at it can skip the call entirely on the common path.
 *
 * @param src - The full source text.
 * @param i - Offset of the first candidate marker character.
 * @param len - Length of `src`.
 * @returns `true` when a terminated marker starts at `i`.
 */
const isDocMarker = (src, i, len) => {
    const first = src.charCodeAt(i);
    const isMarkerChar = first === DASH || first === DOT;
    if (!isMarkerChar)
        return false;
    // All three characters must repeat the first one.
    if (src.charCodeAt(i + 1) !== first)
        return false;
    if (src.charCodeAt(i + 2) !== first)
        return false;
    const end = i + 3;
    if (end >= len)
        return true;
    const follow = src.charCodeAt(end);
    return follow === SPACE || follow === TAB || follow === NL || follow === CR;
};
/**
* Advances the cursor to the start of the next line with real content, skipping

@@ -67,2 +94,25 @@ * blank lines and full-line comments. Leaves `state.pos` parked at the start of

}
if (c === TAB) {
// Cold path: a tab sits in the indentation whitespace. YAML 1.2 forbids
// tabs for indentation, so skip the run of tabs/spaces to find the real
// content, then — only once we know content actually follows (a tab-only
// line is just blank) — record a single error and keep parsing.
let j = i;
while (j < len && (src.charCodeAt(j) === TAB || src.charCodeAt(j) === SPACE))
j++;
const cj = src.charCodeAt(j);
if (j >= len || cj === NL || cj === CR || cj === HASH) {
p = nextLineStart(src, j, len);
continue;
}
if (state.tabReportedAt !== p) {
pushError(state, 'TAB_INDENT', 'Tabs cannot be used for indentation', i, j);
state.tabReportedAt = p;
}
state.pos = p;
line.eof = false;
line.indent = j - p;
line.contentPos = j;
return line;
}
state.pos = p;

@@ -323,2 +373,7 @@ line.eof = false;

break;
// Only a top-level scalar (`parentIndent < 0`) can sit at column 0 alongside
// a `---`/`...` marker; for nested scalars the indent test above already
// stopped us, so this short-circuits to a single comparison off the hot path.
if (parentIndent < 0 && (c === DASH || c === DOT) && isDocMarker(src, i, len))
break;
const lineEnd = plainLineEnd(src, i, len);

@@ -368,2 +423,41 @@ if (!segments)

};
/**
 * Folds the interior lines of a `>` block scalar (indent already stripped, so
 * more-indented lines keep their extra leading white space). Per YAML 1.2 line
 * folding:
 *
 * - a break between two normal (non-empty, non-more-indented) lines folds to a
 *   single space;
 * - any break adjacent to a more-indented line stays a literal newline;
 * - a run of `p` blank lines yields `p` newlines between two normal lines but
 *   `p + 1` when either neighbour is more-indented (the entering break is only
 *   trimmed when it would otherwise have folded to a space).
 *
 * A line is "more-indented" when it starts with white space — a space *or a
 * tab* — after the block indentation has been removed.
 *
 * Blank lines after the last content line are dropped here.
 *
 * @param lines - The scalar's lines with the block indent already removed; a
 *   genuinely empty line is `''`.
 * @returns The folded text.
 */
const foldBlockFolded = (lines) => {
    let out = '';
    let started = false;
    let prevMore = false;
    let pendingBlanks = 0;
    for (const line of lines) {
        // An empty string is a fold blank. A whitespace-only line that reaches
        // past the block indent arrives as its leftover white space and is
        // handled below as a more-indented content line.
        if (line === '') {
            pendingBlanks++;
            continue;
        }
        // Leading space or tab marks a more-indented ("spaced") line, which
        // must never be folded into its neighbours.
        const lead = line[0];
        const curMore = lead === ' ' || lead === '\t';
        if (!started) {
            // Blank lines before the first content survive as leading line breaks.
            out = '\n'.repeat(pendingBlanks) + line;
            started = true;
        }
        else if (pendingBlanks > 0) {
            out += '\n'.repeat(prevMore || curMore ? pendingBlanks + 1 : pendingBlanks) + line;
        }
        else {
            out += (prevMore || curMore ? '\n' : ' ') + line;
        }
        prevMore = curMore;
        pendingBlanks = 0;
    }
    return out;
};
/** Reads a `|` literal or `>` folded block scalar with chomping and indent indicators. */

@@ -429,3 +523,3 @@ const scanBlockScalar = (state, parentIndent) => {

}
const body = folded ? foldSegments(lines) : lines.join('\n');
const body = folded ? foldBlockFolded(lines) : lines.join('\n');
let value = body;

@@ -522,4 +616,19 @@ if (chomp === 'strip')

}
items.push(parseFlowNode(state));
const item = parseFlowNode(state);
skipFlowWs(state);
if (state.src.charCodeAt(state.pos) === COLON) {
// `[ key: value ]` — an implicit single-pair mapping as a sequence entry
// (the shape `!!omap` is written in). Only reached when an item is actually
// followed by a colon, so plain `[a, b]` sequences pay nothing extra.
state.pos++;
skipFlowWs(state);
const vc = state.src.charCodeAt(state.pos);
const value = vc === COMMA || vc === RBRACKET || state.pos >= state.len ? null : parseFlowNode(state);
const pair = { kind: 'pair', key: item, value, start: item.start, end: value ? value.end : item.end };
items.push({ kind: 'map', items: [pair], start: item.start, end: pair.end });
skipFlowWs(state);
}
else {
items.push(item);
}
const sep = state.src.charCodeAt(state.pos);

@@ -613,2 +722,31 @@ if (sep === COMMA)

};
/**
 * Reads the node that comes after an explicit `?` or `:` introducer.
 *
 * - If content follows on the same line, it is parsed as an inline value.
 * - Otherwise the line is finished and the next content line decides: a deeper
 *   indent starts a nested block node; the same indent is accepted only when it
 *   opens a block sequence (`-` followed by white space or end of input).
 * - Anything else means there is no node, and `null` is returned.
 *
 * This is the same value handling the implicit `key:` path performs, kept in its
 * own function so the explicit-entry path does not touch the block-mapping loop.
 *
 * @param state - Parser state; `state.pos` sits just past the introducer.
 * @param indent - Indent of the mapping entry that owns the introducer.
 * @returns The parsed node, or `null` when nothing follows.
 */
const parseValueOrChild = (state, indent) => {
    skipInlineSpaces(state);
    if (!atLineEnd(state)) {
        // Same-line content.
        const inline = parseInlineValue(state, indent);
        finishLineIfMidLine(state);
        return inline;
    }
    finishLine(state);
    const next = peekLine(state);
    if (next.eof)
        return null;
    if (next.indent > indent) {
        // Deeper lines hold a nested block node.
        state.pos = next.contentPos;
        return parseNode(state, next.indent);
    }
    if (next.indent !== indent)
        return null;
    // Same indent: only a block-sequence entry may continue this node.
    const { src, len } = state;
    const at = next.contentPos;
    const opensSeq = src.charCodeAt(at) === DASH && (at + 1 >= len || isSpace(src.charCodeAt(at + 1)));
    if (!opensSeq)
        return null;
    state.pos = at;
    return parseBlockSeq(state, indent);
};
const keyText = (node) => {

@@ -640,6 +778,8 @@ if (node.kind === 'scalar') {

let colon;
let explicit;
if (firstEntry) {
// `parseNode` already located this colon to decide we are a mapping; reuse
// it instead of re-scanning the first line.
// `parseNode` already classified this line: a non-negative `firstColon` is
// an inline `key:`; a negative one signals an explicit `?` introducer.
colon = firstColon;
explicit = firstColon < 0;
}

@@ -656,51 +796,87 @@ else {

colon = findKeyColon(src, contentPos, len);
if (colon < 0)
break;
if (colon < 0) {
// No inline colon: either an explicit `? key` entry or the end of the map.
if (c === QUESTION && introducerBoundary(src, contentPos + 1, len))
explicit = true;
else
break;
}
else {
explicit = false;
}
}
firstEntry = false;
const lineContentPos = contentPos;
state.pos = contentPos;
let key;
const kc = src.charCodeAt(state.pos);
if (kc === DQUOTE || kc === SQUOTE) {
key = scanQuoted(state, kc);
}
else {
let end = colon;
while (end > lineContentPos && isSpace(src.charCodeAt(end - 1)))
end--;
const text = src.slice(lineContentPos, end);
key = {
let value = null;
if (explicit) {
// `? key` (inline or a block key on the deeper lines below), optionally
// followed by a `: value` line at the same indent. An absent `: value`
// line leaves the value null.
const qStart = contentPos;
state.pos = contentPos + 1;
key = parseValueOrChild(state, indent) ?? {
kind: 'scalar',
value: resolvePlainValue(text),
source: text,
value: null,
source: '',
style: 'plain',
start: lineContentPos,
end,
start: qStart + 1,
end: qStart + 1,
};
const vline = peekLine(state);
if (!vline.eof &&
vline.indent === indent &&
src.charCodeAt(vline.contentPos) === COLON &&
introducerBoundary(src, vline.contentPos + 1, len)) {
state.pos = vline.contentPos + 1;
value = parseValueOrChild(state, indent);
}
}
state.pos = colon + 1;
skipInlineSpaces(state);
let value = null;
if (atLineEnd(state)) {
// Value lives on the following lines (or is empty).
finishLine(state);
const child = peekLine(state);
if (!child.eof && child.indent > indent) {
state.pos = child.contentPos;
value = parseNode(state, child.indent);
else {
const lineContentPos = contentPos;
state.pos = contentPos;
const kc = src.charCodeAt(state.pos);
if (kc === DQUOTE || kc === SQUOTE) {
key = scanQuoted(state, kc);
}
else if (!child.eof && child.indent === indent) {
const cc = src.charCodeAt(child.contentPos);
if (cc === DASH && (child.contentPos + 1 >= len || isSpace(src.charCodeAt(child.contentPos + 1)))) {
else {
let end = colon;
while (end > lineContentPos && isSpace(src.charCodeAt(end - 1)))
end--;
const text = src.slice(lineContentPos, end);
key = {
kind: 'scalar',
value: resolvePlainValue(text),
source: text,
style: 'plain',
start: lineContentPos,
end,
};
}
state.pos = colon + 1;
skipInlineSpaces(state);
if (atLineEnd(state)) {
// Value lives on the following lines (or is empty).
finishLine(state);
const child = peekLine(state);
if (!child.eof && child.indent > indent) {
state.pos = child.contentPos;
value = parseBlockSeq(state, indent);
value = parseNode(state, child.indent);
}
else if (!child.eof && child.indent === indent) {
const cc = src.charCodeAt(child.contentPos);
if (cc === DASH && (child.contentPos + 1 >= len || isSpace(src.charCodeAt(child.contentPos + 1)))) {
state.pos = child.contentPos;
value = parseBlockSeq(state, indent);
}
}
}
else {
value = parseInlineValue(state, indent);
finishLineIfMidLine(state);
}
}
else {
value = parseInlineValue(state, indent);
finishLineIfMidLine(state);
}
if (state.uniqueKeys) {
// Duplicate-key tracking. Complex (map/seq) keys have no stable text form, so
// we skip them rather than collapse every one to the same bucket and falsely
// report a duplicate.
if (state.uniqueKeys && (key.kind === 'scalar' || key.kind === 'alias')) {
const text = keyText(key);

@@ -817,2 +993,7 @@ if (seen) {

return attachProps(parseBlockMap(state, indent, colon), props, state);
// An explicit `? key` introducer also starts a mapping; `-1` tells
// `parseBlockMap` the first entry has no inline colon to reuse.
if (cc === QUESTION && introducerBoundary(src, state.pos + 1, len)) {
return attachProps(parseBlockMap(state, indent, -1), props, state);
}
if (cc === DQUOTE || cc === SQUOTE)

@@ -848,2 +1029,72 @@ return attachProps(scanQuoted(state, cc), props, state);

};
/**
* Decodes a `!!binary` base64 payload to bytes without a dependency. `atob`
* handles the decode; surrounding whitespace (block scalars wrap base64 across
* lines) is stripped first. Returns `null` on malformed input so the caller can
* fall back to the raw value rather than throw.
*/
const decodeBase64 = (text) => {
try {
const binary = atob(text.replace(/\s+/g, ''));
const bytes = new Uint8Array(binary.length);
for (let i = 0; i < binary.length; i++)
bytes[i] = binary.charCodeAt(i);
return bytes;
}
catch {
return null;
}
};
/**
 * Re-interprets a scalar's value according to its `!!`-style tag. Callers
 * invoke this only for scalars that carry a tag, so untagged scalars pay just
 * the `node.tag !== undefined` check at the call site.
 *
 * Handled tags:
 * - `binary` → decoded bytes (raw value kept if the base64 is malformed)
 * - `timestamp` → `Date` (raw value kept if the date is invalid)
 * - `str` → string; a plain scalar returns its raw source, so `!!str 1.50`
 *   keeps its trailing zero
 * - `null` → `null`
 * - `bool` → `true` / `false` for the three case variants of each word in the
 *   source, otherwise the already-resolved value
 * - `int` → truncated number, or base-10 `parseInt` of the text
 * - `float` → the number as-is, or `parseFloat` of the text
 *
 * Coercion is explicit only: nothing here runs for an untagged ISO-looking
 * string. Any other tag leaves the value unchanged (the tag stays on the node).
 *
 * @param node - A scalar node with a `tag`, plus `value`, `source` and `style`.
 * @returns The coerced JavaScript value.
 */
const applyScalarTag = (node) => {
    const resolved = node.value;
    const tag = node.tag;
    // Text to re-interpret: the resolved string if there is one, else the raw source.
    const rawText = () => (typeof resolved === 'string' ? resolved : node.source);
    if (tag === 'binary') {
        const bytes = decodeBase64(rawText());
        return bytes ?? resolved;
    }
    if (tag === 'timestamp') {
        const date = new Date(rawText().trim());
        return Number.isNaN(date.getTime()) ? resolved : date;
    }
    if (tag === 'str') {
        // A plain scalar's raw source *is* the string; quoted and block styles
        // have already been resolved to one.
        if (node.style === 'plain')
            return node.source;
        if (typeof resolved === 'string')
            return resolved;
        return resolved === null ? '' : String(resolved);
    }
    if (tag === 'null')
        return null;
    if (tag === 'bool') {
        const word = node.source;
        if (word === 'true' || word === 'True' || word === 'TRUE')
            return true;
        if (word === 'false' || word === 'False' || word === 'FALSE')
            return false;
        return resolved;
    }
    if (tag === 'int') {
        if (typeof resolved === 'number')
            return Math.trunc(resolved);
        const parsed = Number.parseInt(rawText(), 10);
        return Number.isNaN(parsed) ? resolved : parsed;
    }
    if (tag === 'float') {
        if (typeof resolved === 'number')
            return resolved;
        const parsed = Number.parseFloat(rawText());
        return Number.isNaN(parsed) ? resolved : parsed;
    }
    return resolved;
};
const toJsValue = (node, anchors, merge) => {

@@ -853,3 +1104,3 @@ if (node === null)

if (node.kind === 'scalar')
return node.value;
return node.tag !== undefined ? applyScalarTag(node) : node.value;
if (node.kind === 'alias') {

@@ -866,2 +1117,7 @@ const target = anchors.get(node.source);

out[i] = toJsValue(items[i] ?? null, anchors, merge);
// `!!omap` is an ordered map written as a sequence of single-pair maps;
// collapse it to a `Map` to match `yaml` (eemeli). One `=== 'omap'` check
// per sequence keeps the untagged path effectively free.
if (node.tag === 'omap')
return toOmap(out);
return out;

@@ -882,4 +1138,25 @@ }

}
// `!!set` is a mapping whose keys are the members; project to a `Set`.
if (node.tag === 'set') {
const set = new Set();
for (let i = 0; i < items.length; i++) {
const pair = items[i];
if (pair)
set.add(toJsValue(pair.key, anchors, merge));
}
return set;
}
return obj;
};
/** Folds a `!!omap` sequence (single-pair maps) into an ordered `Map`. */
const toOmap = (items) => {
const map = new Map();
for (const item of items) {
if (item && typeof item === 'object') {
for (const [k, value] of Object.entries(item))
map.set(k, value);
}
}
return map;
};
/** Folds a `<<` merge value (a map or list of maps) into `target` without overriding existing keys. */

@@ -899,18 +1176,26 @@ const applyMerge = (target, value) => {

};
const newState = (source, options) => ({
src: source,
len: source.length,
pos: 0,
errors: [],
warnings: [],
anchors: new Map(),
uniqueKeys: options.uniqueKeys !== false,
merge: options.merge !== false,
tabReportedAt: -1,
line: { eof: false, indent: 0, contentPos: 0 },
});
/**
 * Wraps `contents` and the state's current problem lists into a document.
 *
 * The anchors map and merge flag are captured when this runs, not when `toJS`
 * is later called, so replacing `state.errors`, `state.warnings` or
 * `state.anchors` afterwards does not affect a document already built.
 *
 * @param state - Parser state holding `errors`, `warnings`, `anchors`, `merge`.
 * @param contents - Root node of the document, or `null` when it is empty.
 * @returns `{ contents, errors, warnings, toJS }`, where `toJS` projects the
 *   tree to plain data on demand.
 */
const finishDocument = (state, contents) => {
    const anchors = state.anchors;
    const merge = state.merge;
    return {
        contents,
        errors: state.errors,
        warnings: state.warnings,
        toJS: () => toJsValue(contents, anchors, merge),
    };
};
/**
* Parses a YAML document into a node tree with source ranges, collected
* problems, and a lazy `toJS` projection.
* problems, and a lazy `toJS` projection. Only the first document of a stream is
* read; use {@link parseAllDocuments} for multi-document (`---`-separated) input.
*/
export const parseDocument = (source, options = {}) => {
const state = {
src: source,
len: source.length,
pos: 0,
errors: [],
warnings: [],
anchors: new Map(),
uniqueKeys: options.uniqueKeys !== false,
merge: options.merge !== false,
line: { eof: false, indent: 0, contentPos: 0 },
};
const state = newState(source, options);
skipDocumentHead(state);

@@ -928,9 +1213,79 @@ const head = peekLine(state);

}
const { anchors, merge } = state;
return {
contents,
errors: state.errors,
warnings: state.warnings,
toJS: () => toJsValue(contents, anchors, merge),
};
return finishDocument(state, contents);
};
/**
 * Skips everything that may precede a document body in a stream: lines whose
 * content starts with `%` (directives), `...` end markers, and at most one
 * `---` start marker.
 *
 * Scanning stops at end of input, at the first line that is none of the above,
 * or right after a `---` marker has been consumed.
 *
 * @param state - Parser state; `state.pos` is advanced past the skipped lines.
 * @returns `true` when a `---` start marker was consumed, which marks an
 *   explicit (possibly empty) document even if no body follows.
 */
const skipStreamHead = (state) => {
    const { src, len } = state;
    while (true) {
        const head = peekLine(state);
        if (head.eof)
            return false;
        const at = head.contentPos;
        const first = src.charCodeAt(at);
        if (first === PERCENT) {
            // Directive line: skipped, not interpreted.
            state.pos = nextLineStart(src, at, len);
            continue;
        }
        const marker = (first === DASH || first === DOT) && isDocMarker(src, at, len);
        if (!marker)
            return false;
        state.pos = nextLineStart(src, at + 3, len);
        // `---` opens a document; `...` only closes one, so keep scanning.
        if (first === DASH)
            return true;
    }
};
/**
 * Parses a YAML stream into one {@link YamlDocument} per `---`-separated
 * document. Every document receives its own error list, warning list and
 * anchor map. An empty stream produces an empty array; a bare `---` produces a
 * single document whose contents are `null`.
 *
 * The loop is a thin wrapper around the single-document parser: per document it
 * skips the stream head, parses one body (if any), and resets the per-document
 * collections on the shared state.
 *
 * @param source - The YAML text of the whole stream.
 * @param options - Parse options, forwarded to the parser state.
 * @returns The parsed documents, in source order.
 */
export const parseAllDocuments = (source, options = {}) => {
    const state = newState(source, options);
    const { src, len } = state;
    // Step over a leading byte-order mark.
    if (src.charCodeAt(0) === 0xfeff)
        state.pos = 1;
    const docs = [];
    while (true) {
        const explicitStart = skipStreamHead(state);
        const head = peekLine(state);
        let contents = null;
        let parsedBody = false;
        if (!head.eof) {
            const at = head.contentPos;
            const first = src.charCodeAt(at);
            const marker = (first === DASH || first === DOT) && isDocMarker(src, at, len);
            if (!marker) {
                state.pos = at;
                contents = parseNode(state, head.indent);
                finishLineIfMidLine(state);
                parsedBody = true;
            }
            else if (first === DOT) {
                // A `...` end marker closes this (empty) document; consume it.
                state.pos = nextLineStart(src, at + 3, len);
            }
            // Otherwise a `---` marker follows: this document is empty, and the
            // marker is left for the next iteration's `skipStreamHead`.
        }
        // Neither a start marker nor a body: the stream is exhausted.
        if (!explicitStart && !parsedBody)
            break;
        docs.push(finishDocument(state, contents));
        // Give the next document its own problem lists and anchor scope.
        state.errors = [];
        state.warnings = [];
        state.anchors = new Map();
    }
    return docs;
};

@@ -8,2 +8,1 @@ import type { ParseOptions } from './types.js';

export declare const parse: (source: string, options?: ParseOptions) => unknown;
//# sourceMappingURL=parse.d.ts.map

@@ -15,2 +15,1 @@ /**

export declare const resolveDoubleQuoted: (inner: string) => string;
//# sourceMappingURL=resolve-scalar.d.ts.map

@@ -101,2 +101,1 @@ /**

};
//# sourceMappingURL=types.d.ts.map
{
"name": "@amritk/yaml",
"version": "0.1.1",
"description": "A tiny, dependency-free YAML parser with exact source positions. Built for diagnostics: every node maps back to an exact line:column.",
"version": "0.2.0",
"description": "A fast, featherweight, zero-dependency YAML parser for OpenAPI tooling — with exact source positions (line:column) on every node.",
"module": "./dist/index.js",

@@ -31,4 +31,3 @@ "type": "module",

"files": [
"dist",
"src"
"dist"
],

@@ -46,3 +45,2 @@ "publishConfig": {

".": {
"development": "./src/index.ts",
"default": "./dist/index.js",

@@ -49,0 +47,0 @@ "types": "./dist/index.d.ts"

@@ -5,3 +5,3 @@ <div align="center">

**A tiny, dependency-free YAML parser with exact source positions — built for diagnostics.**
**The featherweight YAML parser built for OpenAPI tooling — fast, zero-dependency, and it never loses track of where a value came from, down to the column.**

@@ -25,7 +25,11 @@ ![status](https://img.shields.io/badge/status-pre--alpha-ef4444?style=flat-square)&nbsp;

- **vs [`yaml`](https://www.npmjs.com/package/yaml) (eemeli)** — the only other parser here that also tracks source positions — building the source-mapped tree is **~25–31× faster**, and the bundle is **~7.3× smaller**.
- **vs [`js-yaml`](https://www.npmjs.com/package/js-yaml)** — which has **no concept of source positions** — parsing straight to data is **~1.8–2× faster**, the bundle is **~2.8× smaller**, and we *also* hand you the positioned tree it cannot produce.
- **vs [`yaml`](https://www.npmjs.com/package/yaml) (eemeli)** — the only other parser here that also tracks source positions — building the source-mapped tree is **~25–31× faster**, and the bundle is **~6× smaller**.
- **vs [`js-yaml`](https://www.npmjs.com/package/js-yaml)** — which has **no concept of source positions** — parsing straight to data is **~1.8–2× faster**, the bundle is **~2.3× smaller**, and we *also* hand you the positioned tree it cannot produce.
It targets the YAML that real configuration and OpenAPI documents use: block and flow collections, all three quoting styles, literal/folded block scalars with chomping, comments, anchors, aliases, and merge keys. Scalars resolve via the YAML 1.2 **core schema**, so an OpenAPI `version: 1.0.0` stays the string `"1.0.0"` instead of turning into a number.
It targets the YAML that real configuration and OpenAPI documents use: block and flow collections, all three quoting styles, literal/folded block scalars with chomping, comments, anchors, aliases, merge keys, explicit `? key` / `: value` entries, and multi-document (`---`-separated) streams. Scalars resolve via the YAML 1.2 **core schema** — so an OpenAPI `version: 1.0.0` stays the string `"1.0.0"` instead of turning into a number — and the core-schema `!!` tags (`!!str`, `!!int`, `!!float`, `!!bool`, `!!null`) coerce a value when written.
**OpenAPI compatibility.** OpenAPI restricts its YAML to the JSON-compatible subset — *"tags MUST be limited to those allowed by the JSON Schema ruleset"* and map keys must be scalar strings — and that subset is exactly what's covered above. Keeping `version: 1.0.0` a string (rather than a float) and *not* coercing untagged ISO dates into `Date`s is the correct, round-trip-safe behavior an OpenAPI tool needs.
Beyond that JSON-compatible core, the common extended tags resolve too, for general config files (Kubernetes, CI, Ansible) that use them — matching `yaml` (eemeli): `!!binary` → `Uint8Array`, `!!timestamp` → `Date`, `!!set` → `Set`, and `!!omap` → `Map`. These fire only on an *explicit* tag, so they never change how a tagless OpenAPI document parses. (A conformant OpenAPI spec won't contain them.)
---

@@ -80,2 +84,14 @@

### Parse a multi-document stream
```ts
import { parseAllDocuments } from '@amritk/yaml'
const docs = parseAllDocuments('kind: Service\n---\nkind: Deployment\n')
docs.map((d) => d.toJS())
// → [{ kind: 'Service' }, { kind: 'Deployment' }]
```
Each document gets its own `contents`, `errors`, `warnings`, and anchor scope (an alias in one document does not resolve an anchor declared in another). `parseDocument` reads only the first document of a stream.
### Walk the tree

@@ -104,2 +120,3 @@

| `parseDocument(source, options?)` | Parse to `{ contents, errors, warnings, toJS() }` where every node carries `start`/`end` source offsets. |
| `parseAllDocuments(source, options?)` | Parse a multi-document (`---`-separated) stream to an array of documents, each with its own anchors and problems. |
| `nodeAtPath(root, path, closest?)` | Resolve a JSON path to its node (carrying `start`/`end`), optionally falling back to the closest ancestor. |

@@ -140,5 +157,5 @@ | `lineCounter(source)` | Build an `offset → { line, col }` mapper (1-based). |

| --- | --- | --- |
| **@amritk/yaml** | **4.8 KB** | — |
| yaml | 35.6 KB | 7.3× larger |
| js-yaml | 13.5 KB | 2.8× larger |
| **@amritk/yaml** | **6.0 KB** | — |
| yaml | 35.6 KB | 5.9× larger |
| js-yaml | 13.5 KB | 2.3× larger |

@@ -151,4 +168,53 @@ Correctness is pinned to `yaml` by a differential test suite (`src/differential.test.ts`) that parses a battery of documents — including full OpenAPI specs — and asserts byte-identical data output. Where `js-yaml` diverges (its `!!timestamp` type turns ISO strings into `Date`s, which is wrong for a JSON superset), we instead agree with `yaml`.

The parser covers the YAML that configuration and OpenAPI documents use in the wild. A few exotic YAML 1.2 constructs are intentionally out of scope to stay tiny and fast: explicit `? key` mapping entries, multi-document streams (only the first document is read), custom/global tags beyond `!!`-style hints, and non-space indentation. If you need full YAML 1.2 conformance, use `yaml`; if you need a small, fast, position-aware parser for diagnostics, use this.
This is **not** a fully conformant YAML 1.2 processor. It implements the subset
that real configuration and OpenAPI documents use, plus the YAML 1.2 **core
schema** for scalar typing. The exact boundaries:
### Supported
**Structure**
- Block mappings (`key: value`) and block sequences (`- item`), nested arbitrarily.
- Flow mappings `{ … }` and flow sequences `[ … ]`, including spanning multiple lines (split at token boundaries) and trailing commas.
- Implicit single-pair entries inside a flow sequence (`[ key: value ]`).
- Explicit `? key` / `: value` entries, including block and complex (map/seq) keys.
**Scalars**
- Plain (unquoted), single-quoted (`''` escape), and double-quoted scalars (full escapes — `\n`, `\t`, `\xNN`, `\uNNNN`, `\UNNNNNNNN` — line continuation, and folding).
- Literal `|` and folded `>` block scalars with chomping (`-` strip, `+` keep, default clip) and explicit indentation indicators.
- Multi-line plain scalars (folded) in block context.
**Type resolution (YAML 1.2 core schema)**
- `null` (`null`/`Null`/`NULL`/`~`/empty), booleans (`true`/`false` and case variants), integers (decimal, `0x` hex, `0o` octal), floats (including `.inf`/`-.inf`/`.nan`); everything else is a string. So `version: 1.0.0` stays the string `"1.0.0"`.
**Tags**
- Core scalar tags (the JSON-compatible set OpenAPI allows): `!!str`, `!!int`, `!!float`, `!!bool`, `!!null`.
- Extended tags, for general config files beyond the OpenAPI subset: `!!binary` → `Uint8Array`, `!!timestamp` → `Date`, `!!set` → `Set`, `!!omap` → `Map` (matching `yaml`). A conformant OpenAPI document won't use these.
- Any other tag is **captured on the node** (readable via `node.tag`) and its value passed through unchanged.
**References, documents, and trivia**
- Anchors (`&name`) and aliases (`*name`); `<<` merge keys (toggle with the `merge` option).
- Multi-document streams (`---` / `...`) via `parseAllDocuments`, each document with its own anchor scope and problem list.
- Comments (full-line and inline), blank lines, and a leading byte-order mark.
**Diagnostics**
- Exact `[start, end)` source span on every node, duplicate-key detection (`DUPLICATE_KEY`), unterminated flow collections (`UNTERMINATED_FLOW`), and tab-in-indentation (`TAB_INDENT`).
### Not supported
- **Tab indentation.** Forbidden by YAML 1.2; reported as a `TAB_INDENT` error rather than parsed. (Tabs *after* content — e.g. separating a key from its value — are fine.)
- **Directive processing.** `%YAML` and `%TAG` lines are skipped, not applied. There is no resolution of named tag handles (`!handle!suffix`) or verbatim tags (`!<uri>`); every `!`/`!!` prefix is stripped and only the core/extended tag names above are interpreted, so a local `!foo` and `!!foo` are treated alike.
- **Schema selection.** Always the 1.2 core schema — no JSON, failsafe, or YAML 1.1 schema switch.
- **YAML 1.1-only scalar forms.** `yes`/`no`/`on`/`off` booleans, sexagesimal numbers (`1:30:00`), and underscore digit groups (`1_000`) stay strings, per the 1.2 core schema.
- **Implicit timestamps.** An untagged ISO date string stays a string; only an explicit `!!timestamp` produces a `Date`.
- **Multi-line plain scalars inside flow collections.** A plain scalar that *wraps across lines* within `[ … ]` / `{ … }` is not folded (the collection itself may still span lines at token boundaries).
- **Reserved indicators.** A plain scalar beginning with the reserved `@` or `` ` `` is accepted as text rather than rejected.
If you need full YAML 1.2 conformance, use [`yaml`](https://www.npmjs.com/package/yaml). If you need a small, fast, position-aware parser for diagnostics, use this.
---

@@ -155,0 +221,0 @@

{"version":3,"file":"guards.d.ts","sourceRoot":"","sources":["../src/guards.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,OAAO,EAAY,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,SAAS,CAAA;AAE1F;;;;GAIG;AAEH,eAAO,MAAM,QAAQ,SAAU,OAAO,KAAG,IAAI,IAAI,UACkC,CAAA;AAEnF,eAAO,MAAM,OAAO,SAAU,OAAO,KAAG,IAAI,IAAI,SACkC,CAAA;AAElF,eAAO,MAAM,KAAK,SAAU,OAAO,KAAG,IAAI,IAAI,OACkC,CAAA;AAEhF,eAAO,MAAM,KAAK,SAAU,OAAO,KAAG,IAAI,IAAI,OACkC,CAAA;AAEhF,eAAO,MAAM,MAAM,SAAU,OAAO,KAAG,IAAI,IAAI,QACkC,CAAA"}
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,UAAU,CAAA;AAClE,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAA;AAC5E,OAAO,EAAE,KAAK,QAAQ,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAA;AAC1D,OAAO,EAAE,KAAK,EAAE,MAAM,SAAS,CAAA;AAC/B,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAA;AAChD,YAAY,EACV,YAAY,EACZ,WAAW,EACX,SAAS,EACT,YAAY,EACZ,SAAS,EACT,aAAa,EACb,OAAO,EACP,QAAQ,EACR,QAAQ,EACR,UAAU,EACV,OAAO,GACR,MAAM,SAAS,CAAA"}
{"version":3,"file":"line-counter.d.ts","sourceRoot":"","sources":["../src/line-counter.ts"],"names":[],"mappings":"AAAA,sFAAsF;AACtF,MAAM,MAAM,OAAO,GAAG;IACpB,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,MAAM,CAAA;CACZ,CAAA;AAED,MAAM,MAAM,WAAW,GAAG;IACxB,4DAA4D;IAC5D,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAA;CACrC,CAAA;AAED;;;;;;;GAOG;AACH,eAAO,MAAM,WAAW,WAAY,MAAM,KAAG,WAqB5C,CAAA"}
{"version":3,"file":"node-at-path.d.ts","sourceRoot":"","sources":["../src/node-at-path.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAA;AAEvC,iFAAiF;AACjF,MAAM,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,CAAA;AAEnD;;;;;;;;GAQG;AACH,eAAO,MAAM,UAAU,SAAU,QAAQ,GAAG,IAAI,QAAQ,QAAQ,wBAAoB,QAAQ,GAAG,SA2B9F,CAAA"}
{"version":3,"file":"parse-document.d.ts","sourceRoot":"","sources":["../src/parse-document.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAA+D,MAAM,SAAS,CAAA;AAy1BtH;;;GAGG;AACH,eAAO,MAAM,aAAa,WAAY,MAAM,YAAW,YAAY,KAAQ,YAkC1E,CAAA"}
{"version":3,"file":"parse.d.ts","sourceRoot":"","sources":["../src/parse.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAE3C;;;;GAIG;AACH,eAAO,MAAM,KAAK,WAAY,MAAM,YAAY,YAAY,KAAG,OAAgD,CAAA"}
{"version":3,"file":"resolve-scalar.d.ts","sourceRoot":"","sources":["../src/resolve-scalar.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AA8BH,8EAA8E;AAC9E,eAAO,MAAM,iBAAiB,SAAU,MAAM,KAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,IAoD5E,CAAA;AA4ED,oFAAoF;AACpF,eAAO,MAAM,mBAAmB,UAAW,MAAM,KAAG,MAGnD,CAAA;AAED,6FAA6F;AAC7F,eAAO,MAAM,mBAAmB,UAAW,MAAM,KAAG,MAgCnD,CAAA"}
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,oFAAoF;AACpF,MAAM,MAAM,WAAW,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,eAAe,GAAG,cAAc,CAAA;AAE1F;;;;GAIG;AACH,MAAM,MAAM,UAAU,GAAG;IACvB,IAAI,EAAE,QAAQ,CAAA;IACd,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,IAAI,CAAA;IACvC,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,EAAE,WAAW,CAAA;IAClB,8CAA8C;IAC9C,KAAK,EAAE,MAAM,CAAA;IACb,4CAA4C;IAC5C,GAAG,EAAE,MAAM,CAAA;IACX,mEAAmE;IACnE,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ,wDAAwD;IACxD,MAAM,CAAC,EAAE,MAAM,CAAA;CAChB,CAAA;AAED,iFAAiF;AACjF,MAAM,MAAM,SAAS,GAAG;IACtB,IAAI,EAAE,OAAO,CAAA;IACb,sEAAsE;IACtE,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;CACZ,CAAA;AAED,yDAAyD;AACzD,MAAM,MAAM,QAAQ,GAAG;IACrB,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,QAAQ,CAAA;IACb,iFAAiF;IACjF,KAAK,EAAE,QAAQ,GAAG,IAAI,CAAA;IACtB,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;CACZ,CAAA;AAED,sDAAsD;AACtD,MAAM,MAAM,OAAO,GAAG;IACpB,IAAI,EAAE,KAAK,CAAA;IACX,KAAK,EAAE,QAAQ,EAAE,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ,MAAM,CAAC,EAAE,MAAM,CAAA;CAChB,CAAA;AAED,6CAA6C;AAC7C,MAAM,MAAM,OAAO,GAAG;IACpB,IAAI,EAAE,KAAK,CAAA;IACX,KAAK,EAAE,QAAQ,EAAE,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ,MAAM,CAAC,EAAE,MAAM,CAAA;CAChB,CAAA;AAED,MAAM,MAAM,QAAQ,GAAG,UAAU,GAAG,SAAS,GAAG,OAAO,GAAG,OAAO,CAAA;AAEjE,iGAAiG;AACjG,MAAM,MAAM,aAAa,GAAG,OAAO,GAAG,SAAS,CAAA;AAE/C;;;;GAIG;AACH,MAAM,MAAM,SAAS,GAAG;IACtB,IAAI,EAAE,aAAa,CAAA;IACnB,8FAA8F;IAC9F,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;CACZ,CAAA;AAED,oFAAoF;AACpF,MAAM,MAAM,YAAY,GAAG;IACzB,kDAAkD;IAClD,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAAA;IACzB,MAAM,EAAE,SAAS,EAAE,CAAA;IACnB,QAAQ,EAAE,SAAS,EAAE,CAAA;IACrB,iFAAiF;IACjF,IAAI,EAAE,MAAM,OAAO,CAAA;CACpB,CAAA;AAED,MAAM,MAAM,YAAY,GAAG;IACzB;;;OAGG;IACH,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,CAAA;CAChB,CAAA"}
import { readFileSync } from 'node:fs'
import { describe, expect, it } from 'vitest'
import { parse as eemeli } from 'yaml'
import { FIXTURES } from '../bench/fixtures'
import { parse as ours } from './parse'
/**
 * Names of the real-world public specs vendored under `fixtures/` (provenance
 * is recorded in `fixtures/README.md`). `bench/fixtures.ts` stays synthetic;
 * these are the documents we don't control.
 */
const VENDORED = ['digitalocean'] as const

/**
 * Loads `../fixtures/<name>.yaml` (resolved relative to this module) as UTF-8
 * text. Reading from disk keeps the bytes identical to what the upstream
 * publisher serves.
 */
const readVendored = (name: string): string => {
  const location = new URL(`../fixtures/${name}.yaml`, import.meta.url)
  return readFileSync(location, 'utf8')
}
/**
 * Differential tests against `yaml` (eemeli) — the reference parser the Loupe
 * linter currently uses and the one that, like us, tracks source positions. For
 * the YAML subset that real configuration and OpenAPI documents use, our plain
 * data projection must match it exactly. Where we intentionally diverge from
 * `js-yaml` (its `!!timestamp` type turns ISO strings into `Date`s, which is
 * wrong for a JSON superset) we instead agree with `yaml`.
 *
 * Each entry is one complete YAML source. The suite below parses every entry
 * with both parsers and compares the results with `toEqual`.
 */
const CASES: string[] = [
// Scalars and core-schema typing.
'n: 42\nf: 3.14\nneg: -5\nhex: 0x1F\nb1: true\nb2: false\nnil: null\ntilde: ~\n',
'version: 1.0.0\nopenapi: 3.1.0\nname: hello world\n',
'empty:\nafter: 1\n',
// Sequences in both styles.
'- a\n- b\n- c\n',
'items:\n- 1\n- 2\n',
'matrix:\n - [1, 2]\n - [3, 4]\n',
// Mappings nested in sequences and vice versa.
'- name: a\n tags: [x, y]\n- name: b\n tags: []\n',
// Flow collections.
'a: [1, 2, 3]\nb: {x: 1, y: 2}\nc: []\nd: {}\n',
'nested: {list: [1, {deep: true}], n: null}\n',
// Quoting and escapes.
`s: 'single ''quote'''\nd: "double \\"quote\\" and \\n newline"\n`,
'q: "https://example.com/a?b=c&d=e"\n',
// Multi-line flow scalars — folding edge cases that real specs (GitHub's
// OpenAPI) hit: trailing whitespace on the closing line is literal content,
// and a blank-line run reaching the close yields one fewer newline.
"s: 'first line\n second line. '\n",
's: "first line\n second line. "\n',
"s: 'para one\n still one\n\n '\n",
's: "para one\n still one\n\n "\n',
"s: 'a\n\n\n '\nt: 'a\n b\n\n c'\n",
// Block scalars with chomping.
'text: |\n line one\n line two\n',
'text: |-\n no trailing\n',
'folded: >\n one\n two\n three\n',
// Comments scattered through the document.
'# leading\na: 1 # inline\n# middle\nb: 2\n',
// Anchors, aliases, and merge keys.
'a: &x\n k: 1\nb: *x\n',
'base: &b {p: 1, q: 2}\nuse:\n <<: *b\n q: 3\n',
// Realistic documents (the synthetic fixtures imported from `../bench/fixtures`).
FIXTURES.small,
FIXTURES.medium,
FIXTURES.large,
]
describe('differential', () => {
  // Line breaks are escaped so every case label stays on one reporter line.
  const escapeNewlines = (text: string): string => text.replace(/\n/g, '\\n')

  CASES.forEach((source, index) => {
    // Sources longer than 40 characters are cut to 37 plus an ellipsis.
    const label = source.length > 40 ? `${escapeNewlines(source.slice(0, 37))}…` : escapeNewlines(source)

    it(`matches yaml for case ${index}: ${label}`, () => {
      const actual = ours(source)
      // `yaml` leaves merge keys off by default while we enable them, so turn
      // them on for the reference parser to line the two up on the `<<` case.
      const expected = eemeli(source, { merge: true })
      expect(actual).toEqual(expected)
    })
  })

  // Large, real-world public specs we don't control — the documents this
  // parser actually has to survive in the wild.
  VENDORED.forEach((name) => {
    it(`matches yaml for vendored spec: ${name}`, () => {
      const source = readVendored(name)
      const actual = ours(source)
      const expected = eemeli(source, { merge: true })
      expect(actual).toEqual(expected)
    })
  })
})
import { describe, expect, it } from 'vitest'
import { isAlias, isMap, isPair, isScalar, isSeq } from './guards'
import { parseDocument } from './parse-document'
describe('guards', () => {
  it('identifies a scalar', () => {
    const { contents } = parseDocument('hello')
    expect(isScalar(contents)).toBe(true)
    expect(isMap(contents)).toBe(false)
    expect(isSeq(contents)).toBe(false)
  })

  it('identifies a map and its pairs', () => {
    const { contents } = parseDocument('a: 1')
    expect(isMap(contents)).toBe(true)
    // Narrow before reaching into `items`; the assertion above already fails
    // the test if the root is not a map.
    if (!isMap(contents)) return
    const first = contents.items[0]
    expect(isPair(first)).toBe(true)
    expect(isScalar(first?.key)).toBe(true)
  })

  it('identifies a sequence', () => {
    const { contents } = parseDocument('- 1\n- 2')
    expect(isSeq(contents)).toBe(true)
  })

  it('identifies an alias', () => {
    const { contents } = parseDocument('a: &x 1\nb: *x')
    if (!isMap(contents)) return
    // The second pair's value is the `*x` reference itself.
    expect(isAlias(contents.items[1]?.value)).toBe(true)
  })

  it('rejects non-nodes', () => {
    expect(isScalar(null)).toBe(false)
    expect(isMap(undefined)).toBe(false)
    expect(isSeq('a string')).toBe(false)
    expect(isPair(42)).toBe(false)
  })
})
import type { YamlAlias, YamlMap, YamlNode, YamlPair, YamlScalar, YamlSeq } from './types'
/**
 * Narrowing guards over the node union. They let consumers walk a tree without
 * reaching for `kind` string comparisons, mirroring the ergonomics of the
 * mainstream `yaml` package so swapping parsers is mechanical.
 */

/** Reads the `kind` discriminant of any non-null object; `undefined` for everything else. */
const kindOf = (node: unknown): unknown =>
  typeof node === 'object' && node !== null ? (node as YamlNode | YamlPair).kind : undefined

/** True when `node` is an object tagged `kind: 'scalar'`. */
export const isScalar = (node: unknown): node is YamlScalar => kindOf(node) === 'scalar'

/** True when `node` is an object tagged `kind: 'alias'`. */
export const isAlias = (node: unknown): node is YamlAlias => kindOf(node) === 'alias'

/** True when `node` is an object tagged `kind: 'map'`. */
export const isMap = (node: unknown): node is YamlMap => kindOf(node) === 'map'

/** True when `node` is an object tagged `kind: 'seq'`. */
export const isSeq = (node: unknown): node is YamlSeq => kindOf(node) === 'seq'

/** True when `node` is an object tagged `kind: 'pair'`. */
export const isPair = (node: unknown): node is YamlPair => kindOf(node) === 'pair'
export { isAlias, isMap, isPair, isScalar, isSeq } from './guards'
export { type LineCounter, type LinePos, lineCounter } from './line-counter'
export { type NodePath, nodeAtPath } from './node-at-path'
export { parse } from './parse'
export { parseDocument } from './parse-document'
export type {
ParseOptions,
ScalarStyle,
YamlAlias,
YamlDocument,
YamlError,
YamlErrorKind,
YamlMap,
YamlNode,
YamlPair,
YamlScalar,
YamlSeq,
} from './types'
import { describe, expect, it } from 'vitest'
import { lineCounter } from './line-counter'
// Exercises `lineCounter(source).linePos(offset)`: offsets are 0-based character
// positions, results are 1-based `{ line, col }` pairs.
describe('line-counter', () => {
// Shared three-line fixture; the offsets quoted below index into this string.
const source = 'openapi: 3.1.0\ninfo:\n title: My API\n'
it('maps the first offset to line 1, column 1', () => {
expect(lineCounter(source).linePos(0)).toEqual({ line: 1, col: 1 })
})
it('maps an offset on a later line', () => {
// Offset 30 is the `M` of "My API" on the third line.
expect(lineCounter(source).linePos(30)).toEqual({ line: 3, col: 10 })
})
it('places a position right after a newline at the next line start', () => {
// Offset 15 is the first character of the second line (`info:`).
expect(lineCounter(source).linePos(15)).toEqual({ line: 2, col: 1 })
})
it('clamps out-of-range offsets', () => {
const lc = lineCounter('abc')
// Negative offsets pin to the start; offsets past the end pin to `length`,
// which is one column past the last character.
expect(lc.linePos(-5)).toEqual({ line: 1, col: 1 })
expect(lc.linePos(999)).toEqual({ line: 1, col: 4 })
})
it('handles an empty source', () => {
expect(lineCounter('').linePos(0)).toEqual({ line: 1, col: 1 })
})
})
/** A resolved source position. Both fields are 1-based, following the YAML convention. */
export type LinePos = {
  line: number
  col: number
}

export type LineCounter = {
  /** Maps a character offset to a 1-based `{ line, col }`. */
  linePos: (offset: number) => LinePos
}

/**
 * Creates an offset → `line:column` mapper for `source`.
 *
 * Line starts are indexed once when the counter is built; each lookup is then a
 * binary search over that index, so resolving node and error offsets stays
 * cheap on large documents. Only `\n` starts a new line. Offsets outside
 * `[0, source.length]` are clamped into that range. A plain factory rather than
 * a class, matching the codebase style.
 */
export const lineCounter = (source: string): LineCounter => {
  const total = source.length

  // starts[n] is the offset where line (n + 1) begins; line 1 begins at 0.
  const starts: number[] = [0]
  for (let nl = source.indexOf('\n'); nl !== -1; nl = source.indexOf('\n', nl + 1)) {
    starts.push(nl + 1)
  }

  return {
    linePos: (offset) => {
      const at = Math.max(0, Math.min(offset, total))

      // Find the last line whose start is at or before `at`.
      let first = 0
      let last = starts.length - 1
      while (first < last) {
        const probe = (first + last + 1) >> 1
        if ((starts[probe] ?? 0) <= at) first = probe
        else last = probe - 1
      }

      return { line: first + 1, col: at - (starts[first] ?? 0) + 1 }
    },
  }
}
import { describe, expect, it } from 'vitest'
import { lineCounter } from './line-counter'
import { nodeAtPath } from './node-at-path'
import { parseDocument } from './parse-document'
// Exercises `nodeAtPath` against parsed documents: lookup by map key and by
// sequence index, the `closest` fallback, and the source offsets of the result.
describe('node-at-path', () => {
// Shared OpenAPI-shaped fixture, parsed once for the tests that only read it.
const source = ['openapi: 3.1.0', 'info:', ' title: My API', ' version: 1.0.0', 'paths: {}'].join('\n')
const { contents } = parseDocument(source)
it('locates a nested scalar by path', () => {
const node = nodeAtPath(contents, ['info', 'title'])
expect(node?.kind).toBe('scalar')
if (node?.kind === 'scalar') expect(node.value).toBe('My API')
})
it('exposes the exact source range for a located node', () => {
const node = nodeAtPath(contents, ['info', 'title'])
const lc = lineCounter(source)
// `start` / `end` are character offsets; the line counter turns them into
// 1-based positions.
expect(node && lc.linePos(node.start)).toEqual({ line: 3, col: 10 })
expect(node && lc.linePos(node.end)).toEqual({ line: 3, col: 16 })
})
it('locates an array element with a numeric segment', () => {
const doc = parseDocument('tags:\n - name: a\n - name: b\n')
const node = nodeAtPath(doc.contents, ['tags', 1, 'name'])
if (node?.kind === 'scalar') expect(node.value).toBe('b')
})
it('returns undefined for a missing path', () => {
expect(nodeAtPath(contents, ['info', 'description'])).toBeUndefined()
})
it('falls back to the closest ancestor when asked', () => {
const node = nodeAtPath(contents, ['info', 'description'], true)
// The `info` map begins at its first child key, `title`.
const lc = lineCounter(source)
expect(node && lc.linePos(node.start)).toEqual({ line: 3, col: 3 })
})
it('matches numeric map keys against stringified segments', () => {
// The key is quoted, so it is the string "200"; the path segment is a string too.
const doc = parseDocument('responses:\n "200":\n description: ok\n')
const node = nodeAtPath(doc.contents, ['responses', '200', 'description'])
if (node?.kind === 'scalar') expect(node.value).toBe('ok')
})
})
import { isMap, isSeq } from './guards'
import type { YamlNode } from './types'
/** A path into a document, e.g. `['paths', '/pets', 'get']` or `['tags', 0]`. */
export type NodePath = readonly (string | number)[]

/**
 * Converts a path segment to a sequence index, or -1 when it is not one.
 *
 * Numbers pass through unchanged. Strings must consist solely of decimal
 * digits: `Number()` alone would coerce `''` and `' '` to 0 and accept forms
 * such as `'1e1'` or `'0x10'`, silently addressing an element the caller never
 * asked for.
 */
const toSeqIndex = (segment: string | number): number => {
  if (typeof segment === 'number') return segment
  return /^\d+$/.test(segment) ? Number(segment) : -1
}

/**
 * Walks a node tree to the node addressed by `path`, returning it (with its
 * `start` / `end` offsets) or `undefined` if the path does not exist.
 *
 * When `closest` is true and the full path is missing, it returns the deepest
 * ancestor that does exist — exactly what a linter wants so a diagnostic can
 * still point at the nearest real source span instead of nowhere. Keys are
 * compared as strings so a numeric path segment matches a stringified map key.
 *
 * A map entry whose value is `null` (a key with no value) counts as missing,
 * and aliases are not followed.
 *
 * @param root - Tree to search; `null` (an empty document) always yields `undefined`.
 * @param path - Map keys and sequence indices, outermost first.
 * @param closest - Return the deepest existing ancestor instead of `undefined`.
 */
export const nodeAtPath = (root: YamlNode | null, path: NodePath, closest = false): YamlNode | undefined => {
  let node: YamlNode | null | undefined = root
  let matched: YamlNode | undefined = root ?? undefined
  for (const segment of path) {
    if (!node) break
    let next: YamlNode | null | undefined
    if (isMap(node)) {
      const key = String(segment)
      for (const pair of node.items) {
        if (keyOf(pair.key) === key) {
          next = pair.value
          break
        }
      }
    } else if (isSeq(node)) {
      const index = toSeqIndex(segment)
      next = Number.isInteger(index) && index >= 0 ? node.items[index] : undefined
    }
    // Scalars and aliases have no children, so `next` stays undefined for them.
    if (next == null) return closest ? matched : undefined
    node = next
    matched = next
  }
  return node ?? undefined
}
/**
 * Renders a map key node as the string that path segments are compared with:
 * a scalar's value stringified (`null` becomes `'null'`), an alias as `*name`,
 * and any other node kind as the empty string.
 */
const keyOf = (node: YamlNode): string => {
  switch (node.kind) {
    case 'scalar':
      return node.value === null ? 'null' : String(node.value)
    case 'alias':
      return '*' + node.source
    default:
      return ''
  }
}
import { describe, expect, it } from 'vitest'
import { parseDocument } from './parse-document'
// Behavioural suite for `parseDocument`: the plain-data projection (`toJS`),
// the collected `errors`, and the source offsets recorded on nodes.
describe('parse-document', () => {
// --- Block collections ---
it('parses a block mapping', () => {
expect(parseDocument('a: 1\nb: 2\n').toJS()).toEqual({ a: 1, b: 2 })
})
it('parses nested block mappings', () => {
expect(parseDocument('a:\n b:\n c: 1\n d: 2\n').toJS()).toEqual({ a: { b: { c: 1 }, d: 2 } })
})
it('parses a block sequence', () => {
expect(parseDocument('- one\n- two\n').toJS()).toEqual(['one', 'two'])
})
it('parses a sequence indented to its parent key', () => {
expect(parseDocument('items:\n- 1\n- 2\n').toJS()).toEqual({ items: [1, 2] })
})
it('parses a mapping nested in a sequence entry', () => {
expect(parseDocument('- name: a\n age: 1\n- name: b\n age: 2\n').toJS()).toEqual([
{ name: 'a', age: 1 },
{ name: 'b', age: 2 },
])
})
it('treats a key with no value as null', () => {
expect(parseDocument('a:\nb: 1\n').toJS()).toEqual({ a: null, b: 1 })
})
// --- Flow collections ---
it('parses flow sequences and mappings, including nesting', () => {
expect(parseDocument('a: [1, 2, {x: y, z: 3}]\n').toJS()).toEqual({ a: [1, 2, { x: 'y', z: 3 }] })
})
it('parses a flow collection that spans multiple lines', () => {
expect(parseDocument('a: [\n 1,\n 2,\n]\n').toJS()).toEqual({ a: [1, 2] })
})
// --- Scalars ---
it('parses single- and double-quoted scalars', () => {
expect(parseDocument(`a: 'it''s here'\nb: "tab\\tend"\n`).toJS()).toEqual({ a: "it's here", b: 'tab\tend' })
})
it('does not treat a colon inside a URL as a mapping separator', () => {
expect(parseDocument('url: https://example.com/path\n').toJS()).toEqual({ url: 'https://example.com/path' })
})
it('parses a literal block scalar and clips the trailing newline', () => {
expect(parseDocument('text: |\n line1\n line2\n').toJS()).toEqual({ text: 'line1\nline2\n' })
})
it('strips the trailing newline with the `-` chomping indicator', () => {
expect(parseDocument('text: |-\n line1\n line2\n').toJS()).toEqual({ text: 'line1\nline2' })
})
it('keeps trailing newlines with the `+` chomping indicator', () => {
expect(parseDocument('text: |+\n line1\n\n\nnext: 1\n').toJS()).toEqual({ text: 'line1\n\n\n', next: 1 })
})
it('folds a folded block scalar', () => {
expect(parseDocument('text: >\n one\n two\n').toJS()).toEqual({ text: 'one two\n' })
})
// --- Comments, anchors, and merge keys ---
it('ignores full-line and inline comments', () => {
const source = '# header\na: 1 # trailing\n# between\nb: 2\n'
expect(parseDocument(source).toJS()).toEqual({ a: 1, b: 2 })
})
it('resolves anchors and aliases', () => {
expect(parseDocument('base: &b\n x: 1\nuse: *b\n').toJS()).toEqual({ base: { x: 1 }, use: { x: 1 } })
})
it('applies merge keys without overriding explicit keys', () => {
const source = 'defaults: &d\n timeout: 30\n retries: 3\nservice:\n <<: *d\n retries: 5\n'
expect(parseDocument(source).toJS()).toEqual({
defaults: { timeout: 30, retries: 3 },
service: { timeout: 30, retries: 5 },
})
})
// --- Diagnostics ---
it('reports a duplicate key as an error with its range', () => {
const { errors } = parseDocument('a: 1\na: 2\n')
expect(errors).toHaveLength(1)
expect(errors[0]?.code).toBe('DUPLICATE_KEY')
expect(errors[0]?.kind).toBe('error')
// The error points at the second `a`, which starts at offset 5.
expect(errors[0]?.start).toBe(5)
})
it('keeps the last value for a duplicate key but still flags it', () => {
expect(parseDocument('a: 1\na: 2\n').toJS()).toEqual({ a: 2 })
})
it('allows duplicate keys when uniqueKeys is disabled', () => {
const { errors, toJS } = parseDocument('a: 1\na: 2\n', { uniqueKeys: false })
expect(errors).toHaveLength(0)
expect(toJS()).toEqual({ a: 2 })
})
it('records an error for an unterminated flow collection', () => {
const { errors } = parseDocument('a: [1, 2\n')
expect(errors.some((e) => e.code === 'UNTERMINATED_FLOW')).toBe(true)
})
// --- Document markers and empty input ---
it('skips a document-start marker and directives', () => {
expect(parseDocument('%YAML 1.2\n---\na: 1\n').toJS()).toEqual({ a: 1 })
})
it('stops at a document-end marker', () => {
expect(parseDocument('a: 1\n...\n').toJS()).toEqual({ a: 1 })
})
it('returns null contents for an empty document', () => {
expect(parseDocument('').contents).toBeNull()
expect(parseDocument(' \n \n').contents).toBeNull()
})
// --- Source offsets and node properties ---
it('records the source range of a scalar value', () => {
const source = 'title: My API'
const node = parseDocument(source).contents
if (node?.kind === 'map') {
const value = node.items[0]?.value
// "My API" starts at offset 7 and ends (exclusive) at 13.
expect([value?.start, value?.end]).toEqual([7, 13])
}
})
it('starts a block map range at its first key', () => {
const source = 'info:\n title: a\n version: b\n'
const node = parseDocument(source).contents
if (node?.kind === 'map') {
const info = node.items[0]?.value
// The nested map begins at `title`, the first child key (offset 8).
expect(info?.start).toBe(8)
}
})
it('exposes anchors on the nodes that declare them', () => {
const node = parseDocument('a: &myAnchor 1\n').contents
if (node?.kind === 'map') {
const value = node.items[0]?.value
if (value?.kind === 'scalar') expect(value.anchor).toBe('myAnchor')
}
})
})
import { resolveDoubleQuoted, resolvePlainValue, resolveSingleQuoted } from './resolve-scalar'
import type { ParseOptions, YamlDocument, YamlError, YamlMap, YamlNode, YamlPair, YamlScalar, YamlSeq } from './types'
/**
 * The parser. One cohesive recursive-descent walker — the deliberate exception
 * to the repo's one-function-per-file rule (mirroring `runtime-validators`'
 * interpreter), because the scanning helpers share a tight, mutable cursor and
 * only make sense together.
 *
 * Strategy: one left-to-right pass over the source string, driven by an
 * explicit offset cursor (`state.pos`). Indentation drives block structure;
 * scalars and flow collections are scanned inline. Each node records its
 * absolute `[start, end)` range as it is produced, so positions fall out of
 * parsing instead of needing a second pass. The hot path (plain block mappings
 * of plain scalars) touches each character roughly once.
 */

// Character codes compared against `charCodeAt` results throughout the scanner.

// Line breaks and inline whitespace.
const NL = 10 // \n
const CR = 13 // \r
const TAB = 9
const SPACE = 32

// Structural indicators.
const HASH = 35 // #
const DASH = 45 // -
const COLON = 58 // :
const COMMA = 44 // ,

// Node properties and references.
const AMP = 38 // &
const STAR = 42 // *
const BANG = 33 // !

// Quotes.
const SQUOTE = 39 // '
const DQUOTE = 34 // "

// Flow collection brackets.
const LBRACKET = 91 // [
const RBRACKET = 93 // ]
const LBRACE = 123 // {
const RBRACE = 125 // }

// Block scalar headers.
const PIPE = 124 // |
const GT = 62 // >
/** What `peekLine` reports about the next line that carries content. */
type LineInfo = {
/** True when `peekLine` reached the end of input without finding a content line. */
eof: boolean
/** Number of leading spaces on the content line. */
indent: number
/** Offset of the first non-space character. */
contentPos: number
}
/** Mutable parsing state threaded through every scanning helper. */
type State = {
/** The source text being scanned. */
src: string
/** End-of-input bound used by the scanners — presumably `src.length`; confirm where the state is built. */
len: number
/** The offset cursor; the scanning helpers read from here and advance it. */
pos: number
/** Problems recorded by `pushError`. */
errors: YamlError[]
/** NOTE(review): presumably non-fatal problems — nothing in this part of the file writes to it. */
warnings: YamlError[]
/** Anchor name → the node it was declared on; written by `attachProps`. */
anchors: Map<string, YamlNode>
/** NOTE(review): presumably mirrors the `uniqueKeys` parse option (duplicate-key reporting) — confirm at the construction site. */
uniqueKeys: boolean
/** NOTE(review): presumably mirrors the `merge` parse option (`<<` merge keys) — confirm at the construction site. */
merge: boolean
/**
* Reused by `peekLine` to avoid allocating a result object per line. Callers
* read it immediately and never hold it across another `peekLine`, so a single
* shared instance is safe and keeps large documents allocation-light.
*/
line: LineInfo
}
/** Optional `&anchor` / `!tag` properties read ahead of a node value. */
type NodeProps = { anchor?: string; tag?: string }
// The common case is a value with no anchor/tag — share one frozen object so
// `scanProps` allocates nothing on the hot path. `attachProps` recognises it by
// identity (`props === NO_PROPS`), so it must stay a single shared instance.
const NO_PROPS: NodeProps = Object.freeze({})
/** True for the two inline whitespace characters: space and tab. */
const isSpace = (c: number): boolean => {
  if (c === SPACE) return true
  return c === TAB
}
/**
 * Offset of the character after the next `\n` at or beyond `from`, or `len`
 * when no line break remains before `len`.
 */
const nextLineStart = (src: string, from: number, len: number): number => {
  const newline = src.indexOf('\n', from)
  return newline === -1 || newline >= len ? len : newline + 1
}
/**
 * Moves the cursor to the start of the next line that has real content,
 * stepping over blank lines and full-line comments. On success `state.pos` is
 * parked at column 0 of that line so its indentation can be measured
 * deterministically. When no content remains, `state.pos` is set to `len` and
 * the result has `eof` set.
 *
 * The returned object is the shared `state.line`; it is overwritten by the next
 * call.
 */
const peekLine = (state: State): LineInfo => {
  const info = state.line
  const text = state.src
  const limit = state.len

  let lineStart = state.pos
  while (lineStart < limit) {
    // Only spaces count as indentation here.
    let cursor = lineStart
    while (cursor < limit && text.charCodeAt(cursor) === SPACE) cursor++
    if (cursor >= limit) break

    const code = text.charCodeAt(cursor)
    const skippable = code === NL || code === CR || code === HASH
    if (!skippable) {
      state.pos = lineStart
      info.eof = false
      info.indent = cursor - lineStart
      info.contentPos = cursor
      return info
    }
    lineStart = nextLineStart(text, cursor, limit)
  }

  state.pos = limit
  info.eof = true
  info.indent = 0
  info.contentPos = limit
  return info
}
/** Advances `state.pos` past any run of spaces and tabs; never crosses a line break. */
const skipInlineSpaces = (state: State): void => {
  let cursor = state.pos
  for (; cursor < state.len; cursor++) {
    if (!isSpace(state.src.charCodeAt(cursor))) break
  }
  state.pos = cursor
}
/**
 * True when the cursor sits at the end of input, on a line break, or on a `#`.
 * It does not check that the `#` is preceded by whitespace, nor does it skip
 * spaces before looking.
 */
const atLineEnd = (state: State): boolean => {
  if (state.pos >= state.len) return true
  const code = state.src.charCodeAt(state.pos)
  return code === NL || code === CR || code === HASH
}
/**
 * Discards whatever remains of the current line (a trailing comment included)
 * together with its line break, leaving the cursor at the next line start or at
 * the end of input.
 */
const finishLine = (state: State): void => {
  const { src, pos, len } = state
  state.pos = nextLineStart(src, pos, len)
}
/**
 * Flushes to the next line only when the cursor is in the middle of one. Block
 * collections and block scalars finish already parked at a line start, whereas
 * scalars, aliases, and flow collections finish mid-line. Checking whether the
 * previous character was `\n` lets a single helper serve every node kind.
 * Nothing happens at offset 0 or at the end of input.
 */
const finishLineIfMidLine = (state: State): void => {
  const { pos, len, src } = state
  if (pos <= 0 || pos >= len) return
  if (src.charCodeAt(pos - 1) === NL) return
  finishLine(state)
}
/** Appends an `error`-kind problem covering `[start, end)` to `state.errors`. */
const pushError = (state: State, code: string, message: string, start: number, end: number): void => {
  const problem: YamlError = { kind: 'error', code, message, start, end }
  state.errors.push(problem)
}
/** Offset just past a single-quoted span opening at `open`; `''` inside it is an escaped quote. */
const skipSingleQuotedKey = (src: string, open: number, len: number): number => {
  let at = open + 1
  while (at < len) {
    if (src.charCodeAt(at) !== SQUOTE) {
      at++
      continue
    }
    if (src.charCodeAt(at + 1) !== SQUOTE) break
    at += 2
  }
  return at + 1
}

/** Offset just past a double-quoted span opening at `open`; a backslash escapes the next character. */
const skipDoubleQuotedKey = (src: string, open: number, len: number): number => {
  let at = open + 1
  while (at < len) {
    const code = src.charCodeAt(at)
    if (code === DQUOTE) break
    at += code === 92 /* \ */ ? 2 : 1
  }
  return at + 1
}

/**
 * Locates the `key:` separator on the line starting at `from`, returning its
 * offset or -1 when the line is not a mapping entry.
 *
 * A quote is only a delimiter when it opens the key (`Let's` mid-word is
 * literal), so a leading quoted span is skipped once, before the scan. After
 * that only three things matter: a line break or a ` #` comment (no entry), and
 * a colon followed by whitespace or the end of the line/input — the YAML block
 * rule that keeps `https://…` from being read as a key.
 */
const findKeyColon = (src: string, from: number, len: number): number => {
  const opener = src.charCodeAt(from)
  let at = from
  if (opener === SQUOTE) at = skipSingleQuotedKey(src, from, len)
  else if (opener === DQUOTE) at = skipDoubleQuotedKey(src, from, len)

  for (; at < len; at++) {
    const code = src.charCodeAt(at)
    if (code === NL || code === CR) return -1
    if (code === HASH && at > from && isSpace(src.charCodeAt(at - 1))) return -1
    if (code !== COLON) continue
    if (at + 1 >= len) return at
    const after = src.charCodeAt(at + 1)
    if (after === SPACE || after === TAB || after === NL || after === CR) return at
  }
  return -1
}
/**
 * Reads the `&anchor` / `!tag` properties that precede a node value on its line.
 *
 * Most values carry none, so anything that does not start with `&`, `!`, a
 * space, or a tab returns the shared `NO_PROPS` without touching the cursor;
 * everything else is handed to `scanPropsSlow`.
 */
const scanProps = (state: State): NodeProps => {
  switch (state.src.charCodeAt(state.pos)) {
    case AMP:
    case BANG:
    case SPACE:
    case TAB:
      return scanPropsSlow(state)
    default:
      return NO_PROPS
  }
}
/**
 * Slow path of `scanProps`: skips inline whitespace, then reads any sequence of
 * `&anchor` and `!tag` properties, leaving the cursor on the first character of
 * the value itself.
 *
 * - An anchor name ends at whitespace, a line break, or one of `,` `]` `}`.
 *   These are the same terminators `scanAlias` applies to `*name`, so a declared
 *   anchor can always be referenced, and a flow-context `&a]` no longer
 *   swallows the closing bracket.
 * - A tag ends at whitespace or a line break only, and every leading `!` is
 *   stripped (`!!str` and `!str` both yield `str`).
 * - If a property repeats, the last occurrence wins.
 *
 * @returns The shared `NO_PROPS` when nothing was found, otherwise a fresh object.
 */
const scanPropsSlow = (state: State): NodeProps => {
  const { src, len } = state
  let anchor: string | undefined
  let tag: string | undefined
  for (;;) {
    skipInlineSpaces(state)
    const head = src.charCodeAt(state.pos)
    if (head !== AMP && head !== BANG) break
    const isAnchor = head === AMP
    let i = state.pos + 1
    while (i < len) {
      const c = src.charCodeAt(i)
      if (isSpace(c) || c === NL || c === CR) break
      // Flow indicators cannot be part of an anchor name.
      if (isAnchor && (c === COMMA || c === RBRACKET || c === RBRACE)) break
      i++
    }
    if (isAnchor) anchor = src.slice(state.pos + 1, i)
    else tag = src.slice(state.pos, i).replace(/^!+/, '')
    state.pos = i
  }
  if (anchor === undefined && tag === undefined) return NO_PROPS
  // Build conditionally: `exactOptionalPropertyTypes` forbids explicit undefined.
  const props: NodeProps = {}
  if (anchor !== undefined) props.anchor = anchor
  if (tag !== undefined) props.tag = tag
  return props
}
/**
 * Applies scanned properties to `node` and returns that same node.
 *
 * A non-empty anchor is always registered in `state.anchors`, replacing any
 * earlier node under that name. The `anchor` and `tag` fields are written onto
 * the node itself only when it is not an alias. The shared `NO_PROPS` is
 * recognised by identity and skipped outright.
 */
const attachProps = (node: YamlNode, props: NodeProps, state: State): YamlNode => {
  if (props !== NO_PROPS) {
    const { anchor, tag } = props
    if (anchor) {
      state.anchors.set(anchor, node)
      if (node.kind !== 'alias') node.anchor = anchor
    }
    if (tag && node.kind !== 'alias') node.tag = tag
  }
  return node
}
/**
 * Reads a single- or double-quoted scalar whose opening quote is at
 * `state.pos`, including spans that cross line breaks, and leaves the cursor
 * just past the closing quote.
 *
 * In single-quoted text `''` is an escaped quote; in double-quoted text a
 * backslash escapes the following character. Any `quote` other than `'` is
 * scanned with the double-quote rules.
 *
 * If the input ends before the closing quote, an `UNTERMINATED_QUOTE` error is
 * recorded and everything after the opening quote is kept as the scalar's
 * content. Previously the last content character was dropped as though it were
 * the closing quote, and no error was reported. `end` and `state.pos` are
 * clamped to the input length, which a trailing backslash would otherwise
 * overshoot.
 *
 * @param quote - Character code of the opening quote.
 */
const scanQuoted = (state: State, quote: number): YamlScalar => {
  const { src, len } = state
  const start = state.pos
  const single = quote === SQUOTE
  let i = start + 1
  let closed = false
  if (single) {
    while (i < len) {
      if (src.charCodeAt(i) !== SQUOTE) {
        i++
        continue
      }
      // `''` is an escaped quote, not the terminator.
      if (src.charCodeAt(i + 1) === SQUOTE) {
        i += 2
        continue
      }
      i++
      closed = true
      break
    }
  } else {
    while (i < len) {
      const c = src.charCodeAt(i)
      if (c === 92 /* \ */) {
        i += 2
        continue
      }
      i++
      if (c === DQUOTE) {
        closed = true
        break
      }
    }
  }
  // A backslash as the very last character steps the cursor one past the input.
  const end = i < len ? i : len
  if (!closed) {
    pushError(state, 'UNTERMINATED_QUOTE', `Missing closing ${single ? "'" : '"'} for quoted scalar`, start, end)
  }
  const source = src.slice(start, end)
  // Only strip a trailing quote character when one was actually found.
  const inner = src.slice(start + 1, closed ? end - 1 : end)
  const value = single ? resolveSingleQuoted(inner) : resolveDoubleQuoted(inner)
  state.pos = end
  return { kind: 'scalar', value, source, style: single ? 'single' : 'double', start, end }
}
/**
 * Reads a `*alias` reference starting at `state.pos` (the `*`). The name runs
 * until whitespace, a line break, or one of `,` `]` `}`; the cursor is left on
 * that terminator.
 */
const scanAlias = (state: State): YamlNode => {
  const { src, len, pos: start } = state
  let end = start + 1
  for (; end < len; end++) {
    const code = src.charCodeAt(end)
    const lineOrSpace = isSpace(code) || code === NL || code === CR
    const flowIndicator = code === COMMA || code === RBRACKET || code === RBRACE
    if (lineOrSpace || flowIndicator) break
  }
  state.pos = end
  return { kind: 'alias', source: src.slice(start + 1, end), start, end }
}
/**
 * Exclusive end offset of a plain scalar's text on the line starting at `from`.
 * The scan stops at a line break or at a `#` preceded by whitespace, and the
 * result excludes any spaces or tabs trailing the text. A `#` at `from` itself
 * is ordinary text.
 */
const plainLineEnd = (src: string, from: number, len: number): number => {
  let textEnd = from
  for (let at = from; at < len; at++) {
    const code = src.charCodeAt(at)
    if (code === NL || code === CR) break
    const opensComment = code === HASH && at > from && isSpace(src.charCodeAt(at - 1))
    if (opensComment) break
    if (code !== SPACE && code !== TAB) textEnd = at + 1
  }
  return textEnd
}
/**
 * Reads a plain (unquoted) scalar starting at `state.pos`, folding continuation
 * lines that are indented deeper than `parentIndent`. The cursor is left at the
 * end of the scalar's last line of text (mid-line).
 *
 * Single-line plain scalars — the overwhelmingly common case — never allocate
 * the line array unless a blank line follows them. Blank lines are staged
 * speculatively because they only matter if a deeper line comes after. When
 * none does, the scalar is still a single-line scalar and goes through
 * `resolvePlainValue`, so `a: 42` followed by a blank line resolves to the
 * number 42 rather than the string `'42'`. Only scalars that really span
 * several lines are folded, and those are always strings.
 *
 * @param parentIndent - Indentation that a continuation line must exceed.
 */
const scanPlainScalar = (state: State, parentIndent: number): YamlScalar => {
  const { src, len } = state
  const start = state.pos
  let valueEnd = plainLineEnd(src, start, len)
  let segments: string[] | null = null
  let scan = nextLineStart(src, valueEnd, len)
  for (;;) {
    if (scan >= len) break
    let i = scan
    while (i < len && src.charCodeAt(i) === SPACE) i++
    const c = src.charCodeAt(i)
    if (c === NL || c === CR) {
      // Blank line: only meaningful if a deeper line follows, so stage it.
      if (!segments) segments = [src.slice(start, valueEnd)]
      segments.push('')
      scan = nextLineStart(src, i, len)
      continue
    }
    if (i >= len) break
    const indent = i - scan
    // A shallower-or-equal line is sibling/parent structure; a comment line ends the scalar.
    if (indent <= parentIndent || c === HASH) break
    const lineEnd = plainLineEnd(src, i, len)
    if (!segments) segments = [src.slice(start, valueEnd)]
    segments.push(src.slice(i, lineEnd))
    valueEnd = lineEnd
    scan = nextLineStart(src, lineEnd, len)
  }
  state.pos = valueEnd
  // Drop trailing blank segments that turned out to precede sibling structure.
  if (segments) {
    while (segments.length > 1 && segments[segments.length - 1] === '') segments.pop()
  }
  const source = src.slice(start, valueEnd)
  if (!segments || segments.length === 1) {
    // No continuation line was folded in, so core-schema typing still applies.
    return { kind: 'scalar', value: resolvePlainValue(source), source, style: 'plain', start, end: valueEnd }
  }
  return { kind: 'scalar', value: foldSegments(segments), source, style: 'plain', start, end: valueEnd }
}
/**
 * Folds the lines of a multi-line plain scalar into its value: a single line
 * break becomes one space, and a run of N blank lines becomes N newlines with no
 * extra space around them. Only trailing whitespace is removed from the first
 * segment; every later segment is trimmed on both sides. A trailing run of
 * blank segments is kept as newlines.
 */
const foldSegments = (segments: string[]): string => {
  let folded = (segments[0] ?? '').replace(/[ \t]+$/, '')
  // Blank segments seen since the last piece of text was appended.
  let pendingBlanks = 0
  for (let index = 1; index < segments.length; index++) {
    const text = (segments[index] ?? '').trim()
    if (text === '') {
      pendingBlanks++
      continue
    }
    folded += pendingBlanks > 0 ? '\n'.repeat(pendingBlanks) + text : ' ' + text
    pendingBlanks = 0
  }
  return folded + '\n'.repeat(pendingBlanks)
}
/**
 * Reads a `|` literal or `>` folded block scalar with chomping and indent indicators.
 *
 * @param state - Parser state. `state.pos` must be on the `|` / `>` indicator;
 *   on return it is at the start of the first line that is not part of the
 *   scalar (or at the end of input).
 * @param parentIndent - Content must be indented deeper than this column. An
 *   explicit indent digit in the header is added to it.
 * @returns A `block-literal` or `block-folded` scalar whose `end` is the end
 *   of the last non-blank content line.
 */
const scanBlockScalar = (state: State, parentIndent: number): YamlScalar => {
const { src, len } = state
const start = state.pos
const folded = src.charCodeAt(state.pos) === GT
state.pos++
let chomp: 'clip' | 'strip' | 'keep' = 'clip'
let explicitIndent = 0
// Header indicators, accepted in any order: `-` strip, `+` keep, `1`-`9` explicit indent.
for (;;) {
const c = src.charCodeAt(state.pos)
if (c === DASH) chomp = 'strip'
else if (c === 43 /* + */) chomp = 'keep'
else if (c >= 49 && c <= 57 /* 1-9 */) explicitIndent = c - 48
else break
state.pos++
}
// NOTE(review): `finishLine` is expected to leave the cursor at the start of the
// line after the header — confirm against its definition.
finishLine(state)
// -1 means "not known yet": taken from the first non-blank content line below.
let contentIndent = explicitIndent ? parentIndent + explicitIndent : -1
const lines: string[] = []
let valueEnd = state.pos
for (;;) {
const lineStart = state.pos
if (lineStart >= len) break
let i = lineStart
while (i < len && src.charCodeAt(i) === SPACE) i++
const c = src.charCodeAt(i)
const indent = i - lineStart
if (c === NL || c === CR || i >= len) {
// Whitespace-only line. Once the content indent is known, anything beyond
// it is real content (literal scalars preserve that extra indentation).
lines.push(contentIndent !== -1 && indent > contentIndent ? ' '.repeat(indent - contentIndent) : '')
state.pos = nextLineStart(src, i, len)
continue
}
if (contentIndent === -1) {
// First non-blank line: it must be deeper than the parent, and it fixes the indent.
if (indent <= parentIndent) break
contentIndent = indent
}
// A line shallower than the content indent ends the scalar; the cursor stays on it.
if (indent < contentIndent) break
let lineEnd = lineStart + contentIndent
while (lineEnd < len && src.charCodeAt(lineEnd) !== NL && src.charCodeAt(lineEnd) !== CR) lineEnd++
// Keep everything after the content indent, including any extra indentation.
lines.push(src.slice(lineStart + contentIndent, lineEnd))
valueEnd = lineEnd
state.pos = nextLineStart(src, lineEnd, len)
}
// Separate interior content from trailing blank lines for chomping.
let trailingBlanks = 0
while (lines.length > 0 && lines[lines.length - 1] === '') {
trailingBlanks++
lines.pop()
}
const body = folded ? foldSegments(lines) : lines.join('\n')
let value = body
// strip: no trailing newline. keep: the final line break plus one newline per
// trailing blank line. clip (default): exactly one newline when there is content.
if (chomp === 'strip') value = body
else if (chomp === 'keep') value = body + '\n'.repeat(trailingBlanks + (lines.length ? 1 : 0))
else value = body + (lines.length ? '\n' : '')
return {
kind: 'scalar',
value,
source: src.slice(start, valueEnd),
style: folded ? 'block-folded' : 'block-literal',
start,
end: valueEnd,
}
}
/**
 * Advances `state.pos` past everything that may separate two flow tokens:
 * spaces, tabs, line breaks, and `#` comments (each skipped through
 * `nextLineStart`).
 */
const skipFlowWs = (state: State): void => {
  const { src, len } = state
  let cursor = state.pos
  for (;;) {
    if (cursor >= len) break
    const code = src.charCodeAt(cursor)
    if (code === HASH) {
      // Comment: jump to wherever the next line begins.
      cursor = nextLineStart(src, cursor, len)
      continue
    }
    if (code !== SPACE && code !== TAB && code !== NL && code !== CR) break
    cursor++
  }
  state.pos = cursor
}
/**
 * Reads a plain scalar inside a flow collection. The scalar stops at a flow
 * indicator (`,` `[` `]` `{` `}`), a line break, a `:` that acts as a
 * key/value separator, or a `#` that follows whitespace.
 *
 * The cursor is left where scanning stopped; the returned node's `end`
 * excludes any trailing whitespace before that point.
 */
const scanFlowPlain = (state: State): YamlScalar => {
  const { src, len } = state
  const start = state.pos
  let stop = start
  scan: for (; stop < len; stop++) {
    const code = src.charCodeAt(stop)
    switch (code) {
      case COMMA:
      case LBRACKET:
      case RBRACKET:
      case LBRACE:
      case RBRACE:
      case NL:
      case CR:
        break scan
      case COLON: {
        // A colon only separates when followed by whitespace, a closer, or nothing.
        const after = src.charCodeAt(stop + 1)
        if (
          stop + 1 >= len ||
          isSpace(after) ||
          after === COMMA ||
          after === RBRACKET ||
          after === RBRACE ||
          after === NL ||
          after === CR
        ) {
          break scan
        }
        break
      }
      case HASH:
        // `#` starts a comment only when preceded by whitespace.
        if (stop > start && isSpace(src.charCodeAt(stop - 1))) break scan
        break
      default:
        break
    }
  }
  let trimmedEnd = stop
  while (trimmedEnd > start && isSpace(src.charCodeAt(trimmedEnd - 1))) trimmedEnd--
  const text = src.slice(start, trimmedEnd)
  state.pos = stop
  return {
    kind: 'scalar',
    value: resolvePlainValue(text),
    source: text,
    style: 'plain',
    start,
    end: trimmedEnd,
  }
}
/**
 * Parses one node inside a flow collection: optional properties (read by
 * `scanProps`) followed by a nested flow sequence or mapping, a quoted scalar,
 * an alias, or — for anything else — a flow plain scalar. Whitespace and
 * comments around the properties are skipped first.
 */
const parseFlowNode = (state: State): YamlNode => {
  skipFlowWs(state)
  const props = scanProps(state)
  skipFlowWs(state)
  const lead = state.src.charCodeAt(state.pos)
  switch (lead) {
    case LBRACKET:
      return attachProps(parseFlowSeq(state), props, state)
    case LBRACE:
      return attachProps(parseFlowMap(state), props, state)
    case DQUOTE:
    case SQUOTE:
      return attachProps(scanQuoted(state, lead), props, state)
    case STAR:
      return attachProps(scanAlias(state), props, state)
    default:
      return attachProps(scanFlowPlain(state), props, state)
  }
}
/**
 * Parses a `[ ... ]` flow sequence. The cursor must be on the opening `[`; on
 * return it is just past the closing `]`, or wherever parsing gave up. An
 * `UNTERMINATED_FLOW` error is recorded when input ends, or when an item is
 * followed by something other than `,` or `]`.
 */
const parseFlowSeq = (state: State): YamlSeq => {
  const start = state.pos
  const items: YamlNode[] = []
  const reportUnterminated = (): void => {
    pushError(state, 'UNTERMINATED_FLOW', 'Missing closing "]" for flow sequence', start, state.pos)
  }
  state.pos += 1 // consume `[`
  for (;;) {
    skipFlowWs(state)
    if (state.src.charCodeAt(state.pos) === RBRACKET) {
      state.pos += 1
      break
    }
    if (state.pos >= state.len) {
      reportUnterminated()
      break
    }
    items.push(parseFlowNode(state))
    skipFlowWs(state)
    const after = state.src.charCodeAt(state.pos)
    if (after === COMMA) {
      state.pos += 1
      continue
    }
    // Anything but a comma ends the sequence: cleanly on `]`, with an error otherwise.
    if (after === RBRACKET) state.pos += 1
    else reportUnterminated()
    break
  }
  return { kind: 'seq', items, start, end: state.pos }
}
/**
 * Parses a `{ ... }` flow mapping. The cursor must be on the opening `{`; on
 * return it is just past the closing `}`, or wherever parsing gave up. A key
 * with no `:` — or with `:` directly followed by `,` or `}` — gets a `null`
 * value. An `UNTERMINATED_FLOW` error is recorded when input ends, or when an
 * entry is followed by something other than `,` or `}`.
 */
const parseFlowMap = (state: State): YamlMap => {
  const start = state.pos
  const items: YamlPair[] = []
  const reportUnterminated = (): void => {
    pushError(state, 'UNTERMINATED_FLOW', 'Missing closing "}" for flow mapping', start, state.pos)
  }
  state.pos += 1 // consume `{`
  for (;;) {
    skipFlowWs(state)
    if (state.src.charCodeAt(state.pos) === RBRACE) {
      state.pos += 1
      break
    }
    if (state.pos >= state.len) {
      reportUnterminated()
      break
    }
    const key = parseFlowNode(state)
    skipFlowWs(state)
    let value: YamlNode | null = null
    if (state.src.charCodeAt(state.pos) === COLON) {
      state.pos += 1
      skipFlowWs(state)
      const lead = state.src.charCodeAt(state.pos)
      const valueOmitted = lead === COMMA || lead === RBRACE
      if (!valueOmitted) value = parseFlowNode(state)
    }
    // A pair spans from its key to its value, or just the key when there is none.
    const pairEnd = value === null ? key.end : value.end
    items.push({ kind: 'pair', key, value, start: key.start, end: pairEnd })
    skipFlowWs(state)
    const after = state.src.charCodeAt(state.pos)
    if (after === COMMA) {
      state.pos += 1
      continue
    }
    // Anything but a comma ends the mapping: cleanly on `}`, with an error otherwise.
    if (after === RBRACE) state.pos += 1
    else reportUnterminated()
    break
  }
  return { kind: 'map', items, start, end: state.pos }
}
/**
 * Parses the inline value that follows a `key:` separator on the same line.
 *
 * @param state - Parser state with the cursor just after the separator (and
 *   any spaces the caller skipped).
 * @param parentIndent - Indent of the owning key. A block node on the
 *   following lines is accepted as the value only when indented deeper.
 * @returns The value node, or `null` when the rest of the line holds neither a
 *   value nor node properties.
 *
 * When the line carries only an anchor and/or tag, the value is the deeper
 * block node below if there is one. Otherwise it is an empty plain scalar that
 * still receives the properties — the same thing `parseNode` does — so that
 * `key: &a` keeps its anchor instead of silently losing it.
 */
const parseInlineValue = (state: State, parentIndent: number): YamlNode | null => {
  const props = scanProps(state)
  skipInlineSpaces(state)
  if (atLineEnd(state)) {
    if (!props.anchor && !props.tag) return null
    // Remember where the properties ended so an empty value gets a span on this line.
    const propsEnd = state.pos
    finishLine(state)
    const child = peekLine(state)
    if (!child.eof && child.indent > parentIndent) {
      // Properties with no inline value: the real value is the block node below.
      state.pos = child.contentPos
      const node = parseNode(state, child.indent)
      return attachProps(node, props, state)
    }
    // No deeper content: the properties decorate an empty node.
    return attachProps(
      { kind: 'scalar', value: null, source: '', style: 'plain', start: propsEnd, end: propsEnd },
      props,
      state,
    )
  }
  const c = state.src.charCodeAt(state.pos)
  let node: YamlNode
  if (c === STAR) node = scanAlias(state)
  else if (c === PIPE || c === GT) node = scanBlockScalar(state, parentIndent)
  else if (c === LBRACKET) node = parseFlowSeq(state)
  else if (c === LBRACE) node = parseFlowMap(state)
  else if (c === DQUOTE || c === SQUOTE) node = scanQuoted(state, c)
  else node = scanPlainScalar(state, parentIndent)
  return attachProps(node, props, state)
}
/**
 * Produces the string used as a property name (and as the duplicate-key
 * identity) for a mapping key node: a scalar's value as text, `*name` for an
 * alias, and the empty string for any collection key.
 */
const keyText = (node: YamlNode): string => {
  switch (node.kind) {
    case 'alias':
      return '*' + node.source
    case 'scalar': {
      const raw = node.value
      // Keys are usually strings already — skip the String() round-trip.
      if (typeof raw === 'string') return raw
      return raw === null ? 'null' : String(raw)
    }
    default:
      return ''
  }
}
/**
 * Parses a block mapping whose keys all sit at column `indent`.
 *
 * @param state - Parser state with the cursor on the first key's first character.
 * @param indent - Column shared by every key; a line at any other indent ends
 *   the mapping.
 * @param firstColon - Offset of the `:` that ends the first key, already
 *   located by the caller.
 * @returns A map node spanning from the first pair's start to the last pair's
 *   end. Duplicate keys are reported as `DUPLICATE_KEY` when `state.uniqueKeys`
 *   is set, but are still added to `items`.
 */
const parseBlockMap = (state: State, indent: number, firstColon: number): YamlMap => {
const { src, len } = state
const items: YamlPair[] = []
// Duplicate-key tracking is lazy: most maps have unique keys, and many have a
// single key, so we only allocate the Set once a second key actually appears.
let firstKey: string | null = null
let seen: Set<string> | null = null
// The cursor is already parked at the first key's content; later iterations
// re-derive the next entry's position with `peekLine` (which needs a line
// start, an invariant the previous entry's value leaves us on).
let contentPos = state.pos
let firstEntry = true
for (;;) {
let colon: number
if (firstEntry) {
// `parseNode` already located this colon to decide we are a mapping; reuse
// it instead of re-scanning the first line.
colon = firstColon
} else {
const line = peekLine(state)
if (line.eof || line.indent !== indent) break
contentPos = line.contentPos
const c = src.charCodeAt(contentPos)
// A `- ` at this indent is a sequence, not a mapping key.
if (c === DASH && (contentPos + 1 >= len || isSpace(src.charCodeAt(contentPos + 1)))) break
colon = findKeyColon(src, contentPos, len)
// A line without a key colon is not a mapping entry, so the mapping ends here.
if (colon < 0) break
}
firstEntry = false
const lineContentPos = contentPos
state.pos = contentPos
let key: YamlNode
const kc = src.charCodeAt(state.pos)
if (kc === DQUOTE || kc === SQUOTE) {
key = scanQuoted(state, kc)
} else {
// Plain key: everything from the line's content up to the colon, minus
// whitespace directly before the colon.
let end = colon
while (end > lineContentPos && isSpace(src.charCodeAt(end - 1))) end--
const text = src.slice(lineContentPos, end)
key = {
kind: 'scalar',
value: resolvePlainValue(text),
source: text,
style: 'plain',
start: lineContentPos,
end,
}
}
// Whatever the key scan did to the cursor, continue right after the colon.
state.pos = colon + 1
skipInlineSpaces(state)
let value: YamlNode | null = null
if (atLineEnd(state)) {
// Value lives on the following lines (or is empty).
finishLine(state)
const child = peekLine(state)
if (!child.eof && child.indent > indent) {
state.pos = child.contentPos
value = parseNode(state, child.indent)
} else if (!child.eof && child.indent === indent) {
// A sequence may be written at the same indent as the key that owns it.
const cc = src.charCodeAt(child.contentPos)
if (cc === DASH && (child.contentPos + 1 >= len || isSpace(src.charCodeAt(child.contentPos + 1)))) {
state.pos = child.contentPos
value = parseBlockSeq(state, indent)
}
}
} else {
value = parseInlineValue(state, indent)
finishLineIfMidLine(state)
}
if (state.uniqueKeys) {
const text = keyText(key)
if (seen) {
if (seen.has(text)) pushError(state, 'DUPLICATE_KEY', `Map key "${text}" is duplicated`, key.start, key.end)
else seen.add(text)
} else if (firstKey === null) {
// First key of the map: remember it without allocating a Set.
firstKey = text
} else {
// Second key: switch to Set-based tracking, seeded with the first key.
seen = new Set([firstKey])
if (firstKey === text) pushError(state, 'DUPLICATE_KEY', `Map key "${text}" is duplicated`, key.start, key.end)
else seen.add(text)
}
}
items.push({ kind: 'pair', key, value, start: key.start, end: value ? value.end : key.end })
}
const last = items[items.length - 1]
const first = items[0]
const start = first ? first.start : state.pos
const end = last ? last.end : state.pos
return { kind: 'map', items, start, end }
}
/**
 * Parses a block sequence whose `-` entry markers all sit at column `indent`.
 *
 * @param state - Parser state with the cursor on the first `-`.
 * @param indent - Column shared by every `-`; a line at any other indent, or
 *   one that does not start with a `-` entry marker, ends the sequence.
 * @returns A seq node starting at the first `-` and ending at the last item's
 *   end. An entry with no content on its line and no deeper line below becomes
 *   an empty `null` scalar positioned just after the dash.
 */
const parseBlockSeq = (state: State, indent: number): YamlSeq => {
const { src, len } = state
const items: YamlNode[] = []
let startOffset = -1
// As with `parseBlockMap`, the first entry is at the current cursor; later
// entries are located with `peekLine` from the line start we end up on.
let contentPos = state.pos
let firstEntry = true
for (;;) {
if (!firstEntry) {
const line = peekLine(state)
if (line.eof || line.indent !== indent) break
contentPos = line.contentPos
}
firstEntry = false
const c = src.charCodeAt(contentPos)
// Not an entry marker: either no dash, or a dash glued to following text (e.g. `-1`).
if (c !== DASH || (contentPos + 1 < len && !isSpace(src.charCodeAt(contentPos + 1)))) break
if (startOffset === -1) startOffset = contentPos
const dashPos = contentPos
state.pos = dashPos + 1
skipInlineSpaces(state)
let item: YamlNode
if (atLineEnd(state)) {
// Nothing after the dash: the item is the deeper block below, if any.
finishLine(state)
const child = peekLine(state)
if (!child.eof && child.indent > indent) {
state.pos = child.contentPos
item = parseNode(state, child.indent)
} else {
item = { kind: 'scalar', value: null, source: '', style: 'plain', start: dashPos + 1, end: dashPos + 1 }
}
} else {
// Inline item: its indent is the column where its content starts, so a
// mapping begun on the dash line can continue on lines aligned with it.
const contentCol = state.pos - contentPos + indent
item = parseNode(state, contentCol)
finishLineIfMidLine(state)
}
items.push(item)
}
const last = items[items.length - 1]
const start = startOffset === -1 ? state.pos : startOffset
const end = last ? last.end : state.pos
return { kind: 'seq', items, start, end }
}
/**
 * Parses a block node (mapping, sequence, or scalar) whose first token sits at
 * column `indent`. The cursor is assumed to be at that first token.
 *
 * Dispatch order: block sequence (`- `), then node properties, then alias,
 * flow sequence, flow mapping, block scalar, block mapping (any line holding a
 * key colon), quoted scalar, and finally plain scalar.
 *
 * @param state - Parser state positioned on the node's first character.
 * @param indent - Column of that first character.
 * @returns The parsed node, with any anchor/tag applied through `attachProps`.
 */
const parseNode = (state: State, indent: number): YamlNode => {
const { src, len } = state
const c = src.charCodeAt(state.pos)
// Checked before `scanProps`, so properties are not read ahead of a `- ` marker here.
if (c === DASH && (state.pos + 1 >= len || isSpace(src.charCodeAt(state.pos + 1)))) {
return parseBlockSeq(state, indent)
}
const props = scanProps(state)
if (props.anchor || props.tag) {
skipInlineSpaces(state)
if (atLineEnd(state)) {
// Properties alone on the line: they belong to the deeper block below, or
// to an empty scalar when nothing deeper follows.
finishLine(state)
const child = peekLine(state)
if (!child.eof && child.indent > indent) {
state.pos = child.contentPos
return attachProps(parseNode(state, child.indent), props, state)
}
return attachProps(
{ kind: 'scalar', value: null, source: '', style: 'plain', start: state.pos, end: state.pos },
props,
state,
)
}
}
const cc = src.charCodeAt(state.pos)
if (cc === STAR) return attachProps(scanAlias(state), props, state)
if (cc === LBRACKET) return attachProps(parseFlowSeq(state), props, state)
if (cc === LBRACE) return attachProps(parseFlowMap(state), props, state)
// `indent - 1` as the parent indent lets lines at column `indent` count as content.
if (cc === PIPE || cc === GT) return attachProps(scanBlockScalar(state, indent - 1), props, state)
// A line beginning with a quote may be a quoted *key* (e.g. `"200":`), so the
// mapping check has to come before treating the quote as a standalone scalar.
const colon = findKeyColon(src, state.pos, len)
if (colon >= 0) return attachProps(parseBlockMap(state, indent, colon), props, state)
if (cc === DQUOTE || cc === SQUOTE) return attachProps(scanQuoted(state, cc), props, state)
// Same `indent - 1` trick: continuation lines aligned with the scalar are folded in.
return attachProps(scanPlainScalar(state, indent - 1), props, state)
}
/**
 * Skips a leading BOM, `%`-directives, and a `---` document-start marker.
 *
 * Each directive line and each marker line is skipped whole (through
 * `nextLineStart`), so the cursor ends on the start of the first line that is
 * neither. A `---` only counts as a marker when it is followed by end of
 * input, whitespace, or a line break. Both LF and CR are named explicitly, so
 * a CRLF-terminated `---` is recognised regardless of what `isSpace` accepts.
 */
const skipDocumentHead = (state: State): void => {
  const { src, len } = state
  if (src.charCodeAt(0) === 0xfeff) state.pos = 1
  for (;;) {
    const line = peekLine(state)
    if (line.eof) return
    const at = line.contentPos
    const c = src.charCodeAt(at)
    if (c === 37 /* % */) {
      state.pos = nextLineStart(src, at, len)
      continue
    }
    if (c === DASH && src.charCodeAt(at + 1) === DASH && src.charCodeAt(at + 2) === DASH) {
      const after = src.charCodeAt(at + 3)
      if (at + 3 >= len || isSpace(after) || after === NL || after === CR) {
        state.pos = nextLineStart(src, at + 3, len)
        continue
      }
    }
    return
  }
}
/**
 * Projects a node tree to plain JavaScript values.
 *
 * @param node - Node to convert; `null` (a key with no value) becomes `null`.
 * @param anchors - Anchor name → node table used to resolve aliases.
 * @param merge - When true, a `<<` key is folded into the surrounding object
 *   via `applyMerge` instead of being stored as a regular key.
 * @param expanding - Internal: alias targets currently being expanded. Callers
 *   should leave it unset.
 * @returns The scalar value, an array for a sequence, or a plain object for a
 *   mapping. An alias with an unknown anchor yields `undefined`, as does an
 *   alias that points back at a node already being expanded (a
 *   self-referential anchor), which would otherwise recurse without bound.
 */
const toJsValue = (
  node: YamlNode | null,
  anchors: Map<string, YamlNode>,
  merge: boolean,
  expanding: Set<YamlNode> = new Set(),
): unknown => {
  if (node === null) return null
  if (node.kind === 'scalar') return node.value
  if (node.kind === 'alias') {
    const target = anchors.get(node.source)
    if (!target || expanding.has(target)) return undefined
    expanding.add(target)
    try {
      return toJsValue(target, anchors, merge, expanding)
    } finally {
      // Only nested re-entry is a cycle; sibling aliases to the same anchor are fine.
      expanding.delete(target)
    }
  }
  if (node.kind === 'seq') {
    // Index loop into a pre-sized array: no per-seq closure (as `.map` allocates)
    // and the result array never reallocates as it grows.
    const items = node.items
    const out = new Array(items.length)
    for (let i = 0; i < items.length; i++) out[i] = toJsValue(items[i] ?? null, anchors, merge, expanding)
    return out
  }
  const obj: Record<string, unknown> = {}
  const items = node.items
  for (let i = 0; i < items.length; i++) {
    const pair = items[i]
    if (pair === undefined) continue
    const key = pair.key
    if (merge && key.kind === 'scalar' && key.source === '<<') {
      applyMerge(obj, toJsValue(pair.value, anchors, merge, expanding))
      continue
    }
    const name = keyText(key)
    const value = pair.value ? toJsValue(pair.value, anchors, merge, expanding) : null
    if (name === '__proto__') {
      // Plain assignment would invoke the `__proto__` setter (replacing the
      // object's prototype or silently dropping the key). Define an own
      // property so the key behaves like any other.
      Object.defineProperty(obj, name, { value, writable: true, enumerable: true, configurable: true })
    } else {
      obj[name] = value
    }
  }
  return obj
}
/**
 * Folds a `<<` merge value (a map or list of maps) into `target` without
 * overriding existing keys.
 *
 * @param target - Object being built; only keys it does not already own are added.
 * @param value - The merge source. Arrays are applied entry by entry (earlier
 *   entries win); non-object values are ignored.
 *
 * "Existing" means an own property of `target`: names that merely exist on
 * `Object.prototype` (`toString`, `constructor`, …) are still merged. A
 * `__proto__` key is defined as an own property rather than assigned, so it
 * can never replace the target's prototype.
 */
const applyMerge = (target: Record<string, unknown>, value: unknown): void => {
  if (Array.isArray(value)) {
    for (const entry of value) applyMerge(target, entry)
    return
  }
  if (value === null || typeof value !== 'object') return
  for (const [k, v] of Object.entries(value)) {
    if (Object.prototype.hasOwnProperty.call(target, k)) continue
    if (k === '__proto__') {
      Object.defineProperty(target, k, { value: v, writable: true, enumerable: true, configurable: true })
    } else {
      target[k] = v
    }
  }
}
/**
 * Parses a YAML document into a node tree with source ranges, collected
 * problems, and a lazy `toJS` projection.
 *
 * @param source - The YAML text.
 * @param options - `uniqueKeys` and `merge` both default to on; only an
 *   explicit `false` turns either off.
 * @returns The document. `contents` is `null` when nothing but the document
 *   head (BOM, directives, `---`) is present, or when the first content line
 *   is a `...` document-end marker. `toJS` converts the tree on each call.
 */
export const parseDocument = (source: string, options: ParseOptions = {}): YamlDocument => {
  const state: State = {
    src: source,
    len: source.length,
    pos: 0,
    errors: [],
    warnings: [],
    anchors: new Map(),
    uniqueKeys: options.uniqueKeys !== false,
    merge: options.merge !== false,
    line: { eof: false, indent: 0, contentPos: 0 },
  }
  skipDocumentHead(state)
  const head = peekLine(state)
  let contents: YamlNode | null = null
  if (!head.eof) {
    // Stop a bare `...` document-end marker from being read as a scalar. The
    // dots only form a marker when followed by end of input, a space, a tab,
    // or a line break — `...rest` is ordinary content.
    const at = head.contentPos
    const after = source.charCodeAt(at + 3)
    const isDocEnd =
      source.charCodeAt(at) === 46 /* . */ &&
      source.charCodeAt(at + 1) === 46 &&
      source.charCodeAt(at + 2) === 46 &&
      (at + 3 >= source.length ||
        after === 32 /* space */ ||
        after === 9 /* tab */ ||
        after === 10 /* \n */ ||
        after === 13) /* \r */
    if (!isDocEnd) {
      state.pos = at
      contents = parseNode(state, head.indent)
    }
  }
  const { anchors, merge } = state
  return {
    contents,
    errors: state.errors,
    warnings: state.warnings,
    toJS: () => toJsValue(contents, anchors, merge),
  }
}
import { describe, expect, it } from 'vitest'
import { parse } from './parse'
// End-to-end checks for `parse`: YAML text in, plain JavaScript value out.
describe('parse', () => {
it('parses a mapping to a plain object', () => {
expect(parse('a: 1\nb: two\n')).toEqual({ a: 1, b: 'two' })
})
it('parses a sequence to an array', () => {
expect(parse('- 1\n- 2\n- 3\n')).toEqual([1, 2, 3])
})
it('parses a realistic nested document', () => {
// Covers a nested block mapping, an inline flow mapping (`{}`), and
// version-like scalars that must stay strings.
const data = parse(['openapi: 3.1.0', 'info:', ' title: My API', ' version: 1.0.0', 'paths: {}'].join('\n'))
expect(data).toEqual({ openapi: '3.1.0', info: { title: 'My API', version: '1.0.0' }, paths: {} })
})
it('returns null for an empty document', () => {
// Both a truly empty input and one holding only blank lines and a comment.
expect(parse('')).toBeNull()
expect(parse('\n\n# just a comment\n')).toBeNull()
})
it('coerces scalar types via the core schema', () => {
expect(parse('n: 5\nf: 1.5\nb: true\nz: null\ns: hello\n')).toEqual({
n: 5,
f: 1.5,
b: true,
z: null,
s: 'hello',
})
})
})
import { parseDocument } from './parse-document'
import type { ParseOptions } from './types'
/**
 * Converts YAML text directly into the JavaScript value it describes, much as
 * `JSON.parse` does for JSON. Reach for {@link parseDocument} when source
 * ranges or the collected problems are needed for diagnostics.
 *
 * @param source - The YAML text.
 * @param options - Forwarded unchanged to {@link parseDocument}.
 * @returns Whatever the parsed document's `toJS()` produces.
 */
export const parse = (source: string, options?: ParseOptions): unknown => {
  const document = parseDocument(source, options)
  return document.toJS()
}
import { describe, expect, it } from 'vitest'
import { resolveDoubleQuoted, resolvePlainValue, resolveSingleQuoted } from './resolve-scalar'
// Unit checks for scalar resolution: plain-scalar typing plus single- and
// double-quoted unescaping and folding.
describe('resolve-scalar', () => {
it('resolves the null forms', () => {
expect(resolvePlainValue('')).toBeNull()
expect(resolvePlainValue('~')).toBeNull()
expect(resolvePlainValue('null')).toBeNull()
expect(resolvePlainValue('NULL')).toBeNull()
})
it('resolves booleans only for the canonical spellings', () => {
expect(resolvePlainValue('true')).toBe(true)
expect(resolvePlainValue('False')).toBe(false)
// YAML 1.2 core schema does not treat yes/no/on/off as booleans.
expect(resolvePlainValue('yes')).toBe('yes')
expect(resolvePlainValue('off')).toBe('off')
})
it('resolves integers in decimal, hex, and octal', () => {
expect(resolvePlainValue('42')).toBe(42)
expect(resolvePlainValue('-7')).toBe(-7)
expect(resolvePlainValue('0x1F')).toBe(31)
expect(resolvePlainValue('0o17')).toBe(15)
})
it('resolves floats including infinity and nan', () => {
expect(resolvePlainValue('3.14')).toBe(3.14)
expect(resolvePlainValue('1e3')).toBe(1000)
expect(resolvePlainValue('-.inf')).toBe(Number.NEGATIVE_INFINITY)
// NaN never equals itself, so it is checked through Number.isNaN.
expect(Number.isNaN(resolvePlainValue('.nan') as number)).toBe(true)
})
it('keeps version-like and ambiguous text as strings', () => {
// The killer case: an OpenAPI version must not become a float.
expect(resolvePlainValue('1.0.0')).toBe('1.0.0')
expect(resolvePlainValue('3.1.0')).toBe('3.1.0')
expect(resolvePlainValue('name')).toBe('name')
expect(resolvePlainValue('true story')).toBe('true story')
})
it('unescapes single-quoted scalars', () => {
expect(resolveSingleQuoted("it''s fine")).toBe("it's fine")
expect(resolveSingleQuoted('plain')).toBe('plain')
})
it('unescapes double-quoted scalars', () => {
// The doubled backslashes hand the resolver a literal backslash escape.
expect(resolveDoubleQuoted('a\\nb')).toBe('a\nb')
expect(resolveDoubleQuoted('tab\\there')).toBe('tab\there')
expect(resolveDoubleQuoted('quote\\"end')).toBe('quote"end')
expect(resolveDoubleQuoted('\\u00e9')).toBe('é')
expect(resolveDoubleQuoted('plain')).toBe('plain')
})
it('folds multi-line flow scalars', () => {
// A single break folds to a space; one blank line folds to one newline.
expect(resolveDoubleQuoted('one\ntwo')).toBe('one two')
expect(resolveSingleQuoted('one\n\ntwo')).toBe('one\ntwo')
})
})
/**
* Scalar resolution: turning raw YAML text into a JavaScript value.
*
* We follow the YAML 1.2 "core schema" tag-resolution rules for plain scalars
* (null / bool / int / float / string) and the standard escape rules for
* quoted scalars. The hot path — a plain scalar with no special characters — is
* a couple of cheap comparisons before any regex runs.
*/
// Integer forms. A leading sign is accepted on all three.
const INT_DEC = /^[-+]?[0-9]+$/
const INT_HEX = /^[-+]?0x[0-9a-fA-F]+$/
const INT_OCT = /^[-+]?0o[0-7]+$/
// Decimal float with optional fraction and exponent. The pattern also matches
// bare integers, which is harmless because INT_DEC is tested first; dotted
// version strings like `1.0.0` do not match and therefore stay strings.
const FLOAT = /^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$/
/**
 * First-character gate as a 128-entry lookup table. Only a handful of characters
 * can begin a non-string scalar; the overwhelmingly common case is a key or
 * value whose first char is a plain letter, so a single indexed read beats the
 * branch chain it replaces. Built once at module load.
 */
const MAYBE_SPECIAL = /* @__PURE__ */ (() => {
  const t = new Uint8Array(128)
  t[0x2e] = 1 // .
  t[0x2d] = 1 // -
  t[0x2b] = 1 // +
  t[0x7e] = 1 // ~
  for (let d = 0x30; d <= 0x39; d++) t[d] = 1 // 0-9
  t[0x6e] = 1 // n
  t[0x4e] = 1 // N
  t[0x74] = 1 // t
  t[0x54] = 1 // T
  t[0x66] = 1 // f
  t[0x46] = 1 // F
  return t
})()
/**
 * Resolves a plain (unquoted) scalar to its core-schema JavaScript value.
 *
 * @param text - The scalar's text, already trimmed by the scanner.
 * @returns `null` for the empty string and the null spellings, a boolean for
 *   the `true`/`false` spellings, a number for integers (decimal, `0x` hex,
 *   `0o` octal), floats, `.inf` and `.nan`, and otherwise `text` unchanged.
 *   A leading `-` on a hex or octal literal yields a negative number.
 */
export const resolvePlainValue = (text: string): string | number | boolean | null => {
  // Empty plain scalar is null in YAML (e.g. a key with no value).
  if (text === '') return null
  // Cheap first-char gate: only a handful of characters can begin a non-string.
  const c = text.charCodeAt(0)
  if (c >= 128 || MAYBE_SPECIAL[c] === 0) return text
  switch (text) {
    case '~':
    case 'null':
    case 'Null':
    case 'NULL':
      return null
    case 'true':
    case 'True':
    case 'TRUE':
      return true
    case 'false':
    case 'False':
    case 'FALSE':
      return false
    case '.inf':
    case '.Inf':
    case '.INF':
    case '+.inf':
    case '+.Inf':
    case '+.INF':
      return Number.POSITIVE_INFINITY
    case '-.inf':
    case '-.Inf':
    case '-.INF':
      return Number.NEGATIVE_INFINITY
    case '.nan':
    case '.NaN':
    case '.NAN':
      return Number.NaN
    default:
      break
  }
  // Only digits, signs, and `.` can begin a number. A word starting with
  // n/N/t/T/f/F that was not a keyword above is a plain string, so we can skip
  // the numeric regexes entirely — a meaningful win on key-heavy documents.
  if (c === 0x6e || c === 0x4e || c === 0x74 || c === 0x54 || c === 0x66 || c === 0x46) return text
  if (INT_DEC.test(text)) return Number.parseInt(text, 10)
  // With the radix prefix removed, `parseInt` reads the optional sign itself.
  // Applying the sign a second time would turn `-0x1F` back into +31.
  if (INT_HEX.test(text)) return Number.parseInt(text.replace('0x', ''), 16)
  if (INT_OCT.test(text)) return Number.parseInt(text.replace('0o', ''), 8)
  if (FLOAT.test(text)) return Number.parseFloat(text)
  return text
}
/**
 * Single-character escapes of double-quoted scalars, keyed by the character
 * that follows the backslash. Invisible and non-ASCII results are written as
 * explicit escapes so they survive editors, formatters, and re-encoding.
 */
const DOUBLE_ESCAPES: Record<string, string> = {
  '0': '\0',
  a: '\x07',
  b: '\b',
  t: '\t',
  '\t': '\t',
  n: '\n',
  v: '\v',
  f: '\f',
  r: '\r',
  e: '\x1b',
  ' ': ' ',
  '"': '"',
  '/': '/',
  '\\': '\\',
  N: '\u0085', // next line (NEL)
  _: '\u00a0', // non-breaking space
  L: '\u2028', // line separator
  P: '\u2029', // paragraph separator
}
/** Removes leading spaces and tabs. */
const lstrip = (s: string): string => s.replace(/^[ \t]+/, '')
/** Removes trailing spaces and tabs. */
const rstrip = (s: string): string => s.replace(/[ \t]+$/, '')
/**
 * Applies YAML flow folding to the raw text of a multi-line quoted scalar: one
 * line break between content lines turns into a space, and a run of blank
 * lines turns into that many newlines.
 *
 * What counts as content versus padding:
 * - indentation at the start of a continuation line is always discarded;
 * - whitespace at the end of a line is discarded everywhere except on the last
 *   line, where nothing follows it and it is therefore literal content;
 * - when a blank run reaches the end of the text, one break is absorbed by the
 *   closing delimiter: a single trailing blank folds to a space and a run of
 *   `n` yields `n - 1` newlines.
 *
 * Text without any `\n` is returned untouched.
 */
const foldLines = (text: string): string => {
  const rows = text.split('\n')
  const finalIdx = rows.length - 1
  if (finalIdx === 0) return text
  let folded = rstrip(rows[0] ?? '')
  let cursor = 1
  while (cursor <= finalIdx) {
    const row = rows[cursor] ?? ''
    if (row.trim() !== '') {
      // Content line: the break before it folds to a space. Trailing
      // whitespace is kept only when this is the very last line.
      folded += ' ' + (cursor < finalIdx ? row.trim() : lstrip(row))
      cursor++
      continue
    }
    // Measure the run of blank lines starting here.
    const runStart = cursor
    while (cursor <= finalIdx && (rows[cursor] ?? '').trim() === '') cursor++
    const run = cursor - runStart
    if (cursor > finalIdx) {
      // The run touches the closing delimiter.
      folded += run === 1 ? ' ' : '\n'.repeat(run - 1)
    } else {
      // Interior run: one newline per blank line, then the next content line.
      const next = rows[cursor] ?? ''
      folded += '\n'.repeat(run) + (cursor < finalIdx ? next.trim() : lstrip(next))
      cursor++
    }
  }
  return folded
}
/**
 * Resolves the inner text of a single-quoted scalar. Multi-line text is folded
 * first; afterwards the style's only escape, a doubled quote `''`, collapses
 * to a single `'`. Text needing neither step is returned as-is.
 */
export const resolveSingleQuoted = (inner: string): string => {
  const text = inner.includes('\n') ? foldLines(inner) : inner
  if (!text.includes("''")) return text
  return text.replace(/''/g, "'")
}
/** Matches a string made up solely of hexadecimal digits. */
const HEX_DIGITS_ONLY = /^[0-9a-fA-F]+$/
/**
 * Resolves the inner text of a double-quoted scalar: folds line breaks (when
 * the text spans several lines) and then decodes backslash escapes.
 *
 * - `\xHH`, `\uHHHH` and `\UHHHHHHHH` must be followed by exactly that many
 *   hex digits and denote a code point no greater than U+10FFFF. A malformed
 *   or out-of-range escape never throws: the backslash is dropped and the rest
 *   is kept verbatim, exactly like an unknown escape.
 * - Any other escape is looked up in `DOUBLE_ESCAPES`; an unknown one yields
 *   the escaped character itself.
 * - A lone backslash at the very end is kept literally.
 *
 * @param inner - Text between the quotes, escapes still encoded.
 * @returns The decoded string.
 */
export const resolveDoubleQuoted = (inner: string): string => {
  // Fast path: a plain double-quoted string with nothing to process.
  if (inner.indexOf('\\') === -1 && inner.indexOf('\n') === -1) return inner
  const source = inner.indexOf('\n') === -1 ? inner : foldLines(inner)
  let out = ''
  let i = 0
  while (i < source.length) {
    const ch = source[i]
    if (ch !== '\\') {
      out += ch
      i++
      continue
    }
    const next = source[i + 1]
    if (next === undefined) {
      out += '\\'
      break
    }
    if (next === 'x' || next === 'u' || next === 'U') {
      const len = next === 'x' ? 2 : next === 'u' ? 4 : 8
      const hex = source.slice(i + 2, i + 2 + len)
      // Validate strictly: `parseInt` alone would accept `4z`, ` 1` or `-1`,
      // and `String.fromCodePoint` throws on anything outside 0..0x10FFFF.
      const code = hex.length === len && HEX_DIGITS_ONLY.test(hex) ? Number.parseInt(hex, 16) : -1
      if (code >= 0 && code <= 0x10ffff) {
        out += String.fromCodePoint(code)
        i += 2 + len
      } else {
        out += next
        i += 2
      }
      continue
    }
    const mapped = DOUBLE_ESCAPES[next]
    out += mapped ?? next
    i += 2
  }
  return out
}
/**
* Core data model for the parser. Every node carries its absolute `[start, end)`
* character-offset span as two inline fields so a consumer can map any value
* back to an exact `line:column` in the source — the whole reason this package
* exists. Storing them inline (rather than as a `range` tuple) avoids a second
* heap allocation per node.
*/
/**
 * Scalar styles we distinguish, because the style decides how a value resolves.
 *
 * `plain` is an unquoted scalar, `block-literal` a `|` block scalar, and
 * `block-folded` a `>` block scalar.
 * NOTE(review): `single` / `double` presumably mark `'…'` and `"…"` quoted
 * scalars — confirm against the quoted-scalar scanner.
 */
export type ScalarStyle = 'plain' | 'single' | 'double' | 'block-literal' | 'block-folded'
/**
 * A leaf value: a string, number, boolean, or null. `value` is the resolved
 * JavaScript value; `source` is the raw text exactly as it appeared (handy for
 * diagnostics that want to quote the original).
 */
export type YamlScalar = {
/** Discriminant shared by every node type. */
kind: 'scalar'
/** Resolved JavaScript value. */
value: string | number | boolean | null
/** Raw source text of the scalar; the empty string for an empty node. */
source: string
/** How the scalar was written. */
style: ScalarStyle
/** Inclusive start offset into the source. */
start: number
/** Exclusive end offset into the source. */
end: number
/** A `!!`-style tag if one was written, e.g. `str` for `!!str`. */
tag?: string
/** The `&name` anchor declared on this node, if any. */
anchor?: string
}
/**
 * A `*name` reference to a previously anchored node. Resolved during `toJS`;
 * an alias whose anchor is unknown resolves to `undefined`.
 */
export type YamlAlias = {
/** Discriminant shared by every node type. */
kind: 'alias'
/** The anchor name this alias points at (without the leading `*`). */
source: string
/** Inclusive start offset into the source. */
start: number
/** Exclusive end offset into the source. */
end: number
}
/**
 * One `key: value` entry of a block or flow mapping. A pair is not a
 * {@link YamlNode}; it only ever appears inside `YamlMap.items`.
 */
export type YamlPair = {
/** Discriminant; distinguishes a pair from the node types. */
kind: 'pair'
/** The key node. Usually a scalar, but any node type is allowed. */
key: YamlNode
/** `null` when a key is written with no value, e.g. `paths:` on its own line. */
value: YamlNode | null
/** Inclusive start offset: the key's start. */
start: number
/** Exclusive end offset: the value's end, or the key's end when there is no value. */
end: number
}
/** A mapping — an ordered list of key/value pairs. */
export type YamlMap = {
/** Discriminant shared by every node type. */
kind: 'map'
/** Entries in source order. Duplicate keys are kept, not collapsed. */
items: YamlPair[]
/** Inclusive start offset into the source. */
start: number
/** Exclusive end offset into the source. */
end: number
/** Tag written on the mapping, if any. */
tag?: string
/** The `&name` anchor declared on this mapping, if any. */
anchor?: string
}
/** A sequence — an ordered list of nodes. */
export type YamlSeq = {
/** Discriminant shared by every node type. */
kind: 'seq'
/** Items in source order. */
items: YamlNode[]
/** Inclusive start offset into the source. */
start: number
/** Exclusive end offset into the source. */
end: number
/** Tag written on the sequence, if any. */
tag?: string
/** The `&name` anchor declared on this sequence, if any. */
anchor?: string
}
/**
 * Any node that can appear in the tree. Discriminate on `kind` to narrow to a
 * concrete type. {@link YamlPair} is deliberately not part of this union.
 */
export type YamlNode = YamlScalar | YamlAlias | YamlMap | YamlSeq
/**
 * Severity for collected problems. Errors mean the data may be wrong; warnings
 * are advisory. Stored on each {@link YamlError} as its `kind`.
 */
export type YamlErrorKind = 'error' | 'warning'
/**
 * A parse problem with an exact source span. `start`/`end` are `[start, end)`
 * offsets; pair them with {@link import('./line-counter').lineCounter} for
 * `line:column`.
 */
export type YamlError = {
/** Severity of the problem. */
kind: YamlErrorKind
/** Short stable code, e.g. `DUPLICATE_KEY`, so callers can branch without string-matching. */
code: string
/** Human-readable description of the problem. */
message: string
/** Inclusive start offset of the offending span. */
start: number
/** Exclusive end offset of the offending span. */
end: number
}
/** A parsed document: the node tree, any problems, and a lazy `toJS` projection. */
export type YamlDocument = {
/** Root node, or `null` for an empty document. */
contents: YamlNode | null
/** Problems collected while parsing that mean the data may be wrong. */
errors: YamlError[]
/** Advisory problems collected while parsing. */
warnings: YamlError[]
/** Materializes the plain JavaScript value, resolving aliases and merge keys. */
toJS: () => unknown
}
/**
 * Options accepted by `parseDocument` and `parse`. Every option is optional
 * and enabled by default; only an explicit `false` turns one off.
 */
export type ParseOptions = {
/**
 * Report duplicate mapping keys. Default `true`. Set `false` to allow them
 * (the last value wins, matching `JSON.parse` semantics).
 */
uniqueKeys?: boolean
/**
 * Honor the `<<` merge key (YAML merge spec). Default `true`. When off, `<<`
 * is treated as an ordinary key.
 */
merge?: boolean
}