@amritk/yaml
Advanced tools
@@ -1,1 +0,1 @@ | ||
| {"version":3,"file":"resolve-scalar.d.ts","sourceRoot":"","sources":["../src/resolve-scalar.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AA8BH,8EAA8E;AAC9E,eAAO,MAAM,iBAAiB,SAAU,MAAM,KAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,IAoD5E,CAAA;AAyDD,oFAAoF;AACpF,eAAO,MAAM,mBAAmB,UAAW,MAAM,KAAG,MAGnD,CAAA;AAED,6FAA6F;AAC7F,eAAO,MAAM,mBAAmB,UAAW,MAAM,KAAG,MAgCnD,CAAA"} | ||
| {"version":3,"file":"resolve-scalar.d.ts","sourceRoot":"","sources":["../src/resolve-scalar.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AA8BH,8EAA8E;AAC9E,eAAO,MAAM,iBAAiB,SAAU,MAAM,KAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,IAoD5E,CAAA;AA4ED,oFAAoF;AACpF,eAAO,MAAM,mBAAmB,UAAW,MAAM,KAAG,MAGnD,CAAA;AAED,6FAA6F;AAC7F,eAAO,MAAM,mBAAmB,UAAW,MAAM,KAAG,MAgCnD,CAAA"} |
+32
-13
@@ -113,6 +113,16 @@ /** | ||
| /** | ||
| * Folds the line breaks of a multi-line flow scalar: a single break becomes a | ||
| * space, and each blank line becomes one literal newline. Leading and trailing | ||
| * whitespace on continuation lines is trimmed, per the YAML flow folding rules. | ||
| * Folds the line breaks of a multi-line flow scalar, per the YAML flow folding | ||
| * rules: a single break between content becomes a space, and a run of blank | ||
| * lines becomes that many literal newlines. | ||
| * | ||
| * Whitespace handling mirrors what the spec keeps as content vs. discards: | ||
| * - leading whitespace on a continuation line is folding indentation, so it is | ||
| * always dropped; | ||
| * - trailing whitespace is dropped on every line *except the last*, where no | ||
| * line break follows so the spaces are literal content; | ||
| * - a blank-line run that reaches the end of the scalar yields one fewer | ||
| * newline (or a single space when the run is just one line), because the | ||
| * break before the closing delimiter is stripped. | ||
| */ | ||
| const lstrip = (s) => s.replace(/^[ \t]+/, ''); | ||
| const rstrip = (s) => s.replace(/[ \t]+$/, ''); | ||
| const foldLines = (text) => { | ||
@@ -122,16 +132,23 @@ const lines = text.split('\n'); | ||
| return text; | ||
| let out = lines[0]?.replace(/[ \t]+$/, '') ?? ''; | ||
| const last = lines.length - 1; | ||
| let out = rstrip(lines[0] ?? ''); | ||
| let i = 1; | ||
| while (i < lines.length) { | ||
| const trimmed = (lines[i] ?? '').trim(); | ||
| if (trimmed === '') { | ||
| // Run of blank lines: each one contributes a newline. | ||
| while (i <= last) { | ||
| if ((lines[i] ?? '').trim() === '') { | ||
| // Run of blank lines. | ||
| let blanks = 0; | ||
| while (i < lines.length && (lines[i] ?? '').trim() === '') { | ||
| while (i <= last && (lines[i] ?? '').trim() === '') { | ||
| blanks++; | ||
| i++; | ||
| } | ||
| out += '\n'.repeat(blanks); | ||
| if (i < lines.length) { | ||
| out += (lines[i] ?? '').trim(); | ||
| if (i > last) { | ||
| // Trailing run reaching the closing delimiter: the last line of the run | ||
| // only leads up to the delimiter, so it is not an empty line of content. | ||
| // A run of 1 is a plain break that folds to a space; a run of `n > 1` | ||
| // contributes `n - 1` newlines. | ||
| out += blanks === 1 ? ' ' : '\n'.repeat(blanks - 1); | ||
| } | ||
| else { | ||
| // Interior run: each blank line is one newline, then the next content. | ||
| out += '\n'.repeat(blanks); | ||
| out += i === last ? lstrip(lines[i] ?? '') : (lines[i] ?? '').trim(); | ||
| i++; | ||
@@ -141,3 +158,5 @@ } | ||
| else { | ||
| out += ' ' + trimmed; | ||
| // Single break folds to a space. Keep trailing whitespace only on the | ||
| // final line, where it is literal content rather than folding padding. | ||
| out += ' ' + (i === last ? lstrip(lines[i] ?? '') : (lines[i] ?? '').trim()); | ||
| i++; | ||
@@ -144,0 +163,0 @@ } |
+1
-1
| { | ||
| "name": "@amritk/yaml", | ||
| "version": "0.1.0", | ||
| "version": "0.1.1", | ||
| "description": "A tiny, dependency-free YAML parser with exact source positions. Built for diagnostics: every node maps back to an exact line:column.", | ||
@@ -5,0 +5,0 @@ "module": "./dist/index.js", |
+2
-2
@@ -20,3 +20,3 @@ <div align="center"> | ||
| `@amritk/yaml` parses YAML into a JavaScript value **and** a lightweight tree where **every node records its exact `[start, end)` source range**. That second part is the whole point: a linter or language server needs to put a squiggle at an exact `line:column`, and most fast YAML parsers throw position information away. | ||
| `@amritk/yaml` parses YAML into a JavaScript value **and** a lightweight tree where **every node records the exact source it came from** — a `start` offset (inclusive) and an `end` offset (exclusive). That second part is the whole point: a linter or language server needs to put a squiggle at an exact `line:column`, and most fast YAML parsers throw position information away. | ||
@@ -102,3 +102,3 @@ It is **zero-dependency** and tuned to be **small and fast**. Against the two parsers people reach for on the web: | ||
| | `parseDocument(source, options?)` | Parse to `{ contents, errors, warnings, toJS() }` where every node carries `start`/`end` source offsets. | | ||
| | `nodeAtPath(root, path, closest?)` | Resolve a JSON path to its node (with `range`), optionally falling back to the closest ancestor. | | ||
| | `nodeAtPath(root, path, closest?)` | Resolve a JSON path to its node (carrying `start`/`end`), optionally falling back to the closest ancestor. | | ||
| | `lineCounter(source)` | Build an `offset → { line, col }` mapper (1-based). | | ||
@@ -105,0 +105,0 @@ | `isScalar` / `isMap` / `isSeq` / `isPair` / `isAlias` | Narrowing guards over the node union. | |
@@ -0,1 +1,2 @@ | ||
| import { readFileSync } from 'node:fs' | ||
| import { describe, expect, it } from 'vitest' | ||
@@ -8,2 +9,12 @@ import { parse as eemeli } from 'yaml' | ||
| /** | ||
| * Real-world public specs vendored under `fixtures/` (see `fixtures/README.md` | ||
| * for provenance). Read from disk so the bytes stay identical to what the | ||
| * upstream publisher serves. `bench/fixtures.ts` stays synthetic; this is where | ||
| * we exercise the parser against documents we don't control. | ||
| */ | ||
| const VENDORED = ['digitalocean'] as const | ||
| const readVendored = (name: string): string => | ||
| readFileSync(new URL(`../fixtures/${name}.yaml`, import.meta.url), 'utf8') | ||
| /** | ||
| * Differential tests against `yaml` (eemeli) — the reference parser the Loupe | ||
@@ -34,2 +45,10 @@ * linter currently uses and the one that, like us, tracks source positions. For | ||
| 'q: "https://example.com/a?b=c&d=e"\n', | ||
| // Multi-line flow scalars — folding edge cases that real specs (GitHub's | ||
| // OpenAPI) hit: trailing whitespace on the closing line is literal content, | ||
| // and a blank-line run reaching the close yields one fewer newline. | ||
| "s: 'first line\n second line. '\n", | ||
| 's: "first line\n second line. "\n', | ||
| "s: 'para one\n still one\n\n '\n", | ||
| 's: "para one\n still one\n\n "\n', | ||
| "s: 'a\n\n\n '\nt: 'a\n b\n\n c'\n", | ||
| // Block scalars with chomping. | ||
@@ -59,2 +78,11 @@ 'text: |\n line one\n line two\n', | ||
| } | ||
| // Large, real-world public specs we don't control — the documents this | ||
| // parser actually has to survive in the wild. | ||
| for (const name of VENDORED) { | ||
| it(`matches yaml for vendored spec: ${name}`, () => { | ||
| const source = readVendored(name) | ||
| expect(ours(source)).toEqual(eemeli(source, { merge: true })) | ||
| }) | ||
| } | ||
| }) |
+32
-13
@@ -115,27 +115,46 @@ /** | ||
| /** | ||
| * Folds the line breaks of a multi-line flow scalar: a single break becomes a | ||
| * space, and each blank line becomes one literal newline. Leading and trailing | ||
| * whitespace on continuation lines is trimmed, per the YAML flow folding rules. | ||
| * Folds the line breaks of a multi-line flow scalar, per the YAML flow folding | ||
| * rules: a single break between content becomes a space, and a run of blank | ||
| * lines becomes that many literal newlines. | ||
| * | ||
| * Whitespace handling mirrors what the spec keeps as content vs. discards: | ||
| * - leading whitespace on a continuation line is folding indentation, so it is | ||
| * always dropped; | ||
| * - trailing whitespace is dropped on every line *except the last*, where no | ||
| * line break follows so the spaces are literal content; | ||
| * - a blank-line run that reaches the end of the scalar yields one fewer | ||
| * newline (or a single space when the run is just one line), because the | ||
| * break before the closing delimiter is stripped. | ||
| */ | ||
| const lstrip = (s: string): string => s.replace(/^[ \t]+/, '') | ||
| const rstrip = (s: string): string => s.replace(/[ \t]+$/, '') | ||
| const foldLines = (text: string): string => { | ||
| const lines = text.split('\n') | ||
| if (lines.length === 1) return text | ||
| let out = lines[0]?.replace(/[ \t]+$/, '') ?? '' | ||
| const last = lines.length - 1 | ||
| let out = rstrip(lines[0] ?? '') | ||
| let i = 1 | ||
| while (i < lines.length) { | ||
| const trimmed = (lines[i] ?? '').trim() | ||
| if (trimmed === '') { | ||
| // Run of blank lines: each one contributes a newline. | ||
| while (i <= last) { | ||
| if ((lines[i] ?? '').trim() === '') { | ||
| // Run of blank lines. | ||
| let blanks = 0 | ||
| while (i < lines.length && (lines[i] ?? '').trim() === '') { | ||
| while (i <= last && (lines[i] ?? '').trim() === '') { | ||
| blanks++ | ||
| i++ | ||
| } | ||
| out += '\n'.repeat(blanks) | ||
| if (i < lines.length) { | ||
| out += (lines[i] ?? '').trim() | ||
| if (i > last) { | ||
| // Trailing run reaching the closing delimiter: the last line of the run | ||
| // only leads up to the delimiter, so it is not an empty line of content. | ||
| // A run of 1 is a plain break that folds to a space; a run of `n > 1` | ||
| // contributes `n - 1` newlines. | ||
| out += blanks === 1 ? ' ' : '\n'.repeat(blanks - 1) | ||
| } else { | ||
| // Interior run: each blank line is one newline, then the next content. | ||
| out += '\n'.repeat(blanks) | ||
| out += i === last ? lstrip(lines[i] ?? '') : (lines[i] ?? '').trim() | ||
| i++ | ||
| } | ||
| } else { | ||
| out += ' ' + trimmed | ||
| // Single break folds to a space. Keep trailing whitespace only on the | ||
| // final line, where it is literal content rather than folding padding. | ||
| out += ' ' + (i === last ? lstrip(lines[i] ?? '') : (lines[i] ?? '').trim()) | ||
| i++ | ||
@@ -142,0 +161,0 @@ } |
125490
2.94%3019
2.13%