Big News: Socket raises $60M Series C at a $1B valuation to secure software supply chains for AI-driven development. Announcement
Sign In

@optave/codegraph

Package Overview
Dependencies
Maintainers
1
Versions
47
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@optave/codegraph - npm Package Compare versions

Comparing version
3.0.4
to
3.1.0
+369
src/sequence.js
/**
* Sequence diagram generation – Mermaid sequenceDiagram from call graph edges.
*
* Participants are files (not individual functions). Calls within the same file
* become self-messages. This keeps diagrams readable and matches typical
* sequence-diagram conventions.
*/
import { openReadonlyOrFail } from './db.js';
import { paginateResult, printNdjson } from './paginate.js';
import { findMatchingNodes, isTestFile, kindIcon } from './queries.js';
import { FRAMEWORK_ENTRY_PREFIXES } from './structure.js';
// ─── Alias generation ────────────────────────────────────────────────
/**
 * Build short, unique participant aliases from file paths.
 *
 * A file whose basename (extension stripped) is unique keeps that basename,
 * e.g. "src/builder.js" → "builder". Files that share a basename progressively
 * take on parent directories until the group is distinct, e.g. "src_builder"
 * vs "lib_builder" (at most 10 trailing path segments are used).
 *
 * As a last step every alias is made globally unique: if two files would still
 * end up with the same alias (a unique basename that equals another group's
 * disambiguated alias, or a group that is still ambiguous at the depth cap),
 * later files receive a numeric suffix ("_2", "_3", …). Without this, two files
 * would be rendered as a single diagram participant.
 *
 * @param {Iterable<string>} files - '/'-separated file paths
 * @returns {Map<string, string>} Map of file path → alias
 */
function buildAliases(files) {
  const MAX_DEPTH = 10;

  // Split a path into segments, stripping the extension from the LAST segment
  // only. Stripping on the whole path would treat "a.b/Makefile" as having the
  // extension ".b/Makefile" and throw the file name away.
  const toSegments = (file) => {
    const segments = file.split('/');
    const last = segments.pop().replace(/\.[^.]+$/, '');
    segments.push(last);
    return segments;
  };

  // Group paths by extension-less basename.
  const groups = new Map();
  for (const file of files) {
    const segments = toSegments(file);
    const base = segments[segments.length - 1];
    if (!groups.has(base)) groups.set(base, []);
    groups.get(base).push(file);
  }

  const aliases = new Map();
  for (const [base, paths] of groups) {
    if (paths.length === 1) {
      aliases.set(paths[0], base);
      continue;
    }
    // Collision — progressively add parent dirs until aliases are unique.
    for (let depth = 2; depth <= MAX_DEPTH; depth++) {
      const trial = new Map();
      for (const p of paths) {
        const alias = toSegments(p)
          .slice(-depth)
          .join('_')
          .replace(/[^a-zA-Z0-9_-]/g, '_');
        trial.set(p, alias);
      }
      const allUnique = new Set(trial.values()).size === paths.length;
      if (allUnique || depth === MAX_DEPTH) {
        for (const [p, alias] of trial) aliases.set(p, alias);
        break;
      }
    }
  }

  // Final pass: guarantee that no two files share an alias. The first file to
  // claim an alias keeps it unchanged, so already-unique results are untouched.
  const taken = new Set();
  for (const [file, alias] of aliases) {
    let unique = alias;
    for (let n = 2; taken.has(unique); n++) unique = `${alias}_${n}`;
    taken.add(unique);
    // Updating the value of an existing key is safe while iterating a Map.
    if (unique !== alias) aliases.set(file, unique);
  }
  return aliases;
}
// ─── Core data function ──────────────────────────────────────────────
/**
 * Build sequence diagram data by BFS-forward from an entry point.
 *
 * The entry symbol is resolved with `findMatchingNodes`, first using `name` as
 * given and then with each of `FRAMEWORK_ENTRY_PREFIXES` prepended. From there
 * the 'calls' edges are followed breadth-first up to `opts.depth` levels; every
 * call edge becomes a message between the caller's file and the callee's file.
 *
 * The database handle is always closed before returning, including when a
 * query or helper throws.
 *
 * @param {string} name - Symbol name to trace from
 * @param {string} [dbPath]
 * @param {object} [opts]
 * @param {number} [opts.depth=10] - Max BFS depth (a falsy value, including 0, falls back to 10)
 * @param {boolean} [opts.noTests] - Skip callees located in test files
 * @param {string} [opts.file] - Forwarded to `findMatchingNodes`
 * @param {string} [opts.kind] - Forwarded to `findMatchingNodes`
 * @param {boolean} [opts.dataflow] - Add return arrows and parameter annotations
 * @param {number} [opts.limit] - Forwarded to `paginateResult`
 * @param {number} [opts.offset] - Forwarded to `paginateResult`
 * @returns {{ entry, participants, messages, depth, totalMessages, truncated }}
 */
export function sequenceData(name, dbPath, opts = {}) {
  const db = openReadonlyOrFail(dbPath);
  try {
    const maxDepth = opts.depth || 10;
    const noTests = opts.noTests || false;
    const withDataflow = opts.dataflow || false;

    // Phase 1: match the name as given
    let matchNode = findMatchingNodes(db, name, opts)[0] ?? null;

    // Phase 2: retry with each framework entry prefix prepended to the name
    if (!matchNode) {
      for (const prefix of FRAMEWORK_ENTRY_PREFIXES) {
        matchNode = findMatchingNodes(db, `${prefix}${name}`, opts)[0] ?? null;
        if (matchNode) break;
      }
    }

    if (!matchNode) {
      return {
        entry: null,
        participants: [],
        messages: [],
        depth: maxDepth,
        totalMessages: 0,
        truncated: false,
      };
    }

    const entry = {
      name: matchNode.name,
      file: matchNode.file,
      kind: matchNode.kind,
      line: matchNode.line,
    };

    // BFS forward — track edges, not just nodes
    const visited = new Set([matchNode.id]);
    let frontier = [matchNode.id];
    const messages = [];
    const fileSet = new Set([matchNode.file]);
    const idToNode = new Map();
    idToNode.set(matchNode.id, matchNode);
    let truncated = false;

    const getCallees = db.prepare(
      `SELECT DISTINCT n.id, n.name, n.kind, n.file, n.line
       FROM edges e JOIN nodes n ON e.target_id = n.id
       WHERE e.source_id = ? AND e.kind = 'calls'`,
    );
    // A callee is ignored when test files are excluded and it lives in one.
    const isSkipped = (node) => noTests && isTestFile(node.file);

    for (let d = 1; d <= maxDepth; d++) {
      const nextFrontier = [];
      for (const fid of frontier) {
        const callees = getCallees.all(fid);
        const caller = idToNode.get(fid);
        for (const c of callees) {
          if (isSkipped(c)) continue;

          // Always record the message (even for visited nodes — different caller path)
          fileSet.add(c.file);
          messages.push({
            from: caller.file,
            to: c.file,
            label: c.name,
            type: 'call',
            depth: d,
          });

          if (visited.has(c.id)) continue;
          visited.add(c.id);
          nextFrontier.push(c.id);
          idToNode.set(c.id, c);
        }
      }
      frontier = nextFrontier;
      if (frontier.length === 0) break;

      if (d === maxDepth && frontier.length > 0) {
        // Only mark truncated if at least one frontier node has a further callee
        // that the traversal would actually have followed (i.e. not one that the
        // noTests filter skips anyway).
        const hasMoreCalls = frontier.some((fid) =>
          getCallees.all(fid).some((c) => !isSkipped(c)),
        );
        if (hasMoreCalls) truncated = true;
      }
    }

    // Dataflow annotations: add return arrows
    if (withDataflow && messages.length > 0) {
      // The dataflow table is optional; skip annotations when it does not exist.
      const hasTable = db
        .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='dataflow'")
        .get();

      if (hasTable) {
        // Build name|file lookup for O(1) target node access
        const nodeByNameFile = new Map();
        for (const n of idToNode.values()) {
          nodeByNameFile.set(`${n.name}|${n.file}`, n);
        }

        const getReturns = db.prepare(
          `SELECT d.expression FROM dataflow d
           WHERE d.source_id = ? AND d.kind = 'returns'`,
        );
        const getFlowsTo = db.prepare(
          `SELECT d.expression FROM dataflow d
           WHERE d.target_id = ? AND d.kind = 'flows_to'
           ORDER BY d.param_index`,
        );

        // For each called function, check if it has return edges.
        // Iterate over a copy because return messages are appended while looping.
        const seenReturns = new Set();
        for (const msg of [...messages]) {
          if (msg.type !== 'call') continue;
          const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`);
          if (!targetNode) continue;

          // One return arrow per (callee file, caller file, function) triple
          const returnKey = `${msg.to}->${msg.from}:${msg.label}`;
          if (seenReturns.has(returnKey)) continue;

          const returns = getReturns.all(targetNode.id);
          if (returns.length > 0) {
            seenReturns.add(returnKey);
            const expr = returns[0].expression || 'result';
            messages.push({
              from: msg.to,
              to: msg.from,
              label: expr,
              type: 'return',
              depth: msg.depth,
            });
          }
        }

        // Annotate call messages with parameter names. This must run after the
        // return pass above, which looks nodes up by the unannotated label.
        for (const msg of messages) {
          if (msg.type !== 'call') continue;
          const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`);
          if (!targetNode) continue;

          const params = getFlowsTo.all(targetNode.id);
          if (params.length > 0) {
            // Show at most three parameter expressions to keep labels short
            const paramNames = params
              .map((p) => p.expression)
              .filter(Boolean)
              .slice(0, 3);
            if (paramNames.length > 0) {
              msg.label = `${msg.label}(${paramNames.join(', ')})`;
            }
          }
        }
      }
    }

    // Sort messages by depth, then call before return
    messages.sort((a, b) => {
      if (a.depth !== b.depth) return a.depth - b.depth;
      if (a.type === 'call' && b.type === 'return') return -1;
      if (a.type === 'return' && b.type === 'call') return 1;
      return 0;
    });

    // Build participant list from files
    const aliases = buildAliases([...fileSet]);
    const participants = [...fileSet].map((file) => ({
      id: aliases.get(file),
      label: file.split('/').pop(),
      file,
    }));

    // Sort participants: entry file first, then alphabetically
    participants.sort((a, b) => {
      if (a.file === entry.file) return -1;
      if (b.file === entry.file) return 1;
      return a.file.localeCompare(b.file);
    });

    // Replace file paths with alias IDs in messages
    for (const msg of messages) {
      msg.from = aliases.get(msg.from);
      msg.to = aliases.get(msg.to);
    }

    const base = {
      entry,
      participants,
      messages,
      depth: maxDepth,
      totalMessages: messages.length,
      truncated,
    };
    const result = paginateResult(base, 'messages', { limit: opts.limit, offset: opts.offset });

    // When paginating, keep only the participants referenced by the returned page
    if (opts.limit !== undefined || opts.offset !== undefined) {
      const activeFiles = new Set(result.messages.flatMap((m) => [m.from, m.to]));
      result.participants = result.participants.filter((p) => activeFiles.has(p.id));
    }
    return result;
  } finally {
    // Release the handle on every exit path, including thrown errors.
    db.close();
  }
}
// ─── Mermaid formatter ───────────────────────────────────────────────
/**
 * Escape characters that are significant to Mermaid inside labels.
 *
 * Every special character is replaced by a Mermaid entity code (`#name;` or
 * `#decimal;`) in a single pass, so the `#` and `;` of an emitted code are
 * never escaped a second time. Besides `<`, `>`, `:` and `"`, this also covers
 * `#` (otherwise the rest of the line is read as a comment, e.g. for JS
 * `#private` member names) and `;` (a statement separator in Mermaid).
 *
 * @param {string} str - Raw label text
 * @returns {string} Label text that is safe to embed in Mermaid source
 */
function escapeMermaid(str) {
  const entityFor = {
    '#': '#35;',
    ';': '#59;',
    '<': '#lt;',
    '>': '#gt;',
    ':': '#colon;',
    '"': '#quot;',
  };
  return str.replace(/[#;<>:"]/g, (ch) => entityFor[ch]);
}
/**
 * Render a sequenceData result as Mermaid `sequenceDiagram` source.
 *
 * Emits one `participant` line per participant, one arrow line per message
 * (dashed `-->>` for returns, solid `->>` otherwise) and, when the result is
 * truncated and has at least one participant, a trailing note next to the
 * first participant.
 *
 * @param {{ participants, messages, truncated, depth }} seqResult
 * @returns {string} Newline-joined Mermaid source
 */
export function sequenceToMermaid(seqResult) {
  const participantLines = seqResult.participants.map(
    (p) => ` participant ${p.id} as ${escapeMermaid(p.label)}`,
  );

  const messageLines = seqResult.messages.map((msg) => {
    const arrow = msg.type === 'return' ? '-->>' : '->>';
    return ` ${msg.from}${arrow}${msg.to}: ${escapeMermaid(msg.label)}`;
  });

  const out = ['sequenceDiagram', ...participantLines, ...messageLines];

  const [firstParticipant] = seqResult.participants;
  if (seqResult.truncated && firstParticipant !== undefined) {
    out.push(` note right of ${firstParticipant.id}: Truncated at depth ${seqResult.depth}`);
  }

  return out.join('\n');
}
// ─── CLI formatter ───────────────────────────────────────────────────
/**
 * CLI entry point — print sequence data as NDJSON, JSON, or (by default) a
 * short summary header followed by Mermaid source.
 *
 * @param {string} name - Symbol name to trace from
 * @param {string} [dbPath]
 * @param {object} [opts] - Passed through to `sequenceData`; `opts.ndjson` and
 *   `opts.json` select the output format (ndjson wins if both are set)
 */
export function sequence(name, dbPath, opts = {}) {
  const data = sequenceData(name, dbPath, opts);

  // Machine-readable formats are emitted as-is, even when nothing matched.
  if (opts.ndjson) {
    printNdjson(data, 'messages');
    return;
  }
  if (opts.json) {
    console.log(JSON.stringify(data, null, 2));
    return;
  }

  // Default: mermaid format
  const { entry, participants, messages, totalMessages, truncated, depth } = data;
  if (!entry) {
    console.log(`No matching function found for "${name}".`);
    return;
  }

  console.log(`\nSequence from: [${kindIcon(entry.kind)}] ${entry.name} ${entry.file}:${entry.line}`);
  console.log(`Participants: ${participants.length} Messages: ${totalMessages}`);
  if (truncated) {
    console.log(` (truncated at depth ${depth})`);
  }
  console.log();

  // An entry with no outgoing calls has nothing to draw.
  if (messages.length === 0) {
    console.log(' (leaf node — no callees)');
  } else {
    console.log(sequenceToMermaid(data));
  }
}
+9
-9
{
"name": "@optave/codegraph",
"version": "3.0.4",
"version": "3.1.0",
"description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them",

@@ -60,3 +60,3 @@ "type": "module",

"commander": "^14.0.3",
"graphology": "^0.25.4",
"graphology": "^0.26.0",
"graphology-communities-louvain": "^2.0.2",

@@ -75,9 +75,9 @@ "web-tree-sitter": "^0.26.5"

"@modelcontextprotocol/sdk": "^1.0.0",
"@optave/codegraph-darwin-arm64": "3.0.4",
"@optave/codegraph-darwin-x64": "3.0.4",
"@optave/codegraph-linux-arm64-gnu": "3.0.4",
"@optave/codegraph-linux-arm64-musl": "3.0.4",
"@optave/codegraph-linux-x64-gnu": "3.0.4",
"@optave/codegraph-linux-x64-musl": "3.0.4",
"@optave/codegraph-win32-x64-msvc": "3.0.4"
"@optave/codegraph-darwin-arm64": "3.1.0",
"@optave/codegraph-darwin-x64": "3.1.0",
"@optave/codegraph-linux-arm64-gnu": "3.1.0",
"@optave/codegraph-linux-arm64-musl": "3.1.0",
"@optave/codegraph-linux-x64-gnu": "3.1.0",
"@optave/codegraph-linux-x64-musl": "3.1.0",
"@optave/codegraph-win32-x64-msvc": "3.1.0"
},

@@ -84,0 +84,0 @@ "devDependencies": {

+60
-53

@@ -24,3 +24,3 @@ <p align="center">

<a href="#-language-support">Languages</a> &middot;
<a href="#-ai-agent-integration">AI Integration</a> &middot;
<a href="#-ai-agent-integration-core">AI Integration</a> &middot;
<a href="#-how-it-works">How It Works</a> &middot;

@@ -35,8 +35,10 @@ <a href="#-recommended-practices">Practices</a> &middot;

Large codebases are opaque. The structure lives in people's heads, not in tools.
AI agents are the primary interface to large codebases — and they're flying blind.
A developer inherits a project and spends days grepping to understand what calls what. An AI agent burns half its token budget on `grep`, `find`, `cat` — re-discovering the same structure every session. An architect draws boundary rules on a whiteboard that erode within weeks because nothing enforces them. A CI pipeline catches test failures but can't tell you _"this change silently affects 14 callers across 9 files."_
An agent burns a great portion of its token budget on `grep`, `find`, `cat` — re-discovering the same structure every session. It modifies `parseConfig()` without knowing 9 files import it. It hallucinates a function signature because it never saw the real one. Multiply that by every session, every developer, every repo.
The information exists — it's in the code itself. But without a structured map, everyone is navigating blind: developers guess, AI agents hallucinate, and architecture degrades one unreviewed change at a time.
Developers aren't much better off. They inherit projects and spend days grepping to understand what calls what. Architects draw boundary rules that erode within weeks because nothing enforces them. CI catches test failures but can't tell you _"this change silently affects 14 callers across 9 files."_
The information exists — it's in the code itself. But without a structured map, agents hallucinate, developers guess, and architecture degrades one unreviewed change at a time.
## What Codegraph Does

@@ -46,14 +48,14 @@

It parses your code with [tree-sitter](https://tree-sitter.github.io/) (native Rust or WASM), stores the graph in SQLite, and gives you multiple ways to consume it:
It parses your code with [tree-sitter](https://tree-sitter.github.io/) (native Rust or WASM), stores the graph in SQLite, and exposes it where it matters most:
- **CLI** — developers explore, query, and audit their code from the terminal
- **MCP server** — AI agents query the graph directly through 30 tools
- **MCP server** — AI agents query the graph directly through 30 tools — one call instead of 30 `grep`/`find`/`cat` invocations
- **CLI** — developers and agents explore, query, and audit code from the terminal
- **CI gates** — `check` and `manifesto` commands enforce quality thresholds with exit codes
- **Programmatic API** — embed codegraph in your own tools via `npm install`
Instead of 30 tool calls to maybe discover half your dependencies, you get _"this function has 14 callers across 9 files"_ instantly. Instead of hoping architecture rules are followed, you enforce them. Instead of finding breakage in production, `diff-impact --staged` catches it before you commit.
Instead of an agent burning 30 tool calls to maybe discover half your dependencies, it gets _"this function has 14 callers across 9 files"_ in one MCP call. Instead of hoping architecture rules are followed, you enforce them. Instead of finding breakage in production, `diff-impact --staged` catches it before you commit.
**Free. Open source. Fully local.** Zero network calls, zero telemetry. Your code stays on your machine. When you want deeper intelligence, bring your own LLM provider — your code only goes where you choose to send it.
**Three commands to get started:**
**Three commands to a queryable graph:**

@@ -66,3 +68,3 @@ ```bash

That's it. No config files, no Docker, no JVM, no API keys, no accounts. The graph is ready to query.
No config files, no Docker, no JVM, no API keys, no accounts. Point your agent at the MCP server and it has full structural awareness of your codebase.

@@ -75,5 +77,6 @@ ### Why it matters

| **AI agents** | Modify `parseConfig()` without knowing 9 files import it | `fn-impact parseConfig` shows every caller before the edit |
| **Developers** | Inherit a codebase and grep for hours to understand what calls what | `context handleAuth -T` gives source, deps, callers, and tests in one command |
| **AI agents** | Hallucinate function signatures and miss callers | `context <name> -T` returns source, deps, callers, and tests — no guessing |
| **CI pipelines** | Catch test failures but miss structural degradation | `check --staged` fails the build when blast radius or complexity thresholds are exceeded |
| **Developers** | Inherit a codebase and grep for hours to understand what calls what | `context handleAuth -T` gives the same structured view agents use |
| **Developers** | Rename a function, break 14 call sites silently | `diff-impact --staged` catches breakage before you commit |
| **CI pipelines** | Catch test failures but miss structural degradation | `check --staged` fails the build when blast radius or complexity thresholds are exceeded |
| **Architects** | Draw boundary rules that erode within weeks | `manifesto` and `boundaries` enforce architecture rules on every commit |

@@ -87,2 +90,5 @@

|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
| MCP / AI agent support | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** |
| Batch querying | **Yes** | — | — | — | — | — | — | — |
| Composite audit command | **Yes** | — | — | — | — | — | — | — |
| Function-level analysis | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** |

@@ -95,6 +101,3 @@ | Multi-language | **11** | **14** | **32** | **11** | **~10** | **12** | **12** | **3** |

| CI validation predicates | **Yes** | — | — | — | — | — | — | — |
| Composite audit command | **Yes** | — | — | — | — | — | — | — |
| Batch querying | **Yes** | — | — | — | — | — | — | — |
| Graph snapshots | **Yes** | — | — | — | — | — | — | — |
| MCP / AI agent support | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** |
| Git diff impact | **Yes** | — | — | — | — | **Yes** | **Yes** | **Yes** |

@@ -123,11 +126,11 @@ | Branch structural diff | **Yes** | — | — | — | — | — | — | **Yes** |

|---|---|---|
| **⚡** | **Always-fresh graph** | Three-tier change detection: journal (O(changed)) → mtime+size (O(n) stats) → hash (O(changed) reads). Sub-second rebuilds even on large codebases |
| **🔓** | **Zero-cost core, LLM-enhanced when you want** | Full graph analysis with no API keys, no accounts, no cost. Optionally bring your own LLM provider — your code only goes where you choose |
| **🤖** | **AI-first architecture** | 30-tool [MCP server](https://modelcontextprotocol.io/) — agents query the graph directly instead of scraping the filesystem. One call replaces 20+ grep/find/cat invocations |
| **🏷️** | **Role classification** | Every symbol auto-tagged as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` — agents instantly know what they're looking at without reading the code |
| **🔬** | **Function-level, not just files** | Traces `handleAuth()` → `validateToken()` → `decryptJWT()` and shows 14 callers across 9 files break if `decryptJWT` changes |
| **🏷️** | **Role classification** | Every symbol auto-tagged as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` — agents instantly know what they're looking at |
| **🤖** | **Built for AI agents** | 30-tool [MCP server](https://modelcontextprotocol.io/) — AI assistants query your graph directly. Single-repo by default |
| **🌐** | **Multi-language, one CLI** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + HCL in a single graph |
| **⚡** | **Always-fresh graph** | Three-tier change detection: journal (O(changed)) → mtime+size (O(n) stats) → hash (O(changed) reads). Sub-second rebuilds — agents always work with current data |
| **💥** | **Git diff impact** | `codegraph diff-impact` shows changed functions, their callers, and full blast radius — enriched with historically coupled files from git co-change analysis. Ships with a GitHub Actions workflow |
| **🌐** | **Multi-language, one graph** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + HCL in a single graph — agents don't need per-language tools |
| **🧠** | **Hybrid search** | BM25 keyword + semantic embeddings fused via RRF — `hybrid` (default), `semantic`, or `keyword` mode; multi-query via `"auth; token; JWT"` |
| **🔬** | **Dataflow + CFG** | Track how data flows through functions (`flows_to`, `returns`, `mutates`) and visualize intraprocedural control flow graphs for all 11 languages |
| **🔓** | **Fully local, zero cost** | No API keys, no accounts, no network calls. Optionally bring your own LLM provider — your code only goes where you choose |

@@ -139,28 +142,19 @@ ---

```bash
# Install
npm install -g @optave/codegraph
# Build a graph for any project
cd your-project
codegraph build # → .codegraph/graph.db created
# Start exploring
codegraph map # see most-connected files
codegraph query myFunc # find any function, see callers & callees
codegraph deps src/index.ts # file-level import/export map
```
Or install from source:
That's it. The graph is ready. Now connect your AI agent.
### For AI agents (primary use case)
Connect directly via MCP — your agent gets 30 tools to query the graph:
```bash
git clone https://github.com/optave/codegraph.git
cd codegraph && npm install && npm link
codegraph mcp # 30-tool MCP server — AI queries the graph directly
```
> **Dev builds:** Pre-release tarballs are attached to [GitHub Releases](https://github.com/optave/codegraph/releases). Install with `npm install -g <path-to-tarball>`. Note that `npm install -g <tarball-url>` does not work because npm cannot resolve optional platform-specific dependencies from a URL — download the `.tgz` first, then install from the local file.
Or add codegraph to your agent's instructions (e.g. `CLAUDE.md`):
### For AI agents
Add codegraph to your agent's instructions (e.g. `CLAUDE.md`):
```markdown

@@ -176,10 +170,23 @@ Before modifying code, always:

Or connect directly via MCP:
Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) &middot; [CLAUDE.md template](docs/guides/ai-agent-guide.md#claudemd-template)
### For developers
The same graph is available via CLI:
```bash
codegraph mcp # 30-tool MCP server — AI queries the graph directly
codegraph map # see most-connected files
codegraph query myFunc # find any function, see callers & callees
codegraph deps src/index.ts # file-level import/export map
```
Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) &middot; [CLAUDE.md template](docs/guides/ai-agent-guide.md#claudemd-template)
Or install from source:
```bash
git clone https://github.com/optave/codegraph.git
cd codegraph && npm install && npm link
```
> **Dev builds:** Pre-release tarballs are attached to [GitHub Releases](https://github.com/optave/codegraph/releases). Install with `npm install -g <path-to-tarball>`. Note that `npm install -g <tarball-url>` does not work because npm cannot resolve optional platform-specific dependencies from a URL — download the `.tgz` first, then install from the local file.
---

@@ -191,8 +198,11 @@

|---|---|---|
| 🔍 | **Symbol search** | Find any function, class, or method by name — exact match priority, relevance scoring, `--file` and `--kind` filters |
| 📁 | **File dependencies** | See what a file imports and what imports it |
| 🤖 | **MCP server** | 30-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo |
| 🎯 | **Deep context** | `context` gives agents source, deps, callers, signature, and tests for a function in one call; `audit --quick` gives structural summaries |
| 🏷️ | **Node role classification** | Every symbol auto-tagged as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` based on connectivity — agents instantly know architectural role |
| 📦 | **Batch querying** | Accept a list of targets and return all results in one JSON payload — enables multi-agent parallel dispatch |
| 💥 | **Impact analysis** | Trace every file affected by a change (transitive) |
| 🧬 | **Function-level tracing** | Call chains, caller trees, function-level impact, and A→B pathfinding with qualified call resolution |
| 🎯 | **Deep context** | `context` gives AI agents source, deps, callers, signature, and tests for a function in one call; `audit --quick` gives structural summaries of files or functions |
| 📍 | **Fast lookup** | `where` shows exactly where a symbol is defined and used — minimal, fast |
| 🔍 | **Symbol search** | Find any function, class, or method by name — exact match priority, relevance scoring, `--file` and `--kind` filters |
| 📁 | **File dependencies** | See what a file imports and what imports it |
| 📊 | **Diff impact** | Parse `git diff`, find overlapping functions, trace their callers |

@@ -202,3 +212,2 @@ | 🔗 | **Co-change analysis** | Analyze git history for files that always change together — surfaces hidden coupling the static graph can't see; enriches `diff-impact` with historically coupled files |

| 🏗️ | **Structure & hotspots** | Directory cohesion scores, fan-in/fan-out hotspot detection, module boundaries |
| 🏷️ | **Node role classification** | Every symbol auto-tagged as `entry`/`core`/`utility`/`adapter`/`dead`/`leaf` based on connectivity patterns — agents instantly know architectural role |
| 🔄 | **Cycle detection** | Find circular dependencies at file or function level |

@@ -208,3 +217,2 @@ | 📤 | **Export** | DOT, Mermaid, JSON, GraphML, GraphSON, and Neo4j CSV graph export |

| 👀 | **Watch mode** | Incrementally update the graph as files change |
| 🤖 | **MCP server** | 30-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo |
| ⚡ | **Always fresh** | Three-tier incremental detection — sub-second rebuilds even on large codebases |

@@ -224,3 +232,2 @@ | 🔬 | **Data flow analysis** | Intraprocedural parameter tracking, return consumers, argument flows, and mutation detection — all 11 languages |

| 🚦 | **Triage queue** | `triage` merges connectivity, hotspots, roles, and complexity into a ranked audit priority queue |
| 📦 | **Batch querying** | Accept a list of targets and return all results in one JSON payload — enables multi-agent parallel dispatch |
| 🔬 | **Dataflow analysis** | Track how data moves through functions with `flows_to`, `returns`, and `mutates` edges — all 11 languages, included by default, skip with `--no-dataflow` |

@@ -265,3 +272,3 @@ | 🧩 | **Control flow graph** | Intraprocedural CFG construction for all 11 languages — `cfg` command with text/DOT/Mermaid output, included by default, skip with `--no-cfg` |

### Deep Context (AI-Optimized)
### Deep Context (designed for AI agents)

@@ -573,10 +580,10 @@ ```bash

|---|---|
| Build speed (native) | **12.3 ms/file** |
| Build speed (WASM) | **16.3 ms/file** |
| Build speed (native) | **6.2 ms/file** |
| Build speed (WASM) | **19 ms/file** |
| Query time | **3ms** |
| No-op rebuild (native) | **5ms** |
| 1-file rebuild (native) | **375ms** |
| No-op rebuild (native) | **329ms** |
| 1-file rebuild (native) | **335ms** |
| Query: fn-deps | **0.8ms** |
| Query: path | **0.8ms** |
| ~50,000 files (est.) | **~615.0s build** |
| ~50,000 files (est.) | **~310.0s build** |

@@ -599,7 +606,7 @@ Metrics are normalized per file for cross-version comparability. Times above are for a full initial build — incremental rebuilds only re-parse changed files.

## 🤖 AI Agent Integration
## 🤖 AI Agent Integration (Core)
### MCP Server
Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with 30 tools (31 in multi-repo mode), so AI assistants can query your dependency graph directly:
Codegraph is built around a [Model Context Protocol](https://modelcontextprotocol.io/) server with 30 tools (31 in multi-repo mode) — the primary way agents consume the graph:

@@ -606,0 +613,0 @@ ```bash

@@ -10,2 +10,3 @@ import { createHash } from 'node:crypto';

import { debug, info, warn } from './logger.js';
import { loadNative } from './native.js';
import { getActiveEngine, parseFilesAuto } from './parser.js';

@@ -448,3 +449,7 @@ import { computeConfidence, resolveImportPath, resolveImportsBatch } from './resolve.js';

// Engine selection: 'native', 'wasm', or 'auto' (default)
const engineOpts = { engine: opts.engine || 'auto', dataflow: opts.dataflow !== false };
const engineOpts = {
engine: opts.engine || 'auto',
dataflow: opts.dataflow !== false,
ast: opts.ast !== false,
};
const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts);

@@ -677,12 +682,35 @@ info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);

const insertNode = db.prepare(
'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line, parent_id) VALUES (?, ?, ?, ?, ?, ?)',
);
const getNodeId = db.prepare(
'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?',
);
const insertEdge = db.prepare(
'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)',
);
// Batch INSERT helpers — multi-value INSERTs reduce SQLite round-trips
const BATCH_CHUNK = 200;
// Insert node rows using multi-row `INSERT OR IGNORE` statements, at most
// BATCH_CHUNK rows per statement. Each row is an array laid out as
// [name, kind, file, line, end_line, parent_id].
function batchInsertNodes(rows) {
  if (!rows.length) return;
  const tuple = '(?,?,?,?,?,?)';
  let start = 0;
  while (start < rows.length) {
    const batch = rows.slice(start, start + BATCH_CHUNK);
    // Flatten exactly six columns per row into the positional parameter list.
    const params = batch.flatMap((r) => [r[0], r[1], r[2], r[3], r[4], r[5]]);
    const placeholders = batch.map(() => tuple).join(',');
    db.prepare(
      `INSERT OR IGNORE INTO nodes (name,kind,file,line,end_line,parent_id) VALUES ${placeholders}`,
    ).run(...params);
    start += BATCH_CHUNK;
  }
}
// Insert edge rows using multi-row `INSERT` statements, at most BATCH_CHUNK
// rows per statement. Each row is an array laid out as
// [source_id, target_id, kind, confidence, dynamic].
function batchInsertEdges(rows) {
  if (!rows.length) return;
  const tuple = '(?,?,?,?,?)';
  let start = 0;
  while (start < rows.length) {
    const batch = rows.slice(start, start + BATCH_CHUNK);
    // Flatten exactly five columns per row into the positional parameter list.
    const params = batch.flatMap((r) => [r[0], r[1], r[2], r[3], r[4]]);
    const placeholders = batch.map(() => tuple).join(',');
    db.prepare(
      `INSERT INTO edges (source_id,target_id,kind,confidence,dynamic) VALUES ${placeholders}`,
    ).run(...params);
    start += BATCH_CHUNK;
  }
}
// Prepare hash upsert (with size column from migration v4)

@@ -733,15 +761,29 @@ let upsertHash;

const insertAll = db.transaction(() => {
// Phase 1: Batch insert all file nodes + definitions + exports
const phase1Rows = [];
for (const [relPath, symbols] of allSymbols) {
fileSymbols.set(relPath, symbols);
// Phase 1: Insert file node + definitions + exports (no children yet)
insertNode.run(relPath, 'file', relPath, 0, null, null);
phase1Rows.push([relPath, 'file', relPath, 0, null, null]);
for (const def of symbols.definitions) {
insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null, null);
phase1Rows.push([def.name, def.kind, relPath, def.line, def.endLine || null, null]);
}
for (const exp of symbols.exports) {
insertNode.run(exp.name, exp.kind, relPath, exp.line, null, null);
phase1Rows.push([exp.name, exp.kind, relPath, exp.line, null, null]);
}
}
batchInsertNodes(phase1Rows);
// Phase 2: Bulk-fetch IDs for file + definitions
// Phase 1b: Mark exported symbols
const markExported = db.prepare(
'UPDATE nodes SET exported = 1 WHERE name = ? AND kind = ? AND file = ? AND line = ?',
);
for (const [relPath, symbols] of allSymbols) {
for (const exp of symbols.exports) {
markExported.run(exp.name, exp.kind, relPath, exp.line);
}
}
// Phase 3: Batch insert children (needs parent IDs from Phase 2)
const childRows = [];
for (const [relPath, symbols] of allSymbols) {
const nodeIdMap = new Map();

@@ -751,4 +793,2 @@ for (const row of bulkGetNodeIds.all(relPath)) {

}
// Phase 3: Insert children with parent_id from the map
for (const def of symbols.definitions) {

@@ -759,19 +799,28 @@ if (!def.children?.length) continue;

for (const child of def.children) {
insertNode.run(child.name, child.kind, relPath, child.line, child.endLine || null, defId);
childRows.push([
child.name,
child.kind,
relPath,
child.line,
child.endLine || null,
defId,
]);
}
}
}
batchInsertNodes(childRows);
// Phase 4: Re-fetch to include children IDs
nodeIdMap.clear();
// Phase 5: Batch insert contains/parameter_of edges
const edgeRows = [];
for (const [relPath, symbols] of allSymbols) {
// Re-fetch to include children IDs
const nodeIdMap = new Map();
for (const row of bulkGetNodeIds.all(relPath)) {
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
}
// Phase 5: Insert edges using the cached ID map
const fileId = nodeIdMap.get(`${relPath}|file|0`);
for (const def of symbols.definitions) {
const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
// File → top-level definition contains edge
if (fileId && defId) {
insertEdge.run(fileId, defId, 'contains', 1.0, 0);
edgeRows.push([fileId, defId, 'contains', 1.0, 0]);
}

@@ -782,7 +831,5 @@ if (def.children?.length && defId) {

if (childId) {
// Parent → child contains edge
insertEdge.run(defId, childId, 'contains', 1.0, 0);
// Parameter → parent parameter_of edge (inverse direction)
edgeRows.push([defId, childId, 'contains', 1.0, 0]);
if (child.kind === 'parameter') {
insertEdge.run(childId, defId, 'parameter_of', 1.0, 0);
edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]);
}

@@ -822,2 +869,3 @@ }

}
batchInsertEdges(edgeRows);

@@ -859,3 +907,3 @@ // Also update metadata-only entries (self-heal mtime/size without re-parse)

}
const batchResolved = resolveImportsBatch(batchInputs, rootDir, aliases);
const batchResolved = resolveImportsBatch(batchInputs, rootDir, aliases, files);
_t.resolveMs = performance.now() - _t.resolve0;

@@ -973,3 +1021,3 @@

.prepare(
`SELECT id, name, kind, file FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait')`,
`SELECT id, name, kind, file, line FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait')`,
)

@@ -989,5 +1037,7 @@ .all();

// Second pass: build edges
// Second pass: build edges (accumulated and batch-inserted)
_t.edges0 = performance.now();
const buildEdges = db.transaction(() => {
const allEdgeRows = [];
for (const [relPath, symbols] of fileSymbols) {

@@ -1006,3 +1056,3 @@ // Skip barrel-only files — loaded for resolution, edges already in DB

const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 'imports-type' : 'imports';
insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0);
allEdgeRows.push([fileNodeId, targetRow.id, edgeKind, 1.0, 0]);

@@ -1022,3 +1072,3 @@ if (!imp.reexport && isBarrelFile(resolvedPath)) {

if (actualRow) {
insertEdge.run(
allEdgeRows.push([
fileNodeId,

@@ -1029,3 +1079,3 @@ actualRow.id,

0,
);
]);
}

@@ -1037,149 +1087,194 @@ }

}
}
// Build import name -> target file mapping
const importedNames = new Map();
for (const imp of symbols.imports) {
const resolvedPath = getResolved(path.join(rootDir, relPath), imp.source);
for (const name of imp.names) {
const cleanName = name.replace(/^\*\s+as\s+/, '');
importedNames.set(cleanName, resolvedPath);
// Call/receiver/extends/implements edges — native when available
const native = engineName === 'native' ? loadNative() : null;
if (native?.buildCallEdges) {
const nativeFiles = [];
for (const [relPath, symbols] of fileSymbols) {
if (barrelOnlyFiles.has(relPath)) continue;
const fileNodeRow = getNodeId.get(relPath, 'file', relPath, 0);
if (!fileNodeRow) continue;
// Pre-resolve imported names (including barrel resolution)
const importedNames = [];
for (const imp of symbols.imports) {
const resolvedPath = getResolved(path.join(rootDir, relPath), imp.source);
for (const name of imp.names) {
const cleanName = name.replace(/^\*\s+as\s+/, '');
let targetFile = resolvedPath;
if (isBarrelFile(resolvedPath)) {
const actual = resolveBarrelExport(resolvedPath, cleanName);
if (actual) targetFile = actual;
}
importedNames.push({ name: cleanName, file: targetFile });
}
}
nativeFiles.push({
file: relPath,
fileNodeId: fileNodeRow.id,
definitions: symbols.definitions.map((d) => ({
name: d.name,
kind: d.kind,
line: d.line,
endLine: d.endLine ?? null,
})),
calls: symbols.calls,
importedNames,
classes: symbols.classes,
});
}
// Call edges with confidence scoring — using pre-loaded lookup maps (N+1 fix)
const seenCallEdges = new Set();
for (const call of symbols.calls) {
if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue;
let caller = null;
let callerSpan = Infinity;
for (const def of symbols.definitions) {
if (def.line <= call.line) {
const end = def.endLine || Infinity;
if (call.line <= end) {
// Call is inside this definition's range — pick narrowest
const span = end - def.line;
if (span < callerSpan) {
const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [...BUILTIN_RECEIVERS]);
for (const e of nativeEdges) {
allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]);
}
} else {
// JS fallback — call/receiver/extends/implements edges
for (const [relPath, symbols] of fileSymbols) {
if (barrelOnlyFiles.has(relPath)) continue;
const fileNodeRow = getNodeId.get(relPath, 'file', relPath, 0);
if (!fileNodeRow) continue;
// Build import name -> target file mapping
const importedNames = new Map();
for (const imp of symbols.imports) {
const resolvedPath = getResolved(path.join(rootDir, relPath), imp.source);
for (const name of imp.names) {
const cleanName = name.replace(/^\*\s+as\s+/, '');
importedNames.set(cleanName, resolvedPath);
}
}
// Call edges with confidence scoring — using pre-loaded lookup maps (N+1 fix)
const seenCallEdges = new Set();
for (const call of symbols.calls) {
if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue;
let caller = null;
let callerSpan = Infinity;
for (const def of symbols.definitions) {
if (def.line <= call.line) {
const end = def.endLine || Infinity;
if (call.line <= end) {
const span = end - def.line;
if (span < callerSpan) {
const row = getNodeId.get(def.name, def.kind, relPath, def.line);
if (row) {
caller = row;
callerSpan = span;
}
}
} else if (!caller) {
const row = getNodeId.get(def.name, def.kind, relPath, def.line);
if (row) {
caller = row;
callerSpan = span;
}
if (row) caller = row;
}
} else if (!caller) {
// Fallback: def starts before call but call is past end
// Only use if we haven't found an enclosing scope yet
const row = getNodeId.get(def.name, def.kind, relPath, def.line);
if (row) caller = row;
}
}
}
if (!caller) caller = fileNodeRow;
if (!caller) caller = fileNodeRow;
const isDynamic = call.dynamic ? 1 : 0;
let targets;
const importedFrom = importedNames.get(call.name);
const isDynamic = call.dynamic ? 1 : 0;
let targets;
const importedFrom = importedNames.get(call.name);
if (importedFrom) {
// Use pre-loaded map instead of DB query
targets = nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || [];
if (importedFrom) {
targets = nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || [];
if (targets.length === 0 && isBarrelFile(importedFrom)) {
const actualSource = resolveBarrelExport(importedFrom, call.name);
if (actualSource) {
targets = nodesByNameAndFile.get(`${call.name}|${actualSource}`) || [];
if (targets.length === 0 && isBarrelFile(importedFrom)) {
const actualSource = resolveBarrelExport(importedFrom, call.name);
if (actualSource) {
targets = nodesByNameAndFile.get(`${call.name}|${actualSource}`) || [];
}
}
}
}
if (!targets || targets.length === 0) {
// Same file
targets = nodesByNameAndFile.get(`${call.name}|${relPath}`) || [];
if (targets.length === 0) {
// Method name match (e.g. ClassName.methodName)
const methodCandidates = (nodesByName.get(call.name) || []).filter(
(n) => n.name.endsWith(`.${call.name}`) && n.kind === 'method',
);
if (methodCandidates.length > 0) {
targets = methodCandidates;
} else if (
!call.receiver ||
call.receiver === 'this' ||
call.receiver === 'self' ||
call.receiver === 'super'
) {
// Scoped fallback — same-dir or parent-dir only, not global
targets = (nodesByName.get(call.name) || []).filter(
(n) => computeConfidence(relPath, n.file, null) >= 0.5,
if (!targets || targets.length === 0) {
targets = nodesByNameAndFile.get(`${call.name}|${relPath}`) || [];
if (targets.length === 0) {
const methodCandidates = (nodesByName.get(call.name) || []).filter(
(n) => n.name.endsWith(`.${call.name}`) && n.kind === 'method',
);
if (methodCandidates.length > 0) {
targets = methodCandidates;
} else if (
!call.receiver ||
call.receiver === 'this' ||
call.receiver === 'self' ||
call.receiver === 'super'
) {
targets = (nodesByName.get(call.name) || []).filter(
(n) => computeConfidence(relPath, n.file, null) >= 0.5,
);
}
}
// else: method call on a receiver — skip global fallback entirely
}
}
if (targets.length > 1) {
targets.sort((a, b) => {
const confA = computeConfidence(relPath, a.file, importedFrom);
const confB = computeConfidence(relPath, b.file, importedFrom);
return confB - confA;
});
}
if (targets.length > 1) {
targets.sort((a, b) => {
const confA = computeConfidence(relPath, a.file, importedFrom);
const confB = computeConfidence(relPath, b.file, importedFrom);
return confB - confA;
});
}
for (const t of targets) {
const edgeKey = `${caller.id}|${t.id}`;
if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) {
seenCallEdges.add(edgeKey);
const confidence = computeConfidence(relPath, t.file, importedFrom);
insertEdge.run(caller.id, t.id, 'calls', confidence, isDynamic);
for (const t of targets) {
const edgeKey = `${caller.id}|${t.id}`;
if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) {
seenCallEdges.add(edgeKey);
const confidence = computeConfidence(relPath, t.file, importedFrom);
allEdgeRows.push([caller.id, t.id, 'calls', confidence, isDynamic]);
}
}
}
// Receiver edge: caller → receiver type node
if (
call.receiver &&
!BUILTIN_RECEIVERS.has(call.receiver) &&
call.receiver !== 'this' &&
call.receiver !== 'self' &&
call.receiver !== 'super'
) {
const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']);
// Same-file first, then global
const samefile = nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || [];
const candidates = samefile.length > 0 ? samefile : nodesByName.get(call.receiver) || [];
const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind));
if (receiverNodes.length > 0 && caller) {
const recvTarget = receiverNodes[0];
const recvKey = `recv|${caller.id}|${recvTarget.id}`;
if (!seenCallEdges.has(recvKey)) {
seenCallEdges.add(recvKey);
insertEdge.run(caller.id, recvTarget.id, 'receiver', 0.7, 0);
// Receiver edge: caller → receiver type node
if (
call.receiver &&
!BUILTIN_RECEIVERS.has(call.receiver) &&
call.receiver !== 'this' &&
call.receiver !== 'self' &&
call.receiver !== 'super'
) {
const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']);
const samefile = nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || [];
const candidates =
samefile.length > 0 ? samefile : nodesByName.get(call.receiver) || [];
const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind));
if (receiverNodes.length > 0 && caller) {
const recvTarget = receiverNodes[0];
const recvKey = `recv|${caller.id}|${recvTarget.id}`;
if (!seenCallEdges.has(recvKey)) {
seenCallEdges.add(recvKey);
allEdgeRows.push([caller.id, recvTarget.id, 'receiver', 0.7, 0]);
}
}
}
}
}
// Class extends edges (use pre-loaded maps instead of inline DB queries)
for (const cls of symbols.classes) {
if (cls.extends) {
const sourceRow = (nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find(
(n) => n.kind === 'class',
);
const targetCandidates = nodesByName.get(cls.extends) || [];
const targetRows = targetCandidates.filter((n) => n.kind === 'class');
if (sourceRow) {
for (const t of targetRows) {
insertEdge.run(sourceRow.id, t.id, 'extends', 1.0, 0);
// Class extends edges
for (const cls of symbols.classes) {
if (cls.extends) {
const sourceRow = (nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find(
(n) => n.kind === 'class',
);
const targetCandidates = nodesByName.get(cls.extends) || [];
const targetRows = targetCandidates.filter((n) => n.kind === 'class');
if (sourceRow) {
for (const t of targetRows) {
allEdgeRows.push([sourceRow.id, t.id, 'extends', 1.0, 0]);
}
}
}
}
if (cls.implements) {
const sourceRow = (nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find(
(n) => n.kind === 'class',
);
const targetCandidates = nodesByName.get(cls.implements) || [];
const targetRows = targetCandidates.filter(
(n) => n.kind === 'interface' || n.kind === 'class',
);
if (sourceRow) {
for (const t of targetRows) {
insertEdge.run(sourceRow.id, t.id, 'implements', 1.0, 0);
if (cls.implements) {
const sourceRow = (nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find(
(n) => n.kind === 'class',
);
const targetCandidates = nodesByName.get(cls.implements) || [];
const targetRows = targetCandidates.filter(
(n) => n.kind === 'interface' || n.kind === 'class',
);
if (sourceRow) {
for (const t of targetRows) {
allEdgeRows.push([sourceRow.id, t.id, 'implements', 1.0, 0]);
}
}

@@ -1190,2 +1285,4 @@ }

}
batchInsertEdges(allEdgeRows);
});

@@ -1198,4 +1295,4 @@ buildEdges();

for (const [relPath, symbols] of fileSymbols) {
if (symbols._lineCount) {
lineCountMap.set(relPath, symbols._lineCount);
if (symbols.lineCount ?? symbols._lineCount) {
lineCountMap.set(relPath, symbols.lineCount ?? symbols._lineCount);
} else {

@@ -1455,2 +1552,25 @@ const absPath = path.join(rootDir, relPath);

// Warn about unused exports (exported but zero cross-file consumers)
try {
const unusedCount = db
.prepare(
`SELECT COUNT(*) as c FROM nodes
WHERE exported = 1 AND kind != 'file'
AND id NOT IN (
SELECT DISTINCT e.target_id FROM edges e
JOIN nodes caller ON e.source_id = caller.id
JOIN nodes target ON e.target_id = target.id
WHERE e.kind = 'calls' AND caller.file != target.file
)`,
)
.get().c;
if (unusedCount > 0) {
warn(
`${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports <file> --unused" to inspect.`,
);
}
} catch {
/* exported column may not exist on older DBs */
}
// Persist build metadata for mismatch detection

@@ -1457,0 +1577,0 @@ try {

@@ -1049,5 +1049,13 @@ /**

let parsers = null;
let extToLang = null;
let needsFallback = false;
// Always build ext→langId map so native-only builds (where _langId is unset)
// can still derive the language from the file extension.
const extToLang = new Map();
for (const entry of LANGUAGE_REGISTRY) {
for (const ext of entry.extensions) {
extToLang.set(ext, entry.id);
}
}
for (const [relPath, symbols] of fileSymbols) {

@@ -1072,8 +1080,2 @@ if (!symbols._tree) {

parsers = await createParsers();
extToLang = new Map();
for (const entry of LANGUAGE_REGISTRY) {
for (const ext of entry.extensions) {
extToLang.set(ext, entry.id);
}
}
}

@@ -1120,3 +1122,3 @@

if (!tree && !allNative) {
if (!extToLang || !getParserFn) continue;
if (!getParserFn) continue;
langId = extToLang.get(ext);

@@ -1144,3 +1146,3 @@ if (!langId || !CFG_LANG_IDS.has(langId)) continue;

if (!langId) {
langId = extToLang ? extToLang.get(ext) : null;
langId = extToLang.get(ext);
if (!langId) continue;

@@ -1147,0 +1149,0 @@ }

@@ -280,2 +280,3 @@ #!/usr/bin/env node

.option('--ndjson', 'Newline-delimited JSON output')
.option('--unused', 'Show only exports with zero consumers')
.action((file, opts) => {

@@ -288,2 +289,3 @@ fileExports(file, opts.db, {

ndjson: opts.ndjson,
unused: opts.unused,
});

@@ -1143,2 +1145,35 @@ });

// CLI command `sequence <name>`: prints a Mermaid sequence diagram built from
// call-graph edges, with files as participants (see the description string).
program
.command('sequence <name>')
.description('Generate a Mermaid sequence diagram from call graph edges (participants = files)')
.option('--depth <n>', 'Max forward traversal depth', '10')
.option('--dataflow', 'Annotate with parameter names and return arrows from dataflow table')
.option('-d, --db <path>', 'Path to graph.db')
.option('-f, --file <path>', 'Scope to a specific file (partial match)')
.option('-k, --kind <kind>', 'Filter by symbol kind')
.option('-T, --no-tests', 'Exclude test/spec files from results')
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
.option('-j, --json', 'Output as JSON')
.option('--limit <number>', 'Max results to return')
.option('--offset <number>', 'Skip N results (default: 0)')
.option('--ndjson', 'Newline-delimited JSON output')
.action(async (name, opts) => {
// Reject an unknown --kind up front, before the implementation module is loaded.
if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) {
console.error(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`);
process.exit(1);
}
// The implementation is loaded with a dynamic import, only when this command runs.
const { sequence } = await import('./sequence.js');
sequence(name, opts.db, {
// --depth has a string default ('10'), so it is always parsed here.
// NOTE(review): a non-numeric --depth yields NaN and is passed through unvalidated.
depth: parseInt(opts.depth, 10),
file: opts.file,
kind: opts.kind,
noTests: resolveNoTests(opts),
json: opts.json,
dataflow: opts.dataflow,
// --limit / --offset have no default: forward undefined when they were not given.
limit: opts.limit ? parseInt(opts.limit, 10) : undefined,
offset: opts.offset ? parseInt(opts.offset, 10) : undefined,
ndjson: opts.ndjson,
});
});
program
.command('dataflow <name>')

@@ -1145,0 +1180,0 @@ .description('Show data flow for a function: parameters, return consumers, mutations')

@@ -1008,5 +1008,13 @@ /**

let parsers = null;
let extToLang = null;
let needsFallback = false;
// Always build ext→langId map so native-only builds (where _langId is unset)
// can still derive the language from the file extension.
const extToLang = new Map();
for (const entry of LANGUAGE_REGISTRY) {
for (const ext of entry.extensions) {
extToLang.set(ext, entry.id);
}
}
for (const [relPath, symbols] of fileSymbols) {

@@ -1025,8 +1033,2 @@ if (!symbols._tree && !symbols.dataflow) {

parsers = await createParsers();
extToLang = new Map();
for (const entry of LANGUAGE_REGISTRY) {
for (const ext of entry.extensions) {
extToLang.set(ext, entry.id);
}
}
}

@@ -1074,3 +1076,3 @@

if (!tree) {
if (!extToLang || !getParserFn) continue;
if (!getParserFn) continue;
langId = extToLang.get(ext);

@@ -1098,3 +1100,3 @@ if (!langId || !DATAFLOW_LANG_IDS.has(langId)) continue;

if (!langId) {
langId = extToLang ? extToLang.get(ext) : null;
langId = extToLang.get(ext);
if (!langId) continue;

@@ -1101,0 +1103,0 @@ }

@@ -228,2 +228,9 @@ import fs from 'node:fs';

},
{
version: 14,
up: `
ALTER TABLE nodes ADD COLUMN exported INTEGER DEFAULT 0;
CREATE INDEX IF NOT EXISTS idx_nodes_exported ON nodes(exported);
`,
},
];

@@ -230,0 +237,0 @@

@@ -10,3 +10,3 @@ /**

import { paginateResult, printNdjson } from './paginate.js';
import { isTestFile, kindIcon } from './queries.js';
import { findMatchingNodes, isTestFile, kindIcon } from './queries.js';
import { FRAMEWORK_ENTRY_PREFIXES } from './structure.js';

@@ -99,3 +99,3 @@

// Phase 1: Direct LIKE match on full name
let matchNode = findBestMatch(db, name, opts);
let matchNode = findMatchingNodes(db, name, opts)[0] ?? null;

@@ -105,3 +105,3 @@ // Phase 2: Prefix-stripped matching — try adding framework prefixes

for (const prefix of FRAMEWORK_ENTRY_PREFIXES) {
matchNode = findBestMatch(db, `${prefix}${name}`, opts);
matchNode = findMatchingNodes(db, `${prefix}${name}`, opts)[0] ?? null;
if (matchNode) break;

@@ -226,69 +226,2 @@ }

/**
 * Look up nodes whose name contains `name` and return the highest-ranked one.
 *
 * Ranking is a name-match score (100 exact, 60 prefix, 40 dotted-segment
 * match, 10 otherwise — each compared case-insensitively against both the
 * full name and the part after the last dot) plus a fan-in bonus of
 * `log2(fan_in + 1) * 5`, capped at 25. The score is stored on each candidate
 * as `_relevance`.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all(...params)`.
 * @param {string} name - Substring to search for in node names.
 * @param {object} [opts]
 * @param {string} [opts.kind] - Restrict the search to this single node kind.
 * @param {string} [opts.file] - Restrict the search to files containing this substring.
 * @param {boolean} [opts.noTests] - Drop candidates for which `isTestFile(file)` is true.
 * @returns {object|null} The best candidate row, or null when nothing matched.
 */
function findBestMatch(db, name, opts = {}) {
  const searchableKinds = [
    'function',
    'method',
    'class',
    'interface',
    'type',
    'struct',
    'enum',
    'trait',
    'record',
    'module',
  ];
  const kinds = opts.kind ? [opts.kind] : searchableKinds;

  // Bind parameters in the order their placeholders appear in the SQL text.
  const params = [`%${name}%`, ...kinds];
  const fileCondition = opts.file ? ' AND n.file LIKE ?' : '';
  if (opts.file) {
    params.push(`%${opts.file}%`);
  }

  const kindPlaceholders = kinds.map(() => '?').join(', ');
  const sql = [
    'SELECT n.*, COALESCE(fi.cnt, 0) AS fan_in',
    'FROM nodes n',
    'LEFT JOIN (',
    "SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id",
    ') fi ON fi.target_id = n.id',
    `WHERE n.name LIKE ? AND n.kind IN (${kindPlaceholders})${fileCondition}`,
  ].join('\n');
  const rows = db.prepare(sql).all(...params);

  const candidates = opts.noTests ? rows.filter((row) => !isTestFile(row.file)) : rows;
  if (candidates.length === 0) {
    return null;
  }

  const needle = name.toLowerCase();
  const scoreName = (lowerName) => {
    const bareName = lowerName.includes('.') ? lowerName.split('.').pop() : lowerName;
    if (lowerName === needle || bareName === needle) return 100;
    if (lowerName.startsWith(needle) || bareName.startsWith(needle)) return 60;
    if (lowerName.includes(`.${needle}`) || lowerName.includes(`${needle}.`)) return 40;
    return 10;
  };

  for (const candidate of candidates) {
    const fanInBonus = Math.min(Math.log2(candidate.fan_in + 1) * 5, 25);
    candidate._relevance = scoreName(candidate.name.toLowerCase()) + fanInBonus;
  }

  // Array#sort is stable, so equally ranked candidates keep their query order.
  const [best] = candidates.sort((a, b) => b._relevance - a._relevance);
  return best;
}
/**
* CLI formatter — text or JSON output.

@@ -295,0 +228,0 @@ */

@@ -124,3 +124,2 @@ /**

export { MCP_DEFAULTS, MCP_MAX_LIMIT, paginate, paginateResult, printNdjson } from './paginate.js';
// Unified parser API

@@ -174,2 +173,4 @@ export { getActiveEngine, isWasmAvailable, parseFileAuto, parseFilesAuto } from './parser.js';

} from './registry.js';
// Sequence diagram generation
export { sequence, sequenceData, sequenceToMermaid } from './sequence.js';
// Snapshot management

@@ -176,0 +177,0 @@ export {

@@ -116,2 +116,7 @@ /**

no_tests: { type: 'boolean', description: 'Exclude test files', default: false },
unused: {
type: 'boolean',
description: 'Show only exports with zero consumers',
default: false,
},
...PAGINATION_PROPS,

@@ -423,2 +428,39 @@ },

{
name: 'sequence',
description:
'Generate a Mermaid sequence diagram from call graph edges. Participants are files, messages are function calls between them.',
inputSchema: {
type: 'object',
properties: {
name: {
type: 'string',
description: 'Entry point or function name to trace from (partial match)',
},
depth: { type: 'number', description: 'Max forward traversal depth', default: 10 },
format: {
type: 'string',
enum: ['mermaid', 'json'],
description: 'Output format (default: mermaid)',
},
dataflow: {
type: 'boolean',
description: 'Annotate with parameter names and return arrows',
default: false,
},
file: {
type: 'string',
description: 'Scope search to functions in this file (partial match)',
},
kind: {
type: 'string',
enum: EVERY_SYMBOL_KIND,
description: 'Filter to a specific symbol kind',
},
no_tests: { type: 'boolean', description: 'Exclude test files', default: false },
...PAGINATION_PROPS,
},
required: ['name'],
},
},
{
name: 'complexity',

@@ -907,2 +949,3 @@ description:

noTests: args.no_tests,
unused: args.unused,
limit: Math.min(args.limit ?? MCP_DEFAULTS.file_exports, MCP_MAX_LIMIT),

@@ -1171,2 +1214,19 @@ offset: args.offset ?? 0,

}
case 'sequence': {
const { sequenceData, sequenceToMermaid } = await import('./sequence.js');
const seqResult = sequenceData(args.name, dbPath, {
depth: args.depth,
file: args.file,
kind: args.kind,
dataflow: args.dataflow,
noTests: args.no_tests,
limit: Math.min(args.limit ?? MCP_DEFAULTS.execution_flow, MCP_MAX_LIMIT),
offset: args.offset ?? 0,
});
result =
args.format === 'json'
? seqResult
: { text: sequenceToMermaid(seqResult), ...seqResult };
break;
}
case 'complexity': {

@@ -1173,0 +1233,0 @@ const { complexityData } = await import('./complexity.js');

@@ -186,129 +186,51 @@ import fs from 'node:fs';

/**
* Normalize native engine output to match the camelCase convention
* used by the WASM extractors.
* Patch native engine output in-place for the few remaining semantic transforms.
* With #[napi(js_name)] on Rust types, most fields already arrive as camelCase.
* This only handles:
* - _lineCount compat for builder.js
* - Backward compat for older native binaries missing js_name annotations
* - dataflow argFlows/mutations bindingType → binding wrapper
*/
function normalizeNativeSymbols(result) {
  // Rebuilds the native parser result as a fresh object tree with a fixed set
  // of camelCase keys. Fields with a snake_case fallback below are read from
  // either spelling; input properties that are not listed are dropped.

  // bindingType string -> { type } wrapper, or null when absent/falsy.
  const wrapBinding = (bindingType) => (bindingType ? { type: bindingType } : null);

  const toComplexity = (cx) => {
    if (!cx) return null;
    return {
      cognitive: cx.cognitive,
      cyclomatic: cx.cyclomatic,
      maxNesting: cx.maxNesting,
      halstead: cx.halstead ?? null,
      loc: cx.loc ?? null,
      maintainabilityIndex: cx.maintainabilityIndex ?? null,
    };
  };

  // A CFG is only kept when it has at least one block.
  const toCfg = (cfg) => {
    if (!cfg?.blocks?.length) return null;
    const blocks = cfg.blocks.map((b) => ({
      index: b.index,
      type: b.type,
      startLine: b.startLine,
      endLine: b.endLine,
      label: b.label ?? null,
    }));
    const edges = cfg.edges.map((e) => ({
      sourceIndex: e.sourceIndex,
      targetIndex: e.targetIndex,
      kind: e.kind,
    }));
    return { blocks, edges };
  };

  // Missing or empty children become `undefined` (not an empty array).
  const toChildren = (children) => {
    if (!children?.length) return undefined;
    return children.map((c) => ({
      name: c.name,
      kind: c.kind,
      line: c.line,
      endLine: c.endLine ?? c.end_line ?? null,
    }));
  };

  const toDefinition = (d) => ({
    name: d.name,
    kind: d.kind,
    line: d.line,
    endLine: d.endLine ?? d.end_line ?? null,
    decorators: d.decorators,
    complexity: toComplexity(d.complexity),
    cfg: toCfg(d.cfg),
    children: toChildren(d.children),
  });

  const toCall = (c) => ({
    name: c.name,
    line: c.line,
    dynamic: c.dynamic,
    receiver: c.receiver,
  });

  const toImport = (i) => ({
    source: i.source,
    names: i.names || [],
    line: i.line,
    typeOnly: i.typeOnly ?? i.type_only,
    reexport: i.reexport,
    wildcardReexport: i.wildcardReexport ?? i.wildcard_reexport,
    pythonImport: i.pythonImport ?? i.python_import,
    goImport: i.goImport ?? i.go_import,
    rustUse: i.rustUse ?? i.rust_use,
    javaImport: i.javaImport ?? i.java_import,
    csharpUsing: i.csharpUsing ?? i.csharp_using,
    rubyRequire: i.rubyRequire ?? i.ruby_require,
    phpUse: i.phpUse ?? i.php_use,
  });

  const toClass = (c) => ({
    name: c.name,
    extends: c.extends,
    implements: c.implements,
    line: c.line,
  });

  const toExport = (e) => ({
    name: e.name,
    kind: e.kind,
    line: e.line,
  });

  const toAstNode = (n) => ({
    kind: n.kind,
    name: n.name,
    line: n.line,
    text: n.text ?? null,
    receiver: n.receiver ?? null,
  });

  const toDataflow = (df) => ({
    parameters: (df.parameters || []).map((p) => ({
      funcName: p.funcName,
      paramName: p.paramName,
      paramIndex: p.paramIndex,
      line: p.line,
    })),
    returns: (df.returns || []).map((r) => ({
      funcName: r.funcName,
      expression: r.expression ?? '',
      referencedNames: r.referencedNames ?? [],
      line: r.line,
    })),
    assignments: (df.assignments || []).map((a) => ({
      varName: a.varName,
      callerFunc: a.callerFunc ?? null,
      sourceCallName: a.sourceCallName,
      expression: a.expression ?? '',
      line: a.line,
    })),
    argFlows: (df.argFlows ?? []).map((f) => ({
      callerFunc: f.callerFunc ?? null,
      calleeName: f.calleeName,
      argIndex: f.argIndex,
      argName: f.argName ?? null,
      binding: wrapBinding(f.bindingType),
      confidence: f.confidence,
      expression: f.expression ?? '',
      line: f.line,
    })),
    mutations: (df.mutations || []).map((m) => ({
      funcName: m.funcName ?? null,
      receiverName: m.receiverName,
      binding: wrapBinding(m.bindingType),
      mutatingExpr: m.mutatingExpr,
      line: m.line,
    })),
  });

  return {
    _lineCount: result.lineCount ?? result.line_count ?? null,
    definitions: (result.definitions || []).map((d) => toDefinition(d)),
    calls: (result.calls || []).map((c) => toCall(c)),
    imports: (result.imports || []).map((i) => toImport(i)),
    classes: (result.classes || []).map((c) => toClass(c)),
    exports: (result.exports || []).map((e) => toExport(e)),
    astNodes: (result.astNodes ?? result.ast_nodes ?? []).map((n) => toAstNode(n)),
    dataflow: result.dataflow ? toDataflow(result.dataflow) : null,
  };
}
function patchNativeResult(r) {
  // Mutates and returns `r` (the raw native parse result); no copy is made.
  //
  // Compared with a plain pass-through this:
  //   - guarantees `lineCount` and mirrors it to `_lineCount`
  //   - back-fills camelCase fields from their snake_case spelling (older
  //     binaries): definitions[].endLine, definitions[].children[].endLine,
  //     astNodes, and the import flags listed below
  //   - wraps dataflow `bindingType` strings into `binding: { type }` objects

  // Import fields as [camelCase, snake_case] pairs.
  const importFieldAliases = [
    ['typeOnly', 'type_only'],
    ['wildcardReexport', 'wildcard_reexport'],
    ['pythonImport', 'python_import'],
    ['goImport', 'go_import'],
    ['rustUse', 'rust_use'],
    ['javaImport', 'java_import'],
    ['csharpUsing', 'csharp_using'],
    ['rubyRequire', 'ruby_require'],
    ['phpUse', 'php_use'],
  ];

  // Copy `end_line` to `endLine` only when the camelCase field is missing.
  const backfillEndLine = (node) => {
    if (node.endLine === undefined && node.end_line !== undefined) {
      node.endLine = node.end_line;
    }
  };

  // Always leave a defined `lineCount` (null when neither spelling is present).
  r.lineCount = r.lineCount ?? r.line_count ?? null;
  r._lineCount = r.lineCount;

  // The top-level AST node list may arrive under its snake_case key.
  if (r.astNodes === undefined && r.ast_nodes !== undefined) {
    r.astNodes = r.ast_nodes;
  }

  for (const def of r.definitions || []) {
    backfillEndLine(def);
    // Child nodes carry the same endLine field and need the same fallback.
    for (const child of def.children || []) {
      backfillEndLine(child);
    }
  }

  for (const imp of r.imports || []) {
    for (const [camel, snake] of importFieldAliases) {
      if (imp[camel] === undefined) imp[camel] = imp[snake];
    }
  }

  // Wrap bindingType into a binding object for argFlows and mutations.
  if (r.dataflow) {
    for (const flow of r.dataflow.argFlows || []) {
      flow.binding = flow.bindingType ? { type: flow.bindingType } : null;
    }
    for (const mutation of r.dataflow.mutations || []) {
      mutation.binding = mutation.bindingType ? { type: mutation.bindingType } : null;
    }
  }

  return r;
}

@@ -444,4 +366,4 @@

if (native) {
const result = native.parseFile(filePath, source, !!opts.dataflow);
return result ? normalizeNativeSymbols(result) : null;
const result = native.parseFile(filePath, source, !!opts.dataflow, opts.ast !== false);
return result ? patchNativeResult(result) : null;
}

@@ -468,7 +390,12 @@

if (native) {
const nativeResults = native.parseFiles(filePaths, rootDir, !!opts.dataflow);
const nativeResults = native.parseFiles(
filePaths,
rootDir,
!!opts.dataflow,
opts.ast !== false,
);
for (const r of nativeResults) {
if (!r) continue;
const relPath = path.relative(rootDir, r.file).split(path.sep).join('/');
result.set(relPath, normalizeNativeSymbols(r));
result.set(relPath, patchNativeResult(r));
}

@@ -538,5 +465,5 @@ return result;

const result = cache.parseFile(filePath, source);
return result ? normalizeNativeSymbols(result) : null;
return result ? patchNativeResult(result) : null;
}
return parseFileAuto(filePath, source, opts);
}

@@ -149,4 +149,8 @@ import fs from 'node:fs';

* Returns Map<"fromFile|importSource", resolvedPath> or null when native unavailable.
* @param {Array} inputs - Array of { fromFile, importSource }
* @param {string} rootDir - Project root
* @param {object} aliases - Path aliases
* @param {string[]} [knownFiles] - Optional file paths for FS cache (avoids syscalls)
*/
export function resolveImportsBatch(inputs, rootDir, aliases) {
export function resolveImportsBatch(inputs, rootDir, aliases, knownFiles) {
const native = loadNative();

@@ -160,3 +164,8 @@ if (!native) return null;

}));
const results = native.resolveImports(nativeInputs, rootDir, convertAliasesForNative(aliases));
const results = native.resolveImports(
nativeInputs,
rootDir,
convertAliasesForNative(aliases),
knownFiles || null,
);
const map = new Map();

@@ -163,0 +172,0 @@ for (const r of results) {

Sorry, the diff of this file is too big to display