code-graph-llm
Advanced tools
+1
-1
@@ -7,3 +7,3 @@ /** | ||
| export const CONFIG = Object.freeze({ | ||
| VERSION: '4.18.0', | ||
| VERSION: '4.19.0', | ||
| IGNORE_FILE: '.gitignore', | ||
@@ -10,0 +10,0 @@ MAP_FILE: 'llm-code-graph.md', |
+70
-11
@@ -19,5 +19,12 @@ /** | ||
| this._extCache = new Map(); | ||
| this.FILE_TIMEOUT_MS = 15000; | ||
| this.FILE_TIMEOUT_MS = 5000; | ||
| this._scanStart = null; | ||
| this._skipped = []; | ||
| } | ||
| _elapsed() { | ||
| if (!this._scanStart) return ''; | ||
| return ` +${((Date.now() - this._scanStart) / 1000).toFixed(1)}s`; | ||
| } | ||
| async getIgnores(dir, baseIg) { | ||
@@ -43,7 +50,7 @@ const ig = ignore().add(baseIg); | ||
| const label = path.relative(this.cwd, dir) || '.'; | ||
| console.log(`[Code-Graph] Scanning: ${label}`); | ||
| console.log(`[Code-Graph] Scanning: ${label}${this._elapsed()}`); | ||
| } else if (depth <= 4) { | ||
| const indent = ' '.repeat(depth - 1); | ||
| const label = path.relative(this.cwd, dir) || '.'; | ||
| console.log(`[Code-Graph] Scanning: ${indent}${label}`); | ||
| console.log(`[Code-Graph] Scanning: ${indent}${label}${this._elapsed()}`); | ||
| } | ||
@@ -53,8 +60,19 @@ | ||
| try { | ||
| entries = await fsp.readdir(dir, { withFileTypes: true }); | ||
| const readdirTimeout = new Promise((_, reject) => | ||
| setTimeout(() => reject(new Error('READDIR_TIMEOUT')), 8000) | ||
| ); | ||
| entries = await Promise.race([fsp.readdir(dir, { withFileTypes: true }), readdirTimeout]); | ||
| } catch (e) { | ||
| if (e.message === 'READDIR_TIMEOUT') { | ||
| const rel = path.relative(this.cwd, dir); | ||
| console.error(`[Code-Graph] ERROR: readdir timeout (>8s)${this._elapsed()}, skipping dir: ${rel}`); | ||
| this._skipped.push({ reason: 'readdir-timeout', path: rel }); | ||
| return; | ||
| } | ||
| if (e.code === 'EACCES' || e.code === 'EPERM') { | ||
| console.warn(`[Code-Graph] Skipping unreadable dir: ${dir}`); | ||
| console.error(`[Code-Graph] ERROR: permission denied, skipping dir: ${dir}`); | ||
| this._skipped.push({ reason: 'permission', path: dir }); | ||
| return; | ||
| } | ||
| console.error(`[Code-Graph] ERROR: unexpected error reading dir: ${dir} — ${e.message}`); | ||
| throw e; | ||
@@ -82,5 +100,14 @@ } | ||
| async processFileWithTimeout(fullPath, relPath) { | ||
| let timer; | ||
| console.log(`[Code-Graph] Processing: ${relPath}${this._elapsed()}`); | ||
| let timeoutTimer; | ||
| const tickIntervalMs = 2000; | ||
| const tickers = []; | ||
| const timeout = new Promise((_, reject) => { | ||
| timer = setTimeout(() => reject(new Error('FILE_TIMEOUT')), this.FILE_TIMEOUT_MS); | ||
| timeoutTimer = setTimeout(() => reject(new Error('FILE_TIMEOUT')), this.FILE_TIMEOUT_MS); | ||
| for (let ms = tickIntervalMs; ms < this.FILE_TIMEOUT_MS; ms += tickIntervalMs) { | ||
| tickers.push(setTimeout(() => { | ||
| console.warn(`[Code-Graph] Still processing (${ms / 1000}s)${this._elapsed()}: ${relPath}`); | ||
| }, ms)); | ||
| } | ||
| }); | ||
@@ -91,8 +118,12 @@ try { | ||
| if (e.message === 'FILE_TIMEOUT') { | ||
| console.warn(`[Code-Graph] Timeout (>${this.FILE_TIMEOUT_MS}ms), skipping: ${relPath}`); | ||
| console.error(`[Code-Graph] ERROR: file timeout (>${this.FILE_TIMEOUT_MS}ms)${this._elapsed()}, skipping: ${relPath}`); | ||
| this._skipped.push({ reason: 'file-timeout', path: relPath }); | ||
| } else { | ||
| console.error(`[Code-Graph] ERROR: exception processing ${relPath} — ${e.message}`); | ||
| this._skipped.push({ reason: 'exception', path: relPath, error: e.message }); | ||
| throw e; | ||
| } | ||
| } finally { | ||
| clearTimeout(timer); | ||
| clearTimeout(timeoutTimer); | ||
| tickers.forEach(t => clearTimeout(t)); | ||
| } | ||
@@ -110,3 +141,4 @@ } | ||
| if (stats.size > CONFIG.MAX_FILE_BYTES) { | ||
| console.warn(`[Code-Graph] Skipping large file (${stats.size} bytes): ${relPath}`); | ||
| console.error(`[Code-Graph] ERROR: skipping oversized file (${Math.round(stats.size / 1024)}KB): ${relPath}`); | ||
| this._skipped.push({ reason: 'oversized', path: relPath }); | ||
| return; | ||
@@ -123,2 +155,12 @@ } | ||
| const MAX_PARSE_BYTES = 100_000; | ||
| if (content.length > MAX_PARSE_BYTES) { | ||
| console.error(`[Code-Graph] WARNING: skipping parse on large file (${Math.round(content.length / 1024)}KB): ${relPath}`); | ||
| this._skipped.push({ reason: 'large-no-parse', path: relPath }); | ||
| const isCore = /^(index|main|app|server|cli)\./i.test(path.basename(relPath)); | ||
| this.files.push({ path: relPath, symbols: [], tags: [], isCore, outCount: 0, desc: this.extractFileDesc(content, 0) }); | ||
| if (this.files.length % 25 === 0) console.log(`[Code-Graph] Processed ${this.files.length} files...`); | ||
| return; | ||
| } | ||
| const parseStart = Date.now(); | ||
@@ -190,2 +232,3 @@ const { symbols, inheritance, edges, tags } = CodeParser.extract(content); | ||
| const start = Date.now(); | ||
| this._scanStart = start; | ||
| const t = () => `+${((Date.now() - start) / 1000).toFixed(1)}s`; | ||
@@ -195,4 +238,12 @@ console.log(`[Code-Graph v${CONFIG.VERSION}] Starting map generation...`); | ||
| await this.walk(this.cwd, await this.getIgnores(this.cwd, CONFIG.DEFAULT_IGNORES)); | ||
| const heartbeat = setInterval(() => { | ||
| console.log(`[Code-Graph] Still scanning... (${t()}, ${this.files.length} files so far)`); | ||
| }, 5000); | ||
| try { | ||
| await this.walk(this.cwd, await this.getIgnores(this.cwd, CONFIG.DEFAULT_IGNORES)); | ||
| } finally { | ||
| clearInterval(heartbeat); | ||
| } | ||
| console.log(`[Code-Graph] Scan complete (${t()}): ${this.files.length} files, ${this.allEdges.length} edges found.`); | ||
@@ -213,2 +264,10 @@ | ||
| console.log(`[Code-Graph] Done in ${elapsed}s — ${CONFIG.MAP_FILE} updated (${this.files.length} files, ${this.allEdges.length} edges).`); | ||
| if (this._skipped.length > 0) { | ||
| console.error(`[Code-Graph] WARNINGS: ${this._skipped.length} file(s) skipped:`); | ||
| for (const s of this._skipped) { | ||
| const detail = s.error ? ` (${s.error})` : ''; | ||
| console.error(` [${s.reason}] ${s.path}${detail}`); | ||
| } | ||
| } | ||
| } | ||
@@ -215,0 +274,0 @@ |
+1
-1
| { | ||
| "name": "code-graph-llm", | ||
| "version": "4.18.0", | ||
| "version": "4.19.0", | ||
| "description": "Compact, language-agnostic codebase mapper for LLM token efficiency.", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
+11
-7
@@ -1,2 +0,2 @@ | ||
| # CODE-GRAPH (v4.18.0) | ||
| # CODE-GRAPH (v4.19.0) | ||
@@ -7,7 +7,11 @@ > Inspired by [Andrej Karpathy skills](https://github.com/forrestchang/andrej-karpathy-skills), [juliusbrussee/caveman](https://github.com/juliusbrussee/caveman), and the community's work building better agent workflows. | ||
| ## New in v4.18.0 | ||
| ## New in v4.19.0 | ||
| - **Fix (Generate — build cache ignores):** Added `.gradle/`, `.kotlin/`, `Pods/`, `DerivedData/`, `.swiftpm/`, `xcuserdata/`, `__pycache__/`, `.mypy_cache/`, `.pytest_cache/` to default ignores. Previously the scanner crawled Android Gradle caches (e.g. `android/.gradle/8.14/kotlin/`) and processed generated Kotlin files inside them. | ||
| - **Fix (Generate — per-file timeout):** Each file now has a 15s processing timeout. If a file hangs (stuck I/O or slow parse), the scanner logs a warning and moves on to the next file instead of blocking forever. | ||
| - **Fix (Generate — depth logging):** Subdirectories at depth 2–4 are now logged with indentation, making it easy to pinpoint which subtree is slow. | ||
| - **Fix (Generate — large file hang):** Files over 100KB now skip symbol extraction instead of running the regex parser. Compiled/generated files like `drift_worker.js` (343KB Dart→JS) caused catastrophic regex backtracking that blocked the Node.js event loop entirely, preventing timeouts and heartbeats from firing. Large files are still indexed in the graph with their description; only symbol extraction is skipped. | ||
| - **Fix (Generate — file timeout reduced):** Per-file timeout reduced from 15s to 5s. Timed-out files are now logged as errors (`console.error`) instead of warnings. | ||
| - **Fix (Generate — readdir timeout):** Directory reads (`readdir`) now have an 8s timeout. If a directory hangs (e.g. broken symlink, network path), the scanner logs an error and skips it instead of blocking forever. | ||
| - **UX (Generate — elapsed timestamps):** Every `Scanning:` log line now shows elapsed time since generation started (e.g. `+0.3s`), making it easy to spot which directory is slow. | ||
| - **UX (Generate — heartbeat):** A `Still scanning...` heartbeat fires every 5s during the walk phase, confirming the process is alive on large repos. | ||
| - **UX (Generate — per-file processing log):** Each file logs `Processing: <path>` before parse begins, so if the process hangs the last visible line identifies the culprit file. | ||
| - **UX (Generate — skip summary):** End-of-run summary lists every skipped file with reason (`large-no-parse`, `file-timeout`, `readdir-timeout`, `oversized`, `permission`, `exception`). | ||
@@ -49,4 +53,4 @@ See [RELEASE_NOTES.md](RELEASE_NOTES.md) for full history. | ||
| ```text | ||
| [Code-Graph v4.18.0] Installed/updated: /absolute/path/to/AGENTS.md | ||
| [Code-Graph v4.18.0] Installed/updated: /absolute/path/to/.codex/hooks.json | ||
| [Code-Graph v4.19.0] Installed/updated: /absolute/path/to/AGENTS.md | ||
| [Code-Graph v4.19.0] Installed/updated: /absolute/path/to/.codex/hooks.json | ||
| ``` | ||
@@ -53,0 +57,0 @@ |
+10
-0
| # RELEASE NOTES | ||
| ### v4.19.0 (2026-05-07) | ||
| - **Fix (Generate — large file hang):** Files over 100KB now skip symbol extraction. Compiled/generated JS (e.g. Dart→JS `drift_worker.js`, 343KB) caused catastrophic regex backtracking in `CodeParser.extract`, blocking the Node.js event loop entirely and preventing file timeouts and heartbeats from firing. Large files are still indexed with description only. | ||
| - **Fix (Generate — file timeout reduced):** `FILE_TIMEOUT_MS` reduced from 15 000ms to 5 000ms. Timed-out files now emit `console.error` and are tracked in the skip summary. | ||
| - **Fix (Generate — readdir timeout):** `fsp.readdir` now races against an 8s timeout. Hung directory reads (broken symlinks, network paths) are caught, logged as errors, and skipped. | ||
| - **UX (Generate — elapsed timestamps):** Every `Scanning:` line now appends `+Xs` elapsed since generation start. | ||
| - **UX (Generate — heartbeat):** A `Still scanning... (+Xs, N files so far)` heartbeat fires every 5s during the walk phase. | ||
| - **UX (Generate — per-file log):** `Processing: <path>` is emitted before each file parse begins, so a hang is immediately attributable to the last visible file. | ||
| - **UX (Generate — skip summary):** After `Done`, any skipped files are listed with reason: `large-no-parse`, `file-timeout`, `readdir-timeout`, `oversized`, `permission`, or `exception`. | ||
| - **Maintenance:** Bumped version to 4.19.0 in `config.js` and `package.json`. | ||
| ### v4.18.0 (2026-05-07) | ||
@@ -4,0 +14,0 @@ - **Fix (Generate — build cache ignores):** Added `.gradle/`, `.kotlin/`, `Pods/`, `DerivedData/`, `.swiftpm/`, `xcuserdata/`, `__pycache__/`, `.mypy_cache/`, `.pytest_cache/` to `DEFAULT_IGNORES`. Scanner was crawling Android Gradle caches (`android/.gradle/8.14/kotlin/`, etc.) and processing large generated Kotlin files inside them, causing hangs on Flutter/Android projects. |
+141
-5
@@ -967,13 +967,13 @@ import assert from 'node:assert'; | ||
| const warns = []; | ||
| const origWarn = console.warn; | ||
| console.warn = (m) => warns.push(m); | ||
| const errs = []; | ||
| const origErr = console.error; | ||
| console.error = (m) => errs.push(m); | ||
| await new ProjectMapper(tempDir).generate(); | ||
| console.warn = origWarn; | ||
| console.error = origErr; | ||
| const map = fs.readFileSync(path.join(tempDir, CONFIG.MAP_FILE), 'utf8'); | ||
| assert.ok(!map.includes('huge.js'), 'oversized file must be skipped'); | ||
| assert.ok(map.includes('small.js'), 'normal file must be included'); | ||
| assert.ok(warns.some(w => w.includes('Skipping large file'))); | ||
| assert.ok(errs.some(w => w.includes('oversized'))); | ||
@@ -1007,1 +1007,137 @@ fs.rmSync(tempDir, { recursive: true }); | ||
| }); | ||
| // --- v4.19.0 Tests --- | ||
// A file above the parse-size limit must still be listed in the generated map,
// and an error line naming it must be logged.
test('ProjectMapper - skips parse on large file (>100KB) but still indexes it', async () => {
  const tempDir = path.join(process.cwd(), 'temp_test_large_no_parse');
  // `force` turns removal into a no-op when no stale fixture exists.
  fs.rmSync(tempDir, { recursive: true, force: true });
  fs.mkdirSync(tempDir);

  const errs = [];
  const origErr = console.error;
  try {
    // 110KB JS file — large enough to skip parsing
    fs.writeFileSync(path.join(tempDir, 'big.js'), '// generated\n' + 'x=1;\n'.repeat(22000));
    fs.writeFileSync(path.join(tempDir, 'small.js'), 'function helper() {}');

    console.error = (m) => errs.push(m);
    try {
      await new ProjectMapper(tempDir).generate();
    } finally {
      // Restore even when generate() rejects so later tests see the real console.
      console.error = origErr;
    }

    const map = fs.readFileSync(path.join(tempDir, CONFIG.MAP_FILE), 'utf8');
    assert.ok(map.includes('big.js'), 'large file must still appear in map');
    assert.ok(map.includes('small.js'));
    assert.ok(errs.some(e => e.includes('large') && e.includes('big.js')), 'must log error for large file');
  } finally {
    // Always remove the fixture, including when an assertion above fails.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
// After generate(), the captured error output must contain the skip summary
// header and an entry naming the skipped file together with its reason.
test('ProjectMapper - skip summary lists skipped files after generate', async () => {
  const tempDir = path.join(process.cwd(), 'temp_test_skip_summary');
  // `force` turns removal into a no-op when no stale fixture exists.
  fs.rmSync(tempDir, { recursive: true, force: true });
  fs.mkdirSync(tempDir);

  const errs = [];
  const origErr = console.error;
  try {
    fs.writeFileSync(path.join(tempDir, 'big.js'), 'x=1;\n'.repeat(22000));

    console.error = (m) => errs.push(m);
    try {
      await new ProjectMapper(tempDir).generate();
    } finally {
      // Restore even when generate() rejects so later tests see the real console.
      console.error = origErr;
    }

    assert.ok(errs.some(e => e.includes('WARNINGS') && e.includes('skipped')), 'must print skip summary');
    assert.ok(errs.some(e => e.includes('large-no-parse') && e.includes('big.js')), 'summary must name the file and reason');
  } finally {
    // Always remove the fixture, including when an assertion above fails.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
// A fresh mapper starts with an empty `_skipped` list and keeps it empty
// when the only input is a small, ordinary file.
test('ProjectMapper - _skipped tracking resets per instance', async () => {
  const tempDir = path.join(process.cwd(), 'temp_test_skipped_reset');
  // `force` turns removal into a no-op when no stale fixture exists.
  fs.rmSync(tempDir, { recursive: true, force: true });
  fs.mkdirSync(tempDir);

  const origErr = console.error;
  try {
    fs.writeFileSync(path.join(tempDir, 'small.js'), 'function a() {}');

    const mapper = new ProjectMapper(tempDir);
    assert.deepStrictEqual(mapper._skipped, [], '_skipped must start empty');

    // Silence error output for the duration of the run only.
    console.error = () => {};
    try {
      await mapper.generate();
    } finally {
      // Restore even when generate() rejects so later tests see the real console.
      console.error = origErr;
    }

    assert.strictEqual(mapper._skipped.length, 0, 'no skips for normal file');
  } finally {
    // Always remove the fixture, including when an assertion above fails.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
// Every `Scanning:` log line emitted during generate() must carry a `+X.Xs`
// elapsed-time suffix.
test('ProjectMapper - elapsed timer active during generate', async () => {
  const tempDir = path.join(process.cwd(), 'temp_test_elapsed_timer');
  // `force` turns removal into a no-op when no stale fixture exists.
  fs.rmSync(tempDir, { recursive: true, force: true });
  fs.mkdirSync(tempDir);

  const logs = [];
  const origLog = console.log;
  try {
    fs.writeFileSync(path.join(tempDir, 'a.js'), 'function a() {}');

    // Capture each line while still forwarding it to the real console.
    console.log = (m) => { logs.push(m); origLog(m); };
    try {
      await new ProjectMapper(tempDir).generate();
    } finally {
      // Restore even when generate() rejects so later tests see the real console.
      console.log = origLog;
    }

    const scanLines = logs.filter(l => l.includes('Scanning:'));
    assert.ok(scanLines.length > 0, 'must emit Scanning lines');
    assert.ok(scanLines.every(l => /\+\d+\.\ds/.test(l)), 'every Scanning line must have +Xs timestamp');
  } finally {
    // Always remove the fixture, including when an assertion above fails.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
// generate() must log a `Processing:` line naming each file in the fixture.
test('ProjectMapper - Processing log emitted before each file', async () => {
  const tempDir = path.join(process.cwd(), 'temp_test_processing_log');
  // `force` turns removal into a no-op when no stale fixture exists.
  fs.rmSync(tempDir, { recursive: true, force: true });
  fs.mkdirSync(tempDir);

  const logs = [];
  const origLog = console.log;
  try {
    fs.writeFileSync(path.join(tempDir, 'foo.js'), 'function foo() {}');
    fs.writeFileSync(path.join(tempDir, 'bar.js'), 'function bar() {}');

    console.log = (m) => logs.push(m);
    try {
      await new ProjectMapper(tempDir).generate();
    } finally {
      // Restore even when generate() rejects so later tests see the real console.
      console.log = origLog;
    }

    assert.ok(logs.some(l => l.includes('Processing:') && l.includes('foo.js')));
    assert.ok(logs.some(l => l.includes('Processing:') && l.includes('bar.js')));
  } finally {
    // Always remove the fixture, including when an assertion above fails.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
// With the per-file budget forced down to 1ms, the timed-out file must be
// recorded in `_skipped` as `file-timeout` and an ERROR line must be logged.
test('ProjectMapper - file timeout emits error and tracks in _skipped', async () => {
  const tempDir = path.join(process.cwd(), 'temp_test_file_timeout');
  // `force` turns removal into a no-op when no stale fixture exists.
  fs.rmSync(tempDir, { recursive: true, force: true });
  fs.mkdirSync(tempDir);

  const errs = [];
  const origErr = console.error;
  try {
    fs.writeFileSync(path.join(tempDir, 'normal.js'), 'function ok() {}');

    const mapper = new ProjectMapper(tempDir);
    // NOTE(review): presumably 1ms always expires before the file is processed;
    // if processing can finish sooner this test is timing-dependent — confirm.
    mapper.FILE_TIMEOUT_MS = 1;

    console.error = (m) => errs.push(m);
    try {
      await mapper.generate();
    } finally {
      // Restore even when generate() rejects so later tests see the real console.
      console.error = origErr;
    }

    assert.ok(mapper._skipped.some(s => s.reason === 'file-timeout'), 'timed-out file must appear in _skipped');
    assert.ok(errs.some(e => e.includes('ERROR') && e.includes('timeout')));
  } finally {
    // Always remove the fixture, including when an assertion above fails.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
AI-detected potential code anomaly
Supply chain risk: AI has identified unusual behaviors that may pose a security risk.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
AI-detected potential code anomaly
Supply chain risk: AI has identified unusual behaviors that may pose a security risk.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
209690
4.94%3108
4.96%303
1.34%