superlocalmemory
Advanced tools
| """Per-HTTP-request agent ID resolution — ContextVar home. | ||
| Kept in a standalone module so both tools_core and tools_active can import | ||
| it without creating circular dependencies (server.py → tools_core → here, | ||
| and unified_daemon.py → here independently). | ||
| Priority chain (HTTP-first, stdio-fallback): | ||
| 1. ContextVar set by _AgentIDExtractorASGI middleware from /mcp/{agent_id} URL path. | ||
| 2. SLM_AGENT_ID environment variable (stdio transport legacy). | ||
| 3. Hard-coded "mcp_client" sentinel. | ||
| """ | ||
| from __future__ import annotations | ||
| import contextvars | ||
| import os | ||
| import re | ||
| _current_agent_id: contextvars.ContextVar[str] = contextvars.ContextVar( | ||
| "slm_agent_id", default="mcp_client" | ||
| ) | ||
| # Agent ids arrive from an untrusted URL path segment. They are ATTRIBUTION | ||
| # metadata, never an authenticated principal — but they reach loggers, the | ||
| # agent registry, and SQL-bound attribution columns, so we hard-restrict the | ||
| # charset at the single extraction chokepoint. This neutralises log-injection | ||
| # (CRLF / ANSI), oversized ids, and any path-ish characters in one place. | ||
| _AGENT_ID_SANITIZE = re.compile(r"[^A-Za-z0-9._-]") | ||
| _AGENT_ID_MAX_LEN = 64 | ||
| def sanitize_agent_id(raw: str) -> str: | ||
| """Coerce an untrusted agent-id segment to a safe, bounded token.""" | ||
| return _AGENT_ID_SANITIZE.sub("_", raw)[:_AGENT_ID_MAX_LEN] | ||
| def get_current_agent_id(env_fallback: bool = True) -> str: | ||
| """Return the agent_id for the current asyncio task. | ||
| For HTTP transport the ASGI wrapper sets the ContextVar from the URL path | ||
| before the request reaches any MCP tool, so this returns the URL-derived id. | ||
| For stdio transport the ContextVar holds its default ("mcp_client") and we | ||
| fall through to the SLM_AGENT_ID env var instead. | ||
| """ | ||
| ctx_id = _current_agent_id.get() | ||
| if ctx_id != "mcp_client": | ||
| return ctx_id | ||
| if env_fallback: | ||
| return os.environ.get("SLM_AGENT_ID", "mcp_client") | ||
| return "mcp_client" | ||
| class AgentIDExtractorASGI: | ||
| """ASGI wrapper that maps ``/mcp/{agent_id}`` → the agent-id ContextVar. | ||
| Mounted at ``/mcp`` in unified_daemon. IMPORTANT: Starlette's ``Mount`` | ||
| (≥0.35 / 1.x) does NOT strip the mount prefix from ``scope["path"]`` — it | ||
| records the prefix in ``scope["root_path"]`` and leaves ``path`` as the full | ||
| request path (e.g. ``/mcp/claude`` with ``root_path == "/mcp"``). So we | ||
| compute the mount-relative sub-path ourselves as ``path[len(root_path):]``. | ||
| Flow for ``POST /mcp/claude``: | ||
| sub-path ``/claude`` → agent id ``claude`` → set ContextVar → rewrite the | ||
| scope path to ``{root_path}/`` so the inner FastMCP app (Starlette, route | ||
| ``/``) sees the same mount-relative ``/`` it sees for a bare ``/mcp/``. | ||
| Backward compatible: bare ``/mcp/`` has sub-path ``/`` → no agent segment → | ||
| the request passes through untouched and the ContextVar keeps its | ||
| ``"mcp_client"`` default. | ||
| Per-request isolation is guaranteed by ContextVar + ``reset(token)`` in a | ||
| ``finally``, so concurrent HTTP sessions never see each other's agent id. | ||
| """ | ||
| __slots__ = ("_app",) | ||
| def __init__(self, inner) -> None: | ||
| self._app = inner | ||
| async def __call__(self, scope, receive, send): | ||
| if scope.get("type") == "http": | ||
| root_path: str = scope.get("root_path", "") | ||
| full_path: str = scope.get("path", "/") | ||
| # Mount-relative sub-path (what comes AFTER /mcp). When a root_path | ||
| # is present the request path MUST start with it (Starlette Mount | ||
| # guarantees this); if it somehow does not, treat it as no-agent and | ||
| # pass through untouched rather than mis-parsing the full path. | ||
| if root_path: | ||
| if not full_path.startswith(root_path): | ||
| await self._app(scope, receive, send) | ||
| return | ||
| subpath = full_path[len(root_path):] | ||
| else: | ||
| subpath = full_path | ||
| first = subpath.lstrip("/").split("/")[0] | ||
| if first: | ||
| first = sanitize_agent_id(first) | ||
| token = _current_agent_id.set(first) | ||
| # Rewrite the path so the inner app sees the bare mount root, | ||
| # exactly as it would for a no-agent /mcp/ request. | ||
| new_full = (root_path + "/") if root_path else "/" | ||
| new_scope = { | ||
| **scope, | ||
| "path": new_full, | ||
| "raw_path": new_full.encode(), | ||
| } | ||
| try: | ||
| await self._app(new_scope, receive, send) | ||
| finally: | ||
| _current_agent_id.reset(token) | ||
| return | ||
| await self._app(scope, receive, send) |
| """capture.py — Lossless shadow-capture of real proxy traffic (v3.6.10, plan §7). | ||
| Purpose: build a dogfood corpus of real {request, response, model, tokens, | ||
| content_type} pairs so the cache + compression benchmark (benchmarks/optimize/) | ||
| can be replayed against authentic traffic instead of only synthetic prompts. | ||
| Activation: set ``SLM_OPTIMIZE_CAPTURE=1`` in the daemon's environment. When on: | ||
| * the proxy runs in PURE PASSTHROUGH — cache + compression hooks are disabled | ||
| at load time (see server._load_hooks), so capture never observes a mutated | ||
| request or a cache hit; every line is a genuine upstream exchange. | ||
| * each completed exchange is appended as one JSON line to | ||
| ``~/.superlocalmemory/optimize_capture.jsonl`` (0600, gitignored). | ||
| ISOLATION GUARANTEE: this module writes ONLY to optimize_capture.jsonl. It never | ||
| opens memory.db, llmcache.db, or any SLM memory store. (Plan §9 hard rule.) | ||
| FAIL-OPEN: a capture failure (disk full, permission, encode error) is logged and | ||
| swallowed — it MUST NOT break the proxied request the user is waiting on. | ||
| """ | ||
| from __future__ import annotations | ||
| import asyncio | ||
| import json | ||
| import logging | ||
| import os | ||
| import threading | ||
| from pathlib import Path | ||
| from typing import Any | ||
| logger = logging.getLogger("slm.optimize.proxy.capture") | ||
| _CAPTURE_DIRNAME = ".superlocalmemory" | ||
| _CAPTURE_FILENAME = "optimize_capture.jsonl" | ||
| _CAPTURE_ENV = "SLM_OPTIMIZE_CAPTURE" | ||
| _TRUTHY = frozenset({"1", "true", "yes", "on"}) | ||
| # Cap a single captured body so a pathological 10 MB request can't bloat the | ||
| # corpus line beyond what the replay harness will read back. Bodies above this | ||
| # are recorded truncated with a marker (capture is for benchmarking, not audit). | ||
| _MAX_CAPTURE_BODY_BYTES = 1 * 1024 * 1024 # 1 MB per side | ||
| # Providers whose response bodies follow the OpenAI usage schema | ||
| # ({"usage": {"prompt_tokens", "completion_tokens"}, "model"}). | ||
| _OPENAI_FORMAT_PROVIDERS = frozenset({"openai", "gemini-openai-compat"}) | ||
| def capture_enabled() -> bool: | ||
| """True iff ``SLM_OPTIMIZE_CAPTURE`` is set to a truthy value.""" | ||
| return os.environ.get(_CAPTURE_ENV, "").strip().lower() in _TRUTHY | ||
| def _capture_path() -> Path: | ||
| return Path.home() / _CAPTURE_DIRNAME / _CAPTURE_FILENAME | ||
| class ShadowCapture: | ||
| """Thread-safe append-only JSONL writer for proxy exchanges (singleton).""" | ||
| _instance: "ShadowCapture | None" = None | ||
| _instance_lock = threading.Lock() | ||
| def __init__(self, path: Path | None = None) -> None: | ||
| self._path = path or _capture_path() | ||
| self._write_lock = threading.Lock() | ||
| self._count = 0 | ||
| @classmethod | ||
| def get_instance(cls) -> "ShadowCapture": | ||
| # Double-checked locking: cheap fast-path after first construction. | ||
| if cls._instance is None: | ||
| with cls._instance_lock: | ||
| if cls._instance is None: | ||
| cls._instance = cls() | ||
| return cls._instance | ||
| @classmethod | ||
| def reset_instance(cls) -> None: | ||
| """Test hook — drop the singleton so a fresh path can be injected.""" | ||
| with cls._instance_lock: | ||
| cls._instance = None | ||
| @property | ||
| def path(self) -> Path: | ||
| return self._path | ||
| @property | ||
| def count(self) -> int: | ||
| return self._count | ||
| def record(self, entry: dict[str, Any]) -> bool: | ||
| """Append one capture entry as a JSON line. Returns True on success. | ||
| Fail-open: any error is logged and False is returned; never raised. | ||
| Security: opens with a single ``os.open`` carrying ``O_CREAT | | ||
| O_APPEND | O_NOFOLLOW`` and mode ``0o600`` on EVERY write. O_NOFOLLOW | ||
| refuses a symlink pre-placed at the path (symlink-append attack), and | ||
| the unconditional 0600-on-create removes the stat/exists TOCTOU that | ||
| could otherwise drop the file to the process umask. | ||
| """ | ||
| try: | ||
| line = json.dumps(entry, ensure_ascii=False, separators=(",", ":")) | ||
| except (TypeError, ValueError) as exc: | ||
| logger.warning("capture: entry not JSON-serialisable, dropped: %r", exc) | ||
| return False | ||
| try: | ||
| with self._write_lock: | ||
| self._path.parent.mkdir(parents=True, exist_ok=True) | ||
| flags = os.O_CREAT | os.O_WRONLY | os.O_APPEND | getattr(os, "O_NOFOLLOW", 0) | ||
| fd = os.open(self._path, flags, 0o600) | ||
| with os.fdopen(fd, "a", encoding="utf-8") as fh: | ||
| fh.write(line + "\n") | ||
| self._count += 1 | ||
| return True | ||
| except OSError as exc: | ||
| # PermissionError / symlink-refusal (ELOOP) are security-relevant — | ||
| # surface the errno but still fail open so the request is never blocked. | ||
| logger.warning("capture: write failed (fail-open): %r", exc) | ||
| return False | ||
| def _truncate(raw: bytes) -> tuple[str, bool]: | ||
| """Decode bytes for storage; truncate beyond the per-side cap.""" | ||
| truncated = len(raw) > _MAX_CAPTURE_BODY_BYTES | ||
| head = raw[:_MAX_CAPTURE_BODY_BYTES] if truncated else raw | ||
| return head.decode("utf-8", errors="replace"), truncated | ||
| def build_entry( | ||
| *, | ||
| provider: str, | ||
| model: str, | ||
| request_body: bytes, | ||
| response_body: bytes, | ||
| content_type: str, | ||
| input_tokens: int, | ||
| output_tokens: int, | ||
| status_code: int, | ||
| stream: bool, | ||
| ) -> dict[str, Any]: | ||
| """Construct a capture entry dict from a completed exchange. | ||
| No timestamp is stamped here (Date.now is intentionally avoided in some | ||
| runtimes); the replay harness keys on content, not time, and the file's own | ||
| line order preserves arrival sequence. | ||
| """ | ||
| req_str, req_trunc = _truncate(request_body) | ||
| resp_str, resp_trunc = _truncate(response_body) | ||
| return { | ||
| "provider": provider, | ||
| "model": model, | ||
| "content_type": content_type, | ||
| "stream": stream, | ||
| "status_code": status_code, | ||
| "input_tokens": int(input_tokens), | ||
| "output_tokens": int(output_tokens), | ||
| "request": req_str, | ||
| "response": resp_str, | ||
| "request_truncated": req_trunc, | ||
| "response_truncated": resp_trunc, | ||
| } | ||
| def extract_usage(provider: str, body: bytes | None) -> tuple[int, int, str]: | ||
| """Best-effort (input_tokens, output_tokens, model) from a provider JSON body. | ||
| Works on the normalised JSON the SSE parsers emit AND on non-streaming | ||
| upstream JSON. Returns (0, 0, "") when the body is missing/unparseable — | ||
| capture must never fail because usage couldn't be read. | ||
| """ | ||
| if not body: | ||
| return 0, 0, "" | ||
| try: | ||
| data = json.loads(body) | ||
| except (json.JSONDecodeError, ValueError, TypeError): | ||
| return 0, 0, "" | ||
| if not isinstance(data, dict): | ||
| return 0, 0, "" | ||
| if provider == "gemini": | ||
| usage = data.get("usageMetadata") or {} | ||
| return ( | ||
| int(usage.get("promptTokenCount", 0) or 0), | ||
| int(usage.get("candidatesTokenCount", 0) or 0), | ||
| str(data.get("modelVersion", "") or ""), | ||
| ) | ||
| usage = data.get("usage") or {} | ||
| if provider == "anthropic": | ||
| return ( | ||
| int(usage.get("input_tokens", 0) or 0), | ||
| int(usage.get("output_tokens", 0) or 0), | ||
| str(data.get("model", "") or ""), | ||
| ) | ||
| # OpenAI-format providers (explicit allowlist so a future provider variant | ||
| # is not silently parsed with the wrong schema — it warns + returns zeros). | ||
| if provider not in _OPENAI_FORMAT_PROVIDERS: | ||
| logger.warning( | ||
| "capture.extract_usage: unknown provider %r — recording zero tokens", provider | ||
| ) | ||
| return 0, 0, str(data.get("model", "") or "") | ||
| return ( | ||
| int(usage.get("prompt_tokens", 0) or 0), | ||
| int(usage.get("completion_tokens", 0) or 0), | ||
| str(data.get("model", "") or ""), | ||
| ) | ||
| def record_exchange( | ||
| *, | ||
| provider: str, | ||
| model: str, | ||
| request_body: bytes, | ||
| response_body: bytes, | ||
| content_type: str = "application/json", | ||
| input_tokens: int = 0, | ||
| output_tokens: int = 0, | ||
| status_code: int = 200, | ||
| stream: bool = False, | ||
| ) -> bool: | ||
| """Build + append a capture entry. Fail-open. Returns True on success.""" | ||
| entry = build_entry( | ||
| provider=provider, | ||
| model=model, | ||
| request_body=request_body, | ||
| response_body=response_body, | ||
| content_type=content_type, | ||
| input_tokens=input_tokens, | ||
| output_tokens=output_tokens, | ||
| status_code=status_code, | ||
| stream=stream, | ||
| ) | ||
| return ShadowCapture.get_instance().record(entry) | ||
| async def record_exchange_async(**kwargs: Any) -> bool: | ||
| """Async wrapper for ``record_exchange`` that offloads the synchronous file | ||
| write to a worker thread so it never blocks the proxy event loop — relevant | ||
| when many streaming responses complete in the same loop iteration. | ||
| """ | ||
| return await asyncio.to_thread(lambda: record_exchange(**kwargs)) |
+1
-1
| { | ||
| "name": "superlocalmemory", | ||
| "version": "3.6.9", | ||
| "version": "3.6.10", | ||
| "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.", | ||
@@ -5,0 +5,0 @@ "keywords": [ |
+6
-1
| [project] | ||
| name = "superlocalmemory" | ||
| version = "3.6.9" | ||
| version = "3.6.10" | ||
| description = "Information-geometric agent memory with mathematical guarantees" | ||
@@ -72,2 +72,7 @@ readme = "README.md" | ||
| "sqlite-vec==0.1.9", | ||
| # v3.6.10: LLMLingua-2 prose compression (aggressive mode, opt-in at runtime). | ||
| # Hard dependency so `pip install superlocalmemory` ships compression-ready; | ||
| # the ~560MB model downloads on first setup/warmup (fail-open). Verified the | ||
| # pin does NOT disturb the transformers/torch/numpy pins above. | ||
| "llmlingua==0.2.2", | ||
| ] | ||
@@ -74,0 +79,0 @@ |
+8
-4
@@ -36,3 +36,5 @@ <p align="center"> | ||
| > V3.6 is the only local-first layer that SKIPS repeat LLM calls (cache: 100% saved), SHRINKS prompts 60-95% (compress: extractive + LLMLingua-2), and DISCOUNTS prefix costs (align: native KV-cache) — and remembers everything — in one install. **Your first cache hit pays for the install time. Hours of coding on repeat, minimal API cost.** | ||
| > V3.6 is the only local-first layer that SKIPS repeat LLM calls (cache: 100% saved on a hit), SHRINKS prose prompts (compress: lossless-by-default, opt-in LLMLingua-2), and DISCOUNTS prefix costs (align: native KV-cache) — and remembers everything — in one install. **Your first cache hit pays for the install time. Hours of coding on repeat, minimal API cost.** | ||
| > | ||
| > **v3.6.10:** cache and compression are now **independent runtime switches** (cache-only, compress-only, both, or neither — toggle live from the dashboard, no restart). Compression was rebuilt to be **lossless by default** (the old string/array/code truncation is gone); aggressive mode adds LLMLingua-2 for **prose only** — never code, numbers, structured data, or the current turn. | ||
@@ -43,4 +45,4 @@ ### The Three Levers | ||
| |-------|-----------|:------:|:---------------:| | ||
| | **Cache** | Skip repeat calls — exact-match SQLite lookup, vCache-gated semantic (opt-in) | **100% on a hit** (input + output) | Cache ON, Semantic OFF | | ||
| | **Compress** | Shrink prompts — extractive JSON/code (lossless) + LLMLingua-2 prose (opt-in) | **60–95% on a miss** (input only) | Safe mode ON, Aggressive OFF | | ||
| | **Cache** | Skip repeat calls — exact-match SQLite lookup (zero false hits), vCache-gated semantic (opt-in) | **100% on a hit** (input + output) | Cache ON, Semantic OFF | | ||
| | **Compress** | Shrink prompts — **safe = lossless** normalization; **aggressive = LLMLingua-2 prose only** (opt-in) | Safe: small + lossless · Aggressive: large on prose | Safe mode, Aggressive OFF | | ||
| | **Align** | Stabilize prefix — maximize provider prefix-cache discounts | **Lossless extra** | ON when compression is ON | | ||
@@ -50,2 +52,4 @@ | ||
| > **Independent at runtime:** enable caching only, compression only, both, or neither — from the dashboard Optimize tab, applied live (no restart). Each AI client can also get its own memory identity over HTTP MCP via `http://127.0.0.1:8765/mcp/{agent_id}`. | ||
| ### Quick Start | ||
@@ -66,3 +70,3 @@ | ||
| | `slm cache status\|clear\|invalidate\|ttl\|semantic` | Cache sub-control — exact + semantic tiers | | ||
| | `slm compress status\|mode\|code\|prose\|ccr\|align` | Compression control — per-channel toggles | | ||
| | `slm compress status\|mode\|prose` | Compression control — safe (lossless) / aggressive (LLMLingua-2 prose) | | ||
| | `slm proxy [--port] [--provider]` | Start the interception proxy (port 8765) | | ||
@@ -69,0 +73,0 @@ | `slm wrap <agent>` | Proxy-activate an agent — one command to start saving | |
| Metadata-Version: 2.4 | ||
| Name: superlocalmemory | ||
| Version: 3.6.9 | ||
| Version: 3.6.10 | ||
| Summary: Information-geometric agent memory with mathematical guarantees | ||
@@ -61,2 +61,3 @@ Author-email: Varun Pratap Bhardwaj <admin@superlocalmemory.com> | ||
| Requires-Dist: sqlite-vec==0.1.9 | ||
| Requires-Dist: llmlingua==0.2.2 | ||
| Provides-Extra: search | ||
@@ -129,3 +130,5 @@ Requires-Dist: sentence-transformers==5.3.0; extra == "search" | ||
| > V3.6 is the only local-first layer that SKIPS repeat LLM calls (cache: 100% saved), SHRINKS prompts 60-95% (compress: extractive + LLMLingua-2), and DISCOUNTS prefix costs (align: native KV-cache) — and remembers everything — in one install. **Your first cache hit pays for the install time. Hours of coding on repeat, minimal API cost.** | ||
| > V3.6 is the only local-first layer that SKIPS repeat LLM calls (cache: 100% saved on a hit), SHRINKS prose prompts (compress: lossless-by-default, opt-in LLMLingua-2), and DISCOUNTS prefix costs (align: native KV-cache) — and remembers everything — in one install. **Your first cache hit pays for the install time. Hours of coding on repeat, minimal API cost.** | ||
| > | ||
| > **v3.6.10:** cache and compression are now **independent runtime switches** (cache-only, compress-only, both, or neither — toggle live from the dashboard, no restart). Compression was rebuilt to be **lossless by default** (the old string/array/code truncation is gone); aggressive mode adds LLMLingua-2 for **prose only** — never code, numbers, structured data, or the current turn. | ||
@@ -136,4 +139,4 @@ ### The Three Levers | ||
| |-------|-----------|:------:|:---------------:| | ||
| | **Cache** | Skip repeat calls — exact-match SQLite lookup, vCache-gated semantic (opt-in) | **100% on a hit** (input + output) | Cache ON, Semantic OFF | | ||
| | **Compress** | Shrink prompts — extractive JSON/code (lossless) + LLMLingua-2 prose (opt-in) | **60–95% on a miss** (input only) | Safe mode ON, Aggressive OFF | | ||
| | **Cache** | Skip repeat calls — exact-match SQLite lookup (zero false hits), vCache-gated semantic (opt-in) | **100% on a hit** (input + output) | Cache ON, Semantic OFF | | ||
| | **Compress** | Shrink prompts — **safe = lossless** normalization; **aggressive = LLMLingua-2 prose only** (opt-in) | Safe: small + lossless · Aggressive: large on prose | Safe mode, Aggressive OFF | | ||
| | **Align** | Stabilize prefix — maximize provider prefix-cache discounts | **Lossless extra** | ON when compression is ON | | ||
@@ -143,2 +146,4 @@ | ||
| > **Independent at runtime:** enable caching only, compression only, both, or neither — from the dashboard Optimize tab, applied live (no restart). Each AI client can also get its own memory identity over HTTP MCP via `http://127.0.0.1:8765/mcp/{agent_id}`. | ||
| ### Quick Start | ||
@@ -159,3 +164,3 @@ | ||
| | `slm cache status\|clear\|invalidate\|ttl\|semantic` | Cache sub-control — exact + semantic tiers | | ||
| | `slm compress status\|mode\|code\|prose\|ccr\|align` | Compression control — per-channel toggles | | ||
| | `slm compress status\|mode\|prose` | Compression control — safe (lossless) / aggressive (LLMLingua-2 prose) | | ||
| | `slm proxy [--port] [--provider]` | Start the interception proxy (port 8765) | | ||
@@ -162,0 +167,0 @@ | `slm wrap <agent>` | Proxy-activate an agent — one command to start saving | |
@@ -32,2 +32,3 @@ httpx==0.28.1 | ||
| sqlite-vec==0.1.9 | ||
| llmlingua==0.2.2 | ||
@@ -34,0 +35,0 @@ [dev] |
@@ -274,2 +274,3 @@ AUTHORS.md | ||
| src/superlocalmemory/mcp/_stdin_guard.py | ||
| src/superlocalmemory/mcp/agent_context.py | ||
| src/superlocalmemory/mcp/resources.py | ||
@@ -313,4 +314,2 @@ src/superlocalmemory/mcp/server.py | ||
| src/superlocalmemory/optimize/compress/ccr.py | ||
| src/superlocalmemory/optimize/compress/extractive_code.py | ||
| src/superlocalmemory/optimize/compress/extractive_json.py | ||
| src/superlocalmemory/optimize/compress/prose_llmlingua.py | ||
@@ -330,2 +329,3 @@ src/superlocalmemory/optimize/compress/router.py | ||
| src/superlocalmemory/optimize/proxy/anthropic_surface.py | ||
| src/superlocalmemory/optimize/proxy/capture.py | ||
| src/superlocalmemory/optimize/proxy/gemini_surface.py | ||
@@ -332,0 +332,0 @@ src/superlocalmemory/optimize/proxy/lifecycle.py |
@@ -35,3 +35,3 @@ """SuperLocalMemory — information-geometric agent memory. | ||
| __version__ = "3.6.9" | ||
| __version__ = "3.6.10" | ||
@@ -38,0 +38,0 @@ _REQUIRED_VERSIONS = { |
@@ -5,3 +5,3 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| """Handlers for ``slm compress status|mode|code|prose|ccr``.""" | ||
| """Handlers for ``slm compress status|mode|prose``.""" | ||
@@ -23,7 +23,2 @@ from __future__ import annotations | ||
| def _get_cache_db(): | ||
| from superlocalmemory.optimize.storage.db import CacheDB | ||
| return CacheDB() | ||
| def _write_config(**fields) -> None: | ||
@@ -49,7 +44,8 @@ """5-step immutable config-write.""" | ||
| "status": cmd_compress_status, | ||
| "mode": cmd_compress_mode, | ||
| "code": cmd_compress_code, | ||
| "prose": cmd_compress_prose, | ||
| "ccr": cmd_compress_ccr, | ||
| "align": cmd_compress_align, | ||
| "mode": cmd_compress_mode, | ||
| "prose": cmd_compress_prose, | ||
| # removed in v3.6.10: code, ccr, align (extractive compressors removed) | ||
| "code": _cmd_compress_removed("code"), | ||
| "ccr": _cmd_compress_removed("ccr"), | ||
| "align": _cmd_compress_removed("align"), | ||
| } | ||
@@ -60,6 +56,18 @@ handler = _dispatch.get(sub or "") | ||
| else: | ||
| print("Usage: slm compress status|mode|code|prose|ccr [options]") | ||
| print("Usage: slm compress status|mode|prose [options]") | ||
| sys.exit(0) | ||
| def _cmd_compress_removed(name: str): | ||
| def handler(args: Namespace) -> None: | ||
| print( | ||
| f"slm compress {name}: removed in SLM v3.6.10. " | ||
| f"Extractive {name} compression has been replaced by " | ||
| f"Layer 1 (lossless whitespace) + Layer 2 (LLMLingua-2 prose). " | ||
| f"Use 'slm compress prose on' to enable prose compression." | ||
| ) | ||
| sys.exit(0) | ||
| return handler | ||
| def cmd_compress_status(args: Namespace) -> None: | ||
@@ -75,5 +83,4 @@ """Print compression status.""" | ||
| "compress_mode": cfg.compress_mode, | ||
| "compress_code": cfg.compress_code, | ||
| "compress_prose": cfg.compress_prose, | ||
| "compress_ccr": cfg.compress_ccr, | ||
| "compress_protect_recent": cfg.compress_protect_recent, | ||
| } | ||
@@ -84,9 +91,8 @@ print(json.dumps(data, indent=2)) | ||
| print("Compression status:") | ||
| print(f" Enabled: {'yes' if cfg.compress_enabled else 'no'}") | ||
| print(f" Mode: {cfg.compress_mode}") | ||
| print(f" Code: {'ON' if cfg.compress_code else 'OFF'}" | ||
| " (extractive JSON/code — lossless structure)") | ||
| print(f" Prose: {'ON' if cfg.compress_prose else 'OFF'}") | ||
| print(f" CCR: {'ON' if cfg.compress_ccr else 'OFF'}" | ||
| " (reversible context retrieval)") | ||
| print(f" Enabled: {'yes' if cfg.compress_enabled else 'no'}") | ||
| print(f" Mode: {cfg.compress_mode}") | ||
| print(f" Prose (Layer 2): {'ON' if cfg.compress_prose else 'OFF'}" | ||
| " (LLMLingua-2, aggressive mode only)") | ||
| print(f" Protect recent: {cfg.compress_protect_recent} user turns") | ||
| print(" Layer 1 (lossless whitespace normalization) is always ON when enabled.") | ||
@@ -112,19 +118,4 @@ | ||
| def cmd_compress_code(args: Namespace) -> None: | ||
| """Enable or disable code/JSON compression.""" | ||
| use_json = getattr(args, "json", False) | ||
| value = getattr(args, "code_value", "on") | ||
| _write_config(compress_code=(value == "on")) | ||
| if use_json: | ||
| print(json.dumps({"status": "ok", "compress_code": value == "on"})) | ||
| return | ||
| print(f"Code compression: {'ENABLED' if value == 'on' else 'DISABLED'}.") | ||
| print("Daemon hot-reload: active within 2s.") | ||
| def cmd_compress_prose(args: Namespace) -> None: | ||
| """Enable or disable prose compression.""" | ||
| """Enable or disable prose compression (Layer 2, LLMLingua-2).""" | ||
| use_json = getattr(args, "json", False) | ||
@@ -151,37 +142,8 @@ value = getattr(args, "prose_value", "off") | ||
| print(f"Prose compression: {'ENABLED' if value == 'on' else 'DISABLED'}.") | ||
| print(f"Prose compression (LLMLingua-2): {'ENABLED' if value == 'on' else 'DISABLED'}.") | ||
| if value == "on": | ||
| print(" Requires: compress_mode=aggressive and llmlingua package installed.") | ||
| print(" Run: slm compress mode aggressive (if not already set)") | ||
| if value == "on" and "compress_enabled" in fields: | ||
| print(" (also enabled global compress)") | ||
| print("Daemon hot-reload: active within 2s.") | ||
| def cmd_compress_align(args: Namespace) -> None: | ||
| """Enable or disable alignment compression.""" | ||
| use_json = getattr(args, "json", False) | ||
| value = getattr(args, "align_value", "on") | ||
| _write_config(compress_align=(value == "on")) | ||
| if use_json: | ||
| print(json.dumps({"status": "ok", "compress_align": value == "on"})) | ||
| return | ||
| print(f"Alignment compression: {'ENABLED' if value == 'on' else 'DISABLED'}.") | ||
| print("Daemon hot-reload: active within 2s.") | ||
| def cmd_compress_ccr(args: Namespace) -> None: | ||
| """Enable or disable CCR (Compressed Context Retrieval).""" | ||
| use_json = getattr(args, "json", False) | ||
| value = getattr(args, "ccr_value", "off") | ||
| _write_config(compress_ccr=(value == "on")) | ||
| if use_json: | ||
| print(json.dumps({"status": "ok", "compress_ccr": value == "on"})) | ||
| return | ||
| print(f"CCR (Compressed Context Retrieval): {'ENABLED' if value == 'on' else 'DISABLED'}.") | ||
| if value == "on": | ||
| print("Originals stored in llmcache.db for reversible retrieval.") | ||
| print("Daemon hot-reload: active within 2s.") |
@@ -100,5 +100,3 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| f" (mode: {cfg.compress_mode}," | ||
| f" code: {'ON' if cfg.compress_code else 'OFF'}," | ||
| f" prose: {'ON' if cfg.compress_prose else 'OFF'}," | ||
| f" CCR: {'ON' if cfg.compress_ccr else 'OFF'})") | ||
| f" prose/L2: {'ON' if cfg.compress_prose else 'OFF'})") | ||
| proxy_status = f"running on :{OPTIMIZE_DEFAULT_PORT}" if proxy_running else "not running" | ||
@@ -105,0 +103,0 @@ print(f" Proxy: {proxy_status}") |
@@ -35,2 +35,4 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| _RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-12-v2" | ||
| # v3.6.10: compulsory LLMLingua-2 prose compression model (~560MB, aggressive mode). | ||
| _COMPRESSOR_MODEL = "microsoft/llmlingua-2-xlm-roberta-large-meetingbank" | ||
@@ -182,2 +184,45 @@ | ||
| def _download_compressor(model_name: str) -> bool: | ||
| """Download the LLMLingua-2 prose compression model (v3.6.10). | ||
| Mirrors _download_reranker: a subprocess forces the HF download with visible | ||
| progress. Fail-open — a network hiccup must NOT break setup; the model also | ||
| lazy-downloads on first use in prose_llmlingua.py. | ||
| """ | ||
| print(f"\n Downloading compression model: {model_name}") | ||
| print(f" (LLMLingua-2 prose compressor, ~560MB — aggressive mode only)\n") | ||
| script = ( | ||
| "from llmlingua import PromptCompressor; " | ||
| f"PromptCompressor(model_name='{model_name}', use_llmlingua2=True, " | ||
| "device_map='cpu'); " | ||
| "print('OK')" | ||
| ) | ||
| try: | ||
| result = subprocess.run( | ||
| [sys.executable, "-c", script], | ||
| timeout=900, # 560MB on a slow link can exceed 5 min | ||
| capture_output=False, | ||
| text=True, | ||
| env={ | ||
| **os.environ, | ||
| "CUDA_VISIBLE_DEVICES": "", | ||
| "TOKENIZERS_PARALLELISM": "false", | ||
| "TORCH_DEVICE": "cpu", | ||
| }, | ||
| ) | ||
| if result.returncode == 0: | ||
| print(f" ✓ Compression model ready") | ||
| return True | ||
| print(f" ✗ Compression model download failed (will lazy-download on first use)") | ||
| return False | ||
| except ImportError: | ||
| print(f" ⚠ llmlingua not installed — compression model will download on first use") | ||
| return False | ||
| except Exception as exc: | ||
| print(f" ✗ Compression model error: {exc}") | ||
| return False | ||
| # --------------------------------------------------------------------------- | ||
@@ -398,2 +443,6 @@ # Verification | ||
| print() | ||
| print("─── Step 4c/10: Download Compression Model (LLMLingua-2) ───") | ||
| _download_compressor(_COMPRESSOR_MODEL) | ||
| # -- Step 5: Daemon Configuration (v3.4.3) -- | ||
@@ -400,0 +449,0 @@ print() |
@@ -112,11 +112,10 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| Each MCP client (Claude Code, Codex, Gemini CLI, Kimi, etc.) can set | ||
| the ``SLM_AGENT_ID`` env var in its MCP server config so that memories, | ||
| observations, and registry entries are tagged with the actual source | ||
| agent — not the legacy ``"mcp_client"`` default. | ||
| v3.4.39+: enables proper per-agent attribution in ``session_init``, | ||
| ``observe``, and event emissions. | ||
| Priority chain (v3.6.10+): | ||
| 1. ContextVar set by HTTP URL path (/mcp/{agent_id}) — HTTP transport. | ||
| 2. SLM_AGENT_ID env var — stdio transport per-process identity. | ||
| 3. Provided default (legacy "mcp_client"). | ||
| """ | ||
| return os.environ.get("SLM_AGENT_ID", default) | ||
| from superlocalmemory.mcp.agent_context import get_current_agent_id | ||
| resolved = get_current_agent_id(env_fallback=True) | ||
| return resolved if resolved != "mcp_client" else default | ||
@@ -123,0 +122,0 @@ |
@@ -113,2 +113,6 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| """ | ||
| # v3.6.10: resolve "mcp_client" sentinel → URL path (HTTP) or env var (stdio) | ||
| if agent_id == "mcp_client": | ||
| from superlocalmemory.mcp.agent_context import get_current_agent_id | ||
| agent_id = get_current_agent_id() | ||
| meta = { | ||
@@ -175,2 +179,6 @@ "project": project, | ||
| """ | ||
| # v3.6.10: resolve "mcp_client" sentinel → URL path (HTTP) or env var (stdio) | ||
| if agent_id == "mcp_client": | ||
| from superlocalmemory.mcp.agent_context import get_current_agent_id | ||
| agent_id = get_current_agent_id() | ||
| import asyncio | ||
@@ -490,2 +498,6 @@ try: | ||
| """ | ||
| # v3.6.10: resolve "mcp_client" sentinel → URL path (HTTP) or env var (stdio) | ||
| if agent_id == "mcp_client": | ||
| from superlocalmemory.mcp.agent_context import get_current_agent_id | ||
| agent_id = get_current_agent_id() | ||
| try: | ||
@@ -525,2 +537,6 @@ from superlocalmemory.core.worker_pool import WorkerPool | ||
| """ | ||
| # v3.6.10: resolve "mcp_client" sentinel → URL path (HTTP) or env var (stdio) | ||
| if agent_id == "mcp_client": | ||
| from superlocalmemory.mcp.agent_context import get_current_agent_id | ||
| agent_id = get_current_agent_id() | ||
| try: | ||
@@ -527,0 +543,0 @@ if not content or not content.strip(): |
@@ -96,2 +96,3 @@ # optimize/cache/boundary_store.py | ||
| epsilon_grid: tuple[float, ...] = (0.01, 0.02, 0.05, 0.10), | ||
| return_threshold: float = 1.0, | ||
| ) -> float: | ||
@@ -101,11 +102,14 @@ """Compute τ̂ — the vCache exploration probability (Eq. 11). | ||
| Args: | ||
| query_sim: Cosine similarity s(x) ∈ [0, 1] for the incoming query. | ||
| delta: δ — user-defined maximum error rate. | ||
| Theorem 4.1 guarantee: Pr(correct) ≥ 1 - δ. | ||
| epsilon_grid: ε values for the Eq. 11 min sweep. | ||
| Distinct from δ; controls CI conservativeness. | ||
| query_sim: Cosine similarity s(x) ∈ [0, 1] for the incoming query. | ||
| delta: δ — user-defined maximum error rate. | ||
| Theorem 4.1 guarantee: Pr(correct) ≥ 1 - δ. | ||
| epsilon_grid: ε values for the Eq. 11 min sweep. | ||
| Distinct from δ; controls CI conservativeness. | ||
| return_threshold: Semantic return threshold from config (semantic_return_threshold). | ||
| C-03 fix: during cold start, exploit directly when | ||
| query_sim >= return_threshold instead of always exploring. | ||
| Returns: | ||
| τ̂ ∈ [0.0, 1.0]. Lower = more exploitation. | ||
| Cold start (n < 3): returns 1.0 (always explore). | ||
| Cold start (n < 3): 0.0 if query_sim >= return_threshold, else 1.0. | ||
@@ -122,3 +126,8 @@ Eq. 11 derivation (from the paper): | ||
| if n < 3: | ||
| return 1.0 # cold start — always explore | ||
| # ARCH-02 note: this function serves dual purpose — (a) warm-phase vCache | ||
| # Eq. 11 tau computation and (b) cold-start similarity gate. The cold-start | ||
| # branch (n < 3) is intentionally simple: if the query is already above the | ||
| # return threshold, serve it (tau=0.0 → exploit); otherwise explore (tau=1.0). | ||
| # C-03: honor return_threshold — avoids 100% miss until 3 samples are accumulated. | ||
| return 0.0 if query_sim >= return_threshold else 1.0 | ||
@@ -150,3 +159,8 @@ # Step 1: Fisher-information SE | ||
| def should_explore(self, query_sim: float, delta: float = 0.05) -> bool: | ||
| def should_explore( | ||
| self, | ||
| query_sim: float, | ||
| delta: float = 0.05, | ||
| return_threshold: float = 1.0, | ||
| ) -> bool: | ||
| """Return True (explore = LLM call) or False (exploit = serve cache). | ||
@@ -156,3 +170,3 @@ | ||
| """ | ||
| tau = self.compute_tau(query_sim, delta=delta) | ||
| tau = self.compute_tau(query_sim, delta=delta, return_threshold=return_threshold) | ||
| return _RNG.random() <= tau | ||
@@ -159,0 +173,0 @@ |
@@ -6,2 +6,3 @@ """exact.py — SQLite-backed exact-match get/set, cacheable-response guard.""" | ||
| import json | ||
| import logging | ||
| import time | ||
@@ -12,2 +13,3 @@ from typing import Any | ||
| logger = logging.getLogger(__name__) | ||
@@ -25,2 +27,3 @@ _NON_CACHEABLE_FINISH_REASONS: frozenset[str] = frozenset({ | ||
| if finish in _NON_CACHEABLE_FINISH_REASONS: | ||
| logger.debug("exact: skip cache (finish_reason=%r)", finish) | ||
| return False | ||
@@ -31,8 +34,11 @@ choices = response.get("choices") or [] | ||
| if fr in _NON_CACHEABLE_FINISH_REASONS: | ||
| logger.debug("exact: skip cache (choice.finish_reason=%r)", fr) | ||
| return False | ||
| msg = choice.get("message") or {} | ||
| if msg.get("tool_calls"): | ||
| logger.debug("exact: skip cache (choice.message.tool_calls present)") | ||
| return False | ||
| for block in response.get("content") or []: | ||
| if isinstance(block, dict) and block.get("type") == "tool_use": | ||
| logger.debug("exact: skip cache (tool_use content block)") | ||
| return False | ||
@@ -53,5 +59,2 @@ return True | ||
| return None | ||
| if row.ttl_expires is not None and row.ttl_expires < time.time(): | ||
| self._db.delete(key, tenant_id) | ||
| return None | ||
| return json.loads(row.value.decode("utf-8")) | ||
@@ -82,3 +85,3 @@ | ||
| ttl_expires=expires_at, | ||
| tags=[], | ||
| tags=tags, | ||
| ) | ||
@@ -85,0 +88,0 @@ return True |
@@ -7,2 +7,3 @@ """key_builder.py — deterministic SHA-256 cache key, tenant-scoped.""" | ||
| import json | ||
| import logging | ||
| import re | ||
@@ -12,2 +13,4 @@ from dataclasses import dataclass, field | ||
| logger = logging.getLogger(__name__) | ||
| DETERMINISTIC_PARAMS: frozenset[str] = frozenset({ | ||
@@ -67,2 +70,12 @@ "max_tokens", "stop", "stop_sequences", "top_p", "top_k", | ||
| if temperature != 0 and not self._config.allow_nonzero_temperature_cache: | ||
| logger.debug( | ||
| "cache skip: temperature=%.2f allow_nonzero=%s", | ||
| temperature, | ||
| self._config.allow_nonzero_temperature_cache, | ||
| ) | ||
| try: | ||
| from superlocalmemory.optimize.metrics.counters import MetricsCollector | ||
| MetricsCollector.get_instance().increment_skipped_temperature() | ||
| except Exception: | ||
| pass | ||
| return None | ||
@@ -69,0 +82,0 @@ |
@@ -5,2 +5,4 @@ """manager.py — CacheManager orchestrator (singleton, fail-open, stampede-shielded).""" | ||
| import hashlib as _hashlib | ||
| import json as _json_mod | ||
| import logging | ||
@@ -12,2 +14,5 @@ import threading | ||
| # C-08: precomputed hash for the common "default" tenant — avoids sha256 on every request | ||
| _DEFAULT_TENANT_HASH: str = _hashlib.sha256(b"default").hexdigest() | ||
| from superlocalmemory.optimize.cache.exact import ExactCache | ||
@@ -235,2 +240,22 @@ from superlocalmemory.optimize.cache.invalidation import InvalidationEngine | ||
| self._metrics.exact_misses += 1 | ||
| # C-01: semantic fallback on exact miss (only when explicitly enabled) | ||
| if self._semantic.is_enabled(): | ||
| try: | ||
| sem_result = self._semantic.lookup(req, tenant_id, None) | ||
| if sem_result is not None: | ||
| self._metrics.semantic_hits += 1 | ||
| if isinstance(sem_result, CachedResponse): | ||
| return sem_result | ||
| # VCacheSemantic may return a response dict — wrap it | ||
| return CachedResponse( | ||
| hit=True, | ||
| data=_json_mod.dumps(sem_result, ensure_ascii=False).encode(), | ||
| cache_key=key, | ||
| ttl_seconds=0, | ||
| ) | ||
| except Exception as exc: | ||
| logger.warning("SemanticTier.lookup raised (fail-open): %s", exc) | ||
| self._metrics.semantic_misses += 1 | ||
| # Return miss WITH the key so callers can use it for cache storage. | ||
@@ -264,2 +289,9 @@ return CachedResponse(hit=False, data=None, cache_key=key, ttl_seconds=0) | ||
| # C-02: index in semantic tier after exact write (fail-open) | ||
| if self._semantic.is_enabled(): | ||
| try: | ||
| self._semantic.index_entry(req, tenant_id, None, response_dict) | ||
| except Exception as exc: | ||
| logger.warning("SemanticTier.index_entry raised (fail-open): %s", exc) | ||
| # ---- CacheHook protocol implementation (INTERFACE-CONTRACT §3) ---- | ||
@@ -276,3 +308,3 @@ | ||
| try: | ||
| result = self.get(req, tenant_id="default") | ||
| result = self.get(req, tenant_id=_DEFAULT_TENANT_HASH) | ||
| if result is not None and not result.hit: | ||
@@ -288,3 +320,3 @@ MetricsCollector.get_instance().on_miss() | ||
| try: | ||
| self.set(req, resp, tenant_id="default") | ||
| self.set(req, resp, tenant_id=_DEFAULT_TENANT_HASH) | ||
| except Exception as exc: | ||
@@ -295,11 +327,41 @@ logger.warning("CacheManager.store raised (fail-open): %s", exc) | ||
| """CacheHook.on_hit() — forward token savings to MetricsCollector. | ||
| Contract §7: cache skip saves BOTH input+output tokens (whole call avoided). | ||
| tokens_saved = input tokens saved (from request). | ||
| Output tokens estimated from response body size (~4 bytes per token). | ||
| M-01: compute input tokens from request body when caller passes 0. | ||
| M-02: parse real output tokens from cached response usage field. | ||
| All counts are estimates for display — not billing-accurate. | ||
| """ | ||
| import json as _json | ||
| # M-01: estimate input tokens from message content | ||
| if tokens_saved == 0 and isinstance(req, ProxyRequest): | ||
| try: | ||
| body = req.body or {} | ||
| total_chars = 0 | ||
| for m in (body.get("messages") or []): | ||
| c = m.get("content", "") | ||
| if isinstance(c, str): | ||
| total_chars += len(c) | ||
| elif isinstance(c, list): | ||
| for blk in c: | ||
| if isinstance(blk, dict): | ||
| total_chars += len(blk.get("text", "") or "") | ||
| total_chars += len(body.get("system", "") or "") | ||
| tokens_saved = max(0, total_chars // 4) | ||
| except Exception: | ||
| pass | ||
| # M-02: parse real output tokens from stored response | ||
| output_tokens = 0 | ||
| if resp: | ||
| # Rough estimate: 4 bytes per token for English text | ||
| output_tokens = len(resp) // 4 | ||
| try: | ||
| data = _json.loads(resp) | ||
| usage = data.get("usage") or {} | ||
| output_tokens = ( | ||
| usage.get("output_tokens") | ||
| or usage.get("completion_tokens") | ||
| or 0 | ||
| ) | ||
| except Exception: | ||
| output_tokens = len(resp) // 4 # fallback byte-estimate | ||
| MetricsCollector.get_instance().on_hit( | ||
@@ -306,0 +368,0 @@ tokens_saved_input=tokens_saved, |
@@ -297,3 +297,4 @@ # optimize/cache/semantic.py | ||
| delta = float(getattr(cfg, "semantic_error_target", _DEFAULT_ERROR_TARGET)) | ||
| if record.should_explore(best_score, delta=delta): | ||
| sem_return_threshold = float(getattr(cfg, "semantic_return_threshold", _DEFAULT_RETURN_THRESHOLD)) | ||
| if record.should_explore(best_score, delta=delta, return_threshold=sem_return_threshold): | ||
| logger.debug( | ||
@@ -367,7 +368,7 @@ "VCacheSemantic: explore (score=%.4f entry=%s t_hat=%.4f)", | ||
| entries: list[tuple[str, str, np.ndarray]] = [] | ||
| for entry_id, blob in rows: | ||
| for entry_id, blob, ctx_fp in rows: # C-10: unpack persisted context_fp | ||
| try: | ||
| v = np.frombuffer(blob, dtype=np.float32).copy() | ||
| if v.shape[0] == _EMBED_DIM: | ||
| entries.append((entry_id, "", v)) | ||
| entries.append((entry_id, ctx_fp, v)) | ||
| except Exception: | ||
@@ -427,3 +428,3 @@ continue | ||
| # Store vector in DB | ||
| # Store vector in DB — C-10: persist context_fp alongside the vector | ||
| vec_bytes = vec.tobytes() | ||
@@ -434,3 +435,7 @@ self._db.vec_add( | ||
| vector=vec_bytes, | ||
| meta={"model": "nomic-ai/nomic-embed-text-v1.5", "dim": _EMBED_DIM}, | ||
| meta={ | ||
| "model": "nomic-ai/nomic-embed-text-v1.5", | ||
| "dim": _EMBED_DIM, | ||
| "context_fp": context_fp, | ||
| }, | ||
| ) | ||
@@ -437,0 +442,0 @@ |
@@ -35,12 +35,6 @@ # compress/prose_llmlingua.py | ||
| self, | ||
| model_name: str = _MODEL_BERT, | ||
| model_name: str = _MODEL_XLM, | ||
| device_map: str = "cpu", | ||
| rate: float = _DEFAULT_RATE, | ||
| ) -> None: | ||
| import os | ||
| if os.environ.get("SLM_DISABLE_HF_DOWNLOAD", "0") == "1": | ||
| raise ImportError( | ||
| "LLMLingua-2 model download blocked: SLM_DISABLE_HF_DOWNLOAD=1. " | ||
| "Set compress_llmlingua_allow_download=true in optimize.json." | ||
| ) | ||
| try: | ||
@@ -47,0 +41,0 @@ from llmlingua import PromptCompressor # type: ignore[import] |
@@ -29,3 +29,2 @@ # compress/router.py | ||
| _MIN_CHARS_FOR_COMPRESSION: int = 500 | ||
| _MIN_RATIO_STRUCTURED: float = 0.60 | ||
@@ -54,4 +53,2 @@ | ||
| def __init__(self) -> None: | ||
| self._json_compressor: "JSONCompressor | None" = None | ||
| self._code_compressor: "CodeCompressor | None" = None | ||
| self._llmlingua_compressor: "LLMLinguaCompressor | None" = None | ||
@@ -108,8 +105,13 @@ self._ccr_store: "CCRStore | None" = None | ||
| if tokens_after >= tokens_before: | ||
| return req # no improvement | ||
| # S-01/Stage-9 fix: build new_bytes BEFORE the improvement guard. | ||
| # Layer 1 normalization saves characters (not word-count tokens), so | ||
| # tokens_after == tokens_before for "normalize" strategy. Checking byte | ||
| # length of the serialized body correctly detects Layer 1 savings. | ||
| body["messages"] = new_messages | ||
| new_bytes = json.dumps(body, ensure_ascii=False, separators=(",", ":")).encode() | ||
| bytes_saved = len(req.body_bytes) - len(new_bytes) | ||
| if bytes_saved <= 0 and tokens_after >= tokens_before: | ||
| return req # neither bytes nor tokens improved | ||
| # CONTRACT §3: fire on_compress metrics callback | ||
@@ -166,3 +168,8 @@ lossy = strategy == "llmlingua2_prose" | ||
| protect_indices = set(range(max(0, len(messages) - protect_recent), len(messages))) | ||
| # K-05: protect last N *user* turns, not last N messages of any role | ||
| user_indices = [i for i, m in enumerate(messages) if m.get("role") == "user"] | ||
| protect_indices: set[int] = set(user_indices[-protect_recent:]) if protect_recent > 0 else set() | ||
| # Always protect the very last message (current turn, any role) | ||
| if messages: | ||
| protect_indices.add(len(messages) - 1) | ||
@@ -255,92 +262,73 @@ for idx, msg in enumerate(messages): | ||
| # JSON detection | ||
| if len(text) < _MIN_CHARS_FOR_COMPRESSION: | ||
| return text, tokens_before, tokens_before, "none" | ||
| # K-01/K-02/K-03: NEVER compress structured content (JSON or code) | ||
| stripped = text.strip() | ||
| if stripped.startswith(("{", "[")): | ||
| # PERF-02: for large content, structural bracket-match avoids O(n) json.loads(). | ||
| # Conservative: matching outer brackets → treat as JSON and skip compression. | ||
| # K-01 mandate is safety-first: false-positive (non-JSON treated as JSON) is | ||
| # safe; false-negative (JSON compressed) would be a correctness violation. | ||
| _last = stripped[-1] if stripped else "" | ||
| if len(stripped) > 8192 and ( | ||
| (stripped[0] == "{" and _last == "}") or (stripped[0] == "[" and _last == "]") | ||
| ): | ||
| return text, tokens_before, tokens_before, "none" # large JSON → passthrough | ||
| try: | ||
| parsed = json.loads(stripped) | ||
| compressor = self._get_json_compressor() | ||
| compressed = compressor.compress(parsed) | ||
| tokens_after = _token_estimate_structured(compressed) | ||
| tokens_before_adj = _token_estimate_structured(text) | ||
| ratio = tokens_after / tokens_before_adj if tokens_before_adj else 1.0 | ||
| if ratio < _MIN_RATIO_STRUCTURED: | ||
| # B-03: store-before-compress | ||
| ccr_id = self._ccr_store_original(text.encode(), model, tenant_id) | ||
| if ccr_id: | ||
| try: | ||
| obj = json.loads(compressed) | ||
| if isinstance(obj, dict): | ||
| obj["__slm_ccr__"] = ccr_id | ||
| compressed = json.dumps(obj, ensure_ascii=False, separators=(",", ":")) | ||
| elif isinstance(obj, list): | ||
| # RB-02: list-root embedding | ||
| wrapper = {"__slm_ccr__": ccr_id, "__slm_data__": obj} | ||
| compressed = json.dumps(wrapper, ensure_ascii=False, separators=(",", ":")) | ||
| except Exception: | ||
| pass | ||
| self._ccr_update_compressed(ccr_id, compressed.encode()) | ||
| logger.debug("[%s] JSON compressed %.2f ratio ccr_id=%s", request_id, ratio, ccr_id) | ||
| return compressed, tokens_before_adj, tokens_after, "extractive_json" | ||
| return text, tokens_before, tokens_before, "none" | ||
| except (json.JSONDecodeError, Exception): | ||
| pass | ||
| json.loads(stripped) | ||
| return text, tokens_before, tokens_before, "none" # valid JSON → passthrough | ||
| except json.JSONDecodeError: | ||
| pass # not valid JSON — treat as prose | ||
| except Exception as exc: | ||
| logger.warning("compress: unexpected error probing JSON content: %s", exc) | ||
| # Code detection | ||
| lang = _detect_language(text) | ||
| if lang is not None: | ||
| compressor = self._get_code_compressor() | ||
| # RB-03: compress first, compute ratio, then store CCR only if beneficial | ||
| compressed_probe = compressor.compress(text, language=lang, ccr_id="") | ||
| tokens_after = _token_estimate(compressed_probe) | ||
| ratio = tokens_after / tokens_before if tokens_before else 1.0 | ||
| if ratio < _MIN_RATIO_STRUCTURED: | ||
| # B-03: store-before-compress | ||
| ccr_id = self._ccr_store_original(text.encode(), model, tenant_id) | ||
| compressed = compressor.compress(text, language=lang, ccr_id=ccr_id) | ||
| if ccr_id: | ||
| self._ccr_update_compressed(ccr_id, compressed.encode()) | ||
| logger.debug("[%s] Code compressed lang=%s ratio=%.2f ccr_id=%s", | ||
| request_id, lang, ratio, ccr_id) | ||
| return compressed, tokens_before, tokens_after, "extractive_code" | ||
| return text, tokens_before, tokens_before, "none" | ||
| if _detect_language(text) is not None: | ||
| return text, tokens_before, tokens_before, "none" # code → passthrough | ||
| # Prose: only if aggressive mode AND compress_prose is enabled | ||
| # (Phase 3 — opt-in prose tier; off by default; gated by config | ||
| # field compress_prose added in LLD-04 INTERFACE-CONTRACT v2.) | ||
| # Layer 1 — lossless whitespace normalization (always-on, safe) | ||
| normalized = self._normalize_whitespace(text) | ||
| tokens_after_l1 = _token_estimate(normalized) | ||
| # Layer 2 — LLMLingua-2 prose compression (aggressive + opt-in only) | ||
| cfg = self._get_config() | ||
| prose_enabled = bool(getattr(cfg, "compress_prose", False)) | ||
| if aggressive and prose_enabled: | ||
| if aggressive and prose_enabled: # pragma: no cover — LLMLingua optional dep | ||
| compressor = self._get_llmlingua_compressor() | ||
| if compressor is not None: | ||
| # B-03: store-before-compress | ||
| # B-03: store original BEFORE lossy compression | ||
| ccr_id = self._ccr_store_original(text.encode(), model, tenant_id) | ||
| compressed = compressor.compress(text) | ||
| compressed = compressor.compress(normalized) | ||
| if ccr_id: | ||
| self._ccr_update_compressed(ccr_id, compressed.encode()) | ||
| tokens_after = _token_estimate(compressed) | ||
| logger.info( | ||
| "[%s] LLMLingua-2 prose compressed rate=%.2f ccr_id=%s (LOSSY)", | ||
| request_id, | ||
| tokens_after / tokens_before if tokens_before else 1.0, | ||
| ccr_id, | ||
| ) | ||
| return compressed, tokens_before, tokens_after, "llmlingua2_prose" | ||
| tokens_after_l2 = _token_estimate(compressed) | ||
| if tokens_after_l2 < tokens_before: | ||
| logger.info( | ||
| "[%s] LLMLingua-2 prose compressed rate=%.2f ccr_id=%s (LOSSY)", | ||
| request_id, | ||
| tokens_after_l2 / tokens_before if tokens_before else 1.0, | ||
| ccr_id, | ||
| ) | ||
| return compressed, tokens_before, tokens_after_l2, "llmlingua2_prose" | ||
| # S-01 fix: compare character length, not word count. | ||
| # _token_estimate() is word-count — whitespace normalization saves characters/bytes | ||
| # but never removes words, so token counts are identical before and after Layer 1. | ||
| # Character comparison correctly detects when normalization reduced the body size. | ||
| if len(normalized) < len(text): | ||
| return normalized, tokens_before, tokens_after_l1, "normalize" | ||
| return text, tokens_before, tokens_before, "none" | ||
| @staticmethod | ||
| def _normalize_whitespace(text: str) -> str: | ||
| """Layer 1 lossless: collapse excess blank lines, strip trailing spaces per line.""" | ||
| import re | ||
| text = re.sub(r"\n{3,}", "\n\n", text) | ||
| lines = [line.rstrip() for line in text.split("\n")] | ||
| return "\n".join(lines) | ||
| # ── Lazy loaders ───────────────────────────────────────────────────── | ||
| def _get_json_compressor(self) -> "JSONCompressor": | ||
| if self._json_compressor is None: | ||
| from superlocalmemory.optimize.compress.extractive_json import JSONCompressor | ||
| self._json_compressor = JSONCompressor() | ||
| return self._json_compressor | ||
| def _get_code_compressor(self) -> "CodeCompressor": | ||
| if self._code_compressor is None: | ||
| from superlocalmemory.optimize.compress.extractive_code import CodeCompressor | ||
| self._code_compressor = CodeCompressor() | ||
| return self._code_compressor | ||
| def _get_llmlingua_compressor(self) -> "LLMLinguaCompressor | None": | ||
| def _get_llmlingua_compressor(self) -> "LLMLinguaCompressor | None": # pragma: no cover | ||
| if self._llmlingua_compressor is None: | ||
@@ -404,8 +392,11 @@ try: | ||
| tokens_before=tb, tokens_after=ta, | ||
| lossy=strat == "llmlingua2_prose", | ||
| ) | ||
| except Exception as exc: | ||
| logger.debug("compress_text failed (non-fatal): %s", exc) | ||
| t = _token_estimate(text) | ||
| return CompressTextResult( | ||
| compressed_text=text, strategy="none", | ||
| tokens_before=len(text.split()), tokens_after=len(text.split()), | ||
| tokens_before=t, tokens_after=t, | ||
| lossy=False, | ||
| ) | ||
@@ -416,6 +407,14 @@ | ||
| class CompressTextResult: | ||
| """Result of a compress_text() call. | ||
| UX-02 note: lossy=True only when strategy="llmlingua2_prose" (Layer 2). | ||
| In the default install (LLMLingua optional dep not installed), lossy is | ||
| always False — install `llmlingua>=0.2.0` and set compress_prose=True + | ||
| compress_mode="aggressive" to activate lossy compression. | ||
| """ | ||
| compressed_text: str | ||
| strategy: str # "extractive_json" | "extractive_code" | "llmlingua2_prose" | "none" | ||
| strategy: str # "normalize" | "llmlingua2_prose" | "none" | ||
| tokens_before: int | ||
| tokens_after: int | ||
| lossy: bool = False # K-10: True only for llmlingua2_prose (Layer 2) | ||
@@ -429,6 +428,2 @@ | ||
| def _token_estimate_structured(text: str) -> int: | ||
| return max(1, len(text) // 4) if text else 0 | ||
| def _msg_has_tool_result(msg: dict) -> bool: | ||
@@ -435,0 +430,0 @@ """B-09: Detect historical tool_result blocks in messages.""" |
@@ -27,2 +27,18 @@ """Module-level accessor for OptimizeConfig (INTERFACE-CONTRACT §2). | ||
| def get_shared_store() -> "ConfigStore": | ||
| """Return the process-wide ConfigStore singleton, creating it on first use. | ||
| This is the SINGLE instance the daemon, the /api/optimize routes, the proxy | ||
| hook-reload callback, and the hot-reload watchdog must all share so that a UI | ||
| config change reaches the live proxy (fixes W-02 hot-reload + W-05 fresh-store | ||
| -per-request). The daemon calls this at startup, registers a change callback, | ||
| and starts the watchdog. | ||
| """ | ||
| global _store | ||
| if _store is None: | ||
| from superlocalmemory.optimize.config.store import ConfigStore | ||
| _store = ConfigStore() | ||
| return _store | ||
| def _set_config_store(store: "ConfigStore") -> None: | ||
@@ -29,0 +45,0 @@ global _store |
@@ -26,11 +26,6 @@ """Default OptimizeConfig singleton — import-safe.""" | ||
| semantic_centroid_min_similarity=0.85, | ||
| compress_enabled=True, | ||
| compress_enabled=False, | ||
| compress_mode="safe", | ||
| compress_code=True, | ||
| compress_json=True, | ||
| compress_prose=False, | ||
| compress_ccr=True, | ||
| compress_align=True, | ||
| compress_protect_recent=4, | ||
| compress_llmlingua_allow_download=False, | ||
| ttl_seconds=86400, | ||
@@ -37,0 +32,0 @@ ttl=TTLConfig(), |
@@ -92,11 +92,6 @@ """Typed schema for optimize.json. | ||
| # Compress | ||
| compress_enabled: bool = True | ||
| compress_enabled: bool = False | ||
| compress_mode: str = "safe" | ||
| compress_code: bool = True | ||
| compress_json: bool = True | ||
| compress_prose: bool = False | ||
| compress_ccr: bool = True | ||
| compress_align: bool = True | ||
| compress_protect_recent: int = 4 | ||
| compress_llmlingua_allow_download: bool = False | ||
@@ -186,13 +181,6 @@ # TTL + providers + pricing | ||
| ), | ||
| compress_enabled=bool(d.get("compress_enabled", True)), | ||
| compress_enabled=bool(d.get("compress_enabled", False)), | ||
| compress_mode=str(d.get("compress_mode", "safe")), | ||
| compress_code=bool(d.get("compress_code", True)), | ||
| compress_json=bool(d.get("compress_json", True)), | ||
| compress_prose=bool(d.get("compress_prose", False)), | ||
| compress_ccr=bool(d.get("compress_ccr", True)), | ||
| compress_align=bool(d.get("compress_align", True)), | ||
| compress_protect_recent=int(d.get("compress_protect_recent", 4)), | ||
| compress_llmlingua_allow_download=bool( | ||
| d.get("compress_llmlingua_allow_download", False) | ||
| ), | ||
| ttl_seconds=int(d.get("ttl_seconds", 86400)), | ||
@@ -235,9 +223,4 @@ providers=providers, | ||
| "compress_mode": self.compress_mode, | ||
| "compress_code": self.compress_code, | ||
| "compress_json": self.compress_json, | ||
| "compress_prose": self.compress_prose, | ||
| "compress_ccr": self.compress_ccr, | ||
| "compress_align": self.compress_align, | ||
| "compress_protect_recent": self.compress_protect_recent, | ||
| "compress_llmlingua_allow_download": self.compress_llmlingua_allow_download, | ||
| "ttl_seconds": self.ttl_seconds, | ||
@@ -244,0 +227,0 @@ "providers": {k: v.as_dict() for k, v in self.providers.items()}, |
@@ -77,3 +77,9 @@ """ConfigStore — reads, writes, and hot-reloads optimize.json. | ||
| def save(self, config: OptimizeConfig) -> None: | ||
| """Write config to optimize.json with version bump.""" | ||
| """Write config to optimize.json with version bump. | ||
| Fires registered change callbacks immediately after a successful write | ||
| (outside the lock) so a UI/CLI save reaches the live proxy without | ||
| waiting for the 2s watchdog poll. The watchdog skips this write via | ||
| ``_saved_by_self`` so callbacks fire exactly once. | ||
| """ | ||
| config.validate() | ||
@@ -97,2 +103,10 @@ with self._lock: | ||
| self._saved_by_self = False | ||
| callbacks = list(self._change_callbacks) | ||
| # Fire callbacks OUTSIDE the lock — a callback may rebuild proxy hooks | ||
| # or call back into get(); keeping them off the lock avoids contention. | ||
| for cb in callbacks: | ||
| try: | ||
| cb(new_cfg) | ||
| except Exception as exc: | ||
| logger.warning("ConfigStore save callback error: %s", exc) | ||
@@ -99,0 +113,0 @@ def start_watchdog(self) -> None: |
@@ -27,3 +27,3 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| on_miss(): cache miss | ||
| on_compress(bytes_original, bytes_after): compress ran | ||
| on_compress(tokens_before, tokens_after): compress ran (word-count proxy) | ||
| on_eviction(): entry expired/evicted | ||
@@ -73,10 +73,13 @@ """ | ||
| def on_compress(self, bytes_original: int, bytes_after: int) -> None: | ||
| """Record a compression run with byte counts.""" | ||
| def on_compress(self, tokens_before: int, tokens_after: int) -> None: | ||
| """Record a compression run. Arguments are word-count proxy estimates from _token_estimate(). | ||
| M-03: consistent naming — these are token estimates, not byte counts. | ||
| Stored in compress_bytes_original/after fields for DB schema compat; unit is word-count. | ||
| """ | ||
| with self._data_lock: | ||
| self._compress_runs += 1 | ||
| self._compress_bytes_original += max(0, bytes_original) | ||
| self._compress_bytes_after += max(0, bytes_after) | ||
| saved_tokens = max(0, bytes_original - bytes_after) | ||
| self._tokens_saved_compress += saved_tokens | ||
| self._compress_bytes_original += max(0, tokens_before) | ||
| self._compress_bytes_after += max(0, tokens_after) | ||
| self._tokens_saved_compress += max(0, tokens_before - tokens_after) | ||
@@ -88,2 +91,7 @@ def on_eviction(self) -> None: | ||
| def increment_skipped_temperature(self) -> None: | ||
| """C-04: record a cache skip due to non-zero temperature.""" | ||
| with self._data_lock: | ||
| self._calls_skipped += 1 | ||
| def record_latency(self, ms: float) -> None: | ||
@@ -90,0 +98,0 @@ """Record latency overhead in milliseconds.""" |
@@ -361,2 +361,3 @@ """_helpers.py — Shared HTTP utilities used by all surface modules.""" | ||
| on_complete: "Callable[[bytes], Any] | None" = None, | ||
| max_accumulate: int | None = None, | ||
| ) -> Response | StreamingResponse: | ||
@@ -392,6 +393,8 @@ """Stream-forward with optional post-stream cache-store callback. | ||
| acc: list[bytes] = [] | ||
| acc_bytes = 0 | ||
| acc_capped = False | ||
| complete_called = False | ||
| async def _generate() -> AsyncIterator[bytes]: | ||
| nonlocal complete_called | ||
| nonlocal complete_called, acc_bytes, acc_capped | ||
| stream_error = False | ||
@@ -404,3 +407,13 @@ try: | ||
| if chunk: | ||
| acc.append(chunk) | ||
| # Always forward to the client; only bound what we hold | ||
| # in memory for the on_complete callback (CWE-400). | ||
| if max_accumulate is None or acc_bytes < max_accumulate: | ||
| acc.append(chunk) | ||
| acc_bytes += len(chunk) | ||
| elif not acc_capped: | ||
| acc_capped = True | ||
| logger.debug( | ||
| "[%s] stream accumulator capped at %d bytes", | ||
| request_id, max_accumulate, | ||
| ) | ||
| yield chunk | ||
@@ -453,2 +466,87 @@ except httpx.RemoteProtocolError as exc: | ||
| async def capture_passthrough_forward( | ||
| proxy: Any, | ||
| request: Request, | ||
| *, | ||
| provider: str, | ||
| upstream_url: str, | ||
| allowed_headers: frozenset, | ||
| request_id: str, | ||
| model_hint: str = "", | ||
| sse_parser: "Callable[[bytes], bytes | None] | None" = None, | ||
| is_stream: bool = False, | ||
| ) -> Response | StreamingResponse: | ||
| """Shadow-capture passthrough (v3.6.10, plan §7). | ||
| Pure passthrough to upstream + record the exchange to the capture corpus. | ||
| NO cache, NO compression — capture mode observes only authentic traffic. | ||
| Fail-open: a capture or forward error degrades to a normal forward/error | ||
| response; the user's request is never blocked by capture. | ||
| """ | ||
| from superlocalmemory.optimize.proxy.capture import ( | ||
| extract_usage, | ||
| record_exchange_async, | ||
| ) | ||
| body_bytes = await request.body() | ||
| fwd_headers = _build_forward_headers(request, allowed_headers) | ||
| fwd_headers["content-length"] = str(len(body_bytes)) | ||
| if is_stream: | ||
| async def _on_complete(acc: bytes) -> None: | ||
| parsed = sse_parser(acc) if sse_parser else None | ||
| payload = parsed if parsed is not None else acc | ||
| itok, otok, mdl = extract_usage(provider, parsed) | ||
| await record_exchange_async( | ||
| provider=provider, | ||
| model=mdl or model_hint, | ||
| request_body=body_bytes, | ||
| response_body=payload, | ||
| content_type="text/event-stream", | ||
| input_tokens=itok, | ||
| output_tokens=otok, | ||
| status_code=200, | ||
| stream=True, | ||
| ) | ||
| # Bound the in-memory accumulator (CWE-400): the corpus only keeps the | ||
| # first 1 MB per side anyway, so cap accumulation there. | ||
| from superlocalmemory.optimize.proxy.capture import _MAX_CAPTURE_BODY_BYTES | ||
| return await _stream_and_cache_forward( | ||
| proxy, request_id, fwd_headers, body_bytes, upstream_url, | ||
| on_complete=_on_complete, | ||
| max_accumulate=_MAX_CAPTURE_BODY_BYTES, | ||
| ) | ||
| if proxy.http_client is None: | ||
| return await _fail_open_forward(proxy, request, upstream_url) | ||
| try: | ||
| upstream_resp = await proxy.http_client.post( | ||
| upstream_url, content=body_bytes, headers=fwd_headers, | ||
| ) | ||
| except Exception as exc: | ||
| logger.error("[%s] capture passthrough upstream error: %r", request_id, exc) | ||
| return await _fail_open_forward(proxy, request, upstream_url) | ||
| resp_bytes = upstream_resp.content | ||
| itok, otok, mdl = extract_usage(provider, resp_bytes) | ||
| await record_exchange_async( | ||
| provider=provider, | ||
| model=mdl or model_hint, | ||
| request_body=body_bytes, | ||
| response_body=resp_bytes, | ||
| content_type="application/json", | ||
| input_tokens=itok, | ||
| output_tokens=otok, | ||
| status_code=upstream_resp.status_code, | ||
| stream=False, | ||
| ) | ||
| return Response( | ||
| content=resp_bytes, | ||
| status_code=upstream_resp.status_code, | ||
| media_type="application/json", | ||
| headers=_filter_response_headers(dict(upstream_resp.headers)), | ||
| ) | ||
| async def _safe_cache_check(hooks: HookChain, ctx: ProxyRequest) -> CachedResponse: | ||
@@ -455,0 +553,0 @@ try: |
@@ -28,3 +28,5 @@ """anthropic_surface.py — Anthropic Messages + count_tokens + models surfaces.""" | ||
| _stream_forward, | ||
| capture_passthrough_forward, | ||
| ) | ||
| from superlocalmemory.optimize.proxy.capture import capture_enabled | ||
| from superlocalmemory.optimize.proxy.lifecycle import ProviderResponse, ProxyRequest | ||
@@ -196,2 +198,12 @@ | ||
| has_tools = _body_has_tools(body) | ||
| # v3.6.10 shadow-capture (plan §7): pure passthrough + corpus record. | ||
| if capture_enabled(): | ||
| return await capture_passthrough_forward( | ||
| proxy, request, provider="anthropic", upstream_url=upstream_url, | ||
| allowed_headers=_ANTHROPIC_FORWARD_HEADERS, request_id=request_id, | ||
| model_hint=str(body.get("model", "")), | ||
| sse_parser=_parse_sse_to_json, is_stream=stream, | ||
| ) | ||
| ctx = ProxyRequest( | ||
@@ -198,0 +210,0 @@ provider="anthropic", |
@@ -50,3 +50,6 @@ """gemini_surface.py — Gemini native and OpenAI-compat surfaces. | ||
| _stream_forward, | ||
| capture_passthrough_forward, | ||
| ) | ||
| from superlocalmemory.optimize.proxy.capture import capture_enabled | ||
| from superlocalmemory.optimize.proxy.openai_surface import _parse_openai_sse_to_json | ||
| from superlocalmemory.optimize.proxy.lifecycle import ProviderResponse, ProxyRequest | ||
@@ -242,2 +245,20 @@ | ||
| # v3.6.10 shadow-capture (plan §7): pure passthrough + corpus record. | ||
| if capture_enabled(): | ||
| cap_model = model_and_method.split(":", 1)[0].replace("models/", "") | ||
| cap_url = upstream_url | ||
| if stream: | ||
| _allowed = { | ||
| k: v for k, v in request.query_params.items() | ||
| if k.lower() in _GEMINI_ALLOWED_QUERY_PARAMS | ||
| } | ||
| _allowed["alt"] = "sse" | ||
| cap_url = f"{upstream_url}?{urllib.parse.urlencode(_allowed)}" | ||
| return await capture_passthrough_forward( | ||
| proxy, request, provider="gemini", upstream_url=cap_url, | ||
| allowed_headers=_GEMINI_NATIVE_FORWARD_HEADERS, request_id=request_id, | ||
| model_hint=cap_model, | ||
| sse_parser=_parse_gemini_sse_to_json, is_stream=stream, | ||
| ) | ||
| ctx = ProxyRequest( | ||
@@ -412,2 +433,12 @@ provider="gemini", | ||
| # v3.6.10 shadow-capture (plan §7): pure passthrough + corpus record. | ||
| if capture_enabled(): | ||
| return await capture_passthrough_forward( | ||
| proxy, request, provider="gemini-openai-compat", | ||
| upstream_url=upstream_url, | ||
| allowed_headers=_GEMINI_OPENAI_COMPAT_FORWARD_HEADERS, | ||
| request_id=request_id, model_hint=str(body.get("model", "")), | ||
| sse_parser=_parse_openai_sse_to_json, is_stream=stream, | ||
| ) | ||
| ctx = ProxyRequest( | ||
@@ -414,0 +445,0 @@ provider="gemini-openai-compat", |
@@ -36,3 +36,5 @@ """openai_surface.py — OpenAI /v1/chat/completions and /v1/embeddings. | ||
| _stream_forward, | ||
| capture_passthrough_forward, | ||
| ) | ||
| from superlocalmemory.optimize.proxy.capture import capture_enabled | ||
| from superlocalmemory.optimize.proxy.lifecycle import ProviderResponse, ProxyRequest | ||
@@ -320,2 +322,12 @@ | ||
| has_tools = _body_has_tools(body) | ||
| # v3.6.10 shadow-capture (plan §7): pure passthrough + corpus record. | ||
| if capture_enabled(): | ||
| return await capture_passthrough_forward( | ||
| proxy, request, provider="openai", upstream_url=upstream_url, | ||
| allowed_headers=_OPENAI_FORWARD_HEADERS, request_id=request_id, | ||
| model_hint=str(body.get("model", "")), | ||
| sse_parser=_parse_openai_sse_to_json, is_stream=stream, | ||
| ) | ||
| ctx = ProxyRequest( | ||
@@ -322,0 +334,0 @@ provider="openai", method="POST", path="/v1/chat/completions", |
@@ -78,3 +78,21 @@ """server.py — ProxyApp and build_proxy_router().""" | ||
| def reload_from_config(self, config: OptimizeConfig) -> None: | ||
| """Hot-swap cache/compress behavior when optimize.json changes (v3.6.10). | ||
| Called by the ConfigStore change-callback (UI save → immediate; external | ||
| file/CLI edit → within the 2s watchdog poll). Rebuilds the HookChain so | ||
| ``cache_enabled`` and ``compress_enabled`` can be toggled INDEPENDENTLY | ||
| at runtime with no daemon restart. Note: ``proxy_enabled`` (whether the | ||
| proxy claims /v1/* at all) is a startup decision and is NOT changed here. | ||
| """ | ||
| self.config = config | ||
| self.hooks = _load_hooks(config) | ||
| logger.info( | ||
| "slm.optimize.proxy reloaded (config v%s): cache=%s compress=%s", | ||
| getattr(config, "config_version", "?"), | ||
| type(self.hooks.cache).__name__ if self.hooks.cache else "None", | ||
| type(self.hooks.compress).__name__ if self.hooks.compress else "None", | ||
| ) | ||
| def build_proxy_router(proxy: ProxyApp) -> APIRouter: | ||
@@ -136,2 +154,13 @@ """Build and return the FastAPI router for all proxy surfaces.""" | ||
| def _load_hooks(config: OptimizeConfig) -> HookChain: | ||
| # v3.6.10 shadow-capture (plan §7): capture mode is PURE passthrough — no | ||
| # cache, no compression — so the corpus records only authentic upstream | ||
| # exchanges. This is defense-in-depth alongside the per-surface guard. | ||
| from superlocalmemory.optimize.proxy.capture import capture_enabled | ||
| if capture_enabled(): | ||
| logger.info( | ||
| "slm.optimize.proxy: SLM_OPTIMIZE_CAPTURE on — cache/compress " | ||
| "DISABLED, recording exchanges to optimize_capture.jsonl" | ||
| ) | ||
| return HookChain.empty() | ||
| cache_hook = None | ||
@@ -138,0 +167,0 @@ compress_hook = None |
@@ -36,2 +36,3 @@ """CacheDB — wraps DatabaseManager for llmcache.db operations. | ||
| import os | ||
| import re | ||
| import sqlite3 | ||
@@ -56,2 +57,21 @@ import struct | ||
| _DEFAULT_TENANT: str = "default" | ||
| _TENANT_HEX64 = re.compile(r"[0-9a-f]{64}") | ||
| def _normalize_tenant_id(tenant_id: str) -> str: | ||
| """Match CacheManager.build_key tenant normalization (v3.6.10 fix). | ||
| The proxy stores entries under SHA-256(tenant) when the tenant is not | ||
| already a 64-char hex digest. Public tenant-scoped helpers (entry_count, | ||
| clear_tenant, entry_exists) MUST apply the same hashing or they query the | ||
| wrong tenant — the cause of the dashboard reporting "0 entries" while the | ||
| cache is in fact populated. | ||
| """ | ||
| tid = tenant_id or _DEFAULT_TENANT | ||
| if _TENANT_HEX64.fullmatch(tid): | ||
| return tid | ||
| import hashlib as _hashlib | ||
| return _hashlib.sha256(tid.encode()).hexdigest() | ||
| _ZLIB_LEVEL: int = 6 | ||
@@ -66,2 +86,5 @@ _AES_NONCE_BYTES: int = 12 | ||
| # C-06: persisted AES key — survives machine-id changes after first run | ||
| _KEY_FILE: Path = Path.home() / LLMCACHE_DIRNAME / "opt-key.bin" | ||
| _FORBIDDEN_MEMORY_TABLES: frozenset[str] = frozenset({ | ||
@@ -80,3 +103,9 @@ "memories", "atomic_facts", "profiles", "canonical_entities", | ||
| class MetricsSnapshot: | ||
| """Mirror of llmcache_metrics columns — names MUST match exactly.""" | ||
| """Mirror of llmcache_metrics columns — names MUST match exactly. | ||
| S-03 / M-03 note: compress_bytes_original and compress_bytes_after store | ||
| WORD-COUNT proxy values (len(text.split())), NOT byte counts. The column | ||
| names use "bytes" for DB schema backward compatibility. Treat these fields | ||
| as token-count proxies, not literal byte measurements. | ||
| """ | ||
| id: int = 1 | ||
@@ -93,4 +122,4 @@ hits: int = 0 | ||
| compress_runs: int = 0 | ||
| compress_bytes_original: int = 0 | ||
| compress_bytes_after: int = 0 | ||
| compress_bytes_original: int = 0 # unit: word-count proxy (see S-03 note above) | ||
| compress_bytes_after: int = 0 # unit: word-count proxy (see S-03 note above) | ||
| cache_size_bytes: int = 0 | ||
@@ -217,4 +246,3 @@ cache_entry_count: int = 0 | ||
| self._salt = self._load_or_create_salt() | ||
| machine_id = self._get_machine_id() | ||
| self._aes_key = self._derive_aes_key(machine_id, self._salt) | ||
| self._aes_key = self._get_or_persist_aes_key(self._salt) | ||
| self.assert_no_memory_db_tables() | ||
@@ -301,2 +329,29 @@ | ||
| def _get_or_persist_aes_key(self, salt: bytes) -> bytes: | ||
| """C-06: Load persisted key from disk, or derive + persist on first run. | ||
| Surviving a machine-id change: after first derivation the key is saved to | ||
| opt-key.bin (0600). On subsequent starts the file is read directly, | ||
| so changing the underlying machine-id string cannot invalidate existing | ||
| cache entries. | ||
| """ | ||
| try: | ||
| if _KEY_FILE.exists(): | ||
| key = _KEY_FILE.read_bytes() | ||
| if len(key) == 32: | ||
| return key | ||
| except Exception as exc: | ||
| logger.warning("CacheDB: could not read persisted AES key: %s", exc) | ||
| # First run (or corrupted file): derive from machine-id and persist. | ||
| machine_id = self._get_machine_id() | ||
| key = self._derive_aes_key(machine_id, salt) | ||
| try: | ||
| _KEY_FILE.parent.mkdir(parents=True, exist_ok=True) | ||
| _KEY_FILE.write_bytes(key) | ||
| os.chmod(_KEY_FILE, 0o600) | ||
| except Exception as exc: | ||
| logger.warning("CacheDB: could not persist AES key (fail-open): %s", exc) | ||
| return key | ||
| def _derive_aes_key(self, machine_id: str, salt: bytes) -> bytes: | ||
@@ -636,7 +691,8 @@ kdf = PBKDF2HMAC( | ||
| model_name = str(meta.get("model", "nomic-ai/nomic-embed-text-v1.5")) | ||
| context_fp = str(meta.get("context_fp", "")) # C-10: persist context fingerprint | ||
| self._db.execute( | ||
| "INSERT OR REPLACE INTO llmcache_semantic_vectors " | ||
| "(entry_id, tenant_id, vector_blob, vector_dim, model_name) " | ||
| "VALUES (?, ?, ?, ?, ?)", | ||
| (entry_id, tenant_id, vector, dim, model_name), | ||
| "(entry_id, tenant_id, vector_blob, vector_dim, model_name, context_fp) " | ||
| "VALUES (?, ?, ?, ?, ?, ?)", | ||
| (entry_id, tenant_id, vector, dim, model_name, context_fp), | ||
| ) | ||
@@ -866,10 +922,18 @@ except sqlite3.Error as exc: | ||
| def get_all_vectors(self, tenant_id: str) -> list[tuple[str, bytes]]: | ||
| def get_all_vectors(self, tenant_id: str) -> list[tuple[str, bytes, str]]: | ||
| """Return (entry_id, vector_blob, context_fp) for all vectors in a tenant. | ||
| C-10: context_fp is included so _lazy_warm_tenant() can restore it without | ||
| recomputing embeddings from messages that may no longer be in scope. | ||
| """ | ||
| try: | ||
| rows = self._db.execute( | ||
| "SELECT entry_id, vector_blob FROM llmcache_semantic_vectors " | ||
| "SELECT entry_id, vector_blob, context_fp FROM llmcache_semantic_vectors " | ||
| "WHERE tenant_id = ?", | ||
| (tenant_id,), | ||
| ) | ||
| return [(dict(r)["entry_id"], dict(r)["vector_blob"]) for r in rows] | ||
| return [ | ||
| (dict(r)["entry_id"], dict(r)["vector_blob"], dict(r).get("context_fp", "")) | ||
| for r in rows | ||
| ] | ||
| except sqlite3.Error as exc: | ||
@@ -931,2 +995,3 @@ logger.warning("CacheDB.get_all_vectors failed: %s", exc) | ||
| def entry_exists(self, cache_key: str, tenant_id: str = _DEFAULT_TENANT) -> bool: | ||
| tenant_id = _normalize_tenant_id(tenant_id) | ||
| try: | ||
@@ -942,2 +1007,3 @@ rows = self._db.execute( | ||
| def clear_tenant(self, tenant_id: str) -> int: | ||
| tenant_id = _normalize_tenant_id(tenant_id) | ||
| try: | ||
@@ -969,2 +1035,3 @@ with self._db.transaction(): | ||
| def entry_count(self, tenant_id: str = _DEFAULT_TENANT) -> int: | ||
| tenant_id = _normalize_tenant_id(tenant_id) | ||
| try: | ||
@@ -971,0 +1038,0 @@ rows = self._db.execute( |
@@ -56,2 +56,3 @@ """DDL for llmcache.db — the Optimize module's dedicated SQLite database. | ||
| model_name TEXT NOT NULL DEFAULT 'nomic-ai/nomic-embed-text-v1.5', | ||
| context_fp TEXT NOT NULL DEFAULT '', | ||
| created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')) | ||
@@ -131,2 +132,3 @@ ) | ||
| Also seeds the single llmcache_metrics row (id=1) via INSERT OR IGNORE. | ||
| C-10: adds context_fp column to existing DBs via ALTER TABLE migration. | ||
| """ | ||
@@ -136,2 +138,11 @@ for stmt in _DDL_STATEMENTS: | ||
| conn.execute("INSERT OR IGNORE INTO llmcache_metrics(id) VALUES (1)") | ||
| # C-10 migration: add context_fp column if missing (existing installs pre-v3.6.10) | ||
| existing_cols = { | ||
| row[1] | ||
| for row in conn.execute("PRAGMA table_info(llmcache_semantic_vectors)") | ||
| } | ||
| if "context_fp" not in existing_cols: | ||
| conn.execute( | ||
| "ALTER TABLE llmcache_semantic_vectors ADD COLUMN context_fp TEXT NOT NULL DEFAULT ''" | ||
| ) | ||
| row = conn.execute( | ||
@@ -138,0 +149,0 @@ "SELECT 1 FROM llmcache_schema_version WHERE version = ?", |
@@ -35,5 +35,3 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| compress_mode: Literal['safe', 'aggressive'] | None = None | ||
| compress_code: bool | None = None | ||
| compress_prose: bool | None = None | ||
| compress_ccr: bool | None = None | ||
@@ -45,4 +43,4 @@ | ||
| try: | ||
| from superlocalmemory.optimize.config.store import ConfigStore | ||
| cfg = ConfigStore().get() | ||
| from superlocalmemory.optimize.config import get_shared_store | ||
| cfg = get_shared_store().get() | ||
| return cfg.as_dict() | ||
@@ -59,4 +57,4 @@ except Exception as exc: | ||
| import dataclasses | ||
| from superlocalmemory.optimize.config.store import ConfigStore | ||
| store = ConfigStore() | ||
| from superlocalmemory.optimize.config import get_shared_store | ||
| store = get_shared_store() | ||
| cfg = store.get() | ||
@@ -86,7 +84,7 @@ updates: dict[str, Any] = {} | ||
| try: | ||
| from superlocalmemory.optimize.config.store import ConfigStore | ||
| from superlocalmemory.optimize.config import get_shared_store | ||
| from superlocalmemory.optimize.metrics.counters import get_metrics | ||
| from superlocalmemory.optimize.storage.db import CacheDB | ||
| cfg = ConfigStore().get() | ||
| cfg = get_shared_store().get() | ||
| collector = get_metrics() | ||
@@ -93,0 +91,0 @@ db = CacheDB() |
@@ -327,6 +327,8 @@ // SuperLocalMemory V3 — Auto-Capture/Recall Settings | ||
| try { | ||
| var testBody = {provider: provider, model: model}; | ||
| if (apiKey) testBody.api_key = apiKey; | ||
| var resp = await fetch('/api/v3/provider/test', { | ||
| method: 'POST', | ||
| headers: {'Content-Type': 'application/json'}, | ||
| body: JSON.stringify({provider: provider, model: model, api_key: apiKey}) | ||
| body: JSON.stringify(testBody) | ||
| }); | ||
@@ -333,0 +335,0 @@ var data = await resp.json(); |
@@ -341,2 +341,5 @@ // Neural Glass Shell — Dashboard V2 "Neural Glass" | ||
| switch(tabId) { | ||
| case 'brain-pane': | ||
| if (typeof loadBrain === 'function') loadBrain(); | ||
| break; | ||
| case 'graph-pane': | ||
@@ -343,0 +346,0 @@ if (typeof loadGraph === 'function') loadGraph(); |
@@ -26,2 +26,3 @@ // SuperLocalMemory V3 — Optimize Tab | ||
| _setToggle('opt-enabled', cfg.enabled); | ||
| _setToggle('opt-proxy-enabled', cfg.proxy_enabled); | ||
| _setToggle('opt-cache-enabled', cfg.cache_enabled); | ||
@@ -31,6 +32,3 @@ _setToggle('opt-semantic-enabled', cfg.semantic_enabled); | ||
| _setSelect('opt-compress-mode', cfg.compress_mode); | ||
| _setToggle('opt-compress-code', cfg.compress_code); | ||
| _setToggle('opt-compress-prose', cfg.compress_prose); | ||
| _setToggle('opt-compress-ccr', cfg.compress_ccr); | ||
| _setToggle('opt-compress-align', cfg.compress_align); | ||
| var verEl = document.getElementById('opt-config-version'); | ||
@@ -75,10 +73,8 @@ if (verEl) verEl.textContent = cfg.config_version || '-'; | ||
| var fieldMap = { | ||
| 'opt-enabled': 'enabled', | ||
| 'opt-cache-enabled': 'cache_enabled', | ||
| 'opt-enabled': 'enabled', | ||
| 'opt-proxy-enabled': 'proxy_enabled', | ||
| 'opt-cache-enabled': 'cache_enabled', | ||
| 'opt-semantic-enabled': 'semantic_enabled', | ||
| 'opt-compress-enabled': 'compress_enabled', | ||
| 'opt-compress-code': 'compress_code', | ||
| 'opt-compress-prose': 'compress_prose', | ||
| 'opt-compress-ccr': 'compress_ccr', | ||
| 'opt-compress-align': 'compress_align' | ||
| 'opt-compress-prose': 'compress_prose' | ||
| }; | ||
@@ -115,2 +111,6 @@ | ||
| _putConfig(body); | ||
| if (id === 'opt-proxy-enabled' || id === 'opt-enabled') { | ||
| var notice = document.getElementById('opt-restart-notice'); | ||
| if (notice) notice.classList.remove('d-none'); | ||
| } | ||
| } | ||
@@ -117,0 +117,0 @@ }); |
| # compress/extractive_code.py | ||
| # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| # Licensed under AGPL-3.0-or-later | ||
| # | ||
| # AST structural patterns adapted from: | ||
| # headroom/compression/handlers/code_handler.py:66-138 (Apache-2.0) | ||
| # Specifically: _STRUCTURAL_NODE_TYPES per-language dict, CodeLanguage enum | ||
| # headroom/compression/handlers/code_handler.py:141-150 — _SIGNATURE_PATTERNS regex fallback | ||
| # Attribution: See ATTRIBUTION.md. | ||
| """CodeCompressor — AST-aware extractive code compressor. | ||
| Supported languages: Python, JavaScript, Go, Rust, Java, C++. | ||
| Path A (tree-sitter): used if tree-sitter-language-pack installed. | ||
| Path B (regex fallback): used otherwise. Tests pass on both paths. | ||
| """ | ||
| from __future__ import annotations | ||
| import logging | ||
| import re | ||
| from dataclasses import dataclass | ||
| from enum import Enum | ||
| logger = logging.getLogger("slm.optimize.compress.code") | ||
| _BODY_STUB_BY_LANG: dict[str, str] = { | ||
| "python": " # [slm: body compressed — retrieve with ccr_id={ccr_id}]", | ||
| "javascript": " // [slm: body compressed — retrieve with ccr_id={ccr_id}]", | ||
| "go": " // [slm: body compressed — retrieve with ccr_id={ccr_id}]", | ||
| "rust": " // [slm: body compressed — retrieve with ccr_id={ccr_id}]", | ||
| "java": " // [slm: body compressed — retrieve with ccr_id={ccr_id}]", | ||
| "cpp": " // [slm: body compressed — retrieve with ccr_id={ccr_id}]", | ||
| } | ||
| _BODY_STUB_NO_CCR_BY_LANG: dict[str, str] = { | ||
| "python": " # [slm: body compressed]", | ||
| "javascript": " // [slm: body compressed]", | ||
| "go": " // [slm: body compressed]", | ||
| "rust": " // [slm: body compressed]", | ||
| "java": " // [slm: body compressed]", | ||
| "cpp": " // [slm: body compressed]", | ||
| } | ||
| _MIN_BODY_LINES: int = 4 | ||
| class CodeLanguage(Enum): | ||
| PYTHON = "python" | ||
| JAVASCRIPT = "javascript" | ||
| GO = "go" | ||
| RUST = "rust" | ||
| JAVA = "java" | ||
| CPP = "cpp" | ||
| @dataclass(frozen=True) | ||
| class CodeSpan: | ||
| start_line: int | ||
| end_line: int | ||
| role: str # "import" | "signature" | "body" | "class_header" | "decorator" | ||
| is_structural: bool | ||
| # ── Signature patterns for regex path (Path B) ──────────────────────────────── | ||
| _SIGNATURE_PATTERNS: dict[str, list[str]] = { | ||
| "python": [ | ||
| r"^\s*(async\s+)?def\s+\w+\s*\([^)]*\)\s*(->\s*[^:]+)?:", | ||
| r"^\s*class\s+\w+(\([^)]*\))?:", | ||
| r"^\s*import\s+", | ||
| r"^\s*from\s+\w+\s+import", | ||
| r"^\s*@\w+", | ||
| ], | ||
| "javascript": [ | ||
| r"^\s*(async\s+)?function\s+\w+\s*\([^)]*\)", | ||
| r"^\s*class\s+\w+(\s+extends\s+\w+)?", | ||
| r"^\s*import\s+", | ||
| r"^\s*const\s+\w+\s*=\s*(async\s+)?\(", | ||
| r"^\s*export\s+(default\s+|const\s+|class\s+|function\s+)", | ||
| ], | ||
| "go": [ | ||
| r"^\s*func\s+(\(\w+\s+\*?\w+\)\s*)?\w+\s*\(", | ||
| r"^\s*import\s+", | ||
| r"^\s*package\s+", | ||
| r"^\s*type\s+\w+\s+(struct|interface)\s*\{", | ||
| ], | ||
| "rust": [ | ||
| r"^\s*(pub\s+)?(async\s+)?fn\s+\w+", | ||
| r"^\s*use\s+", | ||
| r"^\s*impl\s+", | ||
| r"^\s*struct\s+", | ||
| r"^\s*enum\s+", | ||
| r"^\s*trait\s+", | ||
| ], | ||
| "java": [ | ||
| r"^\s*(public|private|protected)\s+(static\s+)?\w+\s+\w+\s*\(", | ||
| r"^\s*import\s+", | ||
| r"^\s*(public|private|protected)?\s*class\s+\w+", | ||
| ], | ||
| "cpp": [ | ||
| r"^\s*\w[\w\s\*&:<,>]*\s+\w+\s*\([^)]*\)\s*(const\s*)?\{", | ||
| r"^\s*#include\s+", | ||
| r"^\s*(class|struct)\s+\w+", | ||
| ], | ||
| } | ||
| class CodeCompressor: | ||
| """AST-aware extractive code compressor. Thread-safe (no mutable state).""" | ||
| def compress(self, code: str, language: str, ccr_id: str = "") -> str: | ||
| try: | ||
| lang = CodeLanguage(language) | ||
| except ValueError: | ||
| logger.warning("CodeCompressor: unknown language %r — passthrough", language) | ||
| return code | ||
| try: | ||
| if _tree_sitter_available(): | ||
| return self._compress_with_tree_sitter(code, lang, ccr_id) | ||
| else: | ||
| return self._compress_with_regex(code, lang, ccr_id) | ||
| except Exception as exc: | ||
| logger.warning("CodeCompressor.compress failed — passthrough: %s", exc) | ||
| return code | ||
| def _compress_with_tree_sitter(self, code: str, lang: CodeLanguage, ccr_id: str) -> str: | ||
| from tree_sitter_language_pack import get_parser # type: ignore[import] | ||
| parser = get_parser(lang.value) | ||
| tree = parser.parse(code.encode()) | ||
| lines = code.split("\n") | ||
| spans = _collect_spans_tree_sitter(tree.root_node, lang) | ||
| return _apply_spans(lines, spans, ccr_id, lang.value) | ||
| def _compress_with_regex(self, code: str, lang: CodeLanguage, ccr_id: str) -> str: | ||
| lines = code.split("\n") | ||
| spans = _collect_spans_regex(lines, lang) | ||
| return _apply_spans(lines, spans, ccr_id, lang.value) | ||
| # ── Span collection ─────────────────────────────────────────────────────────── | ||
| def _collect_spans_tree_sitter(root_node: object, lang: CodeLanguage) -> list[CodeSpan]: | ||
| _STRUCTURAL_TYPES: dict[str, set[str]] = { | ||
| "python": {"import_statement", "import_from_statement", "function_definition", | ||
| "class_definition", "decorated_definition", "type_alias_statement"}, | ||
| "javascript": {"import_statement", "export_statement", "function_declaration", | ||
| "class_declaration", "method_definition", "arrow_function"}, | ||
| "go": {"import_declaration", "function_declaration", "method_declaration", | ||
| "type_declaration", "interface_type"}, | ||
| "rust": {"use_declaration", "function_item", "impl_item", "struct_item", | ||
| "enum_item", "trait_item"}, | ||
| "java": {"import_declaration", "class_declaration", "method_declaration", | ||
| "interface_declaration", "annotation"}, | ||
| "cpp": {"function_definition", "preproc_include", "class_specifier"}, | ||
| } | ||
| structural = _STRUCTURAL_TYPES.get(lang.value, set()) | ||
| spans: list[CodeSpan] = [] | ||
| def _walk(node: object, depth: int = 0) -> None: | ||
| start_row: int = getattr(node, "start_point", (0, 0))[0] | ||
| end_row: int = getattr(node, "end_point", (0, 0))[0] | ||
| node_type: str = getattr(node, "type", "") | ||
| is_struct = node_type in structural | ||
| if is_struct and node_type in { | ||
| "function_definition", "method_definition", "function_declaration", | ||
| "function_item", "method_declaration", | ||
| }: | ||
| body_node = _find_child(node, "block") or _find_child(node, "body") | ||
| if body_node is not None: | ||
| body_start = getattr(body_node, "start_point", (0, 0))[0] | ||
| sig_end = body_start - 1 | ||
| if sig_end > start_row: | ||
| spans.append(CodeSpan(start_row, sig_end, "signature", True)) | ||
| spans.append(CodeSpan(body_start, end_row, "body", False)) | ||
| else: | ||
| spans.append(CodeSpan(start_row, end_row, "signature", True)) | ||
| elif is_struct and node_type in {"decorated_definition"}: | ||
| def_node = _find_child(node, "function_definition") or _find_child(node, "class_definition") | ||
| if def_node is not None: | ||
| def_start = getattr(def_node, "start_point", (0, 0))[0] | ||
| spans.append(CodeSpan(start_row, def_start - 1, "decorator", True)) | ||
| _walk(def_node, depth + 1) | ||
| else: | ||
| spans.append(CodeSpan(start_row, end_row, node_type, True)) | ||
| elif is_struct: | ||
| spans.append(CodeSpan(start_row, end_row, "signature", True)) | ||
| else: | ||
| children = getattr(node, "children", None) | ||
| if children is not None: | ||
| has_named = False | ||
| for child in children: | ||
| if getattr(child, "is_named", False): | ||
| has_named = True | ||
| _walk(child, depth + 1) | ||
| if not has_named: | ||
| spans.append(CodeSpan(start_row, end_row, "body", False)) | ||
| _walk(root_node) | ||
| return spans | ||
| def _find_child(node: object, child_type: str) -> object | None: | ||
| children = getattr(node, "children", None) | ||
| if children is None: | ||
| return None | ||
| for child in children: | ||
| if hasattr(child, "type") and child.type == child_type: | ||
| return child | ||
| return None | ||
| def _collect_spans_regex(lines: list[str], lang: CodeLanguage) -> list[CodeSpan]: | ||
| patterns = _SIGNATURE_PATTERNS.get(lang.value, []) | ||
| spans: list[CodeSpan] = [] | ||
| i = 0 | ||
| n = len(lines) | ||
| while i < n: | ||
| line = lines[i] | ||
| matched = False | ||
| for pat in patterns: | ||
| if re.search(pat, line): | ||
| body_start = i + 1 | ||
| body_end = body_start | ||
| while body_end < n and ( | ||
| lines[body_end].startswith((" ", "\t", "", "#", "//", "/*", "*/", "{")) | ||
| or re.match(r"^\s*$", lines[body_end]) | ||
| ): | ||
| if re.match(r"^\s*($|#|//|/\*|\*/|\*|}|\)|\])", lines[body_end]): | ||
| body_end += 1 | ||
| else: | ||
| break | ||
| body_len = body_end - body_start | ||
| if body_len >= _MIN_BODY_LINES and _looks_like_body(lines, body_start, body_end): | ||
| spans.append(CodeSpan(i, i, "signature", True)) | ||
| spans.append(CodeSpan(body_start, body_end, "body", False)) | ||
| i = body_end | ||
| matched = True | ||
| break | ||
| else: | ||
| body_start = body_end | ||
| # else: pattern didn't match | ||
| if not matched: | ||
| i += 1 | ||
| return spans | ||
| def _looks_like_body(lines: list[str], start: int, end: int) -> bool: | ||
| body_lines = [l for l in lines[start:end] if l.strip() and not l.strip().startswith(("#", "//"))] | ||
| return len(body_lines) >= _MIN_BODY_LINES | ||
| def _apply_spans( | ||
| lines: list[str], | ||
| spans: list[CodeSpan], | ||
| ccr_id: str, | ||
| lang: str = "python", | ||
| ) -> str: | ||
| if not spans: | ||
| return "\n".join(lines) | ||
| stub_tmpl = _BODY_STUB_BY_LANG.get(lang, _BODY_STUB_BY_LANG["python"]) | ||
| stub_no_ccr = _BODY_STUB_NO_CCR_BY_LANG.get(lang, _BODY_STUB_NO_CCR_BY_LANG["python"]) | ||
| stub = stub_tmpl.format(ccr_id=ccr_id) if ccr_id else stub_no_ccr | ||
| sorted_spans = sorted(spans, key=lambda s: s.start_line) | ||
| start_set = set() | ||
| deduped: list[CodeSpan] = [] | ||
| for s in sorted_spans: | ||
| if s.start_line not in start_set: | ||
| deduped.append(s) | ||
| start_set.add(s.start_line) | ||
| span_by_start: dict[int, CodeSpan] = {s.start_line: s for s in deduped} | ||
| result_lines: list[str] = [] | ||
| i = 0 | ||
| n = len(lines) | ||
| while i < n: | ||
| span = span_by_start.get(i) | ||
| if span is not None and not span.is_structural: | ||
| end = min(span.end_line, n - 1) | ||
| body_len = end - i + 1 | ||
| if body_len >= _MIN_BODY_LINES: | ||
| result_lines.append(stub) | ||
| i = end + 1 | ||
| else: | ||
| for j in range(i, end + 1): | ||
| result_lines.append(lines[j]) | ||
| i = end + 1 | ||
| elif span is not None and span.is_structural: | ||
| end = min(span.end_line, n - 1) | ||
| for j in range(i, end + 1): | ||
| result_lines.append(lines[j]) | ||
| i = end + 1 | ||
| else: | ||
| result_lines.append(lines[i]) | ||
| i += 1 | ||
| return "\n".join(result_lines) | ||
| def _tree_sitter_available() -> bool: | ||
| try: | ||
| import tree_sitter_language_pack # noqa: F401 | ||
| return True | ||
| except ImportError: | ||
| return False |
| # compress/extractive_json.py | ||
| # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| # Licensed under AGPL-3.0-or-later | ||
| # | ||
| # Structural masking pattern adapted from: | ||
| # headroom/compression/handlers/json_handler.py (Apache-2.0, Headroom contributors) | ||
| # Specifically: JSONStructureHandler._extract_mask(), JSONToken, JSONTokenType | ||
| # Attribution: See ATTRIBUTION.md. | ||
| # | ||
| # HARD RULE: This compressor MUST NEVER prune or reorder JSON keys. | ||
| # Keys pruned = structured semantics corrupted non-recoverably. Not configurable. | ||
| """JSONCompressor — lossless-ish extractive JSON compressor.""" | ||
| from __future__ import annotations | ||
| import json | ||
| import logging | ||
| from typing import Any | ||
| logger = logging.getLogger("slm.optimize.compress.json") | ||
| VALUE_TRUNCATE_CHARS: int = 120 | ||
| VALUE_TRUNCATE_SUFFIX: str = "\u2026" # ellipsis | ||
| MAX_ARRAY_ITEMS_SHOWN: int = 5 | ||
| ARRAY_REMAINDER_KEY: str = "__slm_omitted__" | ||
| MIN_VALUE_LEN_TO_TRUNCATE: int = 40 | ||
| class JSONCompressor: | ||
| """Lossless-ish JSON compressor. Thread-safe (no mutable state).""" | ||
| def compress(self, parsed: Any) -> str: | ||
| try: | ||
| masked = self._mask(parsed, depth=0) | ||
| return json.dumps(masked, ensure_ascii=False, separators=(",", ":")) | ||
| except Exception as exc: | ||
| logger.warning("JSONCompressor.compress failed — returning original: %s", exc) | ||
| return json.dumps(parsed, ensure_ascii=False, separators=(",", ":")) | ||
| def _mask(self, obj: Any, depth: int) -> Any: | ||
| if isinstance(obj, dict): | ||
| return {k: self._mask(v, depth + 1) for k, v in obj.items()} | ||
| if isinstance(obj, list): | ||
| return self._mask_array(obj, depth) | ||
| if isinstance(obj, str): | ||
| return self._mask_string(obj) | ||
| return obj | ||
| def _mask_array(self, arr: list, depth: int) -> list: | ||
| if len(arr) <= MAX_ARRAY_ITEMS_SHOWN: | ||
| return [self._mask(item, depth + 1) for item in arr] | ||
| shown = [self._mask(item, depth + 1) for item in arr[:MAX_ARRAY_ITEMS_SHOWN]] | ||
| collision = any( | ||
| isinstance(item, dict) and ARRAY_REMAINDER_KEY in item | ||
| for item in arr | ||
| ) | ||
| if collision: | ||
| logger.warning( | ||
| "JSONCompressor: input contains reserved key %r — skipping array sentinel", | ||
| ARRAY_REMAINDER_KEY, | ||
| ) | ||
| else: | ||
| shown.append({ARRAY_REMAINDER_KEY: len(arr) - MAX_ARRAY_ITEMS_SHOWN}) | ||
| return shown | ||
| def _mask_string(self, s: str) -> str: | ||
| if len(s) < MIN_VALUE_LEN_TO_TRUNCATE: | ||
| return s | ||
| if len(s) <= VALUE_TRUNCATE_CHARS: | ||
| return s | ||
| return s[:VALUE_TRUNCATE_CHARS] + VALUE_TRUNCATE_SUFFIX |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
AI-detected potential code anomaly
Supply chain riskAI has identified unusual behaviors that may pose a security risk.
Found 4 instances in 1 package
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
AI-detected potential code anomaly
Supply chain riskAI has identified unusual behaviors that may pose a security risk.
Found 5 instances in 1 package
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
7808034
0.39%119229
0.29%898
0.45%47
-2.08%