superlocalmemory
Advanced tools
| # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| # Licensed under AGPL-3.0-or-later - see LICENSE file | ||
| # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com | ||
| """Distributed / LAN deployment mode — the single ``SLM_REMOTE`` switch. | ||
| SuperLocalMemory historically assumes every dashboard browser, MCP client, | ||
| and API caller lives on ``127.0.0.1``. That assumption breaks three things | ||
| for users who deploy SLM on a server and reach it across a LAN (issue #39): | ||
| 1. ``/internal/token`` refuses any non-loopback client → Brain page can't | ||
| fetch the install token → "Couldn't load Brain". | ||
| 2. The MCP Streamable-HTTP transport is **stateful** — every call must | ||
| replay the ``Mcp-Session-Id`` from the ``initialize`` handshake. A | ||
| gateway/hub that forwards a tool call without replaying it gets | ||
| ``-32600 Session not found``. | ||
| 3. Dashboard CSRF origin checks only accept loopback origins. | ||
| ``SLM_REMOTE=1`` flips all three assumptions at once, **default OFF** so the | ||
| loopback-only security posture is unchanged for the 99% local case. LAN | ||
| access is still gated by an explicit IP allowlist (``SLM_MCP_ALLOWED_HOSTS``) | ||
| — remote mode alone does not throw the doors open. | ||
| Granular overrides (each implied by ``SLM_REMOTE=1`` but usable alone): | ||
| * ``SLM_MCP_STATELESS=1`` — stateless MCP transport only (gateway fix), | ||
| without opening the dashboard token endpoint. | ||
| Security note (WORSTCASE): stateless MCP drops per-session isolation, and | ||
| serving the install token to a LAN host lets any allowlisted machine read | ||
| the brain. Keep the allowlist specific (never blanket ``*`` unless the | ||
| network is fully trusted) — see ``docs/distributed-deployment.md``. | ||
| """ | ||
| from __future__ import annotations | ||
| import ipaddress | ||
| import os | ||
# Lower-cased spellings of an environment value that count as "enabled".
_TRUTHY = frozenset({"1", "true", "yes", "on"})


def _is_truthy(value: str | None) -> bool:
    """Return True when ``value`` is one of the recognised "on" spellings.

    Surrounding whitespace and letter case are ignored. ``None`` and the empty
    string are always False.
    """
    if not value:
        return False
    normalized = value.strip().lower()
    return normalized in _TRUTHY
def is_remote_mode() -> bool:
    """Report whether ``SLM_REMOTE`` opts this daemon into LAN/distributed mode.

    The environment is read on every call, so a change to ``SLM_REMOTE`` is
    picked up without re-importing the module.
    """
    flag = os.environ.get("SLM_REMOTE")
    return _is_truthy(flag)
def mcp_stateless() -> bool:
    """Report whether the MCP transport should run stateless.

    Stateless means no ``Mcp-Session-Id`` replay is required, so a gateway/hub
    can forward ``tools/call`` without repeating the ``initialize`` handshake.

    Two switches turn it on: the umbrella ``SLM_REMOTE=1`` or the granular
    ``SLM_MCP_STATELESS=1``. The granular variable is only consulted when
    remote mode is off.
    """
    if is_remote_mode():
        return True
    granular = os.environ.get("SLM_MCP_STATELESS")
    return _is_truthy(granular)
def _allowlist_entries() -> list[str]:
    """Return the trusted-client allowlist parsed from ``SLM_MCP_ALLOWED_HOSTS``.

    The variable is a comma-separated list. Each entry may be ``*`` (any), an
    exact IP, a CIDR block (``192.168.1.0/24``) or a prefix wildcard
    (``192.168.*``), optionally followed by a host-header style ``:port`` /
    ``:*`` suffix. Entries are returned whitespace-trimmed with blanks
    dropped; interpreting them is left to the matcher.
    """
    raw = os.environ.get("SLM_MCP_ALLOWED_HOSTS", "")
    entries: list[str] = []
    for piece in raw.split(","):
        trimmed = piece.strip()
        if trimmed:
            entries.append(trimmed)
    return entries
def _strip_port(entry: str) -> str:
    """Drop a trailing ``:port`` / ``:*`` host-header suffix from an entry.

    Supported shapes:

    * ``host[:port]`` (IPv4 or hostname) -> ``host``
    * IPv4 CIDR ``a.b.c.d/n[:port]`` -> ``a.b.c.d/n``
    * IPv6 CIDR ``fd00::/8[:port]`` -> ``fd00::/8``
    * bracketless IPv6 literals (two or more colons, no ``/``) are returned
      unchanged, because their colons belong to the address.

    The IPv6 CIDR case used to be cut at the first ``:`` (``fd00::/8`` became
    ``fd00``), so such an entry could never match any client.
    """
    e = entry.strip()
    if "/" in e:
        network, _, tail = e.partition("/")
        if network.count(":") >= 2:
            # IPv6 network: the colons before "/" are part of the address.
            # Only a suffix after the prefix length can be a stray :port/:*.
            return f"{network}/{tail.partition(':')[0]}"
        # IPv4 CIDR: strip anything after the first colon (a stray :port/:*).
        return e.partition(":")[0]
    if e.count(":") == 1:  # host:port or host:* (IPv4 / hostname)
        return e.split(":", 1)[0]
    return e
def _host_matches(entry: str, client_host: str, client_ip) -> bool:
    """Decide whether one allowlist ``entry`` admits the given client.

    ``client_host`` is the client address as a string. ``client_ip`` is the
    same address parsed by ``ipaddress`` or ``None`` when it did not parse.
    Entry forms are tried in this order: empty (never matches), ``*`` (always
    matches), CIDR (only when ``client_ip`` is available), trailing-``*``
    prefix wildcard, then exact string equality.
    """
    pattern = _strip_port(entry).strip()
    if pattern == "":
        return False
    if pattern == "*":
        return True

    looks_like_cidr = "/" in pattern
    if looks_like_cidr and client_ip is not None:
        try:
            network = ipaddress.ip_network(pattern, strict=False)
            return client_ip in network
        except ValueError:
            # Malformed network text: treat as a non-match, never as "allow".
            return False

    if not pattern.endswith("*"):
        return pattern == client_host

    # STRING prefix match (not CIDR). A dotted prefix such as "192.168."
    # rejects "192.1680.x"; CIDR remains the unambiguous way to express a
    # network, and wildcards are a convenience.
    prefix = pattern[:-1]
    return client_host.startswith(prefix)
def is_lan_client_allowed(client_host: str) -> bool:
    """Return True iff remote mode is ON and ``client_host`` is allowlisted.

    This governs non-loopback LAN clients only; loopback is handled by the
    callers. The result is False when remote mode is off, when
    ``client_host`` is empty, or when the allowlist has no entries, so the
    default posture stays loopback-only.
    """
    if not (is_remote_mode() and client_host):
        return False

    allowlist = _allowlist_entries()
    if not allowlist:
        return False

    # A host that does not parse as an IP can still match "*", a wildcard or
    # an exact entry; it simply cannot match CIDR entries.
    try:
        parsed_ip = ipaddress.ip_address(client_host)
    except ValueError:
        parsed_ip = None

    for entry in allowlist:
        if _host_matches(entry, client_host, parsed_ip):
            return True
    return False
def is_remote_origin_allowed(origin: str) -> bool:
    """Return True iff remote mode is ON and ``origin``'s host is allowlisted.

    ``origin`` is a full URL (``http://192.168.50.144:8765``). An empty origin
    is not this function's concern (loopback callers handle that). Used to
    relax the dashboard CSRF origin guard for trusted LAN dashboards.

    Security: the Origin host is chosen by whoever serves the page, so it may
    be an arbitrary DNS name. The prefix-wildcard and CIDR matchers assume a
    numeric address; fed a hostname, an entry such as ``192.168.*`` would also
    accept ``http://192.168.evil.com``. A host that is not an IP literal is
    therefore accepted only by ``*`` or by an exact allowlist entry.
    """
    if not is_remote_mode() or not origin:
        return False
    # Extract host from scheme://host[:port]
    rest = origin.split("://", 1)[-1]
    host = rest.split("/", 1)[0]
    # Strip a trailing :port (IPv4/hostname); unwrap bracketed IPv6.
    if host.startswith("["):
        host = host.split("]", 1)[0].lstrip("[")
    elif host.count(":") == 1:
        host = host.split(":", 1)[0]
    if not host:
        return False
    try:
        ipaddress.ip_address(host)
    except ValueError:
        # Hostname origin: no prefix/CIDR matching on attacker-chosen text.
        return any(
            _strip_port(entry) in ("*", host) for entry in _allowlist_entries()
        )
    return is_lan_client_allowed(host)
def _env_int(name: str, default: int) -> int:
    """Read a positive integer from environment variable ``name``.

    ``default`` is returned when the variable is unset or blank, is not a
    valid integer, or is zero or negative.
    """
    text = os.environ.get(name, "").strip()
    try:
        parsed = int(text) if text else default
    except ValueError:
        return default
    if parsed > 0:
        return parsed
    return default
def rate_limit_config() -> tuple[int, int, int]:
    """Return ``(write_max, read_max, window_seconds)`` for the rate limiter.

    The values come from ``SLM_RATE_LIMIT_WRITE`` / ``SLM_RATE_LIMIT_READ`` /
    ``SLM_RATE_LIMIT_WINDOW``. Each falls back to its former hardcoded value
    (30 writes, 120 reads, 60 seconds) when unset or invalid. Making them
    tunable lets a distributed/LAN deployment raise the limits instead of
    hitting ``429 Too Many Requests`` (issue #40 Issue 3).
    """
    return (
        _env_int("SLM_RATE_LIMIT_WRITE", 30),
        _env_int("SLM_RATE_LIMIT_READ", 120),
        _env_int("SLM_RATE_LIMIT_WINDOW", 60),
    )
def is_rate_limit_exempt(client_host: str) -> bool:
    """Return True iff ``client_host`` should bypass the dashboard rate limiter.

    The three literal loopback spellings are always exempt (the dashboard
    polls itself rapidly). Any other host is exempt only when it is an
    allowlisted LAN client in remote mode, so a remote browser doing the same
    polling does not trip the limiter (issue #40 Issue 3).
    """
    is_loopback = (
        client_host == "127.0.0.1"
        or client_host == "::1"
        or client_host == "localhost"
    )
    if is_loopback:
        return True
    return is_lan_client_allowed(client_host)
| __all__ = ( | ||
| "is_remote_mode", | ||
| "mcp_stateless", | ||
| "is_lan_client_allowed", | ||
| "is_remote_origin_allowed", | ||
| "rate_limit_config", | ||
| "is_rate_limit_exempt", | ||
| ) |
+1
-1
| { | ||
| "name": "superlocalmemory", | ||
| "version": "3.6.11", | ||
| "version": "3.6.12", | ||
| "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.", | ||
@@ -5,0 +5,0 @@ "keywords": [ |
+1
-1
| [project] | ||
| name = "superlocalmemory" | ||
| version = "3.6.11" | ||
| version = "3.6.12" | ||
| description = "Information-geometric agent memory with mathematical guarantees" | ||
@@ -5,0 +5,0 @@ readme = "README.md" |
+2
-0
@@ -38,2 +38,4 @@ <p align="center"> | ||
| > | ||
| > **v3.6.12 "Distributed-ready":** Run SLM on a server and reach it across your LAN. `SLM_REMOTE=1` (default off) lets the dashboard load from a remote browser, lets MCP gateways/hubs forward tool calls, and makes custom local LLM endpoints (llama.cpp / LM Studio / Azure) configurable right from the dashboard — plus a batch of stability and security fixes. See [`docs/distributed-deployment.md`](docs/distributed-deployment.md). | ||
| > | ||
| > **v3.6.11 "Optimize Everywhere":** Three surfaces. **Proxy** (Surface A) — full-turn cache + compress on transport; needs `ANTHROPIC_BASE_URL`, shrinks the context window. **MCP tools** (Surface B) — `slm_compress`, `slm_retrieve`, `slm_cache_set`, `slm_cache_get`, `slm_optimize_stats`; no proxy, no window shrink, works on any Claude subscription. **Skill** (Surface C) — `slm-optimize` installs in `~/.claude/skills/`; zero-config auto-compress for large tool outputs and CLAUDE.md. No proxy, full 1M window. [See Three Surfaces →](#three-surfaces-proxy--mcp-tools--skill) | ||
@@ -40,0 +42,0 @@ > |
| Metadata-Version: 2.4 | ||
| Name: superlocalmemory | ||
| Version: 3.6.11 | ||
| Version: 3.6.12 | ||
| Summary: Information-geometric agent memory with mathematical guarantees | ||
@@ -131,2 +131,4 @@ Author-email: Varun Pratap Bhardwaj <admin@superlocalmemory.com> | ||
| > | ||
| > **v3.6.12 "Distributed-ready":** Run SLM on a server and reach it across your LAN. `SLM_REMOTE=1` (default off) lets the dashboard load from a remote browser, lets MCP gateways/hubs forward tool calls, and makes custom local LLM endpoints (llama.cpp / LM Studio / Azure) configurable right from the dashboard — plus a batch of stability and security fixes. See [`docs/distributed-deployment.md`](docs/distributed-deployment.md). | ||
| > | ||
| > **v3.6.11 "Optimize Everywhere":** Three surfaces. **Proxy** (Surface A) — full-turn cache + compress on transport; needs `ANTHROPIC_BASE_URL`, shrinks the context window. **MCP tools** (Surface B) — `slm_compress`, `slm_retrieve`, `slm_cache_set`, `slm_cache_get`, `slm_optimize_stats`; no proxy, no window shrink, works on any Claude subscription. **Skill** (Surface C) — `slm-optimize` installs in `~/.claude/skills/`; zero-config auto-compress for large tool outputs and CLAUDE.md. No proxy, full 1M window. [See Three Surfaces →](#three-surfaces-proxy--mcp-tools--skill) | ||
@@ -133,0 +135,0 @@ > |
@@ -117,2 +117,3 @@ AUTHORS.md | ||
| src/superlocalmemory/core/registry.py | ||
| src/superlocalmemory/core/remote_mode.py | ||
| src/superlocalmemory/core/reranker_worker.py | ||
@@ -119,0 +120,0 @@ src/superlocalmemory/core/safe_fs.py |
@@ -411,408 +411,1 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| logger.warning("Some SLM workers still alive after %ds timeout", timeout) | ||
| # --------------------------------------------------------------------------- | ||
| # Server: HTTP request handler with engine singleton | ||
| # --------------------------------------------------------------------------- | ||
# Lazily created engine singleton (see _get_engine) and the monotonic time of
# the most recent HTTP request, used by the idle watchdog.
_engine = None
_last_activity = time.monotonic()

# ---------------------------------------------------------------------------
# V3.3.28: Observation debounce buffer.
#
# When 20+ file edits arrive in quick succession (from parallel AI agents,
# git checkout, or batch sed), we buffer observations for _OBSERVE_DEBOUNCE_SEC
# seconds and deduplicate by content hash. This reduces 20 observations → 1-3
# batches, each processed by the singleton engine (1 embedding worker).
# ---------------------------------------------------------------------------


def _parse_debounce_sec(raw: str | None, default: float = 3.0) -> float:
    """Parse the debounce window in seconds, tolerating bad configuration.

    A missing, non-numeric, NaN or infinite value yields ``default`` instead
    of raising at import time. A negative value is clamped to ``0.0``, which
    is how a timer treats it anyway (it fires immediately).
    """
    if raw is None:
        return default
    try:
        value = float(raw)
    except ValueError:
        return default
    # value != value is the NaN test; infinities would never let the timer fire.
    if value != value or value in (float("inf"), float("-inf")):
        return default
    return max(value, 0.0)


_OBSERVE_DEBOUNCE_SEC = _parse_debounce_sec(os.environ.get("SLM_OBSERVE_DEBOUNCE_SEC"))
_observe_buffer: list[str] = []
_observe_seen: set[str] = set()  # content hashes for dedup within window
_observe_lock = threading.Lock()
_observe_timer: threading.Timer | None = None
def _flush_observe_buffer() -> None:
    """Drain the debounce buffer and run auto-capture on each observation.

    The buffer, the dedup hash set and the timer handle are reset while
    holding ``_observe_lock``. The captured batch is then processed outside
    the lock. A failure on one observation is logged and does not stop the
    rest of the batch. Does nothing when the buffer is empty.
    """
    global _observe_timer
    with _observe_lock:
        if not _observe_buffer:
            return
        pending = _observe_buffer[:]
        _observe_buffer.clear()
        _observe_seen.clear()
        _observe_timer = None

    # Entries were deduplicated when enqueued, so each one is evaluated once.
    engine = _get_engine()
    from superlocalmemory.hooks.auto_capture import AutoCapture

    capturer = AutoCapture(engine=engine)
    for text in pending:
        try:
            verdict = capturer.evaluate(text)
            if verdict.capture:
                capturer.capture(text, category=verdict.category)
        except Exception as exc:
            # Swallow per-observation to protect the batch, but log so a
            # pattern of dropped observations is visible.
            logger.warning("observation dropped during batch: %s", exc)
    logger.info(
        "Observe debounce: processed %d observations (from buffer)", len(pending)
    )
def _enqueue_observation(content: str) -> dict:
    """Add an observation to the debounce buffer and return immediately.

    Returns ``{"captured": False, "reason": ...}`` when identical content is
    already waiting in the current debounce window. Otherwise the content is
    buffered, the debounce timer is restarted, and a
    ``{"captured": True, "queued": True, ...}`` receipt is returned. The
    actual capture happens later in ``_flush_observe_buffer``.
    """
    global _observe_timer
    import hashlib

    # MD5 is only a dedup key here, not a security primitive. Declaring that
    # keeps the call working on FIPS-restricted Python builds, where a plain
    # hashlib.md5() raises ValueError.
    content_hash = hashlib.md5(content.encode(), usedforsecurity=False).hexdigest()
    with _observe_lock:
        if content_hash in _observe_seen:
            return {"captured": False, "reason": "duplicate within debounce window"}
        _observe_seen.add(content_hash)
        _observe_buffer.append(content)
        buf_size = len(_observe_buffer)
        # Reset the debounce timer under the lock: the flush callback clears
        # _observe_timer under the same lock, so the handle is never swapped
        # concurrently.
        if _observe_timer is not None:
            _observe_timer.cancel()
        _observe_timer = threading.Timer(_OBSERVE_DEBOUNCE_SEC, _flush_observe_buffer)
        _observe_timer.daemon = True
        _observe_timer.start()
    return {"captured": True, "queued": True, "buffer_size": buf_size,
            "debounce_sec": _OBSERVE_DEBOUNCE_SEC}
def _get_engine():
    """Return the process-wide engine, creating and warming it on first use.

    The engine is published to the module global only after ``initialize()``
    has succeeded. Previously the global was assigned first, so a failed
    initialization left a half-built engine cached and every later call
    returned it instead of retrying.
    """
    global _engine
    if _engine is not None:
        return _engine

    from superlocalmemory.core.config import SLMConfig
    from superlocalmemory.core.engine import MemoryEngine

    config = SLMConfig.load()
    engine = MemoryEngine(config)
    engine.initialize()
    _engine = engine

    # Force reranker warmup (blocking — daemon can afford to wait)
    retrieval_eng = getattr(engine, '_retrieval_engine', None)
    if retrieval_eng:
        reranker = getattr(retrieval_eng, '_reranker', None)
        if reranker and hasattr(reranker, 'warmup_sync'):
            reranker.warmup_sync(timeout=120)
    logger.info("Daemon engine initialized and warm")
    return _engine
class DaemonHandler(BaseHTTPRequestHandler):
    """Lightweight HTTP handler for daemon requests.

    Routes:

    * ``GET /health`` — liveness probe; does not touch the engine.
    * ``GET /recall?q=...&limit=...&session_id=...&fast=...`` — recall
      through the singleton engine.
    * ``GET /list`` — up to 50 facts with content truncated to 100 chars.
    * ``GET /status`` — pid, uptime, mode, fact count and idle seconds.
    * ``POST /remember`` — store ``content`` (optional ``tags``/``metadata``).
    * ``POST /observe`` — enqueue ``content`` in the debounce buffer.
    * ``POST /stop`` — acknowledge, then shut down from a background thread.

    Every GET/POST refreshes ``_last_activity`` for the idle watchdog. Errors
    inside an engine-backed route are reported as ``500 {"error": ...}``.
    """

    def log_message(self, format, *args):
        """Suppress default access logging."""
        pass

    def _send_json(self, status: int, data: dict) -> None:
        """Write ``data`` as a JSON response with the given HTTP status."""
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(json.dumps(data).encode())

    def _read_body(self) -> dict:
        """Read and JSON-decode the request body; ``{}`` when there is none.

        A non-positive ``Content-Length`` is treated as "no body". A negative
        value would otherwise reach ``rfile.read(-1)``, which reads until the
        client closes the connection and stalls the server.
        """
        length = int(self.headers.get("Content-Length", 0))
        if length <= 0:
            return {}
        return json.loads(self.rfile.read(length).decode())

    def do_GET(self) -> None:
        """Dispatch GET routes; unknown paths get a 404 JSON body."""
        global _last_activity
        _last_activity = time.monotonic()

        if self.path == "/health":
            self._send_json(200, {"status": "ok", "pid": os.getpid()})
            return

        if self.path.startswith("/recall"):
            try:
                # Parse query from URL params
                from urllib.parse import urlparse, parse_qs
                params = parse_qs(urlparse(self.path).query)
                query = params.get("q", [""])[0]
                limit = int(params.get("limit", ["20"])[0])
                # S9-DASH-02: session_id for outcome-queue enqueue.
                # Priority: ?session_id= query arg > X-SLM-Session-Id
                # header > synthetic "http:<ts>". Without any of these
                # the recall still works — it just doesn't produce a
                # pending_outcome (hook-based signals can't match).
                session_id = params.get("session_id", [""])[0]
                if not session_id:
                    session_id = self.headers.get("X-SLM-Session-Id", "")
                if not session_id:
                    session_id = f"http:{int(time.time() * 1000)}"
                engine = _get_engine()
                raw_fast = params.get("fast", ["false"])[0]
                fast = raw_fast.lower() in ("true", "1")
                response = engine.recall(
                    query, limit=limit, session_id=session_id, fast=fast,
                )
                # Return the same field shape as recall_worker._handle_recall,
                # so MCP processes that proxy through the daemon get recall_trace-
                # compatible data without a second round trip.
                memory_ids = list({
                    r.fact.memory_id for r in response.results[:limit]
                    if r.fact.memory_id
                })
                memory_map = (
                    engine._db.get_memory_content_batch(memory_ids)
                    if memory_ids else {}
                )
                results = []
                for r in response.results[:limit]:
                    fact_type = getattr(r.fact, "fact_type", None)
                    lifecycle = getattr(r.fact, "lifecycle", None)
                    # v3.5.1: flatten newlines and tabs in content to spaces.
                    clean = r.fact.content.replace("\r", " ").replace("\n", " ").replace("\t", " ")
                    sc_raw = memory_map.get(r.fact.memory_id, "")
                    sc_clean = sc_raw.replace("\r", " ").replace("\n", " ").replace("\t", " ") if sc_raw else ""
                    results.append({
                        "fact_id": r.fact.fact_id,
                        "memory_id": r.fact.memory_id,
                        "content": clean,
                        "source_content": sc_clean,
                        "score": round(r.score, 4),
                        "confidence": round(r.confidence, 4),
                        "trust_score": round(r.trust_score, 4),
                        "channel_scores": {
                            k: round(v, 4)
                            for k, v in (r.channel_scores or {}).items()
                        },
                        "fact_type": fact_type.value
                        if fact_type and hasattr(fact_type, "value") else "",
                        "lifecycle": lifecycle.value
                        if lifecycle and hasattr(lifecycle, "value") else "",
                        "access_count": getattr(r.fact, "access_count", 0),
                        "evidence_chain": list(
                            getattr(r, "evidence_chain", []) or []
                        ),
                    })
                self._send_json(200, {
                    "ok": True,
                    "query": query,
                    "query_type": response.query_type,
                    "result_count": len(results),
                    "retrieval_time_ms": round(response.retrieval_time_ms, 1),
                    "channel_weights": {
                        k: round(v, 3)
                        for k, v in (response.channel_weights or {}).items()
                    },
                    "total_candidates": getattr(response, "total_candidates", 0),
                    "results": results,
                    "count": len(results),  # backward compat alias
                })
            except Exception as exc:
                self._send_json(500, {"error": str(exc)})
            return

        if self.path == "/list":
            try:
                engine = _get_engine()
                facts = engine.list_facts(limit=50)
                items = [
                    {"content": f.content[:100], "fact_type": getattr(f.fact_type, 'value', str(f.fact_type)),
                     "created_at": (f.created_at or "")[:19], "fact_id": f.fact_id}
                    for f in facts
                ]
                self._send_json(200, {"results": items, "count": len(items)})
            except Exception as exc:
                self._send_json(500, {"error": str(exc)})
            return

        if self.path == "/status":
            # Guarded like the other engine-backed routes, so an engine
            # failure yields a JSON 500 instead of an unhandled exception.
            try:
                engine = _get_engine()
                uptime = time.monotonic() - _server_start_time
                self._send_json(200, {
                    "status": "running", "pid": os.getpid(),
                    "uptime_s": round(uptime),
                    "mode": engine._config.mode.value,
                    "fact_count": engine.fact_count,
                    "idle_s": round(time.monotonic() - _last_activity),
                })
            except Exception as exc:
                self._send_json(500, {"error": str(exc)})
            return

        self._send_json(404, {"error": "not found"})

    def do_POST(self) -> None:
        """Dispatch POST routes; unknown paths get a 404 JSON body."""
        global _last_activity
        _last_activity = time.monotonic()

        if self.path == "/remember":
            try:
                body = self._read_body()
                content = body.get("content", "")
                tags = body.get("tags", "")
                extra_meta = body.get("metadata") or {}
                if not content:
                    self._send_json(400, {"error": "content required"})
                    return
                engine = _get_engine()
                metadata = {"tags": tags} if tags else {}
                if isinstance(extra_meta, dict):
                    metadata.update(extra_meta)
                fact_ids = engine.store(content, metadata=metadata)
                self._send_json(200, {
                    "ok": True,
                    "fact_ids": fact_ids,
                    "count": len(fact_ids),
                })
            except Exception as exc:
                self._send_json(500, {"error": str(exc)})
            return

        if self.path == "/observe":
            try:
                body = self._read_body()
                content = body.get("content", "")
                if not content:
                    self._send_json(400, {"error": "content required"})
                    return
                # V3.3.28: Debounced observation processing.
                # Buffers observations for the debounce window, deduplicates,
                # processes as batch. Returns immediately — the actual capture
                # happens asynchronously via the debounce timer, using the
                # singleton engine.
                result = _enqueue_observation(content)
                self._send_json(200, result)
            except Exception as exc:
                self._send_json(500, {"error": str(exc)})
            return

        if self.path == "/stop":
            self._send_json(200, {"status": "stopping"})
            # Shut down from another thread so this response can complete.
            Thread(target=_shutdown_server, daemon=True).start()
            return

        self._send_json(404, {"error": "not found"})
| # --------------------------------------------------------------------------- | ||
| # Server lifecycle | ||
| # --------------------------------------------------------------------------- | ||
# The running HTTP server (None until start_server creates it) and the
# monotonic timestamp used to report uptime.
_server: HTTPServer | None = None
_server_start_time = time.monotonic()


def _shutdown_server() -> None:
    """Flush pending work, close the engine, stop the server, remove markers.

    Each stage is best-effort: a failure while flushing observations or
    closing the engine is logged and the remaining stages still run. The PID
    and port files are removed last.
    """
    global _engine, _server

    try:
        _flush_observe_buffer()
    except Exception as exc:
        logger.warning("flush observe buffer on shutdown failed: %s", exc)

    time.sleep(0.5)

    if _engine is not None:
        try:
            _engine.close()
        except Exception as exc:
            logger.warning("engine close on shutdown failed: %s", exc)
        _engine = None

    if _server is not None:
        _server.shutdown()

    for marker in (_PID_FILE, _PORT_FILE):
        marker.unlink(missing_ok=True)
def _idle_watchdog(timeout: int) -> None:
    """Exit the process once no request has arrived for ``timeout`` seconds.

    Polls every 30 seconds. When the idle time exceeds ``timeout`` it runs the
    normal shutdown sequence and then terminates the process with
    ``os._exit(0)``. This function never returns.
    """
    while True:
        time.sleep(30)
        idle_for = time.monotonic() - _last_activity
        if not idle_for > timeout:
            continue
        logger.info("Daemon idle for %ds, shutting down", int(idle_for))
        _shutdown_server()
        os._exit(0)
def start_server(port: int = _DEFAULT_PORT, idle_timeout: int | None = None) -> None:
    """Start the daemon HTTP server on 127.0.0.1. Blocks until stopped.

    Args:
        port: TCP port to bind on the loopback interface.
        idle_timeout: Seconds of inactivity before auto-shutdown. A falsy
            value falls back to ``SLM_DAEMON_IDLE_TIMEOUT`` and then to the
            built-in default.

    Startup order: upgrade banner (advisory), data-dir migration (advisory),
    PID/port files, SIGTERM handler, engine warm-up, idle watchdog, then the
    blocking serve loop.
    """
    global _server, _server_start_time, _last_activity

    idle_timeout = idle_timeout or int(os.environ.get(
        "SLM_DAEMON_IDLE_TIMEOUT", str(_DEFAULT_IDLE_TIMEOUT),
    ))

    # Banner is advisory — a broken data dir must never prevent the daemon
    # from starting, so the swallow here is intentional.
    try:
        from superlocalmemory import __version__ as _slm_ver
        from superlocalmemory.cli.version_banner import check_and_emit_upgrade_banner
        check_and_emit_upgrade_banner(_slm_ver)
    except Exception as exc:
        logger.warning("upgrade banner on daemon start failed: %s", exc)

    # Apply the v3.4.26 data-dir migration now — the daemon is the
    # authoritative holder of the DB, so this is the right place to do
    # it unconditionally (``migrate`` is idempotent).
    try:
        from pathlib import Path as _P
        from superlocalmemory.migrations.v3_4_25_to_v3_4_26 import (
            is_ready as _is_ready, migrate as _migrate,
        )
        _data = _P(os.environ.get("SLM_DATA_DIR")
                   or _P.home() / ".superlocalmemory")
        if not _is_ready(_data):
            _migrate(_data)
    except Exception as exc:
        logger.warning("v3.4.26 migration on daemon start failed: %s", exc)

    # Write PID + port files
    _PID_FILE.parent.mkdir(parents=True, exist_ok=True)
    _PID_FILE.write_text(str(os.getpid()))
    _PORT_FILE.write_text(str(port))

    # Handle SIGTERM for graceful shutdown. The handler runs on the main
    # thread, which is the thread inside serve_forever(). Calling
    # server.shutdown() there would wait forever for a loop that cannot
    # advance, so the shutdown is handed to a helper thread.
    def _terminate() -> None:
        try:
            _shutdown_server()
        finally:
            os._exit(0)

    def _on_sigterm(*_: object) -> None:
        Thread(target=_terminate, daemon=True, name="sigterm-shutdown").start()

    signal.signal(signal.SIGTERM, _on_sigterm)

    # Pre-warm engine (this is the cold start — daemon absorbs it once)
    logger.info("Daemon starting — warming engine...")
    _get_engine()
    logger.info("Engine warm. Daemon ready on port %d (idle timeout: %ds)", port, idle_timeout)

    _server_start_time = time.monotonic()
    _last_activity = time.monotonic()

    # Start idle watchdog
    Thread(target=_idle_watchdog, args=(idle_timeout,), daemon=True, name="idle-watchdog").start()

    # Start HTTP server
    # SO_REUSEADDR must be set on the class BEFORE __init__ calls bind()
    HTTPServer.allow_reuse_address = True
    _server = HTTPServer(("127.0.0.1", port), DaemonHandler)
    try:
        _server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        _shutdown_server()
| # --------------------------------------------------------------------------- | ||
| # CLI entry point | ||
| # --------------------------------------------------------------------------- | ||
if __name__ == "__main__":
    # Script entry: configure logging, then act on the first recognised flag.
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
    cli_args = sys.argv
    if "--start" in cli_args:
        start_server()
    elif "--stop" in cli_args:
        stop_daemon()
    else:
        print("Usage: python -m superlocalmemory.cli.daemon --start|--stop")
@@ -186,3 +186,5 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| recall_p = sub.add_parser("recall", help="Semantic search with 4-channel retrieval") | ||
| # v3.6.12 (parity-3): `search` is an alias of `recall` so the CLI has the | ||
| # same search verb the MCP exposes (handlers dict maps both to cmd_recall). | ||
| recall_p = sub.add_parser("recall", aliases=["search"], help="Semantic search with 4-channel retrieval") | ||
| recall_p.add_argument("query", help="Search query") | ||
@@ -189,0 +191,0 @@ recall_p.add_argument("--limit", type=int, default=10, help="Max results (default 10)") |
@@ -208,3 +208,6 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| """ | ||
| content = redact_secrets(entry.content)[:MAX_CONTENT_CHARS] | ||
| # v3.6.12 (redact-1): scrub dashboard/cached content at HIGH aggression | ||
| # so Bearer/GitHub-PAT/Anthropic/OpenAI/GENERIC_KEY patterns are caught | ||
| # (the default 'normal' skipped them, leaking those shapes to the UI). | ||
| content = redact_secrets(entry.content, aggression="high")[:MAX_CONTENT_CHARS] | ||
| fact_ids_json = json.dumps(list(entry.fact_ids)) | ||
@@ -211,0 +214,0 @@ byte_size = ( |
@@ -370,3 +370,6 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| model = getattr(config.llm, 'model', model) or model | ||
| timeout = getattr(config.llm, 'timeout', timeout) or timeout | ||
| # v3.6.12 (modeb-4): the LLMConfig field is `timeout_seconds`, not | ||
| # `timeout` — the old read always missed and silently used 30s. | ||
| timeout = getattr(config.llm, 'timeout_seconds', None) or \ | ||
| getattr(config.llm, 'timeout', None) or timeout | ||
@@ -373,0 +376,0 @@ fact_texts = "\n".join(f"- {f['content']}" for f in facts[:_MAX_CLUSTER_SIZE]) |
@@ -127,4 +127,7 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| model = getattr(self._config.llm, 'model', None) or "llama3.1:8b" | ||
| # v3.6.12 (modeb-2): honor the configured endpoint instead of hardcoding | ||
| # localhost:11434, so a remote/non-default Ollama host works in Mode B. | ||
| _base = (getattr(self._config.llm, 'api_base', '') or "http://localhost:11434").rstrip("/") | ||
| with httpx.Client(timeout=httpx.Timeout(30.0)) as client: | ||
| resp = client.post("http://localhost:11434/api/generate", json={ | ||
| resp = client.post(f"{_base}/api/generate", json={ | ||
| "model": model, | ||
@@ -131,0 +134,0 @@ "prompt": prompt, |
@@ -141,3 +141,9 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| return True | ||
| return bool(self._api_key) | ||
| # v3.6.12 (modeb-1): a custom local OpenAI-compatible endpoint | ||
| # (llama.cpp, LM Studio, vLLM) needs NO API key — _build_openai already | ||
| # omits the Authorization header when the key is empty. Treat a | ||
| # configured base_url as sufficient, otherwise Mode B silently falls | ||
| # back to Mode A extraction for keyless local endpoints. | ||
| _base = getattr(self, "_base_url", "") or getattr(self, "_api_base", "") | ||
| return bool(self._api_key) or bool(_base) | ||
@@ -144,0 +150,0 @@ @property |
@@ -18,4 +18,8 @@ """Per-HTTP-request agent ID resolution — ContextVar home. | ||
| # v3.6.12 (parity-1): default is "" (the "no agent routed" sentinel), NOT the | ||
| # user-visible "mcp_client". Sanitized agent ids are [A-Za-z0-9._-], so "" can | ||
| # never collide — a client that explicitly routes to /mcp/mcp_client is now | ||
| # distinguishable from a bare /mcp/ request with no agent segment. | ||
| _current_agent_id: contextvars.ContextVar[str] = contextvars.ContextVar( | ||
| "slm_agent_id", default="mcp_client" | ||
| "slm_agent_id", default="" | ||
| ) | ||
@@ -46,4 +50,4 @@ | ||
| ctx_id = _current_agent_id.get() | ||
| if ctx_id != "mcp_client": | ||
| return ctx_id | ||
| if ctx_id: | ||
| return ctx_id # an explicitly-routed agent id (incl. "mcp_client") | ||
| if env_fallback: | ||
@@ -50,0 +54,0 @@ return os.environ.get("SLM_AGENT_ID", "mcp_client") |
@@ -312,3 +312,6 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| pid = profile_id or engine.profile_id | ||
| facts = engine._db.get_all_facts(pid)[:limit] | ||
| # v3.6.12 (search-2): push the limit into the query — was loading the | ||
| # ENTIRE facts table (deserializing every 768-float embedding) just | ||
| # to return the top N. get_all_facts preserves created_at DESC order. | ||
| facts = engine._db.get_all_facts(pid, limit=limit) | ||
| items = [] | ||
@@ -405,2 +408,11 @@ for f in facts: | ||
| # v3.6.12 (search-3): recall/delete run in a separate worker | ||
| # subprocess that caches its engine (and profile_id) at init. Recycle | ||
| # it so the NEXT recall uses the new profile instead of the stale one. | ||
| try: | ||
| from superlocalmemory.core.worker_pool import WorkerPool | ||
| WorkerPool.shared().shutdown() | ||
| except Exception: | ||
| logger.debug("worker-pool recycle on profile switch skipped") | ||
| return { | ||
@@ -407,0 +419,0 @@ "success": True, |
@@ -79,3 +79,3 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| """Register this session with the mesh broker if not already.""" | ||
| global _REGISTERED, _PROJECT_PATH | ||
| global _REGISTERED, _PROJECT_PATH, _PEER_ID | ||
| if _REGISTERED: | ||
@@ -93,2 +93,7 @@ return | ||
| if result: | ||
| # v3.6.12 (mesh-1): the broker mints its OWN peer_id (RegisterRequest has | ||
| # no peer_id field, so our body value is dropped by pydantic). Adopt the | ||
| # broker's id BEFORE starting the heartbeat, otherwise heartbeat/send/ | ||
| # inbox all target a non-existent peer → 404s and the session is reaped. | ||
| _PEER_ID = result.get("peer_id", _PEER_ID) | ||
| _REGISTERED = True | ||
@@ -196,3 +201,3 @@ _start_heartbeat() | ||
| ) | ||
| return result or {"error": "Failed to send message"} | ||
| return result or {"ok": False, "error": "Failed to send message"} | ||
@@ -213,4 +218,7 @@ @server.tool() | ||
| msg_list = (messages or {}).get("messages", []) | ||
| # Auto-mark unread messages as read | ||
| unread_ids = [m["id"] for m in msg_list if not m.get("read")] | ||
| # Auto-mark unread messages as read. v3.6.12 (failopen-2): use .get("id") | ||
| # — a malformed broker message without an "id" key used to raise KeyError | ||
| # out to the agent, violating the never-raise contract. | ||
| unread_ids = [m["id"] for m in msg_list | ||
| if not m.get("read") and m.get("id") is not None] | ||
| if unread_ids: | ||
@@ -246,3 +254,3 @@ await asyncio.to_thread( | ||
| ) | ||
| return result or {"error": "Failed to set state"} | ||
| return result or {"ok": False, "error": "Failed to set state"} | ||
@@ -274,3 +282,3 @@ if key: | ||
| ) | ||
| return result or {"error": "Lock operation failed"} | ||
| return result or {"ok": False, "error": "Lock operation failed"} | ||
@@ -277,0 +285,0 @@ @server.tool(annotations=ToolAnnotations(readOnlyHint=True)) |
@@ -284,6 +284,9 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| # Direct messages to this peer | ||
| # v3.6.12 (mesh-3): only UNREAD direct messages — was returning read | ||
| # ones too, so every poll re-listed already-read messages until the | ||
| # 24h cleanup (broadcast/project already filter unread via mesh_reads). | ||
| direct = conn.execute( | ||
| "SELECT id, from_peer, to_peer, msg_type, content, read, created_at, " | ||
| "target_type, project_path FROM mesh_messages " | ||
| "WHERE to_peer=? AND target_type='peer' " | ||
| "WHERE to_peer=? AND target_type='peer' AND COALESCE(read, 0) = 0 " | ||
| "AND (expires_at IS NULL OR expires_at > ?) " | ||
@@ -415,6 +418,14 @@ "ORDER BY created_at DESC LIMIT 100", | ||
| elif action == "release": | ||
| conn.execute("DELETE FROM mesh_locks WHERE file_path=? AND locked_by=?", | ||
| (file_path, locked_by)) | ||
| # v3.6.12 (mesh-2): report whether we actually released. The | ||
| # DELETE is correctly owner-scoped, but it previously returned | ||
| # released=ok:true even when a NON-owner released nothing. | ||
| cur = conn.execute( | ||
| "DELETE FROM mesh_locks WHERE file_path=? AND locked_by=?", | ||
| (file_path, locked_by), | ||
| ) | ||
| conn.commit() | ||
| return {"ok": True, "action": "released"} | ||
| if cur.rowcount and cur.rowcount > 0: | ||
| return {"ok": True, "action": "released"} | ||
| return {"ok": False, "action": "not_released", | ||
| "error": "no lock held by this peer for that file"} | ||
@@ -421,0 +432,0 @@ elif action == "query": |
@@ -320,7 +320,12 @@ # compress/router.py | ||
| def _normalize_whitespace(text: str) -> str: | ||
| """Layer 1 lossless: collapse excess blank lines, strip trailing spaces per line.""" | ||
| """Layer 1 safe: collapse runs of 3+ blank lines to a single blank line. | ||
| v3.6.12 (normalize-1): no longer rstrips trailing spaces per line — that | ||
| is LOSSY for Markdown hard breaks (two trailing spaces) and padded string | ||
| literals, which broke the 'lossless/safe' guarantee that callers (incl. | ||
| slm_compress mode=normalize) rely on. Only collapsing excess blank lines | ||
| remains, which is semantically safe. | ||
| """ | ||
| import re | ||
| text = re.sub(r"\n{3,}", "\n\n", text) | ||
| lines = [line.rstrip() for line in text.split("\n")] | ||
| return "\n".join(lines) | ||
| return re.sub(r"\n{3,}", "\n\n", text) | ||
@@ -327,0 +332,0 @@ # ── Lazy loaders ───────────────────────────────────────────────────── |
@@ -17,3 +17,8 @@ """CacheDB — wraps DatabaseManager for llmcache.db operations. | ||
| - CCR original_blob is ALSO AES-256-GCM encrypted. | ||
| - Key derivation: PBKDF2-HMAC-SHA256(password=machine_id, salt=_per_db_salt, iter=100_000) | ||
| - Key storage: a single MACHINE-WIDE key file (~/.superlocalmemory/opt-key.bin, | ||
| 0o600) is generated once and reused for all cache DBs on the machine. (The | ||
| per-DB salt below is persisted for provenance but does NOT make the AES key | ||
| per-DB — a single install has one llmcache.db, so a machine-wide key is the | ||
| intended model. A tampered/rotated key now degrades to a cache MISS, not a | ||
| crash — see _decrypt fail-open, v3.6.12 cache-1.) | ||
| - Salt: os.urandom(32) generated ONCE at DB creation, stored in | ||
@@ -47,2 +52,3 @@ llmcache_schema_version.description='salt:<hex>'. NO hardcoded salt. | ||
| from cryptography.exceptions import InvalidTag | ||
| from cryptography.hazmat.primitives.ciphers.aead import AESGCM | ||
@@ -373,3 +379,11 @@ from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC | ||
| aesgcm = AESGCM(self._aes_key) | ||
| return aesgcm.decrypt(nonce, ciphertext, associated_data=None) | ||
| # v3.6.12 (cache-1): AES-GCM raises cryptography.exceptions.InvalidTag | ||
| # (NOT a ValueError subclass) on a tampered/wrong-key blob. Every caller | ||
| # catches ValueError to fail-open; convert InvalidTag -> ValueError here | ||
| # at the single chokepoint so a corrupt/rotated-key cache entry degrades | ||
| # to a miss instead of raising out of get()/get_value()/ccr_get(). | ||
| try: | ||
| return aesgcm.decrypt(nonce, ciphertext, associated_data=None) | ||
| except InvalidTag as exc: | ||
| raise ValueError(f"AES-GCM authentication failed: {exc}") from exc | ||
@@ -376,0 +390,0 @@ # ---- assertion ---- |
@@ -111,4 +111,10 @@ #!/usr/bin/env python3 | ||
| from superlocalmemory.infra.rate_limiter import RateLimiter | ||
| _write_limiter = RateLimiter(max_requests=30, window_seconds=60) | ||
| _read_limiter = RateLimiter(max_requests=120, window_seconds=60) | ||
| from superlocalmemory.core.remote_mode import ( | ||
| rate_limit_config, | ||
| is_rate_limit_exempt, | ||
| ) | ||
| # v3.6.12 (issue #40): env-tunable thresholds (defaults unchanged). | ||
| _rl_write, _rl_read, _rl_window = rate_limit_config() | ||
| _write_limiter = RateLimiter(max_requests=_rl_write, window_seconds=_rl_window) | ||
| _read_limiter = RateLimiter(max_requests=_rl_read, window_seconds=_rl_window) | ||
@@ -118,2 +124,4 @@ @application.middleware("http") | ||
| client_ip = request.client.host if request.client else "unknown" | ||
| if is_rate_limit_exempt(client_ip): | ||
| return await call_next(request) | ||
| is_write = request.method in ("POST", "PUT", "DELETE", "PATCH") | ||
@@ -127,3 +135,3 @@ limiter = _write_limiter if is_write else _read_limiter | ||
| content={"error": "Too many requests."}, | ||
| headers={"Retry-After": str(limiter.window_seconds)}, | ||
| headers={"Retry-After": str(limiter.window)}, | ||
| ) | ||
@@ -130,0 +138,0 @@ response = await call_next(request) |
@@ -80,2 +80,15 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| raise HTTPException(503, detail="Mesh disabled in config") | ||
| # v3.6.12 (mesh-1 security): SLM_MESH_SHARED_SECRET was read by the broker but | ||
| # never verified on inbound mesh HTTP calls. When a secret is configured, | ||
| # require it (constant-time) from NON-loopback callers via X-Mesh-Secret. | ||
| # The local MCP client always calls over loopback and is exempt, so this is | ||
| # zero-change for single-machine use and closes the LAN mesh auth bypass. | ||
| secret = getattr(broker, "_shared_secret", None) | ||
| if secret: | ||
| client_host = request.client.host if request.client else "" | ||
| if client_host not in ("127.0.0.1", "::1", "localhost"): | ||
| import hmac | ||
| presented = request.headers.get("x-mesh-secret", "") | ||
| if not hmac.compare_digest(presented, secret): | ||
| raise HTTPException(401, detail="invalid or missing mesh secret") | ||
| return broker | ||
@@ -82,0 +95,0 @@ |
@@ -36,4 +36,7 @@ """GET /internal/token — serve install token to the local dashboard. | ||
| "http://127.0.0.1", | ||
| "https://127.0.0.1", | ||
| "http://localhost", | ||
| "https://localhost", | ||
| "http://[::1]", | ||
| "https://[::1]", | ||
| ) | ||
@@ -61,4 +64,13 @@ | ||
| # v3.6.12 (issue #39): in SLM_REMOTE mode, also serve the token to | ||
| # explicitly-allowlisted LAN clients so a remote-browser dashboard can load | ||
| # the Brain page. Default stays loopback-only — remote_mode helpers return | ||
| # False unless SLM_REMOTE=1 AND the client IP is in SLM_MCP_ALLOWED_HOSTS. | ||
| from superlocalmemory.core.remote_mode import ( | ||
| is_lan_client_allowed, | ||
| is_remote_origin_allowed, | ||
| ) | ||
| client_host = request.client.host if request.client else "" | ||
| if not is_loopback(client_host): | ||
| if not is_loopback(client_host) and not is_lan_client_allowed(client_host): | ||
| return JSONResponse({"error": "loopback only"}, status_code=403) | ||
@@ -68,3 +80,3 @@ | ||
| origin = headers.get("origin", "") | ||
| if not _origin_is_loopback(origin): | ||
| if not _origin_is_loopback(origin) and not is_remote_origin_allowed(origin): | ||
| return JSONResponse( | ||
@@ -71,0 +83,0 @@ {"error": "origin not allowed"}, status_code=403, |
@@ -85,4 +85,10 @@ #!/usr/bin/env python3 | ||
| from superlocalmemory.infra.rate_limiter import RateLimiter | ||
| _write_limiter = RateLimiter(max_requests=30, window_seconds=60) | ||
| _read_limiter = RateLimiter(max_requests=120, window_seconds=60) | ||
| from superlocalmemory.core.remote_mode import ( | ||
| rate_limit_config, | ||
| is_rate_limit_exempt, | ||
| ) | ||
| # v3.6.12 (issue #40): env-tunable thresholds (defaults unchanged). | ||
| _rl_write, _rl_read, _rl_window = rate_limit_config() | ||
| _write_limiter = RateLimiter(max_requests=_rl_write, window_seconds=_rl_window) | ||
| _read_limiter = RateLimiter(max_requests=_rl_read, window_seconds=_rl_window) | ||
@@ -92,2 +98,4 @@ @application.middleware("http") | ||
| client_ip = request.client.host if request.client else "unknown" | ||
| if is_rate_limit_exempt(client_ip): | ||
| return await call_next(request) | ||
| is_write = request.method in ("POST", "PUT", "DELETE", "PATCH") | ||
@@ -170,4 +178,7 @@ limiter = _write_limiter if is_write else _read_limiter | ||
| application.include_router(_mod.router) | ||
| except (ImportError, Exception): | ||
| pass | ||
| except (ImportError, Exception) as _exc: | ||
| # v3.6.12 (settings-3): was a silent `pass` — a transient import | ||
| # error in learning.py alone 404s 3 dashboard panes (Learning, | ||
| # Patterns, Feedback) with no trace. Log it like the chat loop above. | ||
| logger.warning("Optional router %s failed: %s", _module_name, _exc) | ||
@@ -174,0 +185,0 @@ # Wire WebSocket manager into routes that need broadcast capability |
@@ -593,2 +593,11 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar | ||
| """Full-text search via FTS5, joined to facts table for reconstruction.""" | ||
| # v3.6.12 (search-1): the raw query was passed straight into FTS5 MATCH, | ||
| # so any '?', '-', quote, or trailing boolean keyword (AND/OR/NOT) raised | ||
| # an FTS5 syntax error. Tokenize to word characters, quote each token, | ||
| # and OR-join — mirrors the recall BM25 channel's safe MATCH expression. | ||
| import re as _re | ||
| tokens = [t for t in _re.findall(r"\w+", query.lower()) if t] | ||
| if not tokens: | ||
| return [] | ||
| match_expr = " OR ".join(f'"{t}"' for t in tokens) | ||
| rows = self.execute( | ||
@@ -599,3 +608,3 @@ """SELECT f.* FROM atomic_facts_fts AS fts | ||
| ORDER BY fts.rank LIMIT ?""", | ||
| (query, profile_id, limit), | ||
| (match_expr, profile_id, limit), | ||
| ) | ||
@@ -602,0 +611,0 @@ return [self._row_to_fact(r) for r in rows] |
@@ -173,2 +173,16 @@ // SuperLocalMemory V3 — Auto-Capture/Recall Settings | ||
| // v3.6.12 (issue #39/#40): populate the endpoint field from the SAVED | ||
| // config endpoint. updateProviderUI() above sets the field to the | ||
| // provider's DEFAULT (e.g. https://api.openai.com/v1), which hides the | ||
| // user's real custom endpoint (llama.cpp/LM Studio) and makes Test | ||
| // Connection probe the wrong URL → 401. Override with data.endpoint here. | ||
| if (data.endpoint) { | ||
| setTimeout(function() { | ||
| var epEl = document.getElementById('settings-endpoint'); | ||
| if (epEl) epEl.value = data.endpoint; | ||
| var epRow = document.getElementById('settings-endpoint-row'); | ||
| if (epRow) epRow.style.display = 'block'; | ||
| }, 0); | ||
| } | ||
| // After provider UI updates, set the saved model value | ||
@@ -318,2 +332,7 @@ if (model) { | ||
| var apiKey = document.getElementById('settings-api-key')?.value || ''; | ||
| // v3.6.12 (issue #39): include the configured custom endpoint. Without this | ||
| // the backend never sees base_url, treats a custom llama.cpp/LM-Studio server | ||
| // as official OpenAI, and 401s on an empty key. Was the real cause of the | ||
| // "Test Connection fails / API key required" report against Mode B. | ||
| var endpoint = document.getElementById('settings-endpoint')?.value || ''; | ||
| var resultEl = document.getElementById('settings-test-result'); | ||
@@ -331,2 +350,3 @@ | ||
| if (apiKey) testBody.api_key = apiKey; | ||
| if (endpoint) { testBody.base_url = endpoint; testBody.endpoint = endpoint; } | ||
| var resp = await fetch('/api/v3/provider/test', { | ||
@@ -354,2 +374,5 @@ method: 'POST', | ||
| var apiKey = document.getElementById('settings-api-key')?.value || ''; | ||
| // v3.6.12 (settings-2): persist the custom endpoint too, else a llama.cpp/ | ||
| // LM-Studio/Azure endpoint can never be saved (backend reads base_url). | ||
| var endpoint = document.getElementById('settings-endpoint')?.value || ''; | ||
@@ -365,2 +388,3 @@ var statusEl = document.getElementById('settings-save-status'); | ||
| var payload = Object.assign({mode: mode, provider: provider, model: model, api_key: apiKey}, embParams); | ||
| if (endpoint) { payload.base_url = endpoint; payload.endpoint = endpoint; } | ||
| var modeResp = await fetch('/api/v3/mode/set', { | ||
@@ -367,0 +391,0 @@ method: 'POST', |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Deprecated
Maintenance: The maintainer of the package marked it as deprecated. This could indicate that a single version should not be used, or that the package is no longer maintained and any new vulnerabilities will not be fixed.
Found 1 instance in 1 package
AI-detected potential code anomaly
Supply chain risk: AI has identified unusual behaviors that may pose a security risk.
Found 6 instances in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
AI-detected potential code anomaly
Supply chain risk: AI has identified unusual behaviors that may pose a security risk.
Found 5 instances in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
7845600
0.08%590
0.17%954
0.21%119475
-0.03%1
Infinity%49
2.08%