superlocalmemory - npm Package Compare versions

+197

src/superlocalmemory/core/remote_mode.py

		# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		# Licensed under AGPL-3.0-or-later - see LICENSE file
		# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com

		"""Distributed / LAN deployment mode — the single ``SLM_REMOTE`` switch.

		SuperLocalMemory historically assumes every dashboard browser, MCP client,
		and API caller lives on ``127.0.0.1``. That assumption breaks three things
		for users who deploy SLM on a server and reach it across a LAN (issue #39):

		1. ``/internal/token`` refuses any non-loopback client → Brain page can't
		fetch the install token → "Couldn't load Brain".
		2. The MCP Streamable-HTTP transport is stateful — every call must
		replay the ``Mcp-Session-Id`` from the ``initialize`` handshake. A
		gateway/hub that forwards a tool call without replaying it gets
		``-32600 Session not found``.
		3. Dashboard CSRF origin checks only accept loopback origins.

		``SLM_REMOTE=1`` flips all three assumptions at once, default OFF so the
		loopback-only security posture is unchanged for the 99% local case. LAN
		access is still gated by an explicit IP allowlist (``SLM_MCP_ALLOWED_HOSTS``)
		— remote mode alone does not throw the doors open.

		Granular overrides (each implied by ``SLM_REMOTE=1`` but usable alone):
		* ``SLM_MCP_STATELESS=1`` — stateless MCP transport only (gateway fix),
		without opening the dashboard token endpoint.

		Security note (WORSTCASE): stateless MCP drops per-session isolation, and
		serving the install token to a LAN host lets any allowlisted machine read
		the brain. Keep the allowlist specific (never blanket ``*`` unless the
		network is fully trusted) — see ``docs/distributed-deployment.md``.
		"""

		from __future__ import annotations

		import ipaddress
		import os

		# Lower-cased spellings that switch an environment flag on.
		_TRUTHY = frozenset(("1", "true", "yes", "on"))


		def _is_truthy(value: str | None) -> bool:
		    """Report whether ``value`` spells an enabled flag.

		    ``None`` and the empty string count as disabled. Any other value is
		    trimmed, lower-cased and looked up in ``_TRUTHY``.
		    """
		    if not value:
		        return False
		    normalized = value.strip().lower()
		    return normalized in _TRUTHY


		def is_remote_mode() -> bool:
		    """Return True when ``SLM_REMOTE`` opts this daemon into LAN/distributed mode.

		    The environment is re-read on every call, so the answer follows the
		    current value of the variable.
		    """
		    flag = os.environ.get("SLM_REMOTE")
		    return _is_truthy(flag)


		def mcp_stateless() -> bool:
		    """Return True when the MCP transport should run without session ids.

		    Either switch turns it on: the umbrella ``SLM_REMOTE=1`` or the granular
		    ``SLM_MCP_STATELESS=1``. In stateless mode a gateway/hub can forward
		    ``tools/call`` without replaying the ``Mcp-Session-Id`` handshake.
		    """
		    if is_remote_mode():
		        return True
		    return _is_truthy(os.environ.get("SLM_MCP_STATELESS"))


		def _allowlist_entries() -> list[str]:
		    """Read the trusted-client allowlist from ``SLM_MCP_ALLOWED_HOSTS``.

		    The variable holds comma-separated entries. Each entry may be ``*``
		    (any), an exact IP, a CIDR block (``192.168.1.0/24``) or a prefix
		    wildcard (``192.168.*``), optionally followed by a host-header style
		    ``:port`` / ``:*`` suffix. Surrounding whitespace is trimmed and empty
		    items are dropped; an unset variable yields an empty list.
		    """
		    configured = os.environ.get("SLM_MCP_ALLOWED_HOSTS", "")
		    entries = []
		    for piece in configured.strip().split(","):
		        trimmed = piece.strip()
		        if trimmed:
		            entries.append(trimmed)
		    return entries


		def _strip_port(entry: str) -> str:
		    """Drop a trailing ``:port`` / ``:*`` host-header suffix from an allowlist entry.

		    Recognised shapes, checked in this order:

		    * CIDR ``net/prefix[:port]`` - only text after the prefix length is
		      removed, so IPv6 networks such as ``fd00::/8`` keep their colons.
		      (Cutting at the first colon of the whole entry turned ``fd00::/8``
		      into ``fd00``, which can never match a client.)
		    * Bracketed IPv6 ``[addr][:port]`` - brackets and suffix are removed.
		    * ``host[:port]`` with exactly one colon (IPv4 / hostname).
		    * Anything else, e.g. a bracketless IPv6 literal, is returned with only
		      surrounding whitespace trimmed.

		    A malformed CIDR whose network part itself carries a port
		    (``1.2.3.4:80/24``) is returned as-is; it then fails network parsing
		    and matches nothing.
		    """
		    e = entry.strip()
		    if "/" in e:
		        network, _, tail = e.partition("/")
		        # Only the prefix-length side can carry a stray :port / :* suffix.
		        return f"{network}/{tail.partition(':')[0]}"
		    if e.startswith("["):
		        # Host-header form of an IPv6 literal: [addr]:port -> addr
		        return e[1:].partition("]")[0]
		    if e.count(":") == 1:  # host:port or host:* (IPv4 / hostname)
		        return e.split(":", 1)[0]
		    return e


		def _host_matches(entry: str, client_host: str, client_ip) -> bool:
		    """Return True when allowlist ``entry`` covers the given client.

		    Args:
		        entry: One raw allowlist item; a ``:port`` suffix is ignored.
		        client_host: The client address (or Origin host) as a string.
		        client_ip: The ``ipaddress`` object parsed from ``client_host``, or
		            ``None`` when ``client_host`` is not a valid IP literal.

		    Matching rules, in order: an empty entry never matches; ``*`` always
		    matches; a CIDR entry matches by network membership (an unparsable
		    network matches nothing); a trailing-``*`` entry is a string prefix
		    match; anything else must equal ``client_host`` exactly.
		    """
		    host = _strip_port(entry).strip()
		    if not host:
		        return False
		    if host == "*":
		        return True
		    if "/" in host and client_ip is not None:
		        try:
		            return client_ip in ipaddress.ip_network(host, strict=False)
		        except ValueError:
		            return False
		    if host.endswith("*"):
		        # STRING prefix match (not CIDR), so it is only sound for numeric
		        # addresses. is_remote_origin_allowed() feeds the browser-supplied
		        # Origin host through here; without the numeric check a page served
		        # from "192.168.evil.com" would satisfy "192.168.*" and pass the CSRF
		        # origin guard. Prefer CIDR (192.168.0.0/16) for unambiguous
		        # network matching; wildcards are a convenience.
		        if client_ip is None:
		            return False
		        return client_host.startswith(host[:-1])
		    return host == client_host


		def is_lan_client_allowed(client_host: str) -> bool:
		    """Return True when remote mode is ON and ``client_host`` is allowlisted.

		    Loopback is the caller's business; this only rules on non-loopback LAN
		    clients. The answer is False when remote mode is off, when
		    ``client_host`` is empty, or when the allowlist is empty, so the default
		    posture stays loopback-only.
		    """
		    if not (is_remote_mode() and client_host):
		        return False
		    allowlist = _allowlist_entries()
		    if not allowlist:
		        return False
		    # A host that is not an IP literal is still matched, as a plain string.
		    try:
		        parsed_ip = ipaddress.ip_address(client_host)
		    except ValueError:
		        parsed_ip = None
		    for item in allowlist:
		        if _host_matches(item, client_host, parsed_ip):
		            return True
		    return False


		def is_remote_origin_allowed(origin: str) -> bool:
		    """Return True when remote mode is ON and ``origin``'s host is allowlisted.

		    ``origin`` is a full URL such as ``http://192.168.50.144:8765``. An
		    empty origin is rejected here (loopback callers deal with that case).
		    Used to relax the dashboard CSRF origin guard for trusted LAN dashboards.
		    """
		    if not is_remote_mode() or not origin:
		        return False
		    # Keep what follows "scheme://" (or everything when there is no scheme),
		    # then cut at the first path separator to isolate host[:port].
		    _scheme, separator, remainder = origin.partition("://")
		    authority = remainder if separator else origin
		    host = authority.partition("/")[0]
		    if host.startswith("["):
		        # Bracketed IPv6 literal: keep the address between the brackets.
		        host = host.partition("]")[0].lstrip("[")
		    elif host.count(":") == 1:
		        # IPv4 / hostname with a single :port suffix.
		        host = host.partition(":")[0]
		    return is_lan_client_allowed(host)


		def _env_int(name: str, default: int) -> int:
		    """Return env var ``name`` as a positive int, else ``default``.

		    ``default`` is used when the variable is unset, blank, not an integer,
		    or not strictly greater than zero.
		    """
		    text = os.environ.get(name, "").strip()
		    if text:
		        try:
		            parsed = int(text)
		        except ValueError:
		            pass
		        else:
		            if parsed > 0:
		                return parsed
		    return default


		def rate_limit_config() -> tuple[int, int, int]:
		    """Return ``(write_max, read_max, window_seconds)`` for the dashboard limiter.

		    Each value comes from its environment variable - ``SLM_RATE_LIMIT_WRITE``,
		    ``SLM_RATE_LIMIT_READ``, ``SLM_RATE_LIMIT_WINDOW`` - and falls back to
		    30 writes / 120 reads per 60 seconds when the variable is unset or not a
		    positive integer. The knobs exist so distributed/LAN setups can raise
		    the limits (issue #40 Issue 3).
		    """
		    return (
		        _env_int("SLM_RATE_LIMIT_WRITE", 30),
		        _env_int("SLM_RATE_LIMIT_READ", 120),
		        _env_int("SLM_RATE_LIMIT_WINDOW", 60),
		    )


		def is_rate_limit_exempt(client_host: str) -> bool:
		    """Return True when ``client_host`` should bypass the dashboard rate limiter.

		    The three loopback spellings (``127.0.0.1``, ``::1``, ``localhost``) are
		    always exempt because the dashboard polls itself rapidly. Every other
		    client is exempt only when ``is_lan_client_allowed`` accepts it, i.e. an
		    allowlisted LAN client in remote mode (issue #40 Issue 3).
		    """
		    loopback_names = ("127.0.0.1", "::1", "localhost")
		    if client_host not in loopback_names:
		        return is_lan_client_allowed(client_host)
		    return True


		# Names exported by ``from ... import *``; the underscore-prefixed helpers
		# defined in this module are not listed.
		__all__ = (
		"is_remote_mode",
		"mcp_stateless",
		"is_lan_client_allowed",
		"is_remote_origin_allowed",
		"rate_limit_config",
		"is_rate_limit_exempt",
		)

+1

-1

package.json

		{
		"name": "superlocalmemory",
		"version": "3.6.11",
		"version": "3.6.12",
		"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
		@@ -5,0 +5,0 @@ "keywords": [

+1

-1

pyproject.toml

		[project]
		name = "superlocalmemory"
		version = "3.6.11"
		version = "3.6.12"
		description = "Information-geometric agent memory with mathematical guarantees"
		@@ -5,0 +5,0 @@ readme = "README.md"

+2

-0

README.md

		@@ -38,2 +38,4 @@ <p align="center">
		>
		> v3.6.12 "Distributed-ready": Run SLM on a server and reach it across your LAN. `SLM_REMOTE=1` (default off) lets the dashboard load from a remote browser, lets MCP gateways/hubs forward tool calls, and makes custom local LLM endpoints (llama.cpp / LM Studio / Azure) configurable right from the dashboard — plus a batch of stability and security fixes. See [`docs/distributed-deployment.md`](docs/distributed-deployment.md).
		>
		> v3.6.11 "Optimize Everywhere": Three surfaces. Proxy (Surface A) — full-turn cache + compress on transport; needs `ANTHROPIC_BASE_URL`, shrinks the context window. MCP tools (Surface B) — `slm_compress`, `slm_retrieve`, `slm_cache_set`, `slm_cache_get`, `slm_optimize_stats`; no proxy, no window shrink, works on any Claude subscription. Skill (Surface C) — `slm-optimize` installs in `~/.claude/skills/`; zero-config auto-compress for large tool outputs and CLAUDE.md. No proxy, full 1M window. [See Three Surfaces →](#three-surfaces-proxy--mcp-tools--skill)
		@@ -40,0 +42,0 @@ >

+3

-1

src/superlocalmemory.egg-info/PKG-INFO

		Metadata-Version: 2.4
		Name: superlocalmemory
		Version: 3.6.11
		Version: 3.6.12
		Summary: Information-geometric agent memory with mathematical guarantees
		@@ -131,2 +131,4 @@ Author-email: Varun Pratap Bhardwaj <admin@superlocalmemory.com>
		>
		> v3.6.12 "Distributed-ready": Run SLM on a server and reach it across your LAN. `SLM_REMOTE=1` (default off) lets the dashboard load from a remote browser, lets MCP gateways/hubs forward tool calls, and makes custom local LLM endpoints (llama.cpp / LM Studio / Azure) configurable right from the dashboard — plus a batch of stability and security fixes. See [`docs/distributed-deployment.md`](docs/distributed-deployment.md).
		>
		> v3.6.11 "Optimize Everywhere": Three surfaces. Proxy (Surface A) — full-turn cache + compress on transport; needs `ANTHROPIC_BASE_URL`, shrinks the context window. MCP tools (Surface B) — `slm_compress`, `slm_retrieve`, `slm_cache_set`, `slm_cache_get`, `slm_optimize_stats`; no proxy, no window shrink, works on any Claude subscription. Skill (Surface C) — `slm-optimize` installs in `~/.claude/skills/`; zero-config auto-compress for large tool outputs and CLAUDE.md. No proxy, full 1M window. [See Three Surfaces →](#three-surfaces-proxy--mcp-tools--skill)
		@@ -133,0 +135,0 @@ >

+1

-0

src/superlocalmemory.egg-info/SOURCES.txt

		@@ -117,2 +117,3 @@ AUTHORS.md
		src/superlocalmemory/core/registry.py
		src/superlocalmemory/core/remote_mode.py
		src/superlocalmemory/core/reranker_worker.py
		@@ -119,0 +120,0 @@ src/superlocalmemory/core/safe_fs.py

+0

-407

src/superlocalmemory/cli/daemon.py

		@@ -411,408 +411,1 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		logger.warning("Some SLM workers still alive after %ds timeout", timeout)


		# ---------------------------------------------------------------------------
		# Server: HTTP request handler with engine singleton
		# ---------------------------------------------------------------------------

		# Engine singleton; stays None until _get_engine() builds it on first use.
		_engine = None
		# Monotonic timestamp of the most recent HTTP request; _idle_watchdog()
		# compares it against the idle timeout.
		_last_activity = time.monotonic()

		# ---------------------------------------------------------------------------
		# V3.3.28: Observation debounce buffer.
		#
		# When 20+ file edits arrive in quick succession (from parallel AI agents,
		# git checkout, or batch sed), we buffer observations for _OBSERVE_DEBOUNCE_SEC
		# seconds and deduplicate by content hash. This reduces 20 observations → 1-3
		# batches, each processed by the singleton engine (1 embedding worker).
		# ---------------------------------------------------------------------------

		# Debounce window in seconds. A malformed SLM_OBSERVE_DEBOUNCE_SEC falls back
		# to the 3.0s default instead of raising at import time, which would stop
		# the daemon from starting at all.
		try:
		    _OBSERVE_DEBOUNCE_SEC = float(os.environ.get("SLM_OBSERVE_DEBOUNCE_SEC", "3.0"))
		except ValueError:
		    _OBSERVE_DEBOUNCE_SEC = 3.0
		_observe_buffer: list[str] = []  # observations waiting for the next flush
		_observe_seen: set[str] = set()  # content hashes for dedup within window
		_observe_lock = threading.Lock()  # guards the buffer, the seen-set and the timer
		_observe_timer: threading.Timer | None = None  # pending flush timer, if any


		def _flush_observe_buffer() -> None:
		    """Drain the debounce buffer and run auto-capture over what was drained.

		    Under the lock the buffer is copied, then buffer, dedup set and timer
		    reference are reset; an empty buffer returns immediately. The drained
		    items are evaluated outside the lock. A failure on one observation is
		    logged and does not stop the rest of the batch.
		    """
		    global _observe_timer
		    with _observe_lock:
		        pending = _observe_buffer[:]
		        if not pending:
		            return
		        _observe_buffer.clear()
		        _observe_seen.clear()
		        _observe_timer = None

		    # Items were deduplicated when they were enqueued.
		    from superlocalmemory.hooks.auto_capture import AutoCapture
		    capturer = AutoCapture(engine=_get_engine())

		    for item in pending:
		        try:
		            verdict = capturer.evaluate(item)
		            if verdict.capture:
		                capturer.capture(item, category=verdict.category)
		        except Exception as exc:
		            # Swallow per-observation to protect the batch, but log so
		            # a pattern of dropped observations is visible.
		            logger.warning("observation dropped during batch: %s", exc)

		    logger.info("Observe debounce: processed %d observations (from buffer)", len(pending))


		def _enqueue_observation(content: str) -> dict:
		    """Queue ``content`` for debounced capture and answer immediately.

		    Content whose MD5 digest is already in the current window is rejected as
		    a duplicate. Otherwise it is buffered and the flush timer is restarted,
		    so the flush fires ``_OBSERVE_DEBOUNCE_SEC`` seconds after the latest
		    accepted observation.

		    Returns:
		        ``{"captured": False, "reason": ...}`` for a duplicate, else a dict
		        with ``captured``, ``queued``, ``buffer_size`` and ``debounce_sec``.
		    """
		    global _observe_timer
		    import hashlib
		    # MD5 serves only as a dedup fingerprint for the seen-set.
		    digest = hashlib.md5(content.encode()).hexdigest()

		    with _observe_lock:
		        if digest in _observe_seen:
		            return {"captured": False, "reason": "duplicate within debounce window"}

		        _observe_seen.add(digest)
		        _observe_buffer.append(content)
		        queued = len(_observe_buffer)

		        # Restart the countdown: cancel any pending flush and arm a new one.
		        # NOTE(review): the timer reset is kept inside the lock here; the
		        # flattened source does not show the original nesting - confirm.
		        previous = _observe_timer
		        if previous is not None:
		            previous.cancel()
		        timer = threading.Timer(_OBSERVE_DEBOUNCE_SEC, _flush_observe_buffer)
		        timer.daemon = True
		        _observe_timer = timer
		        timer.start()

		    return {"captured": True, "queued": True, "buffer_size": queued,
		            "debounce_sec": _OBSERVE_DEBOUNCE_SEC}


		def _get_engine():
		    """Return the process-wide ``MemoryEngine``, creating it on first use.

		    The engine is published to the module global only after
		    ``initialize()`` has succeeded. Assigning it before initialising meant
		    a failed start-up left a half-built engine cached, and every later call
		    returned that broken instance instead of retrying.

		    After initialisation the reranker, when present and capable, is warmed
		    up synchronously (up to 120 seconds).
		    """
		    global _engine
		    if _engine is not None:
		        return _engine

		    from superlocalmemory.core.config import SLMConfig
		    from superlocalmemory.core.engine import MemoryEngine

		    config = SLMConfig.load()
		    engine = MemoryEngine(config)
		    engine.initialize()
		    _engine = engine

		    # Force reranker warmup (blocking - daemon can afford to wait)
		    retrieval_eng = getattr(engine, '_retrieval_engine', None)
		    if retrieval_eng:
		        reranker = getattr(retrieval_eng, '_reranker', None)
		        if reranker and hasattr(reranker, 'warmup_sync'):
		            reranker.warmup_sync(timeout=120)

		    logger.info("Daemon engine initialized and warm")
		    return _engine


		class DaemonHandler(BaseHTTPRequestHandler):
		    """Lightweight HTTP handler for daemon requests.

		    GET routes: ``/health``, ``/recall``, ``/list``, ``/status``.
		    POST routes: ``/remember``, ``/observe``, ``/stop``.
		    Any other path answers 404. Every request refreshes ``_last_activity``.
		    """

		    def log_message(self, format, *args):
		        """Suppress default access logging."""
		        pass

		    def _send_json(self, status: int, data: dict) -> None:
		        """Send ``data`` as a JSON response with HTTP status ``status``."""
		        self.send_response(status)
		        self.send_header("Content-Type", "application/json")
		        self.end_headers()
		        self.wfile.write(json.dumps(data).encode())

		    def _read_body(self) -> dict:
		        """Parse the request body as JSON; a missing or empty body yields ``{}``.

		        A non-positive ``Content-Length`` is treated as "no body": a
		        negative size passed to ``rfile.read`` means "read until EOF"
		        rather than a bounded read.

		        Raises:
		            ValueError: if ``Content-Length`` is not an integer or the body
		                is not valid JSON (callers turn this into a 500).
		        """
		        length = int(self.headers.get("Content-Length", 0))
		        if length <= 0:
		            return {}
		        return json.loads(self.rfile.read(length).decode())

		    def do_GET(self) -> None:
		        """Serve ``/health``, ``/recall``, ``/list`` and ``/status``."""
		        global _last_activity
		        _last_activity = time.monotonic()

		        if self.path == "/health":
		            self._send_json(200, {"status": "ok", "pid": os.getpid()})
		            return

		        if self.path.startswith("/recall"):
		            try:
		                # Parse query from URL params
		                from urllib.parse import urlparse, parse_qs
		                params = parse_qs(urlparse(self.path).query)
		                query = params.get("q", [""])[0]
		                limit = int(params.get("limit", ["20"])[0])

		                # S9-DASH-02: session_id for outcome-queue enqueue.
		                # Priority: ?session_id= query arg > X-SLM-Session-Id
		                # header > synthetic "http:<ms-timestamp>". Without any of
		                # these the recall still works - it just doesn't produce a
		                # pending_outcome (hook-based signals can't match).
		                session_id = params.get("session_id", [""])[0]
		                if not session_id:
		                    session_id = self.headers.get("X-SLM-Session-Id", "")
		                if not session_id:
		                    session_id = f"http:{int(time.time() * 1000)}"

		                engine = _get_engine()
		                raw_fast = params.get("fast", ["false"])[0]
		                fast = raw_fast.lower() in ("true", "1")
		                response = engine.recall(
		                    query, limit=limit, session_id=session_id, fast=fast,
		                )
		                # Return the same field shape as recall_worker._handle_recall,
		                # so MCP processes that proxy through the daemon get recall_trace-
		                # compatible data without a second round trip.
		                memory_ids = list({
		                    r.fact.memory_id for r in response.results[:limit]
		                    if r.fact.memory_id
		                })
		                memory_map = (
		                    engine._db.get_memory_content_batch(memory_ids)
		                    if memory_ids else {}
		                )
		                results = []
		                for r in response.results[:limit]:
		                    fact_type = getattr(r.fact, "fact_type", None)
		                    lifecycle = getattr(r.fact, "lifecycle", None)
		                    # v3.5.1: sanitize control chars that break JSON (newlines, tabs in content).
		                    clean = r.fact.content.replace("\r", " ").replace("\n", " ").replace("\t", " ")
		                    sc_raw = memory_map.get(r.fact.memory_id, "")
		                    sc_clean = sc_raw.replace("\r", " ").replace("\n", " ").replace("\t", " ") if sc_raw else ""
		                    results.append({
		                        "fact_id": r.fact.fact_id,
		                        "memory_id": r.fact.memory_id,
		                        "content": clean,
		                        "source_content": sc_clean,
		                        "score": round(r.score, 4),
		                        "confidence": round(r.confidence, 4),
		                        "trust_score": round(r.trust_score, 4),
		                        "channel_scores": {
		                            k: round(v, 4)
		                            for k, v in (r.channel_scores or {}).items()
		                        },
		                        "fact_type": fact_type.value
		                        if fact_type and hasattr(fact_type, "value") else "",
		                        "lifecycle": lifecycle.value
		                        if lifecycle and hasattr(lifecycle, "value") else "",
		                        "access_count": getattr(r.fact, "access_count", 0),
		                        "evidence_chain": list(
		                            getattr(r, "evidence_chain", []) or []
		                        ),
		                    })
		                self._send_json(200, {
		                    "ok": True,
		                    "query": query,
		                    "query_type": response.query_type,
		                    "result_count": len(results),
		                    "retrieval_time_ms": round(response.retrieval_time_ms, 1),
		                    "channel_weights": {
		                        k: round(v, 3)
		                        for k, v in (response.channel_weights or {}).items()
		                    },
		                    "total_candidates": getattr(response, "total_candidates", 0),
		                    "results": results,
		                    "count": len(results),  # backward compat alias
		                })
		            except Exception as exc:
		                self._send_json(500, {"error": str(exc)})
		            return

		        if self.path == "/list":
		            try:
		                engine = _get_engine()
		                facts = engine.list_facts(limit=50)
		                items = [
		                    {"content": f.content[:100], "fact_type": getattr(f.fact_type, 'value', str(f.fact_type)),
		                     "created_at": (f.created_at or "")[:19], "fact_id": f.fact_id}
		                    for f in facts
		                ]
		                self._send_json(200, {"results": items, "count": len(items)})
		            except Exception as exc:
		                self._send_json(500, {"error": str(exc)})
		            return

		        if self.path == "/status":
		            # Guarded like the other engine-backed routes so an engine
		            # failure answers 500 instead of dropping the connection.
		            try:
		                engine = _get_engine()
		                uptime = time.monotonic() - _server_start_time
		                self._send_json(200, {
		                    "status": "running", "pid": os.getpid(),
		                    "uptime_s": round(uptime),
		                    "mode": engine._config.mode.value,
		                    "fact_count": engine.fact_count,
		                    "idle_s": round(time.monotonic() - _last_activity),
		                })
		            except Exception as exc:
		                self._send_json(500, {"error": str(exc)})
		            return

		        self._send_json(404, {"error": "not found"})

		    def do_POST(self) -> None:
		        """Serve ``/remember``, ``/observe`` and ``/stop``."""
		        global _last_activity
		        _last_activity = time.monotonic()

		        if self.path == "/remember":
		            try:
		                body = self._read_body()
		                content = body.get("content", "")
		                tags = body.get("tags", "")
		                extra_meta = body.get("metadata") or {}
		                if not content:
		                    self._send_json(400, {"error": "content required"})
		                    return

		                engine = _get_engine()
		                metadata = {"tags": tags} if tags else {}
		                if isinstance(extra_meta, dict):
		                    metadata.update(extra_meta)
		                fact_ids = engine.store(content, metadata=metadata)
		                self._send_json(200, {
		                    "ok": True,
		                    "fact_ids": fact_ids,
		                    "count": len(fact_ids),
		                })
		            except Exception as exc:
		                self._send_json(500, {"error": str(exc)})
		            return

		        if self.path == "/observe":
		            try:
		                body = self._read_body()
		                content = body.get("content", "")
		                if not content:
		                    self._send_json(400, {"error": "content required"})
		                    return

		                # V3.3.28: Debounced observation processing.
		                # Buffers observations for the debounce window, deduplicates,
		                # processes as batch. Returns immediately - the actual capture
		                # happens asynchronously via the debounce timer, using the
		                # singleton engine.
		                result = _enqueue_observation(content)
		                self._send_json(200, result)
		            except Exception as exc:
		                self._send_json(500, {"error": str(exc)})
		            return

		        if self.path == "/stop":
		            # Reply first, then shut down on a separate thread.
		            self._send_json(200, {"status": "stopping"})
		            Thread(target=_shutdown_server, daemon=True).start()
		            return

		        self._send_json(404, {"error": "not found"})


		# ---------------------------------------------------------------------------
		# Server lifecycle
		# ---------------------------------------------------------------------------

		# Running HTTPServer instance; None until start_server() creates it.
		_server: HTTPServer | None = None
		# Monotonic start timestamp; the /status route derives uptime from it.
		_server_start_time = time.monotonic()


		def _shutdown_server() -> None:
		    """Flush pending observations, close the engine and stop the HTTP server.

		    Every step is best-effort: a failure is logged and the remaining steps
		    still run. The PID and port files are removed last.

		    Must not run on the thread that is executing ``serve_forever()``:
		    ``shutdown()`` waits for that loop to exit and would wait forever.
		    """
		    global _engine, _server
		    try:
		        _flush_observe_buffer()
		    except Exception as exc:
		        logger.warning("flush observe buffer on shutdown failed: %s", exc)
		    time.sleep(0.5)
		    if _engine is not None:
		        try:
		            _engine.close()
		        except Exception as exc:
		            logger.warning("engine close on shutdown failed: %s", exc)
		        _engine = None
		    if _server is not None:
		        _server.shutdown()
		        # shutdown() only stops the serve loop; release the listening
		        # socket as well so the port is freed right away.
		        try:
		            _server.server_close()
		        except Exception as exc:
		            logger.warning("server close on shutdown failed: %s", exc)
		    _PID_FILE.unlink(missing_ok=True)
		    _PORT_FILE.unlink(missing_ok=True)


		def _idle_watchdog(timeout: int) -> None:
		    """Shut the daemon down once it has been idle for more than ``timeout`` seconds.

		    Runs forever, checking every 30 seconds how long ago ``_last_activity``
		    was refreshed. When the idle time exceeds ``timeout`` it calls
		    ``_shutdown_server()`` and terminates the process with ``os._exit(0)``.
		    """
		    while True:
		        time.sleep(30)
		        idle_for = time.monotonic() - _last_activity
		        if idle_for <= timeout:
		            continue
		        logger.info("Daemon idle for %ds, shutting down", int(idle_for))
		        _shutdown_server()
		        os._exit(0)


		def start_server(port: int = _DEFAULT_PORT, idle_timeout: int | None = None) -> None:
		    """Start the daemon HTTP server on ``127.0.0.1:port``. Blocks until stopped.

		    Args:
		        port: TCP port to bind on the loopback interface.
		        idle_timeout: Seconds of inactivity before auto-shutdown. A falsy
		            value defers to ``SLM_DAEMON_IDLE_TIMEOUT``, then to
		            ``_DEFAULT_IDLE_TIMEOUT``.

		    Start-up order: upgrade banner and data-dir migration (both
		    best-effort), PID/port files, SIGTERM handler, engine warm-up, idle
		    watchdog, then the blocking serve loop.
		    """
		    global _server, _server_start_time, _last_activity

		    idle_timeout = idle_timeout or int(os.environ.get(
		        "SLM_DAEMON_IDLE_TIMEOUT", str(_DEFAULT_IDLE_TIMEOUT),
		    ))

		    # Banner is advisory - a broken data dir must never prevent the daemon
		    # from starting, so the swallow here is intentional.
		    try:
		        from superlocalmemory import __version__ as _slm_ver
		        from superlocalmemory.cli.version_banner import check_and_emit_upgrade_banner
		        check_and_emit_upgrade_banner(_slm_ver)
		    except Exception as exc:
		        logger.warning("upgrade banner on daemon start failed: %s", exc)

		    # Apply the v3.4.26 data-dir migration now - the daemon is the
		    # authoritative holder of the DB, so this is the right place to do
		    # it unconditionally (``migrate`` is idempotent).
		    try:
		        from pathlib import Path as _P
		        from superlocalmemory.migrations.v3_4_25_to_v3_4_26 import (
		            is_ready as _is_ready, migrate as _migrate,
		        )
		        _data = _P(os.environ.get("SLM_DATA_DIR")
		                   or _P.home() / ".superlocalmemory")
		        if not _is_ready(_data):
		            _migrate(_data)
		    except Exception as exc:
		        logger.warning("v3.4.26 migration on daemon start failed: %s", exc)

		    # Write PID + port files
		    _PID_FILE.parent.mkdir(parents=True, exist_ok=True)
		    _PID_FILE.write_text(str(os.getpid()))
		    _PORT_FILE.write_text(str(port))

		    # Handle SIGTERM for graceful shutdown. The handler runs on the main
		    # thread - the same thread that executes serve_forever() - and
		    # BaseServer.shutdown() blocks until that loop exits, so calling
		    # _shutdown_server() directly from the handler deadlocks and the process
		    # never terminates. Hand the work to a helper thread instead.
		    def _stop_and_exit() -> None:
		        try:
		            _shutdown_server()
		        finally:
		            os._exit(0)

		    def _on_sigterm(*_) -> None:
		        Thread(target=_stop_and_exit, daemon=True, name="sigterm-shutdown").start()

		    signal.signal(signal.SIGTERM, _on_sigterm)

		    # Pre-warm engine (this is the cold start - daemon absorbs it once)
		    logger.info("Daemon starting — warming engine...")
		    _get_engine()
		    logger.info("Engine warm. Daemon ready on port %d (idle timeout: %ds)", port, idle_timeout)

		    _server_start_time = time.monotonic()
		    _last_activity = time.monotonic()

		    # Start idle watchdog
		    Thread(target=_idle_watchdog, args=(idle_timeout,), daemon=True, name="idle-watchdog").start()

		    # Start HTTP server
		    # SO_REUSEADDR must be set on the class BEFORE __init__ calls bind()
		    HTTPServer.allow_reuse_address = True
		    _server = HTTPServer(("127.0.0.1", port), DaemonHandler)
		    try:
		        _server.serve_forever()
		    except KeyboardInterrupt:
		        pass
		    finally:
		        _shutdown_server()


		# ---------------------------------------------------------------------------
		# CLI entry point
		# ---------------------------------------------------------------------------

		if __name__ == "__main__":
		    # Script entry: ``--start`` runs the server in the foreground,
		    # ``--stop`` stops a running daemon, anything else prints usage.
		    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
		    if "--start" in sys.argv:
		        start_server()
		    elif "--stop" in sys.argv:
		        stop_daemon()
		    else:
		        print("Usage: python -m superlocalmemory.cli.daemon --start|--stop")

+3

-1

src/superlocalmemory/cli/main.py

		@@ -186,3 +186,5 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

		recall_p = sub.add_parser("recall", help="Semantic search with 4-channel retrieval")
		# v3.6.12 (parity-3): `search` is an alias of `recall` so the CLI has the
		# same search verb the MCP exposes (handlers dict maps both to cmd_recall).
		recall_p = sub.add_parser("recall", aliases=["search"], help="Semantic search with 4-channel retrieval")
		recall_p.add_argument("query", help="Search query")
		@@ -189,0 +191,0 @@ recall_p.add_argument("--limit", type=int, default=10, help="Max results (default 10)")

+4

-1

src/superlocalmemory/core/context_cache.py

		@@ -208,3 +208,6 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		"""
		content = redact_secrets(entry.content)[:MAX_CONTENT_CHARS]
		# v3.6.12 (redact-1): scrub dashboard/cached content at HIGH aggression
		# so Bearer/GitHub-PAT/Anthropic/OpenAI/GENERIC_KEY patterns are caught
		# (the default 'normal' skipped them, leaking those shapes to the UI).
		content = redact_secrets(entry.content, aggression="high")[:MAX_CONTENT_CHARS]
		fact_ids_json = json.dumps(list(entry.fact_ids))
		@@ -211,0 +214,0 @@ byte_size = (

+4

-1

src/superlocalmemory/core/fact_consolidator.py

		@@ -370,3 +370,6 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		model = getattr(config.llm, 'model', model) or model
		timeout = getattr(config.llm, 'timeout', timeout) or timeout
		# v3.6.12 (modeb-4): the LLMConfig field is `timeout_seconds`, not
		# `timeout` — the old read always missed and silently used 30s.
		timeout = getattr(config.llm, 'timeout_seconds', None) or \
		getattr(config.llm, 'timeout', None) or timeout

		@@ -373,0 +376,0 @@ fact_texts = "\n".join(f"- {f['content']}" for f in facts[:_MAX_CLUSTER_SIZE])

+4

-1

src/superlocalmemory/core/summarizer.py

		@@ -127,4 +127,7 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		model = getattr(self._config.llm, 'model', None) or "llama3.1:8b"
		# v3.6.12 (modeb-2): honor the configured endpoint instead of hardcoding
		# localhost:11434, so a remote/non-default Ollama host works in Mode B.
		_base = (getattr(self._config.llm, 'api_base', '') or "http://localhost:11434").rstrip("/")
		with httpx.Client(timeout=httpx.Timeout(30.0)) as client:
		resp = client.post("http://localhost:11434/api/generate", json={
		resp = client.post(f"{_base}/api/generate", json={
		"model": model,
		@@ -131,0 +134,0 @@ "prompt": prompt,

+7

-1

src/superlocalmemory/llm/backbone.py

		@@ -141,3 +141,9 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		return True
		return bool(self._api_key)
		# v3.6.12 (modeb-1): a custom local OpenAI-compatible endpoint
		# (llama.cpp, LM Studio, vLLM) needs NO API key — _build_openai already
		# omits the Authorization header when the key is empty. Treat a
		# configured base_url as sufficient, otherwise Mode B silently falls
		# back to Mode A extraction for keyless local endpoints.
		_base = getattr(self, "_base_url", "") or getattr(self, "_api_base", "")
		return bool(self._api_key) or bool(_base)

		@@ -144,0 +150,0 @@ @property

+7

-3

src/superlocalmemory/mcp/agent_context.py

		@@ -18,4 +18,8 @@ """Per-HTTP-request agent ID resolution — ContextVar home.

		# v3.6.12 (parity-1): default is "" (the "no agent routed" sentinel), NOT the
		# user-visible "mcp_client". Sanitized agent ids are [A-Za-z0-9._-], so "" can
		# never collide — a client that explicitly routes to /mcp/mcp_client is now
		# distinguishable from a bare /mcp/ request with no agent segment.
		_current_agent_id: contextvars.ContextVar[str] = contextvars.ContextVar(
		"slm_agent_id", default="mcp_client"
		"slm_agent_id", default=""
		)
		@@ -46,4 +50,4 @@
		ctx_id = _current_agent_id.get()
		if ctx_id != "mcp_client":
		return ctx_id
		if ctx_id:
		return ctx_id # an explicitly-routed agent id (incl. "mcp_client")
		if env_fallback:
		@@ -50,0 +54,0 @@ return os.environ.get("SLM_AGENT_ID", "mcp_client")

+13

-1

src/superlocalmemory/mcp/tools_core.py

		@@ -312,3 +312,6 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		pid = profile_id or engine.profile_id
		facts = engine._db.get_all_facts(pid)[:limit]
		# v3.6.12 (search-2): push the limit into the query — was loading the
		# ENTIRE facts table (deserializing every 768-float embedding) just
		# to return the top N. get_all_facts preserves created_at DESC order.
		facts = engine._db.get_all_facts(pid, limit=limit)
		items = []
		@@ -405,2 +408,11 @@ for f in facts:

		# v3.6.12 (search-3): recall/delete run in a separate worker
		# subprocess that caches its engine (and profile_id) at init. Recycle
		# it so the NEXT recall uses the new profile instead of the stale one.
		try:
		from superlocalmemory.core.worker_pool import WorkerPool
		WorkerPool.shared().shutdown()
		except Exception:
		logger.debug("worker-pool recycle on profile switch skipped")

		return {
		@@ -407,0 +419,0 @@ "success": True,

+14

-6

src/superlocalmemory/mcp/tools_mesh.py

		@@ -79,3 +79,3 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		"""Register this session with the mesh broker if not already."""
		global _REGISTERED, _PROJECT_PATH
		global _REGISTERED, _PROJECT_PATH, _PEER_ID
		if _REGISTERED:
		@@ -93,2 +93,7 @@ return
		if result:
		# v3.6.12 (mesh-1): the broker mints its OWN peer_id (RegisterRequest has
		# no peer_id field, so our body value is dropped by pydantic). Adopt the
		# broker's id BEFORE starting the heartbeat, otherwise heartbeat/send/
		# inbox all target a non-existent peer → 404s and the session is reaped.
		_PEER_ID = result.get("peer_id", _PEER_ID)
		_REGISTERED = True
		@@ -196,3 +201,3 @@ _start_heartbeat()
		)
		return result or {"error": "Failed to send message"}
		return result or {"ok": False, "error": "Failed to send message"}

		@@ -213,4 +218,7 @@ @server.tool()
		msg_list = (messages or {}).get("messages", [])
		# Auto-mark unread messages as read
		unread_ids = [m["id"] for m in msg_list if not m.get("read")]
		# Auto-mark unread messages as read. v3.6.12 (failopen-2): use .get("id")
		# — a malformed broker message without an "id" key used to raise KeyError
		# out to the agent, violating the never-raise contract.
		unread_ids = [m["id"] for m in msg_list
		if not m.get("read") and m.get("id") is not None]
		if unread_ids:
		@@ -246,3 +254,3 @@ await asyncio.to_thread(
		)
		return result or {"error": "Failed to set state"}
		return result or {"ok": False, "error": "Failed to set state"}

		@@ -274,3 +282,3 @@ if key:
		)
		return result or {"error": "Lock operation failed"}
		return result or {"ok": False, "error": "Lock operation failed"}

		@@ -277,0 +285,0 @@ @server.tool(annotations=ToolAnnotations(readOnlyHint=True))

+15

-4

src/superlocalmemory/mesh/broker.py

		@@ -284,6 +284,9 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		# Direct messages to this peer
		# v3.6.12 (mesh-3): only UNREAD direct messages — was returning read
		# ones too, so every poll re-listed already-read messages until the
		# 24h cleanup (broadcast/project already filter unread via mesh_reads).
		direct = conn.execute(
		"SELECT id, from_peer, to_peer, msg_type, content, read, created_at, "
		"target_type, project_path FROM mesh_messages "
		"WHERE to_peer=? AND target_type='peer' "
		"WHERE to_peer=? AND target_type='peer' AND COALESCE(read, 0) = 0 "
		"AND (expires_at IS NULL OR expires_at > ?) "
		@@ -415,6 +418,14 @@ "ORDER BY created_at DESC LIMIT 100",
		elif action == "release":
		conn.execute("DELETE FROM mesh_locks WHERE file_path=? AND locked_by=?",
		(file_path, locked_by))
		# v3.6.12 (mesh-2): report whether we actually released. The
		# DELETE is correctly owner-scoped, but it previously returned
		# released=ok:true even when a NON-owner released nothing.
		cur = conn.execute(
		"DELETE FROM mesh_locks WHERE file_path=? AND locked_by=?",
		(file_path, locked_by),
		)
		conn.commit()
		return {"ok": True, "action": "released"}
		if cur.rowcount and cur.rowcount > 0:
		return {"ok": True, "action": "released"}
		return {"ok": False, "action": "not_released",
		"error": "no lock held by this peer for that file"}

		@@ -421,0 +432,0 @@ elif action == "query":

+9

-4

src/superlocalmemory/optimize/compress/router.py

		@@ -320,7 +320,12 @@ # compress/router.py
		def _normalize_whitespace(text: str) -> str:
		"""Layer 1 lossless: collapse excess blank lines, strip trailing spaces per line."""
		"""Layer 1 safe: collapse runs of 3+ newlines (2+ blank lines) to a single blank line.

		v3.6.12 (normalize-1): no longer rstrips trailing spaces per line — that
		is LOSSY for Markdown hard breaks (two trailing spaces) and padded string
		literals, which broke the 'lossless/safe' guarantee that callers (incl.
		slm_compress mode=normalize) rely on. Only collapsing excess blank lines
		remains, which is semantically safe.
		"""
		import re
		text = re.sub(r"\n{3,}", "\n\n", text)
		lines = [line.rstrip() for line in text.split("\n")]
		return "\n".join(lines)
		return re.sub(r"\n{3,}", "\n\n", text)

		@@ -327,0 +332,0 @@ # ── Lazy loaders ─────────────────────────────────────────────────────

+16

-2

src/superlocalmemory/optimize/storage/db.py

		@@ -17,3 +17,8 @@ """CacheDB — wraps DatabaseManager for llmcache.db operations.
		- CCR original_blob is ALSO AES-256-GCM encrypted.
		- Key derivation: PBKDF2-HMAC-SHA256(password=machine_id, salt=_per_db_salt, iter=100_000)
		- Key storage: a single MACHINE-WIDE key file (~/.superlocalmemory/opt-key.bin,
		0o600) is generated once and reused for all cache DBs on the machine. (The
		per-DB salt below is persisted for provenance but does NOT make the AES key
		per-DB — a single install has one llmcache.db, so a machine-wide key is the
		intended model. A tampered/rotated key now degrades to a cache MISS, not a
		crash — see _decrypt fail-open, v3.6.12 cache-1.)
		- Salt: os.urandom(32) generated ONCE at DB creation, stored in
		@@ -47,2 +52,3 @@ llmcache_schema_version.description='salt:<hex>'. NO hardcoded salt.

		from cryptography.exceptions import InvalidTag
		from cryptography.hazmat.primitives.ciphers.aead import AESGCM
		@@ -373,3 +379,11 @@ from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
		aesgcm = AESGCM(self._aes_key)
		return aesgcm.decrypt(nonce, ciphertext, associated_data=None)
		# v3.6.12 (cache-1): AES-GCM raises cryptography.exceptions.InvalidTag
		# (NOT a ValueError subclass) on a tampered/wrong-key blob. Every caller
		# catches ValueError to fail-open; convert InvalidTag -> ValueError here
		# at the single chokepoint so a corrupt/rotated-key cache entry degrades
		# to a miss instead of raising out of get()/get_value()/ccr_get().
		try:
		return aesgcm.decrypt(nonce, ciphertext, associated_data=None)
		except InvalidTag as exc:
		raise ValueError(f"AES-GCM authentication failed: {exc}") from exc

		@@ -376,0 +390,0 @@ # ---- assertion ----

+11

-3

src/superlocalmemory/server/api.py

		@@ -111,4 +111,10 @@ #!/usr/bin/env python3
		from superlocalmemory.infra.rate_limiter import RateLimiter
		_write_limiter = RateLimiter(max_requests=30, window_seconds=60)
		_read_limiter = RateLimiter(max_requests=120, window_seconds=60)
		from superlocalmemory.core.remote_mode import (
		rate_limit_config,
		is_rate_limit_exempt,
		)
		# v3.6.12 (issue #40): env-tunable thresholds (defaults unchanged).
		_rl_write, _rl_read, _rl_window = rate_limit_config()
		_write_limiter = RateLimiter(max_requests=_rl_write, window_seconds=_rl_window)
		_read_limiter = RateLimiter(max_requests=_rl_read, window_seconds=_rl_window)

		@@ -118,2 +124,4 @@ @application.middleware("http")
		client_ip = request.client.host if request.client else "unknown"
		if is_rate_limit_exempt(client_ip):
		return await call_next(request)
		is_write = request.method in ("POST", "PUT", "DELETE", "PATCH")
		@@ -127,3 +135,3 @@ limiter = _write_limiter if is_write else _read_limiter
		content={"error": "Too many requests."},
		headers={"Retry-After": str(limiter.window_seconds)},
		headers={"Retry-After": str(limiter.window)},
		)
		@@ -130,0 +138,0 @@ response = await call_next(request)

+13

-0

src/superlocalmemory/server/routes/mesh.py

		@@ -80,2 +80,15 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		raise HTTPException(503, detail="Mesh disabled in config")
		# v3.6.12 (mesh-1 security): SLM_MESH_SHARED_SECRET was read by the broker but
		# never verified on inbound mesh HTTP calls. When a secret is configured,
		# require it (constant-time) from NON-loopback callers via X-Mesh-Secret.
		# The local MCP client always calls over loopback and is exempt, so this is
		# zero-change for single-machine use and closes the LAN mesh auth bypass.
		secret = getattr(broker, "_shared_secret", None)
		if secret:
		client_host = request.client.host if request.client else ""
		if client_host not in ("127.0.0.1", "::1", "localhost"):
		import hmac
		presented = request.headers.get("x-mesh-secret", "")
		if not hmac.compare_digest(presented, secret):
		raise HTTPException(401, detail="invalid or missing mesh secret")
		return broker
		@@ -82,0 +95,0 @@

+14

-2

src/superlocalmemory/server/routes/token.py

		@@ -36,4 +36,7 @@ """GET /internal/token — serve install token to the local dashboard.
		"http://127.0.0.1",
		"https://127.0.0.1",
		"http://localhost",
		"https://localhost",
		"http://[::1]",
		"https://[::1]",
		)
		@@ -61,4 +64,13 @@

		# v3.6.12 (issue #39): in SLM_REMOTE mode, also serve the token to
		# explicitly-allowlisted LAN clients so a remote-browser dashboard can load
		# the Brain page. Default stays loopback-only — remote_mode helpers return
		# False unless SLM_REMOTE=1 AND the client IP is in SLM_MCP_ALLOWED_HOSTS.
		from superlocalmemory.core.remote_mode import (
		is_lan_client_allowed,
		is_remote_origin_allowed,
		)

		client_host = request.client.host if request.client else ""
		if not is_loopback(client_host):
		if not is_loopback(client_host) and not is_lan_client_allowed(client_host):
		return JSONResponse({"error": "loopback only"}, status_code=403)
		@@ -68,3 +80,3 @@
		origin = headers.get("origin", "")
		if not _origin_is_loopback(origin):
		if not _origin_is_loopback(origin) and not is_remote_origin_allowed(origin):
		return JSONResponse(
		@@ -71,0 +83,0 @@ {"error": "origin not allowed"}, status_code=403,

+15

-4

src/superlocalmemory/server/ui.py

		@@ -85,4 +85,10 @@ #!/usr/bin/env python3
		from superlocalmemory.infra.rate_limiter import RateLimiter
		_write_limiter = RateLimiter(max_requests=30, window_seconds=60)
		_read_limiter = RateLimiter(max_requests=120, window_seconds=60)
		from superlocalmemory.core.remote_mode import (
		rate_limit_config,
		is_rate_limit_exempt,
		)
		# v3.6.12 (issue #40): env-tunable thresholds (defaults unchanged).
		_rl_write, _rl_read, _rl_window = rate_limit_config()
		_write_limiter = RateLimiter(max_requests=_rl_write, window_seconds=_rl_window)
		_read_limiter = RateLimiter(max_requests=_rl_read, window_seconds=_rl_window)

		@@ -92,2 +98,4 @@ @application.middleware("http")
		client_ip = request.client.host if request.client else "unknown"
		if is_rate_limit_exempt(client_ip):
		return await call_next(request)
		is_write = request.method in ("POST", "PUT", "DELETE", "PATCH")
		@@ -170,4 +178,7 @@ limiter = _write_limiter if is_write else _read_limiter
		application.include_router(_mod.router)
		except (ImportError, Exception):
		pass
		except (ImportError, Exception) as _exc:
		# v3.6.12 (settings-3): was a silent `pass` — a transient import
		# error in learning.py alone 404s 3 dashboard panes (Learning,
		# Patterns, Feedback) with no trace. Log it like the chat loop above.
		logger.warning("Optional router %s failed: %s", _module_name, _exc)

		@@ -174,0 +185,0 @@ # Wire WebSocket manager into routes that need broadcast capability

+10

-1

src/superlocalmemory/storage/database.py

		@@ -593,2 +593,11 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		"""Full-text search via FTS5, joined to facts table for reconstruction."""
		# v3.6.12 (search-1): the raw query was passed straight into FTS5 MATCH,
		# so any '?', '-', quote, or trailing boolean keyword (AND/OR/NOT) raised
		# an FTS5 syntax error. Tokenize to word characters, quote each token,
		# and OR-join — mirrors the recall BM25 channel's safe MATCH expression.
		import re as _re
		tokens = [t for t in _re.findall(r"\w+", query.lower()) if t]
		if not tokens:
		return []
		match_expr = " OR ".join(f'"{t}"' for t in tokens)
		rows = self.execute(
		@@ -599,3 +608,3 @@ """SELECT f.* FROM atomic_facts_fts AS fts
		ORDER BY fts.rank LIMIT ?""",
		(query, profile_id, limit),
		(match_expr, profile_id, limit),
		)
		@@ -602,0 +611,0 @@ return [self._row_to_fact(r) for r in rows]

+24

-0

src/superlocalmemory/ui/js/auto-settings.js

		@@ -173,2 +173,16 @@ // SuperLocalMemory V3 — Auto-Capture/Recall Settings

		// v3.6.12 (issue #39/#40): populate the endpoint field from the SAVED
		// config endpoint. updateProviderUI() above sets the field to the
		// provider's DEFAULT (e.g. https://api.openai.com/v1), which hides the
		// user's real custom endpoint (llama.cpp/LM Studio) and makes Test
		// Connection probe the wrong URL → 401. Override with data.endpoint here.
		if (data.endpoint) {
		setTimeout(function() {
		var epEl = document.getElementById('settings-endpoint');
		if (epEl) epEl.value = data.endpoint;
		var epRow = document.getElementById('settings-endpoint-row');
		if (epRow) epRow.style.display = 'block';
		}, 0);
		}

		// After provider UI updates, set the saved model value
		@@ -318,2 +332,7 @@ if (model) {
		var apiKey = document.getElementById('settings-api-key')?.value \|\| '';
		// v3.6.12 (issue #39): include the configured custom endpoint. Without this
		// the backend never sees base_url, treats a custom llama.cpp/LM-Studio server
		// as official OpenAI, and 401s on an empty key. Was the real cause of the
		// "Test Connection fails / API key required" report against Mode B.
		var endpoint = document.getElementById('settings-endpoint')?.value \|\| '';
		var resultEl = document.getElementById('settings-test-result');
		@@ -331,2 +350,3 @@
		if (apiKey) testBody.api_key = apiKey;
		if (endpoint) { testBody.base_url = endpoint; testBody.endpoint = endpoint; }
		var resp = await fetch('/api/v3/provider/test', {
		@@ -354,2 +374,5 @@ method: 'POST',
		var apiKey = document.getElementById('settings-api-key')?.value \|\| '';
		// v3.6.12 (settings-2): persist the custom endpoint too, else a llama.cpp/
		// LM-Studio/Azure endpoint can never be saved (backend reads base_url).
		var endpoint = document.getElementById('settings-endpoint')?.value \|\| '';

		@@ -365,2 +388,3 @@ var statusEl = document.getElementById('settings-save-status');
		var payload = Object.assign({mode: mode, provider: provider, model: model, api_key: apiKey}, embParams);
		if (endpoint) { payload.base_url = endpoint; payload.endpoint = endpoint; }
		var modeResp = await fetch('/api/v3/mode/set', {
		@@ -367,0 +391,0 @@ method: 'POST',

CHANGELOG.md

Sorry, the diff of this file is too big to display

src/superlocalmemory/cli/commands.py

Sorry, the diff of this file is too big to display

src/superlocalmemory/server/routes/v3_api.py

Sorry, the diff of this file is too big to display

src/superlocalmemory/server/unified_daemon.py

Sorry, the diff of this file is too big to display

superlocalmemory - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics