Big News: Socket raises $60M Series C at a $1B valuation to secure software supply chains for AI-driven development.Announcement
Sign In

superlocalmemory

Package Overview
Dependencies
Maintainers
1
Versions
176
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

superlocalmemory - npm Package Compare versions

Comparing version
3.6.11
to
3.6.12
+197
src/superlocalmemory/core/remote_mode.py
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
# Licensed under AGPL-3.0-or-later - see LICENSE file
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
"""Distributed / LAN deployment mode — the single ``SLM_REMOTE`` switch.
SuperLocalMemory historically assumes every dashboard browser, MCP client,
and API caller lives on ``127.0.0.1``. That assumption breaks three things
for users who deploy SLM on a server and reach it across a LAN (issue #39):
1. ``/internal/token`` refuses any non-loopback client → Brain page can't
fetch the install token → "Couldn't load Brain".
2. The MCP Streamable-HTTP transport is **stateful** — every call must
replay the ``Mcp-Session-Id`` from the ``initialize`` handshake. A
gateway/hub that forwards a tool call without replaying it gets
``-32600 Session not found``.
3. Dashboard CSRF origin checks only accept loopback origins.
``SLM_REMOTE=1`` flips all three assumptions at once, **default OFF** so the
loopback-only security posture is unchanged for the 99% local case. LAN
access is still gated by an explicit IP allowlist (``SLM_MCP_ALLOWED_HOSTS``)
— remote mode alone does not throw the doors open.
Granular overrides (each implied by ``SLM_REMOTE=1`` but usable alone):
* ``SLM_MCP_STATELESS=1`` — stateless MCP transport only (gateway fix),
without opening the dashboard token endpoint.
Security note (WORSTCASE): stateless MCP drops per-session isolation, and
serving the install token to a LAN host lets any allowlisted machine read
the brain. Keep the allowlist specific (never blanket ``*`` unless the
network is fully trusted) — see ``docs/distributed-deployment.md``.
"""
from __future__ import annotations
import ipaddress
import os
# Lower-cased spellings of an environment value that count as "enabled".
_TRUTHY = frozenset({"1", "true", "yes", "on"})


def _is_truthy(value: str | None) -> bool:
    """Report whether an environment-variable value spells an enabled flag.

    Args:
        value: Raw environment value, or ``None`` when the variable is unset.

    Returns:
        ``True`` when the value, stripped of surrounding whitespace and
        lower-cased, is one of ``_TRUTHY``; ``False`` for ``None``, the empty
        string, and every other spelling.
    """
    if not value:
        return False
    normalized = value.strip().lower()
    return normalized in _TRUTHY
def is_remote_mode() -> bool:
    """Report whether ``SLM_REMOTE`` opts this daemon into LAN/distributed mode.

    The environment is read on every call, so a change to ``SLM_REMOTE`` in
    ``os.environ`` is picked up without re-importing the module.

    Returns:
        ``True`` when ``SLM_REMOTE`` holds a truthy spelling, else ``False``.
    """
    flag = os.environ.get("SLM_REMOTE")
    return _is_truthy(flag)
def mcp_stateless() -> bool:
    """Report whether the MCP transport should run without session ids.

    Stateless mode is switched on either by the umbrella ``SLM_REMOTE=1`` or
    by the granular ``SLM_MCP_STATELESS=1``. It lets a gateway/hub forward
    ``tools/call`` without replaying the ``Mcp-Session-Id`` obtained during
    the ``initialize`` handshake (issue #39).

    Returns:
        ``True`` when either switch is truthy, else ``False``.
    """
    if is_remote_mode():
        return True
    return _is_truthy(os.environ.get("SLM_MCP_STATELESS"))
def _allowlist_entries() -> list[str]:
    """Return the trusted-client allowlist parsed from ``SLM_MCP_ALLOWED_HOSTS``.

    The same variable already configures MCP DNS-rebinding protection, so
    trusted hosts are configured in exactly one place. The value is a
    comma-separated list whose items may be ``*`` (any), an exact IP, a CIDR
    block (``192.168.1.0/24``) or a prefix wildcard (``192.168.*``). A
    host-header style ``:port`` / ``:*`` suffix is tolerated here and removed
    later, when an entry is matched against a client IP.

    Returns:
        The non-empty entries, whitespace-trimmed, in configured order; an
        empty list when the variable is unset or blank.
    """
    configured = os.environ.get("SLM_MCP_ALLOWED_HOSTS", "").strip()
    entries: list[str] = []
    for piece in configured.split(","):
        trimmed = piece.strip()
        if trimmed:
            entries.append(trimmed)
    return entries
def _strip_port(entry: str) -> str:
    """Drop a trailing ``:port`` / ``:*`` host-header suffix from an entry.

    Handled shapes:

    * ``host[:port]`` for IPv4 addresses and hostnames (exactly one colon).
    * CIDR ``network/prefix[:port]`` for both IPv4 and IPv6 networks. The
      suffix is removed from the part *after* the slash, so the colons that
      belong to an IPv6 network (``fd00::/8``) are preserved. Cutting at the
      first colon of the whole entry would reduce ``fd00::/8`` to ``fd00``,
      which can never match and silently denies every client.
    * Bracketless IPv6 literals (two or more colons, no ``/``) are returned
      unchanged.

    Args:
        entry: One allowlist entry, possibly padded with whitespace.

    Returns:
        The entry without surrounding whitespace and without a port suffix.
    """
    e = entry.strip()
    if "/" in e:
        network, _, suffix = e.partition("/")
        if network.count(":") == 1:
            # Malformed "host:port/..." form. An IPv6 network always has at
            # least two colons, so this is an IPv4/hostname with a port;
            # keep the long-standing result (the bare host).
            return network.partition(":")[0]
        # The prefix length is whatever precedes a stray ":port" / ":*".
        return f"{network}/{suffix.partition(':')[0]}"
    if e.count(":") == 1:  # host:port or host:* (IPv4 / hostname)
        return e.split(":", 1)[0]
    return e
def _host_matches(
    entry: str,
    client_host: str,
    client_ip: ipaddress.IPv4Address | ipaddress.IPv6Address | None,
) -> bool:
    """Decide whether one allowlist entry admits the given client.

    Args:
        entry: A single allowlist entry (``*``, exact host, CIDR block or
            ``prefix*`` wildcard), optionally carrying a ``:port`` suffix.
        client_host: The candidate host exactly as received (socket peer IP,
            or the host part of an ``Origin`` header).
        client_ip: ``client_host`` parsed as an IP address, or ``None`` when
            it is not a numeric address.

    Returns:
        ``True`` when the entry matches the client, ``False`` otherwise
        (including for empty or malformed entries).
    """
    host = _strip_port(entry).strip()
    if not host:
        return False
    if host == "*":
        return True
    if "/" in host and client_ip is not None:
        try:
            return client_ip in ipaddress.ip_network(host, strict=False)
        except ValueError:
            # Unparseable CIDR: treat as a non-match rather than an error.
            return False
    if host.endswith("*"):
        # STRING prefix match (not CIDR), so it is only sound for numeric
        # addresses. A candidate that is not an IP literal can be a hostname
        # chosen by an attacker (e.g. the Origin "192.168.evil.example"
        # starts with "192.168."), so wildcards never match it. A dotted
        # prefix like "192.168." also rejects "192.1680.x". Prefer CIDR
        # (192.168.0.0/16) for unambiguous network matching.
        if client_ip is None:
            return False
        return client_host.startswith(host[:-1])
    return host == client_host
def is_lan_client_allowed(client_host: str) -> bool:
    """Report whether a non-loopback client is trusted under remote mode.

    Loopback clients are the caller's responsibility; this check governs LAN
    clients only. It is deliberately closed by default: with remote mode off,
    an empty ``client_host`` or an empty allowlist the answer is ``False``,
    so the loopback-only posture is preserved.

    Args:
        client_host: Host to test, normally the numeric socket peer address.

    Returns:
        ``True`` when remote mode is on and at least one allowlist entry
        matches ``client_host``.
    """
    if not (is_remote_mode() and client_host):
        return False
    allowlist = _allowlist_entries()
    if not allowlist:
        return False
    # Parse once; a non-numeric host is matched by exact string only.
    try:
        parsed = ipaddress.ip_address(client_host)
    except ValueError:
        parsed = None
    for candidate in allowlist:
        if _host_matches(candidate, client_host, parsed):
            return True
    return False
def is_remote_origin_allowed(origin: str) -> bool:
    """Report whether an ``Origin`` URL belongs to an allowlisted LAN host.

    Used to relax the dashboard CSRF origin guard for trusted LAN dashboards.
    An empty origin is not handled here (loopback callers deal with it) and
    simply yields ``False``.

    Args:
        origin: Full origin URL such as ``http://192.168.50.144:8765``.

    Returns:
        ``True`` when remote mode is on and the origin's host passes
        ``is_lan_client_allowed``.
    """
    if not is_remote_mode() or not origin:
        return False
    # Keep what follows "scheme://" (or everything when there is no scheme),
    # then cut the authority off at the first path separator.
    _scheme, separator, remainder = origin.partition("://")
    authority = (remainder if separator else origin).partition("/")[0]
    if authority.startswith("["):
        # Bracketed IPv6 literal: keep the address between the brackets.
        hostname = authority.partition("]")[0].lstrip("[")
    elif authority.count(":") == 1:
        # IPv4 / hostname followed by a single ":port".
        hostname = authority.partition(":")[0]
    else:
        hostname = authority
    return is_lan_client_allowed(hostname)
def _env_int(name: str, default: int) -> int:
    """Read a strictly positive integer from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset, blank, not an
            integer, or not greater than zero.

    Returns:
        The parsed positive integer, or ``default``.
    """
    text = os.environ.get(name, "").strip()
    try:
        parsed = int(text) if text else default
    except ValueError:
        return default
    return parsed if parsed > 0 else default
def rate_limit_config() -> tuple[int, int, int]:
    """Return the dashboard rate-limiter settings.

    The limiter used to be hardcoded at 30 writes / 120 reads per 60 seconds,
    which a remote browser retrying a failing Brain load could exhaust
    (``429 Too Many Requests``, issue #40). Each value is now tunable through
    ``SLM_RATE_LIMIT_WRITE``, ``SLM_RATE_LIMIT_READ`` and
    ``SLM_RATE_LIMIT_WINDOW``; the defaults are unchanged.

    Returns:
        ``(write_max, read_max, window_seconds)``.
    """
    return (
        _env_int("SLM_RATE_LIMIT_WRITE", 30),
        _env_int("SLM_RATE_LIMIT_READ", 120),
        _env_int("SLM_RATE_LIMIT_WINDOW", 60),
    )
def is_rate_limit_exempt(client_host: str) -> bool:
    """Report whether a client bypasses the dashboard rate limiter.

    The loopback names ``127.0.0.1``, ``::1`` and ``localhost`` are always
    exempt because the dashboard polls itself rapidly. In remote mode an
    allowlisted LAN client is the user's own remote browser doing the same
    polling, so it is exempt as well (issue #40).

    Args:
        client_host: Host of the requesting client.

    Returns:
        ``True`` for the loopback names and for allowlisted LAN clients.
    """
    loopback_names = ("127.0.0.1", "::1", "localhost")
    is_loopback = any(client_host == name for name in loopback_names)
    return is_loopback or is_lan_client_allowed(client_host)
__all__ = (
"is_remote_mode",
"mcp_stateless",
"is_lan_client_allowed",
"is_remote_origin_allowed",
"rate_limit_config",
"is_rate_limit_exempt",
)
+1
-1
{
"name": "superlocalmemory",
"version": "3.6.11",
"version": "3.6.12",
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",

@@ -5,0 +5,0 @@ "keywords": [

[project]
name = "superlocalmemory"
version = "3.6.11"
version = "3.6.12"
description = "Information-geometric agent memory with mathematical guarantees"

@@ -5,0 +5,0 @@ readme = "README.md"

@@ -38,2 +38,4 @@ <p align="center">

>
> **v3.6.12 "Distributed-ready":** Run SLM on a server and reach it across your LAN. `SLM_REMOTE=1` (default off) lets the dashboard load from a remote browser, lets MCP gateways/hubs forward tool calls, and makes custom local LLM endpoints (llama.cpp / LM Studio / Azure) configurable right from the dashboard — plus a batch of stability and security fixes. See [`docs/distributed-deployment.md`](docs/distributed-deployment.md).
>
> **v3.6.11 "Optimize Everywhere":** Three surfaces. **Proxy** (Surface A) — full-turn cache + compress on transport; needs `ANTHROPIC_BASE_URL`, shrinks the context window. **MCP tools** (Surface B) — `slm_compress`, `slm_retrieve`, `slm_cache_set`, `slm_cache_get`, `slm_optimize_stats`; no proxy, no window shrink, works on any Claude subscription. **Skill** (Surface C) — `slm-optimize` installs in `~/.claude/skills/`; zero-config auto-compress for large tool outputs and CLAUDE.md. No proxy, full 1M window. [See Three Surfaces →](#three-surfaces-proxy--mcp-tools--skill)

@@ -40,0 +42,0 @@ >

Metadata-Version: 2.4
Name: superlocalmemory
Version: 3.6.11
Version: 3.6.12
Summary: Information-geometric agent memory with mathematical guarantees

@@ -131,2 +131,4 @@ Author-email: Varun Pratap Bhardwaj <admin@superlocalmemory.com>

>
> **v3.6.12 "Distributed-ready":** Run SLM on a server and reach it across your LAN. `SLM_REMOTE=1` (default off) lets the dashboard load from a remote browser, lets MCP gateways/hubs forward tool calls, and makes custom local LLM endpoints (llama.cpp / LM Studio / Azure) configurable right from the dashboard — plus a batch of stability and security fixes. See [`docs/distributed-deployment.md`](docs/distributed-deployment.md).
>
> **v3.6.11 "Optimize Everywhere":** Three surfaces. **Proxy** (Surface A) — full-turn cache + compress on transport; needs `ANTHROPIC_BASE_URL`, shrinks the context window. **MCP tools** (Surface B) — `slm_compress`, `slm_retrieve`, `slm_cache_set`, `slm_cache_get`, `slm_optimize_stats`; no proxy, no window shrink, works on any Claude subscription. **Skill** (Surface C) — `slm-optimize` installs in `~/.claude/skills/`; zero-config auto-compress for large tool outputs and CLAUDE.md. No proxy, full 1M window. [See Three Surfaces →](#three-surfaces-proxy--mcp-tools--skill)

@@ -133,0 +135,0 @@ >

@@ -117,2 +117,3 @@ AUTHORS.md

src/superlocalmemory/core/registry.py
src/superlocalmemory/core/remote_mode.py
src/superlocalmemory/core/reranker_worker.py

@@ -119,0 +120,0 @@ src/superlocalmemory/core/safe_fs.py

@@ -411,408 +411,1 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

logger.warning("Some SLM workers still alive after %ds timeout", timeout)
# ---------------------------------------------------------------------------
# Server: HTTP request handler with engine singleton
# ---------------------------------------------------------------------------
# Engine singleton; stays None until _get_engine() builds it on first use.
_engine = None
# Monotonic timestamp of the latest request. do_GET/do_POST refresh it and
# the idle watchdog compares against it to decide when to shut down.
_last_activity = time.monotonic()
# ---------------------------------------------------------------------------
# V3.3.28: Observation debounce buffer.
#
# When 20+ file edits arrive in quick succession (from parallel AI agents,
# git checkout, or batch sed), we buffer observations for _OBSERVE_DEBOUNCE_SEC
# seconds and deduplicate by content hash. This reduces 20 observations → 1-3
# batches, each processed by the singleton engine (1 embedding worker).
# ---------------------------------------------------------------------------
# Debounce window in seconds; read once at import from SLM_OBSERVE_DEBOUNCE_SEC.
# NOTE(review): a non-numeric value makes float() raise at import time.
_OBSERVE_DEBOUNCE_SEC = float(os.environ.get("SLM_OBSERVE_DEBOUNCE_SEC", "3.0"))
# Observations waiting for the next flush, in arrival order.
_observe_buffer: list[str] = []
_observe_seen: set[str] = set()  # content hashes for dedup within window
# Guards the buffer, the seen-set and the timer handle below.
_observe_lock = threading.Lock()
# Pending debounce timer, or None when no flush is scheduled.
_observe_timer: threading.Timer | None = None
def _flush_observe_buffer() -> None:
    """Drain the debounce buffer and run auto-capture on everything in it.

    The buffer, the dedup set and the timer handle are reset while holding
    ``_observe_lock``; the capture work itself runs after the lock has been
    released. A failure on one observation is logged and does not stop the
    remaining ones.
    """
    global _observe_timer
    with _observe_lock:
        if not _observe_buffer:
            return
        pending = _observe_buffer.copy()
        _observe_buffer.clear()
        _observe_seen.clear()
        _observe_timer = None

    # Entries were deduplicated when they were enqueued.
    engine = _get_engine()
    from superlocalmemory.hooks.auto_capture import AutoCapture

    capturer = AutoCapture(engine=engine)
    for observation in pending:
        try:
            verdict = capturer.evaluate(observation)
            if verdict.capture:
                capturer.capture(observation, category=verdict.category)
        except Exception as exc:
            # Deliberate best-effort: keep the batch going, but leave a trace
            # so a pattern of dropped observations is visible.
            logger.warning("observation dropped during batch: %s", exc)
    logger.info("Observe debounce: processed %d observations (from buffer)", len(pending))
def _enqueue_observation(content: str) -> dict:
    """Queue an observation for the next debounced flush.

    Content already seen inside the current debounce window is rejected as a
    duplicate. Every accepted observation restarts the debounce timer, so the
    flush runs ``_OBSERVE_DEBOUNCE_SEC`` seconds after the latest arrival.

    Args:
        content: Observation text.

    Returns:
        An immediate status dict: either a duplicate notice, or the queue
        acknowledgement with the current buffer size and debounce interval.
    """
    global _observe_timer
    import hashlib

    # MD5 serves only as a dedup fingerprint here, not as a security measure.
    digest = hashlib.md5(content.encode()).hexdigest()
    with _observe_lock:
        if digest in _observe_seen:
            return {"captured": False, "reason": "duplicate within debounce window"}
        _observe_seen.add(digest)
        _observe_buffer.append(content)
        queued = len(_observe_buffer)
        # Restart the debounce window from this arrival.
        if _observe_timer is not None:
            _observe_timer.cancel()
        restarted = threading.Timer(_OBSERVE_DEBOUNCE_SEC, _flush_observe_buffer)
        restarted.daemon = True
        _observe_timer = restarted
        restarted.start()
    return {
        "captured": True,
        "queued": True,
        "buffer_size": queued,
        "debounce_sec": _OBSERVE_DEBOUNCE_SEC,
    }
def _get_engine():
    """Return the daemon's engine singleton, building it on first use.

    The first call loads the configuration, constructs and initializes the
    engine and, when the retrieval engine exposes a reranker with
    ``warmup_sync``, blocks on its warm-up (up to 120 seconds). Later calls
    return the cached instance.
    """
    global _engine
    if _engine is not None:
        return _engine

    from superlocalmemory.core.config import SLMConfig
    from superlocalmemory.core.engine import MemoryEngine

    _engine = MemoryEngine(SLMConfig.load())
    _engine.initialize()
    # Force reranker warmup (blocking — daemon can afford to wait)
    retrieval = getattr(_engine, '_retrieval_engine', None)
    if retrieval:
        reranker = getattr(retrieval, '_reranker', None)
        if reranker and hasattr(reranker, 'warmup_sync'):
            reranker.warmup_sync(timeout=120)
    logger.info("Daemon engine initialized and warm")
    return _engine
class DaemonHandler(BaseHTTPRequestHandler):
    """Lightweight HTTP handler for daemon requests.

    GET routes: ``/health``, ``/recall``, ``/list``, ``/status``.
    POST routes: ``/remember``, ``/observe``, ``/stop``.
    Anything else is answered with a JSON 404. Every request refreshes the
    module-level ``_last_activity`` timestamp.
    """

    def log_message(self, format, *args):
        """Suppress default access logging."""
        pass

    def _send_json(self, status: int, data: dict) -> None:
        """Send ``data`` as a JSON response body with the given HTTP status."""
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(json.dumps(data).encode())

    def _read_body(self) -> dict:
        """Read and JSON-decode the request body.

        Returns ``{}`` when ``Content-Length`` is missing or zero; otherwise
        exactly that many bytes are read and parsed. Parse errors propagate
        to the caller.
        NOTE(review): nothing here enforces that the decoded JSON is an
        object, so a non-dict payload is returned as-is.
        """
        length = int(self.headers.get("Content-Length", 0))
        if length == 0:
            return {}
        return json.loads(self.rfile.read(length).decode())

    def do_GET(self) -> None:
        """Dispatch GET requests by path."""
        global _last_activity
        _last_activity = time.monotonic()
        # Liveness probe: does not touch the engine.
        if self.path == "/health":
            self._send_json(200, {"status": "ok", "pid": os.getpid()})
            return
        if self.path.startswith("/recall"):
            try:
                # Parse query from URL params
                from urllib.parse import urlparse, parse_qs
                params = parse_qs(urlparse(self.path).query)
                query = params.get("q", [""])[0]
                limit = int(params.get("limit", ["20"])[0])
                # S9-DASH-02: session_id for outcome-queue enqueue.
                # Priority: ?session_id= query arg > X-SLM-Session-Id
                # header > synthetic "http:<ts>". Without any of these
                # the recall still works — it just doesn't produce a
                # pending_outcome (hook-based signals can't match).
                session_id = params.get("session_id", [""])[0]
                if not session_id:
                    session_id = self.headers.get("X-SLM-Session-Id", "")
                if not session_id:
                    import time as _t
                    # Millisecond wall-clock timestamp as a synthetic id.
                    session_id = f"http:{int(_t.time() * 1000)}"
                engine = _get_engine()
                # Only the spellings "true" and "1" (any case) enable fast mode.
                raw_fast = params.get("fast", ["false"])[0]
                fast = raw_fast.lower() in ("true", "1")
                response = engine.recall(
                    query, limit=limit, session_id=session_id, fast=fast,
                )
                # Return the same field shape as recall_worker._handle_recall,
                # so MCP processes that proxy through the daemon get recall_trace-
                # compatible data without a second round trip.
                # Unique, non-empty memory ids so source content is fetched
                # in a single batch call.
                memory_ids = list({
                    r.fact.memory_id for r in response.results[:limit]
                    if r.fact.memory_id
                })
                memory_map = (
                    engine._db.get_memory_content_batch(memory_ids)
                    if memory_ids else {}
                )
                results = []
                for r in response.results[:limit]:
                    fact_type = getattr(r.fact, "fact_type", None)
                    lifecycle = getattr(r.fact, "lifecycle", None)
                    # v3.5.1: sanitize control chars that break JSON (newlines, tabs in content).
                    clean = r.fact.content.replace("\r", " ").replace("\n", " ").replace("\t", " ")
                    sc_raw = memory_map.get(r.fact.memory_id, "")
                    sc_clean = sc_raw.replace("\r", " ").replace("\n", " ").replace("\t", " ") if sc_raw else ""
                    results.append({
                        "fact_id": r.fact.fact_id,
                        "memory_id": r.fact.memory_id,
                        "content": clean,
                        "source_content": sc_clean,
                        "score": round(r.score, 4),
                        "confidence": round(r.confidence, 4),
                        "trust_score": round(r.trust_score, 4),
                        "channel_scores": {
                            k: round(v, 4)
                            for k, v in (r.channel_scores or {}).items()
                        },
                        # Enum-like values are flattened to their .value,
                        # with "" as the fallback.
                        "fact_type": fact_type.value
                        if fact_type and hasattr(fact_type, "value") else "",
                        "lifecycle": lifecycle.value
                        if lifecycle and hasattr(lifecycle, "value") else "",
                        "access_count": getattr(r.fact, "access_count", 0),
                        "evidence_chain": list(
                            getattr(r, "evidence_chain", []) or []
                        ),
                    })
                self._send_json(200, {
                    "ok": True,
                    "query": query,
                    "query_type": response.query_type,
                    "result_count": len(results),
                    "retrieval_time_ms": round(response.retrieval_time_ms, 1),
                    "channel_weights": {
                        k: round(v, 3)
                        for k, v in (response.channel_weights or {}).items()
                    },
                    "total_candidates": getattr(response, "total_candidates", 0),
                    "results": results,
                    "count": len(results),  # backward compat alias
                })
            except Exception as exc:
                # Any failure (bad limit, engine error, ...) becomes a JSON 500.
                self._send_json(500, {"error": str(exc)})
            return
        if self.path == "/list":
            try:
                engine = _get_engine()
                facts = engine.list_facts(limit=50)
                # Content is cut to 100 characters and created_at to its
                # first 19 characters.
                items = [
                    {"content": f.content[:100], "fact_type": getattr(f.fact_type, 'value', str(f.fact_type)),
                     "created_at": (f.created_at or "")[:19], "fact_id": f.fact_id}
                    for f in facts
                ]
                self._send_json(200, {"results": items, "count": len(items)})
            except Exception as exc:
                self._send_json(500, {"error": str(exc)})
            return
        if self.path == "/status":
            # NOTE(review): unlike /recall and /list this branch has no
            # try/except, so an engine failure is not turned into a JSON 500.
            engine = _get_engine()
            uptime = time.monotonic() - _server_start_time
            self._send_json(200, {
                "status": "running", "pid": os.getpid(),
                "uptime_s": round(uptime),
                "mode": engine._config.mode.value,
                "fact_count": engine.fact_count,
                "idle_s": round(time.monotonic() - _last_activity),
            })
            return
        self._send_json(404, {"error": "not found"})

    def do_POST(self) -> None:
        """Dispatch POST requests by path."""
        global _last_activity
        _last_activity = time.monotonic()
        if self.path == "/remember":
            try:
                body = self._read_body()
                content = body.get("content", "")
                tags = body.get("tags", "")
                extra_meta = body.get("metadata") or {}
                if not content:
                    self._send_json(400, {"error": "content required"})
                    return
                engine = _get_engine()
                # Start from the tags (if any); caller-supplied metadata is
                # merged on top and therefore wins on key collisions.
                metadata = {"tags": tags} if tags else {}
                if isinstance(extra_meta, dict):
                    metadata.update(extra_meta)
                fact_ids = engine.store(content, metadata=metadata)
                self._send_json(200, {
                    "ok": True,
                    "fact_ids": fact_ids,
                    "count": len(fact_ids),
                })
            except Exception as exc:
                self._send_json(500, {"error": str(exc)})
            return
        if self.path == "/observe":
            try:
                body = self._read_body()
                content = body.get("content", "")
                if not content:
                    self._send_json(400, {"error": "content required"})
                    return
                # V3.3.28: Debounced observation processing.
                # Buffers observations for 3s, deduplicates, processes as batch.
                # Returns immediately — the actual capture happens asynchronously
                # via the debounce timer, using the singleton engine.
                result = _enqueue_observation(content)
                self._send_json(200, result)
            except Exception as exc:
                self._send_json(500, {"error": str(exc)})
            return
        if self.path == "/stop":
            # Reply first, then shut down on a separate thread so this
            # request handler can return.
            self._send_json(200, {"status": "stopping"})
            Thread(target=_shutdown_server, daemon=True).start()
            return
        self._send_json(404, {"error": "not found"})
# ---------------------------------------------------------------------------
# Server lifecycle
# ---------------------------------------------------------------------------
# The HTTP server instance; assigned in start_server() and consulted by
# _shutdown_server(). None until the server has been created.
_server: HTTPServer | None = None
# Monotonic timestamp used by the /status route to report uptime; start_server()
# resets it once the engine is warm.
_server_start_time = time.monotonic()
def _shutdown_server() -> None:
    """Tear the daemon down: flush observations, close the engine, stop HTTP.

    Steps run in a fixed order and the first two are best-effort (failures
    are logged, not raised): flush the debounce buffer, pause briefly, close
    and drop the engine singleton, stop the HTTP server, then remove the PID
    and port files.

    NOTE(review): ``HTTPServer.shutdown()`` waits for ``serve_forever()`` to
    exit, so while the loop is running this function has to be invoked from
    a thread other than the one serving requests.
    """
    global _engine, _server
    try:
        _flush_observe_buffer()
    except Exception as exc:
        logger.warning("flush observe buffer on shutdown failed: %s", exc)
    # Short pause before the engine is closed.
    time.sleep(0.5)
    if _engine is not None:
        try:
            _engine.close()
        except Exception as exc:
            logger.warning("engine close on shutdown failed: %s", exc)
        # Drop the reference even if close() failed.
        _engine = None
    if _server is not None:
        _server.shutdown()
    # missing_ok=True makes a repeated shutdown harmless.
    _PID_FILE.unlink(missing_ok=True)
    _PORT_FILE.unlink(missing_ok=True)
def _idle_watchdog(timeout: int) -> None:
    """Terminate the daemon once it has been idle for longer than ``timeout``.

    Runs forever: every 30 seconds the time since ``_last_activity`` is
    compared with ``timeout``. When the limit is exceeded the server is shut
    down and the process exits with status 0.

    Args:
        timeout: Allowed idle time in seconds.
    """
    while True:
        time.sleep(30)
        idle_for = time.monotonic() - _last_activity
        if idle_for <= timeout:
            continue
        logger.info("Daemon idle for %ds, shutting down", int(idle_for))
        _shutdown_server()
        os._exit(0)
def start_server(port: int = _DEFAULT_PORT, idle_timeout: int | None = None) -> None:
    """Start the daemon HTTP server. Blocks until stopped.

    Startup sequence: emit the upgrade banner (advisory), apply the v3.4.26
    data-dir migration if needed, write the PID/port files, install the
    SIGTERM handler, warm the engine, start the idle watchdog, then serve on
    ``127.0.0.1:port`` until shut down.

    Args:
        port: TCP port to bind on the loopback interface.
        idle_timeout: Seconds of inactivity before auto-shutdown. When falsy,
            ``SLM_DAEMON_IDLE_TIMEOUT`` (or the built-in default) is used.
    """
    global _server, _server_start_time, _last_activity
    idle_timeout = idle_timeout or int(os.environ.get(
        "SLM_DAEMON_IDLE_TIMEOUT", str(_DEFAULT_IDLE_TIMEOUT),
    ))
    # Banner is advisory — a broken data dir must never prevent the daemon
    # from starting, so the swallow here is intentional.
    try:
        from superlocalmemory import __version__ as _slm_ver
        from superlocalmemory.cli.version_banner import check_and_emit_upgrade_banner
        check_and_emit_upgrade_banner(_slm_ver)
    except Exception as exc:
        logger.warning("upgrade banner on daemon start failed: %s", exc)
    # Apply the v3.4.26 data-dir migration now — the daemon is the
    # authoritative holder of the DB, so this is the right place to do
    # it unconditionally (``migrate`` is idempotent).
    try:
        from pathlib import Path as _P
        from superlocalmemory.migrations.v3_4_25_to_v3_4_26 import (
            is_ready as _is_ready, migrate as _migrate,
        )
        _data = _P(os.environ.get("SLM_DATA_DIR")
                   or _P.home() / ".superlocalmemory")
        if not _is_ready(_data):
            _migrate(_data)
    except Exception as exc:
        logger.warning("v3.4.26 migration on daemon start failed: %s", exc)
    # Write PID + port files
    _PID_FILE.parent.mkdir(parents=True, exist_ok=True)
    _PID_FILE.write_text(str(os.getpid()))
    _PORT_FILE.write_text(str(port))

    # Handle SIGTERM for graceful shutdown. Signal handlers run on the main
    # thread, which is also the thread blocked in serve_forever() below.
    # HTTPServer.shutdown() waits for that loop to exit, so calling it
    # directly from the handler deadlocks and the daemon never terminates.
    # Hand the work to a helper thread instead, as the /stop route does.
    def _terminate() -> None:
        try:
            _shutdown_server()
        finally:
            # Exit even if the graceful teardown failed part-way.
            os._exit(0)

    def _on_sigterm(*_: object) -> None:
        Thread(target=_terminate, daemon=True, name="sigterm-shutdown").start()

    signal.signal(signal.SIGTERM, _on_sigterm)
    # Pre-warm engine (this is the cold start — daemon absorbs it once)
    logger.info("Daemon starting — warming engine...")
    _get_engine()
    logger.info("Engine warm. Daemon ready on port %d (idle timeout: %ds)", port, idle_timeout)
    _server_start_time = time.monotonic()
    _last_activity = time.monotonic()
    # Start idle watchdog
    Thread(target=_idle_watchdog, args=(idle_timeout,), daemon=True, name="idle-watchdog").start()
    # Start HTTP server
    # SO_REUSEADDR must be set on the class BEFORE __init__ calls bind()
    HTTPServer.allow_reuse_address = True
    _server = HTTPServer(("127.0.0.1", port), DaemonHandler)
    try:
        _server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        # serve_forever() has returned here, so shutdown() does not block.
        _shutdown_server()
# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Minimal CLI: --start runs the server in the foreground, --stop asks a
    # running daemon to stop, anything else prints the usage line.
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
    cli_args = sys.argv
    if "--start" in cli_args:
        start_server()
    elif "--stop" in cli_args:
        stop_daemon()
    else:
        print("Usage: python -m superlocalmemory.cli.daemon --start|--stop")

@@ -186,3 +186,5 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

recall_p = sub.add_parser("recall", help="Semantic search with 4-channel retrieval")
# v3.6.12 (parity-3): `search` is an alias of `recall` so the CLI has the
# same search verb the MCP exposes (handlers dict maps both to cmd_recall).
recall_p = sub.add_parser("recall", aliases=["search"], help="Semantic search with 4-channel retrieval")
recall_p.add_argument("query", help="Search query")

@@ -189,0 +191,0 @@ recall_p.add_argument("--limit", type=int, default=10, help="Max results (default 10)")

@@ -208,3 +208,6 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

"""
content = redact_secrets(entry.content)[:MAX_CONTENT_CHARS]
# v3.6.12 (redact-1): scrub dashboard/cached content at HIGH aggression
# so Bearer/GitHub-PAT/Anthropic/OpenAI/GENERIC_KEY patterns are caught
# (the default 'normal' skipped them, leaking those shapes to the UI).
content = redact_secrets(entry.content, aggression="high")[:MAX_CONTENT_CHARS]
fact_ids_json = json.dumps(list(entry.fact_ids))

@@ -211,0 +214,0 @@ byte_size = (

@@ -370,3 +370,6 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

model = getattr(config.llm, 'model', model) or model
timeout = getattr(config.llm, 'timeout', timeout) or timeout
# v3.6.12 (modeb-4): the LLMConfig field is `timeout_seconds`, not
# `timeout` — the old read always missed and silently used 30s.
timeout = getattr(config.llm, 'timeout_seconds', None) or \
getattr(config.llm, 'timeout', None) or timeout

@@ -373,0 +376,0 @@ fact_texts = "\n".join(f"- {f['content']}" for f in facts[:_MAX_CLUSTER_SIZE])

@@ -127,4 +127,7 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

model = getattr(self._config.llm, 'model', None) or "llama3.1:8b"
# v3.6.12 (modeb-2): honor the configured endpoint instead of hardcoding
# localhost:11434, so a remote/non-default Ollama host works in Mode B.
_base = (getattr(self._config.llm, 'api_base', '') or "http://localhost:11434").rstrip("/")
with httpx.Client(timeout=httpx.Timeout(30.0)) as client:
resp = client.post("http://localhost:11434/api/generate", json={
resp = client.post(f"{_base}/api/generate", json={
"model": model,

@@ -131,0 +134,0 @@ "prompt": prompt,

@@ -141,3 +141,9 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

return True
return bool(self._api_key)
# v3.6.12 (modeb-1): a custom local OpenAI-compatible endpoint
# (llama.cpp, LM Studio, vLLM) needs NO API key — _build_openai already
# omits the Authorization header when the key is empty. Treat a
# configured base_url as sufficient, otherwise Mode B silently falls
# back to Mode A extraction for keyless local endpoints.
_base = getattr(self, "_base_url", "") or getattr(self, "_api_base", "")
return bool(self._api_key) or bool(_base)

@@ -144,0 +150,0 @@ @property

@@ -18,4 +18,8 @@ """Per-HTTP-request agent ID resolution — ContextVar home.

# v3.6.12 (parity-1): default is "" (the "no agent routed" sentinel), NOT the
# user-visible "mcp_client". Sanitized agent ids are [A-Za-z0-9._-], so "" can
# never collide — a client that explicitly routes to /mcp/mcp_client is now
# distinguishable from a bare /mcp/ request with no agent segment.
_current_agent_id: contextvars.ContextVar[str] = contextvars.ContextVar(
"slm_agent_id", default="mcp_client"
"slm_agent_id", default=""
)

@@ -46,4 +50,4 @@

ctx_id = _current_agent_id.get()
if ctx_id != "mcp_client":
return ctx_id
if ctx_id:
return ctx_id # an explicitly-routed agent id (incl. "mcp_client")
if env_fallback:

@@ -50,0 +54,0 @@ return os.environ.get("SLM_AGENT_ID", "mcp_client")

@@ -312,3 +312,6 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

pid = profile_id or engine.profile_id
facts = engine._db.get_all_facts(pid)[:limit]
# v3.6.12 (search-2): push the limit into the query — was loading the
# ENTIRE facts table (deserializing every 768-float embedding) just
# to return the top N. get_all_facts preserves created_at DESC order.
facts = engine._db.get_all_facts(pid, limit=limit)
items = []

@@ -405,2 +408,11 @@ for f in facts:

# v3.6.12 (search-3): recall/delete run in a separate worker
# subprocess that caches its engine (and profile_id) at init. Recycle
# it so the NEXT recall uses the new profile instead of the stale one.
try:
from superlocalmemory.core.worker_pool import WorkerPool
WorkerPool.shared().shutdown()
except Exception:
logger.debug("worker-pool recycle on profile switch skipped")
return {

@@ -407,0 +419,0 @@ "success": True,

@@ -79,3 +79,3 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

"""Register this session with the mesh broker if not already."""
global _REGISTERED, _PROJECT_PATH
global _REGISTERED, _PROJECT_PATH, _PEER_ID
if _REGISTERED:

@@ -93,2 +93,7 @@ return

if result:
# v3.6.12 (mesh-1): the broker mints its OWN peer_id (RegisterRequest has
# no peer_id field, so our body value is dropped by pydantic). Adopt the
# broker's id BEFORE starting the heartbeat, otherwise heartbeat/send/
# inbox all target a non-existent peer → 404s and the session is reaped.
_PEER_ID = result.get("peer_id", _PEER_ID)
_REGISTERED = True

@@ -196,3 +201,3 @@ _start_heartbeat()

)
return result or {"error": "Failed to send message"}
return result or {"ok": False, "error": "Failed to send message"}

@@ -213,4 +218,7 @@ @server.tool()

msg_list = (messages or {}).get("messages", [])
# Auto-mark unread messages as read
unread_ids = [m["id"] for m in msg_list if not m.get("read")]
# Auto-mark unread messages as read. v3.6.12 (failopen-2): use .get("id")
# — a malformed broker message without an "id" key used to raise KeyError
# out to the agent, violating the never-raise contract.
unread_ids = [m["id"] for m in msg_list
if not m.get("read") and m.get("id") is not None]
if unread_ids:

@@ -246,3 +254,3 @@ await asyncio.to_thread(

)
return result or {"error": "Failed to set state"}
return result or {"ok": False, "error": "Failed to set state"}

@@ -274,3 +282,3 @@ if key:

)
return result or {"error": "Lock operation failed"}
return result or {"ok": False, "error": "Lock operation failed"}

@@ -277,0 +285,0 @@ @server.tool(annotations=ToolAnnotations(readOnlyHint=True))

@@ -284,6 +284,9 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

# Direct messages to this peer
# v3.6.12 (mesh-3): only UNREAD direct messages — was returning read
# ones too, so every poll re-listed already-read messages until the
# 24h cleanup (broadcast/project already filter unread via mesh_reads).
direct = conn.execute(
"SELECT id, from_peer, to_peer, msg_type, content, read, created_at, "
"target_type, project_path FROM mesh_messages "
"WHERE to_peer=? AND target_type='peer' "
"WHERE to_peer=? AND target_type='peer' AND COALESCE(read, 0) = 0 "
"AND (expires_at IS NULL OR expires_at > ?) "

@@ -415,6 +418,14 @@ "ORDER BY created_at DESC LIMIT 100",

elif action == "release":
conn.execute("DELETE FROM mesh_locks WHERE file_path=? AND locked_by=?",
(file_path, locked_by))
# v3.6.12 (mesh-2): report whether we actually released. The
# DELETE is correctly owner-scoped, but it previously returned
# released=ok:true even when a NON-owner released nothing.
cur = conn.execute(
"DELETE FROM mesh_locks WHERE file_path=? AND locked_by=?",
(file_path, locked_by),
)
conn.commit()
return {"ok": True, "action": "released"}
if cur.rowcount and cur.rowcount > 0:
return {"ok": True, "action": "released"}
return {"ok": False, "action": "not_released",
"error": "no lock held by this peer for that file"}

@@ -421,0 +432,0 @@ elif action == "query":

@@ -320,7 +320,12 @@ # compress/router.py

def _normalize_whitespace(text: str) -> str:
"""Layer 1 lossless: collapse excess blank lines, strip trailing spaces per line."""
"""Layer 1 safe: collapse runs of 3+ blank lines to a single blank line.
v3.6.12 (normalize-1): no longer rstrips trailing spaces per line — that
is LOSSY for Markdown hard breaks (two trailing spaces) and padded string
literals, which broke the 'lossless/safe' guarantee that callers (incl.
slm_compress mode=normalize) rely on. Only collapsing excess blank lines
remains, which is semantically safe.
"""
import re
text = re.sub(r"\n{3,}", "\n\n", text)
lines = [line.rstrip() for line in text.split("\n")]
return "\n".join(lines)
return re.sub(r"\n{3,}", "\n\n", text)

@@ -327,0 +332,0 @@ # ── Lazy loaders ─────────────────────────────────────────────────────

@@ -17,3 +17,8 @@ """CacheDB — wraps DatabaseManager for llmcache.db operations.

- CCR original_blob is ALSO AES-256-GCM encrypted.
- Key derivation: PBKDF2-HMAC-SHA256(password=machine_id, salt=_per_db_salt, iter=100_000)
- Key storage: a single MACHINE-WIDE key file (~/.superlocalmemory/opt-key.bin,
0o600) is generated once and reused for all cache DBs on the machine. (The
per-DB salt below is persisted for provenance but does NOT make the AES key
per-DB — a single install has one llmcache.db, so a machine-wide key is the
intended model. A tampered/rotated key now degrades to a cache MISS, not a
crash — see _decrypt fail-open, v3.6.12 cache-1.)
- Salt: os.urandom(32) generated ONCE at DB creation, stored in

@@ -47,2 +52,3 @@ llmcache_schema_version.description='salt:<hex>'. NO hardcoded salt.

from cryptography.exceptions import InvalidTag
from cryptography.hazmat.primitives.ciphers.aead import AESGCM

@@ -373,3 +379,11 @@ from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

aesgcm = AESGCM(self._aes_key)
return aesgcm.decrypt(nonce, ciphertext, associated_data=None)
# v3.6.12 (cache-1): AES-GCM raises cryptography.exceptions.InvalidTag
# (NOT a ValueError subclass) on a tampered/wrong-key blob. Every caller
# catches ValueError to fail-open; convert InvalidTag -> ValueError here
# at the single chokepoint so a corrupt/rotated-key cache entry degrades
# to a miss instead of raising out of get()/get_value()/ccr_get().
try:
return aesgcm.decrypt(nonce, ciphertext, associated_data=None)
except InvalidTag as exc:
raise ValueError(f"AES-GCM authentication failed: {exc}") from exc

@@ -376,0 +390,0 @@ # ---- assertion ----

@@ -111,4 +111,10 @@ #!/usr/bin/env python3

from superlocalmemory.infra.rate_limiter import RateLimiter
_write_limiter = RateLimiter(max_requests=30, window_seconds=60)
_read_limiter = RateLimiter(max_requests=120, window_seconds=60)
from superlocalmemory.core.remote_mode import (
rate_limit_config,
is_rate_limit_exempt,
)
# v3.6.12 (issue #40): env-tunable thresholds (defaults unchanged).
_rl_write, _rl_read, _rl_window = rate_limit_config()
_write_limiter = RateLimiter(max_requests=_rl_write, window_seconds=_rl_window)
_read_limiter = RateLimiter(max_requests=_rl_read, window_seconds=_rl_window)

@@ -118,2 +124,4 @@ @application.middleware("http")

client_ip = request.client.host if request.client else "unknown"
if is_rate_limit_exempt(client_ip):
return await call_next(request)
is_write = request.method in ("POST", "PUT", "DELETE", "PATCH")

@@ -127,3 +135,3 @@ limiter = _write_limiter if is_write else _read_limiter

content={"error": "Too many requests."},
headers={"Retry-After": str(limiter.window_seconds)},
headers={"Retry-After": str(limiter.window)},
)

@@ -130,0 +138,0 @@ response = await call_next(request)

@@ -80,2 +80,15 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

raise HTTPException(503, detail="Mesh disabled in config")
# v3.6.12 (mesh-1 security): SLM_MESH_SHARED_SECRET was read by the broker but
# never verified on inbound mesh HTTP calls. When a secret is configured,
# require it (constant-time) from NON-loopback callers via X-Mesh-Secret.
# The local MCP client always calls over loopback and is exempt, so this is
# zero-change for single-machine use and closes the LAN mesh auth bypass.
secret = getattr(broker, "_shared_secret", None)
if secret:
client_host = request.client.host if request.client else ""
if client_host not in ("127.0.0.1", "::1", "localhost"):
import hmac
presented = request.headers.get("x-mesh-secret", "")
if not hmac.compare_digest(presented, secret):
raise HTTPException(401, detail="invalid or missing mesh secret")
return broker

@@ -82,0 +95,0 @@

@@ -36,4 +36,7 @@ """GET /internal/token — serve install token to the local dashboard.

"http://127.0.0.1",
"https://127.0.0.1",
"http://localhost",
"https://localhost",
"http://[::1]",
"https://[::1]",
)

@@ -61,4 +64,13 @@

# v3.6.12 (issue #39): in SLM_REMOTE mode, also serve the token to
# explicitly-allowlisted LAN clients so a remote-browser dashboard can load
# the Brain page. Default stays loopback-only — remote_mode helpers return
# False unless SLM_REMOTE=1 AND the client IP is in SLM_MCP_ALLOWED_HOSTS.
from superlocalmemory.core.remote_mode import (
is_lan_client_allowed,
is_remote_origin_allowed,
)
client_host = request.client.host if request.client else ""
if not is_loopback(client_host):
if not is_loopback(client_host) and not is_lan_client_allowed(client_host):
return JSONResponse({"error": "loopback only"}, status_code=403)

@@ -68,3 +80,3 @@

origin = headers.get("origin", "")
if not _origin_is_loopback(origin):
if not _origin_is_loopback(origin) and not is_remote_origin_allowed(origin):
return JSONResponse(

@@ -71,0 +83,0 @@ {"error": "origin not allowed"}, status_code=403,

@@ -85,4 +85,10 @@ #!/usr/bin/env python3

from superlocalmemory.infra.rate_limiter import RateLimiter
_write_limiter = RateLimiter(max_requests=30, window_seconds=60)
_read_limiter = RateLimiter(max_requests=120, window_seconds=60)
from superlocalmemory.core.remote_mode import (
rate_limit_config,
is_rate_limit_exempt,
)
# v3.6.12 (issue #40): env-tunable thresholds (defaults unchanged).
_rl_write, _rl_read, _rl_window = rate_limit_config()
_write_limiter = RateLimiter(max_requests=_rl_write, window_seconds=_rl_window)
_read_limiter = RateLimiter(max_requests=_rl_read, window_seconds=_rl_window)

@@ -92,2 +98,4 @@ @application.middleware("http")

client_ip = request.client.host if request.client else "unknown"
if is_rate_limit_exempt(client_ip):
return await call_next(request)
is_write = request.method in ("POST", "PUT", "DELETE", "PATCH")

@@ -170,4 +178,7 @@ limiter = _write_limiter if is_write else _read_limiter

application.include_router(_mod.router)
except (ImportError, Exception):
pass
except (ImportError, Exception) as _exc:
# v3.6.12 (settings-3): was a silent `pass` — a transient import
# error in learning.py alone 404s 3 dashboard panes (Learning,
# Patterns, Feedback) with no trace. Log it like the chat loop above.
logger.warning("Optional router %s failed: %s", _module_name, _exc)

@@ -174,0 +185,0 @@ # Wire WebSocket manager into routes that need broadcast capability

@@ -593,2 +593,11 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

"""Full-text search via FTS5, joined to facts table for reconstruction."""
# v3.6.12 (search-1): the raw query was passed straight into FTS5 MATCH,
# so any '?', '-', quote, or trailing boolean keyword (AND/OR/NOT) raised
# an FTS5 syntax error. Tokenize to word characters, quote each token,
# and OR-join — mirrors the recall BM25 channel's safe MATCH expression.
import re as _re
tokens = [t for t in _re.findall(r"\w+", query.lower()) if t]
if not tokens:
return []
match_expr = " OR ".join(f'"{t}"' for t in tokens)
rows = self.execute(

@@ -599,3 +608,3 @@ """SELECT f.* FROM atomic_facts_fts AS fts

ORDER BY fts.rank LIMIT ?""",
(query, profile_id, limit),
(match_expr, profile_id, limit),
)

@@ -602,0 +611,0 @@ return [self._row_to_fact(r) for r in rows]

@@ -173,2 +173,16 @@ // SuperLocalMemory V3 — Auto-Capture/Recall Settings

// v3.6.12 (issue #39/#40): populate the endpoint field from the SAVED
// config endpoint. updateProviderUI() above sets the field to the
// provider's DEFAULT (e.g. https://api.openai.com/v1), which hides the
// user's real custom endpoint (llama.cpp/LM Studio) and makes Test
// Connection probe the wrong URL → 401. Override with data.endpoint here.
if (data.endpoint) {
setTimeout(function() {
var epEl = document.getElementById('settings-endpoint');
if (epEl) epEl.value = data.endpoint;
var epRow = document.getElementById('settings-endpoint-row');
if (epRow) epRow.style.display = 'block';
}, 0);
}
// After provider UI updates, set the saved model value

@@ -318,2 +332,7 @@ if (model) {

var apiKey = document.getElementById('settings-api-key')?.value || '';
// v3.6.12 (issue #39): include the configured custom endpoint. Without this
// the backend never sees base_url, treats a custom llama.cpp/LM-Studio server
// as official OpenAI, and 401s on an empty key. Was the real cause of the
// "Test Connection fails / API key required" report against Mode B.
var endpoint = document.getElementById('settings-endpoint')?.value || '';
var resultEl = document.getElementById('settings-test-result');

@@ -331,2 +350,3 @@

if (apiKey) testBody.api_key = apiKey;
if (endpoint) { testBody.base_url = endpoint; testBody.endpoint = endpoint; }
var resp = await fetch('/api/v3/provider/test', {

@@ -354,2 +374,5 @@ method: 'POST',

var apiKey = document.getElementById('settings-api-key')?.value || '';
// v3.6.12 (settings-2): persist the custom endpoint too, else a llama.cpp/
// LM-Studio/Azure endpoint can never be saved (backend reads base_url).
var endpoint = document.getElementById('settings-endpoint')?.value || '';

@@ -365,2 +388,3 @@ var statusEl = document.getElementById('settings-save-status');

var payload = Object.assign({mode: mode, provider: provider, model: model, api_key: apiKey}, embParams);
if (endpoint) { payload.base_url = endpoint; payload.endpoint = endpoint; }
var modeResp = await fetch('/api/v3/mode/set', {

@@ -367,0 +391,0 @@ method: 'POST',

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display