superlocalmemory - npm Package Compare versions

+147

src/superlocalmemory/core/block_hygiene.py

		# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		# Licensed under AGPL-3.0-or-later - see LICENSE file
		# Part of SuperLocalMemory V3 — Core Block Hygiene (v3.6.6)

		"""Core Memory Block hygiene helpers (v3.6.6 F-5).

		Three pure functions used by the block compiler:
		- dedupe_block_content: normalized-line dedup within a block
		- filter_low_quality_block_facts: drop is_low_quality facts
		- compile_block_content: full compile pipeline (filter + dedup + cap)

		Also exports: _recompile_core_blocks — the hook called by MaintenanceScheduler.

		These are pure functions (no I/O). All I/O lives in the scheduler / consolidation
		engine that calls them.
		"""

		from __future__ import annotations

		import logging

		logger = logging.getLogger(__name__)

		# Delimiter between entries of a compiled block: compile_block_content joins
		# the deduplicated lines with it, and also folds it back into a plain newline
		# when splitting incoming fact content.
		_BLOCK_SEPARATOR = "\n---\n"
		# Placeholder text for an empty block. Not referenced by any function in this
		# module's visible code.
		_PLACEHOLDER = "No data available."


		# ---------------------------------------------------------------------------
		# dedupe_block_content
		# ---------------------------------------------------------------------------

		def dedupe_block_content(lines: list[str]) -> list[str]:
		    """Drop blank lines and repeated lines, keeping first occurrences in order.

		    Two lines count as duplicates when they are equal after lowercasing and
		    collapsing every run of whitespace to a single space. The surviving
		    entries are the ORIGINAL strings (leading/trailing whitespace included).

		    Args:
		        lines: Candidate block lines; the list itself is left untouched.

		    Returns:
		        A new list with blank and duplicate lines removed.
		    """
		    # dicts preserve insertion order, so the values come back in
		    # first-seen order while the keys give O(1) duplicate detection.
		    first_by_key: dict[str, str] = {}
		    for raw in lines:
		        normalized = " ".join(raw.lower().split())
		        if normalized:
		            first_by_key.setdefault(normalized, raw)
		    return list(first_by_key.values())


		# ---------------------------------------------------------------------------
		# filter_low_quality_block_facts
		# ---------------------------------------------------------------------------

		def filter_low_quality_block_facts(facts: list[dict]) -> list[dict]:
		    """Return the facts whose content passes both quality checks.

		    A fact is dropped when ``injection.is_low_quality`` or
		    ``injection.is_prompt_template`` returns a truthy value for its
		    ``content`` (a missing or falsy content is checked as ``""``).

		    If the injection helpers cannot be imported, filtering is skipped and a
		    shallow copy of ``facts`` is returned instead.

		    Args:
		        facts: Fact dicts; never mutated.

		    Returns:
		        A new list containing the surviving fact dicts, in input order.
		    """
		    try:
		        from superlocalmemory.core.injection import is_low_quality, is_prompt_template
		    except Exception:
		        # Helpers unavailable: hand back everything unfiltered.
		        return list(facts)

		    def _keep(fact: dict) -> bool:
		        text = fact.get("content", "") or ""
		        return not (is_low_quality(text) or is_prompt_template(text))

		    return [fact for fact in facts if _keep(fact)]


		# ---------------------------------------------------------------------------
		# compile_block_content
		# ---------------------------------------------------------------------------

		def compile_block_content(
		    facts: list[dict],
		    max_chars: int = 2000,
		) -> str:
		    """Build the text of one block from fact dicts.

		    Steps:
		    1. Drop facts rejected by filter_low_quality_block_facts.
		    2. Strip each remaining fact's content, fold block separators into
		       newlines, and split the content into individual lines.
		    3. Deduplicate the pooled lines with dedupe_block_content.
		    4. Join the survivors with the block separator and cut the result to
		       ``max_chars`` characters.

		    Args:
		        facts: Fact dicts carrying a ``content`` entry.
		        max_chars: Upper bound on the returned string's length.

		    Returns:
		        The compiled block text, or ``""`` when nothing survives the
		        filter/dedup steps.
		    """
		    surviving = filter_low_quality_block_facts(facts)
		    if not surviving:
		        return ""

		    pooled: list[str] = []
		    for fact in surviving:
		        text = (fact.get("content") or "").strip()
		        if text:
		            # A fact may itself be a previously compiled block; treat its
		            # separators as ordinary line breaks.
		            pooled.extend(text.replace(_BLOCK_SEPARATOR, "\n").split("\n"))

		    unique_lines = dedupe_block_content(pooled)
		    if not unique_lines:
		        return ""
		    return _BLOCK_SEPARATOR.join(unique_lines)[:max_chars]


		# ---------------------------------------------------------------------------
		# _recompile_core_blocks — scheduler hook (F-5 daily recompile)
		# ---------------------------------------------------------------------------

		def _recompile_core_blocks(
		    db,
		    config,
		    profile_id: str,
		) -> dict:
		    """Recompile the core memory blocks for one profile (scheduler hook).

		    Builds a ConsolidationEngine from ``db`` and ``config.consolidation`` and
		    runs ``compile_core_blocks_mode_a(profile_id)``. Any exception raised
		    along the way (including a failed import) is logged as a warning and
		    converted into an error-stats dict, so this function does not raise.

		    Args:
		        db: Database handle forwarded to the engine.
		        config: Object exposing a ``consolidation`` attribute.
		        profile_id: Profile whose blocks are recompiled.

		    Returns:
		        The engine's stats dict plus ``profile_id``; on failure
		        ``{"blocks_compiled": 0, "profile_id": ..., "error": ...}``.
		    """
		    try:
		        from superlocalmemory.core.consolidation_engine import ConsolidationEngine

		        consolidator = ConsolidationEngine(db=db, config=config.consolidation)
		        stats = consolidator.compile_core_blocks_mode_a(profile_id)
		        compiled_count = stats.get("blocks_compiled", 0)
		        logger.info(
		            "Daily core-block recompile: profile=%s blocks=%s",
		            profile_id, compiled_count,
		        )
		        return {**stats, "profile_id": profile_id}
		    except Exception as failure:
		        logger.warning("Core-block recompile failed: %s", failure)
		        return {
		            "blocks_compiled": 0,
		            "profile_id": profile_id,
		            "error": str(failure),
		        }

+133

src/superlocalmemory/core/ingest_gate.py

		# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		# Licensed under AGPL-3.0-or-later - see LICENSE file
		# Part of SuperLocalMemory V3 — Ingest Gate (v3.6.6)

		"""Ingest gate for the remember/store write path (v3.6.6 F-4).

		Responsibilities:
		1. Hard reject content > 1MB (nobody's memory is a megabyte).
		2. Clamp content > 24000 chars to head 70% + tail 30% + truncation marker.
		Head+tail (not head-only) preserves OPEN ITEMS that session-close facts put
		at the END. Only pathological pastes (167KB JSON, 50KB logs) are touched;
		normal dense memories (6-15K session handoffs) pass through intact.
		Full original preserved in result.full_content for storage in memories table.
		3. Prompt-template firewall: reject content matching _PROMPT_TEMPLATE_PATTERNS
		(extends v3.6.4 remember-write-02 quality gate).

		Kill-switch: SLM_INGEST_NO_GATE=1 bypasses rules 2 and 3 (but NOT the 1MB hard cap).

		Usage::
		from superlocalmemory.core.ingest_gate import apply_ingest_gate
		result = apply_ingest_gate(content)
		if result.rejected:
		return {"success": False, "error": result.rejection_reason}
		store(result.fact_content, full_content=result.full_content)
		"""

		from __future__ import annotations

		import os
		from dataclasses import dataclass, field

		# v3.6.6: clamp only pathological monsters; preserve normal dense memories.
		# 24K chars ≈ 6K tokens — well beyond any legitimate single memory.
		_MAX_VERBATIM_CHARS = 24000
		_HEAD_FRACTION = 0.70  # head 70% + tail 30% so OPEN ITEMS at the end survive
		_MAX_INGEST_BYTES = 1_048_576  # 1MB — hard cap, NOT bypassed by kill-switch
		_TRUNCATION_MARKER = "\n…[content truncated at ingest; full text in source memory]…\n"


		@dataclass
		class IngestGateResult:
		    """Result of applying the ingest gate to content.

		    fact_content: Content to store in atomic_facts.content (may be clamped).
		    full_content: Original unmodified content (for memories table storage).
		    rejected: True if content should be rejected outright.
		    rejection_reason: Human-readable reason for rejection (when rejected=True).
		    truncated: True if fact_content was shortened by the verbatim size clamp
		        (head + truncation marker + tail).
		    """
		    fact_content: str
		    full_content: str
		    rejected: bool = False
		    rejection_reason: str = ""
		    truncated: bool = False


		def apply_ingest_gate(
		    content: str,
		    max_verbatim_chars: int = _MAX_VERBATIM_CHARS,
		    max_ingest_bytes: int = _MAX_INGEST_BYTES,
		) -> IngestGateResult:
		    """Apply the ingest quality gate to content before storing.

		    Checks run in this order:
		    1. Hard cap: content whose UTF-8 size exceeds ``max_ingest_bytes`` is
		       rejected. This check ignores the kill-switch.
		    2. Kill-switch: with ``SLM_INGEST_NO_GATE=1`` in the environment the
		       content is returned unchanged (steps 3 and 4 are skipped).
		    3. Prompt-template firewall: content matching
		       ``injection.is_prompt_template`` is rejected. Best-effort: if the
		       check itself fails, the content is let through.
		    4. Verbatim clamp: content longer than ``max_verbatim_chars`` is reduced
		       to head + truncation marker + tail, sized so the result is at most
		       ``max_verbatim_chars`` characters.

		    Args:
		        content: Text submitted for storage.
		        max_verbatim_chars: Maximum length of the returned ``fact_content``.
		        max_ingest_bytes: Hard upper bound on the UTF-8 size of ``content``.

		    Returns:
		        IngestGateResult. Callers MUST check ``result.rejected`` before
		        proceeding with storage. ``full_content`` is always the original.
		    """
		    gate_active = os.environ.get("SLM_INGEST_NO_GATE", "0") != "1"

		    # --- Hard cap: 1MB regardless of kill-switch ---
		    try:
		        byte_len = len(content.encode("utf-8", errors="replace"))
		    except Exception:
		        # Fall back to the character count if encoding is not possible.
		        byte_len = len(content)
		    if byte_len > max_ingest_bytes:
		        return IngestGateResult(
		            fact_content=content,
		            full_content=content,
		            rejected=True,
		            rejection_reason=(
		                f"Content size {byte_len} bytes exceeds maximum "
		                f"{max_ingest_bytes} bytes (1MB). "
		                "Nobody's memory is a megabyte."
		            ),
		        )

		    # --- Gate bypassed ---
		    if not gate_active:
		        return IngestGateResult(
		            fact_content=content,
		            full_content=content,
		            rejected=False,
		            truncated=False,
		        )

		    # --- Prompt-template firewall ---
		    # Extends the v3.6.4 remember-write-02 quality gate.
		    try:
		        from superlocalmemory.core.injection import is_prompt_template
		        if is_prompt_template(content):
		            return IngestGateResult(
		                fact_content=content,
		                full_content=content,
		                rejected=True,
		                rejection_reason=(
		                    "Content matches internal prompt-template patterns "
		                    "(low-quality gate). Prompt machinery must not be stored as memory."
		                ),
		            )
		    except Exception:
		        pass  # Defensive: gate failure must never block a store

		    # --- Verbatim size clamp (head 70% + tail 30%) ---
		    if len(content) > max_verbatim_chars:
		        budget = max_verbatim_chars - len(_TRUNCATION_MARKER)
		        if budget > 0:
		            head_len = int(budget * _HEAD_FRACTION)
		            # tail_len is >= 1 whenever budget >= 1, so the negative slice
		            # below can never degenerate into content[-0:] (whole string).
		            tail_len = budget - head_len
		            fact_content = (
		                content[:head_len] + _TRUNCATION_MARKER + content[-tail_len:]
		            )
		        else:
		            # The limit cannot even hold the marker. A zero/negative budget
		            # would make the slices return the whole content (or garbage),
		            # so fall back to a plain head slice that respects the limit.
		            fact_content = content[:max(max_verbatim_chars, 0)]
		        return IngestGateResult(
		            fact_content=fact_content,
		            full_content=content,
		            rejected=False,
		            truncated=True,
		        )

		    return IngestGateResult(
		        fact_content=content,
		        full_content=content,
		        rejected=False,
		        truncated=False,
		    )

+225

src/superlocalmemory/server/recall_serializer.py

		# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		# Licensed under AGPL-3.0-or-later - see LICENSE file
		# Part of SuperLocalMemory V3 — Recall Serializer (v3.6.6)

		"""Recall output budget and source_content discipline helpers (v3.6.6).

		F-2: Per-fact content clamp + total budget stubs.
		F-3: source_content preview + template firewall.

		THE single shared serialization chokepoint. Every surface that turns a
		RecallResponse into transport dicts goes through ``serialize_recall_response``
		so MCP, CLI, the daemon HTTP route, the in-process queue adapter, and the
		WorkerPool fallback all return byte-for-byte identical output (parity across
		surfaces AND modes A/B). The evidence floor lives upstream in
		RetrievalEngine.recall (also shared); this layer owns presentation only.

		Pure functions — no side effects, no DB access. Stdlib-only at import
		(hooks import chain must stay light).
		"""

		from __future__ import annotations

		import re
		from typing import Any


		# ---------------------------------------------------------------------------
		# F-2: Per-fact content clamp
		# ---------------------------------------------------------------------------

		def clamp_fact_content(
		    content: str,
		    max_chars: int = 2400,
		) -> tuple[str, bool]:
		    """Clamp a single fact's content to roughly max_chars.

		    Strategy: head 70% + "\\n…[truncated N chars]…\\n" + tail 30%.
		    The tail is kept because session-close facts put OPEN ITEMS at the end.

		    ``max_chars`` counts the characters KEPT from the original; the marker is
		    added on top, so a truncated result is ``max_chars + len(marker)`` long.
		    A zero or negative ``max_chars`` keeps nothing and yields the marker
		    alone.

		    Args:
		        content: Fact text; falsy values are returned unchanged.
		        max_chars: Number of original characters to keep when truncating.

		    Returns:
		        (clamped_content, was_truncated)
		    """
		    if not content or len(content) <= max_chars:
		        return content, False

		    keep = max(max_chars, 0)
		    head_len = int(keep * 0.70)
		    tail_len = keep - head_len
		    dropped = len(content) - keep
		    marker = f"\n…[truncated {dropped} chars]…\n"

		    # content[-0:] is the WHOLE string, so an empty tail must be spelled out
		    # explicitly instead of being produced by a negative slice.
		    tail = content[-tail_len:] if tail_len > 0 else ""
		    return content[:head_len] + marker + tail, True


		def apply_recall_budget(
		    results: list[dict],
		    per_fact_max: int = 2400,
		    total_max: int = 12000,
		    full: bool = False,
		) -> list[dict]:
		    """Clamp each result's content and stub out results past the total budget.

		    Results are processed in order. While the running total of emitted
		    content is below ``total_max``, a result's content is clamped with
		    clamp_fact_content (``truncated=True`` is added when it was cut) and its
		    clamped length is added to the running total. Once the total has reached
		    ``total_max``, every later result becomes a stub: its content is cut to
		    the first 120 characters (plus "…" when longer) and ``stub=True`` is set.

		    Args:
		        results: Result dicts; ``content`` is read, other keys pass through.
		        per_fact_max: Per-fact limit handed to clamp_fact_content.
		        total_max: Running-total threshold after which stubs are emitted.
		        full: When True, skip all clamping and stubbing.

		    Returns:
		        A new list of new dicts; the inputs are never mutated.
		    """
		    if not results:
		        return []
		    if full:
		        # Escape hatch: shallow copies only, content untouched.
		        return [dict(item) for item in results]

		    budgeted: list[dict] = []
		    spent = 0
		    for item in results:
		        text = item.get("content", "") or ""

		        if spent < total_max:
		            clipped, was_cut = clamp_fact_content(text, max_chars=per_fact_max)
		            entry = dict(item)
		            entry["content"] = clipped
		            if was_cut:
		                entry["truncated"] = True
		            spent += len(clipped)
		        else:
		            # Budget exhausted: keep the metadata, shrink content to a preview.
		            preview = text[:120]
		            if len(text) > 120:
		                preview += "…"
		            entry = {key: value for key, value in item.items() if key != "content"}
		            entry["content"] = preview
		            entry["stub"] = True

		        budgeted.append(entry)

		    return budgeted


		# ---------------------------------------------------------------------------
		# F-3: source_content discipline
		# ---------------------------------------------------------------------------

		def apply_source_content_discipline(
		    result: dict,
		    include_source: bool = False,
		) -> dict:
		    """Apply source_content discipline to a single result dict.

		    Behavior, in order:
		    - No ``source_content`` key, or a falsy value: the result is copied
		      unchanged.
		    - ``source_content`` matches the prompt-template patterns: it is replaced
		      with ``""`` regardless of ``include_source``.
		    - ``include_source=True``: the full ``source_content`` is kept.
		    - Otherwise: ``source_content`` is cut to its first 280 characters.

		    Args:
		        result: Result dict, possibly carrying ``source_content``.
		        include_source: Keep the full source text instead of a preview.

		    Returns:
		        A new dict — the input is never mutated.
		    """
		    if "source_content" not in result:
		        return dict(result)

		    src = result.get("source_content") or ""
		    if not src:
		        # Nothing to inspect or trim.
		        return dict(result)

		    # Imported only once there is text to check, so results without source
		    # content do not depend on the injection module being importable.
		    from superlocalmemory.core.injection import is_prompt_template

		    disciplined = dict(result)
		    if is_prompt_template(src):
		        # Template firewall: drop regardless of include_source.
		        disciplined["source_content"] = ""
		    elif not include_source:
		        # Default: preview of at most 280 chars.
		        disciplined["source_content"] = src[:280]
		    return disciplined


		# ---------------------------------------------------------------------------
		# THE shared chokepoint: RecallResponse -> transport dicts (all surfaces)
		# ---------------------------------------------------------------------------

		def serialize_recall_response(
		    response: Any,
		    *,
		    limit: int = 10,
		    memory_map: dict[str, str] | None = None,
		    per_fact_max: int = 2400,
		    total_max: int = 12000,
		    full: bool = False,
		    include_source: bool = False,
		) -> tuple[list[dict], bool]:
		    """Turn a recall response into budgeted, source-disciplined dicts.

		    The first ``limit`` entries of ``response.results`` are flattened into
		    plain dicts, each one is passed through apply_source_content_discipline,
		    and the whole list is then passed through apply_recall_budget.

		    Args:
		        response: Object exposing ``results`` (items with ``fact`` and
		            ``score``) and optionally ``no_confident_match``.
		        limit: Max results to serialize.
		        memory_map: fact.memory_id -> source memory content (optional).
		        per_fact_max: Per-fact content char cap.
		        total_max: Total content char budget before stubs.
		        full: Bypass clamping/stubs.
		        include_source: Return full source_content (else a short preview).

		    Returns:
		        (results, no_confident_match) — the list of dicts and the boolean
		        flag read from ``response`` (False when the attribute is absent).
		    """

		    def _enum_text(member: Any) -> Any:
		        # Enum-like members serialize as their .value; anything else passes
		        # through, with falsy values normalized to "".
		        if member is not None and hasattr(member, "value"):
		            return member.value
		        return member or ""

		    sources = memory_map or {}
		    raw: list[dict] = []
		    for hit in (response.results or [])[:limit]:
		        fact = hit.fact
		        per_channel = getattr(hit, "channel_scores", None) or {}
		        raw.append({
		            "fact_id": fact.fact_id,
		            "memory_id": fact.memory_id,
		            "content": fact.content or "",
		            "source_content": sources.get(fact.memory_id, "") or "",
		            "score": round(hit.score, 4),
		            "confidence": round(getattr(hit, "confidence", 0.0), 4),
		            "trust_score": round(getattr(hit, "trust_score", 0.0), 4),
		            "channel_scores": {
		                name: round(value, 4) for name, value in per_channel.items()
		            },
		            "fact_type": _enum_text(getattr(fact, "fact_type", None)),
		            "lifecycle": _enum_text(getattr(fact, "lifecycle", None)),
		            "access_count": getattr(fact, "access_count", 0),
		            "created_at": getattr(fact, "created_at", "") or "",
		            "evidence_chain": list(getattr(hit, "evidence_chain", []) or []),
		        })

		    # Source discipline runs first so the template firewall sees the full
		    # source text; the content budget is applied afterwards.
		    disciplined = [
		        apply_source_content_discipline(entry, include_source=include_source)
		        for entry in raw
		    ]
		    budgeted = apply_recall_budget(
		        disciplined,
		        per_fact_max=per_fact_max,
		        total_max=total_max,
		        full=full,
		    )
		    return budgeted, bool(getattr(response, "no_confident_match", False))

+1

-1

package.json

		{
		"name": "superlocalmemory",
		"version": "3.6.5",
		"version": "3.6.6",
		"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
		@@ -5,0 +5,0 @@ "keywords": [

+1

-1

pyproject.toml

		[project]
		name = "superlocalmemory"
		version = "3.6.5"
		version = "3.6.6"
		description = "Information-geometric agent memory with mathematical guarantees"
		@@ -5,0 +5,0 @@ readme = "README.md"

+1

-1

src/superlocalmemory.egg-info/PKG-INFO

		Metadata-Version: 2.4
		Name: superlocalmemory
		Version: 3.6.5
		Version: 3.6.6
		Summary: Information-geometric agent memory with mathematical guarantees
		@@ -5,0 +5,0 @@ Author-email: Varun Pratap Bhardwaj <admin@superlocalmemory.com>

+3

-0

src/superlocalmemory.egg-info/SOURCES.txt

		@@ -76,2 +76,3 @@ AUTHORS.md
		src/superlocalmemory/core/backend_orchestrator.py
		src/superlocalmemory/core/block_hygiene.py
		src/superlocalmemory/core/clock_monitor.py
		@@ -96,2 +97,3 @@ src/superlocalmemory/core/config.py
		src/superlocalmemory/core/hooks.py
		src/superlocalmemory/core/ingest_gate.py
		src/superlocalmemory/core/injection.py
		@@ -363,2 +365,3 @@ src/superlocalmemory/core/loop_watchdog.py
		src/superlocalmemory/server/bandit_loops.py
		src/superlocalmemory/server/recall_serializer.py
		src/superlocalmemory/server/security_middleware.py
		@@ -365,0 +368,0 @@ src/superlocalmemory/server/ui.py

+1

-1

src/superlocalmemory/__init__.py

		@@ -31,3 +31,3 @@ """SuperLocalMemory — information-geometric agent memory."""

		__version__ = "3.6.5"
		__version__ = "3.6.6"

		@@ -34,0 +34,0 @@ _REQUIRED_VERSIONS = {

+36

-0

src/superlocalmemory/core/config.py

		@@ -200,3 +200,14 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

		# v3.6.6: Evidence floor — gate on per-channel scores, not fused/RRF score.
		# Nonsense queries earn 0.0 on every primary channel; real matches earn
		# semantic >= 0.85 or bm25 > 0. The discriminator is earned channel evidence.
		# Env kill-switch: SLM_RECALL_NO_FLOOR=1 disables without release.
		evidence_floor_enabled: bool = True
		min_semantic_evidence: float = 0.60 # Minimum cosine similarity to keep a result

		# v3.6.6: Recall output budget — protect consuming agents from 585KB responses.
		recall_per_fact_max_chars: int = 2400 # ~600 tokens; head 70% + tail 30%
		recall_total_max_chars: int = 12000 # ~3K tokens; stubs beyond this


		# ---------------------------------------------------------------------------
		@@ -238,2 +249,26 @@ # Math Config
		# ---------------------------------------------------------------------------
		# Store Config (v3.6.6)
		# ---------------------------------------------------------------------------

		@dataclass(frozen=True)
		class StoreConfig:
		    """Configuration for the remember/store write path (v3.6.6).

		    Ingest gate: protect the DB from oversized facts and prompt-template
		    pollution. Defaults ON; env kill-switch SLM_INGEST_NO_GATE=1.

		    Both fields are forwarded to ``apply_ingest_gate`` by the store path.
		    The kill-switch disables the verbatim clamp and the template firewall,
		    but not the ``max_ingest_bytes`` hard cap.
		    """

		    # Max chars for the FACT content stored in atomic_facts.content.
		    # Content above this is clamped to head 70% + tail 30% + truncation marker.
		    # The FULL original is preserved in the memories table row. Set high (24K
		    # ≈ 6K tokens) so only pathological pastes are touched; normal dense
		    # session-handoff memories (6-15K chars) are stored 100% intact.
		    max_verbatim_chars: int = 24000

		    # Hard upper bound in bytes. Content above this is rejected outright.
		    # Nobody's "memory" is a megabyte (MCP: success=False, HTTP: 413).
		    max_ingest_bytes: int = 1_048_576  # 1 MB


		# ---------------------------------------------------------------------------
		# Context Injection (v3.4.65)
		@@ -663,2 +698,3 @@ # ---------------------------------------------------------------------------
		injection: InjectionConfig = field(default_factory=InjectionConfig)
		store: StoreConfig = field(default_factory=StoreConfig)
		# v3.5.0: scaling backends — "sqlite" / "cozo" / "auto" / "lancedb" / "sqlite-vec" / "auto".
		@@ -665,0 +701,0 @@ graph_backend: str = "auto" # "auto" = cozo if pycozo installed, else sqlite

+13

-10

src/superlocalmemory/core/consolidation_engine.py

		@@ -811,18 +811,21 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		) -> str:
		"""Join fact contents with separators, capped at char_limit."""
		parts = [f.get("content", "") for f in facts if f.get("content")]
		joined = "\n---\n".join(parts)
		return joined[:char_limit] if joined else "No data available."
		"""Compile fact contents into a block with hygiene (v3.6.6 F-5).

		Filters low-quality/template facts, dedupes lines WITHIN the block
		(fixes the "same fixture ×5" core-block bug), caps at char_limit.
		"""
		from superlocalmemory.core.block_hygiene import compile_block_content
		compiled = compile_block_content(facts, max_chars=char_limit)
		return compiled if compiled else "No data available."

		def _rows_to_content(
		self, rows: list \| None, char_limit: int,
		) -> str:
		"""Convert DB rows to content string."""
		"""Convert DB rows to a hygienic block content string (v3.6.6 F-5)."""
		if not rows:
		return "No data available."
		parts = [
		dict(r).get("content", "") for r in rows if dict(r).get("content")
		]
		joined = "\n---\n".join(parts)
		return joined[:char_limit] if joined else "No data available."
		from superlocalmemory.core.block_hygiene import compile_block_content
		facts = [dict(r) for r in rows]
		compiled = compile_block_content(facts, max_chars=char_limit)
		return compiled if compiled else "No data available."

		@@ -829,0 +832,0 @@ def _compile_behavioral_block(

+24

-5

src/superlocalmemory/core/engine.py

		@@ -433,2 +433,21 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		pass
		# v3.6.6 ingest gate: reject 1MB monsters + prompt-template pollution;
		# clamp the searchable FACT copy (head+tail) while the memories row keeps
		# the FULL original. Embedding/BM25 use the clamped copy so a 167KB paste
		# never produces a garbage vector. Env kill-switch: SLM_INGEST_NO_GATE=1.
		fact_text = content
		try:
		from superlocalmemory.core.ingest_gate import apply_ingest_gate
		_sc = getattr(self._config, "store", None)
		gate = apply_ingest_gate(
		content,
		max_verbatim_chars=getattr(_sc, "max_verbatim_chars", 24000),
		max_ingest_bytes=getattr(_sc, "max_ingest_bytes", 1_048_576),
		)
		if gate.rejected:
		logger.debug("store_fast ingest gate rejected: %s", gate.rejection_reason)
		return []
		fact_text = gate.fact_content
		except ImportError:
		pass # gate module missing → store verbatim (never block a write)
		now = datetime.now(timezone.utc).isoformat()
		@@ -444,4 +463,4 @@ record = MemoryRecord(
		{m.group(1) for m in _re.finditer(
		r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b", content)}
		\| {m.group(1) for m in _re.finditer(r"\b([A-Z]{2,})\b", content)}
		r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b", fact_text)}
		\| {m.group(1) for m in _re.finditer(r"\b([A-Z]{2,})\b", fact_text)}
		)
		@@ -457,3 +476,3 @@ # v3.5.5: compute the embedding SYNCHRONOUSLY. A single warm embed is
		try:
		emb = self._embedder.embed(content) if self._embedder else None
		emb = self._embedder.embed(fact_text) if self._embedder else None
		if emb:
		@@ -465,3 +484,3 @@ fmean, fvar = self._embedder.compute_fisher_params(emb)
		fact_id=_uuid.uuid4().hex[:16], memory_id=record.memory_id,
		profile_id=self._profile_id, content=content,
		profile_id=self._profile_id, content=fact_text,
		fact_type=FactType.EPISODIC, entities=ents,
		@@ -484,3 +503,3 @@ observation_date=now[:10], confidence=0.7, importance=0.5,
		if bm25:
		bm25.add(fact.fact_id, content, self._profile_id)
		bm25.add(fact.fact_id, fact_text, self._profile_id)
		except Exception:
		@@ -487,0 +506,0 @@ pass

+27

-0

src/superlocalmemory/core/injection.py

		@@ -38,2 +38,29 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

		# v3.6.6: Prompt-template firewall patterns (F-3, F-4).
		# Each pattern flags text that is internal LLM prompt machinery rather than a
		# genuine user memory. Consulted at ingest (F-4) and at recall
		# serialization (F-3). Built with the stdlib `re` module only, so the hooks
		# import chain stays light.
		_PROMPT_TEMPLATE_PATTERNS = tuple(
		    re.compile(expression, re.IGNORECASE)
		    for expression in (
		        r"you are summarizing a claude code session",
		        r"you are a memory consolidation agent",
		        r"apply maximum non-destructive compression",
		        r"<task-notification\b",
		    )
		)


		def is_prompt_template(content: str) -> bool:
		    """Report whether content contains an internal prompt-template marker.

		    The check is a case-insensitive search for any entry of
		    ``_PROMPT_TEMPLATE_PATTERNS`` anywhere in ``content``.

		    Used by:
		    - F-3: drop source_content that is prompt machinery
		    - F-4: reject template content at ingest (extends v3.6.4 quality gate)

		    Kept separate from is_low_quality() so the firewall can run on its own,
		    without the full quality-check heuristics.

		    Args:
		        content: Text to inspect; falsy values never match.

		    Returns:
		        True when at least one pattern is found, otherwise False.
		    """
		    if not content:
		        return False
		    for pattern in _PROMPT_TEMPLATE_PATTERNS:
		        if pattern.search(content):
		            return True
		    return False

		# Leading category tag like "[active_decisions] " / "[learned_preferences] ".
		@@ -40,0 +67,0 @@ _CATEGORY_TAG_RE = re.compile(r"^\s\[[a-z0-9_]+\]\s", re.IGNORECASE)

+9

-0

src/superlocalmemory/core/maintenance_scheduler.py

		@@ -127,2 +127,11 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

		# v3.6.6 F-5: Daily core-block recompile with hygiene (dedup + char cap).
		# Ensures blocks stay clean even when purge or new facts arrive between
		# session-init recompiles.
		try:
		from superlocalmemory.core.block_hygiene import _recompile_core_blocks
		_recompile_core_blocks(self._db, self._config, self._profile_id)
		except Exception as exc:
		logger.debug("Core-block recompile skipped: %s", exc)

		self._schedule_next()
		@@ -129,0 +138,0 @@

+9

-0

src/superlocalmemory/core/recall_pipeline.py

		@@ -327,2 +327,4 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		retrieval_time_ms=response.retrieval_time_ms,
		# v3.6.6: preserve evidence-floor signal across reranking rebuilds.
		no_confident_match=(len(new_results) == 0) and response.no_confident_match,
		)
		@@ -430,2 +432,4 @@
		retrieval_time_ms=response.retrieval_time_ms,
		# v3.6.6: preserve evidence-floor signal across reranking rebuilds.
		no_confident_match=(len(new_results) == 0) and response.no_confident_match,
		)
		@@ -559,2 +563,4 @@ except Exception as exc: # pragma: no cover — defensive
		retrieval_time_ms=response.retrieval_time_ms,
		# v3.6.6: preserve evidence-floor signal across ensemble rebuilds.
		no_confident_match=(len(final_results) == 0) and response.no_confident_match,
		)
		@@ -681,2 +687,5 @@ except Exception as exc: # pragma: no cover — defensive top-level
		retrieval_time_ms=response.retrieval_time_ms,
		# v3.6.6: agentic round-2 may add facts; recompute flag.
		no_confident_match=(len(enhanced_results[:limit]) == 0)
		and response.no_confident_match,
		)
		@@ -683,0 +692,0 @@ except Exception as exc:

+12

-20

src/superlocalmemory/core/recall_worker.py

		@@ -74,22 +74,13 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

		results = []
		for r in response.results[:limit]:
		fact_type = getattr(r.fact, "fact_type", None)
		lifecycle = getattr(r.fact, "lifecycle", None)
		results.append({
		"fact_id": r.fact.fact_id,
		"memory_id": r.fact.memory_id,
		"content": r.fact.content[:300],
		"source_content": memory_map.get(r.fact.memory_id, ""),
		"score": round(r.score, 4),
		"confidence": round(r.confidence, 4),
		"trust_score": round(r.trust_score, 4),
		"channel_scores": {
		k: round(v, 4) for k, v in (r.channel_scores or {}).items()
		},
		"fact_type": fact_type.value if fact_type and hasattr(fact_type, "value") else "",
		"lifecycle": lifecycle.value if lifecycle and hasattr(lifecycle, "value") else "",
		"access_count": getattr(r.fact, "access_count", 0),
		"evidence_chain": list(getattr(r, "evidence_chain", []) or []),
		})
		# v3.6.6: same shared chokepoint as the daemon HTTP route + CLI fallback,
		# so the MCP WorkerPool subprocess path returns identical budgeted output.
		from superlocalmemory.server.recall_serializer import serialize_recall_response
		_rc = getattr(engine._config, "retrieval", None)
		results, no_confident_match = serialize_recall_response(
		response,
		limit=limit,
		memory_map=memory_map,
		per_fact_max=getattr(_rc, "recall_per_fact_max_chars", 2400),
		total_max=getattr(_rc, "recall_total_max_chars", 12000),
		)
		return {
		@@ -106,2 +97,3 @@ "ok": True,
		"results": results,
		"no_confident_match": no_confident_match,
		}
		@@ -108,0 +100,0 @@

+2

-0

src/superlocalmemory/mcp/tools_core.py

		@@ -239,2 +239,4 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		"retrieval_time_ms": result.get("retrieval_time_ms", 0),
		# v3.6.6: surface evidence-floor signal to MCP clients.
		"no_confident_match": result.get("no_confident_match", False),
		}
		@@ -241,0 +243,0 @@ return {"success": False, "error": result.get("error", "Recall failed")}

+58

-0

src/superlocalmemory/retrieval/engine.py

		@@ -298,5 +298,26 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar

		# v3.6.6: Evidence floor — gate on per-channel scores (NOT fused/RRF score).
		# Nonsense queries fuse at 0.75-0.78 because RRF is rank-derived and
		# uncalibrated. The discriminator is EARNED CHANNEL EVIDENCE:
		# semantic >= min_semantic_evidence (0.60) OR bm25 > 0
		# OR entity_graph > 0 OR temporal > 0 OR fact is pinned.
		# spreading_activation and hopfield do NOT count — they are associative
		# amplifiers that fabricated the nonsense results in calibration tests.
		# Kill-switch: SLM_RECALL_NO_FLOOR=1 bypasses the floor.
		import os as _os_floor
		floor_enabled = (
		getattr(self._config, "evidence_floor_enabled", True)
		and _os_floor.environ.get("SLM_RECALL_NO_FLOOR", "0") != "1"
		)
		if floor_enabled:
		min_sem = getattr(self._config, "min_semantic_evidence", 0.60)
		final_top = self._apply_evidence_floor(final_top, facts, min_sem)
		# Trim facts dict to match filtered final_top
		filtered_ids = {fr.fact_id for fr in final_top}
		facts = {fid: f for fid, f in facts.items() if fid in filtered_ids}

		# 6. Build response
		results = self._build_results(final_top, facts, strat)
		ms = (time.monotonic() - t0) * 1000.0
		no_match = floor_enabled and len(results) == 0
		return RecallResponse(
		@@ -306,4 +327,41 @@ query=query, mode=mode, results=results,
		total_candidates=total, retrieval_time_ms=ms,
		no_confident_match=no_match,
		)

		# -- Evidence floor (v3.6.6) -------------------------------------------

		@staticmethod
		def _apply_evidence_floor(
		    final_top: list[FusionResult],
		    facts: dict[str, AtomicFact],
		    min_semantic: float,
		) -> list[FusionResult]:
		    """Keep only the results that earned primary channel evidence.

		    A result survives when any of the following holds:
		    - its ``semantic`` channel score is >= ``min_semantic``;
		    - its ``bm25``, ``entity_graph`` or ``temporal`` channel score is > 0;
		    - the fact looked up by ``fact_id`` in ``facts`` has a truthy ``pinned``.

		    Missing channel scores count as 0.0. Other channels (for example
		    spreading_activation and hopfield) are not consulted.

		    Args:
		        final_top: Fused results, in ranked order.
		        facts: fact_id -> fact, used only for the pinned check.
		        min_semantic: Minimum semantic score that counts as evidence.

		    Returns:
		        The surviving results in their original order (possibly empty).
		    """
		    positive_channels = ("bm25", "entity_graph", "temporal")

		    def _has_evidence(candidate: FusionResult) -> bool:
		        scores = candidate.channel_scores or {}
		        if scores.get("semantic", 0.0) >= min_semantic:
		            return True
		        if any(scores.get(channel, 0.0) > 0.0 for channel in positive_channels):
		            return True
		        # Pinned facts pass regardless of their channel scores.
		        backing = facts.get(candidate.fact_id)
		        return backing is not None and bool(getattr(backing, "pinned", False))

		    return [candidate for candidate in final_top if _has_evidence(candidate)]

		# -- Cross-channel intersection boost -----------------------------------
		@@ -310,0 +368,0 @@

+3

-0

src/superlocalmemory/storage/models.py

		@@ -414,1 +414,4 @@ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
		retrieval_time_ms: float = 0.0
		# v3.6.6: Evidence floor. True when floor gates out ALL results.
		# Additive field — backward compatible (defaults to False).
		no_confident_match: bool = False

CHANGELOG.md

Sorry, the diff of this file is too big to display

src/superlocalmemory/cli/commands.py

Sorry, the diff of this file is too big to display

src/superlocalmemory/server/unified_daemon.py

Sorry, the diff of this file is too big to display

superlocalmemory - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics