@bulkhead-ai/core
Advanced tools
| /** Confidence level for a detection */ | ||
| type Confidence = "high" | "medium" | "low"; | ||
| /** Which cascade layer produced this detection */ | ||
| type DetectionSource = "regex" | "bert" | "llm"; | ||
| /** | ||
| * Disposition of a detection: whether it is final or needs escalation. | ||
| * One of "confirmed", "escalate" (needs review by a later layer), | ||
| * "dismissed", or "informational". | ||
| * NOTE(review): the exact meaning of "informational" is not described here — confirm. | ||
| */ | ||
| type Disposition = "confirmed" | "escalate" | "dismissed" | "informational"; | ||
| /** | ||
| * A single detected entity in text: where it was found, how strongly it | ||
| * matched, and which guard and cascade layer produced it. | ||
| */ | ||
| interface Detection { | ||
| /** Entity type (e.g., "CREDIT_CARD", "US_SSN", "AWS_KEY") */ | ||
| entityType: string; | ||
| /** Start offset in the input text */ | ||
| start: number; | ||
| /** End offset in the input text (NOTE(review): inclusive vs. exclusive is not specified here — confirm) */ | ||
| end: number; | ||
| /** The matched text */ | ||
| text: string; | ||
| /** Categorical detection confidence ("high" | "medium" | "low"); see `score` for the numeric value */ | ||
| confidence: Confidence; | ||
| /** Numeric score in the range 0-1 */ | ||
| score: number; | ||
| /** Which guard produced this detection */ | ||
| guardName: string; | ||
| /** Which cascade layer produced this detection ("regex", "bert", or "llm") */ | ||
| source: DetectionSource; | ||
| /** Surrounding text window for context */ | ||
| context: string; | ||
| /** Whether this detection is final or needs escalation (one of the `Disposition` values) */ | ||
| disposition: Disposition; | ||
| } | ||
| /** Result from a single guard's analysis */ | ||
| interface GuardResult { | ||
| /** Whether the text passed this guard (no issues found) */ | ||
| passed: boolean; | ||
| /** Human-readable reason for the result */ | ||
| reason: string; | ||
| /** Name of the guard that produced this result */ | ||
| guardName: string; | ||
| /** Overall score 0-1 (0 = safe, 1 = maximum threat) */ | ||
| score: number; | ||
| /** Individual detections found */ | ||
| detections: Detection[]; | ||
| /** Modified text with redactions applied (if applicable) */ | ||
| redactedText?: string; | ||
| /** | ||
| * Per-detection record of original → replacement, in document order. | ||
| * Populated when the guard ran in mode `redact` or `synthesize`. Each | ||
| * entry includes the entity type and whether the replacement came | ||
| * from a synthesizer or the placeholder fallback. | ||
| */ | ||
| redactionMap?: RedactionEntry[]; | ||
| } | ||
| /** | ||
| * Guard mode: | ||
| * - `block`: input is rejected (passed: false, no transformation). | ||
| * - `redact`: detected entities are replaced with `[REDACTED-TYPE]`. | ||
| * - `synthesize`: detected entities are replaced with realistic synthetic | ||
| * values from the engine's SynthesizerRegistry. Within one | ||
| * scan call, the same original value maps to the same | ||
| * synthetic value (consistency map). | ||
| * | ||
| * Stricter wins in policy merge: block > synthesize > redact. | ||
| */ | ||
| type GuardMode = "block" | "redact" | "synthesize"; | ||
| /** | ||
| * One row in a guard's redaction map. Records the original detected text, | ||
| * the replacement value emitted, the entity type, and which strategy | ||
| * produced the replacement. | ||
| */ | ||
| interface RedactionEntry { | ||
| /** The original (detected) value as it appeared in the input. */ | ||
| original: string; | ||
| /** The value substituted into the redacted output. */ | ||
| replacement: string; | ||
| /** The entity type from the underlying detection. */ | ||
| entityType: string; | ||
| /** | ||
| * How the replacement was produced: | ||
| * - `placeholder`: the standard `[REDACTED-TYPE]` form. | ||
| * - `synthesizer`: a synthesizer (default or user-registered) emitted | ||
| * a realistic value. | ||
| */ | ||
| via: "placeholder" | "synthesizer"; | ||
| } | ||
| /** | ||
| * Per-call context passed to a Synthesizer. | ||
| * | ||
| * The `consistencyMap` is shared across every detection in one scan call | ||
| * (and across guards within that call). Synthesizers read from it before | ||
| * generating new values so the same original always produces the same | ||
| * synthetic replacement within one document. | ||
| */ | ||
| interface SynthesizerContext { | ||
| /** The detection that produced this call. */ | ||
| detection: Detection; | ||
| /** | ||
| * Within-call original→replacement map. Synthesizers should consult | ||
| * this before generating new output. The engine writes back to it | ||
| * automatically after the synthesizer returns. | ||
| */ | ||
| consistencyMap: Map<string, string>; | ||
| } | ||
| /** | ||
| * Produce a synthetic replacement for one detected entity. | ||
| * | ||
| * Pure function. Must NOT call back into the engine. Sync by default; | ||
| * async is allowed for synthesizers that consult external registries. | ||
| */ | ||
| type Synthesizer = (original: string, ctx: SynthesizerContext) => string | Promise<string>; | ||
| /** Configuration for a guard */ | ||
| interface GuardConfig { | ||
| /** Whether this guard is enabled */ | ||
| enabled: boolean; | ||
| /** Detection threshold 0-1 (detections below this score are ignored) */ | ||
| threshold: number; | ||
| /** What to do when a detection occurs */ | ||
| mode: GuardMode; | ||
| } | ||
| /** | ||
| * Per-call context for redaction / synthesis. The engine constructs this | ||
| * once per `scan()` call so all guards share a synthesizer registry and | ||
| * consistency map. | ||
| * | ||
| * Type is opaque here to avoid a circular import; the implementation | ||
| * lives in `synthesizers/registry.ts`. | ||
| */ | ||
| interface RedactContext { | ||
| /** Engine-supplied SynthesizerRegistry. */ | ||
| registry?: { | ||
| get(entityType: string): Synthesizer | undefined; | ||
| has(entityType: string): boolean; | ||
| }; | ||
| /** Within-call original→replacement map shared across guards. */ | ||
| consistencyMap?: Map<string, string>; | ||
| } | ||
| /** A guard analyzes text and returns results */ | ||
| interface Guard { | ||
| /** Unique name for this guard */ | ||
| readonly name: string; | ||
| /** | ||
| * Analyze text and return results. | ||
| * | ||
| * The optional `redactCtx` parameter is supplied by the engine when | ||
| * one or more guards may run in `mode: "synthesize"`. It carries the | ||
| * shared SynthesizerRegistry and the per-call consistency map so that | ||
| * detections of the same value across multiple guards collapse to the | ||
| * same synthetic replacement. Existing callers that pass only | ||
| * (text, config) continue to work — synthesize mode without a | ||
| * registry falls back to placeholder replacement. | ||
| */ | ||
| analyze(text: string, config?: Partial<GuardConfig>, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| } | ||
| /** Configuration for the guardrails engine */ | ||
| interface EngineConfig { | ||
| /** Guard-specific configuration overrides */ | ||
| guards: Record<string, Partial<GuardConfig>>; | ||
| } | ||
| /** | ||
| * Result of `engine.scanObject()`. Mirrors `engine.scan()` but adds a | ||
| * `redactedObject` field that preserves the input shape and a | ||
| * `pathDetections` map keyed by JSON path so consumers can locate | ||
| * detections within nested structures without re-walking. | ||
| */ | ||
| interface ObjectScanResult<T> { | ||
| /** True iff every string leaf passed every enabled guard. */ | ||
| passed: boolean; | ||
| /** Aggregate per-guard results, with detections accumulated across all leaves. */ | ||
| results: GuardResult[]; | ||
| /** | ||
| * Input with all string leaves replaced by their guard-redacted form | ||
| * where applicable. Non-string leaves (numbers, booleans, null, | ||
| * undefined, Date, RegExp, …) pass through unchanged. Object key | ||
| * order is preserved; array length is preserved. | ||
| */ | ||
| redactedObject: T; | ||
| /** | ||
| * JSON-style path → detections found at that leaf. Paths use dot | ||
| * notation for object keys and `[i]` for array indices, matching the | ||
| * conventional JS-debugger style. The root path is the empty string. | ||
| * | ||
| * Example keys: | ||
| * "" — root (only if root itself is a string) | ||
| * "title" — top-level field | ||
| * "history[0].sapPath" — array element field | ||
| * | ||
| * Only paths where ≥1 detection occurred appear in this map. | ||
| */ | ||
| pathDetections: Record<string, Detection[]>; | ||
| } | ||
| /** A PII pattern definition */ | ||
| interface PiiPattern { | ||
| /** Entity type name (e.g., "CREDIT_CARD") */ | ||
| entityType: string; | ||
| /** Regex patterns to match */ | ||
| patterns: RegExp[]; | ||
| /** Optional validation function (e.g., Luhn check) */ | ||
| validate?: (match: string) => boolean; | ||
| /** Context words that boost confidence when found nearby */ | ||
| contextWords?: string[]; | ||
| /** Base confidence without context boost */ | ||
| baseConfidence: Confidence; | ||
| /** Base score without context boost */ | ||
| baseScore: number; | ||
| } | ||
| /** A secret pattern definition */ | ||
| interface SecretPattern { | ||
| /** Secret type name (e.g., "AWS_ACCESS_KEY") */ | ||
| secretType: string; | ||
| /** Regex patterns to match */ | ||
| patterns: RegExp[]; | ||
| /** Optional validation function */ | ||
| validate?: (match: string) => boolean; | ||
| /** Minimum entropy threshold (if applicable) */ | ||
| minEntropy?: number; | ||
| /** Context words that boost confidence when found nearby (e.g., ["heroku", "api_key"]) */ | ||
| contextWords?: string[]; | ||
| /** Base score without context boost (0-1). Defaults to 0.9 if unset. */ | ||
| baseScore?: number; | ||
| /** Base confidence without context boost. Defaults to "high" if unset. */ | ||
| baseConfidence?: Confidence; | ||
| } | ||
| /** Tactic names for detection strategies */ | ||
| type TacticName = "pattern" | "heuristic" | "llm"; | ||
| /** Result from a tactic execution */ | ||
| interface TacticResult { | ||
| /** Score 0-1 */ | ||
| score: number; | ||
| /** Additional context about the detection */ | ||
| details?: Record<string, unknown>; | ||
| } | ||
| /** | ||
| * A detection tactic. | ||
| * | ||
| * - `name`: which strategy this tactic implements (a `TacticName`). | ||
| * - `defaultThreshold`: the tactic's built-in threshold; presumably on the | ||
| *   same 0-1 scale as `TacticResult.score` — TODO confirm. | ||
| * - `execute(input)`: runs the tactic on `input` and resolves to a | ||
| *   `TacticResult`. | ||
| */ | ||
| interface Tactic { | ||
| readonly name: TacticName; | ||
| readonly defaultThreshold: number; | ||
| execute(input: string): Promise<TacticResult>; | ||
| } | ||
| /** | ||
| * LLM disambiguation layer (Layer 3) of the cascading classifier. | ||
| * Only receives ambiguous spans from Layer 2, along with surrounding context. | ||
| * Makes a focused determination: is this span PII or not? | ||
| */ | ||
| /** | ||
| * Function signature for an LLM provider: takes a prompt string and | ||
| * resolves to the model's response as a string. | ||
| * NOTE(review): LlmLayer documents a private `parseResponse` step that parses | ||
| * the response as JSON, so the response is presumably expected to be JSON — | ||
| * confirm the expected schema against the implementation. | ||
| */ | ||
| type LlmProvider = (prompt: string) => Promise<string>; | ||
| interface LlmLayerConfig { | ||
| /** Number of sentences before/after the span to include as context */ | ||
| contextSentences: number; | ||
| /** LLM provider function */ | ||
| provider?: LlmProvider; | ||
| } | ||
| declare class LlmLayer { | ||
| private config; | ||
| constructor(config?: Partial<LlmLayerConfig>); | ||
| /** Set the LLM provider (can be swapped at runtime) */ | ||
| setProvider(provider: LlmProvider): void; | ||
| /** | ||
| * Disambiguate escalated detections using an LLM. | ||
| * @param escalated Detections with disposition "escalate" | ||
| * @param fullText The full document text | ||
| * @param confirmed Already-confirmed detections (passed as context to help disambiguation) | ||
| * @returns Promise resolving to a Detection array; presumably the escalated | ||
| * detections after the LLM's decision — TODO confirm against the implementation | ||
| */ | ||
| disambiguate(escalated: Detection[], fullText: string, confirmed: Detection[]): Promise<Detection[]>; | ||
| /** Build a focused disambiguation prompt */ | ||
| private buildPrompt; | ||
| /** Extract ±N sentences around a span */ | ||
| private extractSentenceContext; | ||
| /** Parse the LLM response JSON */ | ||
| private parseResponse; | ||
| } | ||
| /** | ||
| * Cascading Classifier — orchestrates the three detection layers. | ||
| * | ||
| * Layer 1 (Regex): Always runs, sub-ms. Catches structured PII. | ||
| * → confidence: 1.0, disposition: "confirmed" | ||
| * | ||
| * Layer 2 (BERT): On-demand, 20-50ms. Catches contextual entities. | ||
| * → score >= threshold: "confirmed" | ||
| * → score < threshold: "escalate" | ||
| * | ||
| * Layer 3 (LLM): Selective, 500ms-2s. Only sees escalated spans. | ||
| * → Returns "confirmed" or "dismissed" | ||
| */ | ||
| interface CascadeConfig { | ||
| /** | ||
| * Score threshold for BERT results: scores below it escalate to the LLM | ||
| * layer, scores at or above it are confirmed. This applies to the numeric | ||
| * score, not the categorical `Confidence` level. | ||
| */ | ||
| escalationThreshold: number; | ||
| /** Number of sentences of context to pass to Layer 3 */ | ||
| contextSentences: number; | ||
| /** Whether Layer 2 (BERT) is enabled */ | ||
| bertEnabled: boolean; | ||
| /** Whether Layer 3 (LLM) is enabled */ | ||
| llmEnabled: boolean; | ||
| /** Model ID for BERT layer */ | ||
| modelId?: string; | ||
| /** LLM provider function for Layer 3 */ | ||
| llmProvider?: LlmProvider; | ||
| } | ||
| declare class CascadeClassifier { | ||
| private config; | ||
| private bertLayer; | ||
| private llmLayer; | ||
| private regexGuards; | ||
| constructor(config?: Partial<CascadeConfig>); | ||
| /** Whether the cascade is ready to serve (BERT model loaded if enabled) */ | ||
| get ready(): boolean; | ||
| /** Register a regex-based guard (Layer 1); returns `this` so calls can be chained */ | ||
| addRegexGuard(guard: Guard): this; | ||
| /** Set the LLM provider for Layer 3 */ | ||
| setLlmProvider(provider: LlmProvider): void; | ||
| /** | ||
| * Run the full cascade: Regex → BERT → LLM | ||
| * Returns a unified GuardResult with all detections carrying provenance. | ||
| */ | ||
| deepScan(text: string): Promise<GuardResult>; | ||
| /** Run Layer 1 only (for fast auto-scan path) */ | ||
| regexScan(text: string): Promise<GuardResult>; | ||
| /** Run Layers 1 + 2 only (no LLM, for "Scan File" command) */ | ||
| modelScan(text: string): Promise<GuardResult>; | ||
| private runRegexLayer; | ||
| private runBertLayer; | ||
| /** Remove BERT detections that overlap with regex detections */ | ||
| private deduplicateAgainstRegex; | ||
| private buildCascadeResult; | ||
| /** Clean up resources; the returned promise resolves when cleanup has finished */ | ||
| dispose(): Promise<void>; | ||
| } | ||
| /** | ||
| * Main-thread interface to the BERT worker (Layer 2). | ||
| * Manages the worker lifecycle and maps BERT tokens to Detection objects. | ||
| */ | ||
| interface BertLayerConfig { | ||
| modelId?: string; | ||
| /** Score threshold: detections at or above it are confirmed, detections below it escalate */ | ||
| escalationThreshold: number; | ||
| } | ||
| declare class BertLayer { | ||
| private worker; | ||
| private pendingRequests; | ||
| private requestId; | ||
| private config; | ||
| /** Whether the BERT model has been loaded and first inference completed */ | ||
| private _loaded; | ||
| get loaded(): boolean; | ||
| constructor(config?: Partial<BertLayerConfig>); | ||
| /** Resolve the worker path — supports both compiled .js and source .ts */ | ||
| private resolveWorkerPath; | ||
| /** Ensure the worker thread is running */ | ||
| private ensureWorker; | ||
| /** Send text to the BERT worker and get raw token results */ | ||
| private analyzeRaw; | ||
| /** | ||
| * Analyze text and return Detection objects with escalation disposition. | ||
| * Tokens scoring at or above the escalation threshold are "confirmed"; | ||
| * tokens scoring below it are "escalate" (need LLM review). | ||
| */ | ||
| analyze(text: string): Promise<Detection[]>; | ||
| /** Terminate the worker thread */ | ||
| dispose(): Promise<void>; | ||
| } | ||
| export { type BertLayerConfig as B, type CascadeConfig as C, type Detection as D, type EngineConfig as E, type GuardMode as G, type LlmLayerConfig as L, type ObjectScanResult as O, type PiiPattern as P, type RedactContext as R, type Synthesizer as S, type Tactic as T, type Guard as a, type GuardResult as b, type RedactionEntry as c, CascadeClassifier as d, type GuardConfig as e, type DetectionSource as f, type Disposition as g, type Confidence as h, type LlmProvider as i, type SecretPattern as j, type SynthesizerContext as k, type TacticName as l, type TacticResult as m, BertLayer as n, LlmLayer as o }; |
| /** Confidence level for a detection */ | ||
| type Confidence = "high" | "medium" | "low"; | ||
| /** Which cascade layer produced this detection */ | ||
| type DetectionSource = "regex" | "bert" | "llm"; | ||
| /** | ||
| * Disposition of a detection: whether it is final or needs escalation. | ||
| * One of "confirmed", "escalate" (needs review by a later layer), | ||
| * "dismissed", or "informational". | ||
| * NOTE(review): the exact meaning of "informational" is not described here — confirm. | ||
| */ | ||
| type Disposition = "confirmed" | "escalate" | "dismissed" | "informational"; | ||
| /** | ||
| * A single detected entity in text: where it was found, how strongly it | ||
| * matched, and which guard and cascade layer produced it. | ||
| */ | ||
| interface Detection { | ||
| /** Entity type (e.g., "CREDIT_CARD", "US_SSN", "AWS_KEY") */ | ||
| entityType: string; | ||
| /** Start offset in the input text */ | ||
| start: number; | ||
| /** End offset in the input text (NOTE(review): inclusive vs. exclusive is not specified here — confirm) */ | ||
| end: number; | ||
| /** The matched text */ | ||
| text: string; | ||
| /** Categorical detection confidence ("high" | "medium" | "low"); see `score` for the numeric value */ | ||
| confidence: Confidence; | ||
| /** Numeric score in the range 0-1 */ | ||
| score: number; | ||
| /** Which guard produced this detection */ | ||
| guardName: string; | ||
| /** Which cascade layer produced this detection ("regex", "bert", or "llm") */ | ||
| source: DetectionSource; | ||
| /** Surrounding text window for context */ | ||
| context: string; | ||
| /** Whether this detection is final or needs escalation (one of the `Disposition` values) */ | ||
| disposition: Disposition; | ||
| } | ||
| /** Result from a single guard's analysis */ | ||
| interface GuardResult { | ||
| /** Whether the text passed this guard (no issues found) */ | ||
| passed: boolean; | ||
| /** Human-readable reason for the result */ | ||
| reason: string; | ||
| /** Name of the guard that produced this result */ | ||
| guardName: string; | ||
| /** Overall score 0-1 (0 = safe, 1 = maximum threat) */ | ||
| score: number; | ||
| /** Individual detections found */ | ||
| detections: Detection[]; | ||
| /** Modified text with redactions applied (if applicable) */ | ||
| redactedText?: string; | ||
| /** | ||
| * Per-detection record of original → replacement, in document order. | ||
| * Populated when the guard ran in mode `redact` or `synthesize`. Each | ||
| * entry includes the entity type and whether the replacement came | ||
| * from a synthesizer or the placeholder fallback. | ||
| */ | ||
| redactionMap?: RedactionEntry[]; | ||
| } | ||
| /** | ||
| * Guard mode: | ||
| * - `block`: input is rejected (passed: false, no transformation). | ||
| * - `redact`: detected entities are replaced with `[REDACTED-TYPE]`. | ||
| * - `synthesize`: detected entities are replaced with realistic synthetic | ||
| * values from the engine's SynthesizerRegistry. Within one | ||
| * scan call, the same original value maps to the same | ||
| * synthetic value (consistency map). | ||
| * | ||
| * Stricter wins in policy merge: block > synthesize > redact. | ||
| */ | ||
| type GuardMode = "block" | "redact" | "synthesize"; | ||
| /** | ||
| * One row in a guard's redaction map. Records the original detected text, | ||
| * the replacement value emitted, the entity type, and which strategy | ||
| * produced the replacement. | ||
| */ | ||
| interface RedactionEntry { | ||
| /** The original (detected) value as it appeared in the input. */ | ||
| original: string; | ||
| /** The value substituted into the redacted output. */ | ||
| replacement: string; | ||
| /** The entity type from the underlying detection. */ | ||
| entityType: string; | ||
| /** | ||
| * How the replacement was produced: | ||
| * - `placeholder`: the standard `[REDACTED-TYPE]` form. | ||
| * - `synthesizer`: a synthesizer (default or user-registered) emitted | ||
| * a realistic value. | ||
| */ | ||
| via: "placeholder" | "synthesizer"; | ||
| } | ||
| /** | ||
| * Per-call context passed to a Synthesizer. | ||
| * | ||
| * The `consistencyMap` is shared across every detection in one scan call | ||
| * (and across guards within that call). Synthesizers read from it before | ||
| * generating new values so the same original always produces the same | ||
| * synthetic replacement within one document. | ||
| */ | ||
| interface SynthesizerContext { | ||
| /** The detection that produced this call. */ | ||
| detection: Detection; | ||
| /** | ||
| * Within-call original→replacement map. Synthesizers should consult | ||
| * this before generating new output. The engine writes back to it | ||
| * automatically after the synthesizer returns. | ||
| */ | ||
| consistencyMap: Map<string, string>; | ||
| } | ||
| /** | ||
| * Produce a synthetic replacement for one detected entity. | ||
| * | ||
| * Pure function. Must NOT call back into the engine. Sync by default; | ||
| * async is allowed for synthesizers that consult external registries. | ||
| */ | ||
| type Synthesizer = (original: string, ctx: SynthesizerContext) => string | Promise<string>; | ||
| /** Configuration for a guard */ | ||
| interface GuardConfig { | ||
| /** Whether this guard is enabled */ | ||
| enabled: boolean; | ||
| /** Detection threshold 0-1 (detections below this score are ignored) */ | ||
| threshold: number; | ||
| /** What to do when a detection occurs */ | ||
| mode: GuardMode; | ||
| } | ||
| /** | ||
| * Per-call context for redaction / synthesis. The engine constructs this | ||
| * once per `scan()` call so all guards share a synthesizer registry and | ||
| * consistency map. | ||
| * | ||
| * Type is opaque here to avoid a circular import; the implementation | ||
| * lives in `synthesizers/registry.ts`. | ||
| */ | ||
| interface RedactContext { | ||
| /** Engine-supplied SynthesizerRegistry. */ | ||
| registry?: { | ||
| get(entityType: string): Synthesizer | undefined; | ||
| has(entityType: string): boolean; | ||
| }; | ||
| /** Within-call original→replacement map shared across guards. */ | ||
| consistencyMap?: Map<string, string>; | ||
| } | ||
| /** A guard analyzes text and returns results */ | ||
| interface Guard { | ||
| /** Unique name for this guard */ | ||
| readonly name: string; | ||
| /** | ||
| * Analyze text and return results. | ||
| * | ||
| * The optional `redactCtx` parameter is supplied by the engine when | ||
| * one or more guards may run in `mode: "synthesize"`. It carries the | ||
| * shared SynthesizerRegistry and the per-call consistency map so that | ||
| * detections of the same value across multiple guards collapse to the | ||
| * same synthetic replacement. Existing callers that pass only | ||
| * (text, config) continue to work — synthesize mode without a | ||
| * registry falls back to placeholder replacement. | ||
| */ | ||
| analyze(text: string, config?: Partial<GuardConfig>, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| } | ||
| /** Configuration for the guardrails engine */ | ||
| interface EngineConfig { | ||
| /** Guard-specific configuration overrides */ | ||
| guards: Record<string, Partial<GuardConfig>>; | ||
| } | ||
| /** | ||
| * Result of `engine.scanObject()`. Mirrors `engine.scan()` but adds a | ||
| * `redactedObject` field that preserves the input shape and a | ||
| * `pathDetections` map keyed by JSON path so consumers can locate | ||
| * detections within nested structures without re-walking. | ||
| */ | ||
| interface ObjectScanResult<T> { | ||
| /** True iff every string leaf passed every enabled guard. */ | ||
| passed: boolean; | ||
| /** Aggregate per-guard results, with detections accumulated across all leaves. */ | ||
| results: GuardResult[]; | ||
| /** | ||
| * Input with all string leaves replaced by their guard-redacted form | ||
| * where applicable. Non-string leaves (numbers, booleans, null, | ||
| * undefined, Date, RegExp, …) pass through unchanged. Object key | ||
| * order is preserved; array length is preserved. | ||
| */ | ||
| redactedObject: T; | ||
| /** | ||
| * JSON-style path → detections found at that leaf. Paths use dot | ||
| * notation for object keys and `[i]` for array indices, matching the | ||
| * conventional JS-debugger style. The root path is the empty string. | ||
| * | ||
| * Example keys: | ||
| * "" — root (only if root itself is a string) | ||
| * "title" — top-level field | ||
| * "history[0].sapPath" — array element field | ||
| * | ||
| * Only paths where ≥1 detection occurred appear in this map. | ||
| */ | ||
| pathDetections: Record<string, Detection[]>; | ||
| } | ||
| /** A PII pattern definition */ | ||
| interface PiiPattern { | ||
| /** Entity type name (e.g., "CREDIT_CARD") */ | ||
| entityType: string; | ||
| /** Regex patterns to match */ | ||
| patterns: RegExp[]; | ||
| /** Optional validation function (e.g., Luhn check) */ | ||
| validate?: (match: string) => boolean; | ||
| /** Context words that boost confidence when found nearby */ | ||
| contextWords?: string[]; | ||
| /** Base confidence without context boost */ | ||
| baseConfidence: Confidence; | ||
| /** Base score without context boost */ | ||
| baseScore: number; | ||
| } | ||
| /** A secret pattern definition */ | ||
| interface SecretPattern { | ||
| /** Secret type name (e.g., "AWS_ACCESS_KEY") */ | ||
| secretType: string; | ||
| /** Regex patterns to match */ | ||
| patterns: RegExp[]; | ||
| /** Optional validation function */ | ||
| validate?: (match: string) => boolean; | ||
| /** Minimum entropy threshold (if applicable) */ | ||
| minEntropy?: number; | ||
| /** Context words that boost confidence when found nearby (e.g., ["heroku", "api_key"]) */ | ||
| contextWords?: string[]; | ||
| /** Base score without context boost (0-1). Defaults to 0.9 if unset. */ | ||
| baseScore?: number; | ||
| /** Base confidence without context boost. Defaults to "high" if unset. */ | ||
| baseConfidence?: Confidence; | ||
| } | ||
| /** Tactic names for detection strategies */ | ||
| type TacticName = "pattern" | "heuristic" | "llm"; | ||
| /** Result from a tactic execution */ | ||
| interface TacticResult { | ||
| /** Score 0-1 */ | ||
| score: number; | ||
| /** Additional context about the detection */ | ||
| details?: Record<string, unknown>; | ||
| } | ||
| /** | ||
| * A detection tactic. | ||
| * | ||
| * - `name`: which strategy this tactic implements (a `TacticName`). | ||
| * - `defaultThreshold`: the tactic's built-in threshold; presumably on the | ||
| *   same 0-1 scale as `TacticResult.score` — TODO confirm. | ||
| * - `execute(input)`: runs the tactic on `input` and resolves to a | ||
| *   `TacticResult`. | ||
| */ | ||
| interface Tactic { | ||
| readonly name: TacticName; | ||
| readonly defaultThreshold: number; | ||
| execute(input: string): Promise<TacticResult>; | ||
| } | ||
| /** | ||
| * LLM disambiguation layer (Layer 3) of the cascading classifier. | ||
| * Only receives ambiguous spans from Layer 2, along with surrounding context. | ||
| * Makes a focused determination: is this span PII or not? | ||
| */ | ||
| /** | ||
| * Function signature for an LLM provider: takes a prompt string and | ||
| * resolves to the model's response as a string. | ||
| * NOTE(review): LlmLayer documents a private `parseResponse` step that parses | ||
| * the response as JSON, so the response is presumably expected to be JSON — | ||
| * confirm the expected schema against the implementation. | ||
| */ | ||
| type LlmProvider = (prompt: string) => Promise<string>; | ||
| interface LlmLayerConfig { | ||
| /** Number of sentences before/after the span to include as context */ | ||
| contextSentences: number; | ||
| /** LLM provider function */ | ||
| provider?: LlmProvider; | ||
| } | ||
| declare class LlmLayer { | ||
| private config; | ||
| constructor(config?: Partial<LlmLayerConfig>); | ||
| /** Set the LLM provider (can be swapped at runtime) */ | ||
| setProvider(provider: LlmProvider): void; | ||
| /** | ||
| * Disambiguate escalated detections using an LLM. | ||
| * @param escalated Detections with disposition "escalate" | ||
| * @param fullText The full document text | ||
| * @param confirmed Already-confirmed detections (passed as context to help disambiguation) | ||
| * @returns Promise resolving to a Detection array; presumably the escalated | ||
| * detections after the LLM's decision — TODO confirm against the implementation | ||
| */ | ||
| disambiguate(escalated: Detection[], fullText: string, confirmed: Detection[]): Promise<Detection[]>; | ||
| /** Build a focused disambiguation prompt */ | ||
| private buildPrompt; | ||
| /** Extract ±N sentences around a span */ | ||
| private extractSentenceContext; | ||
| /** Parse the LLM response JSON */ | ||
| private parseResponse; | ||
| } | ||
| /** | ||
| * Cascading Classifier — orchestrates the three detection layers. | ||
| * | ||
| * Layer 1 (Regex): Always runs, sub-ms. Catches structured PII. | ||
| * → confidence: 1.0, disposition: "confirmed" | ||
| * | ||
| * Layer 2 (BERT): On-demand, 20-50ms. Catches contextual entities. | ||
| * → score >= threshold: "confirmed" | ||
| * → score < threshold: "escalate" | ||
| * | ||
| * Layer 3 (LLM): Selective, 500ms-2s. Only sees escalated spans. | ||
| * → Returns "confirmed" or "dismissed" | ||
| */ | ||
| interface CascadeConfig { | ||
| /** | ||
| * Score threshold for BERT results: scores below it escalate to the LLM | ||
| * layer, scores at or above it are confirmed. This applies to the numeric | ||
| * score, not the categorical `Confidence` level. | ||
| */ | ||
| escalationThreshold: number; | ||
| /** Number of sentences of context to pass to Layer 3 */ | ||
| contextSentences: number; | ||
| /** Whether Layer 2 (BERT) is enabled */ | ||
| bertEnabled: boolean; | ||
| /** Whether Layer 3 (LLM) is enabled */ | ||
| llmEnabled: boolean; | ||
| /** Model ID for BERT layer */ | ||
| modelId?: string; | ||
| /** LLM provider function for Layer 3 */ | ||
| llmProvider?: LlmProvider; | ||
| } | ||
| declare class CascadeClassifier { | ||
| private config; | ||
| private bertLayer; | ||
| private llmLayer; | ||
| private regexGuards; | ||
| constructor(config?: Partial<CascadeConfig>); | ||
| /** Whether the cascade is ready to serve (BERT model loaded if enabled) */ | ||
| get ready(): boolean; | ||
| /** Register a regex-based guard (Layer 1); returns `this` so calls can be chained */ | ||
| addRegexGuard(guard: Guard): this; | ||
| /** Set the LLM provider for Layer 3 */ | ||
| setLlmProvider(provider: LlmProvider): void; | ||
| /** | ||
| * Run the full cascade: Regex → BERT → LLM | ||
| * Returns a unified GuardResult with all detections carrying provenance. | ||
| */ | ||
| deepScan(text: string): Promise<GuardResult>; | ||
| /** Run Layer 1 only (for fast auto-scan path) */ | ||
| regexScan(text: string): Promise<GuardResult>; | ||
| /** Run Layers 1 + 2 only (no LLM, for "Scan File" command) */ | ||
| modelScan(text: string): Promise<GuardResult>; | ||
| private runRegexLayer; | ||
| private runBertLayer; | ||
| /** Remove BERT detections that overlap with regex detections */ | ||
| private deduplicateAgainstRegex; | ||
| private buildCascadeResult; | ||
| /** Clean up resources; the returned promise resolves when cleanup has finished */ | ||
| dispose(): Promise<void>; | ||
| } | ||
| /** | ||
| * Main-thread interface to the BERT worker (Layer 2). | ||
| * Manages the worker lifecycle and maps BERT tokens to Detection objects. | ||
| */ | ||
| interface BertLayerConfig { | ||
| modelId?: string; | ||
| /** Score threshold: detections at or above it are confirmed, detections below it escalate */ | ||
| escalationThreshold: number; | ||
| } | ||
| declare class BertLayer { | ||
| private worker; | ||
| private pendingRequests; | ||
| private requestId; | ||
| private config; | ||
| /** Whether the BERT model has been loaded and first inference completed */ | ||
| private _loaded; | ||
| get loaded(): boolean; | ||
| constructor(config?: Partial<BertLayerConfig>); | ||
| /** Resolve the worker path — supports both compiled .js and source .ts */ | ||
| private resolveWorkerPath; | ||
| /** Ensure the worker thread is running */ | ||
| private ensureWorker; | ||
| /** Send text to the BERT worker and get raw token results */ | ||
| private analyzeRaw; | ||
| /** | ||
| * Analyze text and return Detection objects with escalation disposition. | ||
| * Tokens scoring at or above the escalation threshold are "confirmed"; | ||
| * tokens scoring below it are "escalate" (need LLM review). | ||
| */ | ||
| analyze(text: string): Promise<Detection[]>; | ||
| /** Terminate the worker thread */ | ||
| dispose(): Promise<void>; | ||
| } | ||
| export { type BertLayerConfig as B, type CascadeConfig as C, type Detection as D, type EngineConfig as E, type GuardMode as G, type LlmLayerConfig as L, type ObjectScanResult as O, type PiiPattern as P, type RedactContext as R, type Synthesizer as S, type Tactic as T, type Guard as a, type GuardResult as b, type RedactionEntry as c, CascadeClassifier as d, type GuardConfig as e, type DetectionSource as f, type Disposition as g, type Confidence as h, type LlmProvider as i, type SecretPattern as j, type SynthesizerContext as k, type TacticName as l, type TacticResult as m, BertLayer as n, LlmLayer as o }; |
@@ -1,1 +0,1 @@ | ||
| export { k as BertLayer, B as BertLayerConfig, c as CascadeClassifier, C as CascadeConfig, l as LlmLayer, L as LlmLayerConfig, h as LlmProvider } from '../index-C6_XMlM6.mjs'; | ||
| export { n as BertLayer, B as BertLayerConfig, d as CascadeClassifier, C as CascadeConfig, o as LlmLayer, L as LlmLayerConfig, i as LlmProvider } from '../index-BzdAKl9y.mjs'; |
@@ -1,1 +0,1 @@ | ||
| export { k as BertLayer, B as BertLayerConfig, c as CascadeClassifier, C as CascadeConfig, l as LlmLayer, L as LlmLayerConfig, h as LlmProvider } from '../index-C6_XMlM6.js'; | ||
| export { n as BertLayer, B as BertLayerConfig, d as CascadeClassifier, C as CascadeConfig, o as LlmLayer, L as LlmLayerConfig, i as LlmProvider } from '../index-BzdAKl9y.js'; |
+277
-16
@@ -1,6 +0,39 @@ | ||
| import { G as GuardMode, E as EngineConfig, a as Guard, b as GuardResult, C as CascadeConfig, c as CascadeClassifier, d as GuardConfig, D as Detection, e as DetectionSource, f as Disposition, P as PiiPattern } from './index-C6_XMlM6.mjs'; | ||
| export { B as BertLayerConfig, g as Confidence, L as LlmLayerConfig, h as LlmProvider, S as SecretPattern, T as Tactic, i as TacticName, j as TacticResult } from './index-C6_XMlM6.mjs'; | ||
| import { G as GuardMode, S as Synthesizer, E as EngineConfig, a as Guard, R as RedactContext, b as GuardResult, c as RedactionEntry, C as CascadeConfig, d as CascadeClassifier, O as ObjectScanResult, e as GuardConfig, D as Detection, f as DetectionSource, g as Disposition, P as PiiPattern } from './index-BzdAKl9y.mjs'; | ||
| export { B as BertLayerConfig, h as Confidence, L as LlmLayerConfig, i as LlmProvider, j as SecretPattern, k as SynthesizerContext, T as Tactic, l as TacticName, m as TacticResult } from './index-BzdAKl9y.mjs'; | ||
| /** Severity level for risk rating */ | ||
| type RiskLevel = "critical" | "high" | "medium" | "low" | "none"; | ||
| /** | ||
| * How to handle detected dates and timestamps. Default is "preserve" | ||
| * (current behavior; emits low-score detections only, no transformation). | ||
| * | ||
| * - "preserve": dates pass through unchanged | ||
| * - "rebase-year-zero": replace the year in any ISO 8601 / mm-dd-yyyy / | ||
| * yyyy-mm-dd timestamp with `0001`, preserving | ||
| * month, day, and time-of-day. Removes year-of- | ||
| * event correlation while keeping seasonality | ||
| * and time-of-day patterns. | ||
| * - "rebase-relative-to-earliest": | ||
| * find the earliest detected timestamp in the | ||
| * input, rebase everything so the earliest | ||
| * becomes `0001-01-01T00:00:00.000Z` and all | ||
| * others are relative offsets. Removes all | ||
| * absolute-time correlation while preserving | ||
| * relative timing (intervals, ordering). | ||
| */ | ||
| type TemporalMode = "preserve" | "rebase-year-zero" | "rebase-relative-to-earliest"; | ||
| /** | ||
| * Temporal policy applied to detected timestamps. See TemporalMode for | ||
| * the available modes. When `mode` is `"preserve"` (default), no | ||
| * transformation runs. | ||
| */ | ||
| interface TemporalPolicy { | ||
| mode: TemporalMode; | ||
| /** | ||
| * Round rebased timestamps to a coarser unit. Defaults to "ms" | ||
| * (preserve full precision). Set "hour" or "day" for additional | ||
| * privacy at the cost of relative-ordering precision. | ||
| */ | ||
| precision?: "ms" | "second" | "minute" | "hour" | "day"; | ||
| } | ||
| /** Per-guard policy configuration */ | ||
@@ -40,2 +73,8 @@ interface GuardPolicyConfig { | ||
| testDataDetection?: "flag" | "strip" | "ignore"; | ||
| /** | ||
| * Optional temporal-policy transformation applied to detected | ||
| * timestamps. Default: { mode: "preserve" } (no transformation). | ||
| * See TemporalPolicy and TemporalMode. | ||
| */ | ||
| temporalPolicy?: TemporalPolicy; | ||
| } | ||
@@ -82,2 +121,32 @@ /** Risk assessment returned alongside scan results */ | ||
| /** | ||
| * SynthesizerRegistry — keyed by entity type, supplies replacement | ||
| * functions used by `mode: "synthesize"`. | ||
| * | ||
| * The engine ships with default synthesizers for common PII types | ||
| * (see ./defaults.ts). Consumers can override or extend them via | ||
| * `engine.setSynthesizers({ EMAIL_ADDRESS: customFn, ... })`. | ||
| * | ||
| * Synthesizers are looked up by exact `entityType` match. A guard whose | ||
| * detection has an entity type with no registered synthesizer falls back | ||
| * to the standard `[REDACTED-TYPE]` placeholder (see base.guard.ts). | ||
| */ | ||
| declare class SynthesizerRegistry { | ||
| private map; | ||
| constructor(initial?: Record<string, Synthesizer>); | ||
| /** Register / override a synthesizer for a specific entity type. */ | ||
| set(entityType: string, fn: Synthesizer): this; | ||
| /** Bulk-register synthesizers. Existing entries with the same entityType are overridden. */ | ||
| setMany(synths: Record<string, Synthesizer>): this; | ||
| /** Look up the synthesizer for an entity type, if any. */ | ||
| get(entityType: string): Synthesizer | undefined; | ||
| /** Whether a synthesizer exists for this entity type (including defaults). */ | ||
| has(entityType: string): boolean; | ||
| /** Remove a synthesizer (including any default for this type). */ | ||
| delete(entityType: string): boolean; | ||
| /** Iterate registered entity types. */ | ||
| entityTypes(): string[]; | ||
| } | ||
| /** Orchestrates multiple guards and aggregates results */ | ||
@@ -88,3 +157,30 @@ declare class GuardrailsEngine { | ||
| private cascade; | ||
| private synthRegistry; | ||
| private temporalPolicy; | ||
| constructor(config?: Partial<EngineConfig>); | ||
| /** | ||
| * Replace or extend the synthesizer registry. Registered synthesizers | ||
| * override defaults for the same entity type. Returns this for chaining. | ||
| * | ||
| * Synthesizers are looked up at scan time when a guard runs in | ||
| * `mode: "synthesize"`. See ./synthesizers/defaults.ts for the built-in | ||
| * set. | ||
| */ | ||
| setSynthesizers(synths: Record<string, Synthesizer>): this; | ||
| /** Access the active synthesizer registry (defaults + user-registered). */ | ||
| get synthesizers(): SynthesizerRegistry; | ||
| /** | ||
| * Set the temporal policy applied to detected timestamps. Default is | ||
| * undefined (equivalent to `{ mode: "preserve" }` — no transformation). | ||
| * | ||
| * When set, every `engine.scan()` call applies the policy: detected | ||
| * `DATE_TIME` entities are rebased per the policy mode and the rebased | ||
| * values flow through the redaction pipeline via the per-call | ||
| * consistency map. Composes with synthesize mode (a date that's both | ||
| * rebased AND meant to be synthesized: temporal wins because it | ||
| * pre-populates the cache before guards run). | ||
| */ | ||
| setTemporalPolicy(policy: TemporalPolicy | undefined): this; | ||
| /** Read the active temporal policy, if any. */ | ||
| get temporalPolicyConfig(): TemporalPolicy | undefined; | ||
| /** Register a guard with the engine */ | ||
@@ -96,9 +192,34 @@ addGuard(guard: Guard): this; | ||
| private getGuardConfig; | ||
| /** Run all enabled guards against the input text */ | ||
| analyze(text: string): Promise<GuardResult[]>; | ||
| /** Run all guards and return a single pass/fail with all detections */ | ||
| scan(text: string): Promise<{ | ||
| /** | ||
| * Run all enabled guards against the input text. | ||
| * | ||
| * When `redactCtx` is omitted the engine creates a default context | ||
| * (engine registry + fresh consistency map) so that direct callers | ||
| * and the regex-only fallback paths inside `deepScan()`/`modelScan()` | ||
| * all receive the synthesizer registry. This ensures `mode: | ||
| * "synthesize"` works correctly across every engine entry point, not | ||
| * only when called from `scan()`. | ||
| */ | ||
| analyze(text: string, redactCtx?: RedactContext): Promise<GuardResult[]>; | ||
| /** | ||
| * Run all guards and return a single pass/fail with all detections. | ||
| * | ||
| * When any guard runs in `mode: "redact"` or `mode: "synthesize"`, | ||
| * the engine creates a per-call shared `consistencyMap` and passes | ||
| * its synthesizer registry to every guard. This guarantees that the | ||
| * same original value across detections from different guards | ||
| * collapses to the same replacement (true cross-guard consistency | ||
| * rather than the pre-Phase-6 behavior where the last guard's | ||
| * `redactedText` won and earlier guards' redactions were lost). | ||
| * | ||
| * The aggregate `redactedText` reflects all guards' redactions in a | ||
| * single output. The aggregate `redactionMap` is ordered by each | ||
| * entry's `start` offset (document order), rather than by | ||
| * guard-iteration order. | ||
| */ | ||
| scan(text: string, redactCtx?: RedactContext): Promise<{ | ||
| passed: boolean; | ||
| results: GuardResult[]; | ||
| redactedText?: string; | ||
| redactionMap?: RedactionEntry[]; | ||
| }>; | ||
@@ -123,5 +244,34 @@ /** Get list of registered guard names */ | ||
| redactedText?: string; | ||
| redactionMap?: RedactionEntry[]; | ||
| }>; | ||
| /** Clean up resources (terminate BERT worker, etc.) */ | ||
| dispose(): Promise<void>; | ||
| /** | ||
| * Run all enabled guards over every string leaf in a structured input | ||
| * (object, array, or any combination) and return a result that | ||
| * preserves the input's shape with string leaves redacted in place. | ||
| * | ||
| * Walking semantics: | ||
| * - Strings: passed to `scan()`. Replaced with `redactedText` if | ||
| * guards produced redactions; otherwise pass-through. | ||
| * - Numbers, booleans, null, undefined, bigint, symbol, Date, | ||
| * RegExp, Map, Set, functions: pass through unchanged. Bulkhead | ||
| * does not classify non-string leaves. | ||
| * - Arrays: each element walked with index appended to path. | ||
| * - Plain objects: each entry walked with key appended to path. | ||
| * Object key order is preserved. | ||
| * | ||
| * Cross-leaf consistency: a single `consistencyMap` is created for | ||
| * the entire `scanObject` call and threaded through every leaf scan. | ||
| * In `mode: "synthesize"` (RFC-001), the same original value | ||
| * produces the same synthetic replacement across all leaves of one | ||
| * call. | ||
| * | ||
| * @param input Any JSON-serializable structure, or a string. Generic | ||
| * type `T` is preserved on `redactedObject`. | ||
| * @returns ObjectScanResult with aggregated guard results, | ||
| * shape-preserving `redactedObject`, and per-path | ||
| * detection map. | ||
| */ | ||
| scanObject<T>(input: T): Promise<ObjectScanResult<T>>; | ||
| } | ||
@@ -134,4 +284,20 @@ | ||
| abstract analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| /** Build a GuardResult from detections */ | ||
| protected buildResult(text: string, detections: Detection[], mode: GuardMode): GuardResult; | ||
| /** | ||
| * Build a GuardResult from detections. Mode-aware: | ||
| * | ||
| * - `block`: no transformation; passed=false on any detection. | ||
| * - `redact`: each detection replaced by `[REDACTED-TYPE]`. | ||
| * - `synthesize`: each detection passed to the registry's synthesizer | ||
| * for its entityType. Falls back to placeholder if no | ||
| * synthesizer is registered. Same `original` always | ||
| * produces the same replacement within the call (via | ||
| * `consistencyMap`). | ||
| * | ||
| * The optional `redactCtx` parameter is supplied by the engine to share | ||
| * the registry and consistency map across guards in one scan call. | ||
| * When omitted, synthesize mode behaves identically to redact mode (no | ||
| * registered synthesizers are visible without an engine-supplied | ||
| * registry, so all replacements fall back to placeholders). | ||
| */ | ||
| protected buildResult(text: string, detections: Detection[], mode: GuardMode, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| /** Extract surrounding context for a detection */ | ||
@@ -141,4 +307,26 @@ protected extractContext(text: string, start: number, end: number): string; | ||
| protected makeDetection(text: string, partial: Omit<Detection, "source" | "context" | "disposition">, source?: DetectionSource, disposition?: Disposition): Detection; | ||
| /** Replace detected text with [REDACTED-TYPE] markers */ | ||
| protected applyRedactions(text: string, detections: Detection[]): string; | ||
| /** | ||
| * Apply redactions / synthesizations to text. Returns the modified | ||
| * text plus a per-detection RedactionEntry record. | ||
| * | ||
| * Detections are processed in **reverse order by start offset** so | ||
| * that replacements don't shift the offsets of pending detections. | ||
| * The redaction map is returned in **document order** (same order as | ||
| * `detections`). | ||
| */ | ||
| protected applyRedactions(text: string, detections: Detection[], mode: GuardMode, redactCtx?: RedactContext): Promise<{ | ||
| text: string; | ||
| redactionMap: RedactionEntry[]; | ||
| }>; | ||
| /** | ||
| * Compute the replacement for one detection. | ||
| * | ||
| * Returns an inline-cached value from the consistency map if present | ||
| * (to keep multi-mention consistency cheap and stable), otherwise: | ||
| * - mode `redact` -> `[REDACTED-${entityType}]` | ||
| * - mode `synthesize` -> registry-supplied synthesizer, or fallback | ||
| * to `[REDACTED-${entityType}]` if no | ||
| * synthesizer is registered for the type. | ||
| */ | ||
| private computeReplacement; | ||
| } | ||
@@ -161,3 +349,3 @@ | ||
| constructor(options?: PiiGuardOptions); | ||
| analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| analyze(text: string, config?: Partial<GuardConfig>, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| private detectAll; | ||
@@ -180,3 +368,3 @@ private deduplicateDetections; | ||
| constructor(options?: SecretGuardOptions); | ||
| analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| analyze(text: string, config?: Partial<GuardConfig>, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| } | ||
@@ -191,3 +379,3 @@ | ||
| readonly name = "injection"; | ||
| analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| analyze(text: string, config?: Partial<GuardConfig>, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| private heuristicScore; | ||
@@ -203,3 +391,3 @@ } | ||
| readonly name = "leakage"; | ||
| analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| analyze(text: string, config?: Partial<GuardConfig>, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| } | ||
@@ -215,3 +403,3 @@ | ||
| readonly name = "testdata"; | ||
| analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| analyze(text: string, config?: Partial<GuardConfig>, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| /** Build a result that always passes — test data is informational, not blocking */ | ||
@@ -222,2 +410,75 @@ private buildInformationalResult; | ||
| /** | ||
| * Default synthesizers for common PII entity types. | ||
| * | ||
| * Each synthesizer: | ||
| * - Is **pure** (deterministic given input) and **side-effect-free**. | ||
| * - Reads the per-call `consistencyMap` first. If `original` already | ||
| * has a replacement, it returns that — never minting a fresh one | ||
| * for the same input within a call. | ||
| * - Uses **IETF / RFC documentation reservations** for synthetic values | ||
| * where they exist (example.com, 192.0.2.0/24, RFC 7042 MAC, GB82 | ||
| * IBAN, Stripe test cards). These are guaranteed safe and identifiable | ||
| * as test data by automated scanners. | ||
| * - Hashes `original` to pick a stable index into a name list / number | ||
| * range, so the same input always produces the same output across | ||
| * calls within one engine session. | ||
| * | ||
| * Replacement contract: synthesizers receive only the matched substring | ||
| * (e.g. `"john.smith@contoso.com"`), not the surrounding context. They | ||
| * MUST return a string of comparable shape to the input (e.g. an email | ||
| * address replaced with another email address). | ||
| */ | ||
| /** | ||
| * Replace with `firstname.lastname@example.com` (RFC 2606 reserved). | ||
| * Stable across calls for the same original. | ||
| */ | ||
| declare const synthEmail: Synthesizer; | ||
| /** | ||
| * Replace with a synthetic "First Last" pair drawn from the name lists. | ||
| * The first/last name combination is hashed off the original so multiple | ||
| * mentions of the same name in one document map to the same synthetic | ||
| * (via the consistency map). | ||
| */ | ||
| declare const synthPersonName: Synthesizer; | ||
| /** | ||
| * Replace with `+1-555-010-XXXX` where XXXX is the last 4 digits of the | ||
| * FNV-1a hash mod 10000. The 555-01XX range is reserved for fictitious | ||
| * numbers in NANP. Same input → same output. | ||
| */ | ||
| declare const synthPhone: Synthesizer; | ||
| declare const synthCreditCard: Synthesizer; | ||
| declare const synthIpAddress: Synthesizer; | ||
| /** | ||
| * Replace the host with `example.com` (RFC 2606), preserve scheme and path. | ||
| * If the URL doesn't parse, fall back to literal `https://example.com`. | ||
| */ | ||
| declare const synthUrl: Synthesizer; | ||
| /** | ||
| * Replace with the well-known UK test IBAN. Stable across all inputs; | ||
| * IBAN format is high-structure so brand-preservation isn't applicable. | ||
| */ | ||
| declare const synthIban: Synthesizer; | ||
| /** | ||
| * Replace with `00:00:5E:00:53:XX` from the RFC 7042 documentation MAC | ||
| * range. Last octet hashed for stability. | ||
| */ | ||
| declare const synthMac: Synthesizer; | ||
| /** | ||
| * Replace with `00000000-redacted-XXXX-0000-NNNNNNNNNNNN` where the XXXX | ||
| * and trailing 12-hex segment are derived from the input hash for | ||
| * stability. Format is intentionally non-canonical so downstream PII | ||
| * scanners flag it as `TEST_DATA_GUID` rather than as a real GUID. | ||
| */ | ||
| declare const synthGuid: Synthesizer; | ||
| declare const synthCrypto: Synthesizer; | ||
| /** | ||
| * Default synthesizer registry. Keys match the `entityType` strings | ||
| * emitted by the bundled PII patterns (see `src/patterns/pii/`). | ||
| * | ||
| * Consumers extend or override via `engine.setSynthesizers({ ... })`. | ||
| */ | ||
| declare const DEFAULT_SYNTHESIZERS: Record<string, Synthesizer>; | ||
| /** All built-in policies indexed by name */ | ||
@@ -278,2 +539,2 @@ declare const BUILTIN_POLICIES: Record<string, PolicyDefinition>; | ||
| export { BUILTIN_POLICIES, BaseGuard, type BulkheadConfig, CascadeConfig, type ClassifiedIssue, DEFAULT_CONFIG, Detection, DetectionSource, Disposition, EngineConfig, Guard, GuardConfig, GuardMode, type GuardPolicyConfig, GuardResult, GuardrailsEngine, InjectionGuard, LeakageGuard, PiiGuard, type PiiGuardOptions, PiiPattern, type PolicyDefinition, type RiskAssessment, type RiskLevel, type RiskThresholds, SecretGuard, type SecretGuardOptions, type TestDataFlag, TestDataGuard, assessRisk, createEngine, getPolicy, policyToEngineConfig, resolvePolicy }; | ||
| export { BUILTIN_POLICIES, BaseGuard, type BulkheadConfig, CascadeConfig, type ClassifiedIssue, DEFAULT_CONFIG, DEFAULT_SYNTHESIZERS, Detection, DetectionSource, Disposition, EngineConfig, Guard, GuardConfig, GuardMode, type GuardPolicyConfig, GuardResult, GuardrailsEngine, InjectionGuard, LeakageGuard, ObjectScanResult, PiiGuard, type PiiGuardOptions, PiiPattern, type PolicyDefinition, RedactContext, RedactionEntry, type RiskAssessment, type RiskLevel, type RiskThresholds, SecretGuard, type SecretGuardOptions, Synthesizer, SynthesizerRegistry, type TemporalMode, type TemporalPolicy, type TestDataFlag, TestDataGuard, assessRisk, createEngine, getPolicy, policyToEngineConfig, resolvePolicy, synthCreditCard, synthCrypto, synthEmail, synthGuid, synthIban, synthIpAddress, synthMac, synthPersonName, synthPhone, synthUrl }; |
+277
-16
@@ -1,6 +0,39 @@ | ||
| import { G as GuardMode, E as EngineConfig, a as Guard, b as GuardResult, C as CascadeConfig, c as CascadeClassifier, d as GuardConfig, D as Detection, e as DetectionSource, f as Disposition, P as PiiPattern } from './index-C6_XMlM6.js'; | ||
| export { B as BertLayerConfig, g as Confidence, L as LlmLayerConfig, h as LlmProvider, S as SecretPattern, T as Tactic, i as TacticName, j as TacticResult } from './index-C6_XMlM6.js'; | ||
| import { G as GuardMode, S as Synthesizer, E as EngineConfig, a as Guard, R as RedactContext, b as GuardResult, c as RedactionEntry, C as CascadeConfig, d as CascadeClassifier, O as ObjectScanResult, e as GuardConfig, D as Detection, f as DetectionSource, g as Disposition, P as PiiPattern } from './index-BzdAKl9y.js'; | ||
| export { B as BertLayerConfig, h as Confidence, L as LlmLayerConfig, i as LlmProvider, j as SecretPattern, k as SynthesizerContext, T as Tactic, l as TacticName, m as TacticResult } from './index-BzdAKl9y.js'; | ||
| /** Severity level for risk rating */ | ||
| type RiskLevel = "critical" | "high" | "medium" | "low" | "none"; | ||
| /** | ||
| * How to handle detected dates and timestamps. Default is "preserve" | ||
| * (current behavior; emits low-score detections only, no transformation). | ||
| * | ||
| * - "preserve": dates pass through unchanged | ||
| * - "rebase-year-zero": replace the year in any ISO 8601 / mm-dd-yyyy / | ||
| * yyyy-mm-dd timestamp with `0001`, preserving | ||
| * month, day, and time-of-day. Removes year-of- | ||
| * event correlation while keeping seasonality | ||
| * and time-of-day patterns. | ||
| * - "rebase-relative-to-earliest": | ||
| * find the earliest detected timestamp in the | ||
| * input, rebase everything so the earliest | ||
| * becomes `0001-01-01T00:00:00.000Z` and all | ||
| * others are relative offsets. Removes all | ||
| * absolute-time correlation while preserving | ||
| * relative timing (intervals, ordering). | ||
| */ | ||
| type TemporalMode = "preserve" | "rebase-year-zero" | "rebase-relative-to-earliest"; | ||
| /** | ||
| * Temporal policy applied to detected timestamps. See TemporalMode for | ||
| * the available modes. When `mode` is `"preserve"` (default), no | ||
| * transformation runs. | ||
| */ | ||
| interface TemporalPolicy { | ||
| mode: TemporalMode; | ||
| /** | ||
| * Round rebased timestamps to a coarser unit. Defaults to "ms" | ||
| * (preserve full precision). Set "hour" or "day" for additional | ||
| * privacy at the cost of relative-ordering precision. | ||
| */ | ||
| precision?: "ms" | "second" | "minute" | "hour" | "day"; | ||
| } | ||
| /** Per-guard policy configuration */ | ||
@@ -40,2 +73,8 @@ interface GuardPolicyConfig { | ||
| testDataDetection?: "flag" | "strip" | "ignore"; | ||
| /** | ||
| * Optional temporal-policy transformation applied to detected | ||
| * timestamps. Default: { mode: "preserve" } (no transformation). | ||
| * See TemporalPolicy and TemporalMode. | ||
| */ | ||
| temporalPolicy?: TemporalPolicy; | ||
| } | ||
@@ -82,2 +121,32 @@ /** Risk assessment returned alongside scan results */ | ||
| /** | ||
| * SynthesizerRegistry — keyed by entity type, supplies replacement | ||
| * functions used by `mode: "synthesize"`. | ||
| * | ||
| * The engine ships with default synthesizers for common PII types | ||
| * (see ./defaults.ts). Consumers can override or extend them via | ||
| * `engine.setSynthesizers({ EMAIL_ADDRESS: customFn, ... })`. | ||
| * | ||
| * Synthesizers are looked up by exact `entityType` match. A guard whose | ||
| * detection has an entity type with no registered synthesizer falls back | ||
| * to the standard `[REDACTED-TYPE]` placeholder (see base.guard.ts). | ||
| */ | ||
| declare class SynthesizerRegistry { | ||
| private map; | ||
| constructor(initial?: Record<string, Synthesizer>); | ||
| /** Register / override a synthesizer for a specific entity type. */ | ||
| set(entityType: string, fn: Synthesizer): this; | ||
| /** Bulk-register synthesizers. Existing entries with the same entityType are overridden. */ | ||
| setMany(synths: Record<string, Synthesizer>): this; | ||
| /** Look up the synthesizer for an entity type, if any. */ | ||
| get(entityType: string): Synthesizer | undefined; | ||
| /** Whether a synthesizer exists for this entity type (including defaults). */ | ||
| has(entityType: string): boolean; | ||
| /** Remove a synthesizer (including any default for this type). */ | ||
| delete(entityType: string): boolean; | ||
| /** Iterate registered entity types. */ | ||
| entityTypes(): string[]; | ||
| } | ||
| /** Orchestrates multiple guards and aggregates results */ | ||
@@ -88,3 +157,30 @@ declare class GuardrailsEngine { | ||
| private cascade; | ||
| private synthRegistry; | ||
| private temporalPolicy; | ||
| constructor(config?: Partial<EngineConfig>); | ||
| /** | ||
| * Replace or extend the synthesizer registry. Registered synthesizers | ||
| * override defaults for the same entity type. Returns this for chaining. | ||
| * | ||
| * Synthesizers are looked up at scan time when a guard runs in | ||
| * `mode: "synthesize"`. See ./synthesizers/defaults.ts for the built-in | ||
| * set. | ||
| */ | ||
| setSynthesizers(synths: Record<string, Synthesizer>): this; | ||
| /** Access the active synthesizer registry (defaults + user-registered). */ | ||
| get synthesizers(): SynthesizerRegistry; | ||
| /** | ||
| * Set the temporal policy applied to detected timestamps. Default is | ||
| * undefined (equivalent to `{ mode: "preserve" }` — no transformation). | ||
| * | ||
| * When set, every `engine.scan()` call applies the policy: detected | ||
| * `DATE_TIME` entities are rebased per the policy mode and the rebased | ||
| * values flow through the redaction pipeline via the per-call | ||
| * consistency map. Composes with synthesize mode (a date that's both | ||
| * rebased AND meant to be synthesized: temporal wins because it | ||
| * pre-populates the cache before guards run). | ||
| */ | ||
| setTemporalPolicy(policy: TemporalPolicy | undefined): this; | ||
| /** Read the active temporal policy, if any. */ | ||
| get temporalPolicyConfig(): TemporalPolicy | undefined; | ||
| /** Register a guard with the engine */ | ||
@@ -96,9 +192,34 @@ addGuard(guard: Guard): this; | ||
| private getGuardConfig; | ||
| /** Run all enabled guards against the input text */ | ||
| analyze(text: string): Promise<GuardResult[]>; | ||
| /** Run all guards and return a single pass/fail with all detections */ | ||
| scan(text: string): Promise<{ | ||
| /** | ||
| * Run all enabled guards against the input text. | ||
| * | ||
| * When `redactCtx` is omitted the engine creates a default context | ||
| * (engine registry + fresh consistency map) so that direct callers | ||
| * and the regex-only fallback paths inside `deepScan()`/`modelScan()` | ||
| * all receive the synthesizer registry. This ensures `mode: | ||
| * "synthesize"` works correctly across every engine entry point, not | ||
| * only when called from `scan()`. | ||
| */ | ||
| analyze(text: string, redactCtx?: RedactContext): Promise<GuardResult[]>; | ||
| /** | ||
| * Run all guards and return a single pass/fail with all detections. | ||
| * | ||
| * When any guard runs in `mode: "redact"` or `mode: "synthesize"`, | ||
| * the engine creates a per-call shared `consistencyMap` and passes | ||
| * its synthesizer registry to every guard. This guarantees that the | ||
| * same original value across detections from different guards | ||
| * collapses to the same replacement (true cross-guard consistency | ||
| * rather than the pre-Phase-6 behavior where the last guard's | ||
| * `redactedText` won and earlier guards' redactions were lost). | ||
| * | ||
| * The aggregate `redactedText` reflects all guards' redactions in a | ||
| * single output. The aggregate `redactionMap` is ordered by each | ||
| * entry's `start` offset (document order), rather than by | ||
| * guard-iteration order. | ||
| */ | ||
| scan(text: string, redactCtx?: RedactContext): Promise<{ | ||
| passed: boolean; | ||
| results: GuardResult[]; | ||
| redactedText?: string; | ||
| redactionMap?: RedactionEntry[]; | ||
| }>; | ||
@@ -123,5 +244,34 @@ /** Get list of registered guard names */ | ||
| redactedText?: string; | ||
| redactionMap?: RedactionEntry[]; | ||
| }>; | ||
| /** Clean up resources (terminate BERT worker, etc.) */ | ||
| dispose(): Promise<void>; | ||
| /** | ||
| * Run all enabled guards over every string leaf in a structured input | ||
| * (object, array, or any combination) and return a result that | ||
| * preserves the input's shape with string leaves redacted in place. | ||
| * | ||
| * Walking semantics: | ||
| * - Strings: passed to `scan()`. Replaced with `redactedText` if | ||
| * guards produced redactions; otherwise pass-through. | ||
| * - Numbers, booleans, null, undefined, bigint, symbol, Date, | ||
| * RegExp, Map, Set, functions: pass through unchanged. Bulkhead | ||
| * does not classify non-string leaves. | ||
| * - Arrays: each element walked with index appended to path. | ||
| * - Plain objects: each entry walked with key appended to path. | ||
| * Object key order is preserved. | ||
| * | ||
| * Cross-leaf consistency: a single `consistencyMap` is created for | ||
| * the entire `scanObject` call and threaded through every leaf scan. | ||
| * In `mode: "synthesize"` (RFC-001), the same original value | ||
| * produces the same synthetic replacement across all leaves of one | ||
| * call. | ||
| * | ||
| * @param input Any JSON-serializable structure, or a string. Generic | ||
| * type `T` is preserved on `redactedObject`. | ||
| * @returns ObjectScanResult with aggregated guard results, | ||
| * shape-preserving `redactedObject`, and per-path | ||
| * detection map. | ||
| */ | ||
| scanObject<T>(input: T): Promise<ObjectScanResult<T>>; | ||
| } | ||
@@ -134,4 +284,20 @@ | ||
| abstract analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| /** Build a GuardResult from detections */ | ||
| protected buildResult(text: string, detections: Detection[], mode: GuardMode): GuardResult; | ||
| /** | ||
| * Build a GuardResult from detections. Mode-aware: | ||
| * | ||
| * - `block`: no transformation; passed=false on any detection. | ||
| * - `redact`: each detection replaced by `[REDACTED-TYPE]`. | ||
| * - `synthesize`: each detection passed to the registry's synthesizer | ||
| * for its entityType. Falls back to placeholder if no | ||
| * synthesizer is registered. Same `original` always | ||
| * produces the same replacement within the call (via | ||
| * `consistencyMap`). | ||
| * | ||
| * The optional `redactCtx` parameter is supplied by the engine to share | ||
| * the registry and consistency map across guards in one scan call. | ||
| * When omitted, synthesize mode behaves identically to redact mode (no | ||
| * registered synthesizers are visible without an engine-supplied | ||
| * registry, so all replacements fall back to placeholders). | ||
| */ | ||
| protected buildResult(text: string, detections: Detection[], mode: GuardMode, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| /** Extract surrounding context for a detection */ | ||
@@ -141,4 +307,26 @@ protected extractContext(text: string, start: number, end: number): string; | ||
| protected makeDetection(text: string, partial: Omit<Detection, "source" | "context" | "disposition">, source?: DetectionSource, disposition?: Disposition): Detection; | ||
| /** Replace detected text with [REDACTED-TYPE] markers */ | ||
| protected applyRedactions(text: string, detections: Detection[]): string; | ||
| /** | ||
| * Apply redactions / synthesizations to text. Returns the modified | ||
| * text plus a per-detection RedactionEntry record. | ||
| * | ||
| * Detections are processed in **reverse order by start offset** so | ||
| * that replacements don't shift the offsets of pending detections. | ||
| * The redaction map is returned in **document order** (same order as | ||
| * `detections`). | ||
| */ | ||
| protected applyRedactions(text: string, detections: Detection[], mode: GuardMode, redactCtx?: RedactContext): Promise<{ | ||
| text: string; | ||
| redactionMap: RedactionEntry[]; | ||
| }>; | ||
| /** | ||
| * Compute the replacement for one detection. | ||
| * | ||
| * Returns an inline-cached value from the consistency map if present | ||
| * (to keep multi-mention consistency cheap and stable), otherwise: | ||
| * - mode `redact` -> `[REDACTED-${entityType}]` | ||
| * - mode `synthesize` -> registry-supplied synthesizer, or fallback | ||
| * to `[REDACTED-${entityType}]` if no | ||
| * synthesizer is registered for the type. | ||
| */ | ||
| private computeReplacement; | ||
| } | ||
@@ -161,3 +349,3 @@ | ||
| constructor(options?: PiiGuardOptions); | ||
| analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| analyze(text: string, config?: Partial<GuardConfig>, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| private detectAll; | ||
@@ -180,3 +368,3 @@ private deduplicateDetections; | ||
| constructor(options?: SecretGuardOptions); | ||
| analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| analyze(text: string, config?: Partial<GuardConfig>, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| } | ||
@@ -191,3 +379,3 @@ | ||
| readonly name = "injection"; | ||
| analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| analyze(text: string, config?: Partial<GuardConfig>, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| private heuristicScore; | ||
@@ -203,3 +391,3 @@ } | ||
| readonly name = "leakage"; | ||
| analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| analyze(text: string, config?: Partial<GuardConfig>, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| } | ||
@@ -215,3 +403,3 @@ | ||
| readonly name = "testdata"; | ||
| analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| analyze(text: string, config?: Partial<GuardConfig>, redactCtx?: RedactContext): Promise<GuardResult>; | ||
| /** Build a result that always passes — test data is informational, not blocking */ | ||
@@ -222,2 +410,75 @@ private buildInformationalResult; | ||
| /** | ||
| * Default synthesizers for common PII entity types. | ||
| * | ||
| * Each synthesizer: | ||
| * - Is **pure** (deterministic given input) and **side-effect-free**. | ||
| * - Reads the per-call `consistencyMap` first. If `original` already | ||
| * has a replacement, it returns that — never minting a fresh one | ||
| * for the same input within a call. | ||
| * - Uses **IETF / RFC documentation reservations** for synthetic values | ||
| * where they exist (example.com, 192.0.2.0/24, RFC 7042 MAC, GB82 | ||
| * IBAN, Stripe test cards). These are guaranteed safe and identifiable | ||
| * as test data by automated scanners. | ||
| * - Hashes `original` to pick a stable index into a name list / number | ||
| * range, so the same input always produces the same output across | ||
| * calls within one engine session. | ||
| * | ||
| * Replacement contract: synthesizers receive only the matched substring | ||
| * (e.g. `"john.smith@contoso.com"`), not the surrounding context. They | ||
| * MUST return a string of comparable shape to the input (e.g. an email | ||
| * address replaced with another email address). | ||
| */ | ||
| /** | ||
| * Replace with `firstname.lastname@example.com` (RFC 2606 reserved). | ||
| * Stable across calls for the same original. | ||
| */ | ||
| declare const synthEmail: Synthesizer; | ||
| /** | ||
| * Replace with a synthetic "First Last" pair drawn from the name lists. | ||
| * The first/last name combination is hashed off the original so multiple | ||
| * mentions of the same name in one document map to the same synthetic | ||
| * (via the consistency map). | ||
| */ | ||
| declare const synthPersonName: Synthesizer; | ||
| /** | ||
| * Replace with `+1-555-010-XXXX` where XXXX is the last 4 digits of the | ||
| * FNV-1a hash mod 10000. The 555-01XX range is reserved for fictitious | ||
| * numbers in NANP. Same input → same output. | ||
| */ | ||
| declare const synthPhone: Synthesizer; | ||
| declare const synthCreditCard: Synthesizer; | ||
| declare const synthIpAddress: Synthesizer; | ||
| /** | ||
| * Replace the host with `example.com` (RFC 2606), preserve scheme and path. | ||
| * If the URL doesn't parse, fall back to literal `https://example.com`. | ||
| */ | ||
| declare const synthUrl: Synthesizer; | ||
| /** | ||
| * Replace with the well-known UK test IBAN. Stable across all inputs; | ||
| * IBAN format is high-structure so brand-preservation isn't applicable. | ||
| */ | ||
| declare const synthIban: Synthesizer; | ||
| /** | ||
| * Replace with `00:00:5E:00:53:XX` from the RFC 7042 documentation MAC | ||
| * range. Last octet hashed for stability. | ||
| */ | ||
| declare const synthMac: Synthesizer; | ||
| /** | ||
| * Replace with `00000000-redacted-XXXX-0000-NNNNNNNNNNNN` where the XXXX | ||
| * and trailing 12-hex segment are derived from the input hash for | ||
| * stability. Format is intentionally non-canonical so downstream PII | ||
| * scanners flag it as `TEST_DATA_GUID` rather than as a real GUID. | ||
| */ | ||
| declare const synthGuid: Synthesizer; | ||
| declare const synthCrypto: Synthesizer; | ||
| /** | ||
| * Default synthesizer registry. Keys match the `entityType` strings | ||
| * emitted by the bundled PII patterns (see `src/patterns/pii/`). | ||
| * | ||
| * Consumers extend or override via `engine.setSynthesizers({ ... })`. | ||
| */ | ||
| declare const DEFAULT_SYNTHESIZERS: Record<string, Synthesizer>; | ||
| /** All built-in policies indexed by name */ | ||
@@ -278,2 +539,2 @@ declare const BUILTIN_POLICIES: Record<string, PolicyDefinition>; | ||
| export { BUILTIN_POLICIES, BaseGuard, type BulkheadConfig, CascadeConfig, type ClassifiedIssue, DEFAULT_CONFIG, Detection, DetectionSource, Disposition, EngineConfig, Guard, GuardConfig, GuardMode, type GuardPolicyConfig, GuardResult, GuardrailsEngine, InjectionGuard, LeakageGuard, PiiGuard, type PiiGuardOptions, PiiPattern, type PolicyDefinition, type RiskAssessment, type RiskLevel, type RiskThresholds, SecretGuard, type SecretGuardOptions, type TestDataFlag, TestDataGuard, assessRisk, createEngine, getPolicy, policyToEngineConfig, resolvePolicy }; | ||
| export { BUILTIN_POLICIES, BaseGuard, type BulkheadConfig, CascadeConfig, type ClassifiedIssue, DEFAULT_CONFIG, DEFAULT_SYNTHESIZERS, Detection, DetectionSource, Disposition, EngineConfig, Guard, GuardConfig, GuardMode, type GuardPolicyConfig, GuardResult, GuardrailsEngine, InjectionGuard, LeakageGuard, ObjectScanResult, PiiGuard, type PiiGuardOptions, PiiPattern, type PolicyDefinition, RedactContext, RedactionEntry, type RiskAssessment, type RiskLevel, type RiskThresholds, SecretGuard, type SecretGuardOptions, Synthesizer, SynthesizerRegistry, type TemporalMode, type TemporalPolicy, type TestDataFlag, TestDataGuard, assessRisk, createEngine, getPolicy, policyToEngineConfig, resolvePolicy, synthCreditCard, synthCrypto, synthEmail, synthGuid, synthIban, synthIpAddress, synthMac, synthPersonName, synthPhone, synthUrl }; |
+1
-1
| { | ||
| "name": "@bulkhead-ai/core", | ||
| "version": "0.5.2", | ||
| "version": "0.5.3", | ||
| "description": "Cascading content protection engine — PII detection, secret scanning, prompt injection defense", | ||
@@ -5,0 +5,0 @@ "license": "MIT", |
| /** Confidence level for a detection */ | ||
| type Confidence = "high" | "medium" | "low"; | ||
| /** Which cascade layer produced this detection */ | ||
| type DetectionSource = "regex" | "bert" | "llm"; | ||
| /** Whether this detection is final or needs escalation */ | ||
| type Disposition = "confirmed" | "escalate" | "dismissed" | "informational"; | ||
| /** A detected entity in text */ | ||
| interface Detection { | ||
| /** Entity type (e.g., "CREDIT_CARD", "US_SSN", "AWS_KEY") */ | ||
| entityType: string; | ||
| /** Start offset in the input text */ | ||
| start: number; | ||
| /** End offset in the input text */ | ||
| end: number; | ||
| /** The matched text */ | ||
| text: string; | ||
| /** Detection confidence */ | ||
| confidence: Confidence; | ||
| /** Numeric score 0-1 */ | ||
| score: number; | ||
| /** Which guard produced this detection */ | ||
| guardName: string; | ||
| /** Which cascade layer produced this detection */ | ||
| source: DetectionSource; | ||
| /** Surrounding text window for context */ | ||
| context: string; | ||
| /** Whether this detection is final or needs escalation */ | ||
| disposition: Disposition; | ||
| } | ||
| /** Result from a single guard's analysis */ | ||
| interface GuardResult { | ||
| /** Whether the text passed this guard (no issues found) */ | ||
| passed: boolean; | ||
| /** Human-readable reason for the result */ | ||
| reason: string; | ||
| /** Name of the guard that produced this result */ | ||
| guardName: string; | ||
| /** Overall score 0-1 (0 = safe, 1 = maximum threat) */ | ||
| score: number; | ||
| /** Individual detections found */ | ||
| detections: Detection[]; | ||
| /** Modified text with redactions applied (if applicable) */ | ||
| redactedText?: string; | ||
| } | ||
| /** Guard mode: block rejects the input, redact sanitizes it */ | ||
| type GuardMode = "block" | "redact"; | ||
| /** Configuration for a guard */ | ||
| interface GuardConfig { | ||
| /** Whether this guard is enabled */ | ||
| enabled: boolean; | ||
| /** Detection threshold 0-1 (detections below this score are ignored) */ | ||
| threshold: number; | ||
| /** What to do when a detection occurs */ | ||
| mode: GuardMode; | ||
| } | ||
| /** A guard analyzes text and returns results */ | ||
| interface Guard { | ||
| /** Unique name for this guard */ | ||
| readonly name: string; | ||
| /** Analyze text and return results */ | ||
| analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| } | ||
| /** Configuration for the guardrails engine */ | ||
| interface EngineConfig { | ||
| /** Guard-specific configuration overrides */ | ||
| guards: Record<string, Partial<GuardConfig>>; | ||
| } | ||
| /** A PII pattern definition */ | ||
| interface PiiPattern { | ||
| /** Entity type name (e.g., "CREDIT_CARD") */ | ||
| entityType: string; | ||
| /** Regex patterns to match */ | ||
| patterns: RegExp[]; | ||
| /** Optional validation function (e.g., Luhn check) */ | ||
| validate?: (match: string) => boolean; | ||
| /** Context words that boost confidence when found nearby */ | ||
| contextWords?: string[]; | ||
| /** Base confidence without context boost */ | ||
| baseConfidence: Confidence; | ||
| /** Base score without context boost */ | ||
| baseScore: number; | ||
| } | ||
| /** A secret pattern definition */ | ||
| interface SecretPattern { | ||
| /** Secret type name (e.g., "AWS_ACCESS_KEY") */ | ||
| secretType: string; | ||
| /** Regex patterns to match */ | ||
| patterns: RegExp[]; | ||
| /** Optional validation function */ | ||
| validate?: (match: string) => boolean; | ||
| /** Minimum entropy threshold (if applicable) */ | ||
| minEntropy?: number; | ||
| /** Context words that boost confidence when found nearby (e.g., ["heroku", "api_key"]) */ | ||
| contextWords?: string[]; | ||
| /** Base score without context boost (0-1). Defaults to 0.9 if unset. */ | ||
| baseScore?: number; | ||
| /** Base confidence without context boost. Defaults to "high" if unset. */ | ||
| baseConfidence?: Confidence; | ||
| } | ||
| /** Tactic names for detection strategies */ | ||
| type TacticName = "pattern" | "heuristic" | "llm"; | ||
| /** Result from a tactic execution */ | ||
| interface TacticResult { | ||
| /** Score 0-1 */ | ||
| score: number; | ||
| /** Additional context about the detection */ | ||
| details?: Record<string, unknown>; | ||
| } | ||
| /** A detection tactic */ | ||
| interface Tactic { | ||
| readonly name: TacticName; | ||
| readonly defaultThreshold: number; | ||
| execute(input: string): Promise<TacticResult>; | ||
| } | ||
| /** | ||
| * LLM disambiguation layer (Layer 3) of the cascading classifier. | ||
| * Only receives ambiguous spans from Layer 2, along with surrounding context. | ||
| * Makes a focused determination: is this span PII or not? | ||
| */ | ||
| /** Function signature for an LLM provider */ | ||
| type LlmProvider = (prompt: string) => Promise<string>; | ||
| interface LlmLayerConfig { | ||
| /** Number of sentences before/after the span to include as context */ | ||
| contextSentences: number; | ||
| /** LLM provider function */ | ||
| provider?: LlmProvider; | ||
| } | ||
| declare class LlmLayer { | ||
| private config; | ||
| constructor(config?: Partial<LlmLayerConfig>); | ||
| /** Set the LLM provider (can be swapped at runtime) */ | ||
| setProvider(provider: LlmProvider): void; | ||
| /** | ||
| * Disambiguate escalated detections using an LLM. | ||
| * @param escalated Detections with disposition "escalate" | ||
| * @param fullText The full document text | ||
| * @param confirmed Already-confirmed detections (passed as context to help disambiguation) | ||
| */ | ||
| disambiguate(escalated: Detection[], fullText: string, confirmed: Detection[]): Promise<Detection[]>; | ||
| /** Build a focused disambiguation prompt */ | ||
| private buildPrompt; | ||
| /** Extract ±N sentences around a span */ | ||
| private extractSentenceContext; | ||
| /** Parse the LLM response JSON */ | ||
| private parseResponse; | ||
| } | ||
| /** | ||
| * Cascading Classifier — orchestrates the three detection layers. | ||
| * | ||
| * Layer 1 (Regex): Always runs, sub-ms. Catches structured PII. | ||
| * → confidence: 1.0, disposition: "confirmed" | ||
| * | ||
| * Layer 2 (BERT): On-demand, 20-50ms. Catches contextual entities. | ||
| * → score >= threshold: "confirmed" | ||
| * → score < threshold: "escalate" | ||
| * | ||
| * Layer 3 (LLM): Selective, 500ms-2s. Only sees escalated spans. | ||
| * → Returns "confirmed" or "dismissed" | ||
| */ | ||
| interface CascadeConfig { | ||
| /** Confidence threshold below which BERT results escalate to LLM */ | ||
| escalationThreshold: number; | ||
| /** Number of sentences of context to pass to Layer 3 */ | ||
| contextSentences: number; | ||
| /** Whether Layer 2 (BERT) is enabled */ | ||
| bertEnabled: boolean; | ||
| /** Whether Layer 3 (LLM) is enabled */ | ||
| llmEnabled: boolean; | ||
| /** Model ID for BERT layer */ | ||
| modelId?: string; | ||
| /** LLM provider function for Layer 3 */ | ||
| llmProvider?: LlmProvider; | ||
| } | ||
| declare class CascadeClassifier { | ||
| private config; | ||
| private bertLayer; | ||
| private llmLayer; | ||
| private regexGuards; | ||
| constructor(config?: Partial<CascadeConfig>); | ||
| /** Whether the cascade is ready to serve (BERT model loaded if enabled) */ | ||
| get ready(): boolean; | ||
| /** Register regex-based guards (Layer 1) */ | ||
| addRegexGuard(guard: Guard): this; | ||
| /** Set the LLM provider for Layer 3 */ | ||
| setLlmProvider(provider: LlmProvider): void; | ||
| /** | ||
| * Run the full cascade: Regex → BERT → LLM | ||
| * Returns a unified GuardResult with all detections carrying provenance. | ||
| */ | ||
| deepScan(text: string): Promise<GuardResult>; | ||
| /** Run Layer 1 only (for fast auto-scan path) */ | ||
| regexScan(text: string): Promise<GuardResult>; | ||
| /** Run Layers 1 + 2 only (no LLM, for "Scan File" command) */ | ||
| modelScan(text: string): Promise<GuardResult>; | ||
| private runRegexLayer; | ||
| private runBertLayer; | ||
| /** Remove BERT detections that overlap with regex detections */ | ||
| private deduplicateAgainstRegex; | ||
| private buildCascadeResult; | ||
| /** Clean up resources */ | ||
| dispose(): Promise<void>; | ||
| } | ||
| /** | ||
| * Main-thread interface to the BERT worker (Layer 2). | ||
| * Manages the worker lifecycle and maps BERT tokens to Detection objects. | ||
| */ | ||
| interface BertLayerConfig { | ||
| modelId?: string; | ||
| /** Threshold above which detections are confirmed, below which they escalate */ | ||
| escalationThreshold: number; | ||
| } | ||
| declare class BertLayer { | ||
| private worker; | ||
| private pendingRequests; | ||
| private requestId; | ||
| private config; | ||
| /** Whether the BERT model has been loaded and first inference completed */ | ||
| private _loaded; | ||
| get loaded(): boolean; | ||
| constructor(config?: Partial<BertLayerConfig>); | ||
| /** Resolve the worker path — supports both compiled .js and source .ts */ | ||
| private resolveWorkerPath; | ||
| /** Ensure the worker thread is running */ | ||
| private ensureWorker; | ||
| /** Send text to the BERT worker and get raw token results */ | ||
| private analyzeRaw; | ||
| /** | ||
| * Analyze text and return Detection objects with escalation disposition. | ||
| * Tokens above the escalation threshold are "confirmed", | ||
| * tokens below are "escalate" (need LLM review). | ||
| */ | ||
| analyze(text: string): Promise<Detection[]>; | ||
| /** Terminate the worker thread */ | ||
| dispose(): Promise<void>; | ||
| } | ||
| export { type BertLayerConfig as B, type CascadeConfig as C, type Detection as D, type EngineConfig as E, type GuardMode as G, type LlmLayerConfig as L, type PiiPattern as P, type SecretPattern as S, type Tactic as T, type Guard as a, type GuardResult as b, CascadeClassifier as c, type GuardConfig as d, type DetectionSource as e, type Disposition as f, type Confidence as g, type LlmProvider as h, type TacticName as i, type TacticResult as j, BertLayer as k, LlmLayer as l }; |
| /** Confidence level for a detection */ | ||
| type Confidence = "high" | "medium" | "low"; | ||
| /** Which cascade layer produced this detection */ | ||
| type DetectionSource = "regex" | "bert" | "llm"; | ||
| /** Whether this detection is final or needs escalation */ | ||
| type Disposition = "confirmed" | "escalate" | "dismissed" | "informational"; | ||
| /** A detected entity in text */ | ||
| interface Detection { | ||
| /** Entity type (e.g., "CREDIT_CARD", "US_SSN", "AWS_KEY") */ | ||
| entityType: string; | ||
| /** Start offset in the input text */ | ||
| start: number; | ||
| /** End offset in the input text */ | ||
| end: number; | ||
| /** The matched text */ | ||
| text: string; | ||
| /** Detection confidence */ | ||
| confidence: Confidence; | ||
| /** Numeric score 0-1 */ | ||
| score: number; | ||
| /** Which guard produced this detection */ | ||
| guardName: string; | ||
| /** Which cascade layer produced this detection */ | ||
| source: DetectionSource; | ||
| /** Surrounding text window for context */ | ||
| context: string; | ||
| /** Whether this detection is final or needs escalation */ | ||
| disposition: Disposition; | ||
| } | ||
| /** Result from a single guard's analysis */ | ||
| interface GuardResult { | ||
| /** Whether the text passed this guard (no issues found) */ | ||
| passed: boolean; | ||
| /** Human-readable reason for the result */ | ||
| reason: string; | ||
| /** Name of the guard that produced this result */ | ||
| guardName: string; | ||
| /** Overall score 0-1 (0 = safe, 1 = maximum threat) */ | ||
| score: number; | ||
| /** Individual detections found */ | ||
| detections: Detection[]; | ||
| /** Modified text with redactions applied (if applicable) */ | ||
| redactedText?: string; | ||
| } | ||
| /** Guard mode: block rejects the input, redact sanitizes it */ | ||
| type GuardMode = "block" | "redact"; | ||
| /** Configuration for a guard */ | ||
| interface GuardConfig { | ||
| /** Whether this guard is enabled */ | ||
| enabled: boolean; | ||
| /** Detection threshold 0-1 (detections below this score are ignored) */ | ||
| threshold: number; | ||
| /** What to do when a detection occurs */ | ||
| mode: GuardMode; | ||
| } | ||
| /** A guard analyzes text and returns results */ | ||
| interface Guard { | ||
| /** Unique name for this guard */ | ||
| readonly name: string; | ||
| /** Analyze text and return results */ | ||
| analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>; | ||
| } | ||
| /** Configuration for the guardrails engine */ | ||
| interface EngineConfig { | ||
| /** Guard-specific configuration overrides */ | ||
| guards: Record<string, Partial<GuardConfig>>; | ||
| } | ||
| /** A PII pattern definition */ | ||
| interface PiiPattern { | ||
| /** Entity type name (e.g., "CREDIT_CARD") */ | ||
| entityType: string; | ||
| /** Regex patterns to match */ | ||
| patterns: RegExp[]; | ||
| /** Optional validation function (e.g., Luhn check) */ | ||
| validate?: (match: string) => boolean; | ||
| /** Context words that boost confidence when found nearby */ | ||
| contextWords?: string[]; | ||
| /** Base confidence without context boost */ | ||
| baseConfidence: Confidence; | ||
| /** Base score without context boost */ | ||
| baseScore: number; | ||
| } | ||
| /** A secret pattern definition */ | ||
| interface SecretPattern { | ||
| /** Secret type name (e.g., "AWS_ACCESS_KEY") */ | ||
| secretType: string; | ||
| /** Regex patterns to match */ | ||
| patterns: RegExp[]; | ||
| /** Optional validation function */ | ||
| validate?: (match: string) => boolean; | ||
| /** Minimum entropy threshold (if applicable) */ | ||
| minEntropy?: number; | ||
| /** Context words that boost confidence when found nearby (e.g., ["heroku", "api_key"]) */ | ||
| contextWords?: string[]; | ||
| /** Base score without context boost (0-1). Defaults to 0.9 if unset. */ | ||
| baseScore?: number; | ||
| /** Base confidence without context boost. Defaults to "high" if unset. */ | ||
| baseConfidence?: Confidence; | ||
| } | ||
| /** Tactic names for detection strategies */ | ||
| type TacticName = "pattern" | "heuristic" | "llm"; | ||
| /** Result from a tactic execution */ | ||
| interface TacticResult { | ||
| /** Score 0-1 */ | ||
| score: number; | ||
| /** Additional context about the detection */ | ||
| details?: Record<string, unknown>; | ||
| } | ||
| /** A detection tactic */ | ||
| interface Tactic { | ||
| readonly name: TacticName; | ||
| readonly defaultThreshold: number; | ||
| execute(input: string): Promise<TacticResult>; | ||
| } | ||
| /** | ||
| * LLM disambiguation layer (Layer 3) of the cascading classifier. | ||
| * Only receives ambiguous spans from Layer 2, along with surrounding context. | ||
| * Makes a focused determination: is this span PII or not? | ||
| */ | ||
| /** Function signature for an LLM provider */ | ||
| type LlmProvider = (prompt: string) => Promise<string>; | ||
| interface LlmLayerConfig { | ||
| /** Number of sentences before/after the span to include as context */ | ||
| contextSentences: number; | ||
| /** LLM provider function */ | ||
| provider?: LlmProvider; | ||
| } | ||
| declare class LlmLayer { | ||
| private config; | ||
| constructor(config?: Partial<LlmLayerConfig>); | ||
| /** Set the LLM provider (can be swapped at runtime) */ | ||
| setProvider(provider: LlmProvider): void; | ||
| /** | ||
| * Disambiguate escalated detections using an LLM. | ||
| * @param escalated Detections with disposition "escalate" | ||
| * @param fullText The full document text | ||
| * @param confirmed Already-confirmed detections (passed as context to help disambiguation) | ||
| */ | ||
| disambiguate(escalated: Detection[], fullText: string, confirmed: Detection[]): Promise<Detection[]>; | ||
| /** Build a focused disambiguation prompt */ | ||
| private buildPrompt; | ||
| /** Extract ±N sentences around a span */ | ||
| private extractSentenceContext; | ||
| /** Parse the LLM response JSON */ | ||
| private parseResponse; | ||
| } | ||
| /** | ||
| * Cascading Classifier — orchestrates the three detection layers. | ||
| * | ||
| * Layer 1 (Regex): Always runs, sub-ms. Catches structured PII. | ||
| * → confidence: 1.0, disposition: "confirmed" | ||
| * | ||
| * Layer 2 (BERT): On-demand, 20-50ms. Catches contextual entities. | ||
| * → score >= threshold: "confirmed" | ||
| * → score < threshold: "escalate" | ||
| * | ||
| * Layer 3 (LLM): Selective, 500ms-2s. Only sees escalated spans. | ||
| * → Returns "confirmed" or "dismissed" | ||
| */ | ||
| interface CascadeConfig { | ||
| /** Confidence threshold below which BERT results escalate to LLM */ | ||
| escalationThreshold: number; | ||
| /** Number of sentences of context to pass to Layer 3 */ | ||
| contextSentences: number; | ||
| /** Whether Layer 2 (BERT) is enabled */ | ||
| bertEnabled: boolean; | ||
| /** Whether Layer 3 (LLM) is enabled */ | ||
| llmEnabled: boolean; | ||
| /** Model ID for BERT layer */ | ||
| modelId?: string; | ||
| /** LLM provider function for Layer 3 */ | ||
| llmProvider?: LlmProvider; | ||
| } | ||
| declare class CascadeClassifier { | ||
| private config; | ||
| private bertLayer; | ||
| private llmLayer; | ||
| private regexGuards; | ||
| constructor(config?: Partial<CascadeConfig>); | ||
| /** Whether the cascade is ready to serve (BERT model loaded if enabled) */ | ||
| get ready(): boolean; | ||
| /** Register regex-based guards (Layer 1) */ | ||
| addRegexGuard(guard: Guard): this; | ||
| /** Set the LLM provider for Layer 3 */ | ||
| setLlmProvider(provider: LlmProvider): void; | ||
| /** | ||
| * Run the full cascade: Regex → BERT → LLM | ||
| * Returns a unified GuardResult with all detections carrying provenance. | ||
| */ | ||
| deepScan(text: string): Promise<GuardResult>; | ||
| /** Run Layer 1 only (for fast auto-scan path) */ | ||
| regexScan(text: string): Promise<GuardResult>; | ||
| /** Run Layers 1 + 2 only (no LLM, for "Scan File" command) */ | ||
| modelScan(text: string): Promise<GuardResult>; | ||
| private runRegexLayer; | ||
| private runBertLayer; | ||
| /** Remove BERT detections that overlap with regex detections */ | ||
| private deduplicateAgainstRegex; | ||
| private buildCascadeResult; | ||
| /** Clean up resources */ | ||
| dispose(): Promise<void>; | ||
| } | ||
| /** | ||
| * Main-thread interface to the BERT worker (Layer 2). | ||
| * Manages the worker lifecycle and maps BERT tokens to Detection objects. | ||
| */ | ||
| interface BertLayerConfig { | ||
| modelId?: string; | ||
| /** Threshold above which detections are confirmed, below which they escalate */ | ||
| escalationThreshold: number; | ||
| } | ||
| declare class BertLayer { | ||
| private worker; | ||
| private pendingRequests; | ||
| private requestId; | ||
| private config; | ||
| /** Whether the BERT model has been loaded and first inference completed */ | ||
| private _loaded; | ||
| get loaded(): boolean; | ||
| constructor(config?: Partial<BertLayerConfig>); | ||
| /** Resolve the worker path — supports both compiled .js and source .ts */ | ||
| private resolveWorkerPath; | ||
| /** Ensure the worker thread is running */ | ||
| private ensureWorker; | ||
| /** Send text to the BERT worker and get raw token results */ | ||
| private analyzeRaw; | ||
| /** | ||
| * Analyze text and return Detection objects with escalation disposition. | ||
| * Tokens above the escalation threshold are "confirmed", | ||
| * tokens below are "escalate" (need LLM review). | ||
| */ | ||
| analyze(text: string): Promise<Detection[]>; | ||
| /** Terminate the worker thread */ | ||
| dispose(): Promise<void>; | ||
| } | ||
| export { type BertLayerConfig as B, type CascadeConfig as C, type Detection as D, type EngineConfig as E, type GuardMode as G, type LlmLayerConfig as L, type PiiPattern as P, type SecretPattern as S, type Tactic as T, type Guard as a, type GuardResult as b, CascadeClassifier as c, type GuardConfig as d, type DetectionSource as e, type Disposition as f, type Confidence as g, type LlmProvider as h, type TacticName as i, type TacticResult as j, BertLayer as k, LlmLayer as l }; |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
879052
26.88%9620
22.02%4
33.33%