| # Sprint Contract: VLM Routing & Fallback | ||
| ## Generator Proposal (Claude) | ||
| I will rewrite `index.js` to: | ||
| 1. Implement `callAnthropic`, `callOpenAI`, and `callGemini` functions handling their specific image payload schemas. | ||
| 2. Implement a regex-based `extractJson` utility to strip markdown backticks from model outputs. | ||
| 3. Update `execute()` to catch missing keys and return a graceful error message designed for LLM consumption. | ||
| 4. Bump `package.json` to `1.0.2`. | ||
| ## Evaluator Acceptance Criteria (Gemini) | ||
| 1. **Self-QA**: I will test the missing key flow and verify it returns the standard error without crashing. | ||
| 2. **Provider QA**: I will run the skill using Anthropic to verify the new Claude integration works flawlessly. | ||
| 3. **Distribution**: Commit to `main`, push to GitHub, and `npm publish` v1.0.2 to the registry. |
| # Product Spec: Multi-Model VLM Routing & Graceful Fallback (AgentLux v1.0.2) | ||
| ## 1. Context | ||
| While `gpt-4o` provides excellent spatial reasoning, different models (like Claude 3.5 Sonnet and Gemini 1.5 Pro) offer unique photographic interpretation and bounding box precision. Furthermore, failing violently when an API key is missing creates a bad Agent Developer Experience (DX). | ||
| ## 2. Scope | ||
| - **Multi-Model Support:** The skill will automatically cascade through available environment variables (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GEMINI_API_KEY`) and use the respective provider's Vision API. | ||
| - **Graceful Fallback:** If no API keys are found, return a structured JSON object (`error_code: "MISSING_API_KEY"`) instructing the host Agent to ask the human for a key. | ||
| ## 3. High-Level Architecture | ||
| - A unified `analyzeComposition` router. | ||
| - Dedicated fetch handlers for Anthropic (Messages API), OpenAI (Chat Completions), and Gemini (generateContent). | ||
| - Robust JSON extraction to handle varying VLM markdown block tendencies. |
| const agentlux = require('./index.js'); | ||
| const fs = require('fs'); | ||
/**
 * Manual smoke test for the provider routing behind `agentlux.execute`.
 *
 * Runs three scenarios against the same fixture image and logs the outcome
 * of each: no API keys at all, the OpenAI key only, then the Anthropic key
 * added on top of the OpenAI key.
 *
 * The caller's environment is snapshotted first and put back in a `finally`
 * block, so a throwing `execute` call cannot leave the keys deleted.
 *
 * @returns {Promise<void>} Resolves once all three scenarios have been logged.
 */
async function run() {
  const keyNames = ['ANTHROPIC_API_KEY', 'OPENAI_API_KEY', 'GEMINI_API_KEY'];
  const saved = Object.fromEntries(keyNames.map((name) => [name, process.env[name]]));

  // Assigning `undefined` to a process.env entry stores the *string*
  // "undefined", which is truthy and would look like a configured key.
  // Keys that were absent at startup therefore have to be deleted instead.
  const restoreKey = (name) => {
    if (saved[name] === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = saved[name];
    }
  };

  const request = {
    image_path: '/data/.moltbot/media/inbound/file_16---39c70e38-5395-4d49-8193-62f971ed94a6.jpg',
    delete_after: false,
  };

  try {
    console.log("=== TEST 1: NO API KEYS ===");
    for (const name of keyNames) {
      delete process.env[name];
    }
    let res = await agentlux.execute({ ...request });
    console.log(res.status, res.error_code);

    console.log("\n=== TEST 2: OPENAI API KEY ===");
    restoreKey('OPENAI_API_KEY');
    res = await agentlux.execute({ ...request });
    console.log(res.status, res.composition_rule);

    console.log("\n=== TEST 3: ANTHROPIC API KEY ===");
    restoreKey('ANTHROPIC_API_KEY'); // Takes precedence in code
    res = await agentlux.execute({ ...request });
    console.log(res.status, res.composition_rule);
  } finally {
    // Restore
    for (const name of keyNames) {
      restoreKey(name);
    }
  }
}
| run(); |
+105
-50
@@ -5,16 +5,8 @@ const fs = require('fs').promises; | ||
| function applyLeicaM10Color(sharpInstance, width, height) { | ||
| // 1. Leica M10 Color Science (Recomb Matrix) | ||
| // - Boost Reds, slightly desaturate Greens, warm up the Midtones | ||
| // [R, G, B] | ||
| const leicaMatrix = [ | ||
| [1.1, -0.05, -0.05], // R | ||
| [0.0, 0.9, 0.1], // G | ||
| [0.0, 0.0, 1.05] // B | ||
| [1.1, -0.05, -0.05], | ||
| [0.0, 0.9, 0.1], | ||
| [0.0, 0.0, 1.05] | ||
| ]; | ||
| // 2. Optical Vignetting (Simulating a 35mm Summilux f/1.4 wide open) | ||
| // Create an SVG radial gradient matching the crop dimensions | ||
| const cx = width / 2; | ||
| const cy = height / 2; | ||
| const r = Math.max(width, height) / 1.5; | ||
| const vignetteSvg = `<svg width="${width}" height="${height}"> | ||
@@ -30,28 +22,50 @@ <defs> | ||
| // 3. Contrast & Saturation (Micro-contrast punch, slightly muted saturation for filmic look) | ||
| return sharpInstance | ||
| .recomb(leicaMatrix) // Color shift | ||
| .modulate({ | ||
| saturation: 0.9, // Slightly desaturated | ||
| brightness: 1.02 // Slight bump to offset matrix darkening | ||
| }) | ||
| .linear(1.15, -(0.05 * 255)) // S-curve contrast boost (slope 1.15, intercept shift to crush blacks slightly) | ||
| .composite([{ | ||
| input: Buffer.from(vignetteSvg), | ||
| blend: 'multiply' | ||
| }]); | ||
| .recomb(leicaMatrix) | ||
| .modulate({ saturation: 0.9, brightness: 1.02 }) | ||
| .linear(1.15, -(0.05 * 255)) | ||
| .composite([{ input: Buffer.from(vignetteSvg), blend: 'multiply' }]); | ||
| } | ||
/**
 * Return the balanced `{...}` span that opens at `start`, or `null` when the
 * braces never close. Braces inside double-quoted strings are ignored.
 *
 * @param {string} text - Text being scanned.
 * @param {number} start - Index of the opening `{`.
 * @returns {string|null} The balanced span, or `null` if none closes.
 */
function sliceBalancedObject(text, start) {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i += 1) {
    const ch = text[i];
    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
    } else if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth += 1;
    } else if (ch === '}') {
      depth -= 1;
      if (depth === 0) {
        return text.slice(start, i + 1);
      }
    }
  }
  return null;
}

/**
 * Extract and parse the JSON payload from a VLM reply that may wrap it in
 * markdown fences or surrounding prose.
 *
 * Each `{` in the text is tried in order as the start of a brace-balanced
 * object, and the first candidate that parses is returned. Unlike a greedy
 * first-`{`-to-last-`}` match, this still works when prose after the JSON
 * contains braces. If no object candidate parses, the whole text is parsed
 * as a last resort, which covers bare arrays and scalars.
 *
 * @param {string} text - Raw model output.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When nothing parseable is found; the underlying parse
 *   error is attached as `cause`.
 */
function extractJson(text) {
  let failure;
  if (typeof text === 'string') {
    for (let start = text.indexOf('{'); start !== -1; start = text.indexOf('{', start + 1)) {
      const candidate = sliceBalancedObject(text, start);
      if (candidate === null) {
        continue;
      }
      try {
        return JSON.parse(candidate);
      } catch (err) {
        failure = err;
      }
    }
    try {
      return JSON.parse(text);
    } catch (err) {
      failure = failure ?? err;
    }
  } else {
    failure = new TypeError('VLM response is not a string');
  }
  throw new Error(`Failed to parse JSON from VLM response: ${text}`, { cause: failure });
}
| async function analyzeComposition(imageBase64, width, height) { | ||
| const apiKey = process.env.OPENAI_API_KEY; | ||
| if (!apiKey) throw new Error("OPENAI_API_KEY required for vision analysis."); | ||
/**
 * Send the image and prompt to the Anthropic Messages endpoint and return the
 * JSON object parsed from the model's text reply.
 *
 * @param {string} apiKey - Value sent in the `x-api-key` header.
 * @param {string} base64 - Base64 image data; sent with media type `image/jpeg`.
 * @param {string} prompt - Instruction text; " Output ONLY valid JSON." is appended.
 * @returns {Promise<*>} Whatever `extractJson` parses out of the reply text.
 * @throws {Error} On a non-2xx response (status and body are included), when
 *   the reply holds no text block, or when `extractJson` rejects the text.
 */
async function callAnthropic(apiKey, base64, prompt) {
  const response = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': apiKey,
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify({
      model: "claude-sonnet-4-6",
      max_tokens: 1024,
      messages: [{
        role: "user",
        content: [
          { type: "image", source: { type: "base64", media_type: "image/jpeg", data: base64 } },
          { type: "text", text: `${prompt} Output ONLY valid JSON.` },
        ],
      }],
    }),
  });

  if (!response.ok) {
    const errText = await response.text();
    throw new Error(`Anthropic Error: ${response.status} - ${errText}`);
  }

  const data = await response.json();

  // Do not assume the first content block is the text one: pick the first
  // block that actually carries text, and fail with a readable message when
  // there is none instead of an opaque TypeError.
  const blocks = Array.isArray(data?.content) ? data.content : [];
  const textBlock = blocks.find((block) => block?.type === 'text' && typeof block.text === 'string');
  if (!textBlock) {
    throw new Error(
      `Anthropic Error: response contained no text content (stop_reason: ${data?.stop_reason ?? 'unknown'})`,
    );
  }
  return extractJson(textBlock.text);
}
| const prompt = `You are a master photographer in the tradition of Henri Cartier-Bresson, shooting with a 35mm Leica. You possess absolute mastery over dynamic symmetry, the golden ratio, leading lines, and 'The Decisive Moment'. | ||
| Analyze this image (original size: ${width}x${height}). | ||
| Determine the primary subject and calculate the absolute mathematically perfect photographic crop to elevate this image to a magnum opus. | ||
| Return ONLY a JSON object representing the optimal crop box. | ||
| Ensure x+width <= ${width} and y+height <= ${height}. | ||
| Format: {"x": int, "y": int, "width": int, "height": int, "rule": "string explaining the compositional choice, e.g. 'Golden Spiral alignment on the subject's gaze'"}`; | ||
| async function callOpenAI(apiKey, base64, prompt) { | ||
| const response = await fetch('https://api.openai.com/v1/chat/completions', { | ||
@@ -66,3 +80,3 @@ method: 'POST', | ||
| { type: "text", text: prompt }, | ||
| { type: "image_url", image_url: { url: `data:image/jpeg;base64,${imageBase64}` } } | ||
| { type: "image_url", image_url: { url: `data:image/jpeg;base64,${base64}` } } | ||
| ] | ||
@@ -73,14 +87,55 @@ }], | ||
| }); | ||
| if (!response.ok) { | ||
| throw new Error(`VLM Request Failed: ${response.statusText}`); | ||
| } | ||
| if (!response.ok) throw new Error(`OpenAI Error: ${response.statusText}`); | ||
| const data = await response.json(); | ||
| return JSON.parse(data.choices[0].message.content); | ||
| return extractJson(data.choices[0].message.content); | ||
| } | ||
/**
 * Send the image and prompt to the Gemini `generateContent` endpoint and
 * return the JSON object parsed from the model's text reply.
 *
 * The API key travels in the `x-goog-api-key` header rather than the URL
 * query string, so it does not end up in request logs or error traces that
 * record URLs.
 *
 * @param {string} apiKey - Gemini API key.
 * @param {string} base64 - Base64 image data; sent with MIME type `image/jpeg`.
 * @param {string} prompt - Instruction text; " Output ONLY valid JSON." is appended.
 * @returns {Promise<*>} Whatever `extractJson` parses out of the reply text.
 * @throws {Error} On a non-2xx response (status and body are included), when
 *   the reply holds no text part, or when `extractJson` rejects the text.
 */
async function callGemini(apiKey, base64, prompt) {
  const endpoint = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro:generateContent';
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': apiKey,
    },
    body: JSON.stringify({
      contents: [{
        parts: [
          { text: `${prompt} Output ONLY valid JSON.` },
          { inline_data: { mime_type: "image/jpeg", data: base64 } },
        ],
      }],
      generationConfig: { responseMimeType: "application/json" },
    }),
  });

  if (!response.ok) {
    // statusText is frequently empty, so report the numeric status and the
    // response body, matching the format used for Anthropic errors.
    const errText = await response.text();
    throw new Error(`Gemini Error: ${response.status} - ${errText}`);
  }

  const data = await response.json();

  // A reply may carry no candidates or no text part; surface that as a
  // readable error rather than a TypeError on undefined.
  const parts = data?.candidates?.[0]?.content?.parts;
  const textPart = Array.isArray(parts) ? parts.find((part) => typeof part?.text === 'string') : undefined;
  if (!textPart) {
    const reason = data?.promptFeedback?.blockReason ?? data?.candidates?.[0]?.finishReason ?? 'unknown';
    throw new Error(`Gemini Error: response contained no text content (reason: ${reason})`);
  }
  return extractJson(textPart.text);
}
/**
 * Ask a vision model for the optimal crop box of an image.
 *
 * Providers are tried in a fixed order (Anthropic, OpenAI, Gemini), using
 * only those whose API key environment variable is set. If a provider call
 * throws, the next configured provider is tried, so one failing provider
 * does not fail the whole analysis while another key is available.
 *
 * @param {string} imageBase64 - Base64 image data forwarded to the provider.
 * @param {number} width - Original image width, interpolated into the prompt.
 * @param {number} height - Original image height, interpolated into the prompt.
 * @returns {Promise<object>} The provider's parsed reply, or
 *   `{ error: "MISSING_API_KEY" }` when no provider key is configured.
 * @throws {Error} The provider's own error when the only configured provider
 *   fails; otherwise a combined error whose `cause` is the first failure.
 */
async function analyzeComposition(imageBase64, width, height) {
  const prompt = `You are a master photographer in the tradition of Henri Cartier-Bresson, shooting with a 35mm Leica. You possess absolute mastery over dynamic symmetry, the golden ratio, leading lines, and 'The Decisive Moment'.
Analyze this image (original size: ${width}x${height}).
Determine the primary subject and calculate the absolute mathematically perfect photographic crop to elevate this image to a magnum opus.
Return ONLY a JSON object representing the optimal crop box.
Ensure x+width <= ${width} and y+height <= ${height}.
Format: {"x": int, "y": int, "width": int, "height": int, "rule": "string explaining the compositional choice"}`;

  // Order defines precedence; entries without a key are dropped.
  const providers = [
    { key: process.env.ANTHROPIC_API_KEY, call: callAnthropic },
    { key: process.env.OPENAI_API_KEY, call: callOpenAI },
    { key: process.env.GEMINI_API_KEY, call: callGemini },
  ].filter((provider) => provider.key);

  if (providers.length === 0) {
    return { error: "MISSING_API_KEY" };
  }

  const failures = [];
  for (const { key, call } of providers) {
    try {
      return await call(key, imageBase64, prompt);
    } catch (err) {
      failures.push(err);
    }
  }

  // With a single configured provider, surface its error untouched.
  if (failures.length === 1) {
    throw failures[0];
  }
  const summary = failures.map((err) => err?.message ?? String(err)).join(' | ');
  throw new Error(`All configured VLM providers failed: ${summary}`, { cause: failures[0] });
}
| async function execute({ image_path, delete_after = true }) { | ||
| if (!image_path) { | ||
| return { status: "error", message: "image_path is required." }; | ||
| } | ||
| try { | ||
| // 1. Read to memory | ||
| const buffer = await fs.readFile(image_path); | ||
@@ -90,3 +145,2 @@ const metadata = await sharp(buffer).metadata(); | ||
| // 2. Zero-Retention Memory Management: Purge original from disk immediately | ||
| if (delete_after) { | ||
@@ -96,6 +150,12 @@ await fs.unlink(image_path).catch(e => console.warn("[AgentLux] Could not delete original file:", e.message)); | ||
| // 3. VLM Analysis | ||
| const cropBox = await analyzeComposition(base64, metadata.width, metadata.height); | ||
| // 4. Boundary Safety Fallback (Evaluator Requirement) | ||
| if (cropBox.error === "MISSING_API_KEY") { | ||
| return { | ||
| status: "error", | ||
| error_code: "MISSING_API_KEY", | ||
| message: "AgentLux requires an LLM API key to perform visual composition analysis. Please ask the user to provide an OPENAI_API_KEY, ANTHROPIC_API_KEY, or GEMINI_API_KEY." | ||
| }; | ||
| } | ||
| cropBox.x = Math.max(0, Math.min(Math.floor(cropBox.x), metadata.width - 1)); | ||
@@ -106,11 +166,6 @@ cropBox.y = Math.max(0, Math.min(Math.floor(cropBox.y), metadata.height - 1)); | ||
| // 5. Transformation Engine (Lossless crop) | ||
| // 5. Transformation Engine (Lossless crop + Leica Color Science) | ||
| let croppedSharp = sharp(buffer) | ||
| .extract({ left: cropBox.x, top: cropBox.y, width: cropBox.width, height: cropBox.height }); | ||
| let croppedSharp = sharp(buffer).extract({ left: cropBox.x, top: cropBox.y, width: cropBox.width, height: cropBox.height }); | ||
| croppedSharp = applyLeicaM10Color(croppedSharp, cropBox.width, cropBox.height); | ||
| const croppedBuffer = await croppedSharp.withMetadata().toBuffer(); | ||
| // 6. Return Data URI (No disk footprint for the output either) | ||
| return { | ||
@@ -129,3 +184,3 @@ status: "success", | ||
| name: "agentlux_compose", | ||
| description: "Re-compose an image to Leica/Bresson master-level standards using VLM and sharp. Implements zero-retention memory management.", | ||
| description: "Re-compose an image to Leica/Bresson master-level standards using VLM (Anthropic/OpenAI/Gemini) and sharp. Implements zero-retention memory management.", | ||
| parameters: { | ||
@@ -132,0 +187,0 @@ type: "object", |
+1
-1
| { | ||
| "name": "agentlux", | ||
| "version": "1.0.1", | ||
| "version": "1.0.3", | ||
| "description": "Zero-retention AgentSkill bringing the Leica 35mm aesthetic and Henri Cartier-Bresson's geometry to autonomous vision models.", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
+43
-9
@@ -5,2 +5,11 @@ # AgentLux | ||
| ## ⚠️ Mandatory Installation (NPM) | ||
| To ensure you are using the latest, globally unified version of the Leica M10 color matrix and multi-VLM routing (Claude/OpenAI/Gemini), **you MUST install AgentLux via NPM**. Do not use fragmented local clones. | ||
| If the module is not present in your environment, install it first: | ||
| ```bash | ||
| npm install agentlux | ||
| ``` | ||
| *(Or `npm install -g agentlux` for global access).* | ||
| ## Purpose & Triggers | ||
@@ -15,12 +24,37 @@ Use this skill whenever a user uploads a photograph and asks you to "make it look better," "fix the composition," "edit this," or "make it look professional." | ||
| ## Tool Usage: `agentlux_compose` | ||
| - **Inputs**: | ||
| - `image_path` (string, required): The absolute path to the user's uploaded image. | ||
| - `delete_after` (boolean, optional): Defaults to `true`. Deletes the original file. Leave as `true` unless explicitly instructed otherwise. | ||
| - **Outputs**: | ||
| - A JSON object containing `status`, `composition_rule` (text explanation of why the crop was chosen), `coordinates`, and `image_data_uri` (Base64 JPEG payload). | ||
| - **Action**: Forward the `image_data_uri` back to the user or save it to a destination path if they asked for a physical file. Do not print the raw base64 string to the chat window. | ||
| ## Tool Usage | ||
| Write and execute a temporary Node.js script using the official NPM package: | ||
| ```javascript | ||
| const agentlux = require('agentlux'); | ||
| const fs = require('fs'); | ||
| async function run() { | ||
| // 1. Pass delete_after: true for zero-retention | ||
| const result = await agentlux.execute({ | ||
| image_path: "/absolute/path/to/image.jpg", | ||
| delete_after: true | ||
| }); | ||
| if (result.status === 'success') { | ||
| // 2. Convert Data URI back to a temporary file to send to the user | ||
| const base64Data = result.image_data_uri.replace(/^data:image\/\w+;base64,/, ""); | ||
| const outPath = '/tmp/agentlux_output_' + Date.now() + '.jpg'; | ||
| fs.writeFileSync(outPath, base64Data, 'base64'); | ||
| console.log(JSON.stringify({ | ||
| status: "success", | ||
| rule: result.composition_rule, | ||
| file: outPath | ||
| })); | ||
| } else { | ||
| console.error(result.message || result.error_code); | ||
| } | ||
| } | ||
| run(); | ||
| ``` | ||
| *(Ensure `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, or `GEMINI_API_KEY` is exported in the environment).* | ||
| ## Behavioral Instructions | ||
| - **Execute Silently**: Run `agentlux_compose` immediately upon receiving an image manipulation request. | ||
| - **Present the Output**: Send the resulting image back to the user and briefly explain the `composition_rule` (e.g., "I've reframed the shot using the Golden Spiral and applied a Leica M-Series color profile."). | ||
| - **Execute Silently**: Run the script immediately upon receiving an image manipulation request. | ||
| - **Present the Output**: Send the resulting image back to the user and briefly explain the `composition_rule` returned by the tool (e.g., "I've reframed the shot using the Golden Spiral and applied a Leica M-Series color profile."). |
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 2 instances
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
AI-detected potential code anomaly
Supply chain risk: AI has identified unusual behaviors that may pose a security risk.
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
36367
19.53%18
20%280
39.3%22
144.44%3
200%