firecrawl-mcp
Advanced tools
+354
| /** | ||
| * Firecrawl Monitor tools. | ||
| * | ||
| * Monitors run recurring scrapes/crawls and diff each result against the last | ||
| * retained snapshot. The SDK exposes monitor methods, but its HttpClient | ||
| * injects a top-level `origin` field into every POST/PATCH body and | ||
| * /v2/monitor rejects that with "Unrecognized key in body". Until the SDK | ||
| * strips `origin` for monitor requests, we hit /v2/monitor directly via fetch | ||
| * — same pattern the CLI uses. | ||
| */ | ||
| import { z } from 'zod'; | ||
/** Hosted Firecrawl API, used when FIRECRAWL_API_URL is not configured. */
const DEFAULT_API_URL = 'https://api.firecrawl.dev';

/**
 * Resolve the API key and base URL used for monitor requests.
 *
 * The API key comes from the session when it carries one, otherwise from the
 * FIRECRAWL_API_KEY environment variable. The base URL comes from
 * FIRECRAWL_API_URL, falling back to the hosted API, with trailing slashes
 * removed so callers can append `/v2/...` directly.
 *
 * @param {{ firecrawlApiKey?: string } | null | undefined} session
 * @returns {{ apiKey: string | undefined, baseUrl: string }}
 */
function resolveAuth(session) {
  // Only a missing (null/undefined) session key falls back to the environment.
  const apiKey = session?.firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY;

  // `||` rather than `??`: an env var that is present but empty
  // (`FIRECRAWL_API_URL=`) must count as unset, otherwise the base URL becomes
  // '' and the request URL cannot be parsed.
  const configuredUrl = process.env.FIRECRAWL_API_URL || DEFAULT_API_URL;
  const baseUrl = configuredUrl.replace(/\/+$/, '');

  return { apiKey, baseUrl };
}
/**
 * Call a `/v2` monitor endpoint with `fetch` and return the parsed JSON payload.
 *
 * @param {object} session - MCP session, forwarded to `resolveAuth` to obtain the API key.
 * @param {string} path - Path below `/v2`, e.g. `/monitor` or `/monitor/<id>/run`.
 * @param {object} [init]
 * @param {string} [init.method='GET'] - HTTP method.
 * @param {Record<string, unknown>} [init.query] - Query parameters; `undefined`,
 *   `null` and `''` values are omitted.
 * @param {unknown} [init.body] - Request body; JSON-serialised when not `undefined`.
 * @returns {Promise<any>} The parsed response body (`{}` when it is not valid JSON).
 * @throws {Error} When no API key is available and FIRECRAWL_API_URL is unset,
 *   when the HTTP status is not ok, or when the payload has `success: false`.
 */
async function monitorRequest(session, path, init = {}) {
  const { apiKey, baseUrl } = resolveAuth(session);
  // A key is mandatory unless a custom API URL is configured.
  if (!apiKey && !process.env.FIRECRAWL_API_URL) {
    throw new Error('Unauthorized: API key is required for monitor requests');
  }

  const query = new URLSearchParams();
  for (const [key, value] of Object.entries(init.query ?? {})) {
    if (value !== undefined && value !== null && value !== '') {
      query.set(key, String(value));
    }
  }
  const queryString = query.toString();
  const url = `${baseUrl}/v2${path}${queryString ? `?${queryString}` : ''}`;

  const hasBody = init.body !== undefined;
  const headers = { 'X-Origin': 'mcp' };
  if (apiKey) {
    headers.Authorization = `Bearer ${apiKey}`;
  }
  if (hasBody) {
    headers['Content-Type'] = 'application/json';
  }

  const response = await fetch(url, {
    method: init.method ?? 'GET',
    headers,
    body: hasBody ? JSON.stringify(init.body) : undefined,
  });

  // Best effort: an empty or non-JSON body is treated as an empty payload so
  // the status-based fallback message below can still be produced.
  const payload = await response.json().catch(() => ({}));

  if (!response.ok || payload?.success === false) {
    const reported = payload?.error;
    let message;
    if (typeof reported === 'string' && reported !== '') {
      message = reported;
    } else if (reported && typeof reported.message === 'string' && reported.message !== '') {
      // Structured error object: surface its message instead of "[object Object]".
      message = reported.message;
    } else if (reported) {
      message = JSON.stringify(reported);
    } else {
      message = `HTTP ${response.status}: ${response.statusText || 'Request failed'}`;
    }
    throw new Error(message);
  }
  return payload;
}
/**
 * Serialise a tool result as pretty-printed JSON (two-space indentation).
 *
 * @param {unknown} data - Value to serialise.
 * @returns {string | undefined} The JSON text, or `undefined` when
 *   `JSON.stringify` yields no output for the value.
 */
function asText(data) {
  const replacer = null;
  const indentWidth = 2;
  return JSON.stringify(data, replacer, indentWidth);
}
/** Per-page status values accepted by the `pageStatus` filter of `firecrawl_monitor_check`. */
const pageStatusSchema = z.enum(['same', 'new', 'changed', 'removed', 'error']);

/**
 * Register the Firecrawl monitor tools on an MCP server.
 *
 * Adds eight tools via `server.addTool`: create, list, get, update, delete,
 * run, checks (list) and check (get one). Each tool forwards its arguments to
 * `monitorRequest` and returns the response rendered by `asText`.
 *
 * @param {{ addTool: (tool: object) => void }} server - Server to register the tools on.
 * @returns {void}
 */
export function registerMonitorTools(server) {
  // IDs are interpolated into the request path. An empty string would collapse
  // `/monitor/<id>` into `/monitor/` and address the collection instead of a
  // single resource, so reject it at validation time.
  const monitorIdSchema = z.string().min(1, 'Monitor id must not be empty');
  const checkIdSchema = z.string().min(1, 'Check id must not be empty');

  /** Build the path of a single monitor, optionally followed by a sub-resource suffix. */
  const monitorPath = (id, suffix = '') => `/monitor/${encodeURIComponent(id)}${suffix}`;

  // POST /monitor
  server.addTool({
    name: 'firecrawl_monitor_create',
    annotations: {
      title: 'Create monitor',
      readOnlyHint: false,
      openWorldHint: true,
    },
    description: `
Create a Firecrawl monitor — a recurring scrape or crawl that diffs each result against the last retained snapshot.
Pass the full request body. Required fields: \`name\`, \`schedule\` (with \`cron\` or \`text\`), and \`targets\` (one or more \`{ type: 'scrape', urls: [...] }\` or \`{ type: 'crawl', url: '...' }\`). Optional: \`webhook\`, \`notification\`, \`retentionDays\`.
**Markdown-mode (default):** Each check produces a unified text diff of the page's markdown. No extra configuration needed.
\`\`\`json
{
"name": "firecrawl_monitor_create",
"arguments": {
"body": {
"name": "Blog watch",
"schedule": { "text": "every 30 minutes", "timezone": "UTC" },
"targets": [{ "type": "scrape", "urls": ["https://example.com/blog"] }],
"notification": { "email": { "enabled": true, "recipients": ["a@b.com"] } }
}
}
}
\`\`\`
**JSON-mode change tracking:** To detect changes in **specific structured fields** (price, headline, in-stock flag, list items) instead of the whole page, add a \`changeTracking\` format with \`modes: ["json"]\` and a JSON schema to the target's \`scrapeOptions.formats\`. The check response will then carry a per-field diff (keyed by JSON path, e.g. \`plans[0].price\`) and a \`snapshot.json\` with the full current extraction. See \`firecrawl_monitor_check\` for the response shape.
\`\`\`json
{
"name": "firecrawl_monitor_create",
"arguments": {
"body": {
"name": "Pricing watch",
"schedule": { "text": "hourly", "timezone": "UTC" },
"targets": [{
"type": "scrape",
"urls": ["https://example.com/pricing"],
"scrapeOptions": {
"formats": [{
"type": "changeTracking",
"modes": ["json"],
"prompt": "Extract pricing tiers and headline features for each plan.",
"schema": {
"type": "object",
"properties": {
"plans": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": { "type": "string" },
"price": { "type": "string" },
"features": { "type": "array", "items": { "type": "string" } }
}
}
}
}
}
}]
}
}]
}
}
}
\`\`\`
**Mixed mode (JSON + git-diff):** Use \`modes: ["json", "git-diff"]\` to get both per-field diffs and a markdown sidecar. The page is marked \`changed\` whenever either surface changed.
`,
    parameters: z.object({
      body: z.record(z.string(), z.any()),
    }),
    execute: async (args, { session, log }) => {
      const { body } = args;
      log.info('Creating monitor', { name: body.name });
      const res = await monitorRequest(session, '/monitor', {
        method: 'POST',
        body,
      });
      return asText(res);
    },
  });

  // GET /monitor
  server.addTool({
    name: 'firecrawl_monitor_list',
    annotations: {
      title: 'List monitors',
      readOnlyHint: true,
      openWorldHint: false,
    },
    description: `
List all Firecrawl monitors for the authenticated account.
**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_list", "arguments": { "limit": 20 } }
\`\`\`
`,
    parameters: z.object({
      limit: z.number().int().positive().optional(),
      offset: z.number().int().nonnegative().optional(),
    }),
    execute: async (args, { session }) => {
      const { limit, offset } = args;
      const res = await monitorRequest(session, '/monitor', {
        query: { limit, offset },
      });
      return asText(res);
    },
  });

  // GET /monitor/:id
  server.addTool({
    name: 'firecrawl_monitor_get',
    annotations: {
      title: 'Get monitor',
      readOnlyHint: true,
      openWorldHint: false,
    },
    description: `
Get a single monitor by ID.
**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_get", "arguments": { "id": "mon_abc123" } }
\`\`\`
`,
    parameters: z.object({ id: monitorIdSchema }),
    execute: async (args, { session }) => {
      const { id } = args;
      const res = await monitorRequest(session, monitorPath(id));
      return asText(res);
    },
  });

  // PATCH /monitor/:id
  server.addTool({
    name: 'firecrawl_monitor_update',
    annotations: {
      title: 'Update monitor',
      readOnlyHint: false,
      openWorldHint: true,
    },
    description: `
Update a monitor. Pass any subset of fields to patch: \`name\`, \`status\` ("active" | "paused"), \`schedule\`, \`targets\`, \`webhook\`, \`notification\`, \`retentionDays\`.
**Usage Example:**
\`\`\`json
{
"name": "firecrawl_monitor_update",
"arguments": {
"id": "mon_abc123",
"body": { "status": "paused" }
}
}
\`\`\`
`,
    parameters: z.object({
      id: monitorIdSchema,
      body: z.record(z.string(), z.any()),
    }),
    execute: async (args, { session }) => {
      const { id, body } = args;
      const res = await monitorRequest(session, monitorPath(id), { method: 'PATCH', body });
      return asText(res);
    },
  });

  // DELETE /monitor/:id
  server.addTool({
    name: 'firecrawl_monitor_delete',
    annotations: {
      title: 'Delete monitor',
      readOnlyHint: false,
      destructiveHint: true,
      openWorldHint: true,
    },
    description: `
Permanently delete a monitor and stop its schedule. This cannot be undone.
**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_delete", "arguments": { "id": "mon_abc123" } }
\`\`\`
`,
    parameters: z.object({ id: monitorIdSchema }),
    execute: async (args, { session, log }) => {
      const { id } = args;
      log.info('Deleting monitor', { id });
      const res = await monitorRequest(session, monitorPath(id), { method: 'DELETE' });
      return asText(res);
    },
  });

  // POST /monitor/:id/run
  server.addTool({
    name: 'firecrawl_monitor_run',
    annotations: {
      title: 'Run monitor now',
      readOnlyHint: false,
      openWorldHint: true,
    },
    description: `
Trigger a monitor check immediately, outside its normal schedule. Returns the queued check.
**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_run", "arguments": { "id": "mon_abc123" } }
\`\`\`
`,
    parameters: z.object({ id: monitorIdSchema }),
    execute: async (args, { session }) => {
      const { id } = args;
      const res = await monitorRequest(session, monitorPath(id, '/run'), { method: 'POST' });
      return asText(res);
    },
  });

  // GET /monitor/:id/checks
  server.addTool({
    name: 'firecrawl_monitor_checks',
    annotations: {
      title: 'List monitor checks',
      readOnlyHint: true,
      openWorldHint: false,
    },
    description: `
List historical checks for a monitor.
**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_checks", "arguments": { "id": "mon_abc123", "limit": 10 } }
\`\`\`
`,
    parameters: z.object({
      id: monitorIdSchema,
      limit: z.number().int().positive().optional(),
      offset: z.number().int().nonnegative().optional(),
    }),
    execute: async (args, { session }) => {
      const { id, limit, offset } = args;
      const res = await monitorRequest(session, monitorPath(id, '/checks'), { query: { limit, offset } });
      return asText(res);
    },
  });

  // GET /monitor/:id/checks/:checkId
  server.addTool({
    name: 'firecrawl_monitor_check',
    annotations: {
      title: 'Get monitor check',
      readOnlyHint: true,
      openWorldHint: false,
    },
    description: `
Get a single check with page-level diff results. Filter \`pageStatus\` to surface only the pages that changed (or were new, removed, etc.).
Each entry in \`data.pages[]\` has \`url\`, \`status\` (\`same\` | \`new\` | \`changed\` | \`removed\` | \`error\`), and — when changed — a \`diff\` and possibly a \`snapshot\`. The shape of \`diff\` depends on the monitor's \`formats\` configuration:
- **Markdown mode (default).** \`diff.text\` is the unified markdown diff; \`diff.json\` is a parse-diff AST (\`{ files: [...] }\`). No \`snapshot\`.
- **JSON mode** (\`changeTracking\` with \`modes: ["json"]\`). \`diff.json\` is a per-field map keyed by JSON path into the extraction, e.g. \`plans[0].price\`, with each value being \`{ previous, current }\`. \`snapshot.json\` is the full current extraction. No \`diff.text\`.
- **Mixed mode** (\`modes: ["json", "git-diff"]\`). Both \`diff.text\` (markdown sidecar) AND \`diff.json\` (per-field map) are present, plus \`snapshot.json\`.
**Example JSON-mode response \`pages[]\` entry:**
\`\`\`json
{
"url": "https://example.com/pricing",
"status": "changed",
"diff": {
"json": {
"plans[0].price": { "previous": "$19/mo", "current": "$24/mo" },
"plans[1].features[2]": { "previous": "10 GB storage", "current": "25 GB storage" }
}
},
"snapshot": { "json": { "plans": [/* current full extraction matching the monitor's schema */] } }
}
\`\`\`
When summarizing a check for the user, prefer \`diff.json\` paths (e.g. "plans[0].price changed from $19/mo to $24/mo") over re-printing the markdown diff — it's more concise and grounded in the schema fields they asked for.
The endpoint paginates via a top-level \`next\` URL; this tool returns one page at a time. Increase \`limit\` (max 100) to fetch fewer pages.
**Usage Example:**
\`\`\`json
{
"name": "firecrawl_monitor_check",
"arguments": {
"id": "mon_abc123",
"checkId": "chk_xyz",
"pageStatus": "changed"
}
}
\`\`\`
`,
    parameters: z.object({
      id: monitorIdSchema,
      checkId: checkIdSchema,
      limit: z.number().int().positive().optional(),
      skip: z.number().int().nonnegative().optional(),
      pageStatus: pageStatusSchema.optional(),
    }),
    execute: async (args, { session }) => {
      const { id, checkId, limit, skip, pageStatus } = args;
      // The tool-level `pageStatus` argument is sent as the `status` query parameter.
      const res = await monitorRequest(
        session,
        `${monitorPath(id, '/checks')}/${encodeURIComponent(checkId)}`,
        { query: { limit, skip, status: pageStatus } },
      );
      return asText(res);
    },
  });
}
+2
-2
| { | ||
| "name": "firecrawl-mcp", | ||
| "version": "3.16.0", | ||
| "version": "3.17.0", | ||
| "description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.", | ||
@@ -18,3 +18,3 @@ "type": "module", | ||
| "dependencies": { | ||
| "@mendable/firecrawl-js": "4.21.0", | ||
| "@mendable/firecrawl-js": "4.24.0", | ||
| "dotenv": "^17.2.2", | ||
@@ -21,0 +21,0 @@ "firecrawl-fastmcp": "^1.0.4", |
+1
-1
@@ -11,3 +11,3 @@ <div align="center"> | ||
| A Model Context Protocol (MCP) server implementation that integrates with [Firecrawl](https://github.com/firecrawl/firecrawl) for searching, scraping, and interacting with the web. | ||
| A Model Context Protocol (MCP) server that brings [Firecrawl](https://github.com/firecrawl/firecrawl) to MCP-compatible AI agents — search, scrape, and interact with the live web for clean, agent-ready context. | ||
@@ -14,0 +14,0 @@ > Big thanks to [@vrknetha](https://github.com/vrknetha), [@knacklabs](https://www.knacklabs.ai) for the initial implementation! |
Sorry, the diff of this file is too big to display
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
111116
13.53%5
25%1900
21.64%40
14.29%3
50%+ Added
+ Added
- Removed
- Removed