Big News: Socket raises $60M Series C at a $1B valuation to secure software supply chains for AI-driven development. Announcement
Sign In

firecrawl-mcp

Package Overview
Dependencies
Maintainers
1
Versions
85
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

firecrawl-mcp - npm Package Compare versions

Comparing version
3.20.2
to
3.20.3
+193
dist/research.js
/**
* Firecrawl Research tools (experimental).
*
* Thin MCP wrappers over the `/v2/research/*` endpoints (arXiv papers + GitHub
* history/readmes). These tools are hidden unless research is enabled for the
* session — locally via `FIRECRAWL_RESEARCH=true`, or remotely via the
* `?research=true` query param on the MCP endpoint (see `isResearchEnabled` in
* index.ts, which sets `session.research`).
*
* The installed `@mendable/firecrawl-js` predates the SDK's `research` client,
* so we call the endpoints directly through the SDK's HTTP layer (auth +
* retries) via `client.http.get(...)`, mirroring how the search tool reaches
* `/v2/search`.
*/
import { z } from 'zod';
/** Path prefix shared by every research endpoint requested in this module. */
const BASE = '/v2/research';
/**
 * Serialise a response payload as pretty-printed JSON (two-space indent).
 *
 * @param {*} data - Any JSON-serialisable value.
 * @returns {string} The indented JSON text.
 */
function asText(data) {
  const indent = 2;
  const serialized = JSON.stringify(data, null, indent);
  return serialized;
}
/**
 * Add `value` to `params` under `key`.
 *
 * Nullish values are ignored. An array contributes one entry per element,
 * skipping elements that are nullish or that stringify to an empty string.
 * Any other value is stringified and appended as a single entry.
 *
 * @param {URLSearchParams} params - Collection that receives the entries.
 * @param {string} key - Query parameter name.
 * @param {*} value - Scalar, array of scalars, or nullish.
 * @returns {void}
 */
function appendParam(params, key, value) {
  if (value === null || value === undefined) {
    return;
  }
  if (!Array.isArray(value)) {
    params.append(key, String(value));
    return;
  }
  for (const item of value) {
    if (item == null) {
      continue;
    }
    const text = String(item);
    if (text.length > 0) {
      params.append(key, text);
    }
  }
}
/**
 * Attach a serialised query string to `path`.
 *
 * @param {string} path - Request path without a query string.
 * @param {URLSearchParams} params - Parameters to serialise.
 * @returns {string} `path` unchanged when `params` serialises to nothing,
 *   otherwise `path`, a `?`, and the serialised parameters.
 */
function withQuery(path, params) {
  const serialized = params.toString();
  if (!serialized) {
    return path;
  }
  return `${path}?${serialized}`;
}
/**
 * Visibility gate for the research tools: true only when the session object
 * exists and its `research` property is strictly `true`.
 */
const canAccess = (session) => {
  if (session === null || session === undefined) {
    return false;
  }
  return session.research === true;
};
/**
 * Register the experimental Firecrawl research tools on an MCP server.
 *
 * Adds four read-only tools (paper search, related papers, paper reading and
 * GitHub search). Every tool is gated by `canAccess`, builds a query string
 * from its arguments, issues a GET below `BASE` through the session's client
 * and returns the response `data` as pretty-printed JSON text.
 *
 * @param {object} server - Server exposing `addTool(definition)`.
 * @param {Function} getClient - Called with the current session; must return a
 *   client exposing `http.get(url)` that resolves to an object with `data`.
 * @returns {void}
 */
export function registerResearchTools(server, getClient) {
  /** GET `path` with `params` on behalf of `session` and serialise the payload. */
  const fetchAsText = async (session, path, params) => {
    const client = getClient(session);
    const res = await client.http.get(withQuery(path, params));
    return asText(res.data);
  };

  // --- search_papers ---
  server.addTool({
    name: 'firecrawl_research_search_papers',
    canAccess,
    annotations: {
      title: 'Search arXiv papers',
      readOnlyHint: true,
      openWorldHint: true,
    },
    description: 'Primary entry point for finding arXiv papers by topic. Semantic (HyDE) search over arXiv ' +
      'abstracts; returns ranked papers with arXiv id, title, and abstract. The query should be a ' +
      'natural-language description of what you want. Run SEVERAL distinct framings of the question ' +
      '(sibling domains, rival methods, dataset/benchmark names) rather than one query — recall ' +
      'improves markedly with diverse framings. Returns up to `k` results (default 40).',
    parameters: z.object({
      query: z.string().min(1),
      k: z.number().int().min(1).max(500).optional(),
      authors: z
        .array(z.string())
        .optional()
        .describe('Author substring filter(s); ALL must match (case-insensitive).'),
      categories: z
        .array(z.string())
        .optional()
        .describe('arXiv category filter(s) (e.g. `cs.LG`); ALL must match.'),
      from: z
        .string()
        .optional()
        .describe('Inclusive lower bound on created/updated date (`YYYY-MM-DD`).'),
      to: z
        .string()
        .optional()
        .describe('Inclusive upper bound on created/updated date (`YYYY-MM-DD`).'),
    }),
    execute: async (args, { session }) => {
      const { query, k, authors, categories, from, to } = args;
      const params = new URLSearchParams();
      appendParam(params, 'query', query);
      appendParam(params, 'k', k);
      // Array filters are sent as repeated parameters, one per element.
      appendParam(params, 'authors', authors);
      appendParam(params, 'categories', categories);
      appendParam(params, 'from', from);
      appendParam(params, 'to', to);
      return fetchAsText(session, `${BASE}/papers`, params);
    },
  });

  // --- related_papers ---
  server.addTool({
    name: 'firecrawl_research_related_papers',
    canAccess,
    annotations: {
      title: 'Find related arXiv papers',
      readOnlyHint: true,
      openWorldHint: true,
    },
    description: 'Expand from anchor papers you have already found, via the citation graph, ranked and filtered ' +
      'to a natural-language `intent`. Pass arXiv ids of your strongest hits as `seed_ids`. Modes: ' +
      '`similar` (cocitation/coupling — papers in the same niche; the default), `citers` (papers ' +
      'that cite the anchors), `references` (papers the anchors cite). This reaches relevant papers ' +
      'that plain search misses, so use it on your best hits before finishing. A `similar` call ' +
      'already runs a DEEP multi-round expansion internally (re-seeding from each round’s best ' +
      'finds), so one call reaches the wider neighborhood — no need to chain many. Returns the ' +
      'candidates plus the pool size.',
    parameters: z.object({
      seed_ids: z.array(z.string()).min(1).max(10),
      intent: z.string().min(1),
      mode: z.enum(['similar', 'citers', 'references']).optional(),
      k: z.number().int().min(1).max(500).optional(),
      rerank: z
        .boolean()
        .optional()
        .describe('Apply an additional rerank over the fused candidates.'),
    }),
    execute: async (args, { session }) => {
      const { seed_ids, intent, mode, k, rerank } = args;
      // The schema guarantees a non-empty array but not non-empty strings.
      // Drop blank ids before choosing the primary seed so a blank first entry
      // cannot produce a `/papers//similar` path; blank anchors were already
      // skipped by `appendParam`.
      const seeds = seed_ids.filter((id) => id != null && String(id).length > 0);
      if (seeds.length === 0) {
        throw new Error('seed_ids must contain at least one non-empty arXiv id');
      }
      // The endpoint takes a single primary seed in the path; any additional
      // seeds ride along as repeated `anchor` params.
      const [primary, ...anchors] = seeds;
      const params = new URLSearchParams();
      appendParam(params, 'intent', intent);
      appendParam(params, 'mode', mode);
      appendParam(params, 'k', k);
      // `appendParam` ignores nullish values, so an omitted `rerank` is not sent.
      appendParam(params, 'rerank', rerank);
      appendParam(params, 'anchor', anchors);
      return fetchAsText(session, `${BASE}/papers/${encodeURIComponent(primary)}/similar`, params);
    },
  });

  // --- read_paper ---
  server.addTool({
    name: 'firecrawl_research_read_paper',
    canAccess,
    annotations: {
      title: 'Read an arXiv paper',
      readOnlyHint: true,
      openWorldHint: true,
    },
    description: 'Read the most relevant in-body (full-text) passages of ONE specific paper for a question. Use ' +
      'this to VERIFY whether a candidate actually satisfies a constraint before you include or ' +
      "reject it (e.g. 'does this paper actually use technique X / report a score on benchmark Y'). " +
      "Returns the best-matching passages, or a notice if the paper's full text is unavailable.",
    parameters: z.object({
      arxiv_id: z.string().min(1),
      question: z.string().min(1),
      k: z
        .number()
        .int()
        .min(1)
        .max(50)
        .optional()
        .describe('Number of passages to return (default 4).'),
    }),
    execute: async (args, { session }) => {
      const { arxiv_id, question, k } = args;
      const params = new URLSearchParams();
      // The tool argument is named `question`; it is sent as the `query` parameter.
      appendParam(params, 'query', question);
      appendParam(params, 'k', k);
      return fetchAsText(session, `${BASE}/papers/${encodeURIComponent(arxiv_id)}`, params);
    },
  });

  // --- search_github ---
  // TODO: description pending — the user is writing this one.
  server.addTool({
    name: 'firecrawl_research_search_github',
    canAccess,
    annotations: {
      title: 'Search GitHub history',
      readOnlyHint: true,
      openWorldHint: true,
    },
    description: 'Search GitHub issue/PR history and repository readmes. Returns ranked matches with repo, ' +
      'url, a short snippet, and (when available) the full matched content in markdown.',
    parameters: z.object({
      query: z.string().min(1),
      k: z.number().int().min(1).max(100).optional(),
    }),
    execute: async (args, { session }) => {
      const { query, k } = args;
      const params = new URLSearchParams();
      appendParam(params, 'query', query);
      appendParam(params, 'k', k);
      return fetchAsText(session, `${BASE}/github`, params);
    },
  });
}
+2
-2
{
"name": "firecrawl-mcp",
- "version": "3.20.2",
+ "version": "3.20.3",
"description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",

@@ -18,3 +18,3 @@ "type": "module",

"dependencies": {
- "@mendable/firecrawl-js": "4.24.0",
+ "@mendable/firecrawl-js": "4.25.2",
"dotenv": "^17.2.2",

@@ -21,0 +21,0 @@ "firecrawl-fastmcp": "^1.0.5",

Sorry, the diff of this file is too big to display