Big News: Socket raises $60M Series C at a $1B valuation to secure software supply chains for AI-driven development. Announcement
Sign In

firecrawl-mcp

Package Overview
Dependencies
Maintainers
1
Versions
85
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

firecrawl-mcp - npm Package Compare versions

Comparing version
3.20.3
to
3.20.4
+106
-8
dist/research.js

@@ -17,5 +17,2 @@ /**

const BASE = '/v2/research';
/**
 * Serialize a value as pretty-printed JSON using a two-space indent.
 *
 * @param {*} data - Any value accepted by `JSON.stringify`.
 * @returns {string | undefined} The JSON text, or `undefined` when
 *   `JSON.stringify` has nothing to serialize (e.g. `undefined` input).
 */
function asText(data) {
    const indentWidth = 2;
    const pretty = JSON.stringify(data, null, indentWidth);
    return pretty;
}
/** Append a value (or repeated array values) to a URLSearchParams instance. */

@@ -39,2 +36,100 @@ function appendParam(params, key, value) {

}
// --- result formatting (ported from research-index-front/src/agent_eval.ts) ---
// Note: every *_CHARS limit here is enforced with String.prototype.slice, so it
// counts UTF-16 code units rather than user-perceived characters.
// Max authors to print per paper (with affiliations); the rest collapse to a
// "+N more" tail so a large collaboration doesn't flood the context.
const MAX_AUTHORS = 15;
// Cap each abstract so a page of hits stays within the MCP output-token limit.
// The cap is applied after whitespace runs are collapsed to single spaces.
const MAX_ABSTRACT_CHARS = 600;
// Per-affiliation char cap — keeps one long org string (e.g. a full multi-dept
// university address) from bloating the authors line.
const MAX_AFFIL_CHARS = 60;
// Hard ceiling on the whole authors line, as a final guard. It is applied to
// the finished line (prefix, names, and the "+N more" tail together), so on a
// very long line the tail itself can be cut off.
const MAX_AUTHORS_LINE_CHARS = 400;
/**
 * Pick the identifier to show for a paper.
 *
 * Preference order: the first entry of `ids.arxiv`, then the canonical
 * `paper_id`, then the placeholder `'?'`. Only `null`/`undefined` trigger a
 * fallback, so an empty-string id is returned as-is.
 *
 * @param {object} p - Paper record; `ids`, `ids.arxiv` and `paper_id` are all optional.
 * @returns {*} The chosen id, or `'?'` when none is available.
 */
function displayId(p) {
    const firstArxivId = p.ids?.arxiv?.[0];
    if (firstArxivId !== undefined && firstArxivId !== null) {
        return firstArxivId;
    }
    const canonicalId = p.paper_id;
    if (canonicalId !== undefined && canonicalId !== null) {
        return canonicalId;
    }
    return '?';
}
/**
 * Build the one-line `Authors: …` summary for a paper.
 *
 * Accepts either a comma-separated string of names or an array of author
 * entries. Array entries may be `{ name, affiliation }` objects or plain name
 * strings. At most `MAX_AUTHORS` entries are printed; the remainder collapse to
 * a `; +N more` tail. Each affiliation is capped at `MAX_AFFIL_CHARS` and the
 * finished line at `MAX_AUTHORS_LINE_CHARS`.
 *
 * @param {string | Array<object | string> | null | undefined} authors
 * @returns {string | null} The formatted line, or `null` when there is nothing
 *   to print (empty/missing input, or a shape that is neither string nor array).
 */
function fmtAuthors(authors) {
    if (!authors) {
        return null;
    }
    let entries;
    if (typeof authors === 'string') {
        entries = authors
            .split(',')
            .map((s) => s.trim())
            .filter(Boolean);
    }
    else if (Array.isArray(authors)) {
        entries = authors;
    }
    else {
        // Unknown shape (e.g. a lone object): omit the line instead of
        // throwing on `.slice` and losing the whole page of results.
        return null;
    }
    if (entries.length === 0) {
        return null;
    }
    const total = entries.length;
    const shown = entries.slice(0, MAX_AUTHORS).map((entry) => {
        if (typeof entry === 'string') {
            return entry.trim() || '?';
        }
        // A missing/blank name becomes '?' (the placeholder used elsewhere in
        // this file) rather than the literal text "undefined" or an empty slot.
        const rawName = entry?.name;
        const name = rawName == null || String(rawName).trim() === '' ? '?' : String(rawName);
        // Only string affiliations are printed; other shapes are ignored
        // instead of throwing on `.trim()`.
        const aff = typeof entry?.affiliation === 'string' ? entry.affiliation.trim() : '';
        return aff ? `${name} (${aff.slice(0, MAX_AFFIL_CHARS)})` : name;
    });
    const extra = total > MAX_AUTHORS ? `; +${total - MAX_AUTHORS} more` : '';
    return `Authors: ${shown.join('; ')}${extra}`.slice(0, MAX_AUTHORS_LINE_CHARS);
}
/**
 * Render ranked papers as text blocks separated by a blank line.
 *
 * Each block is `[id] title`, an optional authors line, then the abstract with
 * whitespace runs collapsed to single spaces and capped at
 * `MAX_ABSTRACT_CHARS`. A missing, non-string, or whitespace-only abstract is
 * rendered as `(no abstract)`.
 *
 * @param {Array<object> | null | undefined} results - Paper hits to render.
 * @returns {string} The rendered blocks, or `(no results)` when there are none.
 */
function fmtHits(results) {
    if (!results || results.length === 0) {
        return '(no results)';
    }
    return results
        .map((r) => {
            const lines = [`[${displayId(r)}] ${r.title ?? '(untitled)'}`];
            const authors = fmtAuthors(r.authors);
            if (authors) {
                lines.push(authors);
            }
            // Collapse and trim first, so a whitespace-only abstract falls
            // through to the placeholder instead of printing a lone space.
            const abstract = typeof r.abstract === 'string'
                ? r.abstract.replace(/\s+/g, ' ').trim()
                : '';
            if (abstract) {
                let end = MAX_ABSTRACT_CHARS;
                // Never cut between the halves of a surrogate pair: if the last
                // kept code unit is a high surrogate whose partner would be
                // dropped, drop it as well.
                const lastUnit = abstract.charCodeAt(end - 1);
                if (abstract.length > end && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
                    end -= 1;
                }
                lines.push(abstract.slice(0, end));
            }
            else {
                lines.push('(no abstract)');
            }
            return lines.join('\n');
        })
        .join('\n\n');
}
// Cap GitHub matched content so a page of results stays within the MCP
// output-token limit. Higher than abstracts since issue/PR threads carry the
// signal (repro steps, stack traces) the agent actually needs to verify.
const MAX_GITHUB_CONTENT_CHARS = 1200;
/**
 * Render GitHub history/readme hits as text blocks separated by a blank line.
 *
 * Each block is a header, an optional URL, then the body:
 * - readme hits (`resultType === 'repo_readme'`): `[repo] README`
 * - everything else: `[repo#number] (pageType, N segments)`, with each part
 *   omitted when absent.
 *
 * The body is `contentMd` when it has non-whitespace text, otherwise `snippet`,
 * otherwise `(no content)`. Newlines inside the body are kept (so markdown
 * tables/code survive); only the ends are trimmed. The body is capped at
 * `MAX_GITHUB_CONTENT_CHARS`.
 *
 * @param {Array<object> | null | undefined} results - GitHub hits to render.
 * @returns {string} The rendered blocks, or `(no results)` when there are none.
 */
function fmtGithub(results) {
    if (!results || results.length === 0) {
        return '(no results)';
    }
    return results
        .map((r) => {
            const lines = [];
            if (r.resultType === 'repo_readme') {
                lines.push(`[${r.repo ?? '?'}] README`);
            }
            else {
                const ref = r.number != null ? `#${r.number}` : '';
                const meta = [
                    r.pageType,
                    r.segmentCount ? `${r.segmentCount} segments` : '',
                ]
                    .filter(Boolean)
                    .join(', ');
                lines.push(`[${r.repo ?? '?'}${ref}]${meta ? ` (${meta})` : ''}`);
            }
            const url = r.readmeUrl ?? r.url;
            if (url) {
                lines.push(url);
            }
            // Trim each candidate before choosing, so a whitespace-only
            // `contentMd` cannot mask a usable `snippet`. Non-string values are
            // treated as absent rather than throwing on `.trim()`.
            const body = [r.contentMd, r.snippet]
                .map((text) => (typeof text === 'string' ? text.trim() : ''))
                .find(Boolean) ?? '';
            if (body) {
                let end = MAX_GITHUB_CONTENT_CHARS;
                // Emoji are common in issue threads: never cut between the
                // halves of a surrogate pair.
                const lastUnit = body.charCodeAt(end - 1);
                if (body.length > end && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
                    end -= 1;
                }
                lines.push(body.slice(0, end));
            }
            else {
                lines.push('(no content)');
            }
            return lines.join('\n');
        })
        .join('\n\n');
}
/** Only present these tools when the session has research enabled. */

@@ -88,3 +183,3 @@ const canAccess = (session) => session?.research === true;

const res = await client.http.get(withQuery(`${BASE}/papers`, params));
return asText(res.data);
return fmtHits(res.data?.results);
},

@@ -133,3 +228,4 @@ });

const res = await client.http.get(withQuery(`${BASE}/papers/${encodeURIComponent(primary)}/similar`, params));
return asText(res.data);
const note = res.data?.note ? `\nnote: ${res.data.note}` : '';
return `${fmtHits(res.data?.results)}\n(pool_size=${res.data?.pool_size ?? 0})${note}`;
},

@@ -168,7 +264,9 @@ });

const res = await client.http.get(withQuery(`${BASE}/papers/${encodeURIComponent(arxiv_id)}`, params));
return asText(res.data);
const passages = res.data?.passages ?? [];
return passages.length
? passages.map((p) => p.text).join('\n---\n')
: '(no full-text passages available for this paper)';
},
});
// --- search_github ---
// TODO: description pending — the user is writing this one.
server.addTool({

@@ -195,5 +293,5 @@ name: 'firecrawl_research_search_github',

const res = await client.http.get(withQuery(`${BASE}/github`, params));
return asText(res.data);
return fmtGithub(res.data?.results);
},
});
}
+1
-1
{
"name": "firecrawl-mcp",
"version": "3.20.3",
"version": "3.20.4",
"description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",

@@ -5,0 +5,0 @@ "type": "module",