firecrawl-mcp
Advanced tools
+106
-8
@@ -17,5 +17,2 @@ /** | ||
| const BASE = '/v2/research'; | ||
/**
 * Serialize a value as pretty-printed (2-space indented) JSON text.
 *
 * `JSON.stringify` returns `undefined` — not a string — for `undefined`,
 * functions and symbols. Those inputs are rendered as `'null'` (the same
 * substitution JSON itself makes for such values inside arrays) so the result
 * is always a string.
 *
 * @param {*} data - Value to serialize.
 * @returns {string} JSON text.
 * @throws {TypeError} Propagated from `JSON.stringify` for circular structures
 *   and BigInt values.
 */
function asText(data) {
  const text = JSON.stringify(data, null, 2);
  return text === undefined ? 'null' : text;
}
| /** Append a value (or repeated array values) to a URLSearchParams instance. */ | ||
@@ -39,2 +36,100 @@ function appendParam(params, key, value) { | ||
| } | ||
// --- result formatting (ported from research-index-front/src/agent_eval.ts,
// hardened against malformed records) ---

// Max authors to print per paper (with affiliations); the rest collapse to a
// "+N more" tail so a large collaboration doesn't flood the context.
const MAX_AUTHORS = 15;
// Cap each abstract so a page of hits stays within the MCP output-token limit.
const MAX_ABSTRACT_CHARS = 600;
// Per-affiliation char cap — keeps one long org string (e.g. a full multi-dept
// university address) from bloating the authors line.
const MAX_AFFIL_CHARS = 60;
// Hard ceiling on the whole authors line, as a final guard.
const MAX_AUTHORS_LINE_CHARS = 400;

/**
 * Collapse every whitespace run (including newlines) to a single space and trim.
 *
 * @param {*} value - Candidate text.
 * @returns {string} The single-line text, or `''` when `value` is not a string.
 */
function collapseWhitespace(value) {
  return typeof value === 'string' ? value.replace(/\s+/g, ' ').trim() : '';
}

/**
 * Best display id for a paper: its first arXiv id, falling back to the
 * canonical `paper_id`, then to `'?'`.
 *
 * Empty-string ids count as missing, and `ids.arxiv` may be an array or a bare
 * string (indexing a bare string would otherwise yield only its first char).
 *
 * @param {object|null|undefined} p - Paper record.
 * @returns {string} The id to print.
 */
function displayId(p) {
  const arxiv = p?.ids?.arxiv;
  const candidates = [Array.isArray(arxiv) ? arxiv[0] : arxiv, p?.paper_id];
  const id = candidates.find(
    (c) => (typeof c === 'string' && c !== '') || typeof c === 'number',
  );
  return id === undefined ? '?' : String(id);
}

/**
 * Render one author entry as `name` or `name (affiliation)`.
 *
 * @param {*} author - A plain name string, or an object with a `name` and an
 *   optional `affiliation`.
 * @returns {string} The label, or `''` when no usable name is present.
 */
function authorLabel(author) {
  if (typeof author === 'string') {
    return author.trim();
  }
  const name = typeof author?.name === 'string' ? author.name.trim() : '';
  if (!name) {
    return '';
  }
  const affiliation =
    typeof author.affiliation === 'string' ? author.affiliation.trim() : '';
  return affiliation
    ? `${name} (${affiliation.slice(0, MAX_AFFIL_CHARS)})`
    : name;
}

/**
 * Format the authors line, accepting either the string or structured form.
 *
 * - String form: a comma-separated list of names.
 * - Structured form: an array whose entries are name strings or objects with
 *   `name` / optional `affiliation`.
 *
 * At most `MAX_AUTHORS` authors are printed; the remainder is summarised as a
 * `; +N more` tail. If the line would exceed `MAX_AUTHORS_LINE_CHARS`, whole
 * trailing authors are moved into that tail instead of cutting the line
 * mid-name, so the count of omitted authors stays visible.
 *
 * @param {string|Array|null|undefined} authors - Raw authors value.
 * @returns {string|null} The `Authors: …` line, or `null` when nothing can be shown.
 */
function fmtAuthors(authors) {
  if (!authors) {
    return null;
  }
  let entries;
  if (typeof authors === 'string') {
    entries = authors.split(',');
  } else if (Array.isArray(authors)) {
    entries = authors;
  } else {
    // Unknown shape: nothing sensible to print.
    return null;
  }
  const labels = entries.map(authorLabel).filter(Boolean);
  if (labels.length === 0) {
    return null;
  }

  const render = (count) => {
    const hidden = labels.length - count;
    const tail = hidden > 0 ? `; +${hidden} more` : '';
    return `Authors: ${labels.slice(0, count).join('; ')}${tail}`;
  };

  let count = Math.min(labels.length, MAX_AUTHORS);
  let line = render(count);
  // Always keep at least one author; shed the rest until the line fits.
  while (line.length > MAX_AUTHORS_LINE_CHARS && count > 1) {
    count -= 1;
    line = render(count);
  }
  // Final guard for a single over-long entry.
  return line.slice(0, MAX_AUTHORS_LINE_CHARS);
}

/**
 * Render ranked papers as `[id] title` / authors / abstract blocks.
 *
 * Each paper becomes one block of newline-separated lines; blocks are separated
 * by a blank line. Titles and abstracts are flattened to a single line, and the
 * abstract is capped at `MAX_ABSTRACT_CHARS`. Missing or blank titles/abstracts
 * print `(untitled)` / `(no abstract)`. Null entries are skipped.
 *
 * @param {Array|null|undefined} results - Paper records.
 * @returns {string} The formatted text, or `'(no results)'` when there is nothing to show.
 */
function fmtHits(results) {
  const papers = Array.isArray(results)
    ? results.filter((paper) => paper != null)
    : [];
  if (papers.length === 0) {
    return '(no results)';
  }
  return papers
    .map((paper) => {
      const title = collapseWhitespace(paper.title) || '(untitled)';
      const lines = [`[${displayId(paper)}] ${title}`];
      const authors = fmtAuthors(paper.authors);
      if (authors) {
        lines.push(authors);
      }
      const abstract = collapseWhitespace(paper.abstract) || '(no abstract)';
      lines.push(abstract.slice(0, MAX_ABSTRACT_CHARS));
      return lines.join('\n');
    })
    .join('\n\n');
}
// Cap GitHub matched content so a page of results stays within the MCP
// output-token limit. Higher than abstracts since issue/PR threads carry the
// signal (repro steps, stack traces) the agent actually needs to verify.
const MAX_GITHUB_CONTENT_CHARS = 1200;

/**
 * Render GitHub hits as header / url / body blocks separated by blank lines.
 *
 * - Header: `[repo] README` when `resultType === 'repo_readme'`; otherwise
 *   `[repo#number] (pageType, N segments)`, where the `#number` and the
 *   parenthesised metadata are omitted when absent.
 * - URL: `readmeUrl` if non-empty, else `url`; the line is skipped when neither
 *   is set.
 * - Body: `contentMd` if non-empty, else `snippet`, trimmed and capped at
 *   `MAX_GITHUB_CONTENT_CHARS`. Internal newlines are kept (so markdown
 *   tables/code survive). `(no content)` is printed when both are empty.
 *
 * Null entries are skipped.
 *
 * @param {Array|null|undefined} results - GitHub result records.
 * @returns {string} The formatted text, or `'(no results)'` when there is nothing to show.
 */
function fmtGithub(results) {
  const hits = Array.isArray(results)
    ? results.filter((hit) => hit != null)
    : [];
  if (hits.length === 0) {
    return '(no results)';
  }
  const asString = (value) => (typeof value === 'string' ? value : '');
  return hits
    .map((hit) => {
      const repo = hit.repo ?? '?';
      const lines = [];
      if (hit.resultType === 'repo_readme') {
        lines.push(`[${repo}] README`);
      } else {
        const ref = hit.number != null ? `#${hit.number}` : '';
        const segments = hit.segmentCount
          ? `${hit.segmentCount} ${hit.segmentCount === 1 ? 'segment' : 'segments'}`
          : '';
        const meta = [hit.pageType, segments].filter(Boolean).join(', ');
        lines.push(`[${repo}${ref}]${meta ? ` (${meta})` : ''}`);
      }
      // `||` rather than `??`: an empty readmeUrl must not mask a usable url.
      const url = hit.readmeUrl || hit.url;
      if (url) {
        lines.push(url);
      }
      const body = (asString(hit.contentMd) || asString(hit.snippet)).trim();
      lines.push(body ? body.slice(0, MAX_GITHUB_CONTENT_CHARS) : '(no content)');
      return lines.join('\n');
    })
    .join('\n\n');
}
| /** Only present these tools when the session has research enabled. */ | ||
@@ -88,3 +183,3 @@ const canAccess = (session) => session?.research === true; | ||
| const res = await client.http.get(withQuery(`${BASE}/papers`, params)); | ||
| return asText(res.data); | ||
| return fmtHits(res.data?.results); | ||
| }, | ||
@@ -133,3 +228,4 @@ }); | ||
| const res = await client.http.get(withQuery(`${BASE}/papers/${encodeURIComponent(primary)}/similar`, params)); | ||
| return asText(res.data); | ||
| const note = res.data?.note ? `\nnote: ${res.data.note}` : ''; | ||
| return `${fmtHits(res.data?.results)}\n(pool_size=${res.data?.pool_size ?? 0})${note}`; | ||
| }, | ||
@@ -168,7 +264,9 @@ }); | ||
| const res = await client.http.get(withQuery(`${BASE}/papers/${encodeURIComponent(arxiv_id)}`, params)); | ||
| return asText(res.data); | ||
| const passages = res.data?.passages ?? []; | ||
| return passages.length | ||
| ? passages.map((p) => p.text).join('\n---\n') | ||
| : '(no full-text passages available for this paper)'; | ||
| }, | ||
| }); | ||
| // --- search_github --- | ||
| // TODO: description pending — the user is writing this one. | ||
| server.addTool({ | ||
@@ -195,5 +293,5 @@ name: 'firecrawl_research_search_github', | ||
| const res = await client.http.get(withQuery(`${BASE}/github`, params)); | ||
| return asText(res.data); | ||
| return fmtGithub(res.data?.results); | ||
| }, | ||
| }); | ||
| } |
+1
-1
| { | ||
| "name": "firecrawl-mcp", | ||
| "version": "3.20.3", | ||
| "version": "3.20.4", | ||
| "description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.", | ||
@@ -5,0 +5,0 @@ "type": "module", |
130987
3.18%2276
4.5%