@jackwener/opencli
Advanced tools
| export {}; |
| import { cli, Strategy } from '../../registry.js'; | ||
| import { apiGet } from '../../bilibili.js'; | ||
| cli({ | ||
| site: 'bilibili', | ||
| name: 'subtitle', | ||
| description: '获取 Bilibili 视频的字幕', | ||
| strategy: Strategy.COOKIE, | ||
| args: [ | ||
| { name: 'bvid', required: true }, | ||
| { name: 'lang', required: false, help: '字幕语言代码 (如 zh-CN, en-US, ai-zh),默认取第一个' }, | ||
| ], | ||
| columns: ['index', 'from', 'to', 'content'], | ||
| func: async (page, kwargs) => { | ||
| if (!page) | ||
| throw new Error('Requires browser'); | ||
| // Handler flow: bvid -> CID -> signed player API -> subtitle CDN JSON -> output rows. 1. Navigate to the video detail page first (establishes the authenticated session; the video itself does not need to finish loading) | ||
| await page.goto(`https://www.bilibili.com/video/${kwargs.bvid}/`); | ||
| // 2. Read the CID from the page's window.__INITIAL_STATE__ (videoData.cid); a missing CID aborts with an error | ||
| const cid = await page.evaluate(`(async () => { | ||
| const state = window.__INITIAL_STATE__ || {}; | ||
| return state?.videoData?.cid; | ||
| })()`); | ||
| if (!cid) { | ||
| throw new Error('无法在页面中提取到当前视频的 CID,请检查页面是否正常加载。'); | ||
| } | ||
| // 3. Fetch the subtitle list via apiGet (described by the original author as running on the Node side) with Wbi signing enabled | ||
| // A plain fetch inside evaluate used to fail: Bilibili's /wbi/ endpoints strictly validate w_rid, and unsigned requests are rejected by risk control with a 403 HTML page | ||
| const payload = await apiGet(page, '/x/player/wbi/v2', { | ||
| params: { bvid: kwargs.bvid, cid }, | ||
| signed: true, // enable automatic wbi_sign signing | ||
| }); | ||
| if (payload.code !== 0) { | ||
| throw new Error(`获取视频播放信息失败: ${payload.message} (${payload.code})`); | ||
| } | ||
| const subtitles = payload.data?.subtitle?.subtitles || []; | ||
| if (subtitles.length === 0) { | ||
| throw new Error('此视频没有发现外挂或智能字幕。'); | ||
| } | ||
| // 4. Pick the subtitle track: the one whose `lan` equals kwargs.lang; when lang is omitted or no track matches, fall back to the first track | ||
| const target = kwargs.lang | ||
| ? subtitles.find((s) => s.lan === kwargs.lang) || subtitles[0] | ||
| : subtitles[0]; | ||
| const targetSubUrl = target.subtitle_url; | ||
| if (!targetSubUrl || targetSubUrl === '') { | ||
| throw new Error('[风控拦截/未登录] 获取到的 subtitle_url 为空!请确保 CLI 已成功登录且风控未封锁此账号。'); | ||
| } | ||
| const finalUrl = targetSubUrl.startsWith('//') ? 'https:' + targetSubUrl : targetSubUrl; | ||
| // 5. Fetch and parse the subtitle JSON from the CDN inside the page context; the script returns { success, data } or { error, ... } instead of throwing | ||
| const fetchJs = ` | ||
| (async () => { | ||
| const url = ${JSON.stringify(finalUrl)}; | ||
| const res = await fetch(url); | ||
| const text = await res.text(); | ||
| if (text.startsWith('<!DOCTYPE') || text.startsWith('<html')) { | ||
| return { error: 'HTML', text: text.substring(0, 100), url }; | ||
| } | ||
| try { | ||
| const subJson = JSON.parse(text); | ||
| // B站真实返回格式是 { font_size: 0.4, font_color: "#FFFFFF", background_alpha: 0.5, background_color: "#9C27B0", Stroke: "none", type: "json" , body: [{from: 0, to: 0, content: ""}] } | ||
| if (Array.isArray(subJson?.body)) return { success: true, data: subJson.body }; | ||
| if (Array.isArray(subJson)) return { success: true, data: subJson }; | ||
| return { error: 'UNKNOWN_JSON', data: subJson }; | ||
| } catch (e) { | ||
| return { error: 'PARSE_FAILED', text: text.substring(0, 100) }; | ||
| } | ||
| })() | ||
| `; | ||
| const items = await page.evaluate(fetchJs); | ||
| if (items?.error) { | ||
| throw new Error(`字幕获取失败: ${items.error}${items.text ? ' — ' + items.text : ''}`); | ||
| } | ||
| const finalItems = items?.data || []; | ||
| if (!Array.isArray(finalItems)) { | ||
| throw new Error('解析到的字幕列表对象不符合数组格式'); | ||
| } | ||
| // 6. Map each cue to an output row: 1-based index, from/to rendered as seconds with two decimals (missing values become 0), and the cue text | ||
| return finalItems.map((item, idx) => ({ | ||
| index: idx + 1, | ||
| from: Number(item.from || 0).toFixed(2) + 's', | ||
| to: Number(item.to || 0).toFixed(2) + 's', | ||
| content: item.content | ||
| })); | ||
| }, | ||
| }); |
| import { cli, Strategy } from '../../registry.js'; | ||
| import type { IPage } from '../../types.js'; | ||
| import { apiGet } from '../../bilibili.js'; | ||
| cli({ | ||
| site: 'bilibili', | ||
| name: 'subtitle', | ||
| description: '获取 Bilibili 视频的字幕', | ||
| strategy: Strategy.COOKIE, | ||
| args: [ | ||
| { name: 'bvid', required: true }, | ||
| { name: 'lang', required: false, help: '字幕语言代码 (如 zh-CN, en-US, ai-zh),默认取第一个' }, | ||
| ], | ||
| columns: ['index', 'from', 'to', 'content'], | ||
| func: async (page: IPage | null, kwargs: any) => { | ||
| if (!page) throw new Error('Requires browser'); | ||
| // Handler flow: bvid -> CID -> signed player API -> subtitle CDN JSON -> output rows. 1. Navigate to the video detail page first (establishes the authenticated session; the video itself does not need to finish loading) | ||
| await page.goto(`https://www.bilibili.com/video/${kwargs.bvid}/`); | ||
| // 2. Read the CID from the page's window.__INITIAL_STATE__ (videoData.cid); a missing CID aborts with an error | ||
| const cid = await page.evaluate(`(async () => { | ||
| const state = window.__INITIAL_STATE__ || {}; | ||
| return state?.videoData?.cid; | ||
| })()`); | ||
| if (!cid) { | ||
| throw new Error('无法在页面中提取到当前视频的 CID,请检查页面是否正常加载。'); | ||
| } | ||
| // 3. Fetch the subtitle list via apiGet (described by the original author as running on the Node side) with Wbi signing enabled | ||
| // A plain fetch inside evaluate used to fail: Bilibili's /wbi/ endpoints strictly validate w_rid, and unsigned requests are rejected by risk control with a 403 HTML page | ||
| const payload = await apiGet(page, '/x/player/wbi/v2', { | ||
| params: { bvid: kwargs.bvid, cid }, | ||
| signed: true, // enable automatic wbi_sign signing | ||
| }); | ||
| if (payload.code !== 0) { | ||
| throw new Error(`获取视频播放信息失败: ${payload.message} (${payload.code})`); | ||
| } | ||
| const subtitles = payload.data?.subtitle?.subtitles || []; | ||
| if (subtitles.length === 0) { | ||
| throw new Error('此视频没有发现外挂或智能字幕。'); | ||
| } | ||
| // 4. Pick the subtitle track: the one whose `lan` equals kwargs.lang; when lang is omitted or no track matches, fall back to the first track | ||
| const target = kwargs.lang | ||
| ? subtitles.find((s: any) => s.lan === kwargs.lang) || subtitles[0] | ||
| : subtitles[0]; | ||
| const targetSubUrl = target.subtitle_url; | ||
| if (!targetSubUrl || targetSubUrl === '') { | ||
| throw new Error('[风控拦截/未登录] 获取到的 subtitle_url 为空!请确保 CLI 已成功登录且风控未封锁此账号。'); | ||
| } | ||
| const finalUrl = targetSubUrl.startsWith('//') ? 'https:' + targetSubUrl : targetSubUrl; | ||
| // 5. Fetch and parse the subtitle JSON from the CDN inside the page context; the script returns { success, data } or { error, ... } instead of throwing | ||
| const fetchJs = ` | ||
| (async () => { | ||
| const url = ${JSON.stringify(finalUrl)}; | ||
| const res = await fetch(url); | ||
| const text = await res.text(); | ||
| if (text.startsWith('<!DOCTYPE') || text.startsWith('<html')) { | ||
| return { error: 'HTML', text: text.substring(0, 100), url }; | ||
| } | ||
| try { | ||
| const subJson = JSON.parse(text); | ||
| // B站真实返回格式是 { font_size: 0.4, font_color: "#FFFFFF", background_alpha: 0.5, background_color: "#9C27B0", Stroke: "none", type: "json" , body: [{from: 0, to: 0, content: ""}] } | ||
| if (Array.isArray(subJson?.body)) return { success: true, data: subJson.body }; | ||
| if (Array.isArray(subJson)) return { success: true, data: subJson }; | ||
| return { error: 'UNKNOWN_JSON', data: subJson }; | ||
| } catch (e) { | ||
| return { error: 'PARSE_FAILED', text: text.substring(0, 100) }; | ||
| } | ||
| })() | ||
| `; | ||
| const items = await page.evaluate(fetchJs); | ||
| if (items?.error) { | ||
| throw new Error(`字幕获取失败: ${items.error}${items.text ? ' — ' + items.text : ''}`); | ||
| } | ||
| const finalItems = items?.data || []; | ||
| if (!Array.isArray(finalItems)) { | ||
| throw new Error('解析到的字幕列表对象不符合数组格式'); | ||
| } | ||
| // 6. Map each cue to an output row: 1-based index, from/to rendered as seconds with two decimals (missing values become 0), and the cue text | ||
| return finalItems.map((item: any, idx: number) => ({ | ||
| index: idx + 1, | ||
| from: Number(item.from || 0).toFixed(2) + 's', | ||
| to: Number(item.to || 0).toFixed(2) + 's', | ||
| content: item.content | ||
| })); | ||
| }, | ||
| }); |
+102
-5
@@ -60,4 +60,5 @@ # CLI-CREATOR — 适配器开发完全指南 | ||
| 2. **全局状态查找法 (`__INITIAL_STATE__`)**: 许多服务端渲染 (SSR) 的网站(如小红书、Bilibili)会将首页或详情页的完整数据挂载到全局 window 对象上。与其去拦截网络请求,不如直接 `page.evaluate('() => window.__INITIAL_STATE__')` 获取整个数据树。 | ||
| 3. **框架探测与 Store Action 截断**: 如果站点使用 Vue + Pinia,可以使用 `tap` 步骤调用 action,让前端框架代替你完成复杂的鉴权签名封装。 | ||
| 4. **底层 XHR/Fetch 拦截**: 最后手段,当上述都不行时,使用 TypeScript 适配器进行无侵入式的请求抓取。 | ||
| 3. **主动交互触发法 (Active Interaction)**: 很多深层 API(如视频字幕、评论下的回复)是懒加载的。在静态抓包找不到数据时,可以在 `evaluate` 步骤中或手动断点调试时,主动**点击(Click)页面上的对应按钮**(如"CC"、"展开全部"),从而触发隐藏的网络请求(Network Fetch)。 | ||
| 4. **框架探测与 Store Action 截断**: 如果站点使用 Vue + Pinia,可以使用 `tap` 步骤调用 action,让前端框架代替你完成复杂的鉴权签名封装。 | ||
| 5. **底层 XHR/Fetch 拦截**: 最后手段,当上述都不行时,使用 TypeScript 适配器进行无侵入式的请求抓取。 | ||
@@ -415,2 +416,31 @@ ### 1d. 框架检测 | ||
| #### 进阶场景 1: 级联请求 (Cascading Requests) 与鉴权绕过 | ||
| 部分数据需要经过多步级联请求才能拿到(例如 B 站获取视频字幕:先用 `bvid` 拿到核心的 `cid`,再用 `cid` 请求需要 Wbi 签名的播放器接口,获得字幕列表及其拉取地址,最后 fetch 真实的 CDN 资源)。在此类场景中,你必须在同一个 `evaluate` 块内部,或者在 TypeScript 适配器的 Node 端,编排整条请求链: | ||
| ```typescript | ||
| // 真实场景:B站获取视频字幕的级联获取思路 | ||
| const subtitleUrls = await page.evaluate(async (bvid) => { | ||
| // Step 1: 拿 CID (通常可以通过页面全局状态极速提取) | ||
| const cid = window.__INITIAL_STATE__?.videoData?.cid; | ||
| // Step 2: 依据 BVID 和 CID 拿字幕配置 (可能需要携带 W_RID 签名或依赖浏览器当前登录状态 Cookie) | ||
| const res = await fetch(\`/x/player/wbi/v2?bvid=\${bvid}&cid=\${cid}\`, { credentials: 'include' }); | ||
| const data = await res.json(); | ||
| // Step 3: 风控拦截/未登录降级空值检测 (Anti-Bot Empty Value Detection) ⚠️ 极其重要 | ||
| // 很多大厂 API 只要签名失败或无强登录 Cookie 依然会返回 HTTP 200,但把关键 URL 设为 "" | ||
| const firstSubUrl = data.data?.subtitle?.subtitles?.[0]?.subtitle_url; | ||
| if (!firstSubUrl) { | ||
| throw new Error('被风控降级或需登录:拿不到真实的 subtitle_url,请检查 Cookie 状态 (Tier 2/3)'); | ||
| } | ||
| return firstSubUrl; | ||
| }, kwargs.bvid); | ||
| // Step 4: 拉取最终的 CDN 静态文件 (无鉴权) | ||
| const finalRes = await fetch(subtitleUrls.startsWith('//') ? 'https:' + subtitleUrls : subtitleUrls); | ||
| const subtitles = await finalRes.json(); | ||
| ``` | ||
| --- | ||
@@ -544,2 +574,66 @@ | ||
| ## 进阶模式: 级联请求 (Cascading Requests) | ||
| 当目标数据需要多步 API 链式获取时(如 `BVID → CID → 字幕列表 → 字幕内容`),必须使用 **TS 适配器**。YAML 无法处理这种多步逻辑。 | ||
| ### 模板代码 | ||
| ```typescript | ||
| import { cli, Strategy } from '../../registry.js'; | ||
| import type { IPage } from '../../types.js'; | ||
| import { apiGet } from '../../bilibili.js'; // 复用平台 SDK | ||
| cli({ | ||
| site: 'bilibili', | ||
| name: 'subtitle', | ||
| strategy: Strategy.COOKIE, | ||
| args: [{ name: 'bvid', required: true }], | ||
| columns: ['index', 'from', 'to', 'content'], | ||
| func: async (page: IPage | null, kwargs: any) => { | ||
| if (!page) throw new Error('Requires browser'); | ||
| // Step 1: 建立 Session | ||
| await page.goto(`https://www.bilibili.com/video/${kwargs.bvid}/`); | ||
| // Step 2: 从页面提取中间 ID (__INITIAL_STATE__) | ||
| const cid = await page.evaluate(`(async () => { | ||
| return window.__INITIAL_STATE__?.videoData?.cid; | ||
| })()`); | ||
| if (!cid) throw new Error('无法提取 CID'); | ||
| // Step 3: 用中间 ID 调用下一级 API (自动 Wbi 签名) | ||
| const payload = await apiGet(page, '/x/player/wbi/v2', { | ||
| params: { bvid: kwargs.bvid, cid }, | ||
| signed: true, // ← 自动生成 w_rid | ||
| }); | ||
| // Step 4: 检测风控降级 (空值断言) | ||
| const subtitles = payload.data?.subtitle?.subtitles || []; | ||
| const url = subtitles[0]?.subtitle_url; | ||
| if (!url) throw new Error('subtitle_url 为空,疑似风控降级'); | ||
| // Step 5: 拉取最终数据 (CDN JSON) | ||
| const items = await page.evaluate(`(async () => { | ||
| const res = await fetch(${JSON.stringify('https:' + url)}); | ||
| const json = await res.json(); | ||
| return { data: json.body || json }; | ||
| })()`); | ||
| return items.data.map((item, idx) => ({ ... })); | ||
| }, | ||
| }); | ||
| ``` | ||
| ### 关键要点 | ||
| | 步骤 | 注意事项 | | ||
| |------|----------| | ||
| | 提取中间 ID | 优先从 `__INITIAL_STATE__` 拿,避免额外 API 调用 | | ||
| | Wbi 签名 | B 站 `/wbi/` 接口**强制校验** `w_rid`,纯 `fetch` 会被 403 | | ||
| | 空值断言 | 即使 HTTP 200,核心字段可能为空串(风控降级) | | ||
| | CDN URL | 常以 `//` 开头,记得补 `https:` | | ||
| | `JSON.stringify` | 拼接 URL 到 evaluate 时必须用它转义,避免注入 | | ||
| --- | ||
| ## 常见陷阱 | ||
@@ -559,2 +653,4 @@ | ||
| | YAML 内嵌大段 JS | 调试困难,字符串转义问题 | 超过 10 行 JS 的命令改用 TS adapter | | ||
| | **风控被拦截(伪200)** | 获取到的 JSON 里核心数据是 `""` (空串) | 极易被误判。必须添加断言!无核心数据立刻要求升级鉴权 Tier 并重新配置 Cookie | | ||
| | **找不到目标 API** | `explore` 工具打分筛出的接口都拿不到深层数据 | 先点击页面按钮触发懒加载请求,再结合 `getInterceptedRequests` 抓取对应接口 | | ||
@@ -572,7 +668,8 @@ --- | ||
| # 或分步执行: | ||
| opencli explore https://www.example.com --site mysite # 发现 API | ||
| opencli synthesize mysite # 生成候选 YAML | ||
| opencli verify mysite/hot --smoke # 冒烟测试 | ||
| opencli explore https://www.example.com --site mysite # 发现 API | ||
| opencli explore https://www.example.com --auto --click "字幕,CC" # 模拟点击触发懒加载 API | ||
| opencli synthesize mysite # 生成候选 YAML | ||
| opencli verify mysite/hot --smoke # 冒烟测试 | ||
| ``` | ||
| 生成的候选 YAML 保存在 `.opencli/explore/mysite/candidates/`,可直接复制到 `src/clis/mysite/` 并微调。 |
@@ -26,3 +26,7 @@ /** | ||
| pressKey(key: string): Promise<void>; | ||
| wait(seconds: number): Promise<void>; | ||
| wait(options: number | { | ||
| text?: string; | ||
| time?: number; | ||
| timeout?: number; | ||
| }): Promise<void>; | ||
| tabs(): Promise<any>; | ||
@@ -29,0 +33,0 @@ closeTab(index?: number): Promise<void>; |
+33
-5
@@ -113,4 +113,10 @@ /** | ||
| } | ||
| async wait(seconds) { | ||
| await this.call('tools/call', { name: 'browser_wait_for', arguments: { time: seconds } }); | ||
| async wait(options) { | ||
| if (typeof options === 'number') { | ||
| await this.call('tools/call', { name: 'browser_wait_for', arguments: { time: options } }); | ||
| } | ||
| else { | ||
| // Object form: forwarded unchanged as the browser_wait_for arguments (a bare number above is wrapped as { time }). NOTE(review): the tool presumably waits for the text natively instead of this code polling the DOM — confirm against the tool's documentation | ||
| await this.call('tools/call', { name: 'browser_wait_for', arguments: options }); | ||
| } | ||
| } | ||
@@ -141,6 +147,28 @@ async tabs() { | ||
| const delayMs = options.delayMs ?? 2000; | ||
| for (let i = 0; i < times; i++) { | ||
| await this.evaluate('() => window.scrollTo(0, document.body.scrollHeight)'); | ||
| await this.wait(delayMs / 1000); | ||
| const js = ` | ||
| async () => { | ||
| const maxTimes = ${times}; | ||
| const maxWaitMs = ${delayMs}; | ||
| for (let i = 0; i < maxTimes; i++) { | ||
| const lastHeight = document.body.scrollHeight; | ||
| window.scrollTo(0, lastHeight); | ||
| await new Promise(resolve => { | ||
| let timeoutId; | ||
| const observer = new MutationObserver(() => { | ||
| if (document.body.scrollHeight > lastHeight) { | ||
| clearTimeout(timeoutId); | ||
| observer.disconnect(); | ||
| setTimeout(resolve, 100); // Small debounce for rendering | ||
| } | ||
| }); | ||
| observer.observe(document.body, { childList: true, subtree: true }); | ||
| timeoutId = setTimeout(() => { | ||
| observer.disconnect(); | ||
| resolve(null); | ||
| }, maxWaitMs); | ||
| }); | ||
| } | ||
| } | ||
| `; | ||
| await this.evaluate(js); | ||
| } | ||
@@ -147,0 +175,0 @@ async installInterceptor(pattern) { |
+50
-0
@@ -178,2 +178,5 @@ /** | ||
| s += 2; | ||
| // Anti-Bot Empty Value Detection: penalize JSON endpoints returning empty data | ||
| if (ep.responseAnalysis && ep.responseAnalysis.itemCount === 0 && ep.contentType.includes('json')) | ||
| s -= 3; | ||
| return s; | ||
@@ -270,2 +273,24 @@ } | ||
| `; | ||
| // ── Auto-Interaction (Fuzzing): page-side snippet that clicks at most 15 button-like elements with a non-zero bounding box, sleeping 300 ms after each click, and returns how many were clicked ── | ||
| const INTERACT_FUZZ_JS = ` | ||
| async () => { | ||
| const sleep = ms => new Promise(r => setTimeout(r, ms)); | ||
| const clickables = Array.from(document.querySelectorAll( | ||
| 'button, [role="button"], [role="tab"], .tab, .btn, a[href="javascript:void(0)"], a[href="#"]' | ||
| )).slice(0, 15); // limit to 15 to avoid endless loops | ||
| let clicked = 0; | ||
| for (const el of clickables) { | ||
| try { | ||
| const rect = el.getBoundingClientRect(); | ||
| if (rect.width > 0 && rect.height > 0) { | ||
| el.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true, view: window })); | ||
| clicked++; | ||
| await sleep(300); // give it time to trigger network | ||
| } | ||
| } catch {} | ||
| } | ||
| return clicked; | ||
| } | ||
| `; | ||
| // ── Main explore function ────────────────────────────────────────────────── | ||
@@ -288,2 +313,27 @@ export async function exploreUrl(url, opts) { | ||
| } | ||
| // Step 2.5: Interactive Fuzzing (if requested) | ||
| if (opts.auto) { | ||
| try { | ||
| // First: targeted clicks by label (e.g. "字幕", "CC", "评论") | ||
| if (opts.clickLabels?.length) { | ||
| for (const label of opts.clickLabels) { | ||
| const safeLabel = label.replace(/'/g, "\\'"); | ||
| await page.evaluate(` | ||
| (() => { | ||
| const el = [...document.querySelectorAll('button, [role="button"], [role="tab"], a, span')] | ||
| .find(e => e.textContent && e.textContent.trim().includes('${safeLabel}')); | ||
| if (el) el.click(); | ||
| })() | ||
| `); | ||
| await page.wait(1); | ||
| } | ||
| } | ||
| // Then: blind fuzzing on generic interactive elements | ||
| const clicks = await page.evaluate(INTERACT_FUZZ_JS); | ||
| await page.wait(2); // wait for XHRs to settle | ||
| } | ||
| catch (e) { | ||
| // fuzzing is best-effort, don't fail the whole explore | ||
| } | ||
| } | ||
| // Step 3: Read page metadata | ||
@@ -290,0 +340,0 @@ const metadata = await readPageMetadata(page); |
+2
-2
@@ -59,4 +59,4 @@ #!/usr/bin/env node | ||
| .action(async (target, opts) => { const { verifyClis, renderVerifyReport } = await import('./verify.js'); const r = await verifyClis({ builtinClis: BUILTIN_CLIS, userClis: USER_CLIS, target, smoke: opts.smoke }); console.log(renderVerifyReport(r)); process.exitCode = r.ok ? 0 : 1; }); | ||
| program.command('explore').alias('probe').description('Explore a website: discover APIs, stores, and recommend strategies').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3') | ||
| .action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait) }))); }); | ||
| program.command('explore').alias('probe').description('Explore a website: discover APIs, stores, and recommend strategies').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3').option('--auto', 'Enable interactive fuzzing (simulate clicks to trigger lazy APIs)').option('--click <labels>', 'Comma-separated labels to click before fuzzing (e.g. "字幕,CC,评论")') | ||
| .action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); const clickLabels = opts.click ? opts.click.split(',').map((s) => s.trim()) : undefined; console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait), auto: opts.auto, clickLabels }))); }); | ||
| program.command('synthesize').description('Synthesize CLIs from explore').argument('<target>').option('--top <n>', '', '3') | ||
@@ -63,0 +63,0 @@ .action(async (target, opts) => { const { synthesizeFromExplore, renderSynthesizeSummary } = await import('./synthesize.js'); console.log(renderSynthesizeSummary(synthesizeFromExplore(target, { top: parseInt(opts.top) }))); }); |
@@ -30,10 +30,6 @@ /** | ||
| if ('text' in params) { | ||
| const timeout = params.timeout ?? 10; | ||
| const start = Date.now(); | ||
| while ((Date.now() - start) / 1000 < timeout) { | ||
| const snap = await page.snapshot({ raw: true }); | ||
| if (typeof snap === 'string' && snap.includes(params.text)) | ||
| break; | ||
| await page.wait(0.5); | ||
| } | ||
| await page.wait({ | ||
| text: String(render(params.text, { args, data })), | ||
| timeout: params.timeout | ||
| }); | ||
| } | ||
@@ -40,0 +36,0 @@ else if ('time' in params) |
@@ -5,2 +5,16 @@ /** | ||
| import { render } from '../template.js'; | ||
/**
 * Simple async concurrency limiter.
 *
 * Runs `fn(item, index)` for every element of `items` with at most `limit`
 * calls in flight at once and resolves to the results in input order.
 *
 * @param items Input array; an empty array resolves to an empty array.
 * @param limit Maximum number of concurrent calls. Values below 1, fractions
 *   and NaN are normalised to a whole number of at least 1, so a bad
 *   `concurrency` setting can never produce a silently empty result.
 * @param fn Async mapper invoked with the item and its index.
 * @returns Promise of the mapped results, positionally aligned with `items`.
 *   If any `fn` call rejects, the returned promise rejects with that error;
 *   workers that are already running are not cancelled.
 */
async function mapConcurrent(items, limit, fn) {
    const results = new Array(items.length);
    // Array.from({ length }) treats 0, negatives and NaN as "no elements", which
    // previously spawned zero workers and returned an array of empty slots.
    const requested = Math.floor(Number(limit));
    const workerCount = Math.min(requested >= 1 ? requested : 1, items.length);
    // Shared cursor: each worker claims the next unprocessed index. This is safe
    // without locking because the claim happens synchronously between awaits.
    let next = 0;
    async function worker() {
        while (next < items.length) {
            const i = next++;
            results[i] = await fn(items[i], i);
        }
    }
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return results;
}
| /** Single URL fetch helper */ | ||
@@ -42,8 +56,7 @@ async function fetchSingle(page, url, method, queryParams, headers, args, data) { | ||
| if (Array.isArray(data) && urlTemplate.includes('item')) { | ||
| const results = []; | ||
| for (let i = 0; i < data.length; i++) { | ||
| const itemUrl = String(render(urlTemplate, { args, data, item: data[i], index: i })); | ||
| results.push(await fetchSingle(page, itemUrl, method, queryParams, headers, args, data)); | ||
| } | ||
| return results; | ||
| const concurrency = typeof params?.concurrency === 'number' ? params.concurrency : 5; | ||
| return mapConcurrent(data, concurrency, async (item, index) => { | ||
| const itemUrl = String(render(urlTemplate, { args, data, item, index })); | ||
| return fetchSingle(page, itemUrl, method, queryParams, headers, args, data); | ||
| }); | ||
| } | ||
@@ -50,0 +63,0 @@ const url = render(urlOrObj, { args, data }); |
@@ -39,2 +39,4 @@ /** | ||
| let captured = null; | ||
| let captureResolve; | ||
| const capturePromise = new Promise(r => { captureResolve = r; }); | ||
| const capturePattern = ${JSON.stringify(capturePattern)}; | ||
@@ -50,3 +52,3 @@ | ||
| if (capturePattern && url.includes(capturePattern) && !captured) { | ||
| try { captured = await resp.clone().json(); } catch {} | ||
| try { captured = await resp.clone().json(); captureResolve(); } catch {} | ||
| } | ||
@@ -70,3 +72,3 @@ } catch {} | ||
| if (xhr.readyState === 4 && !captured) { | ||
| try { captured = JSON.parse(xhr.responseText); } catch {} | ||
| try { captured = JSON.parse(xhr.responseText); captureResolve(); } catch {} | ||
| } | ||
@@ -77,3 +79,3 @@ if (origHandler) origHandler.apply(this, arguments); | ||
| xhr.onload = function() { | ||
| if (!captured) { try { captured = JSON.parse(xhr.responseText); } catch {} } | ||
| if (!captured) { try { captured = JSON.parse(xhr.responseText); captureResolve(); } catch {} } | ||
| if (origOnload) origOnload.apply(this, arguments); | ||
@@ -118,5 +120,5 @@ }; | ||
| // ── 4. Wait for network response ── | ||
| const deadline = Date.now() + ${timeout} * 1000; | ||
| while (!captured && Date.now() < deadline) { | ||
| await new Promise(r => setTimeout(r, 200)); | ||
| if (!captured) { | ||
| const timeoutPromise = new Promise(r => setTimeout(r, ${timeout} * 1000)); | ||
| await Promise.race([capturePromise, timeoutPromise]); | ||
| } | ||
@@ -123,0 +125,0 @@ } finally { |
+5
-1
@@ -19,3 +19,7 @@ /** | ||
| pressKey(key: string): Promise<void>; | ||
| wait(seconds: number): Promise<void>; | ||
| wait(options: number | { | ||
| text?: string; | ||
| time?: number; | ||
| timeout?: number; | ||
| }): Promise<void>; | ||
| tabs(): Promise<any>; | ||
@@ -22,0 +26,0 @@ closeTab(index?: number): Promise<void>; |
+1
-1
| { | ||
| "name": "@jackwener/opencli", | ||
| "version": "0.3.0", | ||
| "version": "0.4.0", | ||
| "publishConfig": { | ||
@@ -5,0 +5,0 @@ "access": "public" |
+5
-5
@@ -10,3 +10,3 @@ # OpenCLI | ||
| A CLI tool that turns **any website** into a command-line interface. **35+ commands** across **17 sites** — bilibili, zhihu, xiaohongshu, twitter, reddit, xueqiu, github, v2ex, hackernews, bbc, weibo, boss, yahoo-finance, reuters, smzdm, ctrip, youtube — powered by browser session reuse and AI-native discovery. | ||
| A CLI tool that turns **any website** into a command-line interface. **46 commands** across **17 sites** — bilibili, zhihu, xiaohongshu, twitter, reddit, xueqiu, github, v2ex, hackernews, bbc, weibo, boss, yahoo-finance, reuters, smzdm, ctrip, youtube — powered by browser session reuse and AI-native discovery. | ||
@@ -86,8 +86,8 @@ ## ✨ Highlights | ||
| |------|----------|------| | ||
| | **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` | 🔐 Browser | | ||
| | **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` `subtitle` `dynamic` `ranking` | 🔐 Browser | | ||
| | **zhihu** | `hot` `search` `question` | 🔐 Browser | | ||
| | **xiaohongshu** | `search` `notifications` `feed` | 🔐 Browser | | ||
| | **xiaohongshu** | `search` `notifications` `feed` `me` `user` | 🔐 Browser | | ||
| | **xueqiu** | `feed` `hot-stock` `hot` `search` `stock` `watchlist` | 🔐 Browser | | ||
| | **twitter** | `trending` `bookmarks` | 🔐 Browser | | ||
| | **reddit** | `hot` | 🔐 Browser | | ||
| | **twitter** | `trending` `bookmarks` `profile` `search` `timeline` | 🔐 Browser | | ||
| | **reddit** | `hot` `frontpage` `search` `subreddit` | 🔐 Browser | | ||
| | **weibo** | `hot` | 🔐 Browser | | ||
@@ -94,0 +94,0 @@ | **boss** | `search` | 🔐 Browser | |
+5
-5
@@ -14,3 +14,3 @@ # OpenCLI | ||
| - 🌐 **35+ 命令,17 个站点** — B站、知乎、小红书、Twitter、Reddit、雪球(xueqiu)、GitHub、V2EX、Hacker News、BBC、微博、BOSS直聘、Yahoo Finance、路透社、什么值得买、携程、YouTube | ||
| - 🌐 **46 个命令,17 个站点** — B站、知乎、小红书、Twitter、Reddit、雪球(xueqiu)、GitHub、V2EX、Hacker News、BBC、微博、BOSS直聘、Yahoo Finance、路透社、什么值得买、携程、YouTube | ||
| - 🔐 **零风控** — 复用 Chrome 登录态,无需存储任何凭证 | ||
@@ -87,8 +87,8 @@ - 🤖 **AI 原生** — `explore` 自动发现 API,`synthesize` 生成适配器,`cascade` 探测认证策略 | ||
| |------|------|------| | ||
| | **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` | 🔐 浏览器 | | ||
| | **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` `subtitle` `dynamic` `ranking` | 🔐 浏览器 | | ||
| | **zhihu** | `hot` `search` `question` | 🔐 浏览器 | | ||
| | **xiaohongshu** | `search` `notifications` `feed` | 🔐 浏览器 | | ||
| | **xiaohongshu** | `search` `notifications` `feed` `me` `user` | 🔐 浏览器 | | ||
| | **xueqiu** | `feed` `hot-stock` `hot` `search` `stock` `watchlist` | 🔐 浏览器 | | ||
| | **twitter** | `trending` `bookmarks` | 🔐 浏览器 | | ||
| | **reddit** | `hot` | 🔐 浏览器 | | ||
| | **twitter** | `trending` `bookmarks` `profile` `search` `timeline` | 🔐 浏览器 | | ||
| | **reddit** | `hot` `frontpage` `search` `subreddit` | 🔐 浏览器 | | ||
| | **weibo** | `hot` | 🔐 浏览器 | | ||
@@ -95,0 +95,0 @@ | **boss** | `search` | 🔐 浏览器 | |
+15
-1
| --- | ||
| name: opencli | ||
| description: "OpenCLI — Make any website your CLI. Zero risk, AI-powered, reuse Chrome login." | ||
| version: 0.1.0 | ||
| version: 0.4.0 | ||
| author: jackwener | ||
@@ -52,2 +52,5 @@ tags: [cli, browser, web, mcp, playwright, bilibili, zhihu, twitter, github, v2ex, hackernews, reddit, xiaohongshu, xueqiu, AI, agent] | ||
| opencli bilibili user-videos --uid 12345 # 用户投稿 | ||
| opencli bilibili subtitle --bvid BV1xxx # 获取视频字幕 (支持 --lang zh-CN) | ||
| opencli bilibili dynamic --limit 10 # 动态 | ||
| opencli bilibili ranking --limit 10 # 排行榜 | ||
@@ -63,2 +66,4 @@ # 知乎 (browser) | ||
| opencli xiaohongshu feed --limit 10 # 推荐 Feed | ||
| opencli xiaohongshu me # 我的信息 | ||
| opencli xiaohongshu user --uid xxx # 用户主页 | ||
@@ -78,2 +83,5 @@ # 雪球 Xueqiu (browser) | ||
| opencli twitter bookmarks --limit 20 # 获取收藏的书签推文 | ||
| opencli twitter search --keyword "AI" # 搜索推文 | ||
| opencli twitter profile --username elonmusk # 用户资料 | ||
| opencli twitter timeline --limit 20 # 时间线 | ||
@@ -83,2 +91,5 @@ # Reddit (browser) | ||
| opencli reddit hot --subreddit programming # 指定子版块 | ||
| opencli reddit frontpage --limit 10 # 首页 | ||
| opencli reddit search --keyword "AI" # 搜索 | ||
| opencli reddit subreddit --name rust # 子版块浏览 | ||
@@ -142,2 +153,5 @@ # V2EX (public) | ||
| # Explore with interactive fuzzing (click buttons to trigger lazy APIs) | ||
| opencli explore <url> --auto --click "字幕,CC,评论" | ||
| # Verify: smoke-test a generated adapter | ||
@@ -144,0 +158,0 @@ opencli verify <site/name> --smoke |
+33
-6
@@ -107,4 +107,9 @@ /** | ||
| async wait(seconds: number): Promise<void> { | ||
| await this.call('tools/call', { name: 'browser_wait_for', arguments: { time: seconds } }); | ||
| async wait(options: number | { text?: string; time?: number; timeout?: number }): Promise<void> { | ||
| if (typeof options === 'number') { | ||
| await this.call('tools/call', { name: 'browser_wait_for', arguments: { time: options } }); | ||
| } else { | ||
| // Object form ({ text?, time?, timeout? }): forwarded unchanged as the browser_wait_for arguments (a bare number above is wrapped as { time }). NOTE(review): the tool presumably waits for the text natively instead of this code polling the DOM — confirm against the tool's documentation | ||
| await this.call('tools/call', { name: 'browser_wait_for', arguments: options }); | ||
| } | ||
| } | ||
@@ -143,6 +148,28 @@ | ||
| const delayMs = options.delayMs ?? 2000; | ||
| for (let i = 0; i < times; i++) { | ||
| await this.evaluate('() => window.scrollTo(0, document.body.scrollHeight)'); | ||
| await this.wait(delayMs / 1000); | ||
| } | ||
| const js = ` | ||
| async () => { | ||
| const maxTimes = ${times}; | ||
| const maxWaitMs = ${delayMs}; | ||
| for (let i = 0; i < maxTimes; i++) { | ||
| const lastHeight = document.body.scrollHeight; | ||
| window.scrollTo(0, lastHeight); | ||
| await new Promise(resolve => { | ||
| let timeoutId; | ||
| const observer = new MutationObserver(() => { | ||
| if (document.body.scrollHeight > lastHeight) { | ||
| clearTimeout(timeoutId); | ||
| observer.disconnect(); | ||
| setTimeout(resolve, 100); // Small debounce for rendering | ||
| } | ||
| }); | ||
| observer.observe(document.body, { childList: true, subtree: true }); | ||
| timeoutId = setTimeout(() => { | ||
| observer.disconnect(); | ||
| resolve(null); | ||
| }, maxWaitMs); | ||
| }); | ||
| } | ||
| } | ||
| `; | ||
| await this.evaluate(js); | ||
| } | ||
@@ -149,0 +176,0 @@ |
+51
-0
@@ -187,2 +187,4 @@ /** | ||
| if (ep.status === 200) s += 2; | ||
| // Anti-Bot Empty Value Detection: penalize JSON endpoints returning empty data | ||
| if (ep.responseAnalysis && ep.responseAnalysis.itemCount === 0 && ep.contentType.includes('json')) s -= 3; | ||
| return s; | ||
@@ -281,2 +283,26 @@ } | ||
| // ── Auto-Interaction (Fuzzing): page-side snippet that clicks at most 15 button-like elements with a non-zero bounding box, sleeping 300 ms after each click, and returns how many were clicked ── | ||
| const INTERACT_FUZZ_JS = ` | ||
| async () => { | ||
| const sleep = ms => new Promise(r => setTimeout(r, ms)); | ||
| const clickables = Array.from(document.querySelectorAll( | ||
| 'button, [role="button"], [role="tab"], .tab, .btn, a[href="javascript:void(0)"], a[href="#"]' | ||
| )).slice(0, 15); // limit to 15 to avoid endless loops | ||
| let clicked = 0; | ||
| for (const el of clickables) { | ||
| try { | ||
| const rect = el.getBoundingClientRect(); | ||
| if (rect.width > 0 && rect.height > 0) { | ||
| el.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true, view: window })); | ||
| clicked++; | ||
| await sleep(300); // give it time to trigger network | ||
| } | ||
| } catch {} | ||
| } | ||
| return clicked; | ||
| } | ||
| `; | ||
| // ── Main explore function ────────────────────────────────────────────────── | ||
@@ -305,2 +331,27 @@ | ||
| // Step 2.5: Interactive Fuzzing (if requested) | ||
| if (opts.auto) { | ||
| try { | ||
| // First: targeted clicks by label (e.g. "字幕", "CC", "评论") | ||
| if (opts.clickLabels?.length) { | ||
| for (const label of opts.clickLabels) { | ||
| const safeLabel = label.replace(/'/g, "\\'"); | ||
| await page.evaluate(` | ||
| (() => { | ||
| const el = [...document.querySelectorAll('button, [role="button"], [role="tab"], a, span')] | ||
| .find(e => e.textContent && e.textContent.trim().includes('${safeLabel}')); | ||
| if (el) el.click(); | ||
| })() | ||
| `); | ||
| await page.wait(1); | ||
| } | ||
| } | ||
| // Then: blind fuzzing on generic interactive elements | ||
| const clicks = await page.evaluate(INTERACT_FUZZ_JS); | ||
| await page.wait(2); // wait for XHRs to settle | ||
| } catch (e) { | ||
| // fuzzing is best-effort, don't fail the whole explore | ||
| } | ||
| } | ||
| // Step 3: Read page metadata | ||
@@ -307,0 +358,0 @@ const metadata = await readPageMetadata(page); |
+2
-2
@@ -56,4 +56,4 @@ #!/usr/bin/env node | ||
| program.command('explore').alias('probe').description('Explore a website: discover APIs, stores, and recommend strategies').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3') | ||
| .action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait) }))); }); | ||
| program.command('explore').alias('probe').description('Explore a website: discover APIs, stores, and recommend strategies').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3').option('--auto', 'Enable interactive fuzzing (simulate clicks to trigger lazy APIs)').option('--click <labels>', 'Comma-separated labels to click before fuzzing (e.g. "字幕,CC,评论")') | ||
| .action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); const clickLabels = opts.click ? opts.click.split(',').map((s: string) => s.trim()) : undefined; console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait), auto: opts.auto, clickLabels }))); }); | ||
@@ -60,0 +60,0 @@ program.command('synthesize').description('Synthesize CLIs from explore').argument('<target>').option('--top <n>', '', '3') |
@@ -34,9 +34,6 @@ /** | ||
| if ('text' in params) { | ||
| const timeout = params.timeout ?? 10; | ||
| const start = Date.now(); | ||
| while ((Date.now() - start) / 1000 < timeout) { | ||
| const snap = await page.snapshot({ raw: true }); | ||
| if (typeof snap === 'string' && snap.includes(params.text)) break; | ||
| await page.wait(0.5); | ||
| } | ||
| await page.wait({ | ||
| text: String(render(params.text, { args, data })), | ||
| timeout: params.timeout | ||
| }); | ||
| } else if ('time' in params) await page.wait(Number(params.time)); | ||
@@ -43,0 +40,0 @@ } else if (typeof params === 'string') await page.wait(Number(render(params, { args, data }))); |
@@ -8,2 +8,19 @@ /** | ||
/**
 * Simple async concurrency limiter.
 *
 * Maps `fn` over `items` with at most `limit` invocations in flight at once.
 * Results are stored at the index of their input, so the output order matches
 * `items` regardless of completion order.
 *
 * @param items - Inputs to process.
 * @param limit - Maximum number of concurrent `fn` calls. Values below 1 and
 *   NaN are treated as 1 (sequential); fractional values are floored;
 *   `Infinity` means "no limit".
 * @param fn - Async mapper that receives each item and its index.
 * @returns The mapped results, in input order. Rejects with the first error
 *   thrown by `fn`.
 */
async function mapConcurrent<T, R>(items: T[], limit: number, fn: (item: T, index: number) => Promise<R>): Promise<R[]> {
  const results: R[] = new Array(items.length);

  // A limit of 0, a negative number or NaN would otherwise spawn zero workers
  // and silently resolve to an array of empty slots without ever calling `fn`.
  const requested = Number.isNaN(limit) ? 1 : Math.max(1, Math.floor(limit));
  const workerCount = Math.min(requested, items.length);

  // Shared cursor: each worker claims the next unprocessed index. The claim
  // (`index++`) runs synchronously, so no two workers take the same slot.
  let index = 0;
  async function worker(): Promise<void> {
    while (index < items.length) {
      const i = index++;
      results[i] = await fn(items[i], i);
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
| /** Single URL fetch helper */ | ||
@@ -52,8 +69,7 @@ async function fetchSingle( | ||
| if (Array.isArray(data) && urlTemplate.includes('item')) { | ||
| const results: any[] = []; | ||
| for (let i = 0; i < data.length; i++) { | ||
| const itemUrl = String(render(urlTemplate, { args, data, item: data[i], index: i })); | ||
| results.push(await fetchSingle(page, itemUrl, method, queryParams, headers, args, data)); | ||
| } | ||
| return results; | ||
| const concurrency = typeof params?.concurrency === 'number' ? params.concurrency : 5; | ||
| return mapConcurrent(data, concurrency, async (item, index) => { | ||
| const itemUrl = String(render(urlTemplate, { args, data, item, index })); | ||
| return fetchSingle(page, itemUrl, method, queryParams, headers, args, data); | ||
| }); | ||
| } | ||
@@ -60,0 +76,0 @@ const url = render(urlOrObj, { args, data }); |
@@ -45,2 +45,4 @@ /** | ||
| let captured = null; | ||
| let captureResolve; | ||
| const capturePromise = new Promise(r => { captureResolve = r; }); | ||
| const capturePattern = ${JSON.stringify(capturePattern)}; | ||
@@ -56,3 +58,3 @@ | ||
| if (capturePattern && url.includes(capturePattern) && !captured) { | ||
| try { captured = await resp.clone().json(); } catch {} | ||
| try { captured = await resp.clone().json(); captureResolve(); } catch {} | ||
| } | ||
@@ -76,3 +78,3 @@ } catch {} | ||
| if (xhr.readyState === 4 && !captured) { | ||
| try { captured = JSON.parse(xhr.responseText); } catch {} | ||
| try { captured = JSON.parse(xhr.responseText); captureResolve(); } catch {} | ||
| } | ||
@@ -83,3 +85,3 @@ if (origHandler) origHandler.apply(this, arguments); | ||
| xhr.onload = function() { | ||
| if (!captured) { try { captured = JSON.parse(xhr.responseText); } catch {} } | ||
| if (!captured) { try { captured = JSON.parse(xhr.responseText); captureResolve(); } catch {} } | ||
| if (origOnload) origOnload.apply(this, arguments); | ||
@@ -124,5 +126,5 @@ }; | ||
| // ── 4. Wait for network response ── | ||
| const deadline = Date.now() + ${timeout} * 1000; | ||
| while (!captured && Date.now() < deadline) { | ||
| await new Promise(r => setTimeout(r, 200)); | ||
| if (!captured) { | ||
| const timeoutPromise = new Promise(r => setTimeout(r, ${timeout} * 1000)); | ||
| await Promise.race([capturePromise, timeoutPromise]); | ||
| } | ||
@@ -129,0 +131,0 @@ } finally { |
+1
-1
@@ -15,3 +15,3 @@ /** | ||
| pressKey(key: string): Promise<void>; | ||
| wait(seconds: number): Promise<void>; | ||
| wait(options: number | { text?: string; time?: number; timeout?: number }): Promise<void>; | ||
| tabs(): Promise<any>; | ||
@@ -18,0 +18,0 @@ closeTab(index?: number): Promise<void>; |
AI-detected potential code anomaly
Supply chain riskAI has identified unusual behaviors that may pose a security risk.
Found 2 instances in 1 package
Long strings
Supply chain riskContains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
AI-detected potential code anomaly
Supply chain riskAI has identified unusual behaviors that may pose a security risk.
Found 1 instance in 1 package
Long strings
Supply chain riskContains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
464288
5.01%198
1.54%8431
4.4%43
2.38%