+15
-3
@@ -14,2 +14,3 @@ #!/usr/bin/env node | ||
| const DEFAULT_MODEL = 'openai/gpt-4o-mini' | ||
| const DEFAULT_CRAWLER = 'fetch' | ||
@@ -22,2 +23,7 @@ const modelFlag = ['-m, --model <model>', 'AI model to use', process.env.MODEL ?? DEFAULT_MODEL] | ||
| ] | ||
| const crawlerFlag = [ | ||
| '-c, --crawler <crawler>', | ||
| 'Crawler to use for fetching URLs: fetch, chrome', | ||
| process.env.CRAWLER ?? DEFAULT_CRAWLER, | ||
| ] | ||
| const fileFlag = [ | ||
@@ -55,2 +61,3 @@ '--file <path>', | ||
| const hasSchemaFlag = hasFlag(['--schema', '-s']) | ||
| const hasCrawlerFlag = hasFlag(['--crawler', '-c']) | ||
@@ -120,2 +127,3 @@ const program = new Command() | ||
| schema: z.string().optional(), | ||
| crawler: z.enum(['fetch', 'chrome']), | ||
| files: z.array(z.string()).default([]), | ||
@@ -135,2 +143,3 @@ urls: z.array(z.string()).default([]), | ||
| schema: flags.schema, | ||
| crawler: flags.crawler, | ||
| files: flags.file, | ||
@@ -144,6 +153,7 @@ urls: flags.url, | ||
| return optionsSchema.parse({ | ||
| // Overwrite model, format, schema only if not provided via flags | ||
| // Overwrite model, format, schema, crawler only if not provided via flags | ||
| model: hasModelFlag ? options.model : (presetContent.model ?? options.model), | ||
| format: hasFormatFlag ? options.format : (presetContent.format ?? options.format), | ||
| schema: hasSchemaFlag ? options.schema : (presetContent.schema ?? options.schema), | ||
| crawler: hasCrawlerFlag ? options.crawler : (presetContent.crawler ?? options.crawler), | ||
| // Merge files | ||
@@ -176,3 +186,3 @@ files: [...presetContent.files, ...options.files], | ||
| const userPrompt = replaceVariables(prompt ?? stdinContent, options.vars) | ||
| const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls) | ||
| const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls, options.crawler) | ||
@@ -209,3 +219,3 @@ const result = await executePrompt(finalPrompt, { | ||
| const userPrompt = replaceVariables(prompt, options.vars) | ||
| const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls) | ||
| const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls, options.crawler) | ||
@@ -234,2 +244,3 @@ const result = await executePrompt(finalPrompt, { | ||
| .option(...schemaFlag) | ||
| .option(...crawlerFlag) | ||
| .option(...fileFlag) | ||
@@ -247,2 +258,3 @@ .option(...urlFlag) | ||
| .option(...schemaFlag) | ||
| .option(...crawlerFlag) | ||
| .option(...fileFlag) | ||
@@ -249,0 +261,0 @@ .option(...urlFlag) |
+2
-1
| { | ||
| "name": "heyi", | ||
| "version": "2.0.0", | ||
| "version": "2.1.0", | ||
| "description": "CLI tool to execute AI prompts with flexible output formatting", | ||
@@ -37,2 +37,3 @@ "keywords": [ | ||
| "dotenv": "^16.6.1", | ||
| "puppeteer": "^24.35.0", | ||
| "sanitize-html": "^2.17.0", | ||
@@ -39,0 +40,0 @@ "zod": "^4.3.5" |
+41
-0
@@ -27,2 +27,3 @@ # heyi | ||
| - `-s, --schema <schema>` - Zod schema for object/array format (required when format is `object` or `array`) | ||
| - `-c, --crawler <crawler>` - Crawler to use for fetching URLs: `fetch`, `chrome` (default: `fetch`) | ||
| - `--file <path>` - Read content from file and include as context (can be used multiple times) | ||
@@ -38,2 +39,3 @@ - `--url <url>` - Fetch content from URL and include as context (can be used multiple times) | ||
| - `MODEL` - Default AI model to use (optional, can be overridden with `--model` flag) | ||
| - `CRAWLER` - Default crawler to use for fetching URLs (optional, can be overridden with `--crawler` flag) | ||
@@ -87,2 +89,6 @@ ### Examples | ||
| # Use Chrome crawler for JavaScript-heavy pages | ||
| heyi prompt "Summarize this SPA" --url https://example.com/spa --crawler chrome | ||
| CRAWLER=chrome heyi prompt "Get content from dynamic page" --url https://example.com/dynamic | ||
| # Mix files and URLs as context | ||
@@ -112,2 +118,3 @@ heyi prompt "Compare local and remote content" --file local.txt --url https://example.com/remote.txt | ||
| "schema": "z.string()", | ||
| "crawler": "fetch", | ||
| "files": ["path/to/file1.txt", "path/to/file2.txt"], | ||
@@ -124,2 +131,3 @@ "urls": ["https://example.com/page.html"] | ||
| - **schema** (optional): Zod schema for object/array format (required when format is `object` or `array`). | ||
| - **crawler** (optional): Crawler to use for fetching URLs: `fetch`, `chrome` (default: `fetch`). | ||
| - **files** (optional): Array of file paths to include as context. | ||
@@ -177,2 +185,3 @@ - **urls** (optional): Array of URLs to fetch and include as context. | ||
| - **Schema override**: Using `--schema` flag overrides the schema specified in the preset file. | ||
| - **Crawler override**: Using `--crawler` flag overrides the crawler specified in the preset file. | ||
| - **Files and URLs append**: Using `--file` or `--url` flags adds additional context to the preset's files and URLs. | ||
@@ -188,2 +197,5 @@ - **Variables**: Use `--var` to replace variables in the preset's prompt. | ||
| # Override crawler from preset | ||
| heyi preset file.json --crawler chrome | ||
| # Add additional files to preset's files | ||
@@ -212,2 +224,31 @@ heyi preset file.json --file extra.txt | ||
| ## Crawlers | ||
| The tool supports two crawlers for fetching content from URLs: | ||
| - **fetch** (default): Uses the native `fetch` API to retrieve HTML content. Fast and lightweight, but may not work well with JavaScript-heavy or dynamically rendered pages. | ||
| - **chrome**: Uses Puppeteer to launch a headless Chrome browser and retrieve content after the page has fully loaded. Ideal for single-page applications (SPAs) and JavaScript-heavy websites, but slower and requires more resources. | ||
| ### When to Use Chrome Crawler | ||
| Use the `chrome` crawler when: | ||
| - The target website relies heavily on JavaScript for rendering content | ||
| - Content is loaded dynamically after the initial page load | ||
| - You need content from a single-page application (SPA) that is rendered client-side | ||
| - The `fetch` crawler returns incomplete or missing content | ||
| ### Crawler Examples | ||
| ```sh | ||
| # Use default fetch crawler | ||
| heyi prompt "Summarize this page" --url https://example.com | ||
| # Use Chrome crawler for JS-heavy page | ||
| heyi prompt "Extract data from SPA" --url https://app.example.com --crawler chrome | ||
| # Set Chrome as default crawler via environment | ||
| CRAWLER=chrome heyi prompt "Get content" --url https://dynamic-site.com | ||
| ``` | ||
| ## Development | ||
@@ -214,0 +255,0 @@ |
+72
-7
| import { readFile } from 'node:fs/promises' | ||
| import { createInterface } from 'node:readline' | ||
| import { launch } from 'puppeteer' | ||
| import sanitizeHtml from 'sanitize-html' | ||
@@ -60,14 +61,63 @@ | ||
/**
 * Validate that a URL is well-formed and uses the http or https protocol.
 *
 * @param {string} url - URL to validate
 * @throws {Error} If the URL cannot be parsed or uses a protocol other than http/https
 */
const validateUrl = (url) => {
  let parsedUrl
  try {
    parsedUrl = new URL(url)
  } catch (error) {
    // The URL constructor reports a malformed URL with a TypeError; replace it with
    // a friendlier message while keeping the original error reachable via `cause`
    if (error instanceof TypeError) {
      throw new Error(`Invalid URL format: ${url}`, { cause: error })
    }
    throw error
  }

  // Only plain web protocols are accepted; anything else (file:, ftp:, javascript:, ...) is rejected
  if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
    throw new Error(`Invalid protocol '${parsedUrl.protocol}'. Only http and https are supported.`)
  }
}
/**
 * Fetch content from a URL using the fetch API and reduce it to plain text.
 *
 * @param {string} url - URL to fetch content from
 * @returns {Promise<string>} The sanitized response body (no HTML tags allowed), trimmed
 * @throws {Error} If the URL fails validation or the response status is not ok
 */
const fetchUrlContentWithFetch = async (url) => {
  // Validate before touching the network
  validateUrl(url)

  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${response.statusText}`)
  }

  const html = await response.text()

  // Sanitize HTML to extract only text content and avoid large data
  const cleanText = sanitizeHtml(html, {
    allowedTags: [],
    allowedAttributes: {},
    allowedSchemes: [],
    allowedSchemesAppliedToAttributes: [],
  })

  return cleanText.trim()
}
| /** | ||
| * Fetch content from a URL using Chrome/Puppeteer. | ||
| * | ||
| * @param {string} url - URL to fetch content from | ||
| * @returns {Promise<string>} The URL content | ||
| */ | ||
| const fetchUrlContentWithChrome = async (url) => { | ||
| validateUrl(url) | ||
| const browser = await launch({ | ||
| headless: true, | ||
| // These args are required for running in containerized environments (e.g., Docker, CI/CD) | ||
| args: ['--no-sandbox', '--disable-setuid-sandbox'], | ||
| }) | ||
| try { | ||
| const response = await fetch(url) | ||
| if (!response.ok) { | ||
| throw new Error(`HTTP ${response.status}: ${response.statusText}`) | ||
| } | ||
| const html = await response.text() | ||
| const page = await browser.newPage() | ||
| // Wait for network to be idle, with a 30-second timeout to prevent indefinite waiting | ||
| // networkidle0 is specifically used for JavaScript-heavy pages to ensure all dynamic content is loaded | ||
| await page.goto(url, { waitUntil: 'networkidle0', timeout: 30000 }) | ||
| const html = await page.content() | ||
| // Sanitize HTML to extract only text content and avoid large data | ||
@@ -81,2 +131,17 @@ const cleanText = sanitizeHtml(html, { | ||
| return cleanText.trim() | ||
| } finally { | ||
| await browser.close() | ||
| } | ||
| } | ||
| /** | ||
| * Fetch content from a URL. | ||
| * | ||
| * @param {string} url - URL to fetch content from | ||
| * @param {string} crawler - Crawler to use: 'fetch' or 'chrome' (default: 'fetch') | ||
| * @returns {Promise<string>} The URL content | ||
| */ | ||
| export const fetchUrlContent = async (url, crawler = 'fetch') => { | ||
| try { | ||
| return crawler === 'chrome' ? await fetchUrlContentWithChrome(url) : await fetchUrlContentWithFetch(url) | ||
| } catch (error) { | ||
@@ -83,0 +148,0 @@ throw new Error(`Failed to fetch URL '${url}'`, { cause: error }) |
@@ -9,2 +9,3 @@ import { readFile } from 'node:fs/promises' | ||
| schema: z.string().optional(), | ||
| crawler: z.enum(['fetch', 'chrome']).optional(), | ||
| files: z.array(z.string()).default([]), | ||
@@ -11,0 +12,0 @@ urls: z.array(z.string()).default([]), |
@@ -9,5 +9,6 @@ import { fetchUrlContent, readFileContent } from './input.js' | ||
| * @param {string[]} urls - Array of URLs to include as context | ||
| * @param {string} crawler - Crawler to use for fetching URLs: 'fetch' or 'chrome' (default: 'fetch') | ||
| * @returns {Promise<string>} The final prompt with all contexts combined | ||
| */ | ||
| export const buildPrompt = async (prompt, filePaths = [], urls = []) => { | ||
| export const buildPrompt = async (prompt, filePaths = [], urls = [], crawler = 'fetch') => { | ||
| // Handle file content as context | ||
@@ -23,3 +24,3 @@ const fileContents = [] | ||
| for (const url of urls) { | ||
| const content = await fetchUrlContent(url) | ||
| const content = await fetchUrlContent(url, crawler) | ||
| urlContents.push({ path: url, content }) | ||
@@ -26,0 +27,0 @@ } |
Network access
Supply chain risk: This module accesses the network.
Found 1 instance in 1 package
Uses eval
Supply chain risk: Package uses dynamic code execution (e.g., eval()), which is a dangerous practice. This can prevent the code from running in certain environments and increases the risk that the code may contain exploits or malicious behavior.
Found 1 instance in 1 package
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 3 instances in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Network access
Supply chain risk: This module accesses the network.
Found 1 instance in 1 package
Uses eval
Supply chain risk: Package uses dynamic code execution (e.g., eval()), which is a dangerous practice. This can prevent the code from running in certain environments and increases the risk that the code may contain exploits or malicious behavior.
Found 1 instance in 1 package
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 2 instances in 1 package
29828
18.7%526
16.89%269
17.98%7
16.67%4
33.33%18
200%+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added
+ Added