@@ -14,2 +14,3 @@ #!/usr/bin/env node
		const DEFAULT_MODEL = 'openai/gpt-4o-mini'
		const DEFAULT_CRAWLER = 'fetch'

		@@ -22,2 +23,7 @@ const modelFlag = ['-m, --model <model>', 'AI model to use', process.env.MODEL ?? DEFAULT_MODEL]
		]
		// Argument tuple for the --crawler option (spread into `.option(...)` on each command):
		// flag spec, help text, and default value.
		const crawlerFlag = [
		'-c, --crawler <crawler>',
		'Crawler to use for fetching URLs: fetch, chrome',
		// Default comes from the CRAWLER environment variable, falling back to DEFAULT_CRAWLER
		process.env.CRAWLER ?? DEFAULT_CRAWLER,
		]
		const fileFlag = [
		@@ -55,2 +61,3 @@ '--file <path>',
		const hasSchemaFlag = hasFlag(['--schema', '-s'])
		const hasCrawlerFlag = hasFlag(['--crawler', '-c'])

		@@ -120,2 +127,3 @@ const program = new Command()
		schema: z.string().optional(),
		crawler: z.enum(['fetch', 'chrome']),
		files: z.array(z.string()).default([]),
		@@ -135,2 +143,3 @@ urls: z.array(z.string()).default([]),
		schema: flags.schema,
		crawler: flags.crawler,
		files: flags.file,
		@@ -144,6 +153,7 @@ urls: flags.url,
		return optionsSchema.parse({
		// Overwrite model, format, schema only if not provided via flags
		// Overwrite model, format, schema, crawler only if not provided via flags
		model: hasModelFlag ? options.model : (presetContent.model ?? options.model),
		format: hasFormatFlag ? options.format : (presetContent.format ?? options.format),
		schema: hasSchemaFlag ? options.schema : (presetContent.schema ?? options.schema),
		crawler: hasCrawlerFlag ? options.crawler : (presetContent.crawler ?? options.crawler),
		// Merge files
		@@ -176,3 +186,3 @@ files: [...presetContent.files, ...options.files],
		const userPrompt = replaceVariables(prompt ?? stdinContent, options.vars)
		const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls)
		const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls, options.crawler)

		@@ -209,3 +219,3 @@ const result = await executePrompt(finalPrompt, {
		const userPrompt = replaceVariables(prompt, options.vars)
		const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls)
		const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls, options.crawler)

		@@ -234,2 +244,3 @@ const result = await executePrompt(finalPrompt, {
		.option(...schemaFlag)
		.option(...crawlerFlag)
		.option(...fileFlag)
		@@ -247,2 +258,3 @@ .option(...urlFlag)
		.option(...schemaFlag)
		.option(...crawlerFlag)
		.option(...fileFlag)
		@@ -249,0 +261,0 @@ .option(...urlFlag)

+2

-1

package.json

		{
		"name": "heyi",
		"version": "2.0.0",
		"version": "2.1.0",
		"description": "CLI tool to execute AI prompts with flexible output formatting",
		@@ -37,2 +37,3 @@ "keywords": [
		"dotenv": "^16.6.1",
		"puppeteer": "^24.35.0",
		"sanitize-html": "^2.17.0",
		@@ -39,0 +40,0 @@ "zod": "^4.3.5"

+41

-0

README.md

		@@ -27,2 +27,3 @@ # heyi
		- `-s, --schema <schema>` - Zod schema for object/array format (required when format is `object` or `array`)
		- `-c, --crawler <crawler>` - Crawler to use for fetching URLs: `fetch`, `chrome` (default: `fetch`)
		- `--file <path>` - Read content from file and include as context (can be used multiple times)
		@@ -38,2 +39,3 @@ - `--url <url>` - Fetch content from URL and include as context (can be used multiple times)
		- `MODEL` - Default AI model to use (optional, can be overridden with `--model` flag)
		- `CRAWLER` - Default crawler to use for fetching URLs (optional, can be overridden with `--crawler` flag)

		@@ -87,2 +89,6 @@ ### Examples

		# Use Chrome crawler for JavaScript-heavy pages
		heyi prompt "Summarize this SPA" --url https://example.com/spa --crawler chrome
		CRAWLER=chrome heyi prompt "Get content from dynamic page" --url https://example.com/dynamic

		# Mix files and URLs as context
		@@ -112,2 +118,3 @@ heyi prompt "Compare local and remote content" --file local.txt --url https://example.com/remote.txt
		"schema": "z.string()",
		"crawler": "fetch",
		"files": ["path/to/file1.txt", "path/to/file2.txt"],
		@@ -124,2 +131,3 @@ "urls": ["https://example.com/page.html"]
		- schema (optional): Zod schema for object/array format (required when format is `object` or `array`).
		- crawler (optional): Crawler to use for fetching URLs: `fetch`, `chrome` (default: `fetch`).
		- files (optional): Array of file paths to include as context.
		@@ -177,2 +185,3 @@ - urls (optional): Array of URLs to fetch and include as context.
		- Schema override: Using `--schema` flag overrides the schema specified in the preset file.
		- Crawler override: Using `--crawler` flag overrides the crawler specified in the preset file.
		- Files and URLs append: Using `--file` or `--url` flags adds additional context to the preset's files and URLs.
		@@ -188,2 +197,5 @@ - Variables: Use `--var` to replace variables in the preset's prompt.

		# Override crawler from preset
		heyi preset file.json --crawler chrome

		# Add additional files to preset's files
		@@ -212,2 +224,31 @@ heyi preset file.json --file extra.txt

		## Crawlers

		The tool supports two crawlers for fetching content from URLs:

		- fetch (default): Uses the native `fetch` API to retrieve HTML content. Fast and lightweight, but may not work well with JavaScript-heavy or dynamically rendered pages.
		- chrome: Uses Puppeteer to launch a headless Chrome browser and retrieve content after the page has fully loaded. Ideal for single-page applications (SPAs) and JavaScript-heavy websites, but slower and requires more resources.

		### When to Use Chrome Crawler

		Use the `chrome` crawler when:

		- The target website relies heavily on JavaScript for rendering content
		- Content is loaded dynamically after the initial page load
		- You need the rendered content of a single-page application (SPA)
		- The `fetch` crawler returns incomplete or missing content

		### Crawler Examples

		```sh
		# Use default fetch crawler
		heyi prompt "Summarize this page" --url https://example.com

		# Use Chrome crawler for JS-heavy page
		heyi prompt "Extract data from SPA" --url https://app.example.com --crawler chrome

		# Set Chrome as default crawler via environment
		CRAWLER=chrome heyi prompt "Get content" --url https://dynamic-site.com
		```

		## Development
		@@ -214,0 +255,0 @@

+72

-7

src/utils/input.js

		import { readFile } from 'node:fs/promises'
		import { createInterface } from 'node:readline'
		import { launch } from 'puppeteer'
		import sanitizeHtml from 'sanitize-html'
		@@ -60,14 +61,63 @@
		/**
		* Fetch content from a URL.
		* Validate that a URL uses http or https protocol.
		*
		* @param {string} url - URL to validate
		* @throws {Error} If URL is invalid or uses a dangerous protocol
		*/
		const validateUrl = (url) => {
			// Step 1: parse. Only a TypeError raised while parsing is translated into
			// the friendlier "Invalid URL format" error; anything else propagates as-is.
			let protocol
			try {
				protocol = new URL(url).protocol
			} catch (error) {
				if (error instanceof TypeError) {
					throw new Error(`Invalid URL format: ${url}`)
				}
				throw error
			}

			// Step 2: allow only web protocols (rejects file:, ftp:, javascript:, ...).
			const isWebProtocol = protocol === 'http:' || protocol === 'https:'
			if (!isWebProtocol) {
				throw new Error(`Invalid protocol '${protocol}'. Only http and https are supported.`)
			}
		}

		/**
		* Fetch content from a URL using fetch API.
		*
		* @param {string} url - URL to fetch content from
		* @returns {Promise<string>} The URL content
		*/
		export const fetchUrlContent = async (url) => {
		const fetchUrlContentWithFetch = async (url) => {
			// Reject malformed or non-http(s) URLs before any request is made
			validateUrl(url)

			const response = await fetch(url)
			if (!response.ok) {
				throw new Error(`HTTP ${response.status}: ${response.statusText}`)
			}

			// Sanitizer configuration with every allow-list empty, so only text content
			// is kept and the returned data stays small
			const textOnlyOptions = {
				allowedTags: [],
				allowedAttributes: {},
				allowedSchemes: [],
				allowedSchemesAppliedToAttributes: [],
			}

			const markup = await response.text()
			return sanitizeHtml(markup, textOnlyOptions).trim()
		}

		/**
		* Fetch content from a URL using Chrome/Puppeteer.
		*
		* @param {string} url - URL to fetch content from
		* @returns {Promise<string>} The URL content
		*/
		const fetchUrlContentWithChrome = async (url) => {
		validateUrl(url)
		const browser = await launch({
		headless: true,
		// These args are required for running in containerized environments (e.g., Docker, CI/CD)
		args: ['--no-sandbox', '--disable-setuid-sandbox'],
		})
		try {
		const response = await fetch(url)
		if (!response.ok) {
		throw new Error(`HTTP ${response.status}: ${response.statusText}`)
		}
		const html = await response.text()
		const page = await browser.newPage()
		// Wait for network to be idle, with a 30-second timeout to prevent indefinite waiting
		// networkidle0 is specifically used for JavaScript-heavy pages to ensure all dynamic content is loaded
		await page.goto(url, { waitUntil: 'networkidle0', timeout: 30000 })
		const html = await page.content()
		// Sanitize HTML to extract only text content and avoid large data
		@@ -81,2 +131,17 @@ const cleanText = sanitizeHtml(html, {
		return cleanText.trim()
		} finally {
		await browser.close()
		}
		}

		/**
		* Fetch content from a URL.
		*
		* @param {string} url - URL to fetch content from
		* @param {string} crawler - Crawler to use: 'fetch' or 'chrome' (default: 'fetch')
		* @returns {Promise<string>} The URL content
		*/
		export const fetchUrlContent = async (url, crawler = 'fetch') => {
		try {
		return crawler === 'chrome' ? await fetchUrlContentWithChrome(url) : await fetchUrlContentWithFetch(url)
		} catch (error) {
		@@ -83,0 +148,0 @@ throw new Error(`Failed to fetch URL '${url}'`, { cause: error })

+1

-0

src/utils/preset.js

		@@ -9,2 +9,3 @@ import { readFile } from 'node:fs/promises'
		schema: z.string().optional(),
		crawler: z.enum(['fetch', 'chrome']).optional(),
		files: z.array(z.string()).default([]),
		@@ -11,0 +12,0 @@ urls: z.array(z.string()).default([]),

+3

-2

src/utils/prompt.js

		@@ -9,5 +9,6 @@ import { fetchUrlContent, readFileContent } from './input.js'
		* @param {string[]} urls - Array of URLs to include as context
		* @param {string} crawler - Crawler to use for fetching URLs: 'fetch' or 'chrome' (default: 'fetch')
		* @returns {Promise<string>} The final prompt with all contexts combined
		*/
		export const buildPrompt = async (prompt, filePaths = [], urls = []) => {
		export const buildPrompt = async (prompt, filePaths = [], urls = [], crawler = 'fetch') => {
		// Handle file content as context
		@@ -23,3 +24,3 @@ const fileContents = []
		for (const url of urls) {
		const content = await fetchUrlContent(url)
		const content = await fetchUrlContent(url, crawler)
		urlContents.push({ path: url, content })
		@@ -26,0 +27,0 @@ }

heyi - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics

Dependency changes