heyi - npm Package Compare versions

Comparing version 2.0.0 to 2.1.0

bin/index.js (+15, -3)

@@ -14,2 +14,3 @@ #!/usr/bin/env node

const DEFAULT_MODEL = 'openai/gpt-4o-mini'
const DEFAULT_CRAWLER = 'fetch'

@@ -22,2 +23,7 @@ const modelFlag = ['-m, --model <model>', 'AI model to use', process.env.MODEL ?? DEFAULT_MODEL]

]
const crawlerFlag = [
'-c, --crawler <crawler>',
'Crawler to use for fetching URLs: fetch, chrome',
process.env.CRAWLER ?? DEFAULT_CRAWLER,
]
const fileFlag = [

@@ -55,2 +61,3 @@ '--file <path>',

const hasSchemaFlag = hasFlag(['--schema', '-s'])
const hasCrawlerFlag = hasFlag(['--crawler', '-c'])

@@ -120,2 +127,3 @@ const program = new Command()

schema: z.string().optional(),
crawler: z.enum(['fetch', 'chrome']),
files: z.array(z.string()).default([]),

@@ -135,2 +143,3 @@ urls: z.array(z.string()).default([]),

schema: flags.schema,
crawler: flags.crawler,
files: flags.file,

@@ -144,6 +153,7 @@ urls: flags.url,

return optionsSchema.parse({
- // Overwrite model, format, schema only if not provided via flags
+ // Overwrite model, format, schema, crawler only if not provided via flags
model: hasModelFlag ? options.model : (presetContent.model ?? options.model),
format: hasFormatFlag ? options.format : (presetContent.format ?? options.format),
schema: hasSchemaFlag ? options.schema : (presetContent.schema ?? options.schema),
crawler: hasCrawlerFlag ? options.crawler : (presetContent.crawler ?? options.crawler),
// Merge files

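The preset handling above gives an explicit command-line flag priority over the preset value, with the flag's baked-in default (the environment variable or DEFAULT_CRAWLER) as the final fallback. A minimal sketch of that precedence, using an illustrative helper name that is not part of the package:

```js
// Illustrative resolveCrawler helper (not part of heyi) mirroring the ternary above:
// explicit --crawler flag > preset file value > default already carried by flags.crawler.
const resolveCrawler = (hasCrawlerFlag, flagValue, presetValue) =>
  hasCrawlerFlag ? flagValue : (presetValue ?? flagValue)

console.log(resolveCrawler(true, 'chrome', 'fetch'))    // 'chrome' — explicit flag wins
console.log(resolveCrawler(false, 'fetch', 'chrome'))   // 'chrome' — preset overrides the default
console.log(resolveCrawler(false, 'fetch', undefined))  // 'fetch'  — falls back to the flag default
```
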
@@ -176,3 +186,3 @@ files: [...presetContent.files, ...options.files],

const userPrompt = replaceVariables(prompt ?? stdinContent, options.vars)
- const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls)
+ const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls, options.crawler)

@@ -209,3 +219,3 @@ const result = await executePrompt(finalPrompt, {

const userPrompt = replaceVariables(prompt, options.vars)
- const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls)
+ const finalPrompt = await buildPrompt(userPrompt, options.files, options.urls, options.crawler)

@@ -234,2 +244,3 @@ const result = await executePrompt(finalPrompt, {

.option(...schemaFlag)
.option(...crawlerFlag)
.option(...fileFlag)

@@ -247,2 +258,3 @@ .option(...urlFlag)

.option(...schemaFlag)
.option(...crawlerFlag)
.option(...fileFlag)

@@ -249,0 +261,0 @@ .option(...urlFlag)
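
For readers unfamiliar with the tuple-spread pattern used above, here is a minimal, self-contained sketch of how the new crawler flag is registered with commander and validated with zod. Only the names taken from the diff (DEFAULT_CRAWLER, crawlerFlag, optionsSchema) belong to the package; the stripped-down Command setup around them is assumed for illustration.

```js
import { Command } from 'commander'
import { z } from 'zod'

const DEFAULT_CRAWLER = 'fetch'

// Flag definition as a tuple so every subcommand can register it via .option(...crawlerFlag)
const crawlerFlag = [
  '-c, --crawler <crawler>',
  'Crawler to use for fetching URLs: fetch, chrome',
  process.env.CRAWLER ?? DEFAULT_CRAWLER,
]

// Reduced schema: the real optionsSchema validates the other flags as well
const optionsSchema = z.object({
  crawler: z.enum(['fetch', 'chrome']),
})

const program = new Command()
program.option(...crawlerFlag).parse(process.argv)

// Anything other than 'fetch' or 'chrome' fails validation before a crawler is ever used
const options = optionsSchema.parse(program.opts())
console.log(options.crawler)
```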

package.json (+2, -1)

{
"name": "heyi",
- "version": "2.0.0",
+ "version": "2.1.0",
"description": "CLI tool to execute AI prompts with flexible output formatting",

@@ -37,2 +37,3 @@ "keywords": [

"dotenv": "^16.6.1",
"puppeteer": "^24.35.0",
"sanitize-html": "^2.17.0",

@@ -39,0 +40,0 @@ "zod": "^4.3.5"

README.md

@@ -27,2 +27,3 @@ # heyi

- `-s, --schema <schema>` - Zod schema for object/array format (required when format is `object` or `array`)
- `-c, --crawler <crawler>` - Crawler to use for fetching URLs: `fetch`, `chrome` (default: `fetch`)
- `--file <path>` - Read content from file and include as context (can be used multiple times)

@@ -38,2 +39,3 @@ - `--url <url>` - Fetch content from URL and include as context (can be used multiple times)

- `MODEL` - Default AI model to use (optional, can be overridden with `--model` flag)
- `CRAWLER` - Default crawler to use for fetching URLs (optional, can be overridden with `--crawler` flag)

@@ -87,2 +89,6 @@ ### Examples

# Use Chrome crawler for JavaScript-heavy pages
heyi prompt "Summarize this SPA" --url https://example.com/spa --crawler chrome
CRAWLER=chrome heyi prompt "Get content from dynamic page" --url https://example.com/dynamic
# Mix files and URLs as context

@@ -112,2 +118,3 @@ heyi prompt "Compare local and remote content" --file local.txt --url https://example.com/remote.txt

"schema": "z.string()",
"crawler": "fetch",
"files": ["path/to/file1.txt", "path/to/file2.txt"],

@@ -124,2 +131,3 @@ "urls": ["https://example.com/page.html"]

- **schema** (optional): Zod schema for object/array format (required when format is `object` or `array`).
- **crawler** (optional): Crawler to use for fetching URLs: `fetch`, `chrome` (default: `fetch`).
- **files** (optional): Array of file paths to include as context.

@@ -177,2 +185,3 @@ - **urls** (optional): Array of URLs to fetch and include as context.

- **Schema override**: Using `--schema` flag overrides the schema specified in the preset file.
- **Crawler override**: Using `--crawler` flag overrides the crawler specified in the preset file.
- **Files and URLs append**: Using `--file` or `--url` flags adds additional context to the preset's files and URLs.

@@ -188,2 +197,5 @@ - **Variables**: Use `--var` to replace variables in the preset's prompt.

# Override crawler from preset
heyi preset file.json --crawler chrome
# Add additional files to preset's files

@@ -212,2 +224,31 @@ heyi preset file.json --file extra.txt

## Crawlers
The tool supports two crawlers for fetching content from URLs:
- **fetch** (default): Uses the native `fetch` API to retrieve HTML content. Fast and lightweight, but may not work well with JavaScript-heavy or dynamically rendered pages.
- **chrome**: Uses Puppeteer to launch a headless Chrome browser and retrieve content after the page has fully loaded. Ideal for single-page applications (SPAs) and JavaScript-heavy websites, but slower and requires more resources.
### When to Use Chrome Crawler
Use the `chrome` crawler when:
- The target website relies heavily on JavaScript for rendering content
- Content is loaded dynamically after the initial page load
- You need to interact with a single-page application (SPA)
- The `fetch` crawler returns incomplete or missing content
### Crawler Examples
```sh
# Use default fetch crawler
heyi prompt "Summarize this page" --url https://example.com
# Use Chrome crawler for JS-heavy page
heyi prompt "Extract data from SPA" --url https://app.example.com --crawler chrome
# Set Chrome as default crawler via environment
CRAWLER=chrome heyi prompt "Get content" --url https://dynamic-site.com
```
## Development

@@ -214,0 +255,0 @@

input.js

import { readFile } from 'node:fs/promises'
import { createInterface } from 'node:readline'
import { launch } from 'puppeteer'
import sanitizeHtml from 'sanitize-html'

@@ -60,14 +61,63 @@

/**
- * Fetch content from a URL.
+ * Validate that a URL uses http or https protocol.
*
* @param {string} url - URL to validate
* @throws {Error} If URL is invalid or uses a dangerous protocol
*/
const validateUrl = (url) => {
try {
const parsedUrl = new URL(url)
if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
throw new Error(`Invalid protocol '${parsedUrl.protocol}'. Only http and https are supported.`)
}
} catch (error) {
if (error instanceof TypeError) {
throw new Error(`Invalid URL format: ${url}`)
}
throw error
}
}
/**
* Fetch content from a URL using fetch API.
*
* @param {string} url - URL to fetch content from
* @returns {Promise<string>} The URL content
*/
- export const fetchUrlContent = async (url) => {
+ const fetchUrlContentWithFetch = async (url) => {
validateUrl(url)
const response = await fetch(url)
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
}
const html = await response.text()
// Sanitize HTML to extract only text content and avoid large data
const cleanText = sanitizeHtml(html, {
allowedTags: [],
allowedAttributes: {},
allowedSchemes: [],
allowedSchemesAppliedToAttributes: [],
})
return cleanText.trim()
}
/**
* Fetch content from a URL using Chrome/Puppeteer.
*
* @param {string} url - URL to fetch content from
* @returns {Promise<string>} The URL content
*/
const fetchUrlContentWithChrome = async (url) => {
validateUrl(url)
const browser = await launch({
headless: true,
// These args are required for running in containerized environments (e.g., Docker, CI/CD)
args: ['--no-sandbox', '--disable-setuid-sandbox'],
})
try {
- const response = await fetch(url)
- if (!response.ok) {
- throw new Error(`HTTP ${response.status}: ${response.statusText}`)
- }
- const html = await response.text()
+ const page = await browser.newPage()
+ // Wait for network to be idle, with a 30-second timeout to prevent indefinite waiting
+ // networkidle0 is specifically used for JavaScript-heavy pages to ensure all dynamic content is loaded
+ await page.goto(url, { waitUntil: 'networkidle0', timeout: 30000 })
+ const html = await page.content()
// Sanitize HTML to extract only text content and avoid large data

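Both crawler paths pass the raw HTML through sanitize-html with every tag and attribute disallowed, which reduces the page to its text content before it is embedded in the prompt. A small standalone sketch of that behaviour (the sample markup is made up):

```js
import sanitizeHtml from 'sanitize-html'

// With no allowed tags or attributes, sanitize-html strips every element and keeps
// only text nodes; contents of <script> and <style> are discarded by default.
const html = '<main><h1>Title</h1><p>Body <a href="javascript:evil()">link</a></p><script>evil()</script></main>'
const text = sanitizeHtml(html, {
  allowedTags: [],
  allowedAttributes: {},
  allowedSchemes: [],
  allowedSchemesAppliedToAttributes: [],
})
console.log(text.trim()) // "TitleBody link" (exact whitespace may vary)
```
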
@@ -81,2 +131,17 @@ const cleanText = sanitizeHtml(html, {

return cleanText.trim()
} finally {
await browser.close()
}
}
/**
* Fetch content from a URL.
*
* @param {string} url - URL to fetch content from
* @param {string} crawler - Crawler to use: 'fetch' or 'chrome' (default: 'fetch')
* @returns {Promise<string>} The URL content
*/
export const fetchUrlContent = async (url, crawler = 'fetch') => {
try {
return crawler === 'chrome' ? await fetchUrlContentWithChrome(url) : await fetchUrlContentWithFetch(url)
} catch (error) {

@@ -83,0 +148,0 @@ throw new Error(`Failed to fetch URL '${url}'`, { cause: error })

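A usage sketch of the exported dispatcher. The module path is assumed from the `./input.js` import shown in the prompt-builder diff below, and the URLs are placeholders:

```js
import { fetchUrlContent } from './input.js' // path assumed from the prompt builder's import

// Default fetch-based crawler: fast, but only sees server-rendered HTML.
const staticText = await fetchUrlContent('https://example.com')

// Chrome/Puppeteer crawler: waits for network idle, so client-rendered content is included.
const dynamicText = await fetchUrlContent('https://app.example.com/spa', 'chrome')

// Any failure surfaces as "Failed to fetch URL '<url>'" with the underlying error as `cause`.
try {
  await fetchUrlContent('ftp://example.com/file.txt') // rejected by validateUrl
} catch (error) {
  console.error(error.message, error.cause?.message)
}
```
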
@@ -9,2 +9,3 @@ import { readFile } from 'node:fs/promises'

schema: z.string().optional(),
crawler: z.enum(['fetch', 'chrome']).optional(),
files: z.array(z.string()).default([]),

@@ -11,0 +12,0 @@ urls: z.array(z.string()).default([]),

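In the preset schema the crawler field is optional, unlike the CLI options schema where the flag default guarantees a value. A reduced sketch of how zod treats the three cases (field name from the diff; the schema here is trimmed to the crawler field only):

```js
import { z } from 'zod'

const presetCrawler = z.object({
  crawler: z.enum(['fetch', 'chrome']).optional(),
})

presetCrawler.parse({ crawler: 'chrome' }) // ok — preset pins the Chrome crawler
presetCrawler.parse({})                    // ok — crawler omitted, CLI/env default applies later
presetCrawler.parse({ crawler: 'curl' })   // throws ZodError: invalid enum value
```
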
@@ -9,5 +9,6 @@ import { fetchUrlContent, readFileContent } from './input.js'

* @param {string[]} urls - Array of URLs to include as context
* @param {string} crawler - Crawler to use for fetching URLs: 'fetch' or 'chrome' (default: 'fetch')
* @returns {Promise<string>} The final prompt with all contexts combined
*/
- export const buildPrompt = async (prompt, filePaths = [], urls = []) => {
+ export const buildPrompt = async (prompt, filePaths = [], urls = [], crawler = 'fetch') => {
// Handle file content as context

@@ -23,3 +24,3 @@ const fileContents = []

for (const url of urls) {
- const content = await fetchUrlContent(url)
+ const content = await fetchUrlContent(url, crawler)
urlContents.push({ path: url, content })

@@ -26,0 +27,0 @@ }
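
Taken together, the new fourth parameter threads the crawler choice from the parsed options down to every URL fetch. A hedged usage sketch (the module path and the file/URL arguments are illustrative; only the buildPrompt signature comes from the diff):

```js
import { buildPrompt } from './prompt.js' // path assumed — the diff does not name this module

const finalPrompt = await buildPrompt(
  'Summarize the following sources',
  ['notes/local.txt'],             // filePaths, read via readFileContent
  ['https://app.example.com/spa'], // urls, fetched via fetchUrlContent
  'chrome',                        // crawler — falls back to 'fetch' when omitted
)
console.log(finalPrompt)
```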