+11
-5
| import { ZodSchema } from 'zod'; | ||
| type ReasoningEffort = 'medium' | 'high'; | ||
| interface ScrapeConfig { | ||
| interface BaseConfig { | ||
| temperature?: number; | ||
@@ -9,2 +9,8 @@ prompt?: string; | ||
| } | ||
| interface JsonConfig extends BaseConfig { | ||
| method?: 'json'; | ||
| } | ||
| interface MarkdownConfig extends BaseConfig { | ||
| method: 'markdown'; | ||
| } | ||
| export declare class BitBuffet { | ||
@@ -14,8 +20,8 @@ private baseUrl; | ||
| constructor(apiKey: string); | ||
| /** | ||
| * extract a webpage and return a validated result | ||
| */ | ||
| extract<T>(url: string, schema: ZodSchema<T>, config?: ScrapeConfig, timeout?: number): Promise<T>; | ||
| extract<T>(url: string, schema: ZodSchema<T>, config?: JsonConfig, timeout?: number): Promise<T>; | ||
| extract(url: string, config: MarkdownConfig, timeout?: number): Promise<string>; | ||
| extract<T>(url: string, schema: ZodSchema<T>, config: MarkdownConfig, timeout?: number): Promise<string>; | ||
| extract(url: string, config: JsonConfig, timeout?: number): Promise<never>; | ||
| } | ||
| export {}; | ||
| //# sourceMappingURL=bitbuffet.d.ts.map |
@@ -1,1 +0,1 @@ | ||
| {"version":3,"file":"bitbuffet.d.ts","sourceRoot":"","sources":["../src/bitbuffet.ts"],"names":[],"mappings":"AACA,OAAO,EAAK,SAAS,EAAE,MAAM,KAAK,CAAC;AAOnC,KAAK,eAAe,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEzC,UAAU,YAAY;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,gBAAgB,CAAC,EAAE,eAAe,CAAC;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,qBAAa,SAAS;IACpB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,MAAM,CAAgB;gBAElB,MAAM,EAAE,MAAM;IAe1B;;OAEG;IACG,OAAO,CAAC,CAAC,EACb,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,EACpB,MAAM,GAAE,YAAiB,EACzB,OAAO,GAAE,MAAwB,GAChC,OAAO,CAAC,CAAC,CAAC;CAoDd"} | ||
| {"version":3,"file":"bitbuffet.d.ts","sourceRoot":"","sources":["../src/bitbuffet.ts"],"names":[],"mappings":"AACA,OAAO,EAAK,SAAS,EAAE,MAAM,KAAK,CAAC;AAOnC,KAAK,eAAe,GAAG,QAAQ,GAAG,MAAM,CAAC;AAGzC,UAAU,UAAU;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,gBAAgB,CAAC,EAAE,eAAe,CAAC;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,UAAU,UAAW,SAAQ,UAAU;IACrC,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,UAAU,cAAe,SAAQ,UAAU;IACzC,MAAM,EAAE,UAAU,CAAC;CACpB;AAED,qBAAa,SAAS;IACpB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,MAAM,CAAgB;gBAElB,MAAM,EAAE,MAAM;IAgBpB,OAAO,CAAC,CAAC,EACb,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,EACpB,MAAM,CAAC,EAAE,UAAU,EACnB,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,CAAC,CAAC;IAGP,OAAO,CACX,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,cAAc,EACtB,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,MAAM,CAAC;IAGZ,OAAO,CAAC,CAAC,EACb,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,EACpB,MAAM,EAAE,cAAc,EACtB,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,MAAM,CAAC;IAGZ,OAAO,CACX,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,UAAU,EAClB,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,KAAK,CAAC;CAsGlB"} |
+50
-10
@@ -28,7 +28,29 @@ "use strict"; | ||
| } | ||
| /** | ||
| * extract a webpage and return a validated result | ||
| */ | ||
| async extract(url, schema, config = {}, timeout = DEFAULT_TIMEOUT) { | ||
| // Implementation | ||
| async extract(url, schemaOrConfig, configOrTimeout, timeout = DEFAULT_TIMEOUT) { | ||
| try { | ||
| let method = 'json'; | ||
| let schema; | ||
| let config = {}; | ||
| let actualTimeout = timeout; | ||
| // Parse arguments based on overload | ||
| if (schemaOrConfig && typeof schemaOrConfig === 'object' && 'method' in schemaOrConfig) { | ||
| // Config as second parameter: extract(url, config, timeout?) | ||
| method = schemaOrConfig.method; | ||
| config = schemaOrConfig; | ||
| if (typeof configOrTimeout === 'number') { | ||
| actualTimeout = configOrTimeout; | ||
| } | ||
| } | ||
| else { | ||
| // Schema as second parameter: extract(url, schema, config?, timeout?) | ||
| schema = schemaOrConfig; | ||
| if (typeof configOrTimeout === 'object') { | ||
| config = configOrTimeout; | ||
| method = configOrTimeout.method || 'json'; | ||
| } | ||
| else if (typeof configOrTimeout === 'number') { | ||
| actualTimeout = configOrTimeout; | ||
| } | ||
| } | ||
| // Validate that both temperature and top_p are not provided simultaneously | ||
@@ -38,8 +60,20 @@ if (config.temperature !== undefined && config.top_p !== undefined) { | ||
| } | ||
| // Convert Zod schema to JSON schema using the library | ||
| const jsonSchema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema); | ||
| // Validate method and schema requirements | ||
| if (method === 'json' && !schema) { | ||
| throw new Error("json_schema is required when method is 'json'"); | ||
| } | ||
| // A schema must not be combined with method 'markdown': the markdown branch returns | ||
| // result.data as-is and never validates it against a schema | ||
| if (method === 'markdown' && schema) { | ||
| throw new Error("json_schema should not be defined when method is 'markdown'"); | ||
| } | ||
| const payload = { | ||
| url, | ||
| json_schema: jsonSchema, | ||
| method, | ||
| }; | ||
| // Add schema for JSON method | ||
| if (method === 'json' && schema) { | ||
| const jsonSchema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema); | ||
| payload.json_schema = jsonSchema; | ||
| } | ||
| // Add optional parameters to payload if provided in config | ||
@@ -59,3 +93,3 @@ if (config.reasoning_effort !== undefined) { | ||
| const response = await this.client.post('/extract', payload, { | ||
| timeout, | ||
| timeout: actualTimeout, | ||
| }); | ||
@@ -66,4 +100,10 @@ const result = response.data; | ||
| } | ||
| // Validate and return the result using Zod | ||
| return schema.parse(result.data); | ||
| // Return based on method | ||
| if (method === 'markdown') { | ||
| return result.data; | ||
| } | ||
| else { | ||
| // Validate and return the result using Zod for JSON method | ||
| return schema.parse(result.data); | ||
| } | ||
| } | ||
@@ -70,0 +110,0 @@ catch (error) { |
@@ -1,1 +0,1 @@ | ||
| {"version":3,"file":"bitbuffet.js","sourceRoot":"","sources":["../src/bitbuffet.ts"],"names":[],"mappings":";;;;;;AAAA,kDAA4D;AAC5D,6BAAmC;AACnC,2DAAqD;AAErD,MAAM,YAAY,GAAC,2BAA2B,CAAA;AAC9C,MAAM,gBAAgB,GAAC,IAAI,CAAA;AAC3B,MAAM,eAAe,GAAC,KAAK,CAAA;AAW3B,MAAa,SAAS;IAIpB,YAAY,MAAc;QACxB,mBAAmB;QACnB,IAAI,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CAAC,sFAAsF,CAAC,CAAC;QAC1G,CAAC;QACD,IAAI,CAAC,OAAO,GAAG,GAAG,YAAY,IAAI,gBAAgB,EAAE,CAAC;QACrD,IAAI,CAAC,MAAM,GAAG,eAAK,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,OAAO,EAAE;gBACP,eAAe,EAAE,UAAU,MAAM,EAAE;gBACnC,cAAc,EAAE,kBAAkB;aACnC;SACF,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO,CACX,GAAW,EACX,MAAoB,EACpB,SAAuB,EAAE,EACzB,UAAkB,eAAe;QAEjC,IAAI,CAAC;YACH,2EAA2E;YAC3E,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;gBACnE,MAAM,IAAI,KAAK,CAAC,gFAAgF,CAAC,CAAC;YACpG,CAAC;YAED,sDAAsD;YACtD,MAAM,UAAU,GAAG,IAAA,oCAAe,EAAC,MAAM,CAAC,CAAC;YAE3C,MAAM,OAAO,GAAQ;gBACnB,GAAG;gBACH,WAAW,EAAE,UAAU;aACxB,CAAC;YAEF,2DAA2D;YAC3D,IAAI,MAAM,CAAC,gBAAgB,KAAK,SAAS,EAAE,CAAC;gBAC1C,OAAO,CAAC,gBAAgB,GAAG,MAAM,CAAC,gBAAgB,CAAC;YACrD,CAAC;YACD,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBAChC,OAAO,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;YACjC,CAAC;YACD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC/B,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;YAC/B,CAAC;YACD,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;gBACrC,OAAO,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;YAC3C,CAAC;YAED,MAAM,QAAQ,GAAkB,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,OAAO,EAAE;gBAC1E,OAAO;aACR,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC;YAE7B,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBACpB,MAAM,IAAI,KAAK,CAAC,uBAAuB,MAAM,CAAC,KAAK,IAAI,eAAe,EAAE,CAAC,CAAC;YAC5E,CAAC;YAED,2CAA2C;YAC3C,OAAO,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAEnC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,OAAC,CAAC,QAAQ,EAAE,CAAC;gBAChC,MAAM,IAAI,KAAK,CAAC,sBAAsB,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;YACzD,CAAC;YACD,IAAI,eAAK,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC9
B,MAAM,IAAI,KAAK,CAAC,uBAAuB,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;YAC1D,CAAC;YACD,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;CACF;AA/ED,8BA+EC"} | ||
| {"version":3,"file":"bitbuffet.js","sourceRoot":"","sources":["../src/bitbuffet.ts"],"names":[],"mappings":";;;;;;AAAA,kDAA4D;AAC5D,6BAAmC;AACnC,2DAAqD;AAErD,MAAM,YAAY,GAAC,2BAA2B,CAAA;AAC9C,MAAM,gBAAgB,GAAC,IAAI,CAAA;AAC3B,MAAM,eAAe,GAAC,KAAK,CAAA;AAoB3B,MAAa,SAAS;IAIpB,YAAY,MAAc;QACxB,mBAAmB;QACnB,IAAI,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CAAC,sFAAsF,CAAC,CAAC;QAC1G,CAAC;QACD,IAAI,CAAC,OAAO,GAAG,GAAG,YAAY,IAAI,gBAAgB,EAAE,CAAC;QACrD,IAAI,CAAC,MAAM,GAAG,eAAK,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,OAAO,EAAE;gBACP,eAAe,EAAE,UAAU,MAAM,EAAE;gBACnC,cAAc,EAAE,kBAAkB;aACnC;SACF,CAAC,CAAC;IACL,CAAC;IAgCD,iBAAiB;IACjB,KAAK,CAAC,OAAO,CACX,GAAW,EACX,cAA2D,EAC3D,eAAsD,EACtD,UAAkB,eAAe;QAEjC,IAAI,CAAC;YACH,IAAI,MAAM,GAAqB,MAAM,CAAC;YACtC,IAAI,MAAgC,CAAC;YACrC,IAAI,MAAM,GAAe,EAAE,CAAC;YAC5B,IAAI,aAAa,GAAG,OAAO,CAAC;YAE5B,oCAAoC;YACpC,IAAI,cAAc,IAAI,OAAO,cAAc,KAAK,QAAQ,IAAI,QAAQ,IAAI,cAAc,EAAE,CAAC;gBACvF,6DAA6D;gBAC7D,MAAM,GAAG,cAAe,CAAC,MAAO,CAAC;gBACjC,MAAM,GAAG,cAAc,CAAC;gBACxB,IAAI,OAAO,eAAe,KAAK,QAAQ,EAAE,CAAC;oBACxC,aAAa,GAAG,eAAe,CAAC;gBAClC,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,sEAAsE;gBACtE,MAAM,GAAG,cAA8B,CAAC;gBACxC,IAAI,OAAO,eAAe,KAAK,QAAQ,EAAE,CAAC;oBACxC,MAAM,GAAG,eAAe,CAAC;oBACzB,MAAM,GAAG,eAAe,CAAC,MAAM,IAAI,MAAM,CAAC;gBAC5C,CAAC;qBAAM,IAAI,OAAO,eAAe,KAAK,QAAQ,EAAE,CAAC;oBAC/C,aAAa,GAAG,eAAe,CAAC;gBAClC,CAAC;YACH,CAAC;YAED,2EAA2E;YAC3E,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;gBACnE,MAAM,IAAI,KAAK,CAAC,gFAAgF,CAAC,CAAC;YACpG,CAAC;YAED,0CAA0C;YAC1C,IAAI,MAAM,KAAK,MAAM,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjC,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;YACnE,CAAC;YACD,sEAAsE;YACtE,4DAA4D;YAC5D,IAAI,MAAM,KAAK,UAAU,IAAI,MAAM,EAAE,CAAC;gBACpC,MAAM,IAAI,KAAK,CAAC,6DAA6D,CAAC,CAAC;YACjF,CAAC;YAED,MAAM,OAAO,GAAQ;gBACnB,GAAG;gBACH,MAAM;aACP,CAAC;YAEF,6BAA6B;YAC7B,IAAI,MAAM,KAAK,MAAM,IAAI,MAAM,EAAE,CAAC;gBAChC,MAAM,UAAU,GAAG,IAAA,oCAAe,EAAC,MAAM,CAAC,CAAC;gBAC3C,OAAO,CAAC,WAAW,GAAG,UAAU,CAAC;YACnC,CAAC;YAED,2DAA2D;YAC
3D,IAAI,MAAM,CAAC,gBAAgB,KAAK,SAAS,EAAE,CAAC;gBAC1C,OAAO,CAAC,gBAAgB,GAAG,MAAM,CAAC,gBAAgB,CAAC;YACrD,CAAC;YACD,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBAChC,OAAO,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;YACjC,CAAC;YACD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC/B,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;YAC/B,CAAC;YACD,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;gBACrC,OAAO,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;YAC3C,CAAC;YAED,MAAM,QAAQ,GAAkB,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,OAAO,EAAE;gBAC1E,OAAO,EAAE,aAAa;aACvB,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC;YAE7B,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBACpB,MAAM,IAAI,KAAK,CAAC,uBAAuB,MAAM,CAAC,KAAK,IAAI,eAAe,EAAE,CAAC,CAAC;YAC5E,CAAC;YAED,yBAAyB;YACzB,IAAI,MAAM,KAAK,UAAU,EAAE,CAAC;gBAC1B,OAAO,MAAM,CAAC,IAAc,CAAC;YAC/B,CAAC;iBAAM,CAAC;gBACN,2DAA2D;gBAC3D,OAAO,MAAO,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAM,CAAC;YACzC,CAAC;QAEH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,OAAC,CAAC,QAAQ,EAAE,CAAC;gBAChC,MAAM,IAAI,KAAK,CAAC,sBAAsB,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;YACzD,CAAC;YACD,IAAI,eAAK,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC9B,MAAM,IAAI,KAAK,CAAC,uBAAuB,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;YAC1D,CAAC;YACD,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;CACF;AArJD,8BAqJC"} |
+1
-1
| { | ||
| "name": "bitbuffet", | ||
| "version": "0.1.3", | ||
| "version": "1.0.0", | ||
| "description": "TypeScript SDK for the Structured Scraper API - BitBuffet", | ||
@@ -5,0 +5,0 @@ "main": "dist/index.js", |
+181
-12
@@ -8,3 +8,3 @@ <img src="https://www.bitbuffet.dev/_next/image?url=%2Fbitbuffet-logo-closed-transparent.png&w=64&q=75" alt="BitBuffet Logo" width="64" height="64"> | ||
| A powerful TypeScript/JavaScript SDK for the BitBuffet API that allows you to extract structured data from any web content using Zod schemas in under two seconds. | ||
| A powerful TypeScript/JavaScript SDK for the BitBuffet API that allows you to extract either structured data (validated with Zod schemas) or raw markdown content from any web content in under two seconds. | ||
@@ -17,2 +17,3 @@ ## 🚀 Features | ||
| - **Flexible**: Support for custom prompts and reasoning levels | ||
| - **Dual Output**: Extract structured JSON data or raw markdown content | ||
| - **Easy to use**: Simple, intuitive API | ||
@@ -33,2 +34,4 @@ - **Well-tested**: Comprehensive test suite with integration tests | ||
| ### JSON Extraction (Structured Data) | ||
| ```typescript | ||
@@ -69,2 +72,56 @@ import { BitBuffet } from 'bitbuffet'; | ||
| ### Markdown Extraction (Raw Content) | ||
| ```typescript | ||
| import { BitBuffet } from 'bitbuffet'; | ||
| const client = new BitBuffet('your-api-key-here'); | ||
| // Extract raw markdown content | ||
| try { | ||
| const markdown: string = await client.extract( | ||
| 'https://example.com/article', | ||
| { method: 'markdown' } | ||
| ); | ||
| console.log('Raw markdown content:'); | ||
| console.log(markdown); | ||
| } catch (error) { | ||
| console.error('Extraction failed:', error); | ||
| } | ||
| ``` | ||
| ## ⚙️ Output Methods | ||
| Choose between structured JSON extraction or raw markdown content: | ||
| ### JSON Method (Default) | ||
| Extracts structured data according to your Zod schema: | ||
| ```typescript | ||
| const ProductSchema = z.object({ | ||
| name: z.string(), | ||
| price: z.number(), | ||
| description: z.string() | ||
| }); | ||
| const product = await client.extract( | ||
| 'https://example.com/product', | ||
| ProductSchema, | ||
| { method: 'json' } // Optional - this is the default | ||
| ); | ||
| ``` | ||
| ### Markdown Method | ||
| Returns the raw markdown content of the webpage: | ||
| ```typescript | ||
| const markdown = await client.extract( | ||
| 'https://example.com/article', | ||
| { method: 'markdown' } | ||
| ); | ||
| ``` | ||
| **Note:** When using `method: 'markdown'`, do not provide a schema as the second parameter. | ||
| ## ⚙️ Configuration Options | ||
@@ -75,2 +132,3 @@ | ||
| ```typescript | ||
| // JSON extraction with configuration | ||
| const result = await client.extract( | ||
@@ -80,4 +138,5 @@ 'https://example.com/complex-page', | ||
| { | ||
| method: 'json', // Optional - default behavior | ||
| reasoning_effort: 'high', // 'medium' | 'high' - Higher effort for complex pages | ||
| prompt: 'Focus on extracting the main article content, ignoring ads and navigation', // Custom prompt (Not recommended) | ||
| prompt: 'Focus on extracting the main article content, ignoring ads and navigation', | ||
| temperature: 0.1, // Lower for more consistent results (0.0 - 1.5) | ||
@@ -89,2 +148,13 @@ // OR use top_p instead of temperature | ||
| ); | ||
| // Markdown extraction with configuration | ||
| const markdown = await client.extract( | ||
| 'https://example.com/article', | ||
| { | ||
| method: 'markdown', | ||
| reasoning_effort: 'medium', | ||
| prompt: 'Focus on the main content, ignore navigation and ads' | ||
| }, | ||
| 30000 | ||
| ); | ||
| ``` | ||
@@ -140,2 +210,19 @@ | ||
| ### Raw Content for Processing | ||
| ```typescript | ||
| // Extract raw markdown for further processing | ||
| const rawContent = await client.extract( | ||
| 'https://blog.example.com/post/123', | ||
| { method: 'markdown' } | ||
| ); | ||
| // Process the markdown content | ||
| const wordCount = rawContent.split(' ').length; | ||
| const hasCodeBlocks = rawContent.includes('```'); | ||
| console.log(`Content has ${wordCount} words`); | ||
| console.log(`Contains code blocks: ${hasCodeBlocks}`); | ||
| ``` | ||
| ## 🔧 API Reference | ||
@@ -152,13 +239,30 @@ | ||
| ##### `extract<T>(url: string, schema: ZodSchema<T>, options?: ExtractionOptions, timeout?: number): Promise<T>` | ||
| ##### JSON Extraction | ||
| ```typescript | ||
| extract<T>( | ||
| url: string, | ||
| schema: ZodSchema<T>, | ||
| config?: JsonConfig, | ||
| timeout?: number | ||
| ): Promise<T> | ||
| ``` | ||
| Extracts structured data from a URL using the provided Zod schema. | ||
| ##### Markdown Extraction | ||
| ```typescript | ||
| extract( | ||
| url: string, | ||
| config: MarkdownConfig, | ||
| timeout?: number | ||
| ): Promise<string> | ||
| ``` | ||
| **Parameters:** | ||
| - `url`: The URL to extract data from | ||
| - `schema`: Zod schema defining the expected data structure | ||
| - `options`: Optional extraction configuration | ||
| - `schema`: Zod schema defining the expected data structure (JSON method only) | ||
| - `config`: Extraction configuration options | ||
| - `timeout`: Request timeout in milliseconds (default: 30000) | ||
| **Returns:** Promise resolving to the extracted data matching your schema | ||
| **Returns:** | ||
| - JSON method: Promise resolving to the extracted data matching your schema | ||
| - Markdown method: Promise resolving to raw markdown content as string | ||
@@ -168,3 +272,4 @@ ### Types | ||
| ```typescript | ||
| interface ExtractionOptions { | ||
| interface JsonConfig { | ||
| method?: 'json'; // Optional - this is the default | ||
| reasoning_effort?: 'medium' | 'high'; | ||
@@ -176,6 +281,11 @@ prompt?: string; | ||
| interface ClientOptions { | ||
| baseURL?: string; | ||
| timeout?: number; | ||
| interface MarkdownConfig { | ||
| method: 'markdown'; // Required for markdown extraction | ||
| reasoning_effort?: 'medium' | 'high'; | ||
| prompt?: string; | ||
| temperature?: number; // 0.0 - 1.5 | ||
| top_p?: number; // Alternative to temperature | ||
| } | ||
| type ExtractionMethod = 'json' | 'markdown'; | ||
| ``` | ||
@@ -229,2 +339,61 @@ | ||
| If you encounter any issues or have questions, please [open an issue](https://github.com/ystefanov6/bitbuffet-clients/issues) on GitHub. | ||
| If you encounter any issues or have questions, please [open an issue](https://github.com/ystefanov6/bitbuffet-clients/issues) on GitHub. | ||
| ## ⚙️ Method Parameter Usage | ||
| The SDK supports two extraction methods via the `method` parameter: | ||
| ### Method 1: JSON Extraction (Default) | ||
| ```typescript | ||
| // Method 1a: Schema with optional config (method defaults to 'json') | ||
| const result = await client.extract( | ||
| 'https://example.com/article', | ||
| ArticleSchema, | ||
| { | ||
| method: 'json', // Optional - this is the default | ||
| reasoning_effort: 'high' | ||
| } | ||
| ); | ||
| // Method 1b: Schema without config (method defaults to 'json') | ||
| const result = await client.extract( | ||
| 'https://example.com/article', | ||
| ArticleSchema | ||
| ); | ||
| ``` | ||
| ### Method 2: Markdown Extraction | ||
| ```typescript | ||
| // Method 2a: Config object with method specified | ||
| const markdown = await client.extract( | ||
| 'https://example.com/article', | ||
| { | ||
| method: 'markdown', | ||
| reasoning_effort: 'medium', | ||
| prompt: 'Focus on main content' | ||
| } | ||
| ); | ||
| // Method 2b: Minimal markdown extraction | ||
| const markdown = await client.extract( | ||
| 'https://example.com/article', | ||
| { method: 'markdown' } | ||
| ); | ||
| ``` | ||
| ### Important Method Rules: | ||
| 1. **JSON Method Requirements:** | ||
| - A Zod schema MUST be provided as the second parameter | ||
| - Returns typed data matching your schema | ||
| - `method: 'json'` is optional (default behavior) | ||
| 2. **Markdown Method Requirements:** | ||
| - NO schema should be provided | ||
| - `method: 'markdown'` MUST be specified in config object | ||
| - Returns raw markdown string | ||
| - Schema and markdown method cannot be used together | ||
| 3. **Method Parameter Location:** | ||
| - Always specified in the configuration object | ||
| - Never passed as a separate parameter |
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 2 instances in 1 package
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 2 instances in 1 package
No v1
Quality: Package is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
43514
21.8%554
9.06%0
-100%387
77.52%