@intuned/sdk-dev-types
Advanced tools
Comparing version 1.0.1 to 1.0.2-0.cache-extract-from-content.0
@@ -9,3 +9,3 @@ import { Locator, Page } from "@intuned/playwright-core"; | ||
* @param options.prompt optional, a prompt to guide the extraction process and provide more context. | ||
* @param options.strategy optional, the strategy to use for extraction, currently, only `MARKDOWN` is supported. The file will be converted to markdown and then the data will be extracted from it. | ||
* @param options.strategy optional, the strategy to use for extraction. use `IMAGE` if the info you're trying to extract is visual and cannot be converted to markdown. Defaults to `MARKDOWN` strategy with `gpt4-turbo` model. | ||
* @example | ||
@@ -43,3 +43,3 @@ * ```typescript extractStructuredDataFromFile | ||
export declare function extractStructuredDataFromFile( | ||
file: ImageFile | PdfFile, | ||
file: ImageFile | PdfFile | SpreadsheetFile, | ||
options: { | ||
@@ -49,12 +49,3 @@ label: string; | ||
prompt?: string; | ||
strategy?: { | ||
type: "MARKDOWN"; | ||
model: | ||
| "claude-3-opus" | ||
| "claude-3-sonnet" | ||
| "claude-3-haiku" | ||
| "gpt4-turbo" | ||
| "gpt-4o" | ||
| "gpt3.5-turbo"; | ||
}; | ||
strategy?: MarkdownFileStrategy | ImageFileStrategy; | ||
} | ||
@@ -126,2 +117,17 @@ ): Promise<any>; | ||
/** | ||
* Represents a Spreadsheet file source. For now, only Excel spreadsheets are supported. | ||
* | ||
* @interface | ||
* @property type - The type of the file, which is always "spreadsheet". | ||
* @property sheetName - The name of the sheet to extract data from. | ||
* @property source - The source of the file data. | ||
* @property config - Optional. Configurations on how the spreadsheet should be processed when it is converted to a document. | ||
*/ | ||
export interface SpreadsheetFile { | ||
type: "spreadsheet"; | ||
sheetName: string; | ||
source: FileBufferSource | FileUrlSource | FileBase64Source; | ||
} | ||
/** | ||
* Extracts tables from a file (ImageFile or PdfFile). | ||
@@ -153,3 +159,3 @@ * | ||
export declare function extractTablesFromFile( | ||
file: ImageFile | PdfFile, | ||
file: ImageFile | PdfFile | SpreadsheetFile, | ||
options: { | ||
@@ -199,3 +205,3 @@ label: string; | ||
export declare function extractMarkdownFromFile( | ||
file: ImageFile | PdfFile, | ||
file: ImageFile | PdfFile | SpreadsheetFile, | ||
options: { | ||
@@ -302,7 +308,16 @@ label: string; | ||
model: | ||
| "claude-3-haiku" | ||
| "claude-3-haiku-20240307" | ||
| "claude-3-opus" | ||
| "claude-3-opus-20240229" | ||
| "claude-3-sonnet" | ||
| "claude-3-haiku" | ||
| "claude-3-sonnet-20240229" | ||
| "claude-3.5-sonnet" | ||
| "claude-3-5-sonnet-20240620" | ||
| "gpt4-turbo" | ||
| "gpt-4o"; | ||
| "gpt-4-turbo-2024-04-09" | ||
| "gpt-4o" | ||
| "gpt-4o-2024-05-13" | ||
| "gpt-4o-mini" | ||
| "gpt-4o-mini-2024-07-18"; | ||
type: "IMAGE"; | ||
@@ -321,8 +336,18 @@ } | ||
model: | ||
| "claude-3-haiku" | ||
| "claude-3-haiku-20240307" | ||
| "claude-3-opus" | ||
| "claude-3-opus-20240229" | ||
| "claude-3-sonnet" | ||
| "claude-3-haiku" | ||
| "claude-3-sonnet-20240229" | ||
| "claude-3.5-sonnet" | ||
| "claude-3-5-sonnet-20240620" | ||
| "gpt4-turbo" | ||
| "gpt-4-turbo-2024-04-09" | ||
| "gpt3.5-turbo" | ||
| "gpt-4o"; | ||
| "gpt-3.5-turbo-0125" | ||
| "gpt-4o" | ||
| "gpt-4o-2024-05-13" | ||
| "gpt-4o-mini" | ||
| "gpt-4o-mini-2024-07-18"; | ||
type: "HTML"; | ||
@@ -332,2 +357,56 @@ } | ||
/** | ||
* this strategy will extract markdown content from the file then run data extraction on it. | ||
* | ||
* @interface | ||
* @property model - the model to use in the extraction process | ||
* @property type - the type of the strategy | ||
*/ | ||
export interface MarkdownFileStrategy { | ||
model: | ||
| "claude-3-haiku" | ||
| "claude-3-haiku-20240307" | ||
| "claude-3-opus" | ||
| "claude-3-opus-20240229" | ||
| "claude-3-sonnet" | ||
| "claude-3-sonnet-20240229" | ||
| "claude-3.5-sonnet" | ||
| "claude-3-5-sonnet-20240620" | ||
| "gpt4-turbo" | ||
| "gpt-4-turbo-2024-04-09" | ||
| "gpt3.5-turbo" | ||
| "gpt-3.5-turbo-0125" | ||
| "gpt-4o" | ||
| "gpt-4o-2024-05-13" | ||
| "gpt-4o-mini" | ||
| "gpt-4o-mini-2024-07-18"; | ||
type: "MARKDOWN"; | ||
} | ||
/** | ||
* this strategy will use the image content of the file to extract the needed data. | ||
* should be used when the information you're trying to extract cannot be converted to markdown. For example, a checkbox in a pdf file. | ||
* @interface | ||
* @property model - the model to use in the extraction process. | ||
* @property type - the type of the strategy | ||
*/ | ||
export interface ImageFileStrategy { | ||
model: | ||
| "claude-3-haiku" | ||
| "claude-3-haiku-20240307" | ||
| "claude-3-opus" | ||
| "claude-3-opus-20240229" | ||
| "claude-3-sonnet" | ||
| "claude-3-sonnet-20240229" | ||
| "claude-3.5-sonnet" | ||
| "claude-3-5-sonnet-20240620" | ||
| "gpt4-turbo" | ||
| "gpt-4-turbo-2024-04-09" | ||
| "gpt-4o" | ||
| "gpt-4o-2024-05-13" | ||
| "gpt-4o-mini" | ||
| "gpt-4o-mini-2024-07-18"; | ||
type: "IMAGE"; | ||
} | ||
/** | ||
* Extracts structured data from content items (text or images). | ||
@@ -382,8 +461,18 @@ * | ||
model: | ||
| "claude-3-haiku" | ||
| "claude-3-haiku-20240307" | ||
| "claude-3-opus" | ||
| "claude-3-opus-20240229" | ||
| "claude-3-sonnet" | ||
| "claude-3-haiku" | ||
| "claude-3-sonnet-20240229" | ||
| "claude-3.5-sonnet" | ||
| "claude-3-5-sonnet-20240620" | ||
| "gpt4-turbo" | ||
| "gpt-4-turbo-2024-04-09" | ||
| "gpt3.5-turbo" | ||
| "gpt-4o"; | ||
| "gpt-3.5-turbo-0125" | ||
| "gpt-4o" | ||
| "gpt-4o-2024-05-13" | ||
| "gpt-4o-mini" | ||
| "gpt-4o-mini-2024-07-18"; | ||
} | ||
@@ -390,0 +479,0 @@ ): Promise<any>; |
@@ -74,3 +74,3 @@ import type { Page, Locator } from "@intuned/playwright-core"; | ||
/** | ||
* Gets the content of specified pages in the PDF file. | ||
* Gets the text content of specified pages in the PDF file. Does not support links. | ||
* | ||
@@ -221,2 +221,3 @@ * @param {number[]} [pageNumbers] - Optional. An array of page numbers to get content from. | ||
* | ||
* @throws {Error} Throws an error if the endpoint option is provided in the uploadFileToS3 method options. | ||
* @returns {string} The URL descriptor of the file. | ||
@@ -227,2 +228,9 @@ */ | ||
/** | ||
* Gets file path in the s3 bucket | ||
* | ||
* @returns {string} The file path in the s3 bucket. | ||
*/ | ||
filePath(): string; | ||
/** | ||
* Generates a signed URL for the file. | ||
@@ -335,2 +343,3 @@ * | ||
suggestedFilename: () => string | undefined; | ||
mimeType: () => Promise<string | undefined>; | ||
} | ||
@@ -448,3 +457,7 @@ | ||
file: Download | string | Uint8Array | Buffer | ReadStream, | ||
options: { fileNameOverride?: string; s3Configs?: S3Configs } | ||
options?: { | ||
fileNameOverride?: string; | ||
s3Configs?: S3Configs; | ||
endpoint?: string; | ||
} | ||
): Promise<File>; |
{ | ||
"name": "@intuned/sdk-dev-types", | ||
"version": "1.0.1", | ||
"version": "1.0.2-0.cache-extract-from-content.0", | ||
"description": "intuned runner types", | ||
@@ -5,0 +5,0 @@ "author": "Intuned Team", |
@@ -82,2 +82,6 @@ import { RunEnvironment } from "./enums"; | ||
runId?: string; | ||
jobId?: string; | ||
jobRunId?: string; | ||
queueId?: string; | ||
proxy?: string; | ||
} | ||
@@ -84,0 +88,0 @@ |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Manifest confusion
Supply chain risk: This package has inconsistent metadata. This could be malicious or caused by an error when publishing the package.
Found 1 instance in 1 package
No v1
Quality: Package is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
69261
2099
2