@anyparser/core
Advanced tools
+604
"use strict";
// --- esbuild-generated CommonJS runtime helpers (bundler output; do not hand-edit) ---
// Cached Object intrinsics used by the helpers below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getOwnPropSymbols = Object.getOwnPropertySymbols;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __propIsEnum = Object.prototype.propertyIsEnumerable;
// Resolves a well-known symbol (e.g. Symbol.asyncIterator), falling back to
// Symbol.for("Symbol.<name>") on runtimes that lack it.
var __knownSymbol = (name, symbol) => (symbol = Symbol[name]) ? symbol : Symbol.for("Symbol." + name);
// Defines `key` on `obj` as a normal (enumerable/configurable/writable) data
// property; plain assignment is used when the key is not already present.
var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
// Object-spread helper (compiled form of `{ ...a, ...b }`): copies own
// enumerable string keys and own enumerable symbols of `b` onto `a`.
// Returns the mutated `a`.
var __spreadValues = (a, b) => {
  for (var prop in b || (b = {}))
    if (__hasOwnProp.call(b, prop))
      __defNormalProp(a, prop, b[prop]);
  if (__getOwnPropSymbols)
    for (var prop of __getOwnPropSymbols(b)) {
      if (__propIsEnum.call(b, prop))
        __defNormalProp(a, prop, b[prop]);
    }
  return a;
};
// Defines lazy, enumerable getters on `target` for every named export in `all`
// (each value of `all` is a thunk returning the exported binding).
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copies own properties of `from` onto `to` as live getters (re-export
// semantics), skipping `except` and anything `to` already defines.
// `desc` is a scratch slot for the source descriptor, reused per key.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Wraps a CommonJS module so it can be consumed with ESM import semantics.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));
// Builds the CommonJS export object: marks it "__esModule" and re-exports
// every member of `mod` as a live getter.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// Compiled async/await runtime: drives a generator function as if it were an
// async function. Each yielded value is resolved through Promise.resolve and
// fed back via next()/throw(); `return` settles the outer promise.
var __async = (__this, __arguments, generator) => {
  return new Promise((resolve, reject) => {
    const iterator = generator.apply(__this, __arguments);
    // Advance the generator with a resolved value; reject on a thrown error.
    function onFulfilled(value) {
      let result;
      try {
        result = iterator.next(value);
      } catch (err) {
        reject(err);
        return;
      }
      settle(result);
    }
    // Re-enter the generator with a rejection so user try/catch can run.
    function onRejected(reason) {
      let result;
      try {
        result = iterator.throw(reason);
      } catch (err) {
        reject(err);
        return;
      }
      settle(result);
    }
    // Done -> resolve; otherwise chain on the awaited (possibly non-promise) value.
    function settle(result) {
      if (result.done) {
        resolve(result.value);
      } else {
        Promise.resolve(result.value).then(onFulfilled, onRejected);
      }
    }
    onFulfilled(void 0);
  });
};
// Compiled `for await` helper: returns an async-iterator-like object for `obj`.
// Prefers Symbol.asyncIterator; otherwise adapts the sync iterator by wrapping
// each next()/return() result so its value is resolved through a Promise
// (mirrors the spec's Async-from-Sync Iterator behavior).
var __forAwait = (obj, it, method) => (it = obj[__knownSymbol("asyncIterator")]) ? it.call(obj) : (obj = obj[__knownSymbol("iterator")](), it = {}, method = (key, fn) => (fn = obj[key]) && (it[key] = (arg) => new Promise((yes, no, done) => (arg = fn.call(obj, arg), done = arg.done, Promise.resolve(arg.value).then((value) => yes({ value, done }), no)))), method("next"), method("return"), it);
// src/index.ts — public entry point: declare the package's named exports
// (as lazy getters) and expose them through the CommonJS module.exports.
var index_exports = {};
__export(index_exports, {
  Anyparser: () => Anyparser,
  OCR_LANGUAGES: () => OCR_LANGUAGES,
  OCR_PRESETS: () => OCR_PRESETS
});
module.exports = __toCommonJS(index_exports);
| // src/utils/fetcher.ts | ||
/**
 * Error that carries the original failure (`cause`) and the HTTP status code
 * of the response that triggered it.
 */
var WrappedError = class extends Error {
  /**
   * @param message - Human-readable description of the failure.
   * @param cause - Underlying error (e.g. wrapping the response body text).
   * @param statusCode - HTTP status code of the failed response.
   */
  constructor(message, cause, statusCode) {
    super(message);
    Object.assign(this, {
      name: "WrappedError",
      cause,
      statusCode
    });
  }
};
/**
 * fetch() wrapper that rejects with a WrappedError whenever the response is
 * not 2xx. The error carries the response body text (as `cause`) and the
 * HTTP status code; successful responses are returned untouched.
 * @param input - URL / Request passed straight to fetch().
 * @param options - Optional RequestInit passed straight to fetch().
 */
var wrappedFetch = (input, options) => __async(void 0, null, function* () {
  const response = yield fetch(input, options);
  if (response.ok) {
    return response;
  }
  // Read the body so the caller sees the server's error payload.
  const body = yield response.text();
  throw new WrappedError(
    `HTTP ${response.status} ${response.statusText}: ${input}`,
    new Error(body),
    response.status
  );
});
| // src/form.ts | ||
/**
 * Builds the multipart form payload for a parse request from validated options.
 * Field selection depends on the model: "ocr" adds language/preset, "crawler"
 * adds crawl controls plus a url, and every other model carries image/table
 * flags and the file attachments.
 * @param parsed - Validated parser options.
 * @returns FormData ready to POST.
 */
function buildForm(parsed) {
  const form = new FormData();
  form.append("format", parsed.format);
  form.append("model", parsed.model);
  const isOcr = parsed.model === "ocr";
  const isCrawler = parsed.model === "crawler";
  // image/table toggles only apply to the plain parsing models.
  if (!isOcr && !isCrawler) {
    if (parsed.image !== undefined) {
      form.append("image", String(parsed.image));
    }
    if (parsed.table !== undefined) {
      form.append("table", String(parsed.table));
    }
  }
  if (isOcr) {
    if (parsed.ocrLanguage != null && parsed.ocrLanguage.length) {
      form.append("ocrLanguage", parsed.ocrLanguage.join(","));
    }
    if (parsed.ocrPreset) {
      form.append("ocrPreset", parsed.ocrPreset);
    }
  }
  if (isCrawler) {
    form.append("url", parsed.url != null ? parsed.url : "");
    for (const key of ["maxDepth", "maxExecutions"]) {
      if (parsed[key] !== undefined) {
        form.append(key, String(parsed[key]));
      }
    }
    if (parsed.strategy) {
      form.append("strategy", parsed.strategy);
    }
    if (parsed.traversalScope) {
      form.append("traversalScope", parsed.traversalScope);
    }
  } else if (parsed.files) {
    // Non-crawler models upload the actual file contents.
    for (const { contents, fileName } of parsed.files) {
      form.append("files", contents, fileName);
    }
  }
  return form;
}
| // src/validator/index.ts | ||
| var fsapi = __toESM(require("fs"), 1); | ||
| var import_node_path = require("path"); | ||
| var fs = __toESM(require("fs/promises"), 1); | ||
// src/config/hardcoded.ts
// Default API endpoint used when ANYPARSER_API_URL is not set.
var FALLBACK_API_URL = "https://anyparserapi.com";
// Supported OCR presets (frozen). Values are the wire-format identifiers
// sent as the "ocrPreset" form field.
var OCR_PRESETS = Object.freeze({
  DOCUMENT: "document",
  HANDWRITING: "handwriting",
  SCAN: "scan",
  RECEIPT: "receipt",
  MAGAZINE: "magazine",
  INVOICE: "invoice",
  BUSINESS_CARD: "business-card",
  PASSPORT: "passport",
  DRIVER_LICENSE: "driver-license"
});
// Supported OCR languages (frozen), keyed by English name. Values look like
// Tesseract traineddata language codes ("_vert" = vertical text,
// "_frak"/"_latf" = Fraktur/Latin script variants) — sent as "ocrLanguage".
var OCR_LANGUAGES = Object.freeze({
  AFRIKAANS: "afr",
  AMHARIC: "amh",
  ARABIC: "ara",
  ASSAMESE: "asm",
  AZERBAIJANI: "aze",
  AZERBAIJANI_CYRILLIC: "aze_cyrl",
  BELARUSIAN: "bel",
  BENGALI: "ben",
  TIBETAN: "bod",
  BOSNIAN: "bos",
  BRETON: "bre",
  BULGARIAN: "bul",
  CATALAN: "cat",
  CEBUANO: "ceb",
  CZECH: "ces",
  SIMPLIFIED_CHINESE: "chi_sim",
  SIMPLIFIED_CHINESE_VERTICAL: "chi_sim_vert",
  TRADITIONAL_CHINESE: "chi_tra",
  TRADITIONAL_CHINESE_VERTICAL: "chi_tra_vert",
  CHEROKEE: "chr",
  CORSICAN: "cos",
  WELSH: "cym",
  DANISH: "dan",
  DANISH_FRAKTUR: "dan_frak",
  GERMAN: "deu",
  GERMAN_FRAKTUR: "deu_frak",
  GERMAN_LATIN: "deu_latf",
  DIVESH: "div",
  DZONGKHA: "dzo",
  GREEK: "ell",
  ENGLISH: "eng",
  MIDDLE_ENGLISH: "enm",
  ESPERANTO: "epo",
  EQUATORIAL_GUINEAN: "equ",
  ESTONIAN: "est",
  BASQUE: "eus",
  FAROESE: "fao",
  PERSIAN: "fas",
  FILIPINO: "fil",
  FINNISH: "fin",
  FRENCH: "fra",
  OLD_FRENCH: "frm",
  FRISIAN: "fry",
  SCOTTISH_GAELIC: "gla",
  IRISH: "gle",
  GALICIAN: "glg",
  ANCIENT_GREEK: "grc",
  GUJARATI: "guj",
  HAITIAN_CREOLE: "hat",
  HEBREW: "heb",
  HINDI: "hin",
  CROATIAN: "hrv",
  HUNGARIAN: "hun",
  ARMENIAN: "hye",
  IGBO: "iku",
  INDONESIAN: "ind",
  ICELANDIC: "isl",
  ITALIAN: "ita",
  OLD_ITALIAN: "ita_old",
  JAVANESE: "jav",
  JAPANESE: "jpn",
  JAPANESE_VERTICAL: "jpn_vert",
  KANNADA: "kan",
  GEORGIAN: "kat",
  OLD_GEORGIAN: "kat_old",
  KAZAKH: "kaz",
  KHMER: "khm",
  KIRGHIZ: "kir",
  KURDISH: "kmr",
  KOREAN: "kor",
  KOREAN_VERTICAL: "kor_vert",
  LAO: "lao",
  LATIN: "lat",
  LATVIAN: "lav",
  LITHUANIAN: "lit",
  LUXEMBOURGISH: "ltz",
  MALAYALAM: "mal",
  MARATHI: "mar",
  MACEDONIAN: "mkd",
  MALTESE: "mlt",
  MONGOLIAN: "mon",
  MAORI: "mri",
  MALAY: "msa",
  MYANMAR: "mya",
  NEPALI: "nep",
  DUTCH: "nld",
  NORWEGIAN: "nor",
  OCCITAN: "oci",
  ODISHA: "ori",
  OSD: "osd",
  PUNJABI: "pan",
  POLISH: "pol",
  PORTUGUESE: "por",
  PASHTO: "pus",
  QUECHUA: "que",
  ROMANIAN: "ron",
  RUSSIAN: "rus",
  SANSKRIT: "san",
  SINHALA: "sin",
  SLOVAK: "slk",
  SLOVAK_FRAKTUR: "slk_frak",
  SLOVENIAN: "slv",
  SINDHI: "snd",
  SPANISH: "spa",
  OLD_SPANISH: "spa_old",
  ALBANIAN: "sqi",
  SERBIAN: "srp",
  SERBIAN_LATIN: "srp_latn",
  SUNDIANESE: "sun",
  SWAHILI: "swa",
  SWEDISH: "swe",
  SYRIAC: "syr",
  TAMIL: "tam",
  TATAR: "tat",
  TELUGU: "tel",
  TAJIK: "tgk",
  TAGALOG: "tgl",
  THAI: "tha",
  TIGRINYA: "tir",
  TONGAN: "ton",
  TURKISH: "tur",
  UIGHUR: "uig",
  UKRAINIAN: "ukr",
  URDU: "urd",
  UZBEK: "uzb",
  UZBEK_CYRILLIC: "uzb_cyrl",
  VIETNAMESE: "vie",
  YIDDISH: "yid",
  YORUBA: "yor"
});
| // src/utils/nullable.ts | ||
/**
 * True when the value is null/undefined, or a string that is empty after
 * trimming (blank strings are deliberately treated as "missing").
 * Any other value — including 0, false, and [] — is considered present.
 */
var isNullOrUndefined = (suspect) => {
  // `== null` matches both null and undefined.
  return suspect == null || (typeof suspect === "string" && suspect.trim() === "");
};
/**
 * True when the value is a non-null object (arrays, Maps, Sets included).
 * Only the null check is needed alongside typeof: `typeof undefined` is
 * "undefined", never "object", so the previous `!== void 0` test was
 * redundant and has been removed.
 */
var isValidObject = (suspect) => {
  return typeof suspect === "object" && suspect !== null;
};
| // src/utils/env.ts | ||
/**
 * Reads an environment variable, falling back to `fallback` and finally ""
 * when both are missing or blank (isNullOrUndefined treats whitespace-only
 * strings as missing).
 * @param key - Environment variable name.
 * @param fallback - Value used when the variable is unset/blank.
 */
var env = (key, fallback = "") => {
  for (const candidate of [process.env[key], fallback]) {
    if (!isNullOrUndefined(candidate)) {
      return candidate;
    }
  }
  return "";
};
| // src/options.default.ts | ||
/**
 * Resolves the API base URL from ANYPARSER_API_URL, falling back to the
 * hardcoded default when the variable is unset or does not parse as a URL.
 * @returns {URL} A valid API URL instance.
 */
var getApiUrl = () => {
  const candidate = env("ANYPARSER_API_URL", FALLBACK_API_URL);
  let parsed;
  try {
    parsed = new URL(candidate);
  } catch (e) {
    // Bad value from the environment: log and use the known-good default.
    console.error("Invalid API URL %s", candidate);
    console.debug("Defaulting to %s", FALLBACK_API_URL);
    parsed = new URL(FALLBACK_API_URL);
  }
  return parsed;
};
// Defaults merged under user-supplied options by buildOptions().
// NOTE: apiUrl/apiKey are resolved from the environment once, at module load.
var defaultOptions = {
  apiUrl: getApiUrl(),
  apiKey: env("ANYPARSER_API_KEY"),
  format: "json",
  model: "text",
  image: true,
  table: true
};
| // src/options.ts | ||
/**
 * Validates the API key.
 * @param apiKey - Key to validate.
 * @throws {Error} "API key is required" when the key is falsy;
 *   "API key must be a non-empty string" when it is not a string or is
 *   only whitespace.
 */
function validateApiKey(apiKey) {
  if (!apiKey) {
    throw new Error("API key is required");
  }
  const isNonEmptyString = typeof apiKey === "string" && apiKey.trim().length > 0;
  if (!isNonEmptyString) {
    throw new Error("API key must be a non-empty string");
  }
}
/**
 * Merges user options over the defaults and normalizes them into the fully
 * populated option object used by the rest of the pipeline.
 * @param options - User-provided options overriding the defaults.
 * @returns Complete options with format/model/encoding defaulted.
 * @throws {Error} When the API key or API URL is missing/invalid.
 */
function buildOptions(options) {
  const merged = __spreadValues(__spreadValues({}, defaultOptions), options);
  validateApiKey(merged.apiKey);
  if (!merged.apiUrl) {
    throw new Error("API URL is required");
  }
  const {
    apiUrl,
    apiKey,
    image,
    table,
    ocrLanguage,
    ocrPreset,
    url,
    maxDepth,
    maxExecutions,
    strategy,
    traversalScope
  } = merged;
  return {
    apiUrl,
    apiKey,
    // Defensive defaults, applied even if merge produced falsy values.
    format: merged.format || "json",
    model: merged.model || "text",
    encoding: merged.encoding || "utf-8",
    image,
    table,
    ocrLanguage,
    ocrPreset,
    url,
    maxDepth,
    maxExecutions,
    strategy,
    traversalScope
  };
}
| // src/validator/option.ts | ||
/**
 * Validates option values against the supported OCR languages and presets.
 * Language membership is checked against a Set built once per call instead
 * of re-scanning Object.values(OCR_LANGUAGES) for every entry (O(n+m)
 * instead of O(n*m)).
 * @param parsed - Options to validate.
 * @throws {Error} If the API URL is missing or an OCR value is unsupported.
 */
var validateOption = (parsed) => {
  if (isNullOrUndefined(parsed.apiUrl)) {
    throw new Error("API URL is required");
  }
  if (!isNullOrUndefined(parsed.ocrLanguage)) {
    const knownLanguages = new Set(Object.values(OCR_LANGUAGES));
    parsed.ocrLanguage.forEach((language) => {
      if (!knownLanguages.has(language)) {
        throw new Error("Invalid OCR language");
      }
    });
  }
  if (!isNullOrUndefined(parsed.ocrPreset)) {
    if (!Object.values(OCR_PRESETS).includes(parsed.ocrPreset)) {
      throw new Error("Invalid OCR preset");
    }
  }
};
| // src/validator/path.ts | ||
| var import_promises = require("fs/promises"); | ||
/**
 * Validates that every given path exists and is accessible.
 * @param filePaths - Single path or array of paths.
 * @returns {Promise<{valid: boolean, files?: string[], error?: Error}>}
 *   `{valid: true, files}` on success, otherwise `{valid: false, error}`.
 */
var validatePath = (filePaths) => __async(void 0, null, function* () {
  // Normalize to an array; a falsy input yields [] and fails the length check.
  const files = !filePaths ? [] : Array.isArray(filePaths) ? filePaths : [filePaths];
  if (files.length === 0) {
    return {
      valid: false,
      error: new Error("No files provided")
    };
  }
  for (const filePath of files) {
    try {
      yield (0, import_promises.access)(filePath);
    } catch (error) {
      // First inaccessible path short-circuits the whole validation.
      return {
        valid: false,
        error
      };
    }
  }
  return {
    valid: true,
    files
  };
});
| // src/validator/crawler.ts | ||
/**
 * Resolves the start URL for crawler mode from the user-supplied input.
 * Accepts either a single URL string or an array, where the first non-blank
 * entry wins.
 * Fix: an array with no usable entry previously fell through to
 * `new URL(filePaths)` with the array itself, producing a cryptic
 * "Invalid URL" TypeError; it now throws a descriptive error instead.
 * @param filePaths - URL string or array of candidate URL strings.
 * @returns The normalized URL string.
 * @throws {Error|TypeError} When no usable URL is present or it is invalid.
 */
var getURLToCrawl = (filePaths) => {
  if (Array.isArray(filePaths)) {
    const candidate = filePaths.find((x) => !isNullOrUndefined(x));
    if (isNullOrUndefined(candidate)) {
      throw new Error("No URL provided to crawl");
    }
    return new URL(candidate).toString();
  }
  return new URL(filePaths).toString();
};
| // src/validator/index.ts | ||
/**
 * Verifies a file exists and can be opened for reading (i.e. is not locked).
 * @param filePath - Path to the file to check.
 * @throws {Error} Friendly message on ENOENT (missing) or EBUSY/ELOCK
 *   (locked); any other error is rethrown untouched.
 */
function checkFileAccess(filePath) {
  return __async(this, null, function* () {
    // True when the error carries one of the given fs error codes.
    const hasCode = (err, codes) => err instanceof Error && "code" in err && codes.includes(err.code);
    try {
      yield fs.access(filePath);
    } catch (error) {
      if (hasCode(error, ["ENOENT"])) {
        throw new Error(`File ${filePath} was not found or was removed`);
      }
      throw error;
    }
    try {
      // Opening for read is the portable way to detect a lock held elsewhere.
      const handle = yield fs.open(filePath, "r");
      yield handle.close();
    } catch (error) {
      if (hasCode(error, ["EBUSY", "ELOCK"])) {
        throw new Error(`File ${filePath} is locked by another process`);
      }
      throw error;
    }
  });
}
/**
 * Validates options and input, then materializes everything the API call
 * needs: the start URL for crawler mode, otherwise each file fully read into
 * memory as a File object.
 * @param filePaths - File path(s) to parse, or the start URL in crawler mode.
 * @param options - User-supplied parser options.
 * @returns Fully populated option object with `url` or `files` filled in.
 * @throws {Error} On invalid options/format, inaccessible files, or bad URLs.
 */
function validateAndParse(filePaths, options) {
  return __async(this, null, function* () {
    const parsed = buildOptions(options);
    validateOption(parsed);
    if (!["json", "markdown", "html"].includes(parsed.format)) {
      throw new Error(`Unsupported format: ${parsed.format}`);
    }
    // Crawler mode validates a URL; every other mode validates file paths.
    const isCrawler = (options == null ? void 0 : options.model) === "crawler";
    const result = isCrawler ? { valid: true, files: [getURLToCrawl(filePaths)] } : yield validatePath(filePaths);
    if (result.valid === false) {
      throw result.error;
    }
    const parsedOption = {
      apiUrl: parsed.apiUrl,
      apiKey: parsed.apiKey,
      format: parsed.format,
      model: parsed.model,
      image: parsed.image,
      table: parsed.table,
      ocrLanguage: parsed.ocrLanguage,
      ocrPreset: parsed.ocrPreset,
      url: parsed.url,
      maxDepth: parsed.maxDepth,
      maxExecutions: parsed.maxExecutions,
      strategy: parsed.strategy,
      traversalScope: parsed.traversalScope,
      encoding: parsed.encoding
    };
    if (isCrawler) {
      // In crawler mode the single "file" is the validated start URL.
      parsedOption.url = result.files[0];
    } else {
      const processed = [];
      for (const filePath of result.files) {
        yield checkFileAccess(filePath);
        const fileStream = fsapi.createReadStream(filePath);
        const chunks = [];
        // Transpiled `for await (const chunk of fileStream)`: accumulate all
        // stream chunks; the finally clause closes the iterator on early exit
        // and re-throws any captured error.
        try {
          for (var iter = __forAwait(fileStream), more, temp, error; more = !(temp = yield iter.next()).done; more = false) {
            const chunk = temp.value;
            chunks.push(chunk);
          }
        } catch (temp) {
          error = [temp];
        } finally {
          try {
            more && (temp = iter.return) && (yield temp.call(iter));
          } finally {
            if (error)
              throw error[0];
          }
        }
        // Whole file buffered in memory and wrapped as a File for FormData upload.
        const buffer = Buffer.concat(chunks);
        const contents = new File([buffer], (0, import_node_path.basename)(filePath), {
          type: "application/octet-stream"
        });
        processed.push({
          fileName: (0, import_node_path.basename)(filePath),
          contents
        });
      }
      parsedOption.files = processed;
    }
    return parsedOption;
  });
}
| // src/utils/casing.ts | ||
// Converts snake_case to camelCase: each run of underscores is dropped and the
// character that follows it is uppercased (a trailing underscore, having no
// following character, is left untouched).
var underscoreToCamel = (x) => {
  const upperNext = (_match, ch) => ch.toUpperCase();
  return x.replace(/_+(.)/g, upperNext);
};
| // src/utils/camel-case.ts | ||
/**
 * Recursively converts snake_case keys of plain objects to camelCase.
 * Arrays, Maps, and Sets are rebuilt with transformed members; Dates,
 * RegExps, URLs, functions, null/undefined, and primitives pass through
 * unchanged.
 */
var transformToCamel = (item) => {
  // Values that are returned as-is (identity-preserving pass-through).
  const passThrough = item === null || item === void 0 || typeof item === "function" || item instanceof Date || item instanceof RegExp || item instanceof URL;
  if (passThrough) {
    return item;
  }
  if (Array.isArray(item)) {
    return item.map((el) => transformToCamel(el));
  }
  // Maps and Sets keep their structure; only members are transformed.
  if (item instanceof Map) {
    const result = /* @__PURE__ */ new Map();
    for (const [key, value] of item) {
      result.set(transformToCamel(key), transformToCamel(value));
    }
    return result;
  }
  if (item instanceof Set) {
    const result = /* @__PURE__ */ new Set();
    for (const value of item) {
      result.add(transformToCamel(value));
    }
    return result;
  }
  // Plain objects: rebuild with camelCased keys and transformed values.
  if (isValidObject(item)) {
    const result = {};
    for (const key of Object.keys(item)) {
      result[underscoreToCamel(key)] = transformToCamel(item[key]);
    }
    return result;
  }
  // Primitives (string, number, boolean, etc.).
  return item;
};
| // src/parser.ts | ||
/**
 * Main client for parsing items through the Anyparser API.
 */
var Anyparser = class {
  /**
   * Initialize the parser with optional configuration.
   * @param options - Configuration options applied to every parse() call.
   */
  constructor(options) {
    this.options = options;
  }
  /**
   * Parse files using the Anyparser API.
   * @param filePathsOrUrl - A single file path or list of file paths to parse, or a start URL for crawling
   * @returns List of parsed file results if format is JSON, or raw text content if format is text/markdown
   * @throws Error if the API request fails
   */
  parse(filePathsOrUrl) {
    return __async(this, null, function* () {
      const parsed = yield validateAndParse(filePathsOrUrl, this.options);
      // Only attach Authorization when an API key is configured.
      const headers = parsed.apiKey ? { Authorization: `Bearer ${parsed.apiKey}` } : {};
      const endpoint = new URL("/parse/v1", parsed.apiUrl);
      const response = yield wrappedFetch(endpoint, {
        method: "POST",
        body: buildForm(parsed),
        headers: __spreadValues({}, headers)
      });
      if (parsed.format === "json") {
        return transformToCamel(yield response.json());
      }
      if (parsed.format === "markdown" || parsed.format === "html") {
        return yield response.text();
      }
      throw new Error(`Unsupported format: ${parsed.format}`);
    });
  }
};
| //# sourceMappingURL=index.cjs.map |
| {"version":3,"sources":["../src/index.ts","../src/utils/fetcher.ts","../src/form.ts","../src/validator/index.ts","../src/config/hardcoded.ts","../src/utils/nullable.ts","../src/utils/env.ts","../src/options.default.ts","../src/options.ts","../src/validator/option.ts","../src/validator/path.ts","../src/validator/crawler.ts","../src/utils/casing.ts","../src/utils/camel-case.ts","../src/parser.ts"],"sourcesContent":["/**\n * Anyparser main entry point\n * @module anyparser\n */\n\nexport * from './parser.ts'\n\nexport { OCR_LANGUAGES, OCR_PRESETS } from './config/hardcoded.ts'\n\nexport type { OcrLanguageType, OcrPresetType, AnyparserOption, AnyparserImageReference, AnyparserResultBase, AnyparserCrawlDirectiveBase, AnyparserCrawlDirective, AnyparserUrl, AnyparserRobotsTxtDirective, AnyparserPdfPage, AnyparserPdfResult, AnyparserCrawlResult, AnyparserResult, Result } from './anyparser.d.ts'\n","export class WrappedError extends Error {\n override cause: Error\n statusCode: number\n\n constructor (message: string, cause: Error, statusCode: number) {\n super(message)\n this.name = 'WrappedError'\n this.cause = cause\n this.statusCode = statusCode\n }\n}\n\nexport const wrappedFetch = async (input: string | URL | Request, options?: RequestInit) => {\n const response = await fetch(input, options)\n\n if (!response.ok) {\n const { status, statusText } = response\n const text = await response.text()\n\n throw new WrappedError(\n `HTTP ${status} ${statusText}: ${input}`,\n new Error(text),\n status\n )\n }\n\n return response\n}\n","/* eslint-disable complexity */\n\n/**\n * Form data builder module for creating multipart form data for API requests.\n * @module form\n */\n\nimport type { AnyparserParsedOption } from './anyparser.d.ts'\n\n/**\n * Builds multipart form data from parsed options.\n * @param parsed - Validated parser options\n * @returns Form data for API request\n */\nexport function buildForm (parsed: AnyparserParsedOption): FormData {\n const formData = new 
FormData()\n\n // Add regular form fields\n formData.append('format', parsed.format)\n formData.append('model', parsed.model)\n\n // Only add image and table fields if not using OCR model or crawler model\n if (parsed.model !== 'ocr' && parsed.model !== 'crawler') {\n if (parsed.image !== undefined) {\n formData.append('image', String(parsed.image))\n }\n\n if (parsed.table !== undefined) {\n formData.append('table', String(parsed.table))\n }\n }\n\n // Add OCR-specific fields\n if (parsed.model === 'ocr') {\n if (parsed.ocrLanguage?.length) {\n formData.append('ocrLanguage', parsed.ocrLanguage.join(','))\n }\n\n if (parsed.ocrPreset) {\n formData.append('ocrPreset', parsed.ocrPreset)\n }\n }\n\n // Add crawler-specific fields\n if (parsed.model === 'crawler') {\n formData.append('url', parsed.url ?? '')\n\n if (parsed.maxDepth !== undefined) {\n formData.append('maxDepth', String(parsed.maxDepth))\n }\n\n if (parsed.maxExecutions !== undefined) {\n formData.append('maxExecutions', String(parsed.maxExecutions))\n }\n\n if (parsed.strategy) {\n formData.append('strategy', parsed.strategy)\n }\n\n if (parsed.traversalScope) {\n formData.append('traversalScope', parsed.traversalScope)\n }\n } else {\n // Add files to the form for non-crawler models\n if (parsed.files) {\n for (const file of parsed.files) {\n formData.append('files', file.contents, file.fileName)\n }\n }\n }\n\n return formData\n}\n","/**\n * Option validation and parsing module\n * @module validator\n */\nimport * as fsapi from 'node:fs'\nimport { basename } from 'node:path'\nimport * as fs from 'node:fs/promises'\nimport type { AnyparserOption, AnyparserParsedOption } from '../anyparser.d.ts'\nimport { buildOptions } from '../options.ts'\nimport { validateOption } from './option.ts'\nimport { validatePath } from './path.ts'\nimport type { ValidPathValidationResult } from './validation.d.ts'\nimport { getURLToCrawl } from './crawler.ts'\n\n/**\n * Check if a file is accessible and not locked\n * 
@param filePath - Path to the file to check\n * @returns Promise that resolves when file is accessible, rejects if file is locked or not found\n */\nasync function checkFileAccess (filePath: string): Promise<void> {\n try {\n await fs.access(filePath)\n } catch (error) {\n if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {\n throw new Error(`File ${filePath} was not found or was removed`)\n }\n\n throw error\n }\n\n try {\n // Try to open file for reading to check if it's locked\n const fileHandle = await fs.open(filePath, 'r')\n await fileHandle.close()\n } catch (error) {\n if (error instanceof Error && 'code' in error && (error.code === 'EBUSY' || error.code === 'ELOCK')) {\n throw new Error(`File ${filePath} is locked by another process`)\n }\n\n throw error\n }\n}\n\n/**\n * Validates options and processes input files\n * @param filePaths - Files to process\n * @param options - Parser options\n * @returns Processed options and files\n */\nexport async function validateAndParse (\n filePaths: string | string[],\n options?: AnyparserOption\n): Promise<AnyparserParsedOption> {\n const parsed = buildOptions(options)\n validateOption(parsed)\n\n if (!['json', 'markdown', 'html'].includes(parsed.format)) {\n throw new Error(`Unsupported format: ${parsed.format}`)\n }\n\n // Determine if we're in crawler mode\n const isCrawler = options?.model === 'crawler'\n\n // Validate URL for crawler mode, otherwise validate file paths\n const result = isCrawler ?\n { valid: true, files: [getURLToCrawl(filePaths)] } as ValidPathValidationResult :\n await validatePath(filePaths)\n\n if (result.valid === false) {\n throw result.error\n }\n\n const parsedOption: AnyparserParsedOption = {\n apiUrl: parsed.apiUrl,\n apiKey: parsed.apiKey,\n format: parsed.format,\n model: parsed.model,\n image: parsed.image,\n table: parsed.table,\n ocrLanguage: parsed.ocrLanguage,\n ocrPreset: parsed.ocrPreset,\n url: parsed.url,\n maxDepth: parsed.maxDepth,\n maxExecutions: 
parsed.maxExecutions,\n strategy: parsed.strategy,\n traversalScope: parsed.traversalScope,\n encoding: parsed.encoding\n }\n\n // Handle crawler mode\n if (isCrawler) {\n parsedOption.url = result.files[0]\n } else {\n // Process files for non-crawler mode\n const processed = []\n\n for (const filePath of result.files) {\n await checkFileAccess(filePath)\n\n const fileStream = fsapi.createReadStream(filePath)\n const chunks = []\n\n for await (const chunk of fileStream) {\n chunks.push(chunk)\n }\n\n const buffer = Buffer.concat(chunks)\n const contents = new File([buffer], basename(filePath), {\n type: 'application/octet-stream'\n })\n\n processed.push({\n fileName: basename(filePath),\n contents\n })\n }\n\n parsedOption.files = processed\n }\n\n return parsedOption\n}\n","/**\n * Hardcoded configuration constants\n * @module config/hardcoded\n */\n\nexport const FALLBACK_API_URL = 'https://anyparserapi.com'\n\nexport const OCR_PRESETS = Object.freeze({\n DOCUMENT: 'document',\n HANDWRITING: 'handwriting',\n SCAN: 'scan',\n RECEIPT: 'receipt',\n MAGAZINE: 'magazine',\n INVOICE: 'invoice',\n BUSINESS_CARD: 'business-card',\n PASSPORT: 'passport',\n DRIVER_LICENSE: 'driver-license'\n} as const)\n\nexport const OCR_LANGUAGES = Object.freeze({\n AFRIKAANS: 'afr',\n AMHARIC: 'amh',\n ARABIC: 'ara',\n ASSAMESE: 'asm',\n AZERBAIJANI: 'aze',\n AZERBAIJANI_CYRILLIC: 'aze_cyrl',\n BELARUSIAN: 'bel',\n BENGALI: 'ben',\n TIBETAN: 'bod',\n BOSNIAN: 'bos',\n BRETON: 'bre',\n BULGARIAN: 'bul',\n CATALAN: 'cat',\n CEBUANO: 'ceb',\n CZECH: 'ces',\n SIMPLIFIED_CHINESE: 'chi_sim',\n SIMPLIFIED_CHINESE_VERTICAL: 'chi_sim_vert',\n TRADITIONAL_CHINESE: 'chi_tra',\n TRADITIONAL_CHINESE_VERTICAL: 'chi_tra_vert',\n CHEROKEE: 'chr',\n CORSICAN: 'cos',\n WELSH: 'cym',\n DANISH: 'dan',\n DANISH_FRAKTUR: 'dan_frak',\n GERMAN: 'deu',\n GERMAN_FRAKTUR: 'deu_frak',\n GERMAN_LATIN: 'deu_latf',\n DIVESH: 'div',\n DZONGKHA: 'dzo',\n GREEK: 'ell',\n ENGLISH: 'eng',\n MIDDLE_ENGLISH: 'enm',\n 
ESPERANTO: 'epo',\n EQUATORIAL_GUINEAN: 'equ',\n ESTONIAN: 'est',\n BASQUE: 'eus',\n FAROESE: 'fao',\n PERSIAN: 'fas',\n FILIPINO: 'fil',\n FINNISH: 'fin',\n FRENCH: 'fra',\n OLD_FRENCH: 'frm',\n FRISIAN: 'fry',\n SCOTTISH_GAELIC: 'gla',\n IRISH: 'gle',\n GALICIAN: 'glg',\n ANCIENT_GREEK: 'grc',\n GUJARATI: 'guj',\n HAITIAN_CREOLE: 'hat',\n HEBREW: 'heb',\n HINDI: 'hin',\n CROATIAN: 'hrv',\n HUNGARIAN: 'hun',\n ARMENIAN: 'hye',\n IGBO: 'iku',\n INDONESIAN: 'ind',\n ICELANDIC: 'isl',\n ITALIAN: 'ita',\n OLD_ITALIAN: 'ita_old',\n JAVANESE: 'jav',\n JAPANESE: 'jpn',\n JAPANESE_VERTICAL: 'jpn_vert',\n KANNADA: 'kan',\n GEORGIAN: 'kat',\n OLD_GEORGIAN: 'kat_old',\n KAZAKH: 'kaz',\n KHMER: 'khm',\n KIRGHIZ: 'kir',\n KURDISH: 'kmr',\n KOREAN: 'kor',\n KOREAN_VERTICAL: 'kor_vert',\n LAO: 'lao',\n LATIN: 'lat',\n LATVIAN: 'lav',\n LITHUANIAN: 'lit',\n LUXEMBOURGISH: 'ltz',\n MALAYALAM: 'mal',\n MARATHI: 'mar',\n MACEDONIAN: 'mkd',\n MALTESE: 'mlt',\n MONGOLIAN: 'mon',\n MAORI: 'mri',\n MALAY: 'msa',\n MYANMAR: 'mya',\n NEPALI: 'nep',\n DUTCH: 'nld',\n NORWEGIAN: 'nor',\n OCCITAN: 'oci',\n ODISHA: 'ori',\n OSD: 'osd',\n PUNJABI: 'pan',\n POLISH: 'pol',\n PORTUGUESE: 'por',\n PASHTO: 'pus',\n QUECHUA: 'que',\n ROMANIAN: 'ron',\n RUSSIAN: 'rus',\n SANSKRIT: 'san',\n SINHALA: 'sin',\n SLOVAK: 'slk',\n SLOVAK_FRAKTUR: 'slk_frak',\n SLOVENIAN: 'slv',\n SINDHI: 'snd',\n SPANISH: 'spa',\n OLD_SPANISH: 'spa_old',\n ALBANIAN: 'sqi',\n SERBIAN: 'srp',\n SERBIAN_LATIN: 'srp_latn',\n SUNDIANESE: 'sun',\n SWAHILI: 'swa',\n SWEDISH: 'swe',\n SYRIAC: 'syr',\n TAMIL: 'tam',\n TATAR: 'tat',\n TELUGU: 'tel',\n TAJIK: 'tgk',\n TAGALOG: 'tgl',\n THAI: 'tha',\n TIGRINYA: 'tir',\n TONGAN: 'ton',\n TURKISH: 'tur',\n UIGHUR: 'uig',\n UKRAINIAN: 'ukr',\n URDU: 'urd',\n UZBEK: 'uzb',\n UZBEK_CYRILLIC: 'uzb_cyrl',\n VIETNAMESE: 'vie',\n YIDDISH: 'yid',\n YORUBA: 'yor'\n} as const)\n\nexport type OcrPresetType = (typeof OCR_PRESETS)[keyof typeof OCR_PRESETS]\nexport type OcrLanguageType = (typeof 
OCR_LANGUAGES)[keyof typeof OCR_LANGUAGES]\n","export type Nullable<T> = T | null\nexport type Optional<T> = T | undefined\nexport type Nullish<T> = Nullable<T> | Optional<T>\n\nconst isNullOrUndefined = <T>(suspect: Nullish<T>): suspect is null | undefined => {\n if (typeof suspect === 'undefined' || suspect === null) {\n return true\n }\n\n if (typeof suspect === 'string') {\n return suspect.trim() === ''\n }\n\n return false\n}\n\nconst isValidObject = (suspect: unknown): suspect is object => {\n return typeof suspect === 'object' && suspect !== undefined && suspect !== null\n}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-object-type\nconst isEmptyObject = (suspect: unknown): suspect is {} => {\n return isValidObject(suspect) && Object.keys(suspect).length === 0\n}\n\nconst isInvalidOrEmptyArray = <T>(suspect: Nullish<Array<T>>): suspect is null | undefined | [] => {\n if (isNullOrUndefined(suspect)) {\n return true\n }\n\n if (!Array.isArray(suspect)) {\n throw new Error('isInvalidOrEmptyArray expects an array')\n }\n\n return suspect.length === 0\n}\n\nconst isValidArrayWithMembers = <T>(suspect: Nullish<Array<T>>): suspect is Array<T> => !isInvalidOrEmptyArray(suspect)\n\nconst or = <T>(value: Nullish<T>, or: T): T => {\n if (isNullOrUndefined(value)) {\n return or\n }\n\n return value\n}\n\nexport {\n isInvalidOrEmptyArray,\n isNullOrUndefined,\n isValidArrayWithMembers,\n isValidObject,\n isEmptyObject,\n or\n}\n","import { isNullOrUndefined } from './nullable.ts'\n\nconst env = (key: string, fallback = '') => {\n const value = process.env[key]\n\n if (!isNullOrUndefined(value)) {\n return value\n }\n\n if (!isNullOrUndefined(fallback)) {\n return fallback\n }\n\n return ''\n}\n\nconst envOr = (key: string, fallback = '') => {\n const value = process.env[key] as string\n\n if (isNullOrUndefined(value)) {\n return fallback\n }\n\n return ''\n}\n\nexport { env, envOr }\n","/**\n * Configuration module for Anyparser default options\n * @module 
options.default\n */\n\nimport { FALLBACK_API_URL } from '@/config/hardcoded.ts'\nimport { env } from '@src/utils/env.ts'\nimport type { AnyparserOption } from '@anyparser/core'\n\n/**\n * Retrieves and validates the API URL from environment or fallback\n * @returns {URL} Valid API URL instance\n */\nexport const getApiUrl = (): URL => {\n const value = env('ANYPARSER_API_URL', FALLBACK_API_URL)\n\n try {\n return new URL(value)\n } catch {\n console.error('Invalid API URL %s', value)\n }\n\n console.debug('Defaulting to %s', FALLBACK_API_URL)\n\n return new URL(FALLBACK_API_URL)\n}\n\n/**\n * Default configuration options for Anyparser\n * @type {Option}\n */\nexport const defaultOptions: AnyparserOption = {\n apiUrl: getApiUrl(),\n apiKey: env('ANYPARSER_API_KEY'),\n format: 'json',\n model: 'text',\n image: true,\n table: true\n}\n","/**\n * Options module for Anyparser configuration and parsing.\n * @module options\n */\n\nimport type { AnyparserOption, AnyparserParsedOption } from './anyparser.d.ts'\nimport { defaultOptions } from './options.default.ts'\n\n/**\n * Validate API key format and presence\n * @param apiKey - API key to validate\n * @throws {Error} If API key is invalid or missing\n */\nexport function validateApiKey (apiKey: string | undefined): void {\n if (!apiKey) {\n throw new Error('API key is required')\n }\n\n if (typeof apiKey !== 'string' || apiKey.trim().length === 0) {\n throw new Error('API key must be a non-empty string')\n }\n}\n\n/**\n * Build final options by merging defaults with provided options.\n * @param options - User-provided options to override defaults\n * @returns Complete options with all required fields\n * @throws {Error} If required options are missing or invalid\n */\nexport function buildOptions (options?: AnyparserOption): AnyparserParsedOption {\n const mergedOptions = { ...defaultOptions, ...options }\n\n validateApiKey(mergedOptions.apiKey)\n\n if (!mergedOptions.apiUrl) {\n throw new Error('API URL is 
required')\n }\n\n const parsedOptions: AnyparserParsedOption = {\n apiUrl: mergedOptions.apiUrl,\n apiKey: mergedOptions.apiKey as string,\n format: mergedOptions.format || 'json',\n model: mergedOptions.model || 'text',\n encoding: mergedOptions.encoding || 'utf-8',\n image: mergedOptions.image,\n table: mergedOptions.table,\n ocrLanguage: mergedOptions.ocrLanguage,\n ocrPreset: mergedOptions.ocrPreset,\n url: mergedOptions.url,\n maxDepth: mergedOptions.maxDepth,\n maxExecutions: mergedOptions.maxExecutions,\n strategy: mergedOptions.strategy,\n traversalScope: mergedOptions.traversalScope\n }\n\n return parsedOptions\n}\n","/**\n * Validation module for options\n * @module validation\n */\n\nimport { isNullOrUndefined } from '@src/utils/nullable.ts'\nimport type { AnyparserOption } from '@anyparser/core'\nimport { OCR_LANGUAGES, OCR_PRESETS } from '../config/hardcoded.ts'\n\n/**\n * Validates parsing options configuration\n * @param {Option} parsed - Options to validate\n * @throws {Error} If validation fails\n */\nexport const validateOption = (parsed: AnyparserOption) => {\n if (isNullOrUndefined(parsed.apiUrl)) {\n throw new Error('API URL is required')\n }\n\n if (!isNullOrUndefined(parsed.ocrLanguage)) {\n parsed.ocrLanguage.forEach((language: string) => {\n if (!Object.values(OCR_LANGUAGES as Record<string, string>).includes(language)) {\n throw new Error('Invalid OCR language')\n }\n })\n }\n\n if (!isNullOrUndefined(parsed.ocrPreset)) {\n if (!Object.values(OCR_PRESETS as Record<string, string>).includes(parsed.ocrPreset)) {\n throw new Error('Invalid OCR preset')\n }\n }\n}\n","/**\n * Validation module for file paths\n * @module validation\n */\n\nimport { access } from 'node:fs/promises'\nimport type { PathValidationResult } from './validation.d.ts'\n\n/**\n * Validates file paths exist and are accessible\n * @param {string|string[]} filePaths - Single path or array of paths\n * @returns {Promise<PathValidationResult>} Validation result\n */\nexport 
const validatePath = async (filePaths: string | string[]): Promise<PathValidationResult> => {\n // Add input validation\n if (!filePaths) {\n return {\n valid: false,\n error: new Error('No files provided')\n }\n }\n\n const files = Array.isArray(filePaths) ? filePaths : [filePaths]\n\n if (files.length === 0) {\n return {\n valid: false,\n error: new Error('No files provided')\n }\n }\n\n for (const filePath of files) {\n try {\n await access(filePath)\n } catch (error) {\n return {\n valid: false,\n error: error as Error\n }\n }\n }\n\n return {\n valid: true,\n files\n }\n}\n","import { isNullOrUndefined } from '@src/utils/nullable.ts'\n\nexport const getURLToCrawl = (filePaths: string | string[]) => {\n if (Array.isArray(filePaths)) {\n const filePath = filePaths.find(x => !isNullOrUndefined(x))\n\n if (!isNullOrUndefined(filePath)) {\n return new URL(filePath).toString()\n }\n }\n\n return new URL(filePaths).toString()\n}\n","export const toUnderscore = (x = '') => x.trim().split(/\\.?(?=[A-Z])/).join('_').toLowerCase()\n\nexport const hyphenToCamel = (x: string) => x.replace(/-+(.)/g, (_, c) => c.toUpperCase())\n\nexport const underscoreToCamel = (x: string) => x.replace(/_+(.)/g, (_, c) => c.toUpperCase())\n\nexport const camelToTitle = (c: string) => c\n// Insert space before capital letters, but not if they're part of an acronym\n .replace(/([^A-Z])([A-Z])/g, '$1$2')\n// Capitalize first letter\n .replace(/^./, match => match.toUpperCase())\n// Remove any extra spaces\n .replace(/\\s+/g, '')\n","import { underscoreToCamel } from './casing.ts'\nimport { isValidObject } from './nullable.ts'\n\nexport const transformToCamel = <T>(item: unknown): T => {\n if (item instanceof Date || item instanceof RegExp || item instanceof URL) {\n return item as T\n }\n\n // If the item is a Function, return it as-is\n if (typeof item === 'function') {\n return item as T\n }\n\n // If the item is null or undefined, return it as-is\n if (item === null || item === undefined) 
{\n return item as T\n }\n\n // If the item is an array, recursively transform each element\n if (Array.isArray(item)) {\n return item.map(el => transformToCamel(el)) as unknown as T\n }\n\n // If the item is a Map or Set, do not modify its structure\n if (item instanceof Map) {\n const transformedMap = new Map()\n item.forEach((value, key) => {\n transformedMap.set(transformToCamel(key), transformToCamel(value))\n })\n\n return transformedMap as unknown as T\n }\n\n if (item instanceof Set) {\n const transformedSet = new Set()\n item.forEach((value) => {\n transformedSet.add(transformToCamel(value))\n })\n\n return transformedSet as unknown as T\n }\n\n // If the item is a plain object, recursively transform its keys to camelCase\n if (isValidObject(item)) {\n return Object.keys(item).reduce((acc: any, key: string) => { // eslint-disable-line @typescript-eslint/no-explicit-any\n const camelKey = underscoreToCamel(key)\n acc[camelKey] = transformToCamel((item as Record<string, unknown>)[key])\n\n return acc\n }, {} as T)\n }\n\n // If it's a primitive type (string, number, boolean), return it as-is\n return item as T\n}\n","import { wrappedFetch } from '@src/utils/fetcher.ts'\nimport type { AnyparserOption, Result } from '@anyparser/core'\nimport { buildForm } from './form.ts'\nimport { validateAndParse } from './validator/index.ts'\nimport { transformToCamel } from '@src/utils/camel-case.ts'\n\n/**\n * Main class for parsing items using the Anyparser API.\n */\nexport class Anyparser {\n public options?: AnyparserOption\n\n /**\n * Initialize the parser with optional configuration.\n * @param options - Configuration options for the parser\n */\n constructor (options?: AnyparserOption) {\n this.options = options\n }\n\n /**\n * Parse files using the Anyparser API.\n * @param filePathsOrUrl - A single file path or list of file paths to parse, or a start URL for crawling\n * @returns List of parsed file results if format is JSON, or raw text content if format is 
text/markdown\n * @throws Error if the API request fails\n */\n async parse (filePathsOrUrl: string | string[]): Promise<Result> {\n const parsed = await validateAndParse(filePathsOrUrl, this.options)\n const { apiUrl, apiKey } = parsed\n\n const formData = buildForm(parsed)\n\n const fetchOptions = {\n method: 'POST',\n body: formData,\n headers: {\n ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})\n }\n }\n\n const url = new URL('/parse/v1', apiUrl)\n\n const response = await wrappedFetch(url, fetchOptions)\n\n switch (parsed.format) {\n case 'json':\n return transformToCamel<Result>(await response.json())\n case 'markdown':\n case 'html':\n return await response.text()\n default:\n throw new Error(`Unsupported format: ${parsed.format}`)\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAO,IAAM,eAAN,cAA2B,MAAM;AAAA,EAItC,YAAa,SAAiB,OAAc,YAAoB;AAC9D,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,QAAQ;AACb,SAAK,aAAa;AAAA,EACpB;AACF;AAEO,IAAM,eAAe,CAAO,OAA+B,YAA0B;AAC1F,QAAM,WAAW,MAAM,MAAM,OAAO,OAAO;AAE3C,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,EAAE,QAAQ,WAAW,IAAI;AAC/B,UAAM,OAAO,MAAM,SAAS,KAAK;AAEjC,UAAM,IAAI;AAAA,MACR,QAAQ,MAAM,IAAI,UAAU,KAAK,KAAK;AAAA,MACtC,IAAI,MAAM,IAAI;AAAA,MACd;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;;;ACbO,SAAS,UAAW,QAAyC;AAdpE;AAeE,QAAM,WAAW,IAAI,SAAS;AAG9B,WAAS,OAAO,UAAU,OAAO,MAAM;AACvC,WAAS,OAAO,SAAS,OAAO,KAAK;AAGrC,MAAI,OAAO,UAAU,SAAS,OAAO,UAAU,WAAW;AACxD,QAAI,OAAO,UAAU,QAAW;AAC9B,eAAS,OAAO,SAAS,OAAO,OAAO,KAAK,CAAC;AAAA,IAC/C;AAEA,QAAI,OAAO,UAAU,QAAW;AAC9B,eAAS,OAAO,SAAS,OAAO,OAAO,KAAK,CAAC;AAAA,IAC/C;AAAA,EACF;AAGA,MAAI,OAAO,UAAU,OAAO;AAC1B,SAAI,YAAO,gBAAP,mBAAoB,QAAQ;AAC9B,eAAS,OAAO,eAAe,OAAO,YAAY,KAAK,GAAG,CAAC;AAAA,IAC7D;AAEA,QAAI,OAAO,WAAW;AACpB,eAAS,OAAO,aAAa,OAAO,SAAS;AAAA,IAC/C;AAAA,EACF;AAGA,MAAI,OAAO,UAAU,WAAW;AAC9B,aAAS,OAAO,QAAO,YAAO,QAAP,YAAc,EAAE;AAEvC,QAAI,OAAO,aAAa,QAAW;AACjC,eAAS,OAAO,YAAY,OAAO,OAAO,QAAQ,CAAC;AAAA,IACrD;AAEA,QAAI,OAAO,kBAAkB,QAAW;AACtC,eAAS,O
AAO,iBAAiB,OAAO,OAAO,aAAa,CAAC;AAAA,IAC/D;AAEA,QAAI,OAAO,UAAU;AACnB,eAAS,OAAO,YAAY,OAAO,QAAQ;AAAA,IAC7C;AAEA,QAAI,OAAO,gBAAgB;AACzB,eAAS,OAAO,kBAAkB,OAAO,cAAc;AAAA,IACzD;AAAA,EACF,OAAO;AAEL,QAAI,OAAO,OAAO;AAChB,iBAAW,QAAQ,OAAO,OAAO;AAC/B,iBAAS,OAAO,SAAS,KAAK,UAAU,KAAK,QAAQ;AAAA,MACvD;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;;;ACpEA,YAAuB;AACvB,uBAAyB;AACzB,SAAoB;;;ACDb,IAAM,mBAAmB;AAEzB,IAAM,cAAc,OAAO,OAAO;AAAA,EACvC,UAAU;AAAA,EACV,aAAa;AAAA,EACb,MAAM;AAAA,EACN,SAAS;AAAA,EACT,UAAU;AAAA,EACV,SAAS;AAAA,EACT,eAAe;AAAA,EACf,UAAU;AAAA,EACV,gBAAgB;AAClB,CAAU;AAEH,IAAM,gBAAgB,OAAO,OAAO;AAAA,EACzC,WAAW;AAAA,EACX,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,UAAU;AAAA,EACV,aAAa;AAAA,EACb,sBAAsB;AAAA,EACtB,YAAY;AAAA,EACZ,SAAS;AAAA,EACT,SAAS;AAAA,EACT,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,SAAS;AAAA,EACT,SAAS;AAAA,EACT,OAAO;AAAA,EACP,oBAAoB;AAAA,EACpB,6BAA6B;AAAA,EAC7B,qBAAqB;AAAA,EACrB,8BAA8B;AAAA,EAC9B,UAAU;AAAA,EACV,UAAU;AAAA,EACV,OAAO;AAAA,EACP,QAAQ;AAAA,EACR,gBAAgB;AAAA,EAChB,QAAQ;AAAA,EACR,gBAAgB;AAAA,EAChB,cAAc;AAAA,EACd,QAAQ;AAAA,EACR,UAAU;AAAA,EACV,OAAO;AAAA,EACP,SAAS;AAAA,EACT,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,oBAAoB;AAAA,EACpB,UAAU;AAAA,EACV,QAAQ;AAAA,EACR,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,YAAY;AAAA,EACZ,SAAS;AAAA,EACT,iBAAiB;AAAA,EACjB,OAAO;AAAA,EACP,UAAU;AAAA,EACV,eAAe;AAAA,EACf,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,UAAU;AAAA,EACV,WAAW;AAAA,EACX,UAAU;AAAA,EACV,MAAM;AAAA,EACN,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,SAAS;AAAA,EACT,aAAa;AAAA,EACb,UAAU;AAAA,EACV,UAAU;AAAA,EACV,mBAAmB;AAAA,EACnB,SAAS;AAAA,EACT,UAAU;AAAA,EACV,cAAc;AAAA,EACd,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,iBAAiB;AAAA,EACjB,KAAK;AAAA,EACL,OAAO;AAAA,EACP,SAAS;AAAA,EACT,YAAY;AAAA,EACZ,eAAe;AAAA,EACf,WAAW;AAAA,EACX,SAAS;AAAA,EACT,YAAY;AAAA,EACZ,SAAS;AAAA,EACT,WAAW;AAAA,EACX,OAAO;AAAA,EACP,OAAO;AAAA,EACP,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,WAAW;AAAA,EACX,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,KAAK;AAAA,EACL,SAAS;AAAA,EACT,QAAQ;
AAAA,EACR,YAAY;AAAA,EACZ,QAAQ;AAAA,EACR,SAAS;AAAA,EACT,UAAU;AAAA,EACV,SAAS;AAAA,EACT,UAAU;AAAA,EACV,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,QAAQ;AAAA,EACR,SAAS;AAAA,EACT,aAAa;AAAA,EACb,UAAU;AAAA,EACV,SAAS;AAAA,EACT,eAAe;AAAA,EACf,YAAY;AAAA,EACZ,SAAS;AAAA,EACT,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,OAAO;AAAA,EACP,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,SAAS;AAAA,EACT,MAAM;AAAA,EACN,UAAU;AAAA,EACV,QAAQ;AAAA,EACR,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,MAAM;AAAA,EACN,OAAO;AAAA,EACP,gBAAgB;AAAA,EAChB,YAAY;AAAA,EACZ,SAAS;AAAA,EACT,QAAQ;AACV,CAAU;;;ACjJV,IAAM,oBAAoB,CAAI,YAAqD;AACjF,MAAI,OAAO,YAAY,eAAe,YAAY,MAAM;AACtD,WAAO;AAAA,EACT;AAEA,MAAI,OAAO,YAAY,UAAU;AAC/B,WAAO,QAAQ,KAAK,MAAM;AAAA,EAC5B;AAEA,SAAO;AACT;AAEA,IAAM,gBAAgB,CAAC,YAAwC;AAC7D,SAAO,OAAO,YAAY,YAAY,YAAY,UAAa,YAAY;AAC7E;;;AChBA,IAAM,MAAM,CAAC,KAAa,WAAW,OAAO;AAC1C,QAAM,QAAQ,QAAQ,IAAI,GAAG;AAE7B,MAAI,CAAC,kBAAkB,KAAK,GAAG;AAC7B,WAAO;AAAA,EACT;AAEA,MAAI,CAAC,kBAAkB,QAAQ,GAAG;AAChC,WAAO;AAAA,EACT;AAEA,SAAO;AACT;;;ACDO,IAAM,YAAY,MAAW;AAClC,QAAM,QAAQ,IAAI,qBAAqB,gBAAgB;AAEvD,MAAI;AACF,WAAO,IAAI,IAAI,KAAK;AAAA,EACtB,SAAQ;AACN,YAAQ,MAAM,sBAAsB,KAAK;AAAA,EAC3C;AAEA,UAAQ,MAAM,oBAAoB,gBAAgB;AAElD,SAAO,IAAI,IAAI,gBAAgB;AACjC;AAMO,IAAM,iBAAkC;AAAA,EAC7C,QAAQ,UAAU;AAAA,EAClB,QAAQ,IAAI,mBAAmB;AAAA,EAC/B,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,OAAO;AAAA,EACP,OAAO;AACT;;;ACzBO,SAAS,eAAgB,QAAkC;AAChE,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,qBAAqB;AAAA,EACvC;AAEA,MAAI,OAAO,WAAW,YAAY,OAAO,KAAK,EAAE,WAAW,GAAG;AAC5D,UAAM,IAAI,MAAM,oCAAoC;AAAA,EACtD;AACF;AAQO,SAAS,aAAc,SAAkD;AAC9E,QAAM,gBAAgB,kCAAK,iBAAmB;AAE9C,iBAAe,cAAc,MAAM;AAEnC,MAAI,CAAC,cAAc,QAAQ;AACzB,UAAM,IAAI,MAAM,qBAAqB;AAAA,EACvC;AAEA,QAAM,gBAAuC;AAAA,IAC3C,QAAQ,cAAc;AAAA,IACtB,QAAQ,cAAc;AAAA,IACtB,QAAQ,cAAc,UAAU;AAAA,IAChC,OAAO,cAAc,SAAS;AAAA,IAC9B,UAAU,cAAc,YAAY;AAAA,IACpC,OAAO,cAAc;AAAA,IACrB,OAAO,cAAc;AAAA,IACrB,aAAa,cAAc;AAAA,IAC3B,WAAW,cAAc;AAAA,IACzB,KAAK,cAAc;AAAA,IACnB,UAAU,cAAc;AAAA,IACxB,eAAe,cAAc;AAAA,IAC7B,UAAU,cAAc;AAAA,IACxB,gBAAgB,cAAc;AAAA,EAChC;
AAEA,SAAO;AACT;;;AC1CO,IAAM,iBAAiB,CAAC,WAA4B;AACzD,MAAI,kBAAkB,OAAO,MAAM,GAAG;AACpC,UAAM,IAAI,MAAM,qBAAqB;AAAA,EACvC;AAEA,MAAI,CAAC,kBAAkB,OAAO,WAAW,GAAG;AAC1C,WAAO,YAAY,QAAQ,CAAC,aAAqB;AAC/C,UAAI,CAAC,OAAO,OAAO,aAAuC,EAAE,SAAS,QAAQ,GAAG;AAC9E,cAAM,IAAI,MAAM,sBAAsB;AAAA,MACxC;AAAA,IACF,CAAC;AAAA,EACH;AAEA,MAAI,CAAC,kBAAkB,OAAO,SAAS,GAAG;AACxC,QAAI,CAAC,OAAO,OAAO,WAAqC,EAAE,SAAS,OAAO,SAAS,GAAG;AACpF,YAAM,IAAI,MAAM,oBAAoB;AAAA,IACtC;AAAA,EACF;AACF;;;AC3BA,sBAAuB;AAQhB,IAAM,eAAe,CAAO,cAAgE;AAEjG,MAAI,CAAC,WAAW;AACd,WAAO;AAAA,MACL,OAAO;AAAA,MACP,OAAO,IAAI,MAAM,mBAAmB;AAAA,IACtC;AAAA,EACF;AAEA,QAAM,QAAQ,MAAM,QAAQ,SAAS,IAAI,YAAY,CAAC,SAAS;AAE/D,MAAI,MAAM,WAAW,GAAG;AACtB,WAAO;AAAA,MACL,OAAO;AAAA,MACP,OAAO,IAAI,MAAM,mBAAmB;AAAA,IACtC;AAAA,EACF;AAEA,aAAW,YAAY,OAAO;AAC5B,QAAI;AACF,gBAAM,wBAAO,QAAQ;AAAA,IACvB,SAAS,OAAO;AACd,aAAO;AAAA,QACL,OAAO;AAAA,QACP;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AAAA,IACL,OAAO;AAAA,IACP;AAAA,EACF;AACF;;;AC5CO,IAAM,gBAAgB,CAAC,cAAiC;AAC7D,MAAI,MAAM,QAAQ,SAAS,GAAG;AAC5B,UAAM,WAAW,UAAU,KAAK,OAAK,CAAC,kBAAkB,CAAC,CAAC;AAE1D,QAAI,CAAC,kBAAkB,QAAQ,GAAG;AAChC,aAAO,IAAI,IAAI,QAAQ,EAAE,SAAS;AAAA,IACpC;AAAA,EACF;AAEA,SAAO,IAAI,IAAI,SAAS,EAAE,SAAS;AACrC;;;AROA,SAAe,gBAAiB,UAAiC;AAAA;AAC/D,QAAI;AACF,YAAS,UAAO,QAAQ;AAAA,IAC1B,SAAS,OAAO;AACd,UAAI,iBAAiB,SAAS,UAAU,SAAS,MAAM,SAAS,UAAU;AACxE,cAAM,IAAI,MAAM,QAAQ,QAAQ,+BAA+B;AAAA,MACjE;AAEA,YAAM;AAAA,IACR;AAEA,QAAI;AAEF,YAAM,aAAa,MAAS,QAAK,UAAU,GAAG;AAC9C,YAAM,WAAW,MAAM;AAAA,IACzB,SAAS,OAAO;AACd,UAAI,iBAAiB,SAAS,UAAU,UAAU,MAAM,SAAS,WAAW,MAAM,SAAS,UAAU;AACnG,cAAM,IAAI,MAAM,QAAQ,QAAQ,+BAA+B;AAAA,MACjE;AAEA,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAQA,SAAsB,iBACpB,WACA,SACgC;AAAA;AAChC,UAAM,SAAS,aAAa,OAAO;AACnC,mBAAe,MAAM;AAErB,QAAI,CAAC,CAAC,QAAQ,YAAY,MAAM,EAAE,SAAS,OAAO,MAAM,GAAG;AACzD,YAAM,IAAI,MAAM,uBAAuB,OAAO,MAAM,EAAE;AAAA,IACxD;AAGA,UAAM,aAAY,mCAAS,WAAU;AAGrC,UAAM,SAAS,YACX,EAAE,OAAO,MAAM,OAAO,CAAC,cAAc,SAAS,CAAC,EAAE,IACnD,MAAM,aAAa,SAAS;AAE9B,QAAI,OAAO,UAAU,OAAO;AAC1B,YAAM,OAAO;AAAA,IACf;AAEA,UAAM,eAAsC;AAAA,MAC1C,QAA
Q,OAAO;AAAA,MACf,QAAQ,OAAO;AAAA,MACf,QAAQ,OAAO;AAAA,MACf,OAAO,OAAO;AAAA,MACd,OAAO,OAAO;AAAA,MACd,OAAO,OAAO;AAAA,MACd,aAAa,OAAO;AAAA,MACpB,WAAW,OAAO;AAAA,MAClB,KAAK,OAAO;AAAA,MACZ,UAAU,OAAO;AAAA,MACjB,eAAe,OAAO;AAAA,MACtB,UAAU,OAAO;AAAA,MACjB,gBAAgB,OAAO;AAAA,MACvB,UAAU,OAAO;AAAA,IACnB;AAGA,QAAI,WAAW;AACb,mBAAa,MAAM,OAAO,MAAM,CAAC;AAAA,IACnC,OAAO;AAEL,YAAM,YAAY,CAAC;AAEnB,iBAAW,YAAY,OAAO,OAAO;AACnC,cAAM,gBAAgB,QAAQ;AAE9B,cAAM,aAAmB,uBAAiB,QAAQ;AAClD,cAAM,SAAS,CAAC;AAEhB;AAAA,qCAA0B,aAA1B,0EAAsC;AAA3B,kBAAM,QAAjB;AACE,mBAAO,KAAK,KAAK;AAAA,UACnB;AAAA,iBAFA,MAtGN;AAsGM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,cAAM,SAAS,OAAO,OAAO,MAAM;AACnC,cAAM,WAAW,IAAI,KAAK,CAAC,MAAM,OAAG,2BAAS,QAAQ,GAAG;AAAA,UACtD,MAAM;AAAA,QACR,CAAC;AAED,kBAAU,KAAK;AAAA,UACb,cAAU,2BAAS,QAAQ;AAAA,UAC3B;AAAA,QACF,CAAC;AAAA,MACH;AAEA,mBAAa,QAAQ;AAAA,IACvB;AAEA,WAAO;AAAA,EACT;AAAA;;;ASrHO,IAAM,oBAAoB,CAAC,MAAc,EAAE,QAAQ,UAAU,CAAC,GAAG,MAAM,EAAE,YAAY,CAAC;;;ACDtF,IAAM,mBAAmB,CAAI,SAAqB;AACvD,MAAI,gBAAgB,QAAQ,gBAAgB,UAAU,gBAAgB,KAAK;AACzE,WAAO;AAAA,EACT;AAGA,MAAI,OAAO,SAAS,YAAY;AAC9B,WAAO;AAAA,EACT;AAGA,MAAI,SAAS,QAAQ,SAAS,QAAW;AACvC,WAAO;AAAA,EACT;AAGA,MAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,WAAO,KAAK,IAAI,QAAM,iBAAiB,EAAE,CAAC;AAAA,EAC5C;AAGA,MAAI,gBAAgB,KAAK;AACvB,UAAM,iBAAiB,oBAAI,IAAI;AAC/B,SAAK,QAAQ,CAAC,OAAO,QAAQ;AAC3B,qBAAe,IAAI,iBAAiB,GAAG,GAAG,iBAAiB,KAAK,CAAC;AAAA,IACnE,CAAC;AAED,WAAO;AAAA,EACT;AAEA,MAAI,gBAAgB,KAAK;AACvB,UAAM,iBAAiB,oBAAI,IAAI;AAC/B,SAAK,QAAQ,CAAC,UAAU;AACtB,qBAAe,IAAI,iBAAiB,KAAK,CAAC;AAAA,IAC5C,CAAC;AAED,WAAO;AAAA,EACT;AAGA,MAAI,cAAc,IAAI,GAAG;AACvB,WAAO,OAAO,KAAK,IAAI,EAAE,OAAO,CAAC,KAAU,QAAgB;AACzD,YAAM,WAAW,kBAAkB,GAAG;AACtC,UAAI,QAAQ,IAAI,iBAAkB,KAAiC,GAAG,CAAC;AAEvE,aAAO;AAAA,IACT,GAAG,CAAC,CAAM;AAAA,EACZ;AAGA,SAAO;AACT;;;AC7CO,IAAM,YAAN,MAAgB;AAAA;AAAA;AAAA;AAAA;AAAA,EAOrB,YAAa,SAA2B;AACtC,SAAK,UAAU;AAAA,EACjB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQM,MAAO,gBAAoD;AAAA;AAC/D,YAAM,SAAS,MAAM,iBAAiB,gBAAgB,KAAK,OAAO;AAClE,YAAM,EAAE,QAAQ,OAAO,IAAI;AAE3B,YAAM,WAAW,UAAU,MAAM;AAEjC,Y
AAM,eAAe;AAAA,QACnB,QAAQ;AAAA,QACR,MAAM;AAAA,QACN,SAAS,mBACH,SAAS,EAAE,eAAe,UAAU,MAAM,GAAG,IAAI,CAAC;AAAA,MAE1D;AAEA,YAAM,MAAM,IAAI,IAAI,aAAa,MAAM;AAEvC,YAAM,WAAW,MAAM,aAAa,KAAK,YAAY;AAErD,cAAQ,OAAO,QAAQ;AAAA,QACrB,KAAK;AACH,iBAAO,iBAAyB,MAAM,SAAS,KAAK,CAAC;AAAA,QACvD,KAAK;AAAA,QACL,KAAK;AACH,iBAAO,MAAM,SAAS,KAAK;AAAA,QAC7B;AACE,gBAAM,IAAI,MAAM,uBAAuB,OAAO,MAAM,EAAE;AAAA,MAC1D;AAAA,IACF;AAAA;AACF;","names":[]} |
+266
| /** | ||
| * Main class for parsing items using the Anyparser API. | ||
| */ | ||
| declare class Anyparser { | ||
| options?: AnyparserOption; | ||
| /** | ||
| * Initialize the parser with optional configuration. | ||
| * @param options - Configuration options for the parser | ||
| */ | ||
| constructor(options?: AnyparserOption); | ||
| /** | ||
| * Parse files using the Anyparser API. | ||
| * @param filePathsOrUrl - A single file path or list of file paths to parse, or a start URL for crawling | ||
| * @returns List of parsed file results if format is JSON, or raw text content if format is text/markdown | ||
| * @throws Error if the API request fails | ||
| */ | ||
| parse(filePathsOrUrl: string | string[]): Promise<Result>; | ||
| } | ||
| declare const OCR_PRESETS: Readonly<{ | ||
| readonly DOCUMENT: "document"; | ||
| readonly HANDWRITING: "handwriting"; | ||
| readonly SCAN: "scan"; | ||
| readonly RECEIPT: "receipt"; | ||
| readonly MAGAZINE: "magazine"; | ||
| readonly INVOICE: "invoice"; | ||
| readonly BUSINESS_CARD: "business-card"; | ||
| readonly PASSPORT: "passport"; | ||
| readonly DRIVER_LICENSE: "driver-license"; | ||
| }>; | ||
| declare const OCR_LANGUAGES: Readonly<{ | ||
| readonly AFRIKAANS: "afr"; | ||
| readonly AMHARIC: "amh"; | ||
| readonly ARABIC: "ara"; | ||
| readonly ASSAMESE: "asm"; | ||
| readonly AZERBAIJANI: "aze"; | ||
| readonly AZERBAIJANI_CYRILLIC: "aze_cyrl"; | ||
| readonly BELARUSIAN: "bel"; | ||
| readonly BENGALI: "ben"; | ||
| readonly TIBETAN: "bod"; | ||
| readonly BOSNIAN: "bos"; | ||
| readonly BRETON: "bre"; | ||
| readonly BULGARIAN: "bul"; | ||
| readonly CATALAN: "cat"; | ||
| readonly CEBUANO: "ceb"; | ||
| readonly CZECH: "ces"; | ||
| readonly SIMPLIFIED_CHINESE: "chi_sim"; | ||
| readonly SIMPLIFIED_CHINESE_VERTICAL: "chi_sim_vert"; | ||
| readonly TRADITIONAL_CHINESE: "chi_tra"; | ||
| readonly TRADITIONAL_CHINESE_VERTICAL: "chi_tra_vert"; | ||
| readonly CHEROKEE: "chr"; | ||
| readonly CORSICAN: "cos"; | ||
| readonly WELSH: "cym"; | ||
| readonly DANISH: "dan"; | ||
| readonly DANISH_FRAKTUR: "dan_frak"; | ||
| readonly GERMAN: "deu"; | ||
| readonly GERMAN_FRAKTUR: "deu_frak"; | ||
| readonly GERMAN_LATIN: "deu_latf"; | ||
| readonly DIVESH: "div"; | ||
| readonly DZONGKHA: "dzo"; | ||
| readonly GREEK: "ell"; | ||
| readonly ENGLISH: "eng"; | ||
| readonly MIDDLE_ENGLISH: "enm"; | ||
| readonly ESPERANTO: "epo"; | ||
| readonly EQUATORIAL_GUINEAN: "equ"; | ||
| readonly ESTONIAN: "est"; | ||
| readonly BASQUE: "eus"; | ||
| readonly FAROESE: "fao"; | ||
| readonly PERSIAN: "fas"; | ||
| readonly FILIPINO: "fil"; | ||
| readonly FINNISH: "fin"; | ||
| readonly FRENCH: "fra"; | ||
| readonly OLD_FRENCH: "frm"; | ||
| readonly FRISIAN: "fry"; | ||
| readonly SCOTTISH_GAELIC: "gla"; | ||
| readonly IRISH: "gle"; | ||
| readonly GALICIAN: "glg"; | ||
| readonly ANCIENT_GREEK: "grc"; | ||
| readonly GUJARATI: "guj"; | ||
| readonly HAITIAN_CREOLE: "hat"; | ||
| readonly HEBREW: "heb"; | ||
| readonly HINDI: "hin"; | ||
| readonly CROATIAN: "hrv"; | ||
| readonly HUNGARIAN: "hun"; | ||
| readonly ARMENIAN: "hye"; | ||
| readonly IGBO: "iku"; | ||
| readonly INDONESIAN: "ind"; | ||
| readonly ICELANDIC: "isl"; | ||
| readonly ITALIAN: "ita"; | ||
| readonly OLD_ITALIAN: "ita_old"; | ||
| readonly JAVANESE: "jav"; | ||
| readonly JAPANESE: "jpn"; | ||
| readonly JAPANESE_VERTICAL: "jpn_vert"; | ||
| readonly KANNADA: "kan"; | ||
| readonly GEORGIAN: "kat"; | ||
| readonly OLD_GEORGIAN: "kat_old"; | ||
| readonly KAZAKH: "kaz"; | ||
| readonly KHMER: "khm"; | ||
| readonly KIRGHIZ: "kir"; | ||
| readonly KURDISH: "kmr"; | ||
| readonly KOREAN: "kor"; | ||
| readonly KOREAN_VERTICAL: "kor_vert"; | ||
| readonly LAO: "lao"; | ||
| readonly LATIN: "lat"; | ||
| readonly LATVIAN: "lav"; | ||
| readonly LITHUANIAN: "lit"; | ||
| readonly LUXEMBOURGISH: "ltz"; | ||
| readonly MALAYALAM: "mal"; | ||
| readonly MARATHI: "mar"; | ||
| readonly MACEDONIAN: "mkd"; | ||
| readonly MALTESE: "mlt"; | ||
| readonly MONGOLIAN: "mon"; | ||
| readonly MAORI: "mri"; | ||
| readonly MALAY: "msa"; | ||
| readonly MYANMAR: "mya"; | ||
| readonly NEPALI: "nep"; | ||
| readonly DUTCH: "nld"; | ||
| readonly NORWEGIAN: "nor"; | ||
| readonly OCCITAN: "oci"; | ||
| readonly ODISHA: "ori"; | ||
| readonly OSD: "osd"; | ||
| readonly PUNJABI: "pan"; | ||
| readonly POLISH: "pol"; | ||
| readonly PORTUGUESE: "por"; | ||
| readonly PASHTO: "pus"; | ||
| readonly QUECHUA: "que"; | ||
| readonly ROMANIAN: "ron"; | ||
| readonly RUSSIAN: "rus"; | ||
| readonly SANSKRIT: "san"; | ||
| readonly SINHALA: "sin"; | ||
| readonly SLOVAK: "slk"; | ||
| readonly SLOVAK_FRAKTUR: "slk_frak"; | ||
| readonly SLOVENIAN: "slv"; | ||
| readonly SINDHI: "snd"; | ||
| readonly SPANISH: "spa"; | ||
| readonly OLD_SPANISH: "spa_old"; | ||
| readonly ALBANIAN: "sqi"; | ||
| readonly SERBIAN: "srp"; | ||
| readonly SERBIAN_LATIN: "srp_latn"; | ||
| readonly SUNDIANESE: "sun"; | ||
| readonly SWAHILI: "swa"; | ||
| readonly SWEDISH: "swe"; | ||
| readonly SYRIAC: "syr"; | ||
| readonly TAMIL: "tam"; | ||
| readonly TATAR: "tat"; | ||
| readonly TELUGU: "tel"; | ||
| readonly TAJIK: "tgk"; | ||
| readonly TAGALOG: "tgl"; | ||
| readonly THAI: "tha"; | ||
| readonly TIGRINYA: "tir"; | ||
| readonly TONGAN: "ton"; | ||
| readonly TURKISH: "tur"; | ||
| readonly UIGHUR: "uig"; | ||
| readonly UKRAINIAN: "ukr"; | ||
| readonly URDU: "urd"; | ||
| readonly UZBEK: "uzb"; | ||
| readonly UZBEK_CYRILLIC: "uzb_cyrl"; | ||
| readonly VIETNAMESE: "vie"; | ||
| readonly YIDDISH: "yid"; | ||
| readonly YORUBA: "yor"; | ||
| }>; | ||
| type OcrPresetType$1 = (typeof OCR_PRESETS)[keyof typeof OCR_PRESETS]; | ||
| type OcrLanguageType$1 = (typeof OCR_LANGUAGES)[keyof typeof OCR_LANGUAGES]; | ||
| type AnyparserFormatType = 'json' | 'markdown' | 'html' | ||
| type AnyparserModelType = 'text' | 'ocr' | 'vlm' | 'lam' | 'crawler' | ||
| type AnyparserEncodingType = 'utf-8' | 'latin1' | ||
| interface AnyparserOption { | ||
| apiUrl?: URL | ||
| apiKey?: string | ||
| format?: AnyparserFormatType | ||
| model?: AnyparserModelType | ||
| encoding?: AnyparserEncodingType | ||
| image?: boolean | ||
| table?: boolean | ||
| files?: string | string[] | ||
| ocrLanguage?: OcrLanguageType[] | ||
| ocrPreset?: OcrPresetType | ||
| url?: string | ||
| maxDepth?: number | ||
| maxExecutions?: number | ||
| strategy?: 'LIFO' | 'FIFO' | ||
| traversalScope?: 'subtree' | 'domain' | ||
| } | ||
| // ---- Parser | ||
| interface AnyparserImageReference { | ||
| base64Data: string | ||
| displayName: string | ||
| page?: number | ||
| imageIndex: number | ||
| } | ||
| interface AnyparserResultBase { | ||
| rid: string | ||
| originalFilename: string | ||
| checksum: string | ||
| totalCharacters?: number | ||
| markdown?: string | ||
| } | ||
| interface AnyparserCrawlDirectiveBase { | ||
| type: 'HTTP Header' | 'HTML Meta' | 'Combined' | ||
| priority: number | ||
| name?: string | ||
| noindex?: boolean | ||
| nofollow?: boolean | ||
| crawlDelay?: number | ||
| unavailableAfter?: Date | ||
| } | ||
| interface AnyparserCrawlDirective extends AnyparserCrawlDirectiveBase { | ||
| type: 'Combined' | ||
| name: undefined | ||
| underlying: AnyparserCrawlDirectiveBase[] | ||
| } | ||
| interface AnyparserUrl { | ||
| url: URL | ||
| title?: string | ||
| crawledAt?: string | ||
| statusCode: number | ||
| statusMessage: string | ||
| directive: AnyparserCrawlDirective | ||
| totalCharacters?: number | ||
| markdown?: string | ||
| images?: AnyparserImageReference[] | ||
| text?: string | ||
| politenessDelay: number | ||
| } | ||
| interface AnyparserRobotsTxtDirective { | ||
| userAgent: string | ||
| disallow: Set<string> | ||
| allow: Set<string> | ||
| crawlDelay?: number | ||
| } | ||
| interface AnyparserPdfPage { | ||
| pageNumber: number | ||
| markdown?: string | ||
| text?: string | ||
| images?: AnyparserImageReference[] | ||
| } | ||
| interface AnyparserPdfResult extends AnyparserResultBase { | ||
| totalItems?: number | ||
| items?: AnyparserPdfPage[] | ||
| } | ||
| interface AnyparserCrawlResult { | ||
| rid: string | ||
| startUrl: URL | ||
| totalCharacters: number | ||
| totalItems: number | ||
| markdown: string | ||
| items?: AnyparserUrl[] | ||
| robotsDirective: AnyparserRobotsTxtDirective | ||
| } | ||
| type AnyparserResult = AnyparserCrawlResult | AnyparserPdfResult | AnyparserResultBase | ||
| type Result = AnyparserResult[] | string | ||
| export { Anyparser, type AnyparserCrawlDirective, type AnyparserCrawlDirectiveBase, type AnyparserCrawlResult, type AnyparserImageReference, type AnyparserOption, type AnyparserPdfPage, type AnyparserPdfResult, type AnyparserResult, type AnyparserResultBase, type AnyparserRobotsTxtDirective, type AnyparserUrl, OCR_LANGUAGES, OCR_PRESETS, type OcrLanguageType$1 as OcrLanguageType, type OcrPresetType$1 as OcrPresetType, type Result }; |
+266
| /** | ||
| * Main class for parsing items using the Anyparser API. | ||
| */ | ||
| declare class Anyparser { | ||
| options?: AnyparserOption; | ||
| /** | ||
| * Initialize the parser with optional configuration. | ||
| * @param options - Configuration options for the parser | ||
| */ | ||
| constructor(options?: AnyparserOption); | ||
| /** | ||
| * Parse files using the Anyparser API. | ||
| * @param filePathsOrUrl - A single file path or list of file paths to parse, or a start URL for crawling | ||
| * @returns List of parsed file results if format is JSON, or raw text content if format is text/markdown | ||
| * @throws Error if the API request fails | ||
| */ | ||
| parse(filePathsOrUrl: string | string[]): Promise<Result>; | ||
| } | ||
| declare const OCR_PRESETS: Readonly<{ | ||
| readonly DOCUMENT: "document"; | ||
| readonly HANDWRITING: "handwriting"; | ||
| readonly SCAN: "scan"; | ||
| readonly RECEIPT: "receipt"; | ||
| readonly MAGAZINE: "magazine"; | ||
| readonly INVOICE: "invoice"; | ||
| readonly BUSINESS_CARD: "business-card"; | ||
| readonly PASSPORT: "passport"; | ||
| readonly DRIVER_LICENSE: "driver-license"; | ||
| }>; | ||
| declare const OCR_LANGUAGES: Readonly<{ | ||
| readonly AFRIKAANS: "afr"; | ||
| readonly AMHARIC: "amh"; | ||
| readonly ARABIC: "ara"; | ||
| readonly ASSAMESE: "asm"; | ||
| readonly AZERBAIJANI: "aze"; | ||
| readonly AZERBAIJANI_CYRILLIC: "aze_cyrl"; | ||
| readonly BELARUSIAN: "bel"; | ||
| readonly BENGALI: "ben"; | ||
| readonly TIBETAN: "bod"; | ||
| readonly BOSNIAN: "bos"; | ||
| readonly BRETON: "bre"; | ||
| readonly BULGARIAN: "bul"; | ||
| readonly CATALAN: "cat"; | ||
| readonly CEBUANO: "ceb"; | ||
| readonly CZECH: "ces"; | ||
| readonly SIMPLIFIED_CHINESE: "chi_sim"; | ||
| readonly SIMPLIFIED_CHINESE_VERTICAL: "chi_sim_vert"; | ||
| readonly TRADITIONAL_CHINESE: "chi_tra"; | ||
| readonly TRADITIONAL_CHINESE_VERTICAL: "chi_tra_vert"; | ||
| readonly CHEROKEE: "chr"; | ||
| readonly CORSICAN: "cos"; | ||
| readonly WELSH: "cym"; | ||
| readonly DANISH: "dan"; | ||
| readonly DANISH_FRAKTUR: "dan_frak"; | ||
| readonly GERMAN: "deu"; | ||
| readonly GERMAN_FRAKTUR: "deu_frak"; | ||
| readonly GERMAN_LATIN: "deu_latf"; | ||
| readonly DIVESH: "div"; | ||
| readonly DZONGKHA: "dzo"; | ||
| readonly GREEK: "ell"; | ||
| readonly ENGLISH: "eng"; | ||
| readonly MIDDLE_ENGLISH: "enm"; | ||
| readonly ESPERANTO: "epo"; | ||
| readonly EQUATORIAL_GUINEAN: "equ"; | ||
| readonly ESTONIAN: "est"; | ||
| readonly BASQUE: "eus"; | ||
| readonly FAROESE: "fao"; | ||
| readonly PERSIAN: "fas"; | ||
| readonly FILIPINO: "fil"; | ||
| readonly FINNISH: "fin"; | ||
| readonly FRENCH: "fra"; | ||
| readonly OLD_FRENCH: "frm"; | ||
| readonly FRISIAN: "fry"; | ||
| readonly SCOTTISH_GAELIC: "gla"; | ||
| readonly IRISH: "gle"; | ||
| readonly GALICIAN: "glg"; | ||
| readonly ANCIENT_GREEK: "grc"; | ||
| readonly GUJARATI: "guj"; | ||
| readonly HAITIAN_CREOLE: "hat"; | ||
| readonly HEBREW: "heb"; | ||
| readonly HINDI: "hin"; | ||
| readonly CROATIAN: "hrv"; | ||
| readonly HUNGARIAN: "hun"; | ||
| readonly ARMENIAN: "hye"; | ||
| readonly IGBO: "iku"; | ||
| readonly INDONESIAN: "ind"; | ||
| readonly ICELANDIC: "isl"; | ||
| readonly ITALIAN: "ita"; | ||
| readonly OLD_ITALIAN: "ita_old"; | ||
| readonly JAVANESE: "jav"; | ||
| readonly JAPANESE: "jpn"; | ||
| readonly JAPANESE_VERTICAL: "jpn_vert"; | ||
| readonly KANNADA: "kan"; | ||
| readonly GEORGIAN: "kat"; | ||
| readonly OLD_GEORGIAN: "kat_old"; | ||
| readonly KAZAKH: "kaz"; | ||
| readonly KHMER: "khm"; | ||
| readonly KIRGHIZ: "kir"; | ||
| readonly KURDISH: "kmr"; | ||
| readonly KOREAN: "kor"; | ||
| readonly KOREAN_VERTICAL: "kor_vert"; | ||
| readonly LAO: "lao"; | ||
| readonly LATIN: "lat"; | ||
| readonly LATVIAN: "lav"; | ||
| readonly LITHUANIAN: "lit"; | ||
| readonly LUXEMBOURGISH: "ltz"; | ||
| readonly MALAYALAM: "mal"; | ||
| readonly MARATHI: "mar"; | ||
| readonly MACEDONIAN: "mkd"; | ||
| readonly MALTESE: "mlt"; | ||
| readonly MONGOLIAN: "mon"; | ||
| readonly MAORI: "mri"; | ||
| readonly MALAY: "msa"; | ||
| readonly MYANMAR: "mya"; | ||
| readonly NEPALI: "nep"; | ||
| readonly DUTCH: "nld"; | ||
| readonly NORWEGIAN: "nor"; | ||
| readonly OCCITAN: "oci"; | ||
| readonly ODISHA: "ori"; | ||
| readonly OSD: "osd"; | ||
| readonly PUNJABI: "pan"; | ||
| readonly POLISH: "pol"; | ||
| readonly PORTUGUESE: "por"; | ||
| readonly PASHTO: "pus"; | ||
| readonly QUECHUA: "que"; | ||
| readonly ROMANIAN: "ron"; | ||
| readonly RUSSIAN: "rus"; | ||
| readonly SANSKRIT: "san"; | ||
| readonly SINHALA: "sin"; | ||
| readonly SLOVAK: "slk"; | ||
| readonly SLOVAK_FRAKTUR: "slk_frak"; | ||
| readonly SLOVENIAN: "slv"; | ||
| readonly SINDHI: "snd"; | ||
| readonly SPANISH: "spa"; | ||
| readonly OLD_SPANISH: "spa_old"; | ||
| readonly ALBANIAN: "sqi"; | ||
| readonly SERBIAN: "srp"; | ||
| readonly SERBIAN_LATIN: "srp_latn"; | ||
| readonly SUNDIANESE: "sun"; | ||
| readonly SWAHILI: "swa"; | ||
| readonly SWEDISH: "swe"; | ||
| readonly SYRIAC: "syr"; | ||
| readonly TAMIL: "tam"; | ||
| readonly TATAR: "tat"; | ||
| readonly TELUGU: "tel"; | ||
| readonly TAJIK: "tgk"; | ||
| readonly TAGALOG: "tgl"; | ||
| readonly THAI: "tha"; | ||
| readonly TIGRINYA: "tir"; | ||
| readonly TONGAN: "ton"; | ||
| readonly TURKISH: "tur"; | ||
| readonly UIGHUR: "uig"; | ||
| readonly UKRAINIAN: "ukr"; | ||
| readonly URDU: "urd"; | ||
| readonly UZBEK: "uzb"; | ||
| readonly UZBEK_CYRILLIC: "uzb_cyrl"; | ||
| readonly VIETNAMESE: "vie"; | ||
| readonly YIDDISH: "yid"; | ||
| readonly YORUBA: "yor"; | ||
| }>; | ||
| type OcrPresetType$1 = (typeof OCR_PRESETS)[keyof typeof OCR_PRESETS]; | ||
| type OcrLanguageType$1 = (typeof OCR_LANGUAGES)[keyof typeof OCR_LANGUAGES]; | ||
| type AnyparserFormatType = 'json' | 'markdown' | 'html' | ||
| type AnyparserModelType = 'text' | 'ocr' | 'vlm' | 'lam' | 'crawler' | ||
| type AnyparserEncodingType = 'utf-8' | 'latin1' | ||
| interface AnyparserOption { | ||
| apiUrl?: URL | ||
| apiKey?: string | ||
| format?: AnyparserFormatType | ||
| model?: AnyparserModelType | ||
| encoding?: AnyparserEncodingType | ||
| image?: boolean | ||
| table?: boolean | ||
| files?: string | string[] | ||
| ocrLanguage?: OcrLanguageType[] | ||
| ocrPreset?: OcrPresetType | ||
| url?: string | ||
| maxDepth?: number | ||
| maxExecutions?: number | ||
| strategy?: 'LIFO' | 'FIFO' | ||
| traversalScope?: 'subtree' | 'domain' | ||
| } | ||
| // ---- Parser | ||
| interface AnyparserImageReference { | ||
| base64Data: string | ||
| displayName: string | ||
| page?: number | ||
| imageIndex: number | ||
| } | ||
| interface AnyparserResultBase { | ||
| rid: string | ||
| originalFilename: string | ||
| checksum: string | ||
| totalCharacters?: number | ||
| markdown?: string | ||
| } | ||
| interface AnyparserCrawlDirectiveBase { | ||
| type: 'HTTP Header' | 'HTML Meta' | 'Combined' | ||
| priority: number | ||
| name?: string | ||
| noindex?: boolean | ||
| nofollow?: boolean | ||
| crawlDelay?: number | ||
| unavailableAfter?: Date | ||
| } | ||
| interface AnyparserCrawlDirective extends AnyparserCrawlDirectiveBase { | ||
| type: 'Combined' | ||
| name: undefined | ||
| underlying: AnyparserCrawlDirectiveBase[] | ||
| } | ||
| interface AnyparserUrl { | ||
| url: URL | ||
| title?: string | ||
| crawledAt?: string | ||
| statusCode: number | ||
| statusMessage: string | ||
| directive: AnyparserCrawlDirective | ||
| totalCharacters?: number | ||
| markdown?: string | ||
| images?: AnyparserImageReference[] | ||
| text?: string | ||
| politenessDelay: number | ||
| } | ||
| interface AnyparserRobotsTxtDirective { | ||
| userAgent: string | ||
| disallow: Set<string> | ||
| allow: Set<string> | ||
| crawlDelay?: number | ||
| } | ||
| interface AnyparserPdfPage { | ||
| pageNumber: number | ||
| markdown?: string | ||
| text?: string | ||
| images?: AnyparserImageReference[] | ||
| } | ||
| interface AnyparserPdfResult extends AnyparserResultBase { | ||
| totalItems?: number | ||
| items?: AnyparserPdfPage[] | ||
| } | ||
| interface AnyparserCrawlResult { | ||
| rid: string | ||
| startUrl: URL | ||
| totalCharacters: number | ||
| totalItems: number | ||
| markdown: string | ||
| items?: AnyparserUrl[] | ||
| robotsDirective: AnyparserRobotsTxtDirective | ||
| } | ||
| type AnyparserResult = AnyparserCrawlResult | AnyparserPdfResult | AnyparserResultBase | ||
| type Result = AnyparserResult[] | string | ||
| export { Anyparser, type AnyparserCrawlDirective, type AnyparserCrawlDirectiveBase, type AnyparserCrawlResult, type AnyparserImageReference, type AnyparserOption, type AnyparserPdfPage, type AnyparserPdfResult, type AnyparserResult, type AnyparserResultBase, type AnyparserRobotsTxtDirective, type AnyparserUrl, OCR_LANGUAGES, OCR_PRESETS, type OcrLanguageType$1 as OcrLanguageType, type OcrPresetType$1 as OcrPresetType, type Result }; |
+574
// --- esbuild-generated runtime helpers (machine-generated; edit sources, not this file) ---
var __defProp = Object.defineProperty;
var __getOwnPropSymbols = Object.getOwnPropertySymbols;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __propIsEnum = Object.prototype.propertyIsEnumerable;
// Resolve a well-known symbol (e.g. Symbol.asyncIterator), falling back to a
// registered symbol for engines that lack the built-in one.
var __knownSymbol = (name, symbol) => (symbol = Symbol[name]) ? symbol : Symbol.for("Symbol." + name);
// Define `key` on `obj`: uses defineProperty when the key already exists (to
// normalize enumerable/configurable/writable), plain assignment otherwise.
var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
// Transpiled object spread (`{ ...a, ...b }`): shallow-copies b's own
// enumerable string keys and symbols onto a, then returns a.
var __spreadValues = (a, b) => {
  for (var prop in b || (b = {}))
    if (__hasOwnProp.call(b, prop))
      __defNormalProp(a, prop, b[prop]);
  if (__getOwnPropSymbols)
    for (var prop of __getOwnPropSymbols(b)) {
      if (__propIsEnum.call(b, prop))
        __defNormalProp(a, prop, b[prop]);
    }
  return a;
};
// Transpiled async/await: drives a generator, resolving each yielded value
// before resuming it, and settles the returned Promise with the generator's
// final return value (or rejects with the first thrown/rejected error).
var __async = (__this, __arguments, generator) => {
  return new Promise((resolve, reject) => {
    // Resume the generator with a resolved awaited value.
    var fulfilled = (value) => {
      try {
        step(generator.next(value));
      } catch (e) {
        reject(e);
      }
    };
    // Throw a rejection back into the generator so try/catch inside the
    // async body can observe it.
    var rejected = (value) => {
      try {
        step(generator.throw(value));
      } catch (e) {
        reject(e);
      }
    };
    var step = (x) => x.done ? resolve(x.value) : Promise.resolve(x.value).then(fulfilled, rejected);
    step((generator = generator.apply(__this, __arguments)).next());
  });
};
// Transpiled for-await-of support: uses the target's Symbol.asyncIterator if
// present, otherwise adapts its sync iterator by wrapping next()/return()
// results in Promises (resolving each yielded value before reporting it).
var __forAwait = (obj, it, method) => (it = obj[__knownSymbol("asyncIterator")]) ? it.call(obj) : (obj = obj[__knownSymbol("iterator")](), it = {}, method = (key, fn) => (fn = obj[key]) && (it[key] = (arg) => new Promise((yes, no, done) => (arg = fn.call(obj, arg), done = arg.done, Promise.resolve(arg.value).then((value) => yes({ value, done }), no)))), method("next"), method("return"), it);
| // src/utils/fetcher.ts | ||
/**
 * Error thrown when an HTTP request completes with a non-OK status.
 * Carries the underlying error as `cause` and the HTTP status as `statusCode`.
 */
var WrappedError = class extends Error {
  constructor(message, cause, statusCode) {
    super(message);
    Object.assign(this, { name: "WrappedError", cause, statusCode });
  }
};
/**
 * fetch() wrapper that rejects with a WrappedError whenever the response is
 * not OK (non-2xx). The response body text is wrapped as the error's cause
 * and the HTTP status code is attached.
 * @param input - URL / Request forwarded to fetch
 * @param options - optional RequestInit forwarded to fetch
 * @returns the successful Response
 */
var wrappedFetch = async (input, options) => {
  const response = await fetch(input, options);
  if (response.ok) {
    return response;
  }
  const body = await response.text();
  throw new WrappedError(
    `HTTP ${response.status} ${response.statusText}: ${input}`,
    new Error(body),
    response.status
  );
};
| // src/form.ts | ||
/**
 * Builds the multipart form data sent to the parse endpoint.
 * Field selection depends on the model: image/table flags are skipped for
 * "ocr" and "crawler"; OCR options are only sent for "ocr"; crawler options
 * (url, maxDepth, maxExecutions, strategy, traversalScope) are only sent for
 * "crawler"; every non-crawler model uploads the prepared files.
 * @param parsed - validated parser options
 * @returns FormData ready to POST
 */
function buildForm(parsed) {
  const formData = new FormData();
  formData.append("format", parsed.format);
  formData.append("model", parsed.model);
  const isOcr = parsed.model === "ocr";
  const isCrawler = parsed.model === "crawler";
  if (!isOcr && !isCrawler) {
    if (parsed.image !== undefined) {
      formData.append("image", String(parsed.image));
    }
    if (parsed.table !== undefined) {
      formData.append("table", String(parsed.table));
    }
  }
  if (isOcr) {
    if (parsed.ocrLanguage?.length) {
      formData.append("ocrLanguage", parsed.ocrLanguage.join(","));
    }
    if (parsed.ocrPreset) {
      formData.append("ocrPreset", parsed.ocrPreset);
    }
  }
  if (isCrawler) {
    formData.append("url", parsed.url ?? "");
    if (parsed.maxDepth !== undefined) {
      formData.append("maxDepth", String(parsed.maxDepth));
    }
    if (parsed.maxExecutions !== undefined) {
      formData.append("maxExecutions", String(parsed.maxExecutions));
    }
    if (parsed.strategy) {
      formData.append("strategy", parsed.strategy);
    }
    if (parsed.traversalScope) {
      formData.append("traversalScope", parsed.traversalScope);
    }
  } else if (parsed.files) {
    for (const file of parsed.files) {
      formData.append("files", file.contents, file.fileName);
    }
  }
  return formData;
}
| // src/validator/index.ts | ||
| import * as fsapi from "node:fs"; | ||
| import { basename } from "node:path"; | ||
| import * as fs from "node:fs/promises"; | ||
// src/config/hardcoded.ts
// Hardcoded configuration constants shared across the SDK.

// Default API endpoint used when ANYPARSER_API_URL is unset or invalid.
var FALLBACK_API_URL = "https://anyparserapi.com";
// Accepted values for the `ocrPreset` option (document-type hints).
var OCR_PRESETS = Object.freeze({
  DOCUMENT: "document",
  HANDWRITING: "handwriting",
  SCAN: "scan",
  RECEIPT: "receipt",
  MAGAZINE: "magazine",
  INVOICE: "invoice",
  BUSINESS_CARD: "business-card",
  PASSPORT: "passport",
  DRIVER_LICENSE: "driver-license"
});
// Accepted values for the `ocrLanguage` option: three-letter language codes
// (e.g. "eng"), with suffixed variants for scripts and vertical text.
var OCR_LANGUAGES = Object.freeze({
  AFRIKAANS: "afr",
  AMHARIC: "amh",
  ARABIC: "ara",
  ASSAMESE: "asm",
  AZERBAIJANI: "aze",
  AZERBAIJANI_CYRILLIC: "aze_cyrl",
  BELARUSIAN: "bel",
  BENGALI: "ben",
  TIBETAN: "bod",
  BOSNIAN: "bos",
  BRETON: "bre",
  BULGARIAN: "bul",
  CATALAN: "cat",
  CEBUANO: "ceb",
  CZECH: "ces",
  SIMPLIFIED_CHINESE: "chi_sim",
  SIMPLIFIED_CHINESE_VERTICAL: "chi_sim_vert",
  TRADITIONAL_CHINESE: "chi_tra",
  TRADITIONAL_CHINESE_VERTICAL: "chi_tra_vert",
  CHEROKEE: "chr",
  CORSICAN: "cos",
  WELSH: "cym",
  DANISH: "dan",
  DANISH_FRAKTUR: "dan_frak",
  GERMAN: "deu",
  GERMAN_FRAKTUR: "deu_frak",
  GERMAN_LATIN: "deu_latf",
  DIVESH: "div",
  DZONGKHA: "dzo",
  GREEK: "ell",
  ENGLISH: "eng",
  MIDDLE_ENGLISH: "enm",
  ESPERANTO: "epo",
  EQUATORIAL_GUINEAN: "equ",
  ESTONIAN: "est",
  BASQUE: "eus",
  FAROESE: "fao",
  PERSIAN: "fas",
  FILIPINO: "fil",
  FINNISH: "fin",
  FRENCH: "fra",
  OLD_FRENCH: "frm",
  FRISIAN: "fry",
  SCOTTISH_GAELIC: "gla",
  IRISH: "gle",
  GALICIAN: "glg",
  ANCIENT_GREEK: "grc",
  GUJARATI: "guj",
  HAITIAN_CREOLE: "hat",
  HEBREW: "heb",
  HINDI: "hin",
  CROATIAN: "hrv",
  HUNGARIAN: "hun",
  ARMENIAN: "hye",
  IGBO: "iku",
  INDONESIAN: "ind",
  ICELANDIC: "isl",
  ITALIAN: "ita",
  OLD_ITALIAN: "ita_old",
  JAVANESE: "jav",
  JAPANESE: "jpn",
  JAPANESE_VERTICAL: "jpn_vert",
  KANNADA: "kan",
  GEORGIAN: "kat",
  OLD_GEORGIAN: "kat_old",
  KAZAKH: "kaz",
  KHMER: "khm",
  KIRGHIZ: "kir",
  KURDISH: "kmr",
  KOREAN: "kor",
  KOREAN_VERTICAL: "kor_vert",
  LAO: "lao",
  LATIN: "lat",
  LATVIAN: "lav",
  LITHUANIAN: "lit",
  LUXEMBOURGISH: "ltz",
  MALAYALAM: "mal",
  MARATHI: "mar",
  MACEDONIAN: "mkd",
  MALTESE: "mlt",
  MONGOLIAN: "mon",
  MAORI: "mri",
  MALAY: "msa",
  MYANMAR: "mya",
  NEPALI: "nep",
  DUTCH: "nld",
  NORWEGIAN: "nor",
  OCCITAN: "oci",
  ODISHA: "ori",
  OSD: "osd",
  PUNJABI: "pan",
  POLISH: "pol",
  PORTUGUESE: "por",
  PASHTO: "pus",
  QUECHUA: "que",
  ROMANIAN: "ron",
  RUSSIAN: "rus",
  SANSKRIT: "san",
  SINHALA: "sin",
  SLOVAK: "slk",
  SLOVAK_FRAKTUR: "slk_frak",
  SLOVENIAN: "slv",
  SINDHI: "snd",
  SPANISH: "spa",
  OLD_SPANISH: "spa_old",
  ALBANIAN: "sqi",
  SERBIAN: "srp",
  SERBIAN_LATIN: "srp_latn",
  SUNDIANESE: "sun",
  SWAHILI: "swa",
  SWEDISH: "swe",
  SYRIAC: "syr",
  TAMIL: "tam",
  TATAR: "tat",
  TELUGU: "tel",
  TAJIK: "tgk",
  TAGALOG: "tgl",
  THAI: "tha",
  TIGRINYA: "tir",
  TONGAN: "ton",
  TURKISH: "tur",
  UIGHUR: "uig",
  UKRAINIAN: "ukr",
  URDU: "urd",
  UZBEK: "uzb",
  UZBEK_CYRILLIC: "uzb_cyrl",
  VIETNAMESE: "vie",
  YIDDISH: "yid",
  YORUBA: "yor"
});
// src/utils/nullable.ts
/**
 * Treats null, undefined, and whitespace-only strings as "empty".
 * Every other value — including 0, false, and empty arrays — is not empty.
 */
var isNullOrUndefined = (suspect) => {
  if (suspect === null || suspect === undefined) {
    return true;
  }
  return typeof suspect === "string" ? suspect.trim() === "" : false;
};
/**
 * True for any non-null value whose typeof is "object" (plain objects,
 * arrays, Maps, Sets, ...); false for null, undefined, and primitives.
 */
var isValidObject = (suspect) => suspect !== null && typeof suspect === "object";
// src/utils/env.ts
/**
 * Reads an environment variable as a string. Falls back to `fallback` when
 * the variable is unset or blank; returns "" when the fallback itself is
 * null/undefined/blank.
 */
var env = (key, fallback = "") => {
  const value = process.env[key];
  if (!isNullOrUndefined(value)) {
    return value;
  }
  return isNullOrUndefined(fallback) ? "" : fallback;
};
// src/options.default.ts
/**
 * Resolves the API base URL from ANYPARSER_API_URL, falling back to the
 * hardcoded default when the variable is unset or does not parse as a URL.
 */
var getApiUrl = () => {
  const candidate = env("ANYPARSER_API_URL", FALLBACK_API_URL);
  try {
    return new URL(candidate);
  } catch (e) {
    console.error("Invalid API URL %s", candidate);
  }
  console.debug("Defaulting to %s", FALLBACK_API_URL);
  return new URL(FALLBACK_API_URL);
};
/**
 * Baseline options merged under any user-supplied options.
 * NOTE: evaluated once at module load, so ANYPARSER_API_URL and
 * ANYPARSER_API_KEY are read at import time, not per call.
 */
var defaultOptions = {
  apiUrl: getApiUrl(),
  apiKey: env("ANYPARSER_API_KEY"),
  format: "json",
  model: "text",
  image: true,
  table: true
};
// src/options.ts
/**
 * Ensures an API key is present and is a non-blank string.
 * @param apiKey - candidate API key
 * @throws Error when the key is falsy, not a string, or whitespace-only
 */
function validateApiKey(apiKey) {
  if (!apiKey) {
    throw new Error("API key is required");
  }
  const isNonEmptyString = typeof apiKey === "string" && apiKey.trim().length > 0;
  if (!isNonEmptyString) {
    throw new Error("API key must be a non-empty string");
  }
}
/**
 * Merges user options over the defaults and normalizes the result.
 * @param options - partial user configuration
 * @returns fully-populated option object
 * @throws Error when the API key or API URL is missing/invalid
 */
function buildOptions(options) {
  const merged = __spreadValues(__spreadValues({}, defaultOptions), options);
  validateApiKey(merged.apiKey);
  if (!merged.apiUrl) {
    throw new Error("API URL is required");
  }
  return {
    apiUrl: merged.apiUrl,
    apiKey: merged.apiKey,
    format: merged.format || "json",
    model: merged.model || "text",
    encoding: merged.encoding || "utf-8",
    image: merged.image,
    table: merged.table,
    ocrLanguage: merged.ocrLanguage,
    ocrPreset: merged.ocrPreset,
    url: merged.url,
    maxDepth: merged.maxDepth,
    maxExecutions: merged.maxExecutions,
    strategy: merged.strategy,
    traversalScope: merged.traversalScope
  };
}
// src/validator/option.ts
/**
 * Validates parsed options beyond basic presence checks.
 * @param parsed - options produced by buildOptions
 * @throws Error when the API URL is missing, an OCR language code is not one
 *         of OCR_LANGUAGES, or the OCR preset is not one of OCR_PRESETS
 */
var validateOption = (parsed) => {
  if (isNullOrUndefined(parsed.apiUrl)) {
    throw new Error("API URL is required");
  }
  if (!isNullOrUndefined(parsed.ocrLanguage)) {
    // Build the lookup once instead of rescanning Object.values(OCR_LANGUAGES)
    // (a ~130-entry linear search) for every requested language.
    const knownLanguages = new Set(Object.values(OCR_LANGUAGES));
    for (const language of parsed.ocrLanguage) {
      if (!knownLanguages.has(language)) {
        throw new Error("Invalid OCR language");
      }
    }
  }
  if (!isNullOrUndefined(parsed.ocrPreset)) {
    if (!Object.values(OCR_PRESETS).includes(parsed.ocrPreset)) {
      throw new Error("Invalid OCR preset");
    }
  }
};
// src/validator/path.ts
import { access } from "node:fs/promises";
/**
 * Checks that every supplied path exists and is accessible.
 * @param filePaths - a single path or an array of paths
 * @returns `{ valid: true, files }` on success, or `{ valid: false, error }`
 *          when no files were given or any path fails the access check
 */
var validatePath = async (filePaths) => {
  if (!filePaths) {
    return { valid: false, error: new Error("No files provided") };
  }
  const files = Array.isArray(filePaths) ? filePaths : [filePaths];
  if (files.length === 0) {
    return { valid: false, error: new Error("No files provided") };
  }
  for (const filePath of files) {
    try {
      await access(filePath);
    } catch (error) {
      return { valid: false, error };
    }
  }
  return { valid: true, files };
};
// src/validator/crawler.ts
/**
 * Normalizes the crawl start target to a URL string.
 * For an array, the first non-blank entry is used; otherwise the value
 * itself is parsed. An unparseable value makes `new URL` throw.
 */
var getURLToCrawl = (filePaths) => {
  if (Array.isArray(filePaths)) {
    const candidate = filePaths.find((entry) => !isNullOrUndefined(entry));
    if (!isNullOrUndefined(candidate)) {
      return new URL(candidate).toString();
    }
  }
  return new URL(filePaths).toString();
};
// src/validator/index.ts
/**
 * Verifies a file exists and can be opened for reading.
 * @param filePath - path of the file to check
 * @throws Error with a friendly message when the file is missing (ENOENT)
 *         or locked (EBUSY/ELOCK); any other fs error is rethrown untouched
 */
async function checkFileAccess(filePath) {
  try {
    await fs.access(filePath);
  } catch (error) {
    const code = error instanceof Error && "code" in error ? error.code : undefined;
    if (code === "ENOENT") {
      throw new Error(`File ${filePath} was not found or was removed`);
    }
    throw error;
  }
  try {
    // Opening for read (and closing) detects files locked by another process.
    const fileHandle = await fs.open(filePath, "r");
    await fileHandle.close();
  } catch (error) {
    const code = error instanceof Error && "code" in error ? error.code : undefined;
    if (code === "EBUSY" || code === "ELOCK") {
      throw new Error(`File ${filePath} is locked by another process`);
    }
    throw error;
  }
}
/**
 * Validates options plus the input paths/URL and materializes file contents.
 * In crawler mode the input is treated as a start URL; otherwise every path
 * is checked for accessibility and read fully into a File object.
 * @param filePaths - file path(s) to parse, or the start URL in crawler mode
 * @param options - user-supplied parser options
 * @returns fully-resolved options, including either `url` or `files`
 * @throws Error for invalid options/format, missing/locked files, or bad URLs
 */
async function validateAndParse(filePaths, options) {
  const parsed = buildOptions(options);
  validateOption(parsed);
  if (!["json", "markdown", "html"].includes(parsed.format)) {
    throw new Error(`Unsupported format: ${parsed.format}`);
  }
  const isCrawler = options?.model === "crawler";
  const result = isCrawler ? { valid: true, files: [getURLToCrawl(filePaths)] } : await validatePath(filePaths);
  if (result.valid === false) {
    throw result.error;
  }
  const parsedOption = {
    apiUrl: parsed.apiUrl,
    apiKey: parsed.apiKey,
    format: parsed.format,
    model: parsed.model,
    image: parsed.image,
    table: parsed.table,
    ocrLanguage: parsed.ocrLanguage,
    ocrPreset: parsed.ocrPreset,
    url: parsed.url,
    maxDepth: parsed.maxDepth,
    maxExecutions: parsed.maxExecutions,
    strategy: parsed.strategy,
    traversalScope: parsed.traversalScope,
    encoding: parsed.encoding
  };
  if (isCrawler) {
    parsedOption.url = result.files[0];
    return parsedOption;
  }
  const processed = [];
  for (const filePath of result.files) {
    await checkFileAccess(filePath);
    // Read the whole file into memory and wrap it as a File for FormData.
    const chunks = [];
    for await (const chunk of fsapi.createReadStream(filePath)) {
      chunks.push(chunk);
    }
    const fileName = basename(filePath);
    const contents = new File([Buffer.concat(chunks)], fileName, {
      type: "application/octet-stream"
    });
    processed.push({ fileName, contents });
  }
  parsedOption.files = processed;
  return parsedOption;
}
// src/utils/casing.ts
/**
 * Converts snake_case to camelCase: each run of underscores is dropped and
 * the character that follows it is upper-cased (e.g. "foo__bar" -> "fooBar").
 * Leading underscores capitalize the first letter; trailing ones are kept.
 */
var underscoreToCamel = (value) => value.replace(/_+(.)/g, (_match, next) => next.toUpperCase());
// src/utils/camel-case.ts
/**
 * Recursively converts snake_case object keys to camelCase.
 * Dates, RegExps, URLs, functions, null/undefined, and primitives pass
 * through untouched; arrays, Maps, and Sets are rebuilt with their members
 * (and Map keys) transformed.
 */
var transformToCamel = (item) => {
  if (item === null || item === void 0) {
    return item;
  }
  if (item instanceof Date || item instanceof RegExp || item instanceof URL) {
    return item;
  }
  if (typeof item === "function") {
    return item;
  }
  if (Array.isArray(item)) {
    return item.map((element) => transformToCamel(element));
  }
  if (item instanceof Map) {
    const rebuilt = new Map();
    for (const [key, value] of item) {
      rebuilt.set(transformToCamel(key), transformToCamel(value));
    }
    return rebuilt;
  }
  if (item instanceof Set) {
    const rebuilt = new Set();
    for (const value of item) {
      rebuilt.add(transformToCamel(value));
    }
    return rebuilt;
  }
  if (isValidObject(item)) {
    const rebuilt = {};
    for (const key of Object.keys(item)) {
      rebuilt[underscoreToCamel(key)] = transformToCamel(item[key]);
    }
    return rebuilt;
  }
  return item;
};
// src/parser.ts
/** Client for the Anyparser API. */
var Anyparser = class {
  /**
   * Initialize the parser with optional configuration.
   * @param options - Configuration options for the parser
   */
  constructor(options) {
    this.options = options;
  }
  /**
   * Parse files (or crawl a URL) using the Anyparser API.
   * @param filePathsOrUrl - a single file path or list of file paths to parse, or a start URL for crawling
   * @returns camelCased parsed results when format is json, or raw text for markdown/html
   * @throws Error if validation or the API request fails
   */
  async parse(filePathsOrUrl) {
    const parsed = await validateAndParse(filePathsOrUrl, this.options);
    const formData = buildForm(parsed);
    const headers = parsed.apiKey ? { Authorization: `Bearer ${parsed.apiKey}` } : {};
    const endpoint = new URL("/parse/v1", parsed.apiUrl);
    const response = await wrappedFetch(endpoint, {
      method: "POST",
      body: formData,
      headers
    });
    if (parsed.format === "json") {
      return transformToCamel(await response.json());
    }
    if (parsed.format === "markdown" || parsed.format === "html") {
      return response.text();
    }
    throw new Error(`Unsupported format: ${parsed.format}`);
  }
};
| export { | ||
| Anyparser, | ||
| OCR_LANGUAGES, | ||
| OCR_PRESETS | ||
| }; | ||
| //# sourceMappingURL=index.js.map |
| {"version":3,"sources":["../src/utils/fetcher.ts","../src/form.ts","../src/validator/index.ts","../src/config/hardcoded.ts","../src/utils/nullable.ts","../src/utils/env.ts","../src/options.default.ts","../src/options.ts","../src/validator/option.ts","../src/validator/path.ts","../src/validator/crawler.ts","../src/utils/casing.ts","../src/utils/camel-case.ts","../src/parser.ts"],"sourcesContent":["export class WrappedError extends Error {\n override cause: Error\n statusCode: number\n\n constructor (message: string, cause: Error, statusCode: number) {\n super(message)\n this.name = 'WrappedError'\n this.cause = cause\n this.statusCode = statusCode\n }\n}\n\nexport const wrappedFetch = async (input: string | URL | Request, options?: RequestInit) => {\n const response = await fetch(input, options)\n\n if (!response.ok) {\n const { status, statusText } = response\n const text = await response.text()\n\n throw new WrappedError(\n `HTTP ${status} ${statusText}: ${input}`,\n new Error(text),\n status\n )\n }\n\n return response\n}\n","/* eslint-disable complexity */\n\n/**\n * Form data builder module for creating multipart form data for API requests.\n * @module form\n */\n\nimport type { AnyparserParsedOption } from './anyparser.d.ts'\n\n/**\n * Builds multipart form data from parsed options.\n * @param parsed - Validated parser options\n * @returns Form data for API request\n */\nexport function buildForm (parsed: AnyparserParsedOption): FormData {\n const formData = new FormData()\n\n // Add regular form fields\n formData.append('format', parsed.format)\n formData.append('model', parsed.model)\n\n // Only add image and table fields if not using OCR model or crawler model\n if (parsed.model !== 'ocr' && parsed.model !== 'crawler') {\n if (parsed.image !== undefined) {\n formData.append('image', String(parsed.image))\n }\n\n if (parsed.table !== undefined) {\n formData.append('table', String(parsed.table))\n }\n }\n\n // Add OCR-specific fields\n if (parsed.model === 
'ocr') {\n if (parsed.ocrLanguage?.length) {\n formData.append('ocrLanguage', parsed.ocrLanguage.join(','))\n }\n\n if (parsed.ocrPreset) {\n formData.append('ocrPreset', parsed.ocrPreset)\n }\n }\n\n // Add crawler-specific fields\n if (parsed.model === 'crawler') {\n formData.append('url', parsed.url ?? '')\n\n if (parsed.maxDepth !== undefined) {\n formData.append('maxDepth', String(parsed.maxDepth))\n }\n\n if (parsed.maxExecutions !== undefined) {\n formData.append('maxExecutions', String(parsed.maxExecutions))\n }\n\n if (parsed.strategy) {\n formData.append('strategy', parsed.strategy)\n }\n\n if (parsed.traversalScope) {\n formData.append('traversalScope', parsed.traversalScope)\n }\n } else {\n // Add files to the form for non-crawler models\n if (parsed.files) {\n for (const file of parsed.files) {\n formData.append('files', file.contents, file.fileName)\n }\n }\n }\n\n return formData\n}\n","/**\n * Option validation and parsing module\n * @module validator\n */\nimport * as fsapi from 'node:fs'\nimport { basename } from 'node:path'\nimport * as fs from 'node:fs/promises'\nimport type { AnyparserOption, AnyparserParsedOption } from '../anyparser.d.ts'\nimport { buildOptions } from '../options.ts'\nimport { validateOption } from './option.ts'\nimport { validatePath } from './path.ts'\nimport type { ValidPathValidationResult } from './validation.d.ts'\nimport { getURLToCrawl } from './crawler.ts'\n\n/**\n * Check if a file is accessible and not locked\n * @param filePath - Path to the file to check\n * @returns Promise that resolves when file is accessible, rejects if file is locked or not found\n */\nasync function checkFileAccess (filePath: string): Promise<void> {\n try {\n await fs.access(filePath)\n } catch (error) {\n if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {\n throw new Error(`File ${filePath} was not found or was removed`)\n }\n\n throw error\n }\n\n try {\n // Try to open file for reading to check if it's locked\n 
const fileHandle = await fs.open(filePath, 'r')\n await fileHandle.close()\n } catch (error) {\n if (error instanceof Error && 'code' in error && (error.code === 'EBUSY' || error.code === 'ELOCK')) {\n throw new Error(`File ${filePath} is locked by another process`)\n }\n\n throw error\n }\n}\n\n/**\n * Validates options and processes input files\n * @param filePaths - Files to process\n * @param options - Parser options\n * @returns Processed options and files\n */\nexport async function validateAndParse (\n filePaths: string | string[],\n options?: AnyparserOption\n): Promise<AnyparserParsedOption> {\n const parsed = buildOptions(options)\n validateOption(parsed)\n\n if (!['json', 'markdown', 'html'].includes(parsed.format)) {\n throw new Error(`Unsupported format: ${parsed.format}`)\n }\n\n // Determine if we're in crawler mode\n const isCrawler = options?.model === 'crawler'\n\n // Validate URL for crawler mode, otherwise validate file paths\n const result = isCrawler ?\n { valid: true, files: [getURLToCrawl(filePaths)] } as ValidPathValidationResult :\n await validatePath(filePaths)\n\n if (result.valid === false) {\n throw result.error\n }\n\n const parsedOption: AnyparserParsedOption = {\n apiUrl: parsed.apiUrl,\n apiKey: parsed.apiKey,\n format: parsed.format,\n model: parsed.model,\n image: parsed.image,\n table: parsed.table,\n ocrLanguage: parsed.ocrLanguage,\n ocrPreset: parsed.ocrPreset,\n url: parsed.url,\n maxDepth: parsed.maxDepth,\n maxExecutions: parsed.maxExecutions,\n strategy: parsed.strategy,\n traversalScope: parsed.traversalScope,\n encoding: parsed.encoding\n }\n\n // Handle crawler mode\n if (isCrawler) {\n parsedOption.url = result.files[0]\n } else {\n // Process files for non-crawler mode\n const processed = []\n\n for (const filePath of result.files) {\n await checkFileAccess(filePath)\n\n const fileStream = fsapi.createReadStream(filePath)\n const chunks = []\n\n for await (const chunk of fileStream) {\n chunks.push(chunk)\n }\n\n 
const buffer = Buffer.concat(chunks)\n const contents = new File([buffer], basename(filePath), {\n type: 'application/octet-stream'\n })\n\n processed.push({\n fileName: basename(filePath),\n contents\n })\n }\n\n parsedOption.files = processed\n }\n\n return parsedOption\n}\n","/**\n * Hardcoded configuration constants\n * @module config/hardcoded\n */\n\nexport const FALLBACK_API_URL = 'https://anyparserapi.com'\n\nexport const OCR_PRESETS = Object.freeze({\n DOCUMENT: 'document',\n HANDWRITING: 'handwriting',\n SCAN: 'scan',\n RECEIPT: 'receipt',\n MAGAZINE: 'magazine',\n INVOICE: 'invoice',\n BUSINESS_CARD: 'business-card',\n PASSPORT: 'passport',\n DRIVER_LICENSE: 'driver-license'\n} as const)\n\nexport const OCR_LANGUAGES = Object.freeze({\n AFRIKAANS: 'afr',\n AMHARIC: 'amh',\n ARABIC: 'ara',\n ASSAMESE: 'asm',\n AZERBAIJANI: 'aze',\n AZERBAIJANI_CYRILLIC: 'aze_cyrl',\n BELARUSIAN: 'bel',\n BENGALI: 'ben',\n TIBETAN: 'bod',\n BOSNIAN: 'bos',\n BRETON: 'bre',\n BULGARIAN: 'bul',\n CATALAN: 'cat',\n CEBUANO: 'ceb',\n CZECH: 'ces',\n SIMPLIFIED_CHINESE: 'chi_sim',\n SIMPLIFIED_CHINESE_VERTICAL: 'chi_sim_vert',\n TRADITIONAL_CHINESE: 'chi_tra',\n TRADITIONAL_CHINESE_VERTICAL: 'chi_tra_vert',\n CHEROKEE: 'chr',\n CORSICAN: 'cos',\n WELSH: 'cym',\n DANISH: 'dan',\n DANISH_FRAKTUR: 'dan_frak',\n GERMAN: 'deu',\n GERMAN_FRAKTUR: 'deu_frak',\n GERMAN_LATIN: 'deu_latf',\n DIVESH: 'div',\n DZONGKHA: 'dzo',\n GREEK: 'ell',\n ENGLISH: 'eng',\n MIDDLE_ENGLISH: 'enm',\n ESPERANTO: 'epo',\n EQUATORIAL_GUINEAN: 'equ',\n ESTONIAN: 'est',\n BASQUE: 'eus',\n FAROESE: 'fao',\n PERSIAN: 'fas',\n FILIPINO: 'fil',\n FINNISH: 'fin',\n FRENCH: 'fra',\n OLD_FRENCH: 'frm',\n FRISIAN: 'fry',\n SCOTTISH_GAELIC: 'gla',\n IRISH: 'gle',\n GALICIAN: 'glg',\n ANCIENT_GREEK: 'grc',\n GUJARATI: 'guj',\n HAITIAN_CREOLE: 'hat',\n HEBREW: 'heb',\n HINDI: 'hin',\n CROATIAN: 'hrv',\n HUNGARIAN: 'hun',\n ARMENIAN: 'hye',\n IGBO: 'iku',\n INDONESIAN: 'ind',\n ICELANDIC: 'isl',\n ITALIAN: 'ita',\n 
OLD_ITALIAN: 'ita_old',\n JAVANESE: 'jav',\n JAPANESE: 'jpn',\n JAPANESE_VERTICAL: 'jpn_vert',\n KANNADA: 'kan',\n GEORGIAN: 'kat',\n OLD_GEORGIAN: 'kat_old',\n KAZAKH: 'kaz',\n KHMER: 'khm',\n KIRGHIZ: 'kir',\n KURDISH: 'kmr',\n KOREAN: 'kor',\n KOREAN_VERTICAL: 'kor_vert',\n LAO: 'lao',\n LATIN: 'lat',\n LATVIAN: 'lav',\n LITHUANIAN: 'lit',\n LUXEMBOURGISH: 'ltz',\n MALAYALAM: 'mal',\n MARATHI: 'mar',\n MACEDONIAN: 'mkd',\n MALTESE: 'mlt',\n MONGOLIAN: 'mon',\n MAORI: 'mri',\n MALAY: 'msa',\n MYANMAR: 'mya',\n NEPALI: 'nep',\n DUTCH: 'nld',\n NORWEGIAN: 'nor',\n OCCITAN: 'oci',\n ODISHA: 'ori',\n OSD: 'osd',\n PUNJABI: 'pan',\n POLISH: 'pol',\n PORTUGUESE: 'por',\n PASHTO: 'pus',\n QUECHUA: 'que',\n ROMANIAN: 'ron',\n RUSSIAN: 'rus',\n SANSKRIT: 'san',\n SINHALA: 'sin',\n SLOVAK: 'slk',\n SLOVAK_FRAKTUR: 'slk_frak',\n SLOVENIAN: 'slv',\n SINDHI: 'snd',\n SPANISH: 'spa',\n OLD_SPANISH: 'spa_old',\n ALBANIAN: 'sqi',\n SERBIAN: 'srp',\n SERBIAN_LATIN: 'srp_latn',\n SUNDIANESE: 'sun',\n SWAHILI: 'swa',\n SWEDISH: 'swe',\n SYRIAC: 'syr',\n TAMIL: 'tam',\n TATAR: 'tat',\n TELUGU: 'tel',\n TAJIK: 'tgk',\n TAGALOG: 'tgl',\n THAI: 'tha',\n TIGRINYA: 'tir',\n TONGAN: 'ton',\n TURKISH: 'tur',\n UIGHUR: 'uig',\n UKRAINIAN: 'ukr',\n URDU: 'urd',\n UZBEK: 'uzb',\n UZBEK_CYRILLIC: 'uzb_cyrl',\n VIETNAMESE: 'vie',\n YIDDISH: 'yid',\n YORUBA: 'yor'\n} as const)\n\nexport type OcrPresetType = (typeof OCR_PRESETS)[keyof typeof OCR_PRESETS]\nexport type OcrLanguageType = (typeof OCR_LANGUAGES)[keyof typeof OCR_LANGUAGES]\n","export type Nullable<T> = T | null\nexport type Optional<T> = T | undefined\nexport type Nullish<T> = Nullable<T> | Optional<T>\n\nconst isNullOrUndefined = <T>(suspect: Nullish<T>): suspect is null | undefined => {\n if (typeof suspect === 'undefined' || suspect === null) {\n return true\n }\n\n if (typeof suspect === 'string') {\n return suspect.trim() === ''\n }\n\n return false\n}\n\nconst isValidObject = (suspect: unknown): suspect is object => {\n return 
typeof suspect === 'object' && suspect !== undefined && suspect !== null\n}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-object-type\nconst isEmptyObject = (suspect: unknown): suspect is {} => {\n return isValidObject(suspect) && Object.keys(suspect).length === 0\n}\n\nconst isInvalidOrEmptyArray = <T>(suspect: Nullish<Array<T>>): suspect is null | undefined | [] => {\n if (isNullOrUndefined(suspect)) {\n return true\n }\n\n if (!Array.isArray(suspect)) {\n throw new Error('isInvalidOrEmptyArray expects an array')\n }\n\n return suspect.length === 0\n}\n\nconst isValidArrayWithMembers = <T>(suspect: Nullish<Array<T>>): suspect is Array<T> => !isInvalidOrEmptyArray(suspect)\n\nconst or = <T>(value: Nullish<T>, or: T): T => {\n if (isNullOrUndefined(value)) {\n return or\n }\n\n return value\n}\n\nexport {\n isInvalidOrEmptyArray,\n isNullOrUndefined,\n isValidArrayWithMembers,\n isValidObject,\n isEmptyObject,\n or\n}\n","import { isNullOrUndefined } from './nullable.ts'\n\nconst env = (key: string, fallback = '') => {\n const value = process.env[key]\n\n if (!isNullOrUndefined(value)) {\n return value\n }\n\n if (!isNullOrUndefined(fallback)) {\n return fallback\n }\n\n return ''\n}\n\nconst envOr = (key: string, fallback = '') => {\n const value = process.env[key] as string\n\n if (isNullOrUndefined(value)) {\n return fallback\n }\n\n return ''\n}\n\nexport { env, envOr }\n","/**\n * Configuration module for Anyparser default options\n * @module options.default\n */\n\nimport { FALLBACK_API_URL } from '@/config/hardcoded.ts'\nimport { env } from '@src/utils/env.ts'\nimport type { AnyparserOption } from '@anyparser/core'\n\n/**\n * Retrieves and validates the API URL from environment or fallback\n * @returns {URL} Valid API URL instance\n */\nexport const getApiUrl = (): URL => {\n const value = env('ANYPARSER_API_URL', FALLBACK_API_URL)\n\n try {\n return new URL(value)\n } catch {\n console.error('Invalid API URL %s', value)\n }\n\n 
console.debug('Defaulting to %s', FALLBACK_API_URL)\n\n return new URL(FALLBACK_API_URL)\n}\n\n/**\n * Default configuration options for Anyparser\n * @type {Option}\n */\nexport const defaultOptions: AnyparserOption = {\n apiUrl: getApiUrl(),\n apiKey: env('ANYPARSER_API_KEY'),\n format: 'json',\n model: 'text',\n image: true,\n table: true\n}\n","/**\n * Options module for Anyparser configuration and parsing.\n * @module options\n */\n\nimport type { AnyparserOption, AnyparserParsedOption } from './anyparser.d.ts'\nimport { defaultOptions } from './options.default.ts'\n\n/**\n * Validate API key format and presence\n * @param apiKey - API key to validate\n * @throws {Error} If API key is invalid or missing\n */\nexport function validateApiKey (apiKey: string | undefined): void {\n if (!apiKey) {\n throw new Error('API key is required')\n }\n\n if (typeof apiKey !== 'string' || apiKey.trim().length === 0) {\n throw new Error('API key must be a non-empty string')\n }\n}\n\n/**\n * Build final options by merging defaults with provided options.\n * @param options - User-provided options to override defaults\n * @returns Complete options with all required fields\n * @throws {Error} If required options are missing or invalid\n */\nexport function buildOptions (options?: AnyparserOption): AnyparserParsedOption {\n const mergedOptions = { ...defaultOptions, ...options }\n\n validateApiKey(mergedOptions.apiKey)\n\n if (!mergedOptions.apiUrl) {\n throw new Error('API URL is required')\n }\n\n const parsedOptions: AnyparserParsedOption = {\n apiUrl: mergedOptions.apiUrl,\n apiKey: mergedOptions.apiKey as string,\n format: mergedOptions.format || 'json',\n model: mergedOptions.model || 'text',\n encoding: mergedOptions.encoding || 'utf-8',\n image: mergedOptions.image,\n table: mergedOptions.table,\n ocrLanguage: mergedOptions.ocrLanguage,\n ocrPreset: mergedOptions.ocrPreset,\n url: mergedOptions.url,\n maxDepth: mergedOptions.maxDepth,\n maxExecutions: 
mergedOptions.maxExecutions,\n strategy: mergedOptions.strategy,\n traversalScope: mergedOptions.traversalScope\n }\n\n return parsedOptions\n}\n","/**\n * Validation module for options\n * @module validation\n */\n\nimport { isNullOrUndefined } from '@src/utils/nullable.ts'\nimport type { AnyparserOption } from '@anyparser/core'\nimport { OCR_LANGUAGES, OCR_PRESETS } from '../config/hardcoded.ts'\n\n/**\n * Validates parsing options configuration\n * @param {Option} parsed - Options to validate\n * @throws {Error} If validation fails\n */\nexport const validateOption = (parsed: AnyparserOption) => {\n if (isNullOrUndefined(parsed.apiUrl)) {\n throw new Error('API URL is required')\n }\n\n if (!isNullOrUndefined(parsed.ocrLanguage)) {\n parsed.ocrLanguage.forEach((language: string) => {\n if (!Object.values(OCR_LANGUAGES as Record<string, string>).includes(language)) {\n throw new Error('Invalid OCR language')\n }\n })\n }\n\n if (!isNullOrUndefined(parsed.ocrPreset)) {\n if (!Object.values(OCR_PRESETS as Record<string, string>).includes(parsed.ocrPreset)) {\n throw new Error('Invalid OCR preset')\n }\n }\n}\n","/**\n * Validation module for file paths\n * @module validation\n */\n\nimport { access } from 'node:fs/promises'\nimport type { PathValidationResult } from './validation.d.ts'\n\n/**\n * Validates file paths exist and are accessible\n * @param {string|string[]} filePaths - Single path or array of paths\n * @returns {Promise<PathValidationResult>} Validation result\n */\nexport const validatePath = async (filePaths: string | string[]): Promise<PathValidationResult> => {\n // Add input validation\n if (!filePaths) {\n return {\n valid: false,\n error: new Error('No files provided')\n }\n }\n\n const files = Array.isArray(filePaths) ? 
filePaths : [filePaths]\n\n if (files.length === 0) {\n return {\n valid: false,\n error: new Error('No files provided')\n }\n }\n\n for (const filePath of files) {\n try {\n await access(filePath)\n } catch (error) {\n return {\n valid: false,\n error: error as Error\n }\n }\n }\n\n return {\n valid: true,\n files\n }\n}\n","import { isNullOrUndefined } from '@src/utils/nullable.ts'\n\nexport const getURLToCrawl = (filePaths: string | string[]) => {\n if (Array.isArray(filePaths)) {\n const filePath = filePaths.find(x => !isNullOrUndefined(x))\n\n if (!isNullOrUndefined(filePath)) {\n return new URL(filePath).toString()\n }\n }\n\n return new URL(filePaths).toString()\n}\n","export const toUnderscore = (x = '') => x.trim().split(/\\.?(?=[A-Z])/).join('_').toLowerCase()\n\nexport const hyphenToCamel = (x: string) => x.replace(/-+(.)/g, (_, c) => c.toUpperCase())\n\nexport const underscoreToCamel = (x: string) => x.replace(/_+(.)/g, (_, c) => c.toUpperCase())\n\nexport const camelToTitle = (c: string) => c\n// Insert space before capital letters, but not if they're part of an acronym\n .replace(/([^A-Z])([A-Z])/g, '$1$2')\n// Capitalize first letter\n .replace(/^./, match => match.toUpperCase())\n// Remove any extra spaces\n .replace(/\\s+/g, '')\n","import { underscoreToCamel } from './casing.ts'\nimport { isValidObject } from './nullable.ts'\n\nexport const transformToCamel = <T>(item: unknown): T => {\n if (item instanceof Date || item instanceof RegExp || item instanceof URL) {\n return item as T\n }\n\n // If the item is a Function, return it as-is\n if (typeof item === 'function') {\n return item as T\n }\n\n // If the item is null or undefined, return it as-is\n if (item === null || item === undefined) {\n return item as T\n }\n\n // If the item is an array, recursively transform each element\n if (Array.isArray(item)) {\n return item.map(el => transformToCamel(el)) as unknown as T\n }\n\n // If the item is a Map or Set, do not modify its structure\n if (item 
instanceof Map) {\n const transformedMap = new Map()\n item.forEach((value, key) => {\n transformedMap.set(transformToCamel(key), transformToCamel(value))\n })\n\n return transformedMap as unknown as T\n }\n\n if (item instanceof Set) {\n const transformedSet = new Set()\n item.forEach((value) => {\n transformedSet.add(transformToCamel(value))\n })\n\n return transformedSet as unknown as T\n }\n\n // If the item is a plain object, recursively transform its keys to camelCase\n if (isValidObject(item)) {\n return Object.keys(item).reduce((acc: any, key: string) => { // eslint-disable-line @typescript-eslint/no-explicit-any\n const camelKey = underscoreToCamel(key)\n acc[camelKey] = transformToCamel((item as Record<string, unknown>)[key])\n\n return acc\n }, {} as T)\n }\n\n // If it's a primitive type (string, number, boolean), return it as-is\n return item as T\n}\n","import { wrappedFetch } from '@src/utils/fetcher.ts'\nimport type { AnyparserOption, Result } from '@anyparser/core'\nimport { buildForm } from './form.ts'\nimport { validateAndParse } from './validator/index.ts'\nimport { transformToCamel } from '@src/utils/camel-case.ts'\n\n/**\n * Main class for parsing items using the Anyparser API.\n */\nexport class Anyparser {\n public options?: AnyparserOption\n\n /**\n * Initialize the parser with optional configuration.\n * @param options - Configuration options for the parser\n */\n constructor (options?: AnyparserOption) {\n this.options = options\n }\n\n /**\n * Parse files using the Anyparser API.\n * @param filePathsOrUrl - A single file path or list of file paths to parse, or a start URL for crawling\n * @returns List of parsed file results if format is JSON, or raw text content if format is text/markdown\n * @throws Error if the API request fails\n */\n async parse (filePathsOrUrl: string | string[]): Promise<Result> {\n const parsed = await validateAndParse(filePathsOrUrl, this.options)\n const { apiUrl, apiKey } = parsed\n\n const formData = 
buildForm(parsed)\n\n const fetchOptions = {\n method: 'POST',\n body: formData,\n headers: {\n ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})\n }\n }\n\n const url = new URL('/parse/v1', apiUrl)\n\n const response = await wrappedFetch(url, fetchOptions)\n\n switch (parsed.format) {\n case 'json':\n return transformToCamel<Result>(await response.json())\n case 'markdown':\n case 'html':\n return await response.text()\n default:\n throw new Error(`Unsupported format: ${parsed.format}`)\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAO,IAAM,eAAN,cAA2B,MAAM;AAAA,EAItC,YAAa,SAAiB,OAAc,YAAoB;AAC9D,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,QAAQ;AACb,SAAK,aAAa;AAAA,EACpB;AACF;AAEO,IAAM,eAAe,CAAO,OAA+B,YAA0B;AAC1F,QAAM,WAAW,MAAM,MAAM,OAAO,OAAO;AAE3C,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,EAAE,QAAQ,WAAW,IAAI;AAC/B,UAAM,OAAO,MAAM,SAAS,KAAK;AAEjC,UAAM,IAAI;AAAA,MACR,QAAQ,MAAM,IAAI,UAAU,KAAK,KAAK;AAAA,MACtC,IAAI,MAAM,IAAI;AAAA,MACd;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;;;ACbO,SAAS,UAAW,QAAyC;AAdpE;AAeE,QAAM,WAAW,IAAI,SAAS;AAG9B,WAAS,OAAO,UAAU,OAAO,MAAM;AACvC,WAAS,OAAO,SAAS,OAAO,KAAK;AAGrC,MAAI,OAAO,UAAU,SAAS,OAAO,UAAU,WAAW;AACxD,QAAI,OAAO,UAAU,QAAW;AAC9B,eAAS,OAAO,SAAS,OAAO,OAAO,KAAK,CAAC;AAAA,IAC/C;AAEA,QAAI,OAAO,UAAU,QAAW;AAC9B,eAAS,OAAO,SAAS,OAAO,OAAO,KAAK,CAAC;AAAA,IAC/C;AAAA,EACF;AAGA,MAAI,OAAO,UAAU,OAAO;AAC1B,SAAI,YAAO,gBAAP,mBAAoB,QAAQ;AAC9B,eAAS,OAAO,eAAe,OAAO,YAAY,KAAK,GAAG,CAAC;AAAA,IAC7D;AAEA,QAAI,OAAO,WAAW;AACpB,eAAS,OAAO,aAAa,OAAO,SAAS;AAAA,IAC/C;AAAA,EACF;AAGA,MAAI,OAAO,UAAU,WAAW;AAC9B,aAAS,OAAO,QAAO,YAAO,QAAP,YAAc,EAAE;AAEvC,QAAI,OAAO,aAAa,QAAW;AACjC,eAAS,OAAO,YAAY,OAAO,OAAO,QAAQ,CAAC;AAAA,IACrD;AAEA,QAAI,OAAO,kBAAkB,QAAW;AACtC,eAAS,OAAO,iBAAiB,OAAO,OAAO,aAAa,CAAC;AAAA,IAC/D;AAEA,QAAI,OAAO,UAAU;AACnB,eAAS,OAAO,YAAY,OAAO,QAAQ;AAAA,IAC7C;AAEA,QAAI,OAAO,gBAAgB;AACzB,eAAS,OAAO,kBAAkB,OAAO,cAAc;AAAA,IACzD;AAAA,EACF,OAAO;AAEL,QAAI,OAAO,OAAO;AAChB,iBAAW,QAAQ,OAAO,OAAO;AAC/B,iBAAS,OAAO,SAAS,KAAK,UAAU,KAAK,QAAQ;AAAA,MACvD;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;;
;ACpEA,YAAY,WAAW;AACvB,SAAS,gBAAgB;AACzB,YAAY,QAAQ;;;ACDb,IAAM,mBAAmB;AAEzB,IAAM,cAAc,OAAO,OAAO;AAAA,EACvC,UAAU;AAAA,EACV,aAAa;AAAA,EACb,MAAM;AAAA,EACN,SAAS;AAAA,EACT,UAAU;AAAA,EACV,SAAS;AAAA,EACT,eAAe;AAAA,EACf,UAAU;AAAA,EACV,gBAAgB;AAClB,CAAU;AAEH,IAAM,gBAAgB,OAAO,OAAO;AAAA,EACzC,WAAW;AAAA,EACX,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,UAAU;AAAA,EACV,aAAa;AAAA,EACb,sBAAsB;AAAA,EACtB,YAAY;AAAA,EACZ,SAAS;AAAA,EACT,SAAS;AAAA,EACT,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,SAAS;AAAA,EACT,SAAS;AAAA,EACT,OAAO;AAAA,EACP,oBAAoB;AAAA,EACpB,6BAA6B;AAAA,EAC7B,qBAAqB;AAAA,EACrB,8BAA8B;AAAA,EAC9B,UAAU;AAAA,EACV,UAAU;AAAA,EACV,OAAO;AAAA,EACP,QAAQ;AAAA,EACR,gBAAgB;AAAA,EAChB,QAAQ;AAAA,EACR,gBAAgB;AAAA,EAChB,cAAc;AAAA,EACd,QAAQ;AAAA,EACR,UAAU;AAAA,EACV,OAAO;AAAA,EACP,SAAS;AAAA,EACT,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,oBAAoB;AAAA,EACpB,UAAU;AAAA,EACV,QAAQ;AAAA,EACR,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,YAAY;AAAA,EACZ,SAAS;AAAA,EACT,iBAAiB;AAAA,EACjB,OAAO;AAAA,EACP,UAAU;AAAA,EACV,eAAe;AAAA,EACf,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,UAAU;AAAA,EACV,WAAW;AAAA,EACX,UAAU;AAAA,EACV,MAAM;AAAA,EACN,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,SAAS;AAAA,EACT,aAAa;AAAA,EACb,UAAU;AAAA,EACV,UAAU;AAAA,EACV,mBAAmB;AAAA,EACnB,SAAS;AAAA,EACT,UAAU;AAAA,EACV,cAAc;AAAA,EACd,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,iBAAiB;AAAA,EACjB,KAAK;AAAA,EACL,OAAO;AAAA,EACP,SAAS;AAAA,EACT,YAAY;AAAA,EACZ,eAAe;AAAA,EACf,WAAW;AAAA,EACX,SAAS;AAAA,EACT,YAAY;AAAA,EACZ,SAAS;AAAA,EACT,WAAW;AAAA,EACX,OAAO;AAAA,EACP,OAAO;AAAA,EACP,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,WAAW;AAAA,EACX,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,KAAK;AAAA,EACL,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,YAAY;AAAA,EACZ,QAAQ;AAAA,EACR,SAAS;AAAA,EACT,UAAU;AAAA,EACV,SAAS;AAAA,EACT,UAAU;AAAA,EACV,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,QAAQ;AAAA,EACR,SAAS;AAAA,EACT,aAAa;AAAA,EACb,UAAU;AAAA,EACV,SAAS;AAAA,EACT,eAAe;AAAA,EACf,YAAY;AAAA,EACZ,SAAS;AAAA,EACT,SAAS;AAAA,EACT,QAAQ;AAAA,
EACR,OAAO;AAAA,EACP,OAAO;AAAA,EACP,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,SAAS;AAAA,EACT,MAAM;AAAA,EACN,UAAU;AAAA,EACV,QAAQ;AAAA,EACR,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,MAAM;AAAA,EACN,OAAO;AAAA,EACP,gBAAgB;AAAA,EAChB,YAAY;AAAA,EACZ,SAAS;AAAA,EACT,QAAQ;AACV,CAAU;;;ACjJV,IAAM,oBAAoB,CAAI,YAAqD;AACjF,MAAI,OAAO,YAAY,eAAe,YAAY,MAAM;AACtD,WAAO;AAAA,EACT;AAEA,MAAI,OAAO,YAAY,UAAU;AAC/B,WAAO,QAAQ,KAAK,MAAM;AAAA,EAC5B;AAEA,SAAO;AACT;AAEA,IAAM,gBAAgB,CAAC,YAAwC;AAC7D,SAAO,OAAO,YAAY,YAAY,YAAY,UAAa,YAAY;AAC7E;;;AChBA,IAAM,MAAM,CAAC,KAAa,WAAW,OAAO;AAC1C,QAAM,QAAQ,QAAQ,IAAI,GAAG;AAE7B,MAAI,CAAC,kBAAkB,KAAK,GAAG;AAC7B,WAAO;AAAA,EACT;AAEA,MAAI,CAAC,kBAAkB,QAAQ,GAAG;AAChC,WAAO;AAAA,EACT;AAEA,SAAO;AACT;;;ACDO,IAAM,YAAY,MAAW;AAClC,QAAM,QAAQ,IAAI,qBAAqB,gBAAgB;AAEvD,MAAI;AACF,WAAO,IAAI,IAAI,KAAK;AAAA,EACtB,SAAQ;AACN,YAAQ,MAAM,sBAAsB,KAAK;AAAA,EAC3C;AAEA,UAAQ,MAAM,oBAAoB,gBAAgB;AAElD,SAAO,IAAI,IAAI,gBAAgB;AACjC;AAMO,IAAM,iBAAkC;AAAA,EAC7C,QAAQ,UAAU;AAAA,EAClB,QAAQ,IAAI,mBAAmB;AAAA,EAC/B,QAAQ;AAAA,EACR,OAAO;AAAA,EACP,OAAO;AAAA,EACP,OAAO;AACT;;;ACzBO,SAAS,eAAgB,QAAkC;AAChE,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,qBAAqB;AAAA,EACvC;AAEA,MAAI,OAAO,WAAW,YAAY,OAAO,KAAK,EAAE,WAAW,GAAG;AAC5D,UAAM,IAAI,MAAM,oCAAoC;AAAA,EACtD;AACF;AAQO,SAAS,aAAc,SAAkD;AAC9E,QAAM,gBAAgB,kCAAK,iBAAmB;AAE9C,iBAAe,cAAc,MAAM;AAEnC,MAAI,CAAC,cAAc,QAAQ;AACzB,UAAM,IAAI,MAAM,qBAAqB;AAAA,EACvC;AAEA,QAAM,gBAAuC;AAAA,IAC3C,QAAQ,cAAc;AAAA,IACtB,QAAQ,cAAc;AAAA,IACtB,QAAQ,cAAc,UAAU;AAAA,IAChC,OAAO,cAAc,SAAS;AAAA,IAC9B,UAAU,cAAc,YAAY;AAAA,IACpC,OAAO,cAAc;AAAA,IACrB,OAAO,cAAc;AAAA,IACrB,aAAa,cAAc;AAAA,IAC3B,WAAW,cAAc;AAAA,IACzB,KAAK,cAAc;AAAA,IACnB,UAAU,cAAc;AAAA,IACxB,eAAe,cAAc;AAAA,IAC7B,UAAU,cAAc;AAAA,IACxB,gBAAgB,cAAc;AAAA,EAChC;AAEA,SAAO;AACT;;;AC1CO,IAAM,iBAAiB,CAAC,WAA4B;AACzD,MAAI,kBAAkB,OAAO,MAAM,GAAG;AACpC,UAAM,IAAI,MAAM,qBAAqB;AAAA,EACvC;AAEA,MAAI,CAAC,kBAAkB,OAAO,WAAW,GAAG;AAC1C,WAAO,YAAY,QAAQ,CAAC,aAAqB;AAC/C,UAAI,CAAC,OAAO,OAAO,aAAuC,EAAE,SAAS,QAAQ,GAAG;AAC9E,cAAM,IAAI,MAAM,sBAAsB;AAAA,MACxC;AAAA,IACF,CAAC;AAAA,EACH;AAEA,
MAAI,CAAC,kBAAkB,OAAO,SAAS,GAAG;AACxC,QAAI,CAAC,OAAO,OAAO,WAAqC,EAAE,SAAS,OAAO,SAAS,GAAG;AACpF,YAAM,IAAI,MAAM,oBAAoB;AAAA,IACtC;AAAA,EACF;AACF;;;AC3BA,SAAS,cAAc;AAQhB,IAAM,eAAe,CAAO,cAAgE;AAEjG,MAAI,CAAC,WAAW;AACd,WAAO;AAAA,MACL,OAAO;AAAA,MACP,OAAO,IAAI,MAAM,mBAAmB;AAAA,IACtC;AAAA,EACF;AAEA,QAAM,QAAQ,MAAM,QAAQ,SAAS,IAAI,YAAY,CAAC,SAAS;AAE/D,MAAI,MAAM,WAAW,GAAG;AACtB,WAAO;AAAA,MACL,OAAO;AAAA,MACP,OAAO,IAAI,MAAM,mBAAmB;AAAA,IACtC;AAAA,EACF;AAEA,aAAW,YAAY,OAAO;AAC5B,QAAI;AACF,YAAM,OAAO,QAAQ;AAAA,IACvB,SAAS,OAAO;AACd,aAAO;AAAA,QACL,OAAO;AAAA,QACP;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AAAA,IACL,OAAO;AAAA,IACP;AAAA,EACF;AACF;;;AC5CO,IAAM,gBAAgB,CAAC,cAAiC;AAC7D,MAAI,MAAM,QAAQ,SAAS,GAAG;AAC5B,UAAM,WAAW,UAAU,KAAK,OAAK,CAAC,kBAAkB,CAAC,CAAC;AAE1D,QAAI,CAAC,kBAAkB,QAAQ,GAAG;AAChC,aAAO,IAAI,IAAI,QAAQ,EAAE,SAAS;AAAA,IACpC;AAAA,EACF;AAEA,SAAO,IAAI,IAAI,SAAS,EAAE,SAAS;AACrC;;;AROA,SAAe,gBAAiB,UAAiC;AAAA;AAC/D,QAAI;AACF,YAAS,UAAO,QAAQ;AAAA,IAC1B,SAAS,OAAO;AACd,UAAI,iBAAiB,SAAS,UAAU,SAAS,MAAM,SAAS,UAAU;AACxE,cAAM,IAAI,MAAM,QAAQ,QAAQ,+BAA+B;AAAA,MACjE;AAEA,YAAM;AAAA,IACR;AAEA,QAAI;AAEF,YAAM,aAAa,MAAS,QAAK,UAAU,GAAG;AAC9C,YAAM,WAAW,MAAM;AAAA,IACzB,SAAS,OAAO;AACd,UAAI,iBAAiB,SAAS,UAAU,UAAU,MAAM,SAAS,WAAW,MAAM,SAAS,UAAU;AACnG,cAAM,IAAI,MAAM,QAAQ,QAAQ,+BAA+B;AAAA,MACjE;AAEA,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAQA,SAAsB,iBACpB,WACA,SACgC;AAAA;AAChC,UAAM,SAAS,aAAa,OAAO;AACnC,mBAAe,MAAM;AAErB,QAAI,CAAC,CAAC,QAAQ,YAAY,MAAM,EAAE,SAAS,OAAO,MAAM,GAAG;AACzD,YAAM,IAAI,MAAM,uBAAuB,OAAO,MAAM,EAAE;AAAA,IACxD;AAGA,UAAM,aAAY,mCAAS,WAAU;AAGrC,UAAM,SAAS,YACX,EAAE,OAAO,MAAM,OAAO,CAAC,cAAc,SAAS,CAAC,EAAE,IACnD,MAAM,aAAa,SAAS;AAE9B,QAAI,OAAO,UAAU,OAAO;AAC1B,YAAM,OAAO;AAAA,IACf;AAEA,UAAM,eAAsC;AAAA,MAC1C,QAAQ,OAAO;AAAA,MACf,QAAQ,OAAO;AAAA,MACf,QAAQ,OAAO;AAAA,MACf,OAAO,OAAO;AAAA,MACd,OAAO,OAAO;AAAA,MACd,OAAO,OAAO;AAAA,MACd,aAAa,OAAO;AAAA,MACpB,WAAW,OAAO;AAAA,MAClB,KAAK,OAAO;AAAA,MACZ,UAAU,OAAO;AAAA,MACjB,eAAe,OAAO;AAAA,MACtB,UAAU,OAAO;AAAA,MACjB,gBAAgB,OAAO;AAAA,MACvB,UAAU,OAAO;AAAA,IACnB;AAGA,QAAI,WAAW;AACb,m
BAAa,MAAM,OAAO,MAAM,CAAC;AAAA,IACnC,OAAO;AAEL,YAAM,YAAY,CAAC;AAEnB,iBAAW,YAAY,OAAO,OAAO;AACnC,cAAM,gBAAgB,QAAQ;AAE9B,cAAM,aAAmB,uBAAiB,QAAQ;AAClD,cAAM,SAAS,CAAC;AAEhB;AAAA,qCAA0B,aAA1B,0EAAsC;AAA3B,kBAAM,QAAjB;AACE,mBAAO,KAAK,KAAK;AAAA,UACnB;AAAA,iBAFA,MAtGN;AAsGM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,cAAM,SAAS,OAAO,OAAO,MAAM;AACnC,cAAM,WAAW,IAAI,KAAK,CAAC,MAAM,GAAG,SAAS,QAAQ,GAAG;AAAA,UACtD,MAAM;AAAA,QACR,CAAC;AAED,kBAAU,KAAK;AAAA,UACb,UAAU,SAAS,QAAQ;AAAA,UAC3B;AAAA,QACF,CAAC;AAAA,MACH;AAEA,mBAAa,QAAQ;AAAA,IACvB;AAEA,WAAO;AAAA,EACT;AAAA;;;ASrHO,IAAM,oBAAoB,CAAC,MAAc,EAAE,QAAQ,UAAU,CAAC,GAAG,MAAM,EAAE,YAAY,CAAC;;;ACDtF,IAAM,mBAAmB,CAAI,SAAqB;AACvD,MAAI,gBAAgB,QAAQ,gBAAgB,UAAU,gBAAgB,KAAK;AACzE,WAAO;AAAA,EACT;AAGA,MAAI,OAAO,SAAS,YAAY;AAC9B,WAAO;AAAA,EACT;AAGA,MAAI,SAAS,QAAQ,SAAS,QAAW;AACvC,WAAO;AAAA,EACT;AAGA,MAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,WAAO,KAAK,IAAI,QAAM,iBAAiB,EAAE,CAAC;AAAA,EAC5C;AAGA,MAAI,gBAAgB,KAAK;AACvB,UAAM,iBAAiB,oBAAI,IAAI;AAC/B,SAAK,QAAQ,CAAC,OAAO,QAAQ;AAC3B,qBAAe,IAAI,iBAAiB,GAAG,GAAG,iBAAiB,KAAK,CAAC;AAAA,IACnE,CAAC;AAED,WAAO;AAAA,EACT;AAEA,MAAI,gBAAgB,KAAK;AACvB,UAAM,iBAAiB,oBAAI,IAAI;AAC/B,SAAK,QAAQ,CAAC,UAAU;AACtB,qBAAe,IAAI,iBAAiB,KAAK,CAAC;AAAA,IAC5C,CAAC;AAED,WAAO;AAAA,EACT;AAGA,MAAI,cAAc,IAAI,GAAG;AACvB,WAAO,OAAO,KAAK,IAAI,EAAE,OAAO,CAAC,KAAU,QAAgB;AACzD,YAAM,WAAW,kBAAkB,GAAG;AACtC,UAAI,QAAQ,IAAI,iBAAkB,KAAiC,GAAG,CAAC;AAEvE,aAAO;AAAA,IACT,GAAG,CAAC,CAAM;AAAA,EACZ;AAGA,SAAO;AACT;;;AC7CO,IAAM,YAAN,MAAgB;AAAA;AAAA;AAAA;AAAA;AAAA,EAOrB,YAAa,SAA2B;AACtC,SAAK,UAAU;AAAA,EACjB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQM,MAAO,gBAAoD;AAAA;AAC/D,YAAM,SAAS,MAAM,iBAAiB,gBAAgB,KAAK,OAAO;AAClE,YAAM,EAAE,QAAQ,OAAO,IAAI;AAE3B,YAAM,WAAW,UAAU,MAAM;AAEjC,YAAM,eAAe;AAAA,QACnB,QAAQ;AAAA,QACR,MAAM;AAAA,QACN,SAAS,mBACH,SAAS,EAAE,eAAe,UAAU,MAAM,GAAG,IAAI,CAAC;AAAA,MAE1D;AAEA,YAAM,MAAM,IAAI,IAAI,aAAa,MAAM;AAEvC,YAAM,WAAW,MAAM,aAAa,KAAK,YAAY;AAErD,cAAQ,OAAO,QAAQ;AAAA,QACrB,KAAK;AACH,iBAAO,iBAAyB,MAAM,SAAS,KAAK,CAAC;AAAA,QACvD,KAAK;AAAA,QACL,KAAK;AACH,iBAAO,MAAM,SAAS
,KAAK;AAAA,QAC7B;AACE,gBAAM,IAAI,MAAM,uBAAuB,OAAO,MAAM,EAAE;AAAA,MAC1D;AAAA,IACF;AAAA;AACF;","names":[]} |
+202
| Apache License | ||
| Version 2.0, January 2004 | ||
| http://www.apache.org/licenses/ | ||
| TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION | ||
| 1. Definitions. | ||
| "License" shall mean the terms and conditions for use, reproduction, | ||
| and distribution as defined by Sections 1 through 9 of this document. | ||
| "Licensor" shall mean the copyright owner or entity authorized by | ||
| the copyright owner that is granting the License. | ||
| "Legal Entity" shall mean the union of the acting entity and all | ||
| other entities that control, are controlled by, or are under common | ||
| control with that entity. For the purposes of this definition, | ||
| "control" means (i) the power, direct or indirect, to cause the | ||
| direction or management of such entity, whether by contract or | ||
| otherwise, or (ii) ownership of fifty percent (50%) or more of the | ||
| outstanding shares, or (iii) beneficial ownership of such entity. | ||
| "You" (or "Your") shall mean an individual or Legal Entity | ||
| exercising permissions granted by this License. | ||
| "Source" form shall mean the preferred form for making modifications, | ||
| including but not limited to software source code, documentation | ||
| source, and configuration files. | ||
| "Object" form shall mean any form resulting from mechanical | ||
| transformation or translation of a Source form, including but | ||
| not limited to compiled object code, generated documentation, | ||
| and conversions to other media types. | ||
| "Work" shall mean the work of authorship, whether in Source or | ||
| Object form, made available under the License, as indicated by a | ||
| copyright notice that is included in or attached to the work | ||
| (an example is provided in the Appendix below). | ||
| "Derivative Works" shall mean any work, whether in Source or Object | ||
| form, that is based on (or derived from) the Work and for which the | ||
| editorial revisions, annotations, elaborations, or other modifications | ||
| represent, as a whole, an original work of authorship. For the purposes | ||
| of this License, Derivative Works shall not include works that remain | ||
| separable from, or merely link (or bind by name) to the interfaces of, | ||
| the Work and Derivative Works thereof. | ||
| "Contribution" shall mean any work of authorship, including | ||
| the original version of the Work and any modifications or additions | ||
| to that Work or Derivative Works thereof, that is intentionally | ||
| submitted to Licensor for inclusion in the Work by the copyright owner | ||
| or by an individual or Legal Entity authorized to submit on behalf of | ||
| the copyright owner. For the purposes of this definition, "submitted" | ||
| means any form of electronic, verbal, or written communication sent | ||
| to the Licensor or its representatives, including but not limited to | ||
| communication on electronic mailing lists, source code control systems, | ||
| and issue tracking systems that are managed by, or on behalf of, the | ||
| Licensor for the purpose of discussing and improving the Work, but | ||
| excluding communication that is conspicuously marked or otherwise | ||
| designated in writing by the copyright owner as "Not a Contribution." | ||
| "Contributor" shall mean Licensor and any individual or Legal Entity | ||
| on behalf of whom a Contribution has been received by Licensor and | ||
| subsequently incorporated within the Work. | ||
| 2. Grant of Copyright License. Subject to the terms and conditions of | ||
| this License, each Contributor hereby grants to You a perpetual, | ||
| worldwide, non-exclusive, no-charge, royalty-free, irrevocable | ||
| copyright license to reproduce, prepare Derivative Works of, | ||
| publicly display, publicly perform, sublicense, and distribute the | ||
| Work and such Derivative Works in Source or Object form. | ||
| 3. Grant of Patent License. Subject to the terms and conditions of | ||
| this License, each Contributor hereby grants to You a perpetual, | ||
| worldwide, non-exclusive, no-charge, royalty-free, irrevocable | ||
| (except as stated in this section) patent license to make, have made, | ||
| use, offer to sell, sell, import, and otherwise transfer the Work, | ||
| where such license applies only to those patent claims licensable | ||
| by such Contributor that are necessarily infringed by their | ||
| Contribution(s) alone or by combination of their Contribution(s) | ||
| with the Work to which such Contribution(s) was submitted. If You | ||
| institute patent litigation against any entity (including a | ||
| cross-claim or counterclaim in a lawsuit) alleging that the Work | ||
| or a Contribution incorporated within the Work constitutes direct | ||
| or contributory patent infringement, then any patent licenses | ||
| granted to You under this License for that Work shall terminate | ||
| as of the date such litigation is filed. | ||
| 4. Redistribution. You may reproduce and distribute copies of the | ||
| Work or Derivative Works thereof in any medium, with or without | ||
| modifications, and in Source or Object form, provided that You | ||
| meet the following conditions: | ||
| (a) You must give any other recipients of the Work or | ||
| Derivative Works a copy of this License; and | ||
| (b) You must cause any modified files to carry prominent notices | ||
| stating that You changed the files; and | ||
| (c) You must retain, in the Source form of any Derivative Works | ||
| that You distribute, all copyright, patent, trademark, and | ||
| attribution notices from the Source form of the Work, | ||
| excluding those notices that do not pertain to any part of | ||
| the Derivative Works; and | ||
| (d) If the Work includes a "NOTICE" text file as part of its | ||
| distribution, then any Derivative Works that You distribute must | ||
| include a readable copy of the attribution notices contained | ||
| within such NOTICE file, excluding those notices that do not | ||
| pertain to any part of the Derivative Works, in at least one | ||
| of the following places: within a NOTICE text file distributed | ||
| as part of the Derivative Works; within the Source form or | ||
| documentation, if provided along with the Derivative Works; or, | ||
| within a display generated by the Derivative Works, if and | ||
| wherever such third-party notices normally appear. The contents | ||
| of the NOTICE file are for informational purposes only and | ||
| do not modify the License. You may add Your own attribution | ||
| notices within Derivative Works that You distribute, alongside | ||
| or as an addendum to the NOTICE text from the Work, provided | ||
| that such additional attribution notices cannot be construed | ||
| as modifying the License. | ||
| You may add Your own copyright statement to Your modifications and | ||
| may provide additional or different license terms and conditions | ||
| for use, reproduction, or distribution of Your modifications, or | ||
| for any such Derivative Works as a whole, provided Your use, | ||
| reproduction, and distribution of the Work otherwise complies with | ||
| the conditions stated in this License. | ||
| 5. Submission of Contributions. Unless You explicitly state otherwise, | ||
| any Contribution intentionally submitted for inclusion in the Work | ||
| by You to the Licensor shall be under the terms and conditions of | ||
| this License, without any additional terms or conditions. | ||
| Notwithstanding the above, nothing herein shall supersede or modify | ||
| the terms of any separate license agreement you may have executed | ||
| with Licensor regarding such Contributions. | ||
| 6. Trademarks. This License does not grant permission to use the trade | ||
| names, trademarks, service marks, or product names of the Licensor, | ||
| except as required for reasonable and customary use in describing the | ||
| origin of the Work and reproducing the content of the NOTICE file. | ||
| 7. Disclaimer of Warranty. Unless required by applicable law or | ||
| agreed to in writing, Licensor provides the Work (and each | ||
| Contributor provides its Contributions) on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
| implied, including, without limitation, any warranties or conditions | ||
| of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A | ||
| PARTICULAR PURPOSE. You are solely responsible for determining the | ||
| appropriateness of using or redistributing the Work and assume any | ||
| risks associated with Your exercise of permissions under this License. | ||
| 8. Limitation of Liability. In no event and under no legal theory, | ||
| whether in tort (including negligence), contract, or otherwise, | ||
| unless required by applicable law (such as deliberate and grossly | ||
| negligent acts) or agreed to in writing, shall any Contributor be | ||
| liable to You for damages, including any direct, indirect, special, | ||
| incidental, or consequential damages of any character arising as a | ||
| result of this License or out of the use or inability to use the | ||
| Work (including but not limited to damages for loss of goodwill, | ||
| work stoppage, computer failure or malfunction, or any and all | ||
| other commercial damages or losses), even if such Contributor | ||
| has been advised of the possibility of such damages. | ||
| 9. Accepting Warranty or Additional Liability. While redistributing | ||
| the Work or Derivative Works thereof, You may choose to offer, | ||
| and charge a fee for, acceptance of support, warranty, indemnity, | ||
| or other liability obligations and/or rights consistent with this | ||
| License. However, in accepting such obligations, You may act only | ||
| on Your own behalf and on Your sole responsibility, not on behalf | ||
| of any other Contributor, and only if You agree to indemnify, | ||
| defend, and hold each Contributor harmless for any liability | ||
| incurred by, or claims asserted against, such Contributor by reason | ||
| of your accepting any such warranty or additional liability. | ||
| END OF TERMS AND CONDITIONS | ||
| APPENDIX: How to apply the Apache License to your work. | ||
| To apply the Apache License to your work, attach the following | ||
| boilerplate notice, with the fields enclosed by brackets "[]" | ||
| replaced with your own identifying information. (Don't include | ||
| the brackets!) The text should be enclosed in the appropriate | ||
| comment syntax for the file format. We also recommend that a | ||
| file or class name and description of purpose be included on the | ||
| same "printed page" as the copyright notice for easier | ||
| identification within third-party archives. | ||
| Copyright [yyyy] [name of copyright owner] | ||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. |
+346
| # Anyparser Core: Your Foundation for AI Data Preparation | ||
| https://anyparser.com | ||
| **Unlock the potential of your AI models with Anyparser Core, the Typescript SDK designed for high-performance content extraction and format conversion.** Built for developers, this SDK streamlines the process of acquiring clean, structured data from diverse sources, making it an indispensable tool for building cutting-edge applications in **Retrieval Augmented Generation (RAG)**, **Agentic AI**, **Generative AI**, and robust **ETL Pipelines**. | ||
| **Key Benefits for AI Developers:** | ||
| * **Rapid Data Acquisition for RAG:** Extract information up to 10x faster than traditional methods, accelerating the creation of your knowledge bases for efficient RAG implementations. | ||
| * **High-Accuracy Data for Generative AI:** Achieve up to 10x improvement in extraction accuracy, ensuring your Generative AI models are trained and operate on reliable, high-quality data. Output in JSON or Markdown is directly consumable by AI processes. | ||
| * **Cost-Effective Knowledge Base Construction:** Efficiently build and maintain knowledge bases from unstructured data, significantly reducing the overhead for RAG, Agentic AI, and other AI applications. | ||
| * **Developer-First Design:** Unlimited local usage (fair use policies apply) allows for rapid experimentation and seamless integration into your existing AI workflows. | ||
| * **Optimized for ETL Pipelines:** Provides a robust extraction layer for your ETL processes, handling a wide variety of file types and URLs to feed your data lakes and AI systems. | ||
| **Get Started Quickly:** | ||
| 1. **Free Access:** Obtain your API credentials and start building your AI data pipelines today at [Anyparser Studio](https://studio.anyparser.com/). | ||
| 2. **Installation:** Install the SDK with a simple `npm install` command. | ||
| 3. **Run Examples:** Copy and paste the provided examples to see how easy it is to extract data for your AI projects. | ||
| Before starting, add a new API key on the [Anyparser Studio](https://studio.anyparser.com/). | ||
| ```bash | ||
| export ANYPARSER_API_URL=https://anyparserapi.com | ||
| export ANYPARSER_API_KEY=<your-api-key> | ||
| ``` | ||
| or | ||
| ```bash | ||
| export ANYPARSER_API_URL=https://eu.anyparserapi.com | ||
| export ANYPARSER_API_KEY=<your-api-key> | ||
| ``` | ||
| ## Installation | ||
| ```bash | ||
| npm install @anyparser/core | ||
| ``` | ||
| ## Core Usage Examples for AI Applications | ||
| These examples demonstrate how to use `Anyparser Core` for common AI tasks, arranged from basic to advanced usage. | ||
| ### Example 1: Quick Start with Single Document | ||
| When you're just getting started or prototyping, you can use this simplified approach with minimal configuration: | ||
| ```typescript | ||
| import { Anyparser } from '@anyparser/core' | ||
| async function main () { | ||
| // Instantiate with default settings, assuming API credentials are | ||
| // set as environment variables. | ||
| console.log(await new Anyparser().parse('docs/sample.docx')) | ||
| } | ||
| main().catch(console.error) | ||
| ``` | ||
| ### Example 2: Building a RAG Knowledge Base from Local Documents | ||
| This example showcases how to extract structured data from local files with full configuration, preparing them for indexing in a RAG system. The JSON output is ideal for vector databases and downstream AI processing. Perfect for building your initial knowledge base with high-quality, structured data. | ||
| ```typescript | ||
| import type { AnyparserOption, AnyparserResultBase } from '@anyparser/core' | ||
| import { Anyparser } from '@anyparser/core' | ||
| const multipleFiles = ['docs/sample.docx', 'docs/sample.pdf'] | ||
| const options: AnyparserOption = { | ||
| apiUrl: new URL(process.env.ANYPARSER_API_URL ?? 'https://anyparserapi.com'), | ||
| apiKey: process.env.ANYPARSER_API_KEY, | ||
| format: 'json', | ||
| image: true, | ||
| table: true | ||
| } | ||
| const parser = new Anyparser(options) | ||
| async function main () { | ||
| const result = await parser.parse(multipleFiles) as AnyparserResultBase[] | ||
| for (const item of result) { | ||
| console.log('-'.repeat(100)) | ||
| console.log('File:', item.originalFilename) | ||
| console.log('Checksum:', item.checksum) | ||
| console.log('Total characters:', item.totalCharacters) | ||
| console.log('Markdown:', item.markdown?.substring(0, 500)) | ||
| } | ||
| console.log('-'.repeat(100)) | ||
| } | ||
| main().catch(console.error) | ||
| ``` | ||
| ### Example 3: OCR Processing for Image-Based Documents | ||
| Extract text from images and scanned documents using our advanced OCR capabilities. This example shows how to configure language and preset options for optimal results, particularly useful for processing historical documents, receipts, or any image-based content: | ||
| ```typescript | ||
| import type { AnyparserOption } from '@anyparser/core' | ||
| import { Anyparser, OCR_LANGUAGES } from '@anyparser/core' | ||
| const singleFile = 'docs/document.png' | ||
| const options: AnyparserOption = { | ||
| apiUrl: new URL(process.env.ANYPARSER_API_URL ?? 'https://anyparserapi.com'), | ||
| apiKey: process.env.ANYPARSER_API_KEY, | ||
| model: 'ocr', | ||
| format: 'markdown', | ||
| // ocrLanguage: ['eng'], | ||
| ocrLanguage: [OCR_LANGUAGES.JAPANESE], | ||
| ocrPreset: 'scan' | ||
| } | ||
| const parser = new Anyparser(options) | ||
| async function main () { | ||
| const result = await parser.parse(singleFile) | ||
| console.log(result) | ||
| } | ||
| main().catch(console.error) | ||
| ``` | ||
| ### Example 4: Web Crawling for Dynamic Knowledge Base Updates | ||
| Keep your knowledge base fresh with our powerful web crawling capabilities. This example shows how to crawl websites while respecting robots.txt directives and maintaining politeness delays: | ||
| ```typescript | ||
| import type { AnyparserCrawlResult, AnyparserOption, AnyparserUrl } from '@anyparser/core' | ||
| import { Anyparser } from '@anyparser/core' | ||
| const url = 'https://anyparser.com/docs' | ||
| const options: AnyparserOption = { | ||
| apiUrl: new URL(process.env.ANYPARSER_API_URL ?? 'https://anyparserapi.com'), | ||
| apiKey: process.env.ANYPARSER_API_KEY, | ||
| model: 'crawler', | ||
| format: 'json', | ||
| maxDepth: 50, | ||
| maxExecutions: 2, | ||
| strategy: 'LIFO', | ||
| traversalScope: 'subtree' | ||
| } | ||
| const parser = new Anyparser(options) | ||
| async function main () { | ||
| const result = await parser.parse(url) as AnyparserCrawlResult[] | ||
| for (const candidate of result) { | ||
| console.log('\n') | ||
| console.log('Start URL :', candidate.startUrl) | ||
| console.log('Total characters :', candidate.totalCharacters) | ||
| console.log('Total items :', candidate.totalItems) | ||
| console.log('Robots directive :', candidate.robotsDirective) | ||
| console.log('\n') | ||
| console.log('*'.repeat(100)) | ||
| console.log('Begin Crawl') | ||
| console.log('*'.repeat(100)) | ||
| console.log('\n') | ||
| const resources = candidate.items || [] | ||
| for (let index = 0; index < resources.length; index++) { | ||
| const item = resources[index] as AnyparserUrl | ||
| if (index > 0) { | ||
| console.log('-'.repeat(100)) | ||
| console.log('\n') | ||
| } | ||
| console.log('URL :', item.url) | ||
| console.log('Title :', item.title) | ||
| console.log('Status message :', item.statusMessage) | ||
| console.log('Total characters :', item.totalCharacters) | ||
| console.log('Politeness delay :', item.politenessDelay) | ||
| console.log('Content:\n') | ||
| console.log(item.markdown) | ||
| } | ||
| } | ||
| } | ||
| main().catch(console.error) | ||
| ``` | ||
| ## Configuration for Optimized AI Workloads | ||
| The `Anyparser` class defines the `AnyparserOption` interface for flexible configuration, allowing you to fine-tune the extraction process for different AI use cases. | ||
| ```typescript | ||
| export interface AnyparserOption { | ||
| apiUrl?: URL | ||
| apiKey?: string | ||
| format?: AnyparserFormatType | ||
| model?: AnyparserModelType | ||
| encoding?: AnyparserEncodingType | ||
| image?: boolean | ||
| table?: boolean | ||
| files?: string | string[] | ||
| ocrLanguage?: OcrLanguageType[] | ||
| ocrPreset?: OcrPresetType | ||
| url?: string | ||
| maxDepth?: number | ||
| maxExecutions?: number | ||
| strategy?: 'LIFO' | 'FIFO' | ||
| traversalScope?: 'subtree' | 'domain' | ||
| } | ||
| ``` | ||
| **Key Configuration Parameters:** | ||
| | Parameter | Type | Default | Description | | ||
| |-----------|------|---------|-------------| | ||
| `apiUrl` | `URL (optional)` | `undefined` | API endpoint URL. Defaults to `ANYPARSER_API_URL` environment variable | ||
| | `apiKey` | `string (optional)` | `undefined` | API key for authentication. Defaults to `ANYPARSER_API_KEY` environment variable | | ||
| `format` | `string` | `"json"` | Output format: `"json"`, `"markdown"`, or `"html"` | ||
| `model` | `string` | `"text"` | Processing model: `"text"`, `"ocr"`, `"vlm"`, `"lam"`, or `"crawler"` | ||
| `encoding` | `string` | `"utf-8"` | Text encoding: `"utf-8"` or `"latin1"` | ||
| | `image` | `boolean (optional)` | `undefined` | Enable/disable image extraction | | ||
| | `table` | `boolean (optional)` | `undefined` | Enable/disable table extraction | | ||
| | `files` | `string or string[] (optional)` | `undefined` | Input files to process | | ||
| | `url` | `string (optional)` | `undefined` | URL for crawler model | | ||
| | `ocrLanguage` | `OcrLanguageType[] (optional)` | `undefined` | Languages for OCR processing | | ||
| | `ocrPreset` | `OcrPresetType (optional)` | `undefined` | Preset configuration for OCR | | ||
| | `maxDepth` | `number (optional)` | `undefined` | Maximum crawl depth for crawler model | | ||
| | `maxExecutions` | `number (optional)` | `undefined` | Maximum number of pages to crawl | | ||
| | `strategy` | `string (optional)` | `undefined` | Crawling strategy: `"LIFO"` or `"FIFO"` | | ||
| | `traversalScope` | `string (optional)` | `undefined` | Crawling scope: `"subtree"` or `"domain"` | | ||
| **OCR Presets:** | ||
| The following OCR presets are available for optimized document processing: | ||
| - `OCRPreset.DOCUMENT` - General document processing | ||
| - `OCRPreset.HANDWRITING` - Handwritten text recognition | ||
| - `OCRPreset.SCAN` - Scanned document processing | ||
| - `OCRPreset.RECEIPT` - Receipt processing | ||
| - `OCRPreset.MAGAZINE` - Magazine/article processing | ||
| - `OCRPreset.INVOICE` - Invoice processing | ||
| - `OCRPreset.BUSINESS_CARD` - Business card processing | ||
| - `OCRPreset.PASSPORT` - Passport document processing | ||
| - `OCRPreset.DRIVER_LICENSE` - Driver's license processing | ||
| - `OCRPreset.IDENTITY_CARD` - ID card processing | ||
| - `OCRPreset.LICENSE_PLATE` - License plate recognition | ||
| - `OCRPreset.MEDICAL_REPORT` - Medical document processing | ||
| - `OCRPreset.BANK_STATEMENT` - Bank statement processing | ||
| **OCR Language:** | ||
| [List of OCR Languages](https://github.com/anyparser/anyparserjs/blob/master/src/config/hardcoded.ts#L20-L150) | ||
| **Model Types for AI Data Pipelines:** | ||
| Select the appropriate processing model based on your AI application needs: | ||
| * `'text'`: Optimized for extracting textual content for language models and general text-based RAG. | ||
| * `'ocr'`: Performs Optical Character Recognition to extract text from image-based documents, expanding your data sources for AI training and knowledge bases. **Essential for processing scanned documents for RAG and Generative AI.** | ||
| * `'vlm'`: Utilizes a Vision-Language Model for advanced understanding of image content, enabling richer context for Generative AI and more sophisticated Agentic AI perception. | ||
| * `'lam'` (Coming Soon): Employs a Large-Audio Model for extracting insights from audio data, opening new possibilities for multimodal AI applications. | ||
| * `'crawler'`: Enables website crawling to gather up-to-date information for dynamic AI knowledge bases and Agentic AI agents. | ||
| **OCR Configuration for Enhanced AI Data Quality (when `model='ocr'`):** | ||
| Fine-tune OCR settings for optimal accuracy when processing image-based documents. This is critical for ensuring high-quality data for your AI models. | ||
| | Option | Type | Default | Description | Relevance for AI | | ||
| |----------------|-------------------|---------|-------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------| | ||
| | `ocrLanguage` | `OcrLanguageType[] (optional)` | `undefined` | List of ISO 639-2 language codes for OCR, ensuring accurate text extraction for multilingual documents. | **Essential for accurate data extraction from documents in different languages for global AI.** | | ||
| | `ocrPreset` | `string (optional)` | `undefined` | Predefined configuration for specific document types to optimize OCR accuracy. | **Use presets to improve accuracy for specific document types used in your AI workflows.** | | ||
| **Available OCR Presets for AI Data Preparation:** | ||
| Leverage these presets for common document types used in AI datasets: | ||
| * `'document'`: General-purpose OCR for standard documents. | ||
| * `'handwriting'`: Optimized for handwritten text, useful for digitizing historical documents or notes for AI analysis. | ||
| * `'scan'`: For scanned documents and images. | ||
| * **Specific Document Presets (valuable for structured data extraction):** `'receipt'`, `'magazine'`, `'invoice'`, `'business-card'`, `'passport'`, `'driver-license'`, `'identity-card'`, `'license-plate'`, `'medical-report'`, `'bank-statement'`. **These presets are crucial for building structured datasets for training specialized AI models or powering Agentic AI agents that interact with these document types.** | ||
| ## Contributing to AI-Ready Data Extraction | ||
| We welcome contributions to the `Anyparser Core` SDK, particularly those that enhance its capabilities for AI data preparation. Please refer to the [Contribution Guidelines](CONTRIBUTING.md). | ||
| # Frequently Asked Questions (FAQ) | ||
| 1. **Do I have to buy a license to use the SDK?** | ||
| - No, there's no need to buy a license to use the SDK. You can get started right away. | ||
| 2. **Do you store my documents?** | ||
| - No, we do not store any of your documents. All data is processed in real-time and discarded after the task is completed. | ||
| 3. **Is there a way to test the software or have a free trial?** | ||
| - You don't need to commit. You can use the API on your developer machine for free with a fair usage policy. OCR and VLM models are not free, but you can purchase a tiny (as low as $5) credit to test the quality of the output. | ||
| 4. **Can I get a refund?** | ||
| - No, we do not offer any refunds. | ||
| 5. **Is Anyparser Available in My Region?** | ||
| - Currently, Anyparser is only available in the EU, US, and a few other countries. We are working on expanding to more regions. | ||
| 6. **Why don't you have paid plans?** | ||
| - We use a pay-per-use model to offer [flexible pricing](https://anyparser.com/pricing) and avoid locking customers into expensive subscriptions. | ||
| 7. **Does the license allow me to use the software in a SaaS product?** | ||
| - Yes, the license permits usage in SaaS products. | ||
| 8. **What kind of support will I get?** | ||
| - We offer email and ticket-based support. | ||
| 9. **Does the SDK support chunking and embedding?** | ||
| - No, our service focuses on the extraction layer in your ETL pipeline. Chunking and embedding would be handled separately by your own system. | ||
| 10. **Does the SDK support multiple file uploads?** | ||
| - Yes. | ||
| 11. **Does it support converting receipts to structured output?** | ||
| - Yes, Anyparser can extract data from receipts and convert it into structured formats. | ||
| 12. **Does it support multiple languages?** | ||
| - Yes, Anyparser supports multiple languages for text extraction. | ||
| 13. **Where are your servers located?** | ||
| - Our servers are located in the US with a federated setup across United States, Europe, and Asia. We are adding more regions as we move forward. | ||
| ### Why Use Anyparser SDKs? | ||
| - **100% Free for Developers**: As long as you're running Anyparser on your local laptop or personal machine (not on servers), unlimited extraction is completely free under our fair usage policy. There’s no need to pay for anything, making it perfect for developers testing and building on their development environment. | ||
| - **Speed**: Up to 10x faster than traditional parsing tools, with optimized processing for both small and large documents. | ||
| - **Accuracy**: Get highly accurate, structured outputs even from complex formats like OCR, tables, and charts. | ||
| - **Scalability**: Whether you're processing a few documents or millions, our SDKs ensure smooth integration with your workflows. | ||
| - **Multiple File Support**: Effortlessly parse bulk files, saving time and optimizing batch processing. | ||
| - **Zero Learning Curve**: The SDKs come with comprehensive examples, documentation, and minimal setup, allowing you to get started immediately without needing deep technical expertise. | ||
| # Product Roadmap | ||
| While Anyparser is already a powerful solution for document parsing, we’re committed to continually improving and expanding our platform. Our roadmap includes: | ||
| - **Further Integrations**: We plan to add more integrations with industry-standard tools and platforms, enabling deeper workflows and expanding compatibility. | ||
| - **Additional Models**: We aim to introduce new parsing models to handle more complex or specialized data extraction tasks. | ||
| - **Enhanced Features**: Continuous improvement of our existing features, such as support for additional file formats, optimization of processing speed, and improved accuracy. | ||
| ## License | ||
| Apache-2.0 | ||
| ## Support for AI Developers | ||
| For technical support or inquiries related to using Anyparser Core for AI applications, please visit our [Community Discussions](https://github.com/anyparser/anyparser_core/discussions). We are here to help you build the next generation of AI applications. | ||
+88
-8
| { | ||
| "name": "@anyparser/core", | ||
| "version": "0.1.0", | ||
| "description": "Anyparser Core SDK for Node.js", | ||
| "main": "index.js", | ||
| "license": "Apache-2.0", | ||
| "description": "The `@anyparser/core` Typescript SDK enables developers to quickly extract structured data from a wide variety of file formats like PDFs, images, websites, audio, and videos.", | ||
| "keywords": [ | ||
| "anyparser", | ||
| "ai", | ||
| "artificial-intelligence", | ||
| "rag", | ||
| "retrieval-augmented-generation", | ||
| "graph-rag", | ||
| "cag", | ||
| "cache-augmented-generation", | ||
| "pdf", | ||
| "pdf-processing", | ||
| "pdf-extraction", | ||
| "ms-office", | ||
| "microsoft-office", | ||
| "microsoft-word", | ||
| "microsoft-excel", | ||
| "microsoft-powerpoint", | ||
| "document-extraction", | ||
| "ocr", | ||
| "object-character-recognition", | ||
| "text-recognition", | ||
| "knowledgebase", | ||
| "knowledge-graph", | ||
| "etl", | ||
| "etl-pipeline", | ||
| "etl-framework", | ||
| "langchain", | ||
| "n8n", | ||
| "crewai", | ||
| "llamaindex", | ||
| "typescript", | ||
| "ts" | ||
| ], | ||
| "homepage": "https://github.com/anyparser/anyparserjs", | ||
| "version": "1.0.0", | ||
| "main": "dist/index.js", | ||
| "exports": { | ||
| ".": { | ||
| "import": "./dist/index.js", | ||
| "require": "./dist/index.cjs" | ||
| } | ||
| }, | ||
| "files": [ | ||
| "dist" | ||
| ], | ||
| "type": "module", | ||
| "devDependencies": { | ||
| "@arethetypeswrong/cli": "^0.17.4", | ||
| "@changesets/cli": "^2.28.1", | ||
| "@types/node": "^22.13.5", | ||
| "@typescript-eslint/eslint-plugin": "^8.24.1", | ||
| "@typescript-eslint/parser": "^8.24.1", | ||
| "@vitest/coverage-v8": "^3.0.6", | ||
| "eslint": "^8.57.1", | ||
| "eslint-config-standard": "^17.1.0", | ||
| "eslint-import-resolver-typescript": "^3.8.3", | ||
| "eslint-plugin-import": "^2.31.0", | ||
| "eslint-plugin-n": "^16.6.2", | ||
| "eslint-plugin-promise": "^6.6.0", | ||
| "ts-node": "^10.9.2", | ||
| "tsup": "^8.3.6", | ||
| "typescript": "^5.5.4", | ||
| "vite": "^6.1.1", | ||
| "vite-tsconfig-paths": "^5.1.4", | ||
| "vitest": "^3.0.6" | ||
| }, | ||
| "publishConfig": { | ||
| "access": "public" | ||
| }, | ||
| "scripts": { | ||
| "test": "echo \"Error: no test specified\" && exit 1" | ||
| "pre:compile": "npm run lint && npm run test", | ||
| "start:compile": "tsup", | ||
| "post:compile": "npx --yes @arethetypeswrong/cli --pack .", | ||
| "build": "npm run pre:compile && npm run start:compile && npm run post:compile", | ||
| "lint": "tsc && eslint src --ext .ts --fix", | ||
| "test": "vitest run", | ||
| "coverage": "vitest run --coverage", | ||
| "check:state": "git diff --quiet && git diff --cached --quiet || { echo 'Git working directory is not clean. Please commit or stash changes before publishing.'; exit 1; }", | ||
| "check:branch": "if [ $(git symbolic-ref --short HEAD) != 'master' ]; then echo 'You must be on the master branch to publish.'; exit 1; fi", | ||
| "prepublishOnly": "npm run check:state && npm run check:branch && npm run build" | ||
| }, | ||
| "keywords": ["anyparser", "core"], | ||
| "author": "", | ||
| "license": "ISC" | ||
| } | ||
| "engines": { | ||
| "node": ">=20" | ||
| }, | ||
| "volta": { | ||
| "node": "20.18.0" | ||
| } | ||
| } |
-6
/**
 * Client for the @anyparser/core package (CommonJS build).
 *
 * NOTE(review): as published, this class carries no state or methods —
 * presumably a placeholder/stub for the real SDK client; verify against
 * the TypeScript sources. The explicit empty constructor added nothing
 * over the implicit default constructor, so it has been removed
 * (ESLint `no-useless-constructor`). Behaviour for callers is
 * unchanged: `new Client()` still produces an empty instance.
 */
class Client {
}
| module.exports = { Client }; |
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Network access
Supply chain riskThis module accesses the network.
Found 1 instance in 1 package
Environment variable access
Supply chain riskPackage accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 1 instance in 1 package
Filesystem access
Supply chain riskAccesses the file system, and could potentially read sensitive data.
Found 2 instances in 1 package
No contributors or author data
MaintenancePackage does not specify a list of contributors or an author in package.json.
Found 1 instance in 1 package
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
No README
QualityPackage does not have a README. This may indicate a failed publish or a low quality package.
Found 1 instance in 1 package
Trivial Package
Supply chain riskPackages less than 10 lines of code are easily copied into your own project and may not warrant the additional supply chain risk of an external dependency.
Found 1 instance in 1 package
No tests
QualityPackage does not have any tests. This is a strong signal of a poorly maintained or low quality package.
Found 1 instance in 1 package
No v1
QualityPackage is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
No website
QualityPackage does not have a website.
Found 1 instance in 1 package
142926
41089.05%9
350%1399
34875%0
-100%1
-50%347
Infinity%Yes
NaN18
Infinity%7
600%3
200%