@enterprise_search/tika
Advanced tools
Comparing version 0.5.0 to 0.5.1
/// <reference types="node" /> | ||
import { FetchFn } from "@enterprise_search/indexing"; | ||
import { NameAnd } from "@laoban/utils"; | ||
export declare function cleaned(result: string): string; | ||
@@ -11,1 +13,2 @@ export type TikaClient = { | ||
export declare function processDocument(config: TikaClient, documentBuffer: Buffer): Promise<string>; | ||
export declare function getTextOfContentsFromUrl(fetch: FetchFn, tika: TikaClient, errorPrefix: string, url: string, headers: NameAnd<string>, mimeType: string, debug?: boolean): Promise<string>; |
@@ -6,4 +6,5 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.processDocument = exports.findType = exports.cleaned = void 0; | ||
exports.getTextOfContentsFromUrl = exports.processDocument = exports.findType = exports.cleaned = void 0; | ||
const axios_1 = __importDefault(require("axios")); | ||
const tika_client_streaming_1 = require("./tika.client.streaming"); | ||
function cleaned(result) { | ||
@@ -45,1 +46,18 @@ return result.split('\n').map((line) => line.trim()).filter((line) => line.length > 0).join('\n'); | ||
exports.processDocument = processDocument; | ||
/**
 * Fetches `url` and hands the response body to Tika to extract its text.
 *
 * This is best-effort: any failure (the fetch rejecting, a non-2xx HTTP
 * status, or the Tika call failing) is reported with `console.error` and the
 * returned promise resolves to `undefined` instead of rejecting. Callers must
 * therefore be prepared for a missing result.
 *
 * @param fetch fetch-like function, invoked as `fetch(url, { headers })`.
 * @param tika Tika client configuration; it is copied and the `debug` flag is
 *   added before it is passed to `processDocumentWithTika`.
 * @param errorPrefix prefix for the logged failure message; also forwarded to
 *   the Tika call.
 * @param url address of the document to download.
 * @param headers request headers sent with the download.
 * @param mimeType mime type forwarded to `processDocumentWithTika`.
 * @param debug when truthy, logs the url, the response status and the first
 *   100 characters of the extracted text.
 * @returns the extracted text, or `undefined` when anything failed.
 */
async function getTextOfContentsFromUrl(fetch, tika, errorPrefix, url, headers, mimeType, debug) {
    try {
        if (debug)
            console.log(url);
        const response = await fetch(url, { headers });
        const status = response.status;
        if (debug)
            console.log('response from', url, status);
        // An error page is not the document: without this check the body of a
        // 404/500 response would be sent to Tika and returned as its contents.
        // The check is skipped when the fetch implementation reports no numeric status.
        if (typeof status === 'number' && (status < 200 || status >= 300))
            throw new Error(`unexpected HTTP status ${status}`);
        const extractText = (0, tika_client_streaming_1.processDocumentWithTika)({ ...tika, debug: debug === true }, mimeType);
        const text = await extractText(response.body, errorPrefix);
        if (debug)
            console.log('content', text?.slice(0, 100));
        return text;
    }
    catch (error) {
        // Anything can be thrown, not only Error instances; fall back to the
        // stringified value so the log never reads "undefined" and the handler
        // itself cannot throw on a null/undefined rejection.
        const reason = error?.message ?? String(error);
        console.error(`${errorPrefix}. Failed to get text of contents from ${url}: ${reason}`);
        return undefined;
    }
}
exports.getTextOfContentsFromUrl = getTextOfContentsFromUrl; |
{ | ||
"name": "@enterprise_search/tika", | ||
"description": "Transforming documents into text", | ||
"version": "0.5.0", | ||
"version": "0.5.1", | ||
"main": "dist/index", | ||
@@ -6,0 +6,0 @@ "types": "dist/index", |
Network access
Supply chain risk: This module accesses the network.
Found 1 instance in 1 package
20504
406
1