Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

office-text-extractor

Package Overview
Dependencies
Maintainers
0
Versions
23
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

office-text-extractor - npm Package Compare versions

Comparing version 3.0.3 to 3.4.0-beta.0

build/index.html

3

build/lib.d.ts

@@ -1,3 +0,2 @@

/// <reference types="node" resolution-mode="require"/>
import { Buffer } from 'node:buffer';
import { Buffer } from 'buffer/index.js';
/**

@@ -4,0 +3,0 @@ * A method of text extraction.

// source/lib.ts
// The source code for the library.
import { Buffer } from 'node:buffer';
import { Buffer } from 'buffer/index.js';
import { fileTypeFromBuffer as getFileType } from 'file-type';

@@ -5,0 +5,0 @@ import { readFile, fetchUrl } from './util.js';

@@ -1,2 +0,2 @@

import { type Buffer } from 'node:buffer';
import { type Buffer } from 'buffer/';
import type { TextExtractionMethod } from '../lib.js';

@@ -3,0 +3,0 @@ export declare class DocExtractor implements TextExtractionMethod {

@@ -20,2 +20,3 @@ // source/parsers/docx.ts

// Convert the DOCX to text and return the text.
// @ts-expect-error: see feross/buffer#353, the types are incomplete.
const parsedDocx = await parseWordFile({ buffer: input });

@@ -22,0 +23,0 @@ return parsedDocx.value;

@@ -1,2 +0,2 @@

import { type Buffer } from 'node:buffer';
import { type Buffer } from 'buffer/';
import type { TextExtractionMethod } from '../lib.js';

@@ -3,0 +3,0 @@ export declare class ExcelExtractor implements TextExtractionMethod {

@@ -1,2 +0,2 @@

import { type Buffer } from 'node:buffer';
import { type Buffer } from 'buffer/';
import type { TextExtractionMethod } from '../lib.js';

@@ -3,0 +3,0 @@ export declare class PdfExtractor implements TextExtractionMethod {

@@ -1,2 +0,2 @@

import { type Buffer } from 'node:buffer';
import { type Buffer } from 'buffer/';
import type { TextExtractionMethod } from '../lib.js';

@@ -3,0 +3,0 @@ export declare class PptExtractor implements TextExtractionMethod {

@@ -1,3 +0,3 @@

import { type Buffer } from 'node:buffer';
import { type Buffer } from 'buffer/';
export declare const readFile: (filePath: string) => Promise<Buffer>;
export declare const fetchUrl: (url: string) => Promise<Buffer>;

@@ -5,3 +5,3 @@ // source/util.ts

import { got as fetch } from 'got';
export const readFile = async (filePath) => read(filePath);
export const fetchUrl = async (url) => fetch(url).buffer();
export const readFile = async (filePath) => (await read(filePath));
export const fetchUrl = async (url) => (await fetch(url).buffer());
{
"name": "office-text-extractor",
"version": "3.0.3",
"version": "3.4.0-beta.0",
"description": "Yet another library to extract text from MS Office and PDF files",

@@ -43,24 +43,25 @@ "keywords": [

"dependencies": {
"fflate": "0.8.1",
"file-type": "18.5.0",
"got": "13.0.0",
"buffer": "6.0.3",
"fflate": "0.8.2",
"file-type": "19.3.0",
"got": "14.4.1",
"js-yaml": "4.1.0",
"mammoth": "1.6.0",
"mammoth": "1.8.0",
"pdf-parse": "1.1.1",
"text-encoding": "0.7.0",
"xlsx": "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz",
"xlsx": "https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz",
"xml2js": "0.6.2"
},
"devDependencies": {
"@types/js-yaml": "4.0.6",
"@types/node": "20.8.3",
"@types/text-encoding": "0.0.37",
"@types/xml2js": "0.4.12",
"ava": "5.3.1",
"np": "8.0.4",
"@types/js-yaml": "4.0.9",
"@types/node": "20.14.11",
"@types/text-encoding": "0.0.39",
"@types/xml2js": "0.4.14",
"ava": "6.1.3",
"np": "10.0.7",
"npm-run-all": "4.1.5",
"prettier": "3.0.3",
"tsx": "3.13.0",
"typescript": "5.2.2",
"xo": "0.56.0"
"prettier": "3.3.3",
"tsimp": "2.0.11",
"typescript": "5.5.4",
"xo": "0.59.0"
},

@@ -90,3 +91,3 @@ "prettier": {

"nodeArguments": [
"--no-warnings"
"--import=tsimp"
]

@@ -99,4 +100,4 @@ },

"test:quality": "xo source/ test/",
"test:integration": "NODE_OPTIONS='--loader=tsx' ava"
"test:integration": "TSIMP_DIAG=ignore ava"
}
}
// source/lib.ts
// The source code for the library.
import { Buffer } from 'node:buffer'
import { Buffer } from 'buffer/index.js'
import { fileTypeFromBuffer as getFileType } from 'file-type'

@@ -6,0 +6,0 @@ import { readFile, fetchUrl } from './util.js'

// source/parsers/docx.ts
// The text extracter for DOCX files.
import { type Buffer } from 'node:buffer'
import { type Buffer } from 'buffer/'
import { extractRawText as parseWordFile } from 'mammoth'

@@ -25,2 +25,3 @@

// Convert the DOCX to text and return the text.
// @ts-expect-error: see feross/buffer#353, the types are incomplete.
const parsedDocx = await parseWordFile({ buffer: input })

@@ -27,0 +28,0 @@ return parsedDocx.value

// source/parsers/excel.ts
// The text extracter for Excel files.
import { type Buffer } from 'node:buffer'
import { type Buffer } from 'buffer/'
import Xlsx, { utils as sheetUtils } from 'xlsx'

@@ -6,0 +6,0 @@ import { dump as convertToYaml } from 'js-yaml'

// source/parsers/pdf.ts
// The text extracter for PDF files.
import { type Buffer } from 'node:buffer'
import { type Buffer } from 'buffer/'
// @ts-expect-error There are no types for this package.

@@ -6,0 +6,0 @@ import parsePdf from 'pdf-parse/lib/pdf-parse.js'

@@ -6,3 +6,3 @@ // source/parsers/ppt.ts

import { type Buffer } from 'node:buffer'
import { type Buffer } from 'buffer/'
import { unzip } from 'fflate'

@@ -9,0 +9,0 @@ import { parseStringPromise as xmlToJson } from 'xml2js'

// source/util.ts
// Utility functions to help with the handling of input.
import { type Buffer } from 'node:buffer'
import { readFile as read } from 'node:fs/promises'
import { got as fetch } from 'got'
import { type Buffer } from 'buffer/'
export const readFile = async (filePath: string): Promise<Buffer> =>
read(filePath)
(await read(filePath)) as unknown as Buffer
export const fetchUrl = async (url: string): Promise<Buffer> =>
fetch(url).buffer()
(await fetch(url).buffer()) as unknown as Buffer
Socket · Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc