@sabirmgd/cimb-statement-parser
Advanced tools
@@ -120,2 +120,7 @@ const { extractDataFromPdf } = require("./pdf-helper"); | ||
/**
 * Render a single value as a quoted CSV field.
 *
 * `null` and `undefined` become an empty quoted field; every other value is
 * converted with `String()`. The field is always wrapped in double quotes and
 * any embedded double quote is doubled.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} The quoted, escaped CSV field.
 */
function escapeCsvValue(value) {
  const text = value === null || value === undefined ? "" : String(value);
  // Doubling embedded quotes keeps the field parseable once it is wrapped.
  const doubled = text.split('"').join('""');
  return '"' + doubled + '"';
}
| function getDescriptionFromBlock(block, columns) { | ||
@@ -320,5 +325,28 @@ const rows = rowsFromBlock(block); | ||
/**
 * Serialize transactions to CSV text.
 *
 * The first line is a fixed header; each following line holds one
 * transaction's `date`, `description`, `moneyIn`, `moneyOut` and `balance`,
 * in that order, each passed through `escapeCsvValue`. Lines are joined with
 * "\n" and there is no trailing newline.
 *
 * @param {Array<Object>} transactions - Transactions to serialize.
 * @returns {string} CSV text, header line first.
 */
function transactionsToCsv(transactions) {
  // Each pair is [column title, transaction property]; order defines the layout.
  const columns = [
    ["Date", "date"],
    ["Transaction Details", "description"],
    ["Money In", "moneyIn"],
    ["Money Out", "moneyOut"],
    ["Balance", "balance"],
  ];

  const headerLine = columns.map(([title]) => title).join(",");
  const dataLines = transactions.map((entry) =>
    columns.map(([, field]) => escapeCsvValue(entry[field])).join(",")
  );

  return [headerLine].concat(dataLines).join("\n");
}
| module.exports = { | ||
| parseCimbStatementDeterministic, | ||
| parseCimbStatementPdfDeterministic, | ||
| transactionsToCsv, | ||
| }; |
| #!/usr/bin/env node | ||
| const fs = require("fs"); | ||
| const path = require("path"); | ||
| const { | ||
| parseCimbStatementPdfDeterministic, | ||
| transactionsToCsv, | ||
| } = require("./cimb-deterministic"); | ||
/**
 * Parse the CLI arguments.
 *
 * Positional arguments are taken in order: the first is the PDF path, the
 * second (optional) is the output path; further positionals are ignored.
 * The format may be supplied as `--format <value>` or `--format=<value>` at
 * any position, including before the PDF path.
 *
 * @param {string[]} argv - Full argument vector (e.g. `process.argv`); the
 *   first two entries are skipped.
 * @returns {{pdfPath: (string|undefined), outputPath: (string|null), format: (string|null)}}
 *   `pdfPath` is `undefined` when no positional argument was given;
 *   `outputPath` and `format` are `null` when absent.
 */
function parseArgs(argv) {
  const FORMAT_FLAG = "--format";
  const FORMAT_PREFIX = `${FORMAT_FLAG}=`;

  const args = argv.slice(2);
  const positional = [];
  let format = null;

  // Scan from index 0 so a flag placed before the PDF path is not mistaken
  // for the path itself.
  for (let index = 0; index < args.length; index += 1) {
    const arg = args[index];

    if (arg === FORMAT_FLAG) {
      // The value is the next argument; skip it so it is not read as a path.
      format = args[index + 1] || null;
      index += 1;
      continue;
    }

    if (arg.startsWith(FORMAT_PREFIX)) {
      // Take everything after the first "=" so the value is never truncated.
      format = arg.slice(FORMAT_PREFIX.length) || null;
      continue;
    }

    positional.push(arg);
  }

  const [pdfPath, outputPath = null] = positional;
  return { pdfPath, outputPath, format };
}
/**
 * Decide which output format to produce.
 *
 * An explicitly requested format wins and is matched case-insensitively; it
 * must be "json" or "csv". Without one, the format follows the output path's
 * extension: ".csv" (any case) selects CSV, and anything else, including no
 * output path at all, selects JSON.
 *
 * @param {?string} outputPath - Destination file path, if any.
 * @param {?string} requestedFormat - Format requested on the command line, if any.
 * @returns {string} Either "json" or "csv".
 * @throws {Error} If `requestedFormat` is given but is neither "json" nor "csv".
 */
function resolveFormat(outputPath, requestedFormat) {
  const supportedFormats = ["json", "csv"];

  if (requestedFormat) {
    const wanted = requestedFormat.toLowerCase();
    if (!supportedFormats.includes(wanted)) {
      throw new Error(`Unsupported format: ${requestedFormat}`);
    }
    return wanted;
  }

  // No explicit request: infer from the file extension, defaulting to JSON.
  const targetsCsvFile =
    Boolean(outputPath) && path.extname(outputPath).toLowerCase() === ".csv";
  return targetsCsvFile ? "csv" : "json";
}
| async function main() { | ||
| const pdfPath = process.argv[2]; | ||
| const outputPath = process.argv[3]; | ||
| const { pdfPath, outputPath, format: requestedFormat } = parseArgs(process.argv); | ||
| if (!pdfPath) { | ||
| console.error( | ||
| "Usage: node index-deterministic.js <path-to-pdf-file> [output.json]" | ||
| "Usage: node index-deterministic.js <path-to-pdf-file> [output.json|output.csv] [--format json|csv]" | ||
| ); | ||
@@ -22,12 +72,15 @@ process.exit(1); | ||
| const transactions = await parseCimbStatementPdfDeterministic(pdfPath); | ||
| const json = JSON.stringify(transactions, null, 2); | ||
| const format = resolveFormat(outputPath, requestedFormat); | ||
| const output = | ||
| format === "csv" | ||
| ? transactionsToCsv(transactions) | ||
| : JSON.stringify(transactions, null, 2); | ||
| if (outputPath) { | ||
| fs.writeFileSync(outputPath, json, "utf8"); | ||
| console.log( | ||
| `Written ${transactions.length} transactions to ${outputPath}` | ||
| ); | ||
| fs.writeFileSync(outputPath, output, "utf8"); | ||
| console.log(`Written ${transactions.length} transactions to ${outputPath}`); | ||
| return; | ||
| } | ||
| console.log(json); | ||
| console.log(output); | ||
| } catch (error) { | ||
@@ -34,0 +87,0 @@ console.error("Error processing PDF:", error); |
+2
-2
| { | ||
| "name": "@sabirmgd/cimb-statement-parser", | ||
| "version": "0.1.0", | ||
| "version": "0.2.0", | ||
| "description": "Deterministic CIMB bank statement PDF parser with CLI and library API.", | ||
@@ -23,3 +23,3 @@ "main": "cimb-deterministic.js", | ||
| "start": "node index-deterministic.js /Users/sabirsalah/Desktop/projects/choreon/pdf-extraction/bank-statement.pdf", | ||
| "test": "node test/test-bank-reference.js && node test/test-deterministic-first-10.js", | ||
| "test": "node test/test-bank-reference.js && node test/test-deterministic-first-10.js && node test/test-csv-export.js", | ||
| "pack:check": "npm pack --dry-run" | ||
@@ -26,0 +26,0 @@ }, |
+16
-1
@@ -9,2 +9,3 @@ # CIMB Statement Parser | ||
| - a library export: `parseCimbStatementPdfDeterministic` | ||
| - a CSV serializer: `transactionsToCsv` | ||
@@ -31,2 +32,14 @@ ## Install | ||
| CSV output: | ||
| ```bash | ||
| npx @sabirmgd/cimb-statement-parser /path/to/CIMBClicks.pdf output.csv | ||
| ``` | ||
| Or force the format explicitly: | ||
| ```bash | ||
| npx @sabirmgd/cimb-statement-parser /path/to/CIMBClicks.pdf report.txt --format csv | ||
| ``` | ||
| Or after install: | ||
@@ -45,2 +58,3 @@ | ||
| parseCimbStatementPdfDeterministic, | ||
| transactionsToCsv, | ||
| } = require("@sabirmgd/cimb-statement-parser"); | ||
@@ -50,3 +64,4 @@ | ||
| const transactions = await parseCimbStatementPdfDeterministic(pdfPath); | ||
| console.log(transactions); | ||
| const csv = transactionsToCsv(transactions); | ||
| console.log(csv); | ||
| } | ||
@@ -53,0 +68,0 @@ ``` |
14624
18.41%383
21.2%109
15.96%