@silencesys/xlsx-to-xml
Advanced tools
Comparing version 1.0.2 to 1.0.3
import xlsx from 'xlsx' | ||
import fs from 'fs' | ||
import yargs from 'yargs' | ||
import * as cheerio from 'cheerio' | ||
import { hideBin } from 'yargs/helpers' | ||
import { exit } from 'process' | ||
import COLOR from '../utils/colors.js' | ||
import { defaultConfig, dirtyConfig } from '../utils/config.js' | ||
@@ -24,2 +26,12 @@ const argv = yargs(hideBin(process.argv)) | ||
}) | ||
.option('dirty', { | ||
describe: 'Convert XLSX file to XML without cleaning.', | ||
type: 'boolean', | ||
default: false | ||
}) | ||
.option('--ignore-halves', { | ||
describe: 'Ignore rows that has value only in one column.', | ||
type: 'boolean', | ||
default: false | ||
}) | ||
.help() | ||
@@ -31,3 +43,3 @@ .alias('help', 'h') | ||
// Fail if no input file is set. | ||
console.info('Please provide an input file') | ||
console.error(`${COLOR.fgRed}Please provide an input file, use --help or -h for help.${COLOR.reset}`) | ||
exit(0) | ||
@@ -37,8 +49,16 @@ } | ||
// Set global variables | ||
const DIRTY_OUTPUT = argv.dirty | ||
const IGNORE_HALVES = argv.ignoreHalves | ||
const INPUT_FILE = argv.input | ||
const OUTPUT_FILE = argv.output || argv.input.replace('.xlsx', '.xml') | ||
let CONFIG = null | ||
let OUTPUT_FILE = argv.output || argv.input.replace('.xlsx', '.xml') | ||
let CONFIG = defaultConfig | ||
if (argv.config) { | ||
// Set user defined config | ||
CONFIG = JSON.parse(fs.readFileSync(argv.config)) | ||
} | ||
if (DIRTY_OUTPUT) { | ||
// Set the config file to dirty and change output filename. | ||
CONFIG = dirtyConfig | ||
OUTPUT_FILE = OUTPUT_FILE.replace('.xml', '-dirty.xml') | ||
} | ||
@@ -118,3 +138,18 @@ /** | ||
const fixXHTML = (xml) => { | ||
const $ = cheerio.load(xml, { xmlMode: true, decodeEntities: false }) | ||
return ($.html()) | ||
} | ||
/** | ||
* Clean and format the column value. | ||
* | ||
* @param {string} text value to be cleaned | ||
* @returns {string} | ||
*/ | ||
const cleanAndFormatColumnValue = (text) => { | ||
if (DIRTY_OUTPUT) { | ||
return fixXHTML(text) | ||
} | ||
try { | ||
@@ -125,3 +160,3 @@ let cleanedText = stripTags(text) | ||
return cleanedText | ||
return fixXHTML(cleanedText) | ||
} catch (e) { | ||
@@ -139,11 +174,23 @@ console.error(`${COLOR.fgRed}${e.message}${COLOR.reset}\n`) | ||
const checkNextRowForContent = (sheet, rowNum, cells = [0, 1], content = '', currentRow) => { | ||
const emptyCell = sheet[xlsx.utils.encode_cell({ r: rowNum + 1, c: cells[0] })] | ||
const contentCell = sheet[xlsx.utils.encode_cell({ r: rowNum + 1, c: cells[1] })] | ||
/** | ||
* Check following rows for content, if there is none then append column value | ||
* to the previous (current) row. | ||
* | ||
* @param {Object} sheet the XLSX sheet object | ||
* @param {number} rowIndex the row index | ||
* @param {Array<number>} cells indexes of cells to be checked for content | ||
* @param {string} content the content of the current row | ||
* @param {number} parentRowIndex the index of parent row, eg. the last row | ||
* that had content in both columns | ||
* @returns {string} | ||
*/ | ||
const checkNextRowForContent = (sheet, rowIndex, cells = [0, 1], content = '', parentRowIndex) => { | ||
const emptyCell = sheet[xlsx.utils.encode_cell({ r: rowIndex + 1, c: cells[0] })] | ||
const contentCell = sheet[xlsx.utils.encode_cell({ r: rowIndex + 1, c: cells[1] })] | ||
if (!emptyCell &&contentCell) { | ||
content = ` ${cleanAndFormatColumnValue(contentCell.h)}` | ||
console.info(`${COLOR.fgYellow}[${rowNum + 2}]\t${COLOR.reset}Has one cell empty, the other cell: "${COLOR.fgBlue}${content}${COLOR.reset}" was appended to the row #${currentRow}`) | ||
console.info(`${COLOR.fgYellow}[${rowIndex + 2}]\t${COLOR.reset}Has one cell empty, the other cell: "${COLOR.fgBlue}${content}${COLOR.reset}" was appended to the row #${parentRowIndex}`) | ||
return checkNextRowForContent(sheet, rowNum + 1, cells, content, currentRow) | ||
return checkNextRowForContent(sheet, rowIndex + 1, cells, content, parentRowIndex) | ||
} | ||
@@ -154,5 +201,25 @@ | ||
/** | ||
* Create XML file and save it to the disk. | ||
* @param {string} fileName the name of the file | ||
* @param {string} content the content of the file | ||
* @returns {void} | ||
*/ | ||
const storeFile = (fileName, content) => { | ||
fs.writeFile(fileName, content, (err) => { | ||
if (err) { | ||
console.info(`\n${COLOR.bgRed}${COLOR.fgBlack}Error when saving file.${COLOR.reset}`) | ||
console.error(`${COLOR.fgRed}${err.message}${COLOR.reset}\n`) | ||
exit(1) | ||
} else { | ||
console.info(`\n${COLOR.bgGreen}${COLOR.fgBlack}Successfully created file: ${OUTPUT_FILE}${COLOR.reset}`) | ||
exit(0) | ||
} | ||
}) | ||
} | ||
// Read the input file | ||
try { | ||
console.info(`${COLOR.bgBlue}${COLOR.fgBlack}Starting exporting file: ${INPUT_FILE}${COLOR.reset}\n`) | ||
console.info(`${COLOR.bgBlue}${COLOR.fgBlack}Starting conversion of file: ${INPUT_FILE}${COLOR.reset}\n`) | ||
const file = xlsx.readFile(INPUT_FILE) | ||
@@ -171,12 +238,22 @@ const sheet = file.Sheets[file.SheetNames[0]] | ||
const additionToSecondCell = await checkNextRowForContent(sheet, rowNum, [0, 1], '', rowNum + 1) | ||
const additionToFirstCell = await checkNextRowForContent(sheet, rowNum, [1, 0], '', rowNum + 1) | ||
// Check for additional content in the following rows in case some of them has one cell empty. | ||
let additionToFirstCell = '' | ||
let additionToSecondCell = '' | ||
if (!IGNORE_HALVES) { | ||
additionToSecondCell = await checkNextRowForContent(sheet, rowNum, [0, 1], '', rowNum + 1) | ||
additionToFirstCell = await checkNextRowForContent(sheet, rowNum, [1, 0], '', rowNum + 1) | ||
} | ||
const firstColumnLanguage = CONFIG.language.length > 0 ? `xml:lang="${CONFIG.language[0]}"` : '' | ||
const secondColumnLanguage = CONFIG.language.length > 1 ? `xml:lang="${CONFIG.language[1]}"` : '' | ||
code.push(` <${CONFIG.rowTagName} id="${idFirstCell}" corresp="${idSecondCell}" ${firstColumnLanguage}>${cleanAndFormatColumnValue(firstCell.h) + additionToFirstCell}</${CONFIG.rowTagName}>\n <${CONFIG.rowTagName} id="${idSecondCell}" corresp="${idFirstCell}" ${secondColumnLanguage}>${cleanAndFormatColumnValue(secondCell.h) + additionToSecondCell}</${CONFIG.rowTagName}>\n`) | ||
// Language definitions if there are any | ||
const firstColumnLanguage = CONFIG?.language?.length > 0 ? `xml:lang="${CONFIG.language[0]}"` : '' | ||
const secondColumnLanguage = CONFIG?.language?.length > 1 ? `xml:lang="${CONFIG.language[1]}"` : '' | ||
// The code snippet | ||
code.push(` <${CONFIG.rowTagName} id="${idFirstCell}" corresp="${idSecondCell}" ${firstColumnLanguage}>${cleanAndFormatColumnValue(firstCell.h) + additionToFirstCell}</${CONFIG.rowTagName}>\n <${CONFIG.rowTagName} id="${idSecondCell}" corresp="${idFirstCell}" ${secondColumnLanguage}>${cleanAndFormatColumnValue(secondCell.h) + additionToSecondCell}</${CONFIG.rowTagName}>\n`) | ||
} else if (secondCell) { | ||
// console.log(`${COLOR.dim}[${rowNum + 1}]\tSecond cell is empty.${COLOR.reset}`) | ||
// Do something when only first cell is empty | ||
} else if (firstCell) { | ||
// console.log(`${COLOR.dim}[${rowNum + 1}]\tFirst cell is empty.${COLOR.reset}`) | ||
// Do something when only second cell is empty | ||
} else { | ||
@@ -187,15 +264,12 @@ console.warn(`${COLOR.fgRed}[${rowNum + 1}]\tRow is empty${COLOR.reset}`) | ||
// Create the output file | ||
let xml = `<?xml version="1.0" encoding="UTF-8" ?>\n <${CONFIG.parentTagName}>\n` | ||
let content = `<?xml version="1.0" encoding="UTF-8" ?>\n<${CONFIG.parentTagName}>\n` | ||
code.map(line => { | ||
xml += line | ||
content += line | ||
}) | ||
xml += ` </${CONFIG.parentTagName}>\n</xml>` | ||
content += `</${CONFIG.parentTagName}>\n` | ||
fs.writeFileSync(OUTPUT_FILE, xml) | ||
console.info(`\n${COLOR.bgGreen}${COLOR.fgBlack}Successfully created file: ${OUTPUT_FILE}${COLOR.reset}`) | ||
exit(0) | ||
storeFile(OUTPUT_FILE, content) | ||
} catch (err) { | ||
console.error(`${COLOR.fgRed}${err.message}${COLOR.reset}`) | ||
exit(1) | ||
} | ||
} |
{ | ||
"name": "@silencesys/xlsx-to-xml", | ||
"version": "1.0.2", | ||
"version": "1.0.3", | ||
"description": "Export XLSX files to XML", | ||
@@ -20,2 +20,3 @@ "main": "./lib/exporter.js", | ||
"dependencies": { | ||
"cheerio": "^1.0.0-rc.10", | ||
"xlsx": "^0.17.2", | ||
@@ -22,0 +23,0 @@ "yargs": "^17.2.1" |
@@ -6,3 +6,2 @@ # XLSX to XML | ||
<br> | ||
<br> | ||
@@ -18,3 +17,3 @@ ## How to use | ||
```bash | ||
xlsx-to-xml --input your_input_file.xml --output your_output_file.xml --config your_config.json | ||
xlsx-to-xml --input your_input_file.xlsx --output your_output_file.xml --config your_config.json | ||
``` | ||
@@ -27,9 +26,10 @@ | ||
| ------ | ----------- | | ||
| --input, -i | Set path to input file | | ||
| --output, -o | Set output file path | | ||
| --config, -c | Set path to config file | | ||
| --input, -i | The input file to be converted to xlsx | | ||
| --output, -o | The output file to be created | | ||
| --config, -c | The config file that will be used | | ||
| --dirty | Set this flag if you want to see XHTML tags added by XLSX transformer | | ||
| --ignore-halves | Set this flag if you want to ignore rows that has one column empty | | ||
| --help, -h | Show help | | ||
<br> | ||
<br> | ||
@@ -69,2 +69,10 @@ ## Config file | ||
<br> | ||
### How to write a config file | ||
I'm sure you ask how am I supposed to know which XHTML tags will be used in my XLSX document after conversion. Well, for that purpose there is a very simple way. Just run following command: | ||
```bash | ||
xlsx-to-xml --input your_input_file.xlsx --dirty | ||
``` | ||
This will output a _dirty_ file containing XML with all XHTML tags. You can then decide which tags should be stripped or replaced and which should be kept. Just keep in mind that tags `<dirty-list>` and `<dirty-row>` are added by this tool and will be replaced by `parentTagName` and `rowTagName` respectively. | ||
<br> | ||
@@ -76,5 +84,4 @@ | ||
<br> | ||
<br> | ||
## License | ||
This project is licensed under the [MIT license](LICENSE.md). |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
13921
7
273
83
3
+ Addedcheerio@^1.0.0-rc.10
+ Addedboolbase@1.0.0(transitive)
+ Addedcheerio@1.0.0(transitive)
+ Addedcheerio-select@2.1.0(transitive)
+ Addedcss-select@5.1.0(transitive)
+ Addedcss-what@6.1.0(transitive)
+ Addeddom-serializer@2.0.0(transitive)
+ Addeddomelementtype@2.3.0(transitive)
+ Addeddomhandler@5.0.3(transitive)
+ Addeddomutils@3.1.0(transitive)
+ Addedencoding-sniffer@0.2.0(transitive)
+ Addedentities@4.5.0(transitive)
+ Addedhtmlparser2@9.1.0(transitive)
+ Addediconv-lite@0.6.3(transitive)
+ Addednth-check@2.1.1(transitive)
+ Addedparse5@7.2.1(transitive)
+ Addedparse5-htmlparser2-tree-adapter@7.1.0(transitive)
+ Addedparse5-parser-stream@7.1.2(transitive)
+ Addedsafer-buffer@2.1.2(transitive)
+ Addedundici@6.21.0(transitive)
+ Addedwhatwg-encoding@3.1.1(transitive)
+ Addedwhatwg-mimetype@4.0.0(transitive)