word-to-html
Advanced tools
Comparing version 2.2.4 to 2.2.5
{ | ||
"name": "word-to-html", | ||
"version": "2.2.4", | ||
"version": "2.2.5", | ||
"description": "convert docx files to html", | ||
@@ -5,0 +5,0 @@ "main": "src/index.js", |
@@ -1,8 +0,8 @@ | ||
let templateFormate = require('./template.js') | ||
const templateFormate = require('./template.js') | ||
let template; | ||
//解压缩word文件 | ||
let admZip = require('adm-zip'); | ||
let fs = require('fs'); | ||
let path = require('path'); | ||
let {convert} = require('./word2html.js'); | ||
const admZip = require('adm-zip'); | ||
const fs = require('fs'); | ||
const path = require('path'); | ||
const {convert} = require('./word2html.js'); | ||
@@ -48,3 +48,2 @@ const jsdom = require("jsdom") | ||
let name = abspath.slice(0,len-4) + ".html" | ||
// console.log(name,'is ok') | ||
let res = main(contentXml); | ||
@@ -51,0 +50,0 @@ // 获取res |
let tdTextAlignDefault = 'left'; | ||
let tdVerticalAlignDefault = 'top'; | ||
const tdTextAlignDefault = 'left'; | ||
const tdVerticalAlignDefault = 'top'; | ||
@@ -6,0 +6,0 @@ /** |
@@ -1,13 +0,1 @@ | ||
/** | ||
* 获取元素下某一层某个tagName的所有元素 | ||
* @param {*} dom | ||
*/ | ||
let getDirectDomsByTagName = function(dom, tagName){ | ||
let childs = Array.prototype.slice.call(dom.children); | ||
let doms = childs.filter((item,index)=>{ | ||
return item.tagName === tagName | ||
}) | ||
return doms | ||
} | ||
// amd-zip将docx格式的文件转换成xml的规则是: | ||
@@ -25,104 +13,116 @@ // table规则: | ||
/** | ||
* | ||
* @param {*} tblDom 处理<w:tbl>标签对应的DOM | ||
* @return {string} tblText 返回table标签对应的html字符串 | ||
* 获取元素下某一层某个tagName的所有元素 | ||
* @param {*} dom | ||
*/ | ||
let tblFn = function(tblDom){ | ||
// | ||
let tblLeft = `<table><tbody>` | ||
let tblRight = `</tbody></table>` | ||
let tblText = tblLeft; | ||
const getDirectDomsByTagName = function(dom, tagName){ | ||
let childs = Array.prototype.slice.call(dom.children); | ||
let doms = childs.filter((item)=>{ | ||
return item.tagName === tagName | ||
}) | ||
return doms | ||
} | ||
let trArray = getDirectDomsByTagName(tblDom,'w:tr'), len = trArray.length; | ||
for(let i = 0;i<len; i++){ | ||
let tr = trArray[i]; | ||
tblText = tblText + trFn(tr,i,trArray); | ||
class Tbl{ | ||
constructor(tblDom){ | ||
this.tblDom = tblDom; | ||
} | ||
/** | ||
* | ||
* @return {string} tblText 返回table标签对应的html字符串 | ||
*/ | ||
tblFn(){ | ||
// | ||
let tblLeft = `<table><tbody>` | ||
let tblRight = `</tbody></table>` | ||
let tblText = tblLeft; | ||
tblText = tblText + tblRight; | ||
return tblText; | ||
} | ||
let trArray = getDirectDomsByTagName(this.tblDom,'w:tr'), len = trArray.length; | ||
this.trArray = trArray; | ||
for(let i = 0;i<len; i++){ | ||
let tr = trArray[i]; | ||
tblText = tblText + this.trFn(tr,i); | ||
} | ||
/** | ||
* @param trDom: 处理<w:tr>标签对应的DOM | ||
* @param rNum:trDom所处的trArray的第几行 | ||
* @param trArray:表的所有行trArray | ||
* @return trText: 字符串,表示的是表格的一行的html字符串 | ||
*/ | ||
let trFn = function(trDom,rNum,trArray){ | ||
let trStart = `<tr>`, | ||
trEnd = `</tr>`, | ||
trText = trStart; | ||
let tcArray = getDirectDomsByTagName(trDom,'w:tc'), len = tcArray.length; | ||
for(let i = 0;i<len; i++){ | ||
let tc = tcArray[i]; | ||
trText = trText + tcFn(tc,rNum, i,trArray); | ||
tblText = tblText + tblRight; | ||
return tblText; | ||
} | ||
trText = trText + trEnd; | ||
return trText; | ||
} | ||
/** | ||
* @param tcDom: 处理<w:tc>标签对应的DOM | ||
* @param rNum:trDom所处的trArray的第几行 | ||
* @param cNum: 传入的tcDom处于tr中tcArray的第几个,即第几列 | ||
* @param trArray:表的所有行trArray | ||
* @return tcText: 字符串,表示的是表格的一行的html字符串 | ||
*/ | ||
let tcFn = function(tcDom,rNum,cNum,trArray){ | ||
let {colspan, vMerge, hasT} = getTcDomOptions(tcDom); | ||
if(vMerge === '1' &&!hasT){ | ||
return '' | ||
/** | ||
* @param trDom: 处理<w:tr>标签对应的DOM | ||
* @param rNum:trDom所处的trArray的第几行 | ||
* @return trText: 字符串,表示的是表格的一行的html字符串 | ||
*/ | ||
trFn(trDom,rNum){ | ||
let trStart = `<tr>`, | ||
trEnd = `</tr>`, | ||
trText = trStart; | ||
let tcArray = getDirectDomsByTagName(trDom,'w:tc'), len = tcArray.length; | ||
for(let i = 0;i<len; i++){ | ||
let tc = tcArray[i]; | ||
trText = trText + this.tcFn(tc,rNum, i); | ||
} | ||
trText = trText + trEnd; | ||
return trText; | ||
} | ||
// 合并行 | ||
let rowspan; | ||
if(vMerge === 'restart'){ | ||
let len = trArray.length; | ||
rowspan =1; | ||
for(let n = rNum+1;n<len;n++){ | ||
let tcArray = getDirectDomsByTagName(trArray[n],'w:tc') | ||
if(tcArray.length-1 < cNum) break; | ||
let tcPrDom = getDirectDomsByTagName(tcArray[cNum],'w:tcPr')[0]; | ||
let vMergeDom = getDirectDomsByTagName(tcPrDom,'w:vMerge')[0]; | ||
if(vMergeDom && vMergeDom.getAttribute('w:val')!=='restart'){ | ||
rowspan++ | ||
}else{ | ||
break; | ||
/** | ||
* @param tcDom: 处理<w:tc>标签对应的DOM | ||
* @param rNum:trDom所处的trArray的第几行 | ||
* @param cNum: 传入的tcDom处于tr中tcArray的第几个,即第几列 | ||
* @return tcText: 字符串,表示的是表格的一行的html字符串 | ||
*/ | ||
tcFn(tcDom,rNum,cNum){ | ||
let {colspan, vMerge, hasT} = this.getTcDomOptions(tcDom); | ||
if(vMerge === '1' &&!hasT){ | ||
return '' | ||
} | ||
// 合并行 | ||
let rowspan; | ||
if(vMerge === 'restart'){ | ||
let len = this.trArray.length; | ||
rowspan =1; | ||
for(let n = rNum+1;n<len;n++){ | ||
let tcArray = getDirectDomsByTagName(this.trArray[n],'w:tc') | ||
if(tcArray.length-1 < cNum) break; | ||
let tcPrDom = getDirectDomsByTagName(tcArray[cNum],'w:tcPr')[0]; | ||
let vMergeDom = getDirectDomsByTagName(tcPrDom,'w:vMerge')[0]; | ||
if(vMergeDom && vMergeDom.getAttribute('w:val')!=='restart'){ | ||
rowspan++ | ||
}else{ | ||
break; | ||
} | ||
} | ||
} | ||
} | ||
let tdStart = `<td ${colspan?`colspan=${colspan}`:''} ${rowspan?`rowspan=${rowspan}`:''}>`, //合并列 | ||
tdEnd = `</td>`, | ||
tcText = tdStart; | ||
} | ||
let tdStart = `<td ${colspan?`colspan=${colspan}`:''} ${rowspan?`rowspan=${rowspan}`:''}>`, //合并列 | ||
tdEnd = `</td>`, | ||
tcText = tdStart; | ||
tcText = tcText + wanderDom(tcDom) + tdEnd; | ||
tcText = tcText + wanderDom(tcDom) + tdEnd; | ||
return tcText; | ||
return tcText; | ||
} | ||
/** | ||
* @param tcDom: 处理<w:tc>标签对应的DOM | ||
* @return : combinations:{colspan, vMerge, hasT} 对象,表示的是表格的一行的html字符串 | ||
*/ | ||
getTcDomOptions(tcDom){ | ||
let tcPrDom = getDirectDomsByTagName(tcDom,'w:tcPr')[0]; | ||
let gridSpanDom = getDirectDomsByTagName(tcPrDom,'w:gridSpan')[0], | ||
vMergeDom = getDirectDomsByTagName(tcPrDom,'w:vMerge')[0], | ||
tDom = tcDom.getElementsByTagName('w:t'); | ||
let colspan = gridSpanDom?gridSpanDom.getAttribute('w:val'):''; | ||
let vMerge = vMergeDom ? | ||
vMergeDom.getAttribute('w:val') ? vMergeDom.getAttribute('w:val') : '1' | ||
: | ||
'' ; | ||
let hasT = tDom.length?true:false; | ||
return {colspan,vMerge, hasT} | ||
} | ||
} | ||
/** | ||
* @param tcDom: 处理<w:tc>标签对应的DOM | ||
* @return : combinations:{colspan, vMerge, hasT} 对象,表示的是表格的一行的html字符串 | ||
*/ | ||
let getTcDomOptions = function(tcDom){ | ||
let tcPrDom = getDirectDomsByTagName(tcDom,'w:tcPr')[0]; | ||
let gridSpanDom = getDirectDomsByTagName(tcPrDom,'w:gridSpan')[0], | ||
vMergeDom = getDirectDomsByTagName(tcPrDom,'w:vMerge')[0], | ||
tDom = tcDom.getElementsByTagName('w:t'); | ||
let colspan = gridSpanDom?gridSpanDom.getAttribute('w:val'):''; | ||
let vMerge = vMergeDom ? | ||
vMergeDom.getAttribute('w:val') ? vMergeDom.getAttribute('w:val') : '1' | ||
: | ||
'' ; | ||
let hasT = tDom.length?true:false; | ||
return {colspan,vMerge, hasT} | ||
} | ||
/** | ||
* @param {*} rArray 数组,<w:r>标签对应的DOM组成的数组 | ||
* @return {string} textContent 返回table标签对应的html字符串 | ||
*/ | ||
let rFn = function(rArray){ | ||
const rFn = function(rArray){ | ||
let br = `<br>`,textContent = '',rTextArray = []; | ||
@@ -154,10 +154,16 @@ // In browser | ||
/** | ||
* 无论是p还是table最终还是会到这个函数,用于取出最后的文字内容 | ||
* @param {*} pDom 处理<w:p>标签对应的DOM,这个标签和tbl是互斥的 | ||
* @return {string} htmlStr 返回table标签对应的html字符串 | ||
*/ | ||
let pFn = function(pDom){ | ||
let rArray = getDirectDomsByTagName(pDom,'w:r'); | ||
return '<p>'+rFn(rArray)+'</p>'; | ||
class P{ | ||
constructor(pDom){ | ||
this.pDom = pDom | ||
} | ||
/** | ||
* 无论是p还是table最终还是会到这个函数,用于取出最后的文字内容 | ||
* @param {*} pDom 处理<w:p>标签对应的DOM,这个标签和tbl是互斥的 | ||
* @return {string} htmlStr 返回table标签对应的html字符串 | ||
*/ | ||
pFn(){ | ||
let rArray = getDirectDomsByTagName(this.pDom,'w:r'); | ||
return '<p>'+ rFn(rArray)+'</p>'; | ||
} | ||
} | ||
@@ -170,3 +176,3 @@ | ||
*/ | ||
let wanderDom = function(dom){ | ||
const wanderDom = function(dom){ | ||
let htmlStr = '',childrens = dom.children,len = childrens.length; | ||
@@ -177,4 +183,4 @@ for(let i=0; i<len;i++){ | ||
switch(tagName){ | ||
case 'w:tbl': htmlStr= htmlStr + tblFn(children) ;break; | ||
case 'w:p': htmlStr= htmlStr + pFn(children) ;break; | ||
case 'w:tbl': htmlStr= htmlStr + (new Tbl(children)).tblFn() ;break; | ||
case 'w:p': htmlStr= htmlStr + (new P(children)).pFn() ;break; | ||
default:break ; | ||
@@ -186,6 +192,4 @@ } | ||
/** | ||
* | ||
* 创建一个Wander类的实例,然后开始执行这个实例的start方法开始执行转换 | ||
* @param {*} xmlDoc 整个XML的DOM树 | ||
@@ -192,0 +196,0 @@ * @return htmlStr 字符串 |
27605
307