经常有一些将doc文件转换成HTML的需求,比如说明、协议之类的。虽然有https://word2cleanhtml.com/ 这种神器,像下图这样
可以直接将doc内容复制后粘贴到页面里,即可得到对应的html。但是,复制粘贴也是个体力活,如果一天突然要做10个这样的文档,可能真的要抓狂了。为此,一起来做个脚本一键傻瓜式生成HTML吧。
首先,我们脚本的主角是mammoth,这个库可以读取docx文档,并把内容转换成HTML。
准备工作就是安装示例文档里的两个依赖
npm install mammoth
npm install underscore
文档里给的转换html的示例是这样的。
// Minimal conversion: read a .docx from disk and receive the generated HTML.
var mammoth = require("mammoth");
mammoth
  .convertToHtml({ path: "path/to/document.docx" })
  .then((conversion) => {
    // `value` is the generated HTML; `messages` holds any messages,
    // such as warnings raised during conversion.
    const { value: html, messages } = conversion;
  })
  .done();
可以试一试,会发现做出来的html效果会生成strong、p、tr、td这种标签,套上head、body、css之后页面就非常完整了,可是一些特殊的效果,比如说下划线u就没有。
怎么让他们出现呢?可以直接在文档里搜下划线的英文Underline,找到这条说明
underline说明
原来在转换的过程中,下划线被忽略了,文档里也给了解决方法。可以显式地设置下划线转换成的标签去让下划线生效,像这样。
// Underline is ignored during conversion unless it is mapped explicitly,
// so emit underlined runs as <u>.
var options = { styleMap: ['u => u'] };
然后,我发现,生成的页面没有带class样式。其实在doc文档里面,有些行是居中效果的,如何做到居中效果在页面里也生效呢?
我们在template里写一个.center样式,然后在options里通过transformDocument给居中的段落加上样式名,再用styleMap把它转换成对应的class。
/**
 * Recursively transforms a document element tree.
 *
 * Children are transformed first and attached to a shallow copy of the
 * element; the element itself is then passed through transformParagraph
 * when its type is 'paragraph'.
 *
 * @param {object} element - Document element, optionally with `children`.
 * @returns {object} The transformed element (the input is not mutated).
 */
function transformElement(element) {
  var current = element;
  if (current.children) {
    current = {
      ...current,
      children: _.map(current.children, transformElement),
    };
  }
  return current.type === 'paragraph' ? transformParagraph(current) : current;
}
/**
 * Tags centered paragraphs that have no style of their own.
 *
 * @param {object} element - Paragraph element.
 * @returns {object} A copy carrying styleName 'center' when the paragraph is
 *   center-aligned and has no styleId; otherwise the element itself.
 */
function transformParagraph(element) {
  var isUnstyledCentered = element.alignment === 'center' && !element.styleId;
  if (!isUnstyledCentered) {
    return element;
  }
  // Add a style name so the style map can target this paragraph.
  return { ...element, styleName: 'center' };
}
// Conversion options: keep underline as <u>, and convert paragraphs whose
// style name is 'center' (set by the document transform) into <p class="center">.
var options = {
  styleMap: [
    'u => u',
    "p[style-name='center'] => p.center",
  ],
  transformDocument: transformElement,
};
以下是完整的生成脚本。
var mammoth = require('mammoth');
const fs = require('fs');
const _ = require('underscore');
const template = require('./template');
// Input document to convert. The output file name is derived from this path
// by the regex in the writeFile call below, which expects a one-character
// prefix followed by "/" (such as "./") and a ".docx" suffix.
const filesPath = './测试的文档.docx';
/**
 * Recursively transforms a document element tree.
 *
 * Children are transformed before their parent, and paragraph elements are
 * additionally passed through transformParagraph.
 *
 * @param {object} element - Document element, optionally with `children`.
 * @returns {object} The transformed element (the input is not mutated).
 */
function transformElement(element) {
  const node = element.children
    ? { ...element, children: _.map(element.children, transformElement) }
    : element;
  if (node.type !== 'paragraph') {
    return node;
  }
  return transformParagraph(node);
}
/**
 * Gives center-aligned paragraphs without a styleId the style name 'center'.
 *
 * @param {object} element - Paragraph element.
 * @returns {object} A tagged copy for unstyled centered paragraphs; the
 *   original element in every other case.
 */
function transformParagraph(element) {
  const needsCenterStyle = element.alignment === 'center' && !element.styleId;
  return needsCenterStyle ? { ...element, styleName: 'center' } : element;
}
// Conversion options: underline stays <u>; paragraphs with the 'center'
// style name become <p class="center">; every document goes through
// transformElement before the style map is applied.
var options = {
  styleMap: [
    'u => u',
    "p[style-name='center'] => p.center",
  ],
  transformDocument: transformElement,
};
// Convert the document at filesPath to HTML, wrap it in the page template and
// write it to file/<name>.html, where <name> is the input file name without
// its leading "./" and ".docx" suffix.
mammoth
  .convertToHtml({ path: filesPath }, options)
  .then(function(result) {
    var html = result.value; // The generated HTML
    var messages = result.messages; // Any messages, such as warnings during conversion

    // Surface conversion messages instead of silently dropping them.
    messages.forEach(function(message) {
      console.warn(message);
    });

    // Fail with a clear error when the path does not have the expected shape,
    // rather than a TypeError from indexing a null match.
    var nameMatch = filesPath.match(/^(?:.\/)(.*)(?:\.docx)$/);
    if (!nameMatch) {
      throw new Error('Cannot derive an output name from path: ' + filesPath);
    }

    fs.writeFile('file/' + nameMatch[1] + '.html', template(html), function(err) {
      // The callback's only argument is the error; report failures instead of
      // logging success unconditionally.
      if (err) {
        console.error('文件写入失败:', err);
        process.exitCode = 1;
        return;
      }
      console.log('文件写入成功:');
    });
  })
  .done();
// template.js,包含样式的完整html生成方法
module.exports = function template(str) {
let htmlTemple1 = getHtmlTemple1(str.match(/[^><]+(?=<\/p>)/gim)[0]);
return `${htmlTemple1}${str}${htmlTemple2}`;
};
/**
 * Builds the opening part of the page: doctype, <head> (meta tags, linked
 * stylesheets, inline styles, title) and the opening container <div>.
 *
 * The two CSS <link> tags carry rel="stylesheet"; without a rel attribute a
 * <link> creates no link, so the stylesheets would never be loaded.
 *
 * @param {string} [title=''] - Text placed in <title>. It is inserted as-is,
 *   so it must already be safe to embed in HTML.
 * @returns {string} HTML up to and including `<div class="container">`.
 */
function getHtmlTemple1(title = '') {
return `<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<meta
name="viewport"
content="width=device-width,initial-scale=1.0, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui"
/>
<link href="//www.ppdaicdn.com/favicon.ico" rel="shortcut icon" />
<link rel="stylesheet" href="./common/common.css"/>
<link rel="stylesheet" href="./common/protocol.css"/>
<title>${title}</title>
<meta http-equiv="Cache-Control" content="no-cache" />
<meta http-equiv="Pragma" content="no-cache" />
<meta http-equiv="expires" content="0" />
<meta name="format-detection" content="telephone=no" />
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="applicable-device" content="mobile" />
<style>
body {
color: #384369;
background: #fff;
margin: 0;
font-size: 3.7333vw;
display: flex;
flex-direction: column;
overflow-x: hidden;
margin:0 auto;
padding:3vw;
}
p{
border: none;
margin: 0;
padding: 0;
line-height:6vw;
margin-bottom:1vw
}
.container>p:first-child{
width: 100%;
text-align: center;
font-size: 5.8667vw;
font-weight: 700;
margin-top: 5vw;
margin-bottom: 7vw;
}
.container {
display: flex;
flex-direction: column;
flex: 1;
overflow-x: hidden;
/* padding: 5.3333vw; */
background: #fff;
-webkit-overflow-scrolling: touch;
}
.center{
text-align: center;
}
pre {
font-size: 3.2vw;
transform: scale(0.8);
transform-origin: 0;
}
.title {
width: 100%;
text-align: center;
font-size: 5.8667vw;
font-weight: 700;
margin-top: 5vw;
margin-bottom: 7vw;
}
.service-wrap-title {
color: #384369;
font-size: 4.2667vw;
display: inline-block;
padding: 1.3333vw 0;
font-weight: 600;
}
.service-wrap-ele {
margin: 0;
color: #384369;
padding-bottom: 0;
}
.service-wrap-ele span {
min-width: 12vw;
display: inline-block;
text-align: center;
border-bottom: solid 1px;
line-height: 0.9;
padding: 0 6px;
}
table {
color: #384369;
border-right: 0.2667vw solid #384369;
border-bottom: 0.2667vw solid #384369;
border-color: #384369;
border-collapse: collapse;
border-spacing: 0;
margin: 2vw 0;
width: 100%;
box-sizing:border-box;
}
table td {
border-left: 0.2667vw solid #384369;
border-top: 0.2667vw solid #384369;
width: 53.3333vw;
text-align: left;
height: 9.3333vw;
}
th{
text-align: center !important;
}
tr>td:first-child strong {
text-align: center !important;
width: 100%;
display: block;
}
th,
td {
padding: 0 1.2vw;
font-weight: normal;
word-wrap: break-word;
word-break: break-all;
box-sizing: border-box;
-webkit-box-sizing: border-box;
}
.bottom-btn-container {
height: 13.3333vw;
background: #fff;
border-top: 0.1333vw solid #f0eded;
display: flex;
justify-content: space-between;
padding: 0 5.3333vw;
box-sizing: border-box;
}
.bottom-btn-container span {
display: inline-block;
line-height: 13.3333vw;
font-size: 3.2vw;
color: #6685ed;
}
</style>
</head>
<body><div class="container">`;
}
// Closing markup that ends the container <div>, <body> and <html> opened by
// getHtmlTemple1.
const htmlTemple2 = '</div></body></html>';
这样一来,就算一天十个文档也是分分钟的事情了。
网友评论