word文档全篇加拼音(批量注音)+批量修改读音
加拼音(旧格式)-代码
package main
import (
"archive/zip"
"runtime"
"bufio"
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path"
_ "regexp"
"strconv"
"strings"
"time"
"unicode"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"
// "github.com/aurelien-rainone/assertgo"
"github.com/etree"
"github.com/mozillazg/go-pinyin"
)
// assert panics when cond is false. The panic value is a string made of the
// caller's file, line number and function name followed by failPrompt.
func assert(cond bool, failPrompt string) {
	if cond {
		return
	}
	// Caller(1) identifies the function that invoked assert, not assert itself.
	pc, file, line, _ := runtime.Caller(1)
	location := file + ":" + strconv.Itoa(line)
	panic(location + ":" + runtime.FuncForPC(pc).Name() + " " + failPrompt)
}
// Pinyin rendering settings; parseConfigLine overrides them from the config file.
//
// Notes carried over from the original author:
//   - a font size in the document XML is twice the size shown in Word;
//   - by default the pinyin size is half the size of the annotated text;
//   - the default XML offset is the XML pinyin size minus one.
var (
	// g_configPinyinFont is the font name used for the pinyin text.
	g_configPinyinFont = "微软雅黑"

	// g_configPinyinFontSize is the pinyin font size as shown in Word.
	g_configPinyinFontSize = 5

	// g_configPinyinUseFontSize reports whether one fixed pinyin size is
	// used for the whole document.
	g_configPinyinUseFontSize = false

	// g_configPinyinAlignmentMap maps the alignment names accepted in the
	// config file to the alignment switch written into the field code.
	g_configPinyinAlignmentMap = map[string]string{
		"居中":    "jc0",
		"0-1-0": "jc1",
		"1-2-1": "jc2",
		"左对齐":   "jc3",
		"右对齐":   "jc4",
	}

	// g_configPinyinAlignment is the selected alignment switch (centered).
	g_configPinyinAlignment = "jc0"

	// g_configPinyinOffset is the default raise distance of the pinyin.
	g_configPinyinOffset = g_configPinyinFontSize*2 - 1
)
// parseConfigLine applies one "key=value" configuration line to the
// package-level pinyin settings.
//
// Recognised keys are 拼音字体 (font name), 拼音字号 (a positive integer font
// size, which also switches on the fixed pinyin size) and 拼音对齐 (one of the
// names in g_configPinyinAlignmentMap). An empty value leaves the current
// setting untouched. Whitespace around the key and the value is ignored.
//
// It returns an error when the line does not contain exactly one "=", when
// the key is unknown, or when the value is not valid for its key.
func parseConfigLine(line string) error {
	fields := strings.Split(strings.TrimSpace(line), "=")
	if len(fields) != 2 {
		return errors.New("无效配置:" + line)
	}
	key := strings.TrimSpace(fields[0])
	value := strings.TrimSpace(fields[1])

	switch key {
	case "拼音字体":
		if value != "" {
			g_configPinyinFont = value
		}
	case "拼音字号":
		if value == "" {
			break
		}
		size, err := strconv.Atoi(value)
		if err != nil {
			return fmt.Errorf("字号无效:%s: %v\r\n", line, err)
		}
		// A parsed but non-positive size has no underlying error to print,
		// so it gets its own message.
		if size < 1 {
			return fmt.Errorf("字号无效:%s: 字号必须是正整数\r\n", line)
		}
		g_configPinyinUseFontSize = true
		g_configPinyinFontSize = size
	case "拼音对齐":
		if value == "" {
			break
		}
		alignment, ok := g_configPinyinAlignmentMap[value]
		if !ok || alignment == "" {
			return fmt.Errorf("无效拼音对齐:%s\r\n", line)
		}
		g_configPinyinAlignment = alignment
	default:
		return errors.New("无效配置:" + line)
	}
	return nil
}
// parseConfig loads the settings file "拼音配置.txt" from the working directory.
//
// When the file cannot be stat'ed it is created with a commented template
// containing the current default font, and nothing is parsed. When the name
// refers to a directory an error is returned. Otherwise every line is
// stripped of spaces, blank lines and lines starting with "#" are skipped,
// and the remaining lines are handed to parseConfigLine; the first error
// stops parsing and is returned.
func parseConfig() error {
	fileName := "拼音配置.txt"
	fileinfo, err := os.Stat(fileName)
	if nil != err {
		// No usable file yet: write a template whose size and alignment
		// values are deliberately left empty.
		newFile, err := os.Create(fileName)
		if err != nil {
			return err
		}
		_, err = fmt.Fprintf(newFile,
			`# 说明:只支持docx格式的word文档;
# 拼音字号默认动态变化,指定字号后全篇拼音使用指定字号;
拼音字体=%s
拼音字号=
拼音对齐=
`,
			g_configPinyinFont,
		)
		if closeErr := newFile.Close(); nil == err {
			err = closeErr
		}
		return err
	}
	if fileinfo.IsDir() {
		return errors.New("无法创建配置文件:" + fileName)
	}

	f, err := os.Open(fileName)
	if err != nil {
		return err
	}
	defer f.Close()

	br := bufio.NewReader(f)
	for first := true; ; first = false {
		raw, readErr := br.ReadString('\n')
		if nil != readErr && io.EOF != readErr {
			return readErr
		}
		if first {
			// Some editors prepend a UTF-8 byte order mark; without this
			// the first line would never be recognised as a comment.
			raw = strings.TrimPrefix(raw, "\ufeff")
		}
		// Drop the line terminator, then every space in the line.
		strLine := strings.ReplaceAll(strings.TrimRight(raw, "\r\n"), " ", "")
		if "" != strLine && !strings.HasPrefix(strLine, "#") {
			if err := parseConfigLine(strLine); nil != err {
				return err
			}
		}
		// ReadString may return a final unterminated line together with
		// io.EOF, so the end-of-file check comes after processing.
		if io.EOF == readErr {
			return nil
		}
	}
}
// createWrPrNode returns a copy of the given run-properties element in which
// the w:hint attribute of its w:rFonts child, when such a child exists, is
// set to "default". The element passed in is not modified.
func createWrPrNode(oldWrPrNode *etree.Element) *etree.Element {
	clone := oldWrPrNode.Copy()

	fonts := clone.FindElement("w:rFonts")
	if fonts == nil {
		// Nothing to adjust when the run declares no fonts.
		return clone
	}

	if hint := fonts.SelectAttr("w:hint"); hint != nil {
		hint.Value = "default"
	} else {
		fonts.CreateAttr("w:hint", "default")
	}
	return clone
}
// createWrBegin builds a w:r element holding a w:fldChar whose
// w:fldCharType is "begin". When wrPrNode is non-nil, an adjusted copy of it
// (see createWrPrNode) is placed before the w:fldChar.
func createWrBegin(wrPrNode *etree.Element) *etree.Element {
	run := etree.NewElement("w:r")
	if wrPrNode != nil {
		run.AddChild(createWrPrNode(wrPrNode))
	}
	run.CreateElement("w:fldChar").CreateAttr("w:fldCharType", "begin")
	return run
}
// createWrinstrText builds a w:r element holding a w:instrText with the given
// text and xml:space="preserve". When wrPrNode is non-nil, an adjusted copy
// of it (see createWrPrNode) is placed before the w:instrText.
func createWrinstrText(text string, wrPrNode *etree.Element) *etree.Element {
	run := etree.NewElement("w:r")
	if wrPrNode != nil {
		run.AddChild(createWrPrNode(wrPrNode))
	}

	instr := run.CreateElement("w:instrText")
	instr.CreateAttr("xml:space", "preserve")
	instr.SetText(text)
	return run
}
// createWrEnd builds a w:r element holding a w:fldChar whose w:fldCharType
// is "end". When wrPrNode is non-nil, an adjusted copy of it (see
// createWrPrNode) is placed before the w:fldChar.
func createWrEnd(wrPrNode *etree.Element) *etree.Element {
	run := etree.NewElement("w:r")
	if wrPrNode != nil {
		run.AddChild(createWrPrNode(wrPrNode))
	}
	run.CreateElement("w:fldChar").CreateAttr("w:fldCharType", "end")
	return run
}
// createWrwt returns a copy of the run oldWrNode whose first w:t element
// carries the text w; a w:t is appended when the copy has none. The run
// passed in is not modified.
func createWrwt(w string, oldWrNode *etree.Element) *etree.Element {
	wr := oldWrNode.Copy()
	wt := wr.FindElement("w:t")
	if nil == wt {
		wt = etree.NewElement("w:t")
		wr.AddChild(wt)
	}
	// The caller passes fragments cut out of a longer text, so w may begin
	// or end with whitespace that the original w:t did not have at its
	// edges. Mark the text as space-preserving, as createWrinstrText does,
	// so that whitespace is kept. CreateAttr replaces an existing value.
	wt.CreateAttr("xml:space", "preserve")
	wt.SetText(w)
	return wr
}
// procYI picks the reading of the character "一" from what follows it.
//
// Tone rules (first to fourth tone are written ī, í, ǐ, ì):
//   - alone, or at the end of a word or sentence: first tone "yī";
//   - before a fourth-tone syllable: second tone "yí";
//   - before a first-, second- or third-tone syllable: fourth tone "yì".
//
// nextHans holds the characters after "一", one per element. nextPinyins
// holds readings for what follows; nextPinyins[0][0] is taken to be the
// reading of nextHans[0]. When the next character is a Han character but no
// reading is available for it, it is treated as not being fourth tone.
func procYI(nextHans []string, nextPinyins [][]string) string {
	// Nothing follows: end of the text.
	if 0 == len(nextHans) {
		return "yī"
	}

	// First rune of the next element; stays 0 (not Han) for an empty string.
	var next rune
	for _, r := range nextHans[0] {
		next = r
		break
	}
	if !unicode.Is(unicode.Han, next) {
		return "yī"
	}

	// A Han character follows; look at its tone when a reading is present.
	if 0 != len(nextPinyins) && 0 != len(nextPinyins[0]) &&
		strings.ContainsAny(nextPinyins[0][0], "àòèìùǜ") {
		return "yí"
	}
	return "yì"
}
// procBU picks the reading of the character "不" from what follows it.
//
// Tone rule: directly before a fourth-tone syllable it is read with the
// second tone, "bú"; in every other case it keeps the fourth tone, "bù".
//
// nextHans holds the characters after "不", one per element. nextPinyins
// holds readings for what follows; nextPinyins[0][0] is taken to be the
// reading of nextHans[0]. The rule is applied only when the next character
// is a Han character and a reading for it is present.
func procBU(nextHans []string, nextPinyins [][]string) string {
	if 0 == len(nextHans) {
		return "bù"
	}

	// First rune of the next element; stays 0 (not Han) for an empty string.
	var next rune
	for _, r := range nextHans[0] {
		next = r
		break
	}
	// Punctuation, Latin text and the like: nextPinyins[0] would not
	// describe the immediately following character, and may not exist.
	if !unicode.Is(unicode.Han, next) {
		return "bù"
	}

	if 0 != len(nextPinyins) && 0 != len(nextPinyins[0]) &&
		strings.ContainsAny(nextPinyins[0][0], "àòèìùǜ") {
		return "bú"
	}
	return "bù"
}
// addPinyin takes the bytes of a word/document.xml part and returns the
// document XML with pinyin annotations added.
//
// For every paragraph directly under w:body, each run that has a w:t is
// replaced in place: every Han character for which go-pinyin yields a reading
// becomes three runs (field begin, an " EQ ..." instruction carrying the
// reading and the character, field end), and the text between such
// characters is emitted as copies of the original run. Runs without w:t are
// left alone.
//
// If the bytes do not parse as XML directly, parsing is retried after
// decoding them as GBK and then as HZ-GB2312. An error is returned when no
// attempt succeeds, when there is no w:document root, or when the result
// cannot be serialised.
func addPinyin(buf []byte) (string, error) {
	pinyinArg := pinyin.NewArgs()
	pinyinArg.Style = pinyin.Tone // readings carry tone marks

	doc := etree.NewDocument()
	err := doc.ReadFromBytes(buf)
	if nil != err {
		fmt.Println(err)
		transformers := []transform.Transformer{
			simplifiedchinese.GBK.NewDecoder(),
			simplifiedchinese.HZGB2312.NewDecoder(),
		}
		fmt.Println("尝试转码")
		for _, t := range transformers {
			var d []byte
			d, err = ioutil.ReadAll(transform.NewReader(bytes.NewReader(buf), t))
			if nil != err {
				continue
			}
			err = doc.ReadFromBytes(d)
			if nil == err {
				fmt.Println("转码成功")
				break
			}
		}
		if nil != err {
			fmt.Println("转码失败")
			return "", err
		}
	}

	wdocument := doc.SelectElement("w:document")
	if nil == wdocument {
		return "", errors.New("无效的docx文档:缺少w:document节点")
	}

	// Each w:p is one paragraph; paragraphs are processed one at a time.
	for _, wp := range wdocument.FindElements("w:body/w:p") {
		runs := wp.FindElements("w:r")

		// All text of the paragraph, needed to look at the character that
		// follows "一" or "不" even when it sits in a later run.
		var paragraph strings.Builder
		for _, wr := range runs {
			if wt := wr.FindElement("w:t"); nil != wt {
				paragraph.WriteString(wt.Text())
			}
		}
		allHansOfWp := strings.Split(paragraph.String(), "")

		// pinyins[i] is the reading of allHansOfWp[i]; characters without a
		// reading get a single empty string. Keeping the table aligned with
		// the characters means a character lacking a dictionary entry can
		// neither shift the readings of later characters nor cause an
		// out-of-range access.
		pinyins := make([][]string, len(allHansOfWp))
		for i, ch := range allHansOfWp {
			pinyins[i] = []string{""}
			if py := pinyin.Pinyin(ch, pinyinArg); len(py) > 0 && len(py[0]) > 0 {
				pinyins[i] = py[0]
			}
		}

		hanIndex := 0
		for _, wr := range runs {
			wt := wr.FindElement("w:t")
			if nil == wt {
				continue
			}

			// Pinyin size and raise distance. Without a usable w:sz the
			// configured defaults apply; with one, the raise follows the
			// text size and, unless a fixed size is configured, so does the
			// pinyin size.
			wrPr := wr.FindElement("w:rPr")
			pinyinFontSize := g_configPinyinFontSize * 2
			pinyinOffset := g_configPinyinOffset
			if nil != wrPr {
				if wsz := wrPr.FindElement("w:sz"); nil != wsz {
					if i, err := strconv.Atoi(wsz.SelectAttrValue("w:val", "")); nil == err {
						if !g_configPinyinUseFontSize {
							pinyinFontSize = i / 2
						}
						pinyinOffset = i/2 - 1
					}
				}
			}

			// The instruction is assembled by concatenation rather than
			// through a format string, so a "%" in the configured font name
			// cannot corrupt it. The font name is wrapped in literal double
			// quotes; the original author noted that escaped quotes broke
			// the centering for some fonts.
			fieldPrefix := ` EQ \* ` + g_configPinyinAlignment + ` \* "Font:` + g_configPinyinFont +
				`" \* hps` + strconv.Itoa(pinyinFontSize) +
				` \o \ad(\s \up ` + strconv.Itoa(pinyinOffset) + `(`

			lastStr := "" // text collected since the last annotated character
			for _, w := range wt.Text() {
				assert(string(w) == allHansOfWp[hanIndex], "出错啦")

				hanPinyin := pinyins[hanIndex][0]
				if unicode.Is(unicode.Han, w) && "" != hanPinyin {
					if "" != lastStr {
						wp.InsertChild(wr, createWrwt(lastStr, wr))
						lastStr = ""
					}
					// "一" and "不" change tone depending on what follows.
					switch string(w) {
					case "一":
						hanPinyin = procYI(allHansOfWp[hanIndex+1:], pinyins[hanIndex+1:])
					case "不":
						hanPinyin = procBU(allHansOfWp[hanIndex+1:], pinyins[hanIndex+1:])
					}
					newText := fieldPrefix + hanPinyin + `),` + string(w) + `)`
					wp.InsertChild(wr, createWrBegin(wrPr))
					wp.InsertChild(wr, createWrinstrText(newText, wrPr))
					wp.InsertChild(wr, createWrEnd(wrPr))
				} else {
					lastStr += string(w)
				}
				hanIndex++
			}
			if "" != lastStr {
				wp.InsertChild(wr, createWrwt(lastStr, wr))
			}
			// Everything from the original run has been re-emitted before it.
			wp.RemoveChild(wr)
		}
	}

	newXml, err := doc.WriteToString()
	if nil != err {
		fmt.Println(err)
		return "", err
	}
	return newXml, nil
}
func procOneDocxFile(fromPath string, toPath string) error {
zipReader, err := zip.OpenReader(fromPath)
if err != nil {
fmt.Print(err)
return err
}
defer zipReader.Close()
newZipFile, err := os.Create(toPath)
if err != nil {
fmt.Println(err)
return err
}
defer newZipFile.Close()
zipWriter := zip.NewWriter(newZipFile)
defer zipWriter.Close()
var f *zip.File
for _, file := range zipReader.File {
rc, err := file.Open()
if nil != err {
return err
}
buf := make([]byte, file.UncompressedSize)
// zipfile文件一次可能不能读完,循环读完为止
readLen := 0
for file.UncompressedSize != uint32(readLen) {
n, err := rc.Read(buf[readLen:])
if nil != err && (0 != strings.Compare("EOF", err.Error())) {
fmt.Println(err)
return err
}
if 0 == n {
return errors.New("读取zip出错")
}
readLen += n
}
var newBuf []byte
if "word/document.xml" == file.Name {
f = file
assert(file.UncompressedSize == uint32(readLen), "读取错误")
newXmlStr, err := addPinyin(buf)
if nil != err {
return err
}
newBuf = []byte(newXmlStr)
} else {
newBuf = buf
}
newFile, err := zipWriter.Create(file.Name)
if err != nil {
return err
}
_, err = newFile.Write(newBuf)
if err != nil {
return err
}
}
if nil == f {
err = errors.New(fromPath + ": 没有 word/document.xml")
return err
}
return nil
}
// initDir makes sure every entry of paths is a directory. A path that cannot
// be stat'ed is created with os.Mkdir; a path that exists but is not a
// directory is an error. Processing stops at the first failure.
func initDir(paths []string) error {
	for _, dir := range paths {
		info, statErr := os.Stat(dir)
		switch {
		case statErr != nil:
			if mkErr := os.Mkdir(dir, os.ModePerm); mkErr != nil {
				fmt.Println(mkErr)
				return mkErr
			}
		case !info.IsDir():
			return errors.New("无法创建目录:" + dir)
		}
	}
	return nil
}
// main prepares the input and output directories, loads the configuration,
// and converts every .docx file found in the input directory, writing each
// result to the output directory under a timestamped name. It stops at the
// first error. Before exiting it reports any panic or error and the elapsed
// time, then waits for input so the console output stays visible.
func main() {
	var err error
	startTime := time.Now()

	defer func() {
		if p := recover(); p != nil {
			fmt.Printf("panic recover! : %v\r\n", p)
		}
		if err != nil {
			fmt.Printf("error : %v\r\n", err)
		}
		fmt.Println("耗时:", time.Now().Sub(startTime))
		fmt.Println("按任意键结束")
		var data int
		fmt.Scanf("%d", &data)
	}()

	const (
		todoDir = "./1-加拼音的docx-待处理"
		doneDir = "./2-加拼音的docx-结果"
	)

	if err = initDir([]string{todoDir, doneDir}); err != nil {
		return
	}
	if err = parseConfig(); err != nil {
		return
	}

	var entries []os.FileInfo
	if entries, err = ioutil.ReadDir(todoDir); err != nil {
		return
	}

	for _, entry := range entries {
		name := entry.Name()
		ext := path.Ext(name)
		// Skip directories, "~$" files and anything that is not a .docx.
		if entry.IsDir() || strings.HasPrefix(path.Base(name), "~$") || !strings.EqualFold(".docx", ext) {
			continue
		}

		fmt.Println("正在处理文件:" + name)
		fromPath := todoDir + "/" + name
		stamp := time.Now().Format("_20060102_150405.docx")
		toPath := doneDir + "/" + strings.TrimSuffix(path.Base(name), ext) + stamp
		if err = procOneDocxFile(fromPath, toPath); err != nil {
			return
		}
	}
}
网友评论