美文网首页
加拼音(旧格式)-代码

加拼音(旧格式)-代码

作者: 姜附 | 来源:发表于2020-07-16 09:57 被阅读0次

word文档全篇加拼音(批量注音)+批量修改读音

加拼音(旧格式)-代码

package main

import (
    "archive/zip"
    "runtime"

    "bufio"
    "bytes"
    "errors"
    "fmt"
    "io"
    "io/ioutil"
    "os"
    "path"
    _ "regexp"
    "strconv"
    "strings"
    "time"
    "unicode"

    "golang.org/x/text/encoding/simplifiedchinese"
    "golang.org/x/text/transform"

    // "github.com/aurelien-rainone/assertgo"
    "github.com/etree"
    "github.com/mozillazg/go-pinyin"
)

// assert panics when cond is false.
//
// The panic value is a string made of the caller's source file, line number
// and function name, followed by two spaces and failPrompt. Nothing happens
// when cond is true.
func assert(cond bool, failPrompt string) {
    if cond {
        return
    }

    // Skip one stack frame so the reported location is that of the code
    // that called assert rather than assert itself.
    pc, file, line, _ := runtime.Caller(1)
    callerName := runtime.FuncForPC(pc).Name()
    location := file + ":" + strconv.Itoa(line) + ":" + callerName

    panic(location + "  " + failPrompt)
}

// Notes on units:
//   - a font-size value in the XML is twice the font size shown in Word;
//   - by default the pinyin font size is half the size of the annotated text;
//   - the default XML offset equals the XML pinyin font-size value minus 1.
var (
    // g_configPinyinFont is the font name written into the pinyin field.
    g_configPinyinFont = "微软雅黑"

    // g_configPinyinFontSize is the pinyin font size; it is doubled before
    // being written into the field (see addPinyin).
    g_configPinyinFontSize = 5

    // g_configPinyinUseFontSize reports whether one uniform pinyin font size
    // is used for the whole document.
    g_configPinyinUseFontSize = false

    // g_configPinyinAlignmentMap translates the alignment names accepted in
    // the configuration file into the "jcN" switch values used in the field.
    g_configPinyinAlignmentMap = map[string]string{
        "居中":    "jc0",
        "0-1-0": "jc1",
        "1-2-1": "jc2",
        "左对齐":   "jc3",
        "右对齐":   "jc4",
    }

    // g_configPinyinAlignment is the alignment switch in effect; "jc0" is
    // the centered entry of g_configPinyinAlignmentMap.
    g_configPinyinAlignment = "jc0"

    // g_configPinyinOffset is the default offset, derived once from the
    // initial value of g_configPinyinFontSize.
    g_configPinyinOffset = g_configPinyinFontSize*2 - 1
)

// parseConfigLine applies one "key=value" configuration line to the
// package-level pinyin settings.
//
// Recognised keys:
//   - "拼音字体": stored in g_configPinyinFont;
//   - "拼音字号": a positive integer stored in g_configPinyinFontSize; setting
//     it also switches g_configPinyinUseFontSize on;
//   - "拼音对齐": one of the keys of g_configPinyinAlignmentMap; the mapped
//     value is stored in g_configPinyinAlignment.
//
// An empty value leaves the corresponding setting untouched. An error is
// returned when the line does not contain exactly one "=", when the key is
// unknown, or when the value is not valid for its key; in that case no
// setting is modified.
func parseConfigLine(line string) error {
    fields := strings.Split(strings.TrimSpace(line), "=")
    if len(fields) != 2 {
        return errors.New("无效配置:" + line)
    }
    key, value := fields[0], fields[1]

    switch key {
    case "拼音字体":
        if value != "" {
            g_configPinyinFont = value
        }

    case "拼音字号":
        if value == "" {
            break
        }
        size, err := strconv.Atoi(value)
        if err != nil {
            return fmt.Errorf("字号无效:%s: %s\r\n", line, err)
        }
        if size < 1 {
            // The number parsed fine, so there is no underlying error to
            // report; formatting a nil error here used to print
            // "%!s(<nil>)" in the message.
            return fmt.Errorf("字号无效:%s: 字号必须大于0\r\n", line)
        }
        g_configPinyinUseFontSize = true
        g_configPinyinFontSize = size

    case "拼音对齐":
        if value == "" {
            break
        }
        alignment, ok := g_configPinyinAlignmentMap[value]
        if !ok || alignment == "" {
            return fmt.Errorf("无效拼音对齐:%s\r\n", line)
        }
        g_configPinyinAlignment = alignment

    default:
        return errors.New("无效配置:" + line)
    }

    return nil
}

func parseConfig() error {
    fileName := "拼音配置.txt"
    fileinfo, err := os.Stat(fileName)
    if nil != err {
        // 文件不存在,创建一个新的,并写入注释说明
        newFile, err := os.Create(fileName)
        if err != nil {
            return err
        }
        defer newFile.Close()
        newFile.WriteString(
            fmt.Sprintf(
                // 默认不填写具体值
                `# 说明:只支持docx格式的word文档;
# 拼音字号默认动态变化,指定字号后全篇拼音使用指定字号;
拼音字体=%s
拼音字号=
拼音对齐=
`,
                // +"拼音偏移=\r\n"
                g_configPinyinFont,
                // g_configPinyinFontSize/2,
                // g_configPinyinOffset,
            ),
            // fmt.Sprintf("# 说明:只支持docx格式的word文档\r\n拼音字体=%s",
            //  g_configPinyinFont,
            // ),
        )
    } else if fileinfo.IsDir() {
        return errors.New("无法创建配置文件:" + fileName)
    } else {

        f, err := os.Open(fileName)
        if err != nil {
            return nil
        }

        br := bufio.NewReader(f)
        for {
            line, _, err := br.ReadLine()
            if err == io.EOF {
                break
            }

            // line = strings.TrimSpace(line)
            strLine := strings.ReplaceAll(string(line), " ", "") // 去掉所有空格

            if "" == strLine {
                continue
            }

            // strLine := strings.TrimLeft(string(line), " ")
            if strings.HasPrefix(strLine, "#") {
                continue
            }

            if err = parseConfigLine(strLine); nil != err {
                return err
            }
        }

    }
    return nil
}

// createWrPrNode returns a copy of oldWrPrNode in which the w:hint attribute
// of the w:rFonts child is set to "default". The attribute is created when
// it is missing. When the copy has no w:rFonts child it is returned as is.
// All changes are made on the copy.
func createWrPrNode(oldWrPrNode *etree.Element) *etree.Element {
    clone := oldWrPrNode.Copy()

    fonts := clone.FindElement("w:rFonts")
    if fonts == nil {
        // No font settings to adjust.
        return clone
    }

    if hint := fonts.SelectAttr("w:hint"); hint != nil {
        hint.Value = "default"
    } else {
        fonts.CreateAttr("w:hint", "default")
    }

    return clone
}

// createWrBegin builds a new w:r element whose w:fldChar child carries
// w:fldCharType="begin". When wrPrNode is non-nil, the element returned by
// createWrPrNode(wrPrNode) is added ahead of the w:fldChar child.
func createWrBegin(wrPrNode *etree.Element) *etree.Element {
    run := etree.NewElement("w:r")
    if wrPrNode != nil {
        run.AddChild(createWrPrNode(wrPrNode))
    }

    // CreateElement appends the new child after anything added above.
    fldChar := run.CreateElement("w:fldChar")
    fldChar.CreateAttr("w:fldCharType", "begin")

    return run
}

// createWrinstrText builds a new w:r element holding a w:instrText child
// whose content is text and whose xml:space attribute is "preserve". When
// wrPrNode is non-nil, the element returned by createWrPrNode(wrPrNode) is
// added ahead of the w:instrText child; a nil wrPrNode is accepted and simply
// yields a run without properties.
func createWrinstrText(text string, wrPrNode *etree.Element) *etree.Element {
    run := etree.NewElement("w:r")
    if wrPrNode != nil {
        run.AddChild(createWrPrNode(wrPrNode))
    }

    // CreateElement appends the new child after anything added above.
    instr := run.CreateElement("w:instrText")
    instr.CreateAttr("xml:space", "preserve")
    instr.SetText(text)

    return run
}

// createWrEnd builds a new w:r element whose w:fldChar child carries
// w:fldCharType="end". When wrPrNode is non-nil, the element returned by
// createWrPrNode(wrPrNode) is added ahead of the w:fldChar child.
func createWrEnd(wrPrNode *etree.Element) *etree.Element {
    run := etree.NewElement("w:r")
    if wrPrNode != nil {
        run.AddChild(createWrPrNode(wrPrNode))
    }

    // CreateElement appends the new child after anything added above.
    fldChar := run.CreateElement("w:fldChar")
    fldChar.CreateAttr("w:fldCharType", "end")

    return run
}

// createWrwt returns a copy of oldWrNode whose w:t child contains w.
//
// A w:t child is appended to the copy when it has none. The w:t element is
// always marked xml:space="preserve": addPinyin cuts a run's text into
// pieces, so a piece may begin or end with (or consist only of) spaces that
// were in the middle of the original text, and without the attribute such
// whitespace is not kept when the document is opened.
func createWrwt(w string, oldWrNode *etree.Element) *etree.Element {
    wr := oldWrNode.Copy()

    wt := wr.FindElement("w:t")
    if nil == wt {
        wt = etree.NewElement("w:t")
        wr.AddChild(wt)
    }

    if space := wt.SelectAttr("xml:space"); nil != space {
        space.Value = "preserve"
    } else {
        wt.CreateAttr("xml:space", "preserve")
    }
    wt.SetText(w)

    return wr
}

// procYI picks the reading of the character "一" from what follows it.
//
// Rules (tones 1-4 are yin-ping, yang-ping, shang and qu respectively):
//   - alone, or at the end of a word or sentence: first tone "yī";
//   - before a fourth-tone syllable: second tone "yí";
//   - before a first-, second- or third-tone syllable: fourth tone "yì".
//
// nextHans holds the characters after "一", one per element. nextPinyins
// holds the readings of the Han characters after "一", so nextPinyins[0]
// belongs to the first Han character in nextHans. The citation reading "yī"
// is returned when the next character is missing or not a Han character, and
// also when no reading is available for it.
func procYI(nextHans []string, nextPinyins [][]string) string {
    const citation = "yī"

    // End of text: nothing follows.
    if len(nextHans) == 0 {
        return citation
    }

    // Followed by something that is not a Han character.
    next := []rune(nextHans[0])
    if len(next) == 0 || !unicode.Is(unicode.Han, next[0]) {
        return citation
    }

    // The reading of the next character is unknown; indexing blindly here
    // would panic.
    if len(nextPinyins) == 0 || len(nextPinyins[0]) == 0 {
        return citation
    }

    // A fourth-tone vowel in the next syllable selects the second tone.
    if strings.ContainsAny(nextPinyins[0][0], "àòèìùǜ") {
        return "yí"
    }

    return "yì"
}

// procBU picks the reading of the character "不" from what follows it.
//
// Rule (tones 1-4 are yin-ping, yang-ping, shang and qu respectively):
// directly before a fourth-tone syllable "不" is read with the second tone
// "bú"; in every other case it keeps the fourth tone "bù".
//
// nextHans holds the characters after "不", one per element. nextPinyins
// holds the readings of the Han characters after "不", so nextPinyins[0]
// belongs to the first Han character in nextHans. It therefore describes the
// character right after "不" only when nextHans[0] is itself a Han character,
// which is checked before the reading is consulted.
func procBU(nextHans []string, nextPinyins [][]string) string {
    const base = "bù"

    // Nothing follows, or no reading is available. The latter happens when
    // only punctuation follows and used to cause an index-out-of-range panic.
    if len(nextHans) == 0 || len(nextPinyins) == 0 || len(nextPinyins[0]) == 0 {
        return base
    }

    // Punctuation or other non-Han text separates "不" from the syllable
    // described by nextPinyins[0], so the tone change does not apply.
    next := []rune(nextHans[0])
    if len(next) == 0 || !unicode.Is(unicode.Han, next[0]) {
        return base
    }

    if strings.ContainsAny(nextPinyins[0][0], "àòèìùǜ") {
        return "bú"
    }

    return base
}

// addPinyin takes the bytes of a word/document.xml part and returns the XML
// with a pinyin annotation added to every Han character for which a reading
// is known.
//
// buf is parsed as XML; if that fails, parsing is retried after decoding buf
// as GBK and then as HZ-GB2312. Each w:body/w:p paragraph is handled
// separately. Every direct w:r child that has a w:t element is replaced by a
// sequence of new runs: stretches of text without annotation become copies of
// the original run (createWrwt), and each annotated character becomes a
// begin / instruction / end triple of field runs (createWrBegin,
// createWrinstrText, createWrEnd) holding an EQ field built from the
// g_configPinyin* settings. "一" and "不" get their reading from procYI and
// procBU.
//
// An error is returned when the XML cannot be parsed, when it has no
// w:document root, or when the result cannot be serialised.
func addPinyin(buf []byte) (string, error) {

    pinyinArg := pinyin.NewArgs()
    pinyinArg.Style = pinyin.Tone // readings carry tone marks

    doc := etree.NewDocument()
    err := doc.ReadFromBytes(buf)
    if nil != err {
        fmt.Println(err)

        // Retry with the legacy simplified-Chinese encodings.
        transformers := []transform.Transformer{
            simplifiedchinese.GBK.NewDecoder(),
            simplifiedchinese.HZGB2312.NewDecoder(),
        }

        fmt.Println("尝试转码")
        for _, t := range transformers {
            var d []byte
            d, err = ioutil.ReadAll(transform.NewReader(bytes.NewReader(buf), t))
            if nil != err {
                continue
            }

            err = doc.ReadFromBytes(d)
            if nil == err {
                fmt.Println("转码成功")
                break
            }
        }

        if nil != err {
            fmt.Println("转码失败")
            return "", err
        }
    }

    wdocument := doc.SelectElement("w:document")
    if nil == wdocument {
        // Not a WordprocessingML main document; going on would dereference nil.
        return "", errors.New("没有 w:document")
    }

    // A w:p element is one paragraph; paragraphs are processed one by one.
    for _, wp := range wdocument.FindElements("w:body/w:p") {
        // The runs are collected once, before any child of wp is changed.
        wrs := wp.FindElements("w:r")

        // All the text of the paragraph.
        var paragraph strings.Builder
        for _, wr := range wrs {
            if wt := wr.FindElement("w:t"); nil != wt {
                paragraph.WriteString(wt.Text())
            }
        }
        allStrOfWp := paragraph.String()
        allHansOfWp := strings.Split(allStrOfWp, "")
        hanIndex := 0

        // Exactly one entry per Han character, in text order, so that
        // pinyinIndex below can never drift or run past the end. A character
        // without a known reading gets an empty string.
        pinyins := make([][]string, 0, len(allHansOfWp))
        for _, r := range allStrOfWp {
            if !unicode.Is(unicode.Han, r) {
                continue
            }
            reading := ""
            if py := pinyin.Pinyin(string(r), pinyinArg); len(py) > 0 && len(py[0]) > 0 {
                reading = py[0][0]
            }
            pinyins = append(pinyins, []string{reading})
        }
        pinyinIndex := 0

        for _, wr := range wrs {
            wt := wr.FindElement("w:t")
            if nil == wt {
                continue
            }

            wrPr := wr.FindElement("w:rPr")

            pinyinFontSize := g_configPinyinFontSize * 2
            pinyinOffset := g_configPinyinOffset

            // When the run states its own size (w:sz), derive the offset and,
            // unless a uniform pinyin size is configured, the pinyin size
            // from it.
            if nil != wrPr {
                if wsz := wrPr.FindElement("w:sz"); nil != wsz {
                    if i, err := strconv.Atoi(wsz.SelectAttrValue("w:val", "")); nil == err {
                        if !g_configPinyinUseFontSize {
                            pinyinFontSize = i / 2
                        }
                        pinyinOffset = i/2 - 1
                    }
                }
            }

            // Plain double quotes are used around the font name on purpose:
            // "&" is escaped to "&amp;" on output, so "&quot;" would turn
            // into "&amp;quot;" and break centering for some fonts.
            // The pieces are concatenated rather than passed through a format
            // string so that a "%" in the configured font name is harmless.
            fieldPrefix := ` EQ \* ` + g_configPinyinAlignment + ` \* "Font:` + g_configPinyinFont +
                `" \* hps` + strconv.Itoa(pinyinFontSize) +
                ` \o \ad(\s \up ` + strconv.Itoa(pinyinOffset) + `(`

            lastStr := "" // pending text that gets no annotation

            for _, w := range wt.Text() {
                assert(string(w) == allHansOfWp[hanIndex], "出错啦")

                if !unicode.Is(unicode.Han, w) {
                    lastStr += string(w)
                    hanIndex++
                    continue
                }

                var hanPinyin string
                switch string(w) {
                case "一":
                    hanPinyin = procYI(allHansOfWp[hanIndex+1:], pinyins[pinyinIndex+1:])
                case "不":
                    hanPinyin = procBU(allHansOfWp[hanIndex+1:], pinyins[pinyinIndex+1:])
                default:
                    hanPinyin = pinyins[pinyinIndex][0]
                }

                if "" == hanPinyin {
                    // No reading known: keep the character as plain text.
                    lastStr += string(w)
                } else {
                    if "" != lastStr {
                        wp.InsertChild(wr, createWrwt(lastStr, wr))
                        lastStr = ""
                    }

                    newText := fieldPrefix + hanPinyin + `),` + string(w) + `)`

                    wp.InsertChild(wr, createWrBegin(wrPr))
                    wp.InsertChild(wr, createWrinstrText(newText, wrPr))
                    wp.InsertChild(wr, createWrEnd(wrPr))
                }

                pinyinIndex++
                hanIndex++
            }

            if "" != lastStr {
                wp.InsertChild(wr, createWrwt(lastStr, wr))
            }

            // The original run has been replaced by the runs inserted before it.
            wp.RemoveChild(wr)
        }
    }

    newXml, err := doc.WriteToString()
    if nil != err {
        fmt.Println(err)
        return "", err
    }

    return newXml, nil
}

func procOneDocxFile(fromPath string, toPath string) error {
    zipReader, err := zip.OpenReader(fromPath)
    if err != nil {
        fmt.Print(err)
        return err
    }
    defer zipReader.Close()

    newZipFile, err := os.Create(toPath)
    if err != nil {
        fmt.Println(err)
        return err
    }
    defer newZipFile.Close()

    zipWriter := zip.NewWriter(newZipFile)
    defer zipWriter.Close()

    var f *zip.File
    for _, file := range zipReader.File {

        rc, err := file.Open()
        if nil != err {
            return err
        }

        buf := make([]byte, file.UncompressedSize)

        // zipfile文件一次可能不能读完,循环读完为止
        readLen := 0
        for file.UncompressedSize != uint32(readLen) {
            n, err := rc.Read(buf[readLen:])
            if nil != err && (0 != strings.Compare("EOF", err.Error())) {
                fmt.Println(err)
                return err
            }
            if 0 == n {
                return errors.New("读取zip出错")
            }
            readLen += n
        }

        var newBuf []byte
        if "word/document.xml" == file.Name {
            f = file

            assert(file.UncompressedSize == uint32(readLen), "读取错误")

            newXmlStr, err := addPinyin(buf)
            if nil != err {
                return err
            }

            newBuf = []byte(newXmlStr)

        } else {
            newBuf = buf

        }

        newFile, err := zipWriter.Create(file.Name)
        if err != nil {
            return err
        }

        _, err = newFile.Write(newBuf)
        if err != nil {
            return err
        }
    }

    if nil == f {
        err = errors.New(fromPath + ": 没有 word/document.xml")
        return err
    }

    return nil
}

// initDir makes sure every entry of paths exists as a directory.
//
// Entries that are already directories are left alone. Entries that cannot
// be stat'ed are created, together with any missing parent directories. An
// error is returned, and processing stops, when an entry exists but is not a
// directory or when creating it fails. Errors are returned to the caller
// rather than printed here, so they are reported only once.
func initDir(paths []string) error {
    for _, dir := range paths {
        info, err := os.Stat(dir)
        if err == nil {
            if !info.IsDir() {
                return errors.New("无法创建目录:" + dir)
            }
            continue
        }

        if err := os.MkdirAll(dir, os.ModePerm); err != nil {
            return err
        }
    }

    return nil
}

// main adds pinyin to every .docx file found in the to-do directory and
// writes each result, with a timestamp appended to its name, into the
// results directory.
//
// Both directories are created when missing and the configuration is loaded
// first. Sub-directories, files whose name starts with "~$" and files
// without a ".docx" extension (compared case-insensitively) are skipped.
// Processing stops at the first failure. Before exiting, the deferred
// function prints any recovered panic, the last error and the elapsed time,
// then waits for input on standard input.
func main() {
    var err error
    startTime := time.Now()

    defer func() {
        if recovered := recover(); recovered != nil {
            fmt.Printf("panic recover! : %v\r\n", recovered)
        }
        if err != nil {
            fmt.Printf("error : %v\r\n", err)
        }

        fmt.Println("耗时:", time.Now().Sub(startTime))

        fmt.Println("按任意键结束")
        var data int
        fmt.Scanf("%d", &data)
    }()

    todoDir := "./1-加拼音的docx-待处理"
    doneDir := "./2-加拼音的docx-结果"

    if err = initDir([]string{todoDir, doneDir}); err != nil {
        return
    }
    if err = parseConfig(); err != nil {
        return
    }

    // err was declared above in this same scope, so ":=" only introduces
    // entries and assigns to the existing err that the deferred function reads.
    entries, err := ioutil.ReadDir(todoDir)
    if err != nil {
        return
    }

    for _, entry := range entries {
        name := entry.Name()
        ext := path.Ext(name)

        skip := entry.IsDir() ||
            strings.HasPrefix(path.Base(name), "~$") ||
            !strings.EqualFold(".docx", ext)
        if skip {
            continue
        }

        fmt.Println("正在处理文件:" + name)

        fromPath := todoDir + "/" + name
        stamp := time.Now().Format("_20060102_150405.docx")
        toPath := doneDir + "/" + strings.TrimSuffix(path.Base(name), ext) + stamp

        if err = procOneDocxFile(fromPath, toPath); err != nil {
            return
        }
    }
}

相关文章

网友评论

      本文标题:加拼音(旧格式)-代码

      本文链接:https://www.haomeiwen.com/subject/yrouhktx.html