美文网首页
pinyin4j实现汉字转拼音

pinyin4j实现汉字转拼音

作者: Pines_ | 来源:发表于2017-01-25 15:24 被阅读0次

    说明:本文提供一个汉字转汉语拼音的工具类,支持多音字,并保留无法转换的其他字符。

    源代码
    https://github.com/whitePines/pinyinTip.git

    工具类

    package com.test.caoxs.pinyinTest;
    
    import java.util.ArrayList;
    import java.util.HashSet;
    
    import net.sourceforge.pinyin4j.PinyinHelper;
    import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
    import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
    import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
    import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
    
    /**
     * Utility for converting Chinese text to Hanyu Pinyin. Polyphonic characters
     * contribute every reading returned by pinyin4j, and characters for which
     * pinyin4j returns no reading are kept unchanged.
     *
     * @author whitePines
     */
    public class PinYinUtil {
        /**
         * Output shape of a conversion.
         * <ul>
         * <li>{@code py} - first letter of the reading of every character;</li>
         * <li>{@code piny} - full reading for the first character (index 0), first
         * letter for all following characters;</li>
         * <li>{@code pinyin} - full reading of every character.</li>
         * </ul>
         */
        public enum TypeEnum {
            py, piny, pinyin;
        }

        /**
         * Lower-case, tone-less output format. Built once and reused for every
         * lookup; this class never modifies it after creation.
         */
        private static final HanyuPinyinOutputFormat OUTPUT_FORMAT = createOutputFormat();

        private static HanyuPinyinOutputFormat createOutputFormat() {
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
            format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
            format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
            return format;
        }

        /**
         * Returns every possible rendering of the given string in the requested
         * format. The result is the cross product of the readings of each character,
         * so its size can grow multiplicatively with the number of polyphonic
         * characters in the input.
         *
         * @param chineseStr the text to convert; {@code null} or empty yields a set
         *                   containing only the empty string
         * @param type       the output shape; must not be {@code null} when
         *                   {@code chineseStr} is non-empty
         * @return a new set holding all distinct renderings
         * @throws NullPointerException if {@code type} is {@code null} and
         *                              {@code chineseStr} is non-empty
         */
        public static HashSet<String> allPossiblePys(String chineseStr, TypeEnum type) {
            HashSet<String> results = new HashSet<String>();
            results.add("");
            if (chineseStr == null || "".equals(chineseStr)) {
                return results;
            }
            if (type == null) {
                // Same exception type the switch on a null enum would raise, but with a message.
                throw new NullPointerException("type must not be null");
            }
            for (String[] readings : getStringPys(chineseStr, type)) {
                // Extend every prefix built so far with every reading of the current character.
                HashSet<String> extended = new HashSet<String>();
                for (String reading : readings) {
                    for (String prefix : results) {
                        extended.add(prefix + reading);
                    }
                }
                results = extended;
            }
            return results;
        }

        /**
         * Collects, for each UTF-16 code unit of the string, the readings to use in
         * the given format, in input order.
         */
        private static ArrayList<String[]> getStringPys(String chineseStr, TypeEnum type) {
            char[] chars = chineseStr.toCharArray();
            ArrayList<String[]> pinyinList = new ArrayList<String[]>(chars.length);
            for (int i = 0; i < chars.length; i++) {
                pinyinList.add(getCharPYs(chars[i], i, type));
            }
            return pinyinList;
        }

        /**
         * Picks abbreviated or full readings for one character depending on the
         * format and on the character's position in the string.
         */
        private static String[] getCharPYs(char c, int index, TypeEnum type) {
            switch (type) {
            case py:
                return getPy(c);
            case piny:
                // Only the leading character is spelled out in full.
                return index == 0 ? getPinyin(c) : getPy(c);
            case pinyin:
            default:
                return getPinyin(c);
            }
        }

        /**
         * Returns the first letter of each reading of the character. Readings that
         * share a first letter produce repeated entries; the array is not
         * de-duplicated.
         *
         * @param chineseChar the character to convert
         * @return one single-letter string per reading, or the character itself when
         *         it has no reading
         */
        public static String[] getPy(char chineseChar) {
            String[] strs = turnProcess(chineseChar);
            for (int i = 0; i < strs.length; i++) {
                // Guarded so an unexpectedly empty reading is passed through instead of throwing.
                if (strs[i].length() > 1) {
                    strs[i] = strs[i].substring(0, 1);
                }
            }
            return strs;
        }

        /**
         * Returns the full lower-case, tone-less readings of the character.
         *
         * @param chineseChar the character to convert
         * @return the readings, or a one-element array holding the character itself
         *         when it has no reading
         */
        public static String[] getPinyin(char chineseChar) {
            return turnProcess(chineseChar);
        }

        /**
         * Looks the character up through pinyin4j and falls back to the character
         * itself when no reading is available.
         */
        private static String[] turnProcess(char chineseChar) {
            String[] strs = null;
            try {
                strs = PinyinHelper.toHanyuPinyinStringArray(chineseChar, OUTPUT_FORMAT);
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                // Best effort: report the problem and fall through to the fallback below.
                // NOTE(review): not expected with the fixed tone-less format - confirm
                // against the pinyin4j documentation.
                e.printStackTrace();
            }
            // Characters that cannot be converted to pinyin are kept as they are.
            if (strs == null || strs.length == 0) {
                strs = new String[] { String.valueOf(chineseChar) };
            }
            return strs;
        }
    }
    
    

    测试类

    package com.test.caoxs.pinyinTest;
    
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.HashSet;
    
    import org.junit.Test;
    
    import com.test.caoxs.pinyinTest.PinYinUtil.TypeEnum;
    
    /**
     * Smoke tests for {@link PinYinUtil}. Every test prints the conversion results
     * to standard output for manual inspection; nothing is asserted.
     */
    public class PinYinUtilTester {
        /** Line printed after the output of each test. */
        private static final String SEPARATOR = "--------------------------------------";

        /** Sample sentence shared by the string-level tests. */
        private static final String SENTENCE = "都在中华人民共和国";

        @Test
        public void testChinesePunctuation() {
            printReadings(PinYinUtil.getPy('!'));
        }

        @Test
        public void testGetCharPy() {
            printReadings(PinYinUtil.getPy('都'));
        }

        @Test
        public void testGetCharPinyin() {
            printReadings(PinYinUtil.getPinyin('都'));
        }

        @Test
        public void testGetStrPy() {
            printNumbered("py格式的第", PinYinUtil.allPossiblePys(SENTENCE, TypeEnum.py));
        }

        @Test
        public void testGetStrPiny() {
            printNumbered("piny格式的第", PinYinUtil.allPossiblePys(SENTENCE, TypeEnum.piny));
        }

        @Test
        public void testGetStrPiny2() {
            printNumbered("piny格式的第", PinYinUtil.allPossiblePys("!都在中华人民共和国", TypeEnum.piny));
        }

        @Test
        public void testGetStrPiny3() {
            printNumbered("piny格式的第",
                    PinYinUtil.allPossiblePys("!<>《》_102都在中华人民共和国", TypeEnum.piny));
        }

        @Test
        public void testGetStrPinyin() {
            printNumbered("pinyin格式的第", PinYinUtil.allPossiblePys(SENTENCE, TypeEnum.pinyin));
        }

        @Test
        public void testGetNullStrPinyin() {
            printNumbered("pinyin格式的第", PinYinUtil.allPossiblePys("", TypeEnum.pinyin));
        }

        @Test
        public void testGetDataPinyin() {
            System.out.println("testGetDataPinyin--------------------------------------begin");
            ArrayList<String> samples = new Data().strs;
            HashMap<String, HashSet<String>> byPy = new HashMap<String, HashSet<String>>();
            HashMap<String, HashSet<String>> byPiny = new HashMap<String, HashSet<String>>();
            HashMap<String, HashSet<String>> byPinyin = new HashMap<String, HashSet<String>>();
            // Convert everything first so the three listings below are printed back to back.
            for (String sample : samples) {
                byPy.put(sample, PinYinUtil.allPossiblePys(sample, TypeEnum.py));
                byPiny.put(sample, PinYinUtil.allPossiblePys(sample, TypeEnum.piny));
                byPinyin.put(sample, PinYinUtil.allPossiblePys(sample, TypeEnum.pinyin));
            }
            printMapping("----------->py", samples, byPy);
            printMapping("----------->piny", samples, byPiny);
            printMapping("----------->pinyin", samples, byPinyin);

            System.out.println("testGetDataPinyin--------------------------------------end");
        }

        /** Prints each reading on its own line, followed by the separator line. */
        private static void printReadings(String[] readings) {
            for (int k = 0; k < readings.length; k++) {
                System.out.println(readings[k]);
            }
            System.out.println(SEPARATOR);
        }

        /**
         * Prints the values with a 1-based running number in the set's iteration
         * order, followed by the separator line.
         */
        private static void printNumbered(String prefix, HashSet<String> values) {
            int position = 0;
            for (String value : values) {
                position++;
                System.out.println(prefix + position + "条" + value);
            }
            System.out.println(SEPARATOR);
        }

        /** Prints the header, then one "key-->value" line per converted form of every key. */
        private static void printMapping(String header, ArrayList<String> keys,
                HashMap<String, HashSet<String>> converted) {
            System.out.println(header);
            for (String key : keys) {
                for (String value : converted.get(key)) {
                    System.out.println(key + "-->" + value);
                }
            }
        }

    }
    
    

    Data类

    package com.test.caoxs.pinyinTest;
    
    import java.util.ArrayList;
    
    /**
     * Fixed sample strings, a mix of Latin-script and Chinese entries, that are
     * loaded into {@link #strs} on construction.
     */
    public class Data {
        /** The sample strings, in the order they are added by the constructor. */
        ArrayList<String> strs = new ArrayList<String>();

        /** Fills {@link #strs} with the built-in samples. */
        Data() {
            String[] samples = {
                "work", "log", "maxiang", "diary", "work1", "DB", "git", "mix",
                "工作", "学习", "杂", "马克飞象", "日记", "兴趣", "项目", "附件", "开发工具"
            };
            for (int k = 0; k < samples.length; k++) {
                strs.add(samples[k]);
            }
        }
    }
    
    

    笔者对粘贴大段代码是深恶痛绝的,然而,到自己开始写的时候竟然也这么做了。真该拉出去枪毙。

    本文略显单薄,毕竟只是临时写的。如果以后还会用到 pinyin4j,会在此基础上新增和修改内容。

    相关文章

      网友评论

          本文标题:pinyin4j实现汉字转拼音

          本文链接:https://www.haomeiwen.com/subject/fhnzbttx.html