说明：本文介绍一个将汉字转换为汉语拼音的工具类，支持多音字，并保留无法转换的其他字符。
源代码
https://github.com/whitePines/pinyinTip.git
工具类
package com.test.caoxs.pinyinTest;
import java.util.ArrayList;
import java.util.HashSet;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
/**
 * Utility for converting Chinese text to Hanyu Pinyin.
 *
 * <p>Polyphonic characters are supported: every reading reported by pinyin4j is
 * kept, and {@link #allPossiblePys(String, TypeEnum)} expands a string into all
 * combinations of its characters' readings. Characters that cannot be converted
 * (punctuation, digits, Latin letters, ...) are preserved unchanged.
 *
 * @author whitePines
 */
public class PinYinUtil {

    /** Output styles supported by {@link #allPossiblePys(String, TypeEnum)}. */
    public enum TypeEnum {
        /** First letter of every character's pinyin. */
        py,
        /** Full pinyin for the first character, first letter for all following characters. */
        piny,
        /** Full pinyin for every character. */
        pinyin;
    }

    /**
     * Shared output format: lower case, no tone marks. It is configured once here
     * and only read afterwards, so it does not need to be rebuilt per character.
     */
    private static final HanyuPinyinOutputFormat OUTPUT_FORMAT = createOutputFormat();

    /** Builds the lower-case, tone-less format used for every conversion. */
    private static HanyuPinyinOutputFormat createOutputFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        return format;
    }

    /**
     * Returns every possible rendering of the given string in the requested style.
     *
     * <p>Each character contributes one or more candidates (several for polyphonic
     * characters); the result is the set of all concatenations that pick one
     * candidate per character, in character order.
     *
     * @param chineseStr the text to convert; may be {@code null} or empty
     * @param type the output style; {@code null} is treated like {@link TypeEnum#pinyin}
     * @return a set of all renderings; for {@code null} or empty input the set
     *         contains only the empty string
     */
    public static HashSet<String> allPossiblePys(String chineseStr, TypeEnum type) {
        HashSet<String> results = new HashSet<String>();
        // Seed with the empty prefix so the first character has something to extend.
        results.add("");
        if (chineseStr == null || chineseStr.isEmpty()) {
            return results;
        }
        for (String[] candidates : getStringPys(chineseStr, type)) {
            // Extend every prefix built so far with every candidate of this character.
            HashSet<String> extended = new HashSet<String>();
            for (String candidate : candidates) {
                for (String prefix : results) {
                    extended.add(prefix + candidate);
                }
            }
            results = extended;
        }
        return results;
    }

    /** Collects, per character of the string, the candidate renderings for the given style. */
    private static ArrayList<String[]> getStringPys(String chineseStr, TypeEnum type) {
        char[] chars = chineseStr.toCharArray();
        ArrayList<String[]> pinyinList = new ArrayList<String[]>(chars.length);
        for (int i = 0; i < chars.length; i++) {
            pinyinList.add(getCharPYs(chars[i], i, type));
        }
        return pinyinList;
    }

    /**
     * Returns the candidates for one character according to the style and the
     * character's position in the string.
     */
    private static String[] getCharPYs(char c, int index, TypeEnum type) {
        if (type == null) {
            // switch on a null enum would throw; use the same fallback as the default branch.
            return getPinyin(c);
        }
        switch (type) {
        case py:
            return getPy(c);
        case piny:
            // Only the first character of the string is spelled out in full.
            return index == 0 ? getPinyin(c) : getPy(c);
        case pinyin:
        default:
            return getPinyin(c);
        }
    }

    /**
     * Returns the first letter of each reading of the given character.
     *
     * <p>The array has one entry per reading, so it may contain duplicates when
     * several readings start with the same letter. A character that cannot be
     * converted yields a single entry containing the character itself.
     *
     * @param chineseChar the character to convert
     * @return the first letters of the character's readings; never {@code null} or empty
     */
    public static String[] getPy(char chineseChar) {
        String[] readings = turnProcess(chineseChar);
        for (int i = 0; i < readings.length; i++) {
            // Guard against an empty reading so substring cannot throw.
            if (!readings[i].isEmpty()) {
                readings[i] = readings[i].substring(0, 1);
            }
        }
        return readings;
    }

    /**
     * Returns all readings of the given character in lower case without tone marks.
     *
     * @param chineseChar the character to convert
     * @return the readings, or a single entry containing the character itself when
     *         it cannot be converted; never {@code null} or empty
     */
    public static String[] getPinyin(char chineseChar) {
        return turnProcess(chineseChar);
    }

    /**
     * Asks pinyin4j for the readings of one character and falls back to the
     * character itself when no reading is available.
     */
    private static String[] turnProcess(char chineseChar) {
        String[] readings = null;
        try {
            readings = PinyinHelper.toHanyuPinyinStringArray(chineseChar, OUTPUT_FORMAT);
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // Best effort: report the problem and fall through to keeping the original character.
            // NOTE(review): presumably unreachable with this fixed format - confirm against pinyin4j.
            e.printStackTrace();
        }
        // Keep the original character when it cannot be converted to pinyin.
        if (readings == null || readings.length == 0) {
            readings = new String[] { String.valueOf(chineseChar) };
        }
        return readings;
    }
}
测试类
package com.test.caoxs.pinyinTest;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import org.junit.Test;
import com.test.caoxs.pinyinTest.PinYinUtil.TypeEnum;
/**
 * Smoke tests for {@link PinYinUtil}. Each test prints the conversion results to
 * standard output for manual inspection; nothing is asserted.
 */
public class PinYinUtilTester {

    /** Line printed after the output of each single-input test. */
    private static final String SEPARATOR = "--------------------------------------";

    /** First letters for a full-width punctuation character. */
    @Test
    public void testChinesePunctuation() {
        printEach(PinYinUtil.getPy('!'));
    }

    /** First letters for a single Chinese character. */
    @Test
    public void testGetCharPy() {
        printEach(PinYinUtil.getPy('都'));
    }

    /** Full pinyin for a single Chinese character. */
    @Test
    public void testGetCharPinyin() {
        printEach(PinYinUtil.getPinyin('都'));
    }

    /** All renderings of a phrase in the py style. */
    @Test
    public void testGetStrPy() {
        printNumbered("py格式的第", PinYinUtil.allPossiblePys("都在中华人民共和国", TypeEnum.py));
    }

    /** All renderings of a phrase in the piny style. */
    @Test
    public void testGetStrPiny() {
        printNumbered("piny格式的第", PinYinUtil.allPossiblePys("都在中华人民共和国", TypeEnum.piny));
    }

    /** The piny style when the phrase starts with a punctuation character. */
    @Test
    public void testGetStrPiny2() {
        printNumbered("piny格式的第", PinYinUtil.allPossiblePys("!都在中华人民共和国", TypeEnum.piny));
    }

    /** The piny style when the phrase starts with mixed symbols and digits. */
    @Test
    public void testGetStrPiny3() {
        printNumbered("piny格式的第",
                PinYinUtil.allPossiblePys("!<>《》_102都在中华人民共和国", TypeEnum.piny));
    }

    /** All renderings of a phrase in the pinyin style. */
    @Test
    public void testGetStrPinyin() {
        printNumbered("pinyin格式的第", PinYinUtil.allPossiblePys("都在中华人民共和国", TypeEnum.pinyin));
    }

    /** The pinyin style for an empty input string. */
    @Test
    public void testGetNullStrPinyin() {
        printNumbered("pinyin格式的第", PinYinUtil.allPossiblePys("", TypeEnum.pinyin));
    }

    /** Converts every sample in {@link Data} in all three styles and prints them grouped by style. */
    @Test
    public void testGetDataPinyin() {
        System.out.println("testGetDataPinyin--------------------------------------begin");
        ArrayList<String> samples = new Data().strs;
        HashMap<String, HashSet<String>> byPy = new HashMap<String, HashSet<String>>();
        HashMap<String, HashSet<String>> byPiny = new HashMap<String, HashSet<String>>();
        HashMap<String, HashSet<String>> byPinyin = new HashMap<String, HashSet<String>>();
        for (String sample : samples) {
            // Compute all three styles first, then store them, one sample at a time.
            HashSet<String> pyForms = PinYinUtil.allPossiblePys(sample, TypeEnum.py);
            HashSet<String> pinyForms = PinYinUtil.allPossiblePys(sample, TypeEnum.piny);
            HashSet<String> pinyinForms = PinYinUtil.allPossiblePys(sample, TypeEnum.pinyin);
            byPy.put(sample, pyForms);
            byPiny.put(sample, pinyForms);
            byPinyin.put(sample, pinyinForms);
        }
        printMapping("----------->py", samples, byPy);
        printMapping("----------->piny", samples, byPiny);
        printMapping("----------->pinyin", samples, byPinyin);
        System.out.println("testGetDataPinyin--------------------------------------end");
    }

    /** Prints every array element on its own line, followed by the separator line. */
    private static void printEach(String[] values) {
        for (int k = 0; k < values.length; k++) {
            System.out.println(values[k]);
        }
        System.out.println(SEPARATOR);
    }

    /** Prints every value with the given prefix and a 1-based counter, followed by the separator line. */
    private static void printNumbered(String prefix, HashSet<String> values) {
        int count = 0;
        for (String value : values) {
            count++;
            System.out.println(prefix + count + "条" + value);
        }
        System.out.println(SEPARATOR);
    }

    /** Prints the header, then one "key-->value" line per stored value, following the order of keys. */
    private static void printMapping(String header, ArrayList<String> keys,
            HashMap<String, HashSet<String>> formsByKey) {
        System.out.println(header);
        for (String key : keys) {
            for (String form : formsByKey.get(key)) {
                System.out.println(key + "-->" + form);
            }
        }
    }
}
Data类
package com.test.caoxs.pinyinTest;
import java.util.ArrayList;
/**
 * Fixed sample inputs for the pinyin tests: Latin-only names followed by
 * Chinese names.
 */
public class Data {

    /** Sample strings, filled by the constructor in a fixed order. */
    ArrayList<String> strs = new ArrayList<String>();

    /** Populates {@link #strs} with the sample strings. */
    Data() {
        String[] samples = {
            "work", "log", "maxiang", "diary", "work1", "DB", "git", "mix",
            "工作", "学习", "杂", "马克飞象", "日记", "兴趣", "项目", "附件", "开发工具"
        };
        for (int k = 0; k < samples.length; k++) {
            strs.add(samples[k]);
        }
    }
}
笔者对粘贴大段代码是深恶痛绝的,然而,到自己开始写的时候竟然也这么做了。真该拉出去枪毙。
本文略显单薄，毕竟只是临时写的。如果以后还会用到 pinyin4j，会在此基础上新增和修改内容。
网友评论