美文网首页
中文字符串拼音转化和首字母获取demo

中文字符串拼音转化和首字母获取demo

作者: cyclone_29 | 来源:发表于2019-05-14 16:25 被阅读0次

    构建 Spring Boot 项目,pom 文件如下:

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Maven build for the "initials" demo: a Spring Boot project whose sources are written in Scala
         and which uses pinyin4j to convert Chinese characters to pinyin. -->
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
        <parent>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-parent</artifactId>
            <version>2.1.4.RELEASE</version>
            <relativePath/>
        </parent>
        <groupId>com.cyclone</groupId>
        <artifactId>initials</artifactId>
        <version>0.0.1-SNAPSHOT</version>
        <name>initials</name>

        <properties>
            <java.version>1.8</java.version>
            <scala.version>2.11.7</scala.version>
            <scala.compat.version>2.11</scala.compat.version>
        </properties>

        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter</artifactId>
            </dependency>

            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-test</artifactId>
                <scope>test</scope>
            </dependency>

            <!-- Scala standard library; version is taken from the scala.version property above. -->
            <dependency>
                <groupId>org.scala-lang</groupId>
                <artifactId>scala-library</artifactId>
                <version>${scala.version}</version>
            </dependency>

            <!-- pinyin4j: Chinese character to pinyin conversion. -->
            <dependency>
                <groupId>com.belerweb</groupId>
                <artifactId>pinyin4j</artifactId>
                <version>2.5.1</version>
            </dependency>
        </dependencies>

        <build>
            <plugins>
                <plugin>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-maven-plugin</artifactId>
                </plugin>
                <plugin>
                    <groupId>net.alchim31.maven</groupId>
                    <artifactId>scala-maven-plugin</artifactId>
                    <version>3.2.0</version>
                    <executions>
                        <!-- Without an execution no goal of this plugin is bound to the build lifecycle,
                             so the Scala main and test sources would never be compiled. -->
                        <execution>
                            <goals>
                                <goal>compile</goal>
                                <goal>testCompile</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </build>
    </project>
    

    代码实现:

    package com.cyclone
    import net.sourceforge.pinyin4j.PinyinHelper
    import net.sourceforge.pinyin4j.format.{HanyuPinyinCaseType, HanyuPinyinOutputFormat, HanyuPinyinToneType,    HanyuPinyinVCharType}
    
    object Initials {

      /**
       * Splits a Chinese string into characters and returns the first letter of each
       * character's pinyin, concatenated into one string.
       *
       * For a character with several readings, the first reading returned by pinyin4j is used.
       * Characters with a code point of 128 or below, and characters pinyin4j has no pinyin
       * for, are copied to the result unchanged.
       *
       * NOTE(review): pinyin4j may reject some toneType / vCharType combinations with its own
       * exception - confirm against the library documentation before using non-default formats.
       *
       * @param chineseString           the string to convert; must not be null
       * @param stripNoChineseCharacter when true (default), every character outside U+4E00..U+9FA5
       *                                is removed before conversion; when false, such characters
       *                                are kept and passed through unchanged
       * @param caseType                letter case of the result, lower case by default
       * @param toneType                tone rendering, no tone by default
       * @param vCharType               rendering of "ü", "v" by default
       * @return the concatenated pinyin initials
       * @throws NullPointerException     if `chineseString` is null
       * @throws IllegalArgumentException if stripping is enabled and no Chinese character remains
       */
      def chineseString2Initials(chineseString: String,
                                 stripNoChineseCharacter: Boolean = true,
                                 caseType: HanyuPinyinCaseType = HanyuPinyinCaseType.LOWERCASE,
                                 toneType: HanyuPinyinToneType = HanyuPinyinToneType.WITHOUT_TONE,
                                 vCharType: HanyuPinyinVCharType = HanyuPinyinVCharType.WITH_V): String =
        convert(chineseString, stripNoChineseCharacter, getFormatInstance(caseType, toneType, vCharType))(_.substring(0, 1))

      /**
       * Splits a Chinese string into characters and returns the full pinyin of each
       * character, concatenated into one string without separators.
       *
       * For a character with several readings, the first reading returned by pinyin4j is used.
       * Characters with a code point of 128 or below, and characters pinyin4j has no pinyin
       * for, are copied to the result unchanged.
       *
       * NOTE(review): pinyin4j may reject some toneType / vCharType combinations with its own
       * exception - confirm against the library documentation before using non-default formats.
       *
       * @param chineseString           the string to convert; must not be null
       * @param stripNoChineseCharacter when true (default), every character outside U+4E00..U+9FA5
       *                                is removed before conversion; when false, such characters
       *                                are kept and passed through unchanged
       * @param caseType                letter case of the result, lower case by default
       * @param toneType                tone rendering, no tone by default
       * @param vCharType               rendering of "ü", "v" by default
       * @return the concatenated pinyin
       * @throws NullPointerException     if `chineseString` is null
       * @throws IllegalArgumentException if stripping is enabled and no Chinese character remains
       */
      def chineseString2Letters(chineseString: String,
                                stripNoChineseCharacter: Boolean = true,
                                caseType: HanyuPinyinCaseType = HanyuPinyinCaseType.LOWERCASE,
                                toneType: HanyuPinyinToneType = HanyuPinyinToneType.WITHOUT_TONE,
                                vCharType: HanyuPinyinVCharType = HanyuPinyinVCharType.WITH_V): String =
        convert(chineseString, stripNoChineseCharacter, getFormatInstance(caseType, toneType, vCharType))(identity)

      /**
       * Shared conversion loop: maps every character of the (optionally stripped) input to
       * `pick(pinyin)` when a pinyin reading is available, or to the character itself otherwise.
       */
      private def convert(chineseString: String,
                          stripNoChineseCharacter: Boolean,
                          format: HanyuPinyinOutputFormat)(pick: String => String): String = {
        // Reject null on both paths; previously only the stripping path checked explicitly.
        if (chineseString == null) throw new NullPointerException("chineseString must not be null")
        val input = if (stripNoChineseCharacter) strip(chineseString) else chineseString
        val result = new StringBuilder
        input.foreach { ch =>
          val pinyin: Option[String] =
            if (ch > 128) {
              // The lookup yields null for characters without pinyin (e.g. full-width punctuation);
              // Option(...) turns that into None so the character is passed through instead of
              // causing a NullPointerException.
              for {
                readings <- Option(PinyinHelper.toHanyuPinyinStringArray(ch, format))
                first <- readings.headOption
                if first.nonEmpty
              } yield first
            } else None
          pinyin match {
            case Some(p) => result.append(pick(p))
            case None    => result.append(ch)
          }
        }
        result.toString()
      }

      /** Builds a pinyin4j output format from the given case, tone and "ü" settings. */
      private def getFormatInstance(caseType: HanyuPinyinCaseType = HanyuPinyinCaseType.LOWERCASE,
                                    toneType: HanyuPinyinToneType = HanyuPinyinToneType.WITHOUT_TONE,
                                    vCharType: HanyuPinyinVCharType = HanyuPinyinVCharType.WITH_V): HanyuPinyinOutputFormat = {
        val format = new HanyuPinyinOutputFormat
        format.setCaseType(caseType)
        format.setToneType(toneType)
        format.setVCharType(vCharType)
        format
      }

      /**
       * Removes every character outside the range U+4E00..U+9FA5 from `arg`.
       *
       * Throws NullPointerException when `arg` is null and IllegalArgumentException when
       * nothing is left after stripping.
       */
      private def strip(arg: String): String = {
        if (arg == null) throw new NullPointerException("chineseString must not be null")
        val regex = "[^\\u4e00-\\u9fa5]+"
        val tmp = arg.replaceAll(regex, "")
        if (tmp.isEmpty) throw new IllegalArgumentException("chineseString contains no Chinese characters")
        tmp
      }
    }
    

    测试代码:

    package com.cyclone
    import org.junit.Test
    class InitialTest {

      /**
       * Smoke test: converts a sample string mixing Chinese characters, spaces and symbols,
       * printing first its full pinyin and then its pinyin initials. Nothing is asserted.
       */
      @Test
      def init(): Unit = {
        val sample = "中华 @ $人名绿*树"
        // Each conversion is evaluated and printed in turn: full pinyin first, initials second.
        val conversions = Seq[String => String](
          com.cyclone.Initials.chineseString2Letters(_),
          com.cyclone.Initials.chineseString2Initials(_)
        )
        conversions.foreach(convert => println(convert(sample)))
      }
    }
    

    测试结果:


    image.png

    相关文章

      网友评论

          本文标题:中文字符串拼音转化和首字母获取demo

          本文链接:https://www.haomeiwen.com/subject/rzteaqtx.html