美文网首页
Java源码浅析,Character(4)

Java源码浅析,Character(4)

作者: Tomy_Jx_Li | 来源:发表于2018-10-27 00:14 被阅读49次

源码分析的内容基本上都写在代码注释里了,如有谬误,请指正,谢谢。
由于Character的源码比较长,只能分成多篇发布。

package com.jiyx.test.java.lang;

import java.io.Serializable;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * author: jiyx
 * date: 2018/9/13.
 */
public class Character implements Serializable, Comparable<Character> {
    /**
     * The {@code char} wrapped by this object.
     *
     * @serial
     */
    private final char value;

    /**
     * Serialization version identifier.
     */
    private static final long serialVersionUID = 3786198910865385080L;

    /**
     * Creates a wrapper around the given {@code char}.
     *
     * @param value the char to wrap
     */
    public Character(char value) {
        this.value = value;
    }

    /**
     * Holder for the pre-built instances handed out by {@link #valueOf(char)}.
     */
    private static class CharacterCache {
        private CharacterCache() {
        }

        /**
         * One instance per char value 0..127, stored at the index equal to that value.
         */
        static final Character[] cache = new Character[127 + 1];

        // Populated once, when this holder class is initialized.
        static {
            for (int i = 0; i < cache.length; i++) {
                cache[i] = new Character((char) i);
            }
        }
    }

    /**
     * Returns a {@code Character} for the given char. Values in the range 0..127 are served
     * from the cache; any other value gets a newly allocated instance.
     *
     * @param c the char to wrap
     * @return a {@code Character} whose value is {@code c}
     */
    public static Character valueOf(char c) {
        if (c <= 127) { // must cache
            return CharacterCache.cache[c];
        }
        return new Character(c);
    }

    /**
     * Returns the wrapped {@code char}.
     *
     * @return the primitive value held by this object
     */
    public char charValue() {
        return value;
    }

    /**
     * Returns the hash code of this object, which is {@link #hashCode(char)} of the wrapped char.
     */
    @Override
    public int hashCode() {
        return Character.hashCode(value);
    }

    /**
     * Static hash code for a char: simply the char widened to {@code int}.
     *
     * @param value the char to hash
     * @return the numeric value of {@code value}
     */
    public static int hashCode(char value) {
        return (int) value;
    }

    /**
     * Compares this object with another for equality.
     *
     * @param obj the object to compare with
     * @return {@code true} only when {@code obj} is a {@code Character} holding the same char
     */
    @Override
    public boolean equals(Object obj) {
        if (obj instanceof Character) {
            return value == ((Character) obj).charValue();
        }
        return false;
    }

    /**
     * Returns a one-character string containing the wrapped char.
     */
    @Override
    public String toString() {
        return String.valueOf(value);
    }

    /**
     * Returns a one-character string containing {@code c}.
     *
     * @param c the char to convert
     * @return {@code String.valueOf(c)}
     */
    public static String toString(char c) {
        return String.valueOf(c);
    }

    /**
     * Tells whether the given value is a valid Unicode code point.
     * Relies on {@code MAX_CODE_POINT}, which is declared outside this listing.
     *
     * @param codePoint the value to test
     * @return {@code true} if the plane number of {@code codePoint} is below the plane
     *         number of {@code MAX_CODE_POINT + 1}
     */
    public static boolean isValidCodePoint(int codePoint) {
        // Optimized form of:
        //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
        int plane = codePoint >>> 16;
        return plane < ((MAX_CODE_POINT + 1) >>> 16);
    }

    /**
     * Tells whether the given code point lies in the Basic Multilingual Plane (BMP),
     * i.e. whether it can be represented by a single {@code char}.
     *
     * @param codePoint the value to test
     * @return {@code true} if the upper 16 bits of {@code codePoint} are all zero
     */
    public static boolean isBmpCodePoint(int codePoint) {
        return codePoint >>> 16 == 0;
        // Optimized form of:
        //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
        // We consistently use logical shift (>>>) to facilitate
        // additional runtime optimizations.
    }

    /**
     * Tells whether the given code point is in the supplementary range.
     *
     * @param codePoint the value to test
     * @return {@code true} if {@code codePoint} is at least {@code MIN_SUPPLEMENTARY_CODE_POINT}
     *         and at most {@code MAX_CODE_POINT}
     */
    public static boolean isSupplementaryCodePoint(int codePoint) {
        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
                && codePoint < MAX_CODE_POINT + 1;
    }

    /**
     * Tells whether the given char is a high (leading) surrogate code unit.
     *
     * @param ch the char to test
     * @return {@code true} if {@code ch} is within
     *         {@code MIN_HIGH_SURROGATE..MAX_HIGH_SURROGATE} inclusive
     */
    public static boolean isHighSurrogate(char ch) {
        // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
        return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
    }

    /**
     * Tells whether the given char is a low (trailing) surrogate code unit.
     *
     * @param ch the char to test
     * @return {@code true} if {@code ch} is within
     *         {@code MIN_LOW_SURROGATE..MAX_LOW_SURROGATE} inclusive
     */
    public static boolean isLowSurrogate(char ch) {
        return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
    }

    /**
     * Tells whether the given char is a surrogate code unit of either kind.
     *
     * @param ch the char to test
     * @return {@code true} if {@code ch} is within {@code MIN_SURROGATE..MAX_SURROGATE} inclusive
     */
    public static boolean isSurrogate(char ch) {
        return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
    }

    /**
     * Tells whether the two chars form a valid surrogate pair.
     *
     * @param high the candidate high surrogate
     * @param low  the candidate low surrogate
     * @return {@code true} if {@code high} is a high surrogate and {@code low} is a low surrogate
     */
    public static boolean isSurrogatePair(char high, char low) {
        return isHighSurrogate(high) && isLowSurrogate(low);
    }

    /**
     * Returns how many {@code char} values are needed to represent the given code point.
     * The argument is not validated.
     *
     * @param codePoint the code point to measure
     * @return 2 if {@code codePoint} is at least {@code MIN_SUPPLEMENTARY_CODE_POINT}, otherwise 1
     */
    public static int charCount(int codePoint) {
        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
    }

    /**
     * Combines a surrogate pair into its supplementary code point. The pair is converted
     * without validation; call {@link #isSurrogatePair(char, char)} first if a check is needed.
     *
     * @param high the high surrogate
     * @param low  the low surrogate
     * @return the code point computed from the two code units
     */
    public static int toCodePoint(char high, char low) {
        // Optimized form of:
        // return ((high - MIN_HIGH_SURROGATE) << 10)
        //         + (low - MIN_LOW_SURROGATE)
        //         + MIN_SUPPLEMENTARY_CODE_POINT;
        return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
                - (MIN_HIGH_SURROGATE << 10)
                - MIN_LOW_SURROGATE);
    }

    /**
     * Returns the code point at the given index of a char sequence. If the char at
     * {@code index} is a high surrogate, a following char exists, and that char is a low
     * surrogate, the combined supplementary code point is returned. Otherwise the char at
     * {@code index} is returned as an {@code int}.
     *
     * @param seq   the sequence to read
     * @param index the index of the first code unit
     * @return the code point starting at {@code index}
     */
    public static int codePointAt(CharSequence seq, int index) {
        char c1 = seq.charAt(index);
        if (isHighSurrogate(c1) && ++index < seq.length()) {
            char c2 = seq.charAt(index);
            if (isLowSurrogate(c2)) {
                return toCodePoint(c1, c2);
            }
        }
        return c1;
    }

    /**
     * Same as {@link #codePointAt(CharSequence, int)} but reads from a char array,
     * using the array length as the limit.
     *
     * @param a     the array to read
     * @param index the index of the first code unit
     * @return the code point starting at {@code index}
     */
    public static int codePointAt(char[] a, int index) {
        return codePointAtImpl(a, index, a.length);
    }

    /**
     * Same as {@link #codePointAt(char[], int)}, except that the second code unit is only
     * looked at when its index is below {@code limit}.
     *
     * @param a     the array to read
     * @param index the index of the first code unit
     * @param limit the exclusive upper bound for indices that may be read
     * @return the code point starting at {@code index}
     * @throws IndexOutOfBoundsException if {@code index >= limit}, {@code limit < 0},
     *                                   or {@code limit > a.length}
     */
    public static int codePointAt(char[] a, int index, int limit) {
        if (index >= limit || limit < 0 || limit > a.length) {
            throw new IndexOutOfBoundsException();
        }
        return codePointAtImpl(a, index, limit);
    }

    /**
     * Shared forward lookup used by the array-based {@code codePointAt} overloads.
     * Performs no argument checks of its own.
     *
     * @param a     the array to read
     * @param index the index of the first code unit
     * @param limit the exclusive upper bound for the second code unit
     * @return the combined code point if a surrogate pair starts at {@code index},
     *         otherwise {@code a[index]}
     */
    static int codePointAtImpl(char[] a, int index, int limit) {
        char c1 = a[index];
        if (isHighSurrogate(c1) && ++index < limit) {
            char c2 = a[index];
            if (isLowSurrogate(c2)) {
                return toCodePoint(c1, c2);
            }
        }
        return c1;
    }

    /**
     * Returns the code point that ends just before the given index. If the char at
     * {@code index - 1} is a low surrogate and the char before it is a high surrogate,
     * the combined supplementary code point is returned. Otherwise the char at
     * {@code index - 1} is returned.
     *
     * @param seq   the sequence to read
     * @param index the index following the code point to return
     * @return the code point preceding {@code index}
     * @since 1.5
     */
    public static int codePointBefore(CharSequence seq, int index) {
        char c2 = seq.charAt(--index);
        if (isLowSurrogate(c2) && index > 0) {
            char c1 = seq.charAt(--index);
            if (isHighSurrogate(c1)) {
                return toCodePoint(c1, c2);
            }
        }
        return c2;
    }

    /**
     * Same as {@link #codePointBefore(CharSequence, int)} but reads from a char array,
     * using 0 as the start bound.
     *
     * @param a     the array to read
     * @param index the index following the code point to return
     * @return the code point preceding {@code index}
     */
    public static int codePointBefore(char[] a, int index) {
        return codePointBeforeImpl(a, index, 0);
    }

    /**
     * Same as {@link #codePointBefore(char[], int)}, except that the backward lookup never
     * reads below {@code start}.
     *
     * @param a     the array to read
     * @param index the index following the code point to return
     * @param start the lowest index that may be read
     * @return the code point preceding {@code index}
     * @throws IndexOutOfBoundsException if {@code index <= start}, {@code start < 0},
     *                                   or {@code start >= a.length}
     */
    public static int codePointBefore(char[] a, int index, int start) {
        if (index <= start || start < 0 || start >= a.length) {
            throw new IndexOutOfBoundsException();
        }
        return codePointBeforeImpl(a, index, start);
    }

    /**
     * Shared backward lookup used by the array-based {@code codePointBefore} overloads.
     * Performs no argument checks of its own.
     *
     * @param a     the array to read
     * @param index the index following the code point to return
     * @param start the lowest index that may be read
     * @return the combined code point if a surrogate pair ends at {@code index - 1},
     *         otherwise {@code a[index - 1]}
     */
    static int codePointBeforeImpl(char[] a, int index, int start) {
        char c2 = a[--index];
        if (isLowSurrogate(c2) && index > start) {
            char c1 = a[--index];
            if (isHighSurrogate(c1)) {
                return toCodePoint(c1, c2);
            }
        }
        return c2;
    }

    /**
     * Computes the high (leading) surrogate for a supplementary code point.
     * The argument is not validated; for a non-supplementary code point the result is
     * not meaningful.
     *
     * @param codePoint the code point to convert
     * @return the computed high surrogate code unit
     */
    public static char highSurrogate(int codePoint) {
        return (char) ((codePoint >>> 10)
                + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
    }

    /**
     * Computes the low (trailing) surrogate for a supplementary code point.
     * The argument is not validated; for a non-supplementary code point the result is
     * not meaningful.
     *
     * @param codePoint the code point to convert
     * @return the low ten bits of {@code codePoint} added to {@code MIN_LOW_SURROGATE}
     */
    public static char lowSurrogate(int codePoint) {
        return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
    }

    /**
     * Writes the UTF-16 form of a code point into {@code dst} starting at {@code dstIndex}.
     * A BMP code point is stored as one char; any other valid code point is stored as a
     * surrogate pair in {@code dst[dstIndex]} and {@code dst[dstIndex + 1]}.
     *
     * @param codePoint the code point to convert
     * @param dst       the destination array
     * @param dstIndex  the index in {@code dst} of the first char written
     * @return 1 if one char was written, 2 if a surrogate pair was written
     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point
     */
    public static int toChars(int codePoint, char[] dst, int dstIndex) {
        if (isBmpCodePoint(codePoint)) {
            // BMP: fits in a single char.
            dst[dstIndex] = (char) codePoint;
            return 1;
        } else if (isValidCodePoint(codePoint)) {
            // Valid but outside the BMP, so it needs a surrogate pair.
            toSurrogates(codePoint, dst, dstIndex);
            return 2;
        } else {
            throw new IllegalArgumentException();
        }
    }

    /**
     * Returns the UTF-16 form of a code point in a newly allocated array. Follows the same
     * rules as {@link #toChars(int, char[], int)} without delegating to it.
     *
     * @param codePoint the code point to convert
     * @return a one-element array for a BMP code point, or a two-element array holding the
     *         surrogate pair for any other valid code point
     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point
     */
    public static char[] toChars(int codePoint) {
        if (isBmpCodePoint(codePoint)) {
            return new char[]{(char) codePoint};
        } else if (isValidCodePoint(codePoint)) {
            char[] result = new char[2];
            toSurrogates(codePoint, result, 0);
            return result;
        } else {
            throw new IllegalArgumentException();
        }
    }

    /**
     * Stores the surrogate pair for {@code codePoint} at {@code dst[index]} and
     * {@code dst[index + 1]}.
     *
     * @param codePoint the code point to convert
     * @param dst       the destination array
     * @param index     the index of the high surrogate in {@code dst}
     */
    static void toSurrogates(int codePoint, char[] dst, int index) {
        // We write elements "backwards" to guarantee all-or-nothing
        dst[index + 1] = lowSurrogate(codePoint);
        dst[index] = highSurrogate(codePoint);
    }

    /**
     * Counts the code points in the range {@code [beginIndex, endIndex)} of a char sequence.
     * This differs from the number of chars in the range: each high surrogate immediately
     * followed (inside the range) by a low surrogate counts as one code point, not two.
     * A character such as U+20BB7, which occupies two chars, is therefore counted once.
     *
     * @param seq        the sequence to examine
     * @param beginIndex the index of the first char of the range
     * @param endIndex   the index after the last char of the range
     * @return the number of code points in the range
     * @throws IndexOutOfBoundsException if {@code beginIndex < 0},
     *                                   {@code endIndex > seq.length()},
     *                                   or {@code beginIndex > endIndex}
     */
    public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
        int length = seq.length();
        if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
            throw new IndexOutOfBoundsException();
        }
        // Start from the char count and subtract one for every surrogate pair found.
        int n = endIndex - beginIndex;
        for (int i = beginIndex; i < endIndex; ) {
            if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
                    isLowSurrogate(seq.charAt(i))) {
                n--;
                i++;
            }
        }
        return n;
    }

    /**
     * Counts the code points in a sub-array, using the same rule as
     * {@link #codePointCount(CharSequence, int, int)}.
     *
     * @param a      the array to examine
     * @param offset the index of the first char of the sub-array
     * @param count  the length of the sub-array in chars
     * @return the number of code points in the sub-array
     * @throws IndexOutOfBoundsException if {@code offset} or {@code count} is negative,
     *                                   or {@code count > a.length - offset}
     */
    public static int codePointCount(char[] a, int offset, int count) {
        if (count > a.length - offset || offset < 0 || count < 0) {
            throw new IndexOutOfBoundsException();
        }
        return codePointCountImpl(a, offset, count);
    }

    /**
     * Counting loop behind {@link #codePointCount(char[], int, int)}.
     * Performs no argument checks of its own.
     *
     * @param a      the array to examine
     * @param offset the index of the first char of the sub-array
     * @param count  the length of the sub-array in chars
     * @return {@code count} minus the number of surrogate pairs found in the sub-array
     */
    static int codePointCountImpl(char[] a, int offset, int count) {
        int endIndex = offset + count;
        int n = count;
        for (int i = offset; i < endIndex; ) {
            if (isHighSurrogate(a[i++]) && i < endIndex &&
                    isLowSurrogate(a[i])) {
                n--;
                i++;
            }
        }
        return n;
    }

    /**
     * Returns the char index reached by moving {@code codePointOffset} code points away from
     * {@code index} in a char sequence. A positive offset moves forward and a negative
     * offset moves backward. A surrogate pair is stepped over as a single code point.
     *
     * @param seq             the sequence to walk
     * @param index           the starting char index
     * @param codePointOffset the number of code points to move; may be negative
     * @return the resulting char index
     * @throws IndexOutOfBoundsException if {@code index} is negative or greater than the
     *                                   sequence length, or if the sequence ends before
     *                                   the requested number of code points is covered
     */
    public static int offsetByCodePoints(CharSequence seq, int index,
                                         int codePointOffset) {
        int length = seq.length();
        // Reject a starting index outside the sequence.
        if (index < 0 || index > length) {
            throw new IndexOutOfBoundsException();
        }

        int x = index;
        if (codePointOffset >= 0) {
            // Moving forward.
            int i;
            for (i = 0; x < length && i < codePointOffset; i++) {
                // Consume one char; if it was a high surrogate followed by a low
                // surrogate, consume the low surrogate as well.
                if (isHighSurrogate(seq.charAt(x++)) && x < length &&
                        isLowSurrogate(seq.charAt(x))) {
                    x++;
                }
            }
            // The sequence ended before all requested code points were stepped over.
            if (i < codePointOffset) {
                throw new IndexOutOfBoundsException();
            }
        } else {
            // Moving backward.
            int i;
            for (i = codePointOffset; x > 0 && i < 0; i++) {
                // Step back one char; if it is a low surrogate preceded by a high
                // surrogate, step back over the high surrogate as well.
                if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
                        isHighSurrogate(seq.charAt(x - 1))) {
                    x--;
                }
            }
            // The start of the sequence was reached before the offset was used up.
            if (i < 0) {
                throw new IndexOutOfBoundsException();
            }
        }
        return x;
    }

    /**
     * Array variant of {@link #offsetByCodePoints(CharSequence, int, int)}, restricted to
     * the sub-array {@code [start, start + count)}.
     *
     * @param a               the array to walk
     * @param start           the index of the first char of the sub-array
     * @param count           the length of the sub-array in chars
     * @param index           the starting char index
     * @param codePointOffset the number of code points to move; may be negative
     * @return the resulting char index
     * @throws IndexOutOfBoundsException if {@code start} or {@code count} is negative,
     *                                   {@code count > a.length - start}, {@code index} is
     *                                   outside {@code [start, start + count]}, or the
     *                                   sub-array ends before the offset is used up
     */
    public static int offsetByCodePoints(char[] a, int start, int count,
                                         int index, int codePointOffset) {
        if (count > a.length - start || start < 0 || count < 0
                || index < start || index > start + count) {
            throw new IndexOutOfBoundsException();
        }
        return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
    }

    /**
     * Walking loop behind {@link #offsetByCodePoints(char[], int, int, int, int)}.
     * Does not validate {@code start}, {@code count} or {@code index}.
     *
     * @param a               the array to walk
     * @param start           the index of the first char of the sub-array
     * @param count           the length of the sub-array in chars
     * @param index           the starting char index
     * @param codePointOffset the number of code points to move; may be negative
     * @return the resulting char index
     * @throws IndexOutOfBoundsException if the sub-array ends before the offset is used up
     */
    static int offsetByCodePointsImpl(char[] a, int start, int count,
                                      int index, int codePointOffset) {
        int x = index;
        if (codePointOffset >= 0) {
            // Forward: stop at the end of the sub-array.
            int limit = start + count;
            int i;
            for (i = 0; x < limit && i < codePointOffset; i++) {
                if (isHighSurrogate(a[x++]) && x < limit &&
                        isLowSurrogate(a[x])) {
                    x++;
                }
            }
            if (i < codePointOffset) {
                throw new IndexOutOfBoundsException();
            }
        } else {
            // Backward: stop at the beginning of the sub-array.
            int i;
            for (i = codePointOffset; x > start && i < 0; i++) {
                if (isLowSurrogate(a[--x]) && x > start &&
                        isHighSurrogate(a[x - 1])) {
                    x--;
                }
            }
            if (i < 0) {
                throw new IndexOutOfBoundsException();
            }
        }
        return x;
    }

    /**
     * Tells whether the given char is a lowercase character.
     * Delegates to {@link #isLowerCase(int)}.
     */
    public static boolean isLowerCase(char ch) {
        return isLowerCase((int) ch);
    }

    /**
     * Code point variant of {@link #isLowerCase(char)}; also accepts supplementary code points.
     *
     * @param codePoint the code point to test
     * @return {@code true} if its general category is {@code LOWERCASE_LETTER} or
     *         {@code CharacterData} reports it as "other lowercase"
     */
    public static boolean isLowerCase(int codePoint) {
        return getType(codePoint) == Character.LOWERCASE_LETTER ||
                CharacterData.of(codePoint).isOtherLowercase(codePoint);
    }

    /**
     * Tells whether the given char is an uppercase character.
     * Delegates to {@link #isUpperCase(int)}.
     */
    public static boolean isUpperCase(char ch) {
        return isUpperCase((int) ch);
    }

    /**
     * Code point variant of {@link #isUpperCase(char)}; also accepts supplementary code points.
     *
     * @param codePoint the code point to test
     * @return {@code true} if its general category is {@code UPPERCASE_LETTER} or
     *         {@code CharacterData} reports it as "other uppercase"
     */
    public static boolean isUpperCase(int codePoint) {
        return getType(codePoint) == Character.UPPERCASE_LETTER ||
                CharacterData.of(codePoint).isOtherUppercase(codePoint);
    }

    /**
     * Tells whether the given char is a titlecase character. Titlecase applies to a small
     * set of special characters (digraphs whose first part is capitalized).
     * These are some of the Unicode characters for which this method returns
     * {@code true}:
     * <ul>
     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
     * </ul>
     * <p> Many other Unicode characters are titlecase too.
     */
    public static boolean isTitleCase(char ch) {
        return isTitleCase((int) ch);
    }

    /**
     * Code point variant of {@link #isTitleCase(char)}; also accepts supplementary code points.
     *
     * @param codePoint the code point to test
     * @return {@code true} if its general category is {@code TITLECASE_LETTER}
     */
    public static boolean isTitleCase(int codePoint) {
        return getType(codePoint) == Character.TITLECASE_LETTER;
    }

    /**
     * Tells whether the given char is a digit. Delegates to {@link #isDigit(int)}.
     */
    public static boolean isDigit(char ch) {
        return isDigit((int) ch);
    }

    /**
     * Code point variant of {@link #isDigit(char)}; also accepts supplementary code points.
     *
     * @param codePoint the code point to test
     * @return {@code true} if its general category is {@code DECIMAL_DIGIT_NUMBER}
     */
    public static boolean isDigit(int codePoint) {
        return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
    }

    /**
     * Tells whether the given char is defined in Unicode. Delegates to {@link #isDefined(int)}.
     */
    public static boolean isDefined(char ch) {
        return isDefined((int) ch);
    }

    /**
     * Code point variant of {@link #isDefined(char)}; also accepts supplementary code points.
     *
     * @param codePoint the code point to test
     * @return {@code true} if its general category is anything other than {@code UNASSIGNED}
     */
    public static boolean isDefined(int codePoint) {
        return getType(codePoint) != Character.UNASSIGNED;
    }

    /**
     * Tells whether the given char is a letter: uppercase, lowercase, titlecase, modifier
     * or other letter. Delegates to {@link #isLetter(int)}.
     */
    public static boolean isLetter(char ch) {
        return isLetter((int) ch);
    }

    /**
     * Code point variant of {@link #isLetter(char)}; also accepts supplementary code points.
     * <p>
     * Implementation: a bit mask with one bit set per accepted general category is shifted
     * right by the category of {@code codePoint}; the lowest bit then tells whether that
     * category is accepted.
     *
     * @param codePoint the code point to test
     * @return {@code true} if its general category is one of the five letter categories
     */
    public static boolean isLetter(int codePoint) {
        return ((((1 << Character.UPPERCASE_LETTER) |
                (1 << Character.LOWERCASE_LETTER) |
                (1 << Character.TITLECASE_LETTER) |
                (1 << Character.MODIFIER_LETTER) |
                (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
                != 0;
    }

    /**
     * Tells whether the given char is a letter or a digit.
     * Delegates to {@link #isLetterOrDigit(int)}.
     */
    public static boolean isLetterOrDigit(char ch) {
        return isLetterOrDigit((int) ch);
    }

    /**
     * Code point variant of {@link #isLetterOrDigit(char)}; also accepts supplementary
     * code points. Uses the same category bit mask as {@link #isLetter(int)} with
     * {@code DECIMAL_DIGIT_NUMBER} added.
     *
     * @param codePoint the code point to test
     * @return {@code true} if its general category is a letter category or a decimal digit
     */
    public static boolean isLetterOrDigit(int codePoint) {
        return ((((1 << Character.UPPERCASE_LETTER) |
                (1 << Character.LOWERCASE_LETTER) |
                (1 << Character.TITLECASE_LETTER) |
                (1 << Character.MODIFIER_LETTER) |
                (1 << Character.OTHER_LETTER) |
                (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
                != 0;
    }

    /**
     * Tells whether the given char may be the first character of a Java identifier.
     *
     * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}.
     */
    @Deprecated
    public static boolean isJavaLetter(char ch) {
        return isJavaIdentifierStart(ch);
    }

    /**
     * Tells whether the given char may appear in a Java identifier at a position other
     * than the first.
     *
     * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}.
     */
    @Deprecated
    public static boolean isJavaLetterOrDigit(char ch) {
        return isJavaIdentifierPart(ch);
    }

    /**
     * Tells whether the given code point is alphabetic. Uses the category bit mask of
     * {@link #isLetter(int)} with {@code LETTER_NUMBER} added, and additionally accepts
     * anything {@code CharacterData} reports as "other alphabetic".
     * <p>
     * Annotator's observation: Chinese characters also yield {@code true}, so the exact
     * ranges covered by each category are worth looking up before relying on this method.
     *
     * @param codePoint the code point to test
     * @return {@code true} if the code point is considered alphabetic
     */
    public static boolean isAlphabetic(int codePoint) {
        return (((((1 << Character.UPPERCASE_LETTER) |
                (1 << Character.LOWERCASE_LETTER) |
                (1 << Character.TITLECASE_LETTER) |
                (1 << Character.MODIFIER_LETTER) |
                (1 << Character.OTHER_LETTER) |
                (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
                CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
    }

    /**
     * Tells whether the given code point is an ideograph, i.e. a Chinese, Japanese, Korean
     * or Vietnamese ideographic character. The decision is delegated to {@code CharacterData}.
     *
     * @param codePoint the code point to test
     */
    public static boolean isIdeographic(int codePoint) {
        return CharacterData.of(codePoint).isIdeographic(codePoint);
    }

    /**
     * Tells whether the given char may be the first character of a Java identifier.
     * This is the replacement for the deprecated {@link #isJavaLetter(char)}.
     */
    public static boolean isJavaIdentifierStart(char ch) {
        return isJavaIdentifierStart((int) ch);
    }

    /**
     * Code point variant of {@link #isJavaIdentifierStart(char)}; also accepts supplementary
     * code points. The decision is delegated to {@code CharacterData}.
     */
    public static boolean isJavaIdentifierStart(int codePoint) {
        return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
    }

    /**
     * Tells whether the given char may appear in a Java identifier at a position other
     * than the first. This is the replacement for the deprecated
     * {@link #isJavaLetterOrDigit(char)}.
     */
    public static boolean isJavaIdentifierPart(char ch) {
        return isJavaIdentifierPart((int) ch);
    }

    /**
     * Code point variant of {@link #isJavaIdentifierPart(char)}; also accepts supplementary
     * code points. The decision is delegated to {@code CharacterData}.
     */
    public static boolean isJavaIdentifierPart(int codePoint) {
        return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
    }

    /**
     * Tells whether the given char may be the first character of a Unicode identifier.
     */
    public static boolean isUnicodeIdentifierStart(char ch) {
        return isUnicodeIdentifierStart((int) ch);
    }

    /**
     * Code point variant of {@link #isUnicodeIdentifierStart(char)}; also accepts
     * supplementary code points. The decision is delegated to {@code CharacterData}.
     */
    public static boolean isUnicodeIdentifierStart(int codePoint) {
        return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
    }

    /**
     * Tells whether the given char may appear in a Unicode identifier at a position other
     * than the first.
     */
    public static boolean isUnicodeIdentifierPart(char ch) {
        return isUnicodeIdentifierPart((int) ch);
    }

    /**
     * Code point variant of {@link #isUnicodeIdentifierPart(char)}; also accepts
     * supplementary code points. The decision is delegated to {@code CharacterData}.
     */
    public static boolean isUnicodeIdentifierPart(int codePoint) {
        return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
    }

    /**
     * Tells whether the given char should be treated as ignorable inside a Java or
     * Unicode identifier.
     */
    public static boolean isIdentifierIgnorable(char ch) {
        return isIdentifierIgnorable((int) ch);
    }

    /**
     * Code point variant of {@link #isIdentifierIgnorable(char)}; also accepts supplementary
     * code points. The decision is delegated to {@code CharacterData}.
     */
    public static boolean isIdentifierIgnorable(int codePoint) {
        return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
    }

    /**
     * Converts the given char to lowercase. The result of {@link #toLowerCase(int)} is
     * narrowed back to {@code char}.
     */
    public static char toLowerCase(char ch) {
        return (char) toLowerCase((int) ch);
    }

    /**
     * Code point variant of {@link #toLowerCase(char)}; also accepts supplementary code
     * points. The mapping is delegated to {@code CharacterData}.
     */
    public static int toLowerCase(int codePoint) {
        return CharacterData.of(codePoint).toLowerCase(codePoint);
    }

    /**
     * Converts the given char to uppercase. The result of {@link #toUpperCase(int)} is
     * narrowed back to {@code char}.
     */
    public static char toUpperCase(char ch) {
        return (char) toUpperCase((int) ch);
    }

    /**
     * Code point variant of {@link #toUpperCase(char)}; also accepts supplementary code
     * points. The mapping is delegated to {@code CharacterData}.
     */
    public static int toUpperCase(int codePoint) {
        return CharacterData.of(codePoint).toUpperCase(codePoint);
    }

    /**
     * Converts the given char to titlecase. The result of {@link #toTitleCase(int)} is
     * narrowed back to {@code char}.
     */
    public static char toTitleCase(char ch) {
        return (char) toTitleCase((int) ch);
    }

    /**
     * Code point variant of {@link #toTitleCase(char)}; also accepts supplementary code
     * points. The mapping is delegated to {@code CharacterData}.
     */
    public static int toTitleCase(int codePoint) {
        return CharacterData.of(codePoint).toTitleCase(codePoint);
    }

    /**
     * Returns the numeric value of a char in the given radix; for example {@code 'a'}
     * corresponds to 10 in radix 16.
     */
    public static int digit(char ch, int radix) {
        return digit((int) ch, radix);
    }

    /**
     * Code point variant of {@link #digit(char, int)}; also accepts supplementary code
     * points. The lookup is delegated to {@code CharacterData}.
     */
    public static int digit(int codePoint, int radix) {
        return CharacterData.of(codePoint).digit(codePoint, radix);
    }

    /**
     * Returns the {@code int} value that the given Unicode character represents.
     * For example, U+216C (ROMAN NUMERAL FIFTY) yields 50.
     */
    public static int getNumericValue(char ch) {
        return getNumericValue((int) ch);
    }

    /**
     * Code point variant of {@link #getNumericValue(char)}; also accepts supplementary code
     * points. The lookup is delegated to {@code CharacterData}.
     */
    public static int getNumericValue(int codePoint) {
        return CharacterData.of(codePoint).getNumericValue(codePoint);
    }

    /**
     * Tells whether the given char is ISO-LATIN-1 white space. Returns {@code true} only
     * for horizontal tab (0x09), line feed (0x0A), form feed (0x0C), carriage return (0x0D)
     * and space (0x20).
     * <p>
     * Implementation: after ruling out anything above 0x20, a 64-bit mask with one bit per
     * accepted character is shifted right by {@code ch} and its lowest bit is tested.
     *
     * @deprecated Replaced by {@link #isWhitespace(char)}.
     */
    @Deprecated
    public static boolean isSpace(char ch) {
        return (ch <= 0x0020) &&
                (((((1L << 0x0009) |
                        (1L << 0x000A) |
                        (1L << 0x000C) |
                        (1L << 0x000D) |
                        (1L << 0x0020)) >> ch) & 1L) != 0);
    }


    /**
     * Tells whether the given char is a Unicode space character, i.e. whether its general
     * category is {@code SPACE_SEPARATOR}, {@code LINE_SEPARATOR} or
     * {@code PARAGRAPH_SEPARATOR}. Delegates to {@link #isSpaceChar(int)}.
     */
    public static boolean isSpaceChar(char ch) {
        return isSpaceChar((int) ch);
    }

    /**
     * Code point variant of {@link #isSpaceChar(char)}; also accepts supplementary code
     * points. Uses a category bit mask in the same way as {@link #isLetter(int)}.
     *
     * @param codePoint the code point to test
     * @return {@code true} if its general category is one of the three separator categories
     */
    public static boolean isSpaceChar(int codePoint) {
        return ((((1 << Character.SPACE_SEPARATOR) |
                (1 << Character.LINE_SEPARATOR) |
                (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
                != 0;
    }

    /**
     * Tells whether the given char is white space according to Java. The decision itself
     * is made by {@code CharacterData}; the documented contract of this method is that a
     * character is Java whitespace if and only if one of the following holds:
     * <ol>
     * <li>It is a Unicode space character ({@code SPACE_SEPARATOR},
     * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
     * but is not also a non-breaking space (U+00A0, U+2007, U+202F).
     * <li>It is U+0009 HORIZONTAL TABULATION.
     * <li>It is U+000A LINE FEED.
     * <li>It is U+000B VERTICAL TABULATION.
     * <li>It is U+000C FORM FEED.
     * <li>It is U+000D CARRIAGE RETURN.
     * <li>It is U+001C FILE SEPARATOR.
     * <li>It is U+001D GROUP SEPARATOR.
     * <li>It is U+001E RECORD SEPARATOR.
     * <li>It is U+001F UNIT SEPARATOR.
     * </ol>
     */
    public static boolean isWhitespace(char ch) {
        return isWhitespace((int) ch);
    }

    /**
     * Code point variant of {@link #isWhitespace(char)}; also accepts supplementary code
     * points. The decision is delegated to {@code CharacterData}.
     */
    public static boolean isWhitespace(int codePoint) {
        return CharacterData.of(codePoint).isWhitespace(codePoint);
    }

    /**
     * Tells whether the given char is an ISO control character, i.e. whether its code is in
     * the range U+0000..U+001F or U+007F..U+009F. Delegates to {@link #isISOControl(int)}.
     */
    public static boolean isISOControl(char ch) {
        return isISOControl((int) ch);
    }

    /**
     * Code point variant of {@link #isISOControl(char)}.
     *
     * @param codePoint the code point to test
     * @return {@code true} if {@code codePoint} is in 0x00..0x1F or 0x7F..0x9F
     */
    public static boolean isISOControl(int codePoint) {
        // Optimized form of:
        //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
        //     (codePoint >= 0x7F && codePoint <= 0x9F);
        return codePoint <= 0x9F &&
                (codePoint >= 0x7F || (codePoint >>> 5 == 0));
    }

    /**
     * Returns the value identifying the general category of the given char.
     * Delegates to {@link #getType(int)}.
     */
    public static int getType(char ch) {
        return getType((int) ch);
    }

    /**
     * Code point variant of {@link #getType(char)}; also accepts supplementary code points.
     * The lookup is delegated to {@code CharacterData}.
     */
    public static int getType(int codePoint) {
        return CharacterData.of(codePoint).getType(codePoint);
    }

    /**
     * Returns the character that represents a digit in the given radix: {@code '0'..'9'}
     * for digits below 10 and lowercase letters starting at {@code 'a'} for digits from 10 up.
     *
     * @param digit the digit value to convert
     * @param radix the radix
     * @return the character for {@code digit}, or the NUL character if {@code digit} is
     *         negative or not below {@code radix}, or if {@code radix} is outside
     *         {@code MIN_RADIX..MAX_RADIX}
     */
    public static char forDigit(int digit, int radix) {
        if ((digit >= radix) || (digit < 0)) {
            return '\0';
        }
        if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
            return '\0';
        }
        if (digit < 10) {
            return (char) ('0' + digit);
        }
        return (char) ('a' - 10 + digit);
    }

    /**
     * Returns the Unicode directionality property of the given char. Directionality is
     * used to compute the visual ordering of text. Undefined char values have the
     * directionality {@code DIRECTIONALITY_UNDEFINED}.
     *
     * @param ch {@code char} for which the directionality property
     *           is requested.
     * @return the directionality property of the {@code char} value.
     * @see Character#DIRECTIONALITY_UNDEFINED
     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
     * @see Character#DIRECTIONALITY_ARABIC_NUMBER
     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
     * @see Character#DIRECTIONALITY_NONSPACING_MARK
     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
     * @see Character#DIRECTIONALITY_WHITESPACE
     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
     * @since 1.4
     */
    public static byte getDirectionality(char ch) {
        return getDirectionality((int) ch);
    }

    /**
     * Code point variant of {@link #getDirectionality(char)}; also accepts supplementary
     * code points. The lookup is delegated to {@code CharacterData}.
     *
     * @param codePoint the character (Unicode code point) for which
     *                  the directionality property is requested.
     * @return the directionality property of the character.
     * @see Character#DIRECTIONALITY_UNDEFINED
     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
     * @see Character#DIRECTIONALITY_ARABIC_NUMBER
     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
     * @see Character#DIRECTIONALITY_NONSPACING_MARK
     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
     * @see Character#DIRECTIONALITY_WHITESPACE
     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
     * @since 1.5
     */
    public static byte getDirectionality(int codePoint) {
        return CharacterData.of(codePoint).getDirectionality(codePoint);
    }

    /**
     * Tells whether the given char is mirrored according to the Unicode specification.
     * A mirrored character should have its glyph flipped horizontally when displayed in
     * right-to-left text. For example, U+0028 LEFT PARENTHESIS is semantically an opening
     * parenthesis: it appears as "(" in left-to-right text and as ")" in right-to-left text.
     */
    public static boolean isMirrored(char ch) {
        return isMirrored((int) ch);
    }

    /**
     * Code point variant of {@link #isMirrored(char)}; also accepts supplementary code
     * points. The lookup is delegated to {@code CharacterData}.
     */
    public static boolean isMirrored(int codePoint) {
        return CharacterData.of(codePoint).isMirrored(codePoint);
    }

    /**
     * Compares this object with another {@code Character} numerically.
     *
     * @param anotherCharacter the object to compare with; must not be {@code null}
     * @return the result of {@link #compare(char, char)} on the two wrapped chars
     */
    @Override
    public int compareTo(Character anotherCharacter) {
        return compare(this.value, anotherCharacter.value);
    }

    /**
     * Compares two chars numerically.
     *
     * @param x the first char
     * @param y the second char
     * @return {@code x - y}: zero if equal, negative if {@code x < y}, positive if {@code x > y}.
     *         Both operands are widened to {@code int} first, so the subtraction cannot overflow.
     */
    public static int compare(char x, char y) {
        return x - y;
    }

    /**
     * Converts the character (Unicode code point) argument to uppercase using
     * information from the UnicodeData file.
     * <p>
     *
     * @param codePoint the character (Unicode code point) to be converted.
     * @return either the uppercase equivalent of the character, if
     * any, or an error flag ({@code Character.ERROR})
     * that indicates that a 1:M {@code char} mapping exists.
     * @see Character#isLowerCase(char)
     * @see Character#isUpperCase(char)
     * @see Character#toLowerCase(char)
     * @see Character#toTitleCase(char)
     * @since 1.4
     */
    static int toUpperCaseEx(int codePoint) {
        assert isValidCodePoint(codePoint);
        return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
    }

    /**
     * Converts the character (Unicode code point) argument to uppercase using case
     * mapping information from the SpecialCasing file in the Unicode
     * specification. If a character has no explicit uppercase
     * mapping, then the {@code char} itself is returned in the
     * {@code char[]}.
     *
     * @param codePoint the character (Unicode code point) to be converted.
     * @return a {@code char[]} with the uppercased character.
     * @since 1.4
     */
    static char[] toUpperCaseCharArray(int codePoint) {
        // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
        assert isBmpCodePoint(codePoint);
        return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
    }

    /**
     * Number of bits used to represent a {@code char}: 16, i.e. two bytes.
     */
    public static final int SIZE = 16;

    /**
     * Number of bytes used to represent a {@code char}.
     */
    public static final int BYTES = SIZE / Byte.SIZE;

    /**
     * Returns the value obtained by swapping the two bytes of the given char.
     *
     * @param ch the char whose bytes are to be swapped
     * @return the char with its high and low bytes exchanged
     */
    public static char reverseBytes(char ch) {
        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
    }

    /**
     * Returns the Unicode name of the given code point, or {@code null} if the code point
     * is unassigned.
     * <p>
     * The name reported by {@code CharacterName} is used when there is one. Otherwise, for
     * an assigned code point, the name is built from its Unicode block name (underscores
     * replaced by spaces), a space, and the code point in uppercase hexadecimal.
     *
     * @param codePoint the code point to name
     * @return the name, or {@code null} if the code point is unassigned
     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point
     */
    public static String getName(int codePoint) {
        if (!isValidCodePoint(codePoint)) {
            throw new IllegalArgumentException();
        }
        String name = CharacterName.get(codePoint);
        if (name != null) {
            return name;
        }
        if (getType(codePoint) == UNASSIGNED) {
            return null;
        }
        Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
        if (block != null) {
            return block.toString().replace('_', ' ') + " "
                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
        }
        // should never come here
        return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
    }
}

相关文章

网友评论

      本文标题:Java源码浅析,Character(4)

      本文链接:https://www.haomeiwen.com/subject/yksrtqtx.html