源码分析,基本上都加在注释上了,如有谬误,请指正,谢谢。
Character由于比较长,只能分多篇了
package com.jiyx.test.java.lang;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
/**
* author: jiyx
* date: 2018/9/13.
*/
public class Character implements Serializable, Comparable<Character> {
/**
* The primitive char wrapped by this Character instance.
*
* @serial
*/
private final char value;
/**
* Serialization version identifier for this class.
*/
private static final long serialVersionUID = 3786198910865385080L;
/**
* Creates a Character that wraps the given char.
*
* @param value the char to store in this instance
*/
public Character(char value) {
this.value = value;
}
/**
 * Holder for pre-built Character instances covering the char values 0 through 127.
 */
private static class CharacterCache {
    /**
     * Cached instances; the element at index i wraps the char value i.
     */
    static final Character[] cache = new Character[128];

    // Fill every slot once, when this holder class is initialized.
    static {
        int code = 0;
        while (code < cache.length) {
            cache[code] = new Character((char) code);
            code++;
        }
    }

    /** Not instantiable: this class only carries the static cache. */
    private CharacterCache() {
    }
}
/**
 * Returns a Character for the given char. Values 0 through 127 are taken from
 * the CharacterCache array; any larger value gets a newly created instance.
 *
 * @param c the char to wrap
 * @return a Character whose stored char is {@code c}
 */
public static Character valueOf(char c) {
    // Only the range 0..127 is backed by the cache.
    return (c > 127) ? new Character(c) : Character.CharacterCache.cache[c];
}
/**
 * Returns the char stored in this Character.
 *
 * @return the wrapped char value
 */
public char charValue() {
    return this.value;
}
/**
 * Returns the hash code of this Character, computed by the static
 * {@code hashCode(char)} from the wrapped char.
 *
 * @return the hash code of the wrapped char
 */
@Override
public int hashCode() {
    final char wrapped = this.value;
    return Character.hashCode(wrapped);
}
/**
 * Returns the hash code of a char, which is simply the char widened to an int.
 *
 * @param value the char to hash
 * @return the numeric value of {@code value}
 */
public static int hashCode(char value) {
    final int code = value; // implicit widening, same result as an (int) cast
    return code;
}
/**
 * Compares this Character with another object.
 *
 * @param obj the object to compare with; may be null
 * @return {@code true} only when {@code obj} is a Character wrapping the same
 *         char value; {@code false} for null or any other type
 */
@Override
public boolean equals(Object obj) {
    // instanceof is false for null, so no separate null check is required.
    if (obj instanceof Character) {
        return value == ((Character) obj).charValue();
    }
    return false;
}
/**
 * Returns a String of length one containing the wrapped char.
 *
 * @return the one-character string for this Character
 */
@Override
public String toString() {
    // String.valueOf(char) yields the same one-character string as wrapping
    // the char in a temporary array first, without the extra allocation.
    return String.valueOf(value);
}
/**
 * Returns a String of length one containing the given char.
 *
 * @param c the char to convert
 * @return the result of {@code String.valueOf(c)}
 */
public static String toString(char c) {
    final String text = String.valueOf(c);
    return text;
}
/**
 * Tells whether the given value is a valid Unicode code point.
 *
 * Optimized form of
 * {@code codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT}:
 * only the bits above the low 16 are compared.
 *
 * @param codePoint the value to test
 * @return {@code true} if the value lies in the valid code point range
 */
public static boolean isValidCodePoint(int codePoint) {
    final int planeLimit = (MAX_CODE_POINT + 1) >>> 16;
    // The unsigned shift turns negative inputs into large planes, which fail the test.
    return (codePoint >>> 16) < planeLimit;
}
/**
 * Tells whether the given code point lies in the Basic Multilingual Plane
 * (BMP), i.e. whether it can be represented by a single char.
 *
 * Optimized form of {@code codePoint >= MIN_VALUE && codePoint <= MAX_VALUE}.
 * The logical shift (>>>) is used consistently to facilitate additional
 * runtime optimizations.
 *
 * @param codePoint the value to test
 * @return {@code true} if all bits above the low 16 are zero
 */
public static boolean isBmpCodePoint(int codePoint) {
    final int plane = codePoint >>> 16;
    return plane == 0;
}
/**
 * Tells whether the given code point lies in the supplementary range, i.e.
 * from MIN_SUPPLEMENTARY_CODE_POINT up to and including MAX_CODE_POINT.
 *
 * @param codePoint the value to test
 * @return {@code true} if the value is a supplementary code point
 */
public static boolean isSupplementaryCodePoint(int codePoint) {
    if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
        return false;
    }
    return codePoint < MAX_CODE_POINT + 1;
}
/**
 * Tells whether the given char is a high (leading) surrogate code unit.
 *
 * @param ch the char to test
 * @return {@code true} if {@code ch} lies between MIN_HIGH_SURROGATE and
 *         MAX_HIGH_SURROGATE inclusive
 */
public static boolean isHighSurrogate(char ch) {
    if (ch < MIN_HIGH_SURROGATE) {
        return false;
    }
    // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
    return ch < (MAX_HIGH_SURROGATE + 1);
}
/**
 * Tells whether the given char is a low (trailing) surrogate code unit.
 *
 * @param ch the char to test
 * @return {@code true} if {@code ch} lies between MIN_LOW_SURROGATE and
 *         MAX_LOW_SURROGATE inclusive
 */
public static boolean isLowSurrogate(char ch) {
    if (ch < MIN_LOW_SURROGATE) {
        return false;
    }
    return ch < (MAX_LOW_SURROGATE + 1);
}
/**
 * Tells whether the given char is a surrogate code unit of either kind
 * (high or low).
 *
 * @param ch the char to test
 * @return {@code true} if {@code ch} lies between MIN_SURROGATE and
 *         MAX_SURROGATE inclusive
 */
public static boolean isSurrogate(char ch) {
    if (ch < MIN_SURROGATE) {
        return false;
    }
    return ch < (MAX_SURROGATE + 1);
}
/**
 * Tells whether the two chars form a valid surrogate pair.
 *
 * @param high the candidate high surrogate
 * @param low  the candidate low surrogate
 * @return {@code true} if {@code high} is a high surrogate and {@code low}
 *         is a low surrogate
 */
public static boolean isSurrogatePair(char high, char low) {
    if (!isHighSurrogate(high)) {
        return false;
    }
    return isLowSurrogate(low);
}
/**
 * Returns how many chars are needed to represent the given code point.
 * No validity check is performed on the argument.
 *
 * @param codePoint the code point to measure
 * @return 2 if the value is at least MIN_SUPPLEMENTARY_CODE_POINT, otherwise 1
 */
public static int charCount(int codePoint) {
    if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) {
        return 2;
    }
    return 1;
}
/**
 * Combines a surrogate pair into its supplementary code point. The arguments
 * are converted without validation; call isSurrogatePair(char, char) first if
 * a check is needed.
 *
 * Optimized form of:
 * <pre>
 *   ((high - MIN_HIGH_SURROGATE) << 10)
 *       + (low - MIN_LOW_SURROGATE)
 *       + MIN_SUPPLEMENTARY_CODE_POINT
 * </pre>
 *
 * @param high the high surrogate
 * @param low  the low surrogate
 * @return the combined code point
 */
public static int toCodePoint(char high, char low) {
    // Every term that does not depend on the arguments, folded into one constant.
    final int bias = MIN_SUPPLEMENTARY_CODE_POINT
            - (MIN_HIGH_SURROGATE << 10)
            - MIN_LOW_SURROGATE;
    return ((high << 10) + low) + bias;
}
/**
 * Returns the code point at the given index of a char sequence. If the char at
 * {@code index} is a high surrogate and the following char exists and is a low
 * surrogate, the combined supplementary code point is returned; otherwise the
 * char at {@code index} is returned as an int.
 *
 * @param seq   the sequence to read from
 * @param index the position of the first char to examine
 * @return the code point starting at {@code index}
 */
public static int codePointAt(CharSequence seq, int index) {
    final char first = seq.charAt(index);
    if (!isHighSurrogate(first)) {
        return first;
    }
    final int next = index + 1;
    if (next >= seq.length()) {
        return first;
    }
    final char second = seq.charAt(next);
    return isLowSurrogate(second) ? toCodePoint(first, second) : first;
}

/**
 * Same as the CharSequence variant, but reads from a char array; the whole
 * array may be examined.
 *
 * @param a     the array to read from
 * @param index the position of the first char to examine
 * @return the code point starting at {@code index}
 */
public static int codePointAt(char[] a, int index) {
    final int limit = a.length;
    return codePointAtImpl(a, index, limit);
}

/**
 * Same as the array variant, but the char following {@code index} is only
 * examined if its position is below {@code limit}.
 *
 * @param a     the array to read from
 * @param index the position of the first char to examine
 * @param limit the exclusive upper bound for positions that may be read
 * @return the code point starting at {@code index}
 * @throws IndexOutOfBoundsException if {@code index >= limit}, {@code limit}
 *         is negative, or {@code limit} exceeds the array length
 */
public static int codePointAt(char[] a, int index, int limit) {
    final boolean inBounds = index < limit && limit >= 0 && limit <= a.length;
    if (!inBounds) {
        throw new IndexOutOfBoundsException();
    }
    return codePointAtImpl(a, index, limit);
}
/**
 * Lookup shared by the array-based codePointAt overloads; performs no
 * argument validation of its own.
 *
 * @param a     the array to read from
 * @param index the position of the first char to examine
 * @param limit the exclusive upper bound for the position of the second char
 * @return the combined code point if a high/low surrogate pair starts at
 *         {@code index} below {@code limit}, otherwise the char at {@code index}
 */
static int codePointAtImpl(char[] a, int index, int limit) {
    final char first = a[index];
    if (!isHighSurrogate(first)) {
        return first;
    }
    final int next = index + 1;
    if (next >= limit) {
        return first;
    }
    final char second = a[next];
    return isLowSurrogate(second) ? toCodePoint(first, second) : first;
}
/**
 * Returns the code point that ends just before the given index. If the char at
 * {@code index - 1} is a low surrogate and the char before it exists and is a
 * high surrogate, the combined supplementary code point is returned; otherwise
 * the char at {@code index - 1} is returned as an int.
 *
 * @param seq   the sequence to read from
 * @param index the position following the code point to return
 * @return the code point immediately before {@code index}
 * @since 1.5
 */
public static int codePointBefore(CharSequence seq, int index) {
    final int lastPos = index - 1;
    final char trailing = seq.charAt(lastPos);
    if (!isLowSurrogate(trailing) || lastPos <= 0) {
        return trailing;
    }
    final char leading = seq.charAt(lastPos - 1);
    return isHighSurrogate(leading) ? toCodePoint(leading, trailing) : trailing;
}

/**
 * Same as the CharSequence variant, but reads from a char array; the search
 * may go back as far as position 0.
 *
 * @param a     the array to read from
 * @param index the position following the code point to return
 * @return the code point immediately before {@code index}
 */
public static int codePointBefore(char[] a, int index) {
    final int start = 0;
    return codePointBeforeImpl(a, index, start);
}

/**
 * Same as the array variant, but chars below position {@code start} are never
 * examined.
 *
 * @param a     the array to read from
 * @param index the position following the code point to return
 * @param start the lowest position that may be read
 * @return the code point immediately before {@code index}
 * @throws IndexOutOfBoundsException if {@code index <= start}, {@code start}
 *         is negative, or {@code start} is not below the array length
 */
public static int codePointBefore(char[] a, int index, int start) {
    final boolean inBounds = index > start && start >= 0 && start < a.length;
    if (!inBounds) {
        throw new IndexOutOfBoundsException();
    }
    return codePointBeforeImpl(a, index, start);
}
/**
 * Backward lookup shared by the array-based codePointBefore overloads;
 * performs no argument validation of its own.
 *
 * @param a     the array to read from
 * @param index the position following the code point to return
 * @param start the lowest position that may be read
 * @return the combined code point if a high/low surrogate pair ends at
 *         {@code index - 1} without going below {@code start}, otherwise the
 *         char at {@code index - 1}
 */
static int codePointBeforeImpl(char[] a, int index, int start) {
    final int lastPos = index - 1;
    final char trailing = a[lastPos];
    if (!isLowSurrogate(trailing) || lastPos <= start) {
        return trailing;
    }
    final char leading = a[lastPos - 1];
    return isHighSurrogate(leading) ? toCodePoint(leading, trailing) : trailing;
}
/**
 * Returns the high (leading) surrogate of the surrogate pair that represents
 * the given supplementary code point. For a code point that is not
 * supplementary, the returned char is unspecified.
 *
 * @param codePoint a supplementary code point
 * @return the high surrogate for {@code codePoint}
 */
public static char highSurrogate(int codePoint) {
    final int offset = MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10);
    return (char) ((codePoint >>> 10) + offset);
}
/**
 * Returns the low (trailing) surrogate of the surrogate pair that represents
 * the given supplementary code point. For a code point that is not
 * supplementary, the returned char is unspecified.
 *
 * @param codePoint a supplementary code point
 * @return the low surrogate for {@code codePoint}
 */
public static char lowSurrogate(int codePoint) {
    final int lowTenBits = codePoint & 0x3ff;
    return (char) (lowTenBits + MIN_LOW_SURROGATE);
}
/**
 * Writes the UTF-16 form of a code point into {@code dst} starting at
 * {@code dstIndex}. A BMP code point is stored as one char; any other valid
 * code point is stored as a surrogate pair in two consecutive slots.
 *
 * @param codePoint the code point to convert
 * @param dst       the destination array
 * @param dstIndex  the position of the first char to write
 * @return 1 for a BMP code point, 2 for a supplementary code point
 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point
 */
public static int toChars(int codePoint, char[] dst, int dstIndex) {
    if (isBmpCodePoint(codePoint)) {
        dst[dstIndex] = (char) codePoint;
        return 1;
    }
    // Not BMP: must still be a valid (hence supplementary) code point.
    if (!isValidCodePoint(codePoint)) {
        throw new IllegalArgumentException();
    }
    toSurrogates(codePoint, dst, dstIndex);
    return 2;
}

/**
 * Returns the UTF-16 form of a code point in a newly allocated array.
 *
 * @param codePoint the code point to convert
 * @return a one-element array for a BMP code point, or a two-element array
 *         holding the surrogate pair for a supplementary code point
 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point
 */
public static char[] toChars(int codePoint) {
    if (isBmpCodePoint(codePoint)) {
        return new char[]{(char) codePoint};
    }
    if (!isValidCodePoint(codePoint)) {
        throw new IllegalArgumentException();
    }
    final char[] pair = new char[2];
    toSurrogates(codePoint, pair, 0);
    return pair;
}
/**
 * Stores the surrogate pair for a supplementary code point into
 * {@code dst[index]} (high) and {@code dst[index + 1]} (low).
 *
 * @param codePoint the supplementary code point to encode
 * @param dst       the destination array
 * @param index     the position that receives the high surrogate
 */
static void toSurrogates(int codePoint, char[] dst, int index) {
    final char low = lowSurrogate(codePoint);
    final char high = highSurrogate(codePoint);
    // We write elements "backwards" to guarantee all-or-nothing: if the
    // upper slot is out of range, nothing has been stored yet.
    dst[index + 1] = low;
    dst[index] = high;
}
/**
 * Counts the code points in the range {@code [beginIndex, endIndex)} of a char
 * sequence. Unlike the number of chars in the range, a high surrogate
 * immediately followed (inside the range) by a low surrogate counts once.
 *
 * @param seq        the sequence to examine
 * @param beginIndex the inclusive start of the range
 * @param endIndex   the exclusive end of the range
 * @return the number of code points in the range
 * @throws IndexOutOfBoundsException if {@code beginIndex} is negative,
 *         {@code endIndex} exceeds the sequence length, or
 *         {@code beginIndex > endIndex}
 */
public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
    final int length = seq.length();
    final boolean inBounds = beginIndex >= 0 && endIndex <= length && beginIndex <= endIndex;
    if (!inBounds) {
        throw new IndexOutOfBoundsException();
    }
    int pairs = 0;
    int pos = beginIndex;
    while (pos < endIndex) {
        final char current = seq.charAt(pos);
        pos++;
        if (isHighSurrogate(current) && pos < endIndex
                && isLowSurrogate(seq.charAt(pos))) {
            pairs++;
            pos++; // skip the low half of the pair
        }
    }
    // Every surrogate pair occupies two chars but is one code point.
    return (endIndex - beginIndex) - pairs;
}

/**
 * Counts the code points in {@code count} chars of an array, starting at
 * {@code offset}; surrogate pairs count once, as in the CharSequence variant.
 *
 * @param a      the array to examine
 * @param offset the position of the first char
 * @param count  the number of chars to examine
 * @return the number of code points in the sub-array
 * @throws IndexOutOfBoundsException if {@code offset} or {@code count} is
 *         negative, or {@code offset + count} exceeds the array length
 */
public static int codePointCount(char[] a, int offset, int count) {
    final boolean inBounds = count <= a.length - offset && offset >= 0 && count >= 0;
    if (!inBounds) {
        throw new IndexOutOfBoundsException();
    }
    return codePointCountImpl(a, offset, count);
}
/**
 * Counting loop behind codePointCount(char[], int, int); performs no argument
 * validation of its own.
 *
 * @param a      the array to examine
 * @param offset the position of the first char
 * @param count  the number of chars to examine
 * @return {@code count} minus the number of high/low surrogate pairs found
 */
static int codePointCountImpl(char[] a, int offset, int count) {
    final int end = offset + count;
    int pairs = 0;
    int pos = offset;
    while (pos < end) {
        final char current = a[pos];
        pos++;
        if (isHighSurrogate(current) && pos < end && isLowSurrogate(a[pos])) {
            pairs++;
            pos++; // skip the low half of the pair
        }
    }
    return count - pairs;
}
/**
 * Returns the char index reached by moving {@code codePointOffset} code points
 * away from {@code index} in a char sequence. A positive offset moves forward,
 * a negative one backward; a high surrogate directly followed by a low
 * surrogate is stepped over as a single code point.
 *
 * @param seq             the sequence to walk
 * @param index           the starting char index
 * @param codePointOffset the number of code points to move
 * @return the resulting char index
 * @throws IndexOutOfBoundsException if {@code index} is outside
 *         {@code [0, seq.length()]}, or the sequence ends before the requested
 *         number of code points has been stepped over
 */
public static int offsetByCodePoints(CharSequence seq, int index,
                                     int codePointOffset) {
    final int length = seq.length();
    if (index < 0 || index > length) {
        throw new IndexOutOfBoundsException();
    }
    int pos = index;
    int pending = codePointOffset; // counts toward zero from either side
    if (codePointOffset >= 0) {
        // Forward: consume one code point per iteration.
        while (pos < length && pending > 0) {
            final char current = seq.charAt(pos++);
            if (isHighSurrogate(current) && pos < length
                    && isLowSurrogate(seq.charAt(pos))) {
                pos++;
            }
            pending--;
        }
        // Ran off the end with code points still to skip.
        if (pending > 0) {
            throw new IndexOutOfBoundsException();
        }
    } else {
        // Backward: step over one code point per iteration.
        while (pos > 0 && pending < 0) {
            final char current = seq.charAt(--pos);
            if (isLowSurrogate(current) && pos > 0
                    && isHighSurrogate(seq.charAt(pos - 1))) {
                pos--;
            }
            pending++;
        }
        // Reached the beginning with code points still to skip.
        if (pending < 0) {
            throw new IndexOutOfBoundsException();
        }
    }
    return pos;
}

/**
 * Array variant of offsetByCodePoints, restricted to the sub-array of
 * {@code count} chars that begins at {@code start}.
 *
 * @param a               the array to walk
 * @param start           the first position of the sub-array
 * @param count           the length of the sub-array
 * @param index           the starting char index
 * @param codePointOffset the number of code points to move
 * @return the resulting char index
 * @throws IndexOutOfBoundsException if {@code start} or {@code count} is
 *         negative, the sub-array exceeds the array, {@code index} is outside
 *         {@code [start, start + count]}, or the sub-array ends before the
 *         requested number of code points has been stepped over
 */
public static int offsetByCodePoints(char[] a, int start, int count,
                                     int index, int codePointOffset) {
    final boolean inBounds = count <= a.length - start && start >= 0 && count >= 0
            && index >= start && index <= start + count;
    if (!inBounds) {
        throw new IndexOutOfBoundsException();
    }
    return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
}
/**
 * Walking loop behind the array-based offsetByCodePoints; the arguments are
 * not validated here.
 *
 * @param a               the array to walk
 * @param start           the first position of the sub-array
 * @param count           the length of the sub-array
 * @param index           the starting char index
 * @param codePointOffset the number of code points to move (negative = backward)
 * @return the resulting char index
 * @throws IndexOutOfBoundsException if the sub-array ends before the requested
 *         number of code points has been stepped over
 */
static int offsetByCodePointsImpl(char[] a, int start, int count,
                                  int index, int codePointOffset) {
    int pos = index;
    int pending = codePointOffset; // counts toward zero from either side
    if (codePointOffset >= 0) {
        final int limit = start + count;
        while (pos < limit && pending > 0) {
            final char current = a[pos++];
            if (isHighSurrogate(current) && pos < limit && isLowSurrogate(a[pos])) {
                pos++;
            }
            pending--;
        }
        if (pending > 0) {
            throw new IndexOutOfBoundsException();
        }
    } else {
        while (pos > start && pending < 0) {
            final char current = a[--pos];
            if (isLowSurrogate(current) && pos > start && isHighSurrogate(a[pos - 1])) {
                pos--;
            }
            pending++;
        }
        if (pending < 0) {
            throw new IndexOutOfBoundsException();
        }
    }
    return pos;
}
/**
 * Tells whether the given char is a lowercase character.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isLowerCase(char ch) {
    final int codePoint = ch;
    return isLowerCase(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return {@code true} if its general category is LOWERCASE_LETTER, or if
 *         CharacterData reports it as "other lowercase"
 */
public static boolean isLowerCase(int codePoint) {
    if (getType(codePoint) == Character.LOWERCASE_LETTER) {
        return true;
    }
    return CharacterData.of(codePoint).isOtherLowercase(codePoint);
}
/**
 * Tells whether the given char is an uppercase character.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isUpperCase(char ch) {
    final int codePoint = ch;
    return isUpperCase(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return {@code true} if its general category is UPPERCASE_LETTER, or if
 *         CharacterData reports it as "other uppercase"
 */
public static boolean isUpperCase(int codePoint) {
    if (getType(codePoint) == Character.UPPERCASE_LETTER) {
        return true;
    }
    return CharacterData.of(codePoint).isOtherUppercase(codePoint);
}
/**
 * Tells whether the given char is a titlecase character. Examples for which
 * this returns {@code true} include:
 * <ul>
 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
 * </ul>
 * <p> Many other Unicode characters are titlecase too.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isTitleCase(char ch) {
    final int codePoint = ch;
    return isTitleCase(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return {@code true} if its general category is TITLECASE_LETTER
 */
public static boolean isTitleCase(int codePoint) {
    final int category = getType(codePoint);
    return category == Character.TITLECASE_LETTER;
}
/**
 * Tells whether the given char is a digit.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isDigit(char ch) {
    final int codePoint = ch;
    return isDigit(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return {@code true} if its general category is DECIMAL_DIGIT_NUMBER
 */
public static boolean isDigit(int codePoint) {
    final int category = getType(codePoint);
    return category == Character.DECIMAL_DIGIT_NUMBER;
}
/**
 * Tells whether the given char is defined in Unicode.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isDefined(char ch) {
    final int codePoint = ch;
    return isDefined(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return {@code true} unless its general category is UNASSIGNED
 */
public static boolean isDefined(int codePoint) {
    final int category = getType(codePoint);
    return category != Character.UNASSIGNED;
}
/**
 * Tells whether the given char is a letter: uppercase, lowercase, titlecase,
 * modifier letter or other letter.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isLetter(char ch) {
    final int codePoint = ch;
    return isLetter(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return {@code true} if its general category is one of the letter categories
 */
public static boolean isLetter(int codePoint) {
    // One bit per accepted general category; the category number selects the bit.
    final int letterMask = (1 << Character.UPPERCASE_LETTER)
            | (1 << Character.LOWERCASE_LETTER)
            | (1 << Character.TITLECASE_LETTER)
            | (1 << Character.MODIFIER_LETTER)
            | (1 << Character.OTHER_LETTER);
    return ((letterMask >> getType(codePoint)) & 1) != 0;
}
/**
 * Tells whether the given char is a letter or a digit.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isLetterOrDigit(char ch) {
    final int codePoint = ch;
    return isLetterOrDigit(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return {@code true} if its general category is one of the letter
 *         categories or DECIMAL_DIGIT_NUMBER
 */
public static boolean isLetterOrDigit(int codePoint) {
    // One bit per accepted general category; the category number selects the bit.
    final int acceptedMask = (1 << Character.UPPERCASE_LETTER)
            | (1 << Character.LOWERCASE_LETTER)
            | (1 << Character.TITLECASE_LETTER)
            | (1 << Character.MODIFIER_LETTER)
            | (1 << Character.OTHER_LETTER)
            | (1 << Character.DECIMAL_DIGIT_NUMBER);
    return ((acceptedMask >> getType(codePoint)) & 1) != 0;
}
/**
 * Tells whether the given char may be the first character of a Java identifier.
 *
 * @param ch the char to test
 * @return the result of {@code isJavaIdentifierStart(ch)}
 * @deprecated use isJavaIdentifierStart(char), to which this method forwards
 */
@Deprecated
public static boolean isJavaLetter(char ch) {
    final boolean permitted = isJavaIdentifierStart(ch);
    return permitted;
}
/**
 * Tells whether the given char may appear in a Java identifier at a position
 * other than the first.
 *
 * @param ch the char to test
 * @return the result of {@code isJavaIdentifierPart(ch)}
 * @deprecated use isJavaIdentifierPart(char), to which this method forwards
 */
@Deprecated
public static boolean isJavaLetterOrDigit(char ch) {
    final boolean permitted = isJavaIdentifierPart(ch);
    return permitted;
}
/**
 * Tells whether the given code point is alphabetic. This is wider than Latin
 * letters: the original author observed that Chinese characters also yield
 * {@code true}.
 *
 * @param codePoint the code point to test
 * @return {@code true} if its general category is one of the letter
 *         categories or LETTER_NUMBER, or if CharacterData reports it as
 *         "other alphabetic"
 */
public static boolean isAlphabetic(int codePoint) {
    // One bit per accepted general category; the category number selects the bit.
    final int alphabeticMask = (1 << Character.UPPERCASE_LETTER)
            | (1 << Character.LOWERCASE_LETTER)
            | (1 << Character.TITLECASE_LETTER)
            | (1 << Character.MODIFIER_LETTER)
            | (1 << Character.OTHER_LETTER)
            | (1 << Character.LETTER_NUMBER);
    if (((alphabeticMask >> getType(codePoint)) & 1) != 0) {
        return true;
    }
    return CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
}
/**
 * Tells whether the given code point is an ideograph as used in Chinese,
 * Japanese, Korean and Vietnamese writing.
 *
 * @param codePoint the code point to test
 * @return the answer given by the CharacterData entry for {@code codePoint}
 */
public static boolean isIdeographic(int codePoint) {
    final boolean ideographic = CharacterData.of(codePoint).isIdeographic(codePoint);
    return ideographic;
}
/**
 * Tells whether the given char may be the first character of a Java
 * identifier. Replaces the deprecated isJavaLetter.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isJavaIdentifierStart(char ch) {
    final int codePoint = ch;
    return isJavaIdentifierStart(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return the answer given by the CharacterData entry for {@code codePoint}
 */
public static boolean isJavaIdentifierStart(int codePoint) {
    final boolean permitted = CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
    return permitted;
}
/**
 * Tells whether the given char may appear in a Java identifier at a position
 * other than the first. Replaces the deprecated isJavaLetterOrDigit.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isJavaIdentifierPart(char ch) {
    final int codePoint = ch;
    return isJavaIdentifierPart(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return the answer given by the CharacterData entry for {@code codePoint}
 */
public static boolean isJavaIdentifierPart(int codePoint) {
    final boolean permitted = CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
    return permitted;
}
/**
 * Tells whether the given char may be the first character of a Unicode
 * identifier.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isUnicodeIdentifierStart(char ch) {
    final int codePoint = ch;
    return isUnicodeIdentifierStart(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return the answer given by the CharacterData entry for {@code codePoint}
 */
public static boolean isUnicodeIdentifierStart(int codePoint) {
    final boolean permitted = CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
    return permitted;
}
/**
 * Tells whether the given char may appear in a Unicode identifier at a
 * position other than the first.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isUnicodeIdentifierPart(char ch) {
    final int codePoint = ch;
    return isUnicodeIdentifierPart(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return the answer given by the CharacterData entry for {@code codePoint}
 */
public static boolean isUnicodeIdentifierPart(int codePoint) {
    final boolean permitted = CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
    return permitted;
}
/**
 * Tells whether the given char should be treated as an ignorable character
 * inside a Java identifier or a Unicode identifier.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isIdentifierIgnorable(char ch) {
    final int codePoint = ch;
    return isIdentifierIgnorable(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return the answer given by the CharacterData entry for {@code codePoint}
 */
public static boolean isIdentifierIgnorable(int codePoint) {
    final boolean ignorable = CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
    return ignorable;
}
/**
 * Converts the given char to lowercase.
 *
 * @param ch the char to convert
 * @return the result of the code point variant, narrowed back to a char
 */
public static char toLowerCase(char ch) {
    // The int-typed local selects the code point overload instead of recursing.
    final int codePoint = ch;
    final int lowered = toLowerCase(codePoint);
    return (char) lowered;
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to convert
 * @return the mapping supplied by the CharacterData entry for {@code codePoint}
 */
public static int toLowerCase(int codePoint) {
    final int lowered = CharacterData.of(codePoint).toLowerCase(codePoint);
    return lowered;
}
/**
 * Converts the given char to uppercase.
 *
 * @param ch the char to convert
 * @return the result of the code point variant, narrowed back to a char
 */
public static char toUpperCase(char ch) {
    // The int-typed local selects the code point overload instead of recursing.
    final int codePoint = ch;
    final int raised = toUpperCase(codePoint);
    return (char) raised;
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to convert
 * @return the mapping supplied by the CharacterData entry for {@code codePoint}
 */
public static int toUpperCase(int codePoint) {
    final int raised = CharacterData.of(codePoint).toUpperCase(codePoint);
    return raised;
}
/**
 * Converts the given char to titlecase.
 *
 * @param ch the char to convert
 * @return the result of the code point variant, narrowed back to a char
 */
public static char toTitleCase(char ch) {
    // The int-typed local selects the code point overload instead of recursing.
    final int codePoint = ch;
    final int titled = toTitleCase(codePoint);
    return (char) titled;
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to convert
 * @return the mapping supplied by the CharacterData entry for {@code codePoint}
 */
public static int toTitleCase(int codePoint) {
    final int titled = CharacterData.of(codePoint).toTitleCase(codePoint);
    return titled;
}
/**
 * Returns the numeric value of a char in the given radix; for example, 'a'
 * corresponds to 10 in radix 16.
 *
 * @param ch    the char to convert
 * @param radix the radix to interpret it in
 * @return the result of the code point variant applied to {@code ch}
 */
public static int digit(char ch, int radix) {
    final int codePoint = ch;
    return digit(codePoint, radix);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to convert
 * @param radix     the radix to interpret it in
 * @return the value supplied by the CharacterData entry for {@code codePoint}
 */
public static int digit(int codePoint, int radix) {
    final int numeric = CharacterData.of(codePoint).digit(codePoint, radix);
    return numeric;
}
/**
 * Returns the int value that the given Unicode character represents. For
 * example, U+216C (ROMAN NUMERAL FIFTY) yields 50.
 *
 * @param ch the char to evaluate
 * @return the result of the code point variant applied to {@code ch}
 */
public static int getNumericValue(char ch) {
    final int codePoint = ch;
    return getNumericValue(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to evaluate
 * @return the value supplied by the CharacterData entry for {@code codePoint}
 */
public static int getNumericValue(int codePoint) {
    final int numeric = CharacterData.of(codePoint).getNumericValue(codePoint);
    return numeric;
}
/**
 * Tells whether the given char is ISO-LATIN-1 white space. Only tab (U+0009),
 * line feed (U+000A), form feed (U+000C), carriage return (U+000D) and space
 * (U+0020) yield {@code true}.
 *
 * @param ch the char to test
 * @return {@code true} for exactly the five characters listed above
 */
@Deprecated
public static boolean isSpace(char ch) {
    if (ch > 0x0020) {
        return false;
    }
    // One bit per accepted character; the char value selects the bit.
    final long spaceMask = (1L << 0x0009)
            | (1L << 0x000A)
            | (1L << 0x000C)
            | (1L << 0x000D)
            | (1L << 0x0020);
    return ((spaceMask >> ch) & 1L) != 0;
}
/**
 * Tells whether the given char is a Unicode space character, i.e. one that
 * the Unicode standard designates as a space.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isSpaceChar(char ch) {
    final int codePoint = ch;
    return isSpaceChar(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return {@code true} if its general category is SPACE_SEPARATOR,
 *         LINE_SEPARATOR or PARAGRAPH_SEPARATOR
 */
public static boolean isSpaceChar(int codePoint) {
    // One bit per accepted general category; the category number selects the bit.
    final int separatorMask = (1 << Character.SPACE_SEPARATOR)
            | (1 << Character.LINE_SEPARATOR)
            | (1 << Character.PARAGRAPH_SEPARATOR);
    return ((separatorMask >> getType(codePoint)) & 1) != 0;
}
/**
 * Tells whether the given char is white space according to Java. Per the
 * notes carried with this code, that is the case when one of these holds:
 * <ol>
 * <li>It is a Unicode space character (SPACE_SEPARATOR, LINE_SEPARATOR or
 *     PARAGRAPH_SEPARATOR) but not a non-breaking space (U+00A0, U+2007,
 *     U+202F).
 * <li>It is U+0009 HORIZONTAL TABULATION.
 * <li>It is U+000A LINE FEED.
 * <li>It is U+000B VERTICAL TABULATION.
 * <li>It is U+000C FORM FEED.
 * <li>It is U+000D CARRIAGE RETURN.
 * <li>It is U+001C FILE SEPARATOR.
 * <li>It is U+001D GROUP SEPARATOR.
 * <li>It is U+001E RECORD SEPARATOR.
 * <li>It is U+001F UNIT SEPARATOR.
 * </ol>
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isWhitespace(char ch) {
    final int codePoint = ch;
    return isWhitespace(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return the answer given by the CharacterData entry for {@code codePoint}
 */
public static boolean isWhitespace(int codePoint) {
    final boolean whitespace = CharacterData.of(codePoint).isWhitespace(codePoint);
    return whitespace;
}
/**
 * Tells whether the given char is an ISO control character, i.e. whether its
 * code lies in U+0000..U+001F or in U+007F..U+009F.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isISOControl(char ch) {
    final int codePoint = ch;
    return isISOControl(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * Optimized form of:
 * <pre>
 *   (codePoint >= 0x00 && codePoint <= 0x1F) ||
 *   (codePoint >= 0x7F && codePoint <= 0x9F)
 * </pre>
 *
 * @param codePoint the code point to test
 * @return {@code true} if the value lies in one of the two control ranges
 */
public static boolean isISOControl(int codePoint) {
    if (codePoint > 0x9F) {
        return false;
    }
    // (codePoint >>> 5) == 0 holds exactly for 0x00..0x1F; negatives shift to non-zero.
    return codePoint >= 0x7F || (codePoint >>> 5) == 0;
}
/**
 * Returns the value that denotes the general category of the given char.
 *
 * @param ch the char to classify
 * @return the result of the code point variant applied to {@code ch}
 */
public static int getType(char ch) {
    final int codePoint = ch;
    return getType(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to classify
 * @return the category supplied by the CharacterData entry for {@code codePoint}
 */
public static int getType(int codePoint) {
    final int category = CharacterData.of(codePoint).getType(codePoint);
    return category;
}
/**
 * Returns the character that represents {@code digit} in the given radix:
 * '0'..'9' for values below ten, then 'a', 'b', ... for larger values.
 *
 * @param digit the value to convert
 * @param radix the radix
 * @return the character for the digit, or the null character (U+0000) if
 *         {@code radix} is outside MIN_RADIX..MAX_RADIX or {@code digit} is
 *         not a valid digit in that radix
 */
public static char forDigit(int digit, int radix) {
    final boolean digitOutOfRange = digit < 0 || digit >= radix;
    final boolean radixOutOfRange =
            radix < Character.MIN_RADIX || radix > Character.MAX_RADIX;
    if (digitOutOfRange || radixOutOfRange) {
        return '\0';
    }
    // Values of ten and above continue with lowercase letters starting at 'a'.
    final int base = (digit < 10) ? '0' : ('a' - 10);
    return (char) (base + digit);
}
/**
 * Returns the Unicode directionality property of the given char. Character
 * directionality is used to compute the visual ordering of text. According to
 * the notes carried with this code, the result is one of the
 * {@code DIRECTIONALITY_*} constants of this class, and an undefined char
 * yields DIRECTIONALITY_UNDEFINED.
 *
 * @param ch {@code char} for which the directionality property is requested
 * @return the directionality property of the {@code char} value
 * @see Character#DIRECTIONALITY_UNDEFINED
 * @since 1.4
 */
public static byte getDirectionality(char ch) {
    final int codePoint = ch;
    return getDirectionality(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the character (Unicode code point) for which the
 *                  directionality property is requested
 * @return the directionality supplied by the CharacterData entry for
 *         {@code codePoint}
 * @see Character#DIRECTIONALITY_UNDEFINED
 * @since 1.5
 */
public static byte getDirectionality(int codePoint) {
    final byte directionality = CharacterData.of(codePoint).getDirectionality(codePoint);
    return directionality;
}
/**
 * Tells whether the given char is mirrored according to the Unicode
 * specification. A mirrored character should have its glyph flipped
 * horizontally when shown in right-to-left text. For example, U+0028 LEFT
 * PARENTHESIS is semantically an opening parenthesis: it appears as "(" in
 * left-to-right text and as ")" in right-to-left text.
 *
 * @param ch the char to test
 * @return the result of the code point variant applied to {@code ch}
 */
public static boolean isMirrored(char ch) {
    final int codePoint = ch;
    return isMirrored(codePoint);
}

/**
 * Code point variant, which also accepts supplementary code points.
 *
 * @param codePoint the code point to test
 * @return the answer given by the CharacterData entry for {@code codePoint}
 */
public static boolean isMirrored(int codePoint) {
    final boolean mirrored = CharacterData.of(codePoint).isMirrored(codePoint);
    return mirrored;
}
/**
 * Compares this Character with another one by the numeric value of the
 * wrapped chars.
 *
 * @param anotherCharacter the Character to compare with
 * @return zero if both wrap the same char, a negative value if this char is
 *         numerically smaller, a positive value if it is larger
 */
@Override
public int compareTo(Character anotherCharacter) {
    return compare(this.value, anotherCharacter.value);
}
/**
 * Compares two chars numerically.
 *
 * @param x the first char
 * @param y the second char
 * @return the difference {@code x - y}: zero if equal, negative if
 *         {@code x < y}, positive if {@code x > y}
 */
public static int compare(char x, char y) {
    // Both operands fit in 16 bits, so the int subtraction cannot overflow.
    final int left = x;
    final int right = y;
    return left - right;
}
/**
 * Converts a code point to uppercase using information from the UnicodeData
 * file.
 *
 * @param codePoint the character (Unicode code point) to be converted;
 *                  asserted to be a valid code point
 * @return either the uppercase equivalent of the character, if any, or an
 *         error flag ({@code Character.ERROR}) that indicates that a 1:M
 *         {@code char} mapping exists
 * @see Character#isLowerCase(char)
 * @see Character#isUpperCase(char)
 * @see Character#toLowerCase(char)
 * @see Character#toTitleCase(char)
 * @since 1.4
 */
static int toUpperCaseEx(int codePoint) {
    assert isValidCodePoint(codePoint);
    final int raised = CharacterData.of(codePoint).toUpperCaseEx(codePoint);
    return raised;
}
/**
 * Converts a code point to uppercase using case mapping information from the
 * SpecialCasing file in the Unicode specification. If a character has no
 * explicit uppercase mapping, the {@code char} itself is returned in the
 * {@code char[]}.
 *
 * @param codePoint the character (Unicode code point) to be converted;
 *                  asserted to be a BMP code point
 * @return a {@code char[]} with the uppercased character
 * @since 1.4
 */
static char[] toUpperCaseCharArray(int codePoint) {
    // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
    assert isBmpCodePoint(codePoint);
    final char[] raised = CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
    return raised;
}
/**
* Number of bits used to represent a char: a Java char occupies two bytes,
* i.e. 16 bits.
*/
public static final int SIZE = 16;
/**
* Number of bytes used to represent a char, derived from SIZE and Byte.SIZE.
*/
public static final int BYTES = SIZE / Byte.SIZE;
/**
 * Returns the value obtained by swapping the two bytes of the given char.
 *
 * @param ch the char whose bytes are to be swapped
 * @return the char with its high and low bytes exchanged
 */
public static char reverseBytes(char ch) {
    final int highToLow = (ch & 0xFF00) >> 8;
    final int lowToHigh = ch << 8; // bits above 16 are dropped by the cast below
    return (char) (highToLow | lowToHigh);
}
/**
 * Returns the Unicode name of the given code point, or {@code null} if the
 * code point is unassigned.
 *
 * The name is taken from CharacterName when it has one. Otherwise, for an
 * assigned code point, it is built from the Unicode block name (underscores
 * replaced by spaces) followed by a space and the uppercase hexadecimal code
 * point.
 *
 * @param codePoint the code point to look up
 * @return the name, or {@code null} if the code point is unassigned
 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point
 */
public static String getName(int codePoint) {
    if (!isValidCodePoint(codePoint)) {
        throw new IllegalArgumentException();
    }
    final String known = CharacterName.get(codePoint);
    if (known != null) {
        return known;
    }
    if (getType(codePoint) == UNASSIGNED) {
        return null;
    }
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
    final String hex = Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
    if (block == null) {
        // should never come here
        return hex;
    }
    return block.toString().replace('_', ' ') + " " + hex;
}
}
网友评论