测试在编码转换过程中是否会丢失信息。
事实证明是会的。
但是只要在转换的过程中原字节不发生变化,还是可以还原出原来的结果的。
/**
 * Exercises round-trip conversions between character encodings.
 *
 * <p>For every ordered pair (source charset, intermediate charset) drawn from
 * UTF-8, GBK and ISO-8859-1, each sample string is handed to {@code translate}.
 * As long as the original bytes are left unchanged by the intermediate step,
 * the original text can still be restored.
 *
 * <p>Iteration order is: source charset (outer), intermediate charset, then
 * sample string (inner).
 *
 * @throws java.nio.charset.UnsupportedCharsetException if the running JVM does
 *         not provide the GBK charset
 */
public static void conversion() {
    List<String> srcStrings = Arrays.asList("少年", "少µ年", "少");

    // Look GBK up through the public API rather than instantiating the
    // JDK-internal implementation class, which is not exported on modular JDKs.
    Charset gbk = Charset.forName("GBK");
    Charset[] charsets = {StandardCharsets.UTF_8, gbk, StandardCharsets.ISO_8859_1};

    // The same three charsets act as both source and intermediate, so a plain
    // cross product replaces the former double-brace-initialized map.
    for (Charset srcCharset : charsets) {
        for (Charset middleCharset : charsets) {
            for (String src : srcStrings) {
                translate(src, srcCharset, middleCharset);
            }
        }
    }
}
/**
 * Round-trips {@code src} through an intermediate charset and logs each step.
 *
 * <p>Steps: encode {@code src} with {@code srcCharset}, decode those bytes with
 * {@code middleCharset}, re-encode the decoded text with {@code middleCharset},
 * and finally decode the result with {@code srcCharset}. Byte arrays are logged
 * via {@code HexUtil.encodeHexStr}. When the final string equals {@code src},
 * an "[equals]" line is logged at error level.
 *
 * @param src           the text to round-trip
 * @param srcCharset    the charset used for the first encode and the last decode
 * @param middleCharset the charset used for the intermediate decode and re-encode
 */
private static void translate(String src, Charset srcCharset, Charset middleCharset) {
    final String srcCharsetName = srcCharset.name();
    final String middleCharsetName = middleCharset.name();
    log.info("src :{}, srcCharset: {}, middleCharset: {}", src, srcCharsetName, middleCharsetName);

    // Step 1: text -> bytes in the source charset.
    final byte[] encoded = src.getBytes(srcCharset);
    log.info("src bytes:{}", HexUtil.encodeHexStr(encoded));

    // Step 2: reinterpret those bytes as text in the intermediate charset.
    final String misdecoded = new String(encoded, middleCharset);
    log.info("after decode : {}", misdecoded);

    // Step 3: turn the intermediate text back into bytes with the same charset.
    final byte[] reencoded = misdecoded.getBytes(middleCharset);
    log.info("after middle conversion bytes:{}", HexUtil.encodeHexStr(reencoded));

    // Step 4: decode with the source charset again and compare with the input.
    final String restored = new String(reencoded, srcCharset);
    log.info("final String:{}", restored);

    if (!src.equals(restored)) {
        return;
    }
    log.error("[equals]--- src :{}, srcCharset: {}, middleCharset: {}",
            src, srcCharsetName, middleCharsetName);
}
结果:
src :少年, srcCharset: UTF-8, middleCharset: UTF-8
src bytes:e5b091e5b9b4
after decode : 少年
after middle conversion bytes:e5b091e5b9b4
final String:少年
[equals]--- src :少年, srcCharset: UTF-8, middleCharset: UTF-8
src :少µ年, srcCharset: UTF-8, middleCharset: UTF-8
src bytes:e5b091c2b5e5b9b4
after decode : 少µ年
after middle conversion bytes:e5b091c2b5e5b9b4
final String:少µ年
[equals]--- src :少µ年, srcCharset: UTF-8, middleCharset: UTF-8
src :少, srcCharset: UTF-8, middleCharset: UTF-8
src bytes:e5b091
after decode : 少
after middle conversion bytes:e5b091
final String:少
[equals]--- src :少, srcCharset: UTF-8, middleCharset: UTF-8
src :少年, srcCharset: UTF-8, middleCharset: GBK
src bytes:e5b091e5b9b4
after decode : 灏戝勾
after middle conversion bytes:e5b091e5b9b4
final String:少年
[equals]--- src :少年, srcCharset: UTF-8, middleCharset: GBK
src :少µ年, srcCharset: UTF-8, middleCharset: GBK
src bytes:e5b091c2b5e5b9b4
after decode : 灏懧靛勾
after middle conversion bytes:e5b091c2b5e5b9b4
final String:少µ年
[equals]--- src :少µ年, srcCharset: UTF-8, middleCharset: GBK
src :少, srcCharset: UTF-8, middleCharset: GBK
src bytes:e5b091
after decode : 灏�
after middle conversion bytes:e5b03f
final String:�?
src :少年, srcCharset: UTF-8, middleCharset: ISO-8859-1
src bytes:e5b091e5b9b4
after decode : �年
after middle conversion bytes:e5b091e5b9b4
final String:少年
[equals]--- src :少年, srcCharset: UTF-8, middleCharset: ISO-8859-1
src :少µ年, srcCharset: UTF-8, middleCharset: ISO-8859-1
src bytes:e5b091c2b5e5b9b4
after decode : �µ年
after middle conversion bytes:e5b091c2b5e5b9b4
final String:少µ年
[equals]--- src :少µ年, srcCharset: UTF-8, middleCharset: ISO-8859-1
src :少, srcCharset: UTF-8, middleCharset: ISO-8859-1
src bytes:e5b091
after decode : å°�
after middle conversion bytes:e5b091
final String:少
[equals]--- src :少, srcCharset: UTF-8, middleCharset: ISO-8859-1
src :少年, srcCharset: GBK, middleCharset: UTF-8
src bytes:c9d9c4ea
after decode : ����
after middle conversion bytes:efbfbdefbfbdefbfbdefbfbd
final String:锟斤拷锟斤拷
src :少µ年, srcCharset: GBK, middleCharset: UTF-8
src bytes:c9d93fc4ea
after decode : ��?��
after middle conversion bytes:efbfbdefbfbd3fefbfbdefbfbd
final String:锟斤拷?锟斤拷
src :少, srcCharset: GBK, middleCharset: UTF-8
src bytes:c9d9
after decode : ��
after middle conversion bytes:efbfbdefbfbd
final String:锟斤拷
src :少年, srcCharset: GBK, middleCharset: GBK
src bytes:c9d9c4ea
after decode : 少年
after middle conversion bytes:c9d9c4ea
final String:少年
[equals]--- src :少年, srcCharset: GBK, middleCharset: GBK
src :少µ年, srcCharset: GBK, middleCharset: GBK
src bytes:c9d93fc4ea
after decode : 少?年
after middle conversion bytes:c9d93fc4ea
final String:少?年
src :少, srcCharset: GBK, middleCharset: GBK
src bytes:c9d9
after decode : 少
after middle conversion bytes:c9d9
final String:少
[equals]--- src :少, srcCharset: GBK, middleCharset: GBK
src :少年, srcCharset: GBK, middleCharset: ISO-8859-1
src bytes:c9d9c4ea
after decode : ÉÙÄê
after middle conversion bytes:c9d9c4ea
final String:少年
[equals]--- src :少年, srcCharset: GBK, middleCharset: ISO-8859-1
src :少µ年, srcCharset: GBK, middleCharset: ISO-8859-1
src bytes:c9d93fc4ea
after decode : ÉÙ?Äê
after middle conversion bytes:c9d93fc4ea
final String:少?年
src :少, srcCharset: GBK, middleCharset: ISO-8859-1
src bytes:c9d9
after decode : ÉÙ
after middle conversion bytes:c9d9
final String:少
[equals]--- src :少, srcCharset: GBK, middleCharset: ISO-8859-1
src :少年, srcCharset: ISO-8859-1, middleCharset: UTF-8
src bytes:3f3f
after decode : ??
after middle conversion bytes:3f3f
final String:??
src :少µ年, srcCharset: ISO-8859-1, middleCharset: UTF-8
src bytes:3fb53f
after decode : ?�?
after middle conversion bytes:3fefbfbd3f
final String:?�?
src :少, srcCharset: ISO-8859-1, middleCharset: UTF-8
src bytes:3f
after decode : ?
after middle conversion bytes:3f
final String:?
src :少年, srcCharset: ISO-8859-1, middleCharset: GBK
src bytes:3f3f
after decode : ??
after middle conversion bytes:3f3f
final String:??
src :少µ年, srcCharset: ISO-8859-1, middleCharset: GBK
src bytes:3fb53f
after decode : ?�?
after middle conversion bytes:3f3f3f
final String:???
src :少, srcCharset: ISO-8859-1, middleCharset: GBK
src bytes:3f
after decode : ?
after middle conversion bytes:3f
final String:?
src :少年, srcCharset: ISO-8859-1, middleCharset: ISO-8859-1
src bytes:3f3f
after decode : ??
after middle conversion bytes:3f3f
final String:??
src :少µ年, srcCharset: ISO-8859-1, middleCharset: ISO-8859-1
src bytes:3fb53f
after decode : ?µ?
after middle conversion bytes:3fb53f
final String:?µ?
src :少, srcCharset: ISO-8859-1, middleCharset: ISO-8859-1
src bytes:3f
after decode : ?
after middle conversion bytes:3f
final String:?
网友评论