获取 Unicode 字符串对应的二进制型时可以按下面的方式写(结果似乎与运行环境有关,使用前请先用 unicode:characters_to_binary/1 打印一下,确认是否能正确编码):
能正确编码直接用:
unicode:characters_to_binary("中文"),
不能正确编码的用下面这句:
erlang:list_to_binary(unicode:characters_to_list("中文")),
另外附上统计字符(中英文都算一个字符)的方法:
%% get_utf8_len(UnicodeStream) -> integer()
%%
%% Returns the number of characters in UnicodeStream, where every character
%% counts as one regardless of how many bytes it occupies.
%% UnicodeStream is handed unchanged to utf8_len/3, which pattern-matches it
%% as a binary; the two counters it returns are simply added together.
get_utf8_len(UnicodeStream) ->
    {SingleByteChars, MultiByteChars} = utf8_len(UnicodeStream, 0, 0),
    MultiByteChars + SingleByteChars.
%% utf8_len(Bin, AsciiNum, Utf8Num) -> {AsciiNum1, Utf8Num1}
%%
%% Walks Bin one character at a time, dispatching on the lead byte:
%%   * a lead byte =< 127 is consumed alone and adds one to AsciiNum;
%%   * a lead byte in 192..253 that is followed by at least as many bytes as
%%     its sequence needs (1 to 5, see utf8_trailing_bytes/1) consumes the
%%     whole sequence and adds one to Utf8Num;
%%   * any other byte (128..191, 254, 255, or a lead byte whose sequence is
%%     cut short by the end of Bin) is consumed alone and adds one to
%%     AsciiNum.
%% The trailing bytes of a sequence are skipped without being validated.
utf8_len(<<>>, AsciiNum, Utf8Num) ->
    {AsciiNum, Utf8Num};
utf8_len(<<Lead, Tail/binary>>, AsciiNum, Utf8Num) ->
    Skip = utf8_trailing_bytes(Lead),
    case Tail of
        %% Multi-byte lead with its full sequence present: drop the
        %% trailing bytes and count a single multi-byte character.
        <<_:Skip/binary, Rest/binary>> when Skip > 0 ->
            utf8_len(Rest, AsciiNum, Utf8Num + 1);
        %% Single-byte character, unrecognised byte, or truncated sequence:
        %% advance by exactly one byte.
        _ ->
            utf8_len(Tail, AsciiNum + 1, Utf8Num)
    end.

%% Number of trailing bytes announced by a lead byte. Returns 0 for every
%% byte that does not start a multi-byte sequence (=< 127, 128..191, 254, 255).
%% The 4- and 5-byte results cover lead bytes 248..253, which the counting
%% code accepts as sequence starts.
utf8_trailing_bytes(Lead) when Lead >= 192, Lead =< 223 -> 1;
utf8_trailing_bytes(Lead) when Lead >= 224, Lead =< 239 -> 2;
utf8_trailing_bytes(Lead) when Lead >= 240, Lead =< 247 -> 3;
utf8_trailing_bytes(Lead) when Lead >= 248, Lead =< 251 -> 4;
utf8_trailing_bytes(Lead) when Lead >= 252, Lead =< 253 -> 5;
utf8_trailing_bytes(_Lead) -> 0.
网友评论