美文网首页
大端 Unicode 字符串

大端 Unicode 字符串

作者: 自由快挂 | 来源:发表于2017-05-03 14:21 被阅读26次

    首先,Lua 的字符串本质上是字节序列,本身并不区分编码;下面的代码假设字符串内容是 UTF-8 编码(例如源文件以 UTF-8 保存)。

    --- Convert a UTF-8 string into big-endian UTF-16 code units, each written
    -- as a "\uXXXX" escape (four lowercase hex digits, high byte first).
    --
    -- Code points above U+FFFF (4-byte UTF-8 sequences) are emitted as a
    -- surrogate pair. Bytes that do not form a well-formed sequence (stray
    -- continuation bytes, truncated sequences, invalid lead bytes, values
    -- outside U+10000..U+10FFFF in a 4-byte form) are replaced by U+FFFD
    -- instead of raising an error.
    --
    -- Only plain arithmetic is used, so no bitwise-operation library is needed.
    --
    -- @param convertStr the UTF-8 encoded string to convert
    -- @return the escaped string; a non-string argument is returned unchanged
    local function utf8_to_unicode(convertStr)
        if type(convertStr) ~= "string" then
            return convertStr
        end

        local REPLACEMENT = 0xfffd
        local len = #convertStr
        local out = {}   -- collected escapes, joined once at the end
        local count = 0
        local i = 1

        -- Low six payload bits of the continuation byte at `pos`, or nil when
        -- that byte is missing or is not of the form 10xxxxxx.
        local function continuation(pos)
            local b = string.byte(convertStr, pos)
            if b and b >= 0x80 and b <= 0xbf then
                return b - 0x80
            end
            return nil
        end

        -- Append one 16-bit code unit as "\uXXXX".
        local function emit(unit)
            count = count + 1
            out[count] = string.format("\\u%04x", unit)
        end

        while i <= len do
            local lead = string.byte(convertStr, i)
            local cp = REPLACEMENT   -- stays U+FFFD unless a sequence decodes
            local consumed = 1       -- malformed input is skipped one byte at a time

            if lead < 0x80 then
                -- 0xxxxxxx: ASCII
                cp = lead
            elseif lead >= 0xc0 and lead <= 0xdf then
                -- 110xxxxx 10xxxxxx
                local c1 = continuation(i + 1)
                if c1 then
                    cp = (lead - 0xc0) * 0x40 + c1
                    consumed = 2
                end
            elseif lead >= 0xe0 and lead <= 0xef then
                -- 1110xxxx 10xxxxxx 10xxxxxx
                local c1, c2 = continuation(i + 1), continuation(i + 2)
                if c1 and c2 then
                    cp = (lead - 0xe0) * 0x1000 + c1 * 0x40 + c2
                    consumed = 3
                end
            elseif lead >= 0xf0 and lead <= 0xf7 then
                -- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                local c1 = continuation(i + 1)
                local c2 = continuation(i + 2)
                local c3 = continuation(i + 3)
                if c1 and c2 and c3 then
                    local value = (lead - 0xf0) * 0x40000 + c1 * 0x1000 + c2 * 0x40 + c3
                    -- Only U+10000..U+10FFFF can be expressed as a surrogate pair.
                    if value >= 0x10000 and value <= 0x10ffff then
                        cp = value
                    end
                    consumed = 4
                end
            end

            if cp >= 0x10000 then
                -- Split into a high and a low surrogate (10 payload bits each).
                local offset = cp - 0x10000
                emit(0xd800 + math.floor(offset / 0x400))
                emit(0xdc00 + offset % 0x400)
            else
                emit(cp)
            end

            i = i + consumed
        end

        return table.concat(out)
    end
    

    使用:

    -- Convert, drop every "\u" prefix, then upper-case the remaining hex digits.
    local escaped = utf8_to_unicode('吉米 abc')
    -- string.gsub also returns a match count; assigning to a single local keeps
    -- only the resulting string.
    local hexDigits = string.gsub(escaped, '\\u', '')
    local ret = string.upper(hexDigits)
    print(ret)
    

    配合 java 验证:

    /**
     * Prints each byte of {@code bt} to standard output as an unsigned
     * hexadecimal number (no zero padding) followed by a space, then ends the
     * line with the array length.
     *
     * @param bt the bytes to print
     */
    public static void printbyte(byte[] bt)
    {
        for (byte b : bt) {
            // Mask to 0..255 so negative bytes are not sign-extended.
            System.out.print(Integer.toHexString(b & 0xff) + " ");
        }
        System.out.println(" length = " + bt.length);
    }
      
       @Test
    public void test1() throws UnsupportedEncodingException {
        final String name = "杨彬abc";

        // Platform default charset first, then each explicitly named charset,
        // printing the encoded bytes after every conversion.
        printbyte(name.getBytes());
        for (String charsetName : new String[] {"utf-8", "utf-16be", "utf-16le", "gbk"}) {
            printbyte(name.getBytes(charsetName));
        }

        /*
         * Output recorded by the author:
         *
         * e6 9d a8 e5 bd ac 61 62 63  length = 9   default charset
         * e6 9d a8 e5 bd ac 61 62 63  length = 9   utf-8: each Chinese character takes three bytes
         *   (UTF-8 stores these Chinese characters in 3 bytes and an English letter in 1 byte;
         *    UTF-16 stores both these Chinese characters and the English letters in 2 bytes)
         * 67 68 5f 6c 0 61 0 62 0 63  length = 10  big-endian (utf-16be): two bytes per character
         * 68 67 6c 5f 61 0 62 0 63 0  length = 10  little-endian (utf-16le)
         *   (GBK stores each Chinese character in two bytes)
         * d1 ee b1 f2 61 62 63  length = 7         gbk
         */
        // Author's note: socket communication in network programming stores
        // strings as utf-16le.
    }
    

    http://www.cocoachina.com/bbs/read.php?tid-312194-page-1.html
    http://yangbinfx.iteye.com/blog/1768501
    http://blog.csdn.net/operhero1990/article/details/47044697

    相关文章

      网友评论

          本文标题:大端 Unicode 字符串

          本文链接:https://www.haomeiwen.com/subject/ckwktxtx.html