美文网首页
c++实现UTF-16转UCS4

c++实现UTF-16转UCS4

作者: 一路向后 | 来源:发表于2021-11-23 22:32 被阅读0次

    1.UTF-16编码规则

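    在UTF-16中,一个码点用一个16位码元(单16位)或两个16位码元(双16位,共32位,称为代理对)表示。代理对的前16位(高位代理)形如 1101 10xx xxxx xxxx,取值区间为 D800~DBFF;后16位(低位代理)形如 1101 11xx xxxx xxxx,取值区间为 DC00~DFFF;其余取值均为单16位,码元的值就是码点本身。解码代理对时,取高位代理的低10位作为高10位、低位代理的低10位作为低10位,拼成20位数值后再加上 0x10000,即:码点 = 0x10000 + ((高位代理 & 0x3FF) << 10) + (低位代理 & 0x3FF)。例如 D802 DC01 解码为 0x10000 + (0x002 << 10) + 0x001 = 0x10801。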

    2.源码实现

    #include <cstdio>
    #include <cstring>
    #include <iostream>
    
    using namespace std;
    
    /// Stateless helper for decoding UTF-16 code units.
    class UTF16 {
    public:
        /// Decodes the code point that starts at utf16[0].
        ///
        /// @param utf16 Input code units. utf16[0] is always read; utf16[1] is
        ///              read only when utf16[0] is a high surrogate (D800..DBFF).
        /// @param ucs4  Output buffer of two elements. On success ucs4[0] holds
        ///              the low 16 bits of the code point and ucs4[1] the high
        ///              16 bits. It is left untouched on failure.
        /// @return 2 if a surrogate pair was decoded, 1 if a single non-surrogate
        ///         unit was copied, -1 if either pointer is null, the input
        ///         starts with a low surrogate, or a high surrogate is not
        ///         followed by a low surrogate.
        static int toUCS4(const unsigned short *utf16, unsigned short *ucs4);
    };

    int UTF16::toUCS4(const unsigned short *utf16, unsigned short *ucs4)
    {
        if(utf16 == nullptr || ucs4 == nullptr)
        {
            return -1;
        }

        const unsigned short first = utf16[0];

        // Anything outside D800..DFFF is a complete code point on its own.
        if(first < 0xd800 || first > 0xdfff)
        {
            ucs4[0] = first;
            ucs4[1] = 0x00;

            return 1;
        }

        // A sequence must not begin with a low surrogate (DC00..DFFF).
        if(first >= 0xdc00)
        {
            return -1;
        }

        // A high surrogate must be followed by a low surrogate.
        const unsigned short second = utf16[1];

        if(second < 0xdc00 || second > 0xdfff)
        {
            return -1;
        }

        // Each surrogate carries 10 payload bits. The resulting 20-bit value is
        // an offset from U+10000, so the base has to be ADDED; OR-ing a 1 into
        // the high half is only correct when that bit is not already set.
        const unsigned long high10 = first & 0x3ffUL;
        const unsigned long low10 = second & 0x3ffUL;
        const unsigned long codePoint = 0x10000UL + ((high10 << 10) | low10);

        ucs4[0] = static_cast<unsigned short>(codePoint & 0xffffUL);
        ucs4[1] = static_cast<unsigned short>(codePoint >> 16);

        return 2;
    }
    
    // Demo: decodes the surrogate pair D802 DC01 and prints the resulting code
    // point as eight hexadecimal digits.
    int main()
    {
        const unsigned short utf16[4] = {0xd802, 0xdc01, 0x00, 0x00};
        unsigned short ucs4[2] = {0, 0};

        if(UTF16::toUCS4(utf16, ucs4) < 0)
        {
            fprintf(stderr, "invalid UTF-16 sequence\n");

            return 1;
        }

        // Combine the halves arithmetically (ucs4[1] is the high 16 bits,
        // ucs4[0] the low 16 bits). Reading the array through an unsigned int
        // pointer would break the aliasing rules and depend on host byte order.
        const unsigned int codePoint = (static_cast<unsigned int>(ucs4[1]) << 16) | ucs4[0];

        printf("%08x\n", codePoint);

        return 0;
    }
    

    3.编译源码

    $ g++ -o test test.cpp -std=c++11
    

    4.运行及其结果

    $ ./test
    00010801
    

    相关文章

      网友评论

          本文标题:c++实现UTF-16转UCS4

          本文链接:https://www.haomeiwen.com/subject/chygtrtx.html