Redis sds

作者: 多多的大白 | 来源:发表于2019-08-15 10:57 被阅读0次

    一、SDS结构

    老版本
    // Redis 3.0 and earlier
    // (layout as seen in Redis 2.6, sds.h / struct sdshdr)
    struct sdshdr {
        // Number of bytes of buf currently in use.
        unsigned int len;
        // Number of bytes of buf that are allocated but not in use.
        unsigned int free;
        // Byte array holding the string data;
        // the last stored byte is the terminating '\0'.
        char buf[];
    };
    
    sds优点
     频繁操作数据会对性能存在很大影响
    
    1. 空间预分配
      a、小于1MB SDS分配的len和free 是相同,如len=5 ,free=5,buf总长度为 5+5+1 ,其中1为结束符\0。
      b、大于等于1MB时,每次只额外预留1MB的未使用空间,如len=3MB,buf总长度为 3MB+1MB+1byte。
      
      在扩展SDS空间之前,SDS API会先检查未使用空间是否足够,如果足够的话,API就会直接使用未使用空间,而无须执行内存重分配。
      
      通过这种预分配策略,SDS将连续增长N次字符串所需的内存重分配次数从必定N次降低为最多N次。
      (内存重分配的次数就是这么减少的,代价是预留了一部分暂未使用的空间)
      
    
    2. 惰性空间释放
       sds在对字符串做缩短操作时,程序并不立即通过内存重分配来回收多出来的字节,而是用free属性把这些字节的数量记录下来,留待将来使用。
    
    现在使用版本
    /* An sds value is a plain char pointer to the string bytes; the header
     * (see the sdshdr* structs) is stored in memory just before it. */
    typedef char *sds;
    /* Layout of a type 5 SDS string: one flags byte followed by the data.
     * Note: sdshdr5 is never used, we just access the flags byte directly.
     * However is here to document the layout of type 5 SDS strings. */
    struct __attribute__ ((__packed__)) sdshdr5 {
        unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
        char buf[]; /* string data, followed by a terminating '\0' */
    };
    /* SDS header with 8-bit len/alloc fields. The struct is packed and the
     * flags byte is the last member before buf. */
    struct __attribute__ ((__packed__)) sdshdr8 {
        uint8_t len; /* number of bytes of buf in use */
        uint8_t alloc; /* bytes allocated for buf, excluding the header and null terminator */
        unsigned char flags; /* 3 lsb of type, 5 unused bits */
        char buf[]; /* string data, followed by a terminating '\0' */
    };
    /* SDS header with 16-bit len/alloc fields. The struct is packed and the
     * flags byte is the last member before buf. */
    struct __attribute__ ((__packed__)) sdshdr16 {
        uint16_t len; /* number of bytes of buf in use */
        uint16_t alloc; /* bytes allocated for buf, excluding the header and null terminator */
        unsigned char flags; /* 3 lsb of type, 5 unused bits */
        char buf[]; /* string data, followed by a terminating '\0' */
    };
    /* SDS header with 32-bit len/alloc fields. The struct is packed and the
     * flags byte is the last member before buf. */
    struct __attribute__ ((__packed__)) sdshdr32 {
        uint32_t len; /* number of bytes of buf in use */
        uint32_t alloc; /* bytes allocated for buf, excluding the header and null terminator */
        unsigned char flags; /* 3 lsb of type, 5 unused bits */
        char buf[]; /* string data, followed by a terminating '\0' */
    };
    /* SDS header with 64-bit len/alloc fields. The struct is packed and the
     * flags byte is the last member before buf. */
    struct __attribute__ ((__packed__)) sdshdr64 {
        uint64_t len; /* number of bytes of buf in use */
        uint64_t alloc; /* bytes allocated for buf, excluding the header and null terminator */
        unsigned char flags; /* 3 lsb of type, 5 unused bits */
        char buf[]; /* string data, followed by a terminating '\0' */
    };
    
    
    1. len 记录当前字节数组的长度(不包括\0)
    2. alloc记录了当前字节数组总共分配的内存大小(不包括头部和末尾的\0)
    3. flags记录了当前字节数组的属性、用来标识到底是sdshdr8还是sdshdr16等
    4. buf保存了字符串真正的值以及末尾的一个\0
    

    5种不同类型的数据结构分别对应不同长度的字符串需求

    /* Select the SDS header type for a string of 'string_size' bytes.
     *
     * Sizes below 2^5 map to SDS_TYPE_5, below 2^8 to SDS_TYPE_8 and below
     * 2^16 to SDS_TYPE_16. When LONG_MAX == LLONG_MAX, sizes below 2^32 map
     * to SDS_TYPE_32; otherwise that test is compiled out. Every remaining
     * size maps to SDS_TYPE_64. */
    static inline char sdsReqType(size_t string_size) {
        char type;

        if (string_size < 1<<5) {
            type = SDS_TYPE_5;
        } else if (string_size < 1<<8) {
            type = SDS_TYPE_8;
        } else if (string_size < 1<<16) {
            type = SDS_TYPE_16;
    #if (LONG_MAX == LLONG_MAX)
        } else if (string_size < 1ll<<32) {
            type = SDS_TYPE_32;
    #endif
        } else {
            type = SDS_TYPE_64;
        }
        return type;
    }
    

    sdshdr5数据结构说明:
    没有alloc 和len

    原因:

    /* Header type codes, kept in the 3 least significant bits of the flags
     * byte. */
    #define SDS_TYPE_5  0
    #define SDS_TYPE_8  1
    #define SDS_TYPE_16 2
    #define SDS_TYPE_32 3
    #define SDS_TYPE_64 4
    /* Mask (binary 111) ANDed with the flags byte to extract the type code. */
    #define SDS_TYPE_MASK 7
    

    可以看出SDS_TYPE只占用了0,1,2,3,4五个数字,正好占用三位。

    由于sdshdr5只用来存储长度小于32字节的字符串,
    因此flags的5个bit就能满足长度记录,加上type所需的3bit,刚好为8bit一个字节。
    因此sdshdr5不需要单独的len记录长度,并且只有32个字节的存储空间,动态的变更内存余地较小,
    所以 redis 直接不存储alloc,当sdshdr5需要扩展时会直接变更成更大的SDS数据结构。
    除此之外,SDS都会多分配1个字节用来保存'\0'。

    SDS 创建

    /* Create a new sds string holding the 'initlen' bytes found at 'init'.
     * If NULL is used for 'init' the whole allocation (header and buffer) is
     * filled with zero bytes instead.
     *
     * Returns the new string, or NULL if the allocation fails or if the total
     * size (header + initlen + terminator) does not fit in a size_t.
     *
     * The string is always null-terminated (all the sds strings are, always)
     * so even if you create an sds string with:
     *
     * mystring = sdsnewlen("abc",3);
     *
     * You can print the string with printf() as there is an implicit \0 at the
     * end of the string. However the string is binary safe and can contain
     * \0 characters in the middle, as the length is stored in the sds header.
     */
    sds sdsnewlen(const void *init, size_t initlen) {
        void *sh;
        sds s;
        char type = sdsReqType(initlen);
        /* Empty strings are usually created in order to append. Use type 8
         * since type 5 is not good at this: the type is forced to
         * SDS_TYPE_8 directly. */
        if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
        int hdrlen = sdsHdrSize(type);
        unsigned char *fp; /* flags pointer. */

        /* size_t arithmetic wraps silently: a total that is not larger than
         * 'initlen' means the size computation overflowed. */
        if ((size_t)hdrlen+initlen+1 <= initlen) return NULL;
        sh = s_malloc(hdrlen+initlen+1);
        /* Check the allocation before touching it: the zero fill below must
         * never run on a NULL pointer. */
        if (sh == NULL) return NULL;
        if (!init)
            memset(sh, 0, hdrlen+initlen+1);
        s = (char*)sh+hdrlen;
        /* The flags byte is the one immediately before the string bytes. */
        fp = ((unsigned char*)s)-1;
        switch(type) {
            case SDS_TYPE_5: {
                /* Type 5 has no len/alloc fields: the length is packed into
                 * the flags byte above the type bits. */
                *fp = type | (initlen << SDS_TYPE_BITS);
                break;
            }
            case SDS_TYPE_8: {
                SDS_HDR_VAR(8,s);
                sh->len = initlen;
                sh->alloc = initlen;
                *fp = type;
                break;
            }
            case SDS_TYPE_16: {
                SDS_HDR_VAR(16,s);
                sh->len = initlen;
                sh->alloc = initlen;
                *fp = type;
                break;
            }
            case SDS_TYPE_32: {
                SDS_HDR_VAR(32,s);
                sh->len = initlen;
                sh->alloc = initlen;
                *fp = type;
                break;
            }
            case SDS_TYPE_64: {
                SDS_HDR_VAR(64,s);
                sh->len = initlen;
                sh->alloc = initlen;
                *fp = type;
                break;
            }
        }
        if (initlen && init)
            memcpy(s, init, initlen);
        s[initlen] = '\0';
        return s;
    }
    

    SDS扩容

    /* Preallocation threshold and step: 1 MB (1024*1024 bytes). */
    #define SDS_MAX_PREALLOC (1024*1024)
    
    /* Enlarge the free space at the end of the sds string so that the caller
     * is sure that after calling this function can overwrite up to addlen
     * bytes after the end of the string, plus one more byte for nul term.
     *
     * Returns the string to use from now on (it may have been moved), or NULL
     * if the allocation fails or if the requested size does not fit in a
     * size_t. On such an overflow 's' is not modified.
     *
     * Note: this does not change the *length* of the sds string as returned
     * by sdslen(), but only the free buffer space we have. */
    sds sdsMakeRoomFor(sds s, size_t addlen) {
        void *sh, *newsh;
        size_t avail = sdsavail(s);
        size_t len, newlen, reqlen;
        char type, oldtype = s[-1] & SDS_TYPE_MASK;
        int hdrlen;

        /* Return ASAP if there is enough space left. */
        if (avail >= addlen) return s;

        len = sdslen(s);
        sh = (char*)s-sdsHdrSize(oldtype);
        reqlen = newlen = (len+addlen);
        /* size_t arithmetic wraps silently: a sum smaller than 'len' means
         * the request overflowed. */
        if (newlen < len) return NULL;
        /* Preallocate beyond the request: double it while it is below
         * SDS_MAX_PREALLOC, otherwise add SDS_MAX_PREALLOC. */
        if (newlen < SDS_MAX_PREALLOC)
            newlen *= 2;
        else
            newlen += SDS_MAX_PREALLOC;

        type = sdsReqType(newlen);

        /* Don't use type 5: the user is appending to the string and type 5 is
         * not able to remember empty space, so sdsMakeRoomFor() must be called
         * at every appending operation. */
        if (type == SDS_TYPE_5) type = SDS_TYPE_8;

        hdrlen = sdsHdrSize(type);
        /* Catch a wrap in the preallocation step or in the final
         * header + buffer + terminator size. */
        if ((size_t)hdrlen+newlen+1 <= reqlen) return NULL;
        if (oldtype==type) {
            newsh = s_realloc(sh, hdrlen+newlen+1);
            if (newsh == NULL) return NULL;
            s = (char*)newsh+hdrlen;
        } else {
            /* Since the header size changes, need to move the string forward,
             * and can't use realloc */
            newsh = s_malloc(hdrlen+newlen+1);
            if (newsh == NULL) return NULL;
            memcpy((char*)newsh+hdrlen, s, len+1);
            s_free(sh);
            s = (char*)newsh+hdrlen;
            s[-1] = type;
            sdssetlen(s, len);
        }
        sdssetalloc(s, newlen);
        return s;
    }
    
    该函数便是扩大sds空间,但是感觉上还是想让sds中available空间的大小能够容纳addlen大小的字符串,并不是改变了sds中buf的长度,
    而是改变了sds中available空间的大小,
    如果当前available空间的大小大于等于addlen的大小,那么便不作修改;
    如果available空间的大小小于addlen的大小,那么就会重新分配sds中alloc的大小,
    newlen并不是无脑直接让alloc加上addlen,而是使用sds的长度加上addlen的长度
    作为newlen,但是经常重新分配内存会对效率有所影响,但是为了防止重新分配内存
    对效率的影响而让newlen无脑翻倍的话,又会对内存造成影响,造成内存占用过高,
    但是很大一部分内存并没有使用,所以取得了一个折中的办法,就是在newlen小于
    SDS_MAX_PREALLOC(1M),对newlen进行翻倍,
    在newlen大于SDS_MAX_PREALLOC的情况下,让newlen加上SDS_MAX_PREALLOC。
    
    符合前面提的空间预分配
    

    SDS惰性空间释放

    在SDS的字符串缩短操作中,多余出来的空间并不会直接释放,而是保留这部分空间,待以后再用
    
    /* Strip from both ends of 's' every leading and trailing character that
     * appears in 'cset', that is a null terminated C string.
     *
     * The string is trimmed in place: the characters kept are moved to the
     * start of the buffer, a new terminator is written, the length is updated
     * with sdssetlen() and the same pointer that was passed in is returned.
     *
     * Example:
     *
     * s = sdsnew("AA...AA.a.aa.aHelloWorld     :::");
     * s = sdstrim(s,"Aa. :");
     * printf("%s\n", s);
     *
     * Output will be just "HelloWorld".
     */
    sds sdstrim(sds s, const char *cset) {
        char *last = s+sdslen(s)-1;
        char *left = s;
        char *right = last;
        size_t kept;

        /* Skip the leading characters that belong to the set. */
        while (left <= last && strchr(cset, *left) != NULL)
            left++;
        /* Walk back over the trailing characters that belong to the set. */
        while (right > left && strchr(cset, *right) != NULL)
            right--;

        if (left > right)
            kept = 0;
        else
            kept = (right-left)+1;

        /* Only move when something was removed from the front. */
        if (left != s)
            memmove(s, left, kept);
        s[kept] = '\0';
        sdssetlen(s,kept);
        return s;
    }
    

    真正将空间释放还是会根据实际字符串情况返回对应类型。

    例如以前是一个sdshdr64的sds,在redis运行过程中,buf的内容被修改了,变短了,那么多出来的内容就需要释放掉,还给系统,并且,如果修改得比较多,现在一个sdshdr16的sds就能容纳下,那么当前sds的type还会被修改,因为不同的sds类型占用的空间也是不一样的。

    /* Reallocate the sds string so that it has no free space at the end. The
     * contained string remains not altered, but next concatenation operations
     * will require a reallocation.
     *
     * Returns the string to use from now on, or NULL if the allocation fails.
     *
     * After the call, the passed sds string is no longer valid and all the
     * references must be substituted with the new pointer returned by the call. */
    sds sdsRemoveFreeSpace(sds s) {
        void *sh, *newsh;
        char type, oldtype = s[-1] & SDS_TYPE_MASK;
        int hdrlen, oldhdrlen = sdsHdrSize(oldtype);
        size_t len = sdslen(s);
        sh = (char*)s-oldhdrlen;

        /* Check what would be the minimum SDS header that is just good enough to
         * fit this string. */
        type = sdsReqType(len);
        hdrlen = sdsHdrSize(type);

        /* If the type is the same, or at least a large enough type is still
         * required, we just realloc(), letting the allocator to do the copy
         * only if really needed. Otherwise if the change is huge, we manually
         * reallocate the string to use the different header type. */
        if (oldtype==type || type > SDS_TYPE_8) {
            /* The old header is kept, so the old header size is used. */
            newsh = s_realloc(sh, oldhdrlen+len+1);
            if (newsh == NULL) return NULL;
            s = (char*)newsh+oldhdrlen;
        } else {
            newsh = s_malloc(hdrlen+len+1);
            if (newsh == NULL) return NULL;
            /* Copy the string together with its terminator. */
            memcpy((char*)newsh+hdrlen, s, len+1);
            s_free(sh);
            s = (char*)newsh+hdrlen;
            s[-1] = type;
            sdssetlen(s, len);
        }
        sdssetalloc(s, len);
        return s;
    }
    
    
    总结:
    类似java 的ArrayList
    

    遗留问题

    1. Redis的embstr编码方式和raw编码方式分别是什么,区别是什么
    2. 为什么sdshdr5(SDS_TYPE_5)不被使用

    相关文章

      网友评论

        本文标题:Redis sds

        本文链接:https://www.haomeiwen.com/subject/idiajctx.html