redis ziplist（9）

2023年8月22日 309次阅读来源: lmem

首先要了解设计思想！！！
在 Redis 中，list 有两种存储方式：双链表（LinkedList）和压缩双链表（ziplist）。双链表即普通数据结构中遇到的，在 adlist.h 和 adlist.c 中实现。压缩双链表以连续的内存空间来表示双链表，压缩双链表节省前驱和后驱指针的空间（8b），这在小的list 上，压缩效率是非常明显的，因为一个普通的双链表中，前驱后驱指针在 64 位机器上需分别占用 8B；压缩双链表在 ziplist.h 和 ziplist.c 中实现。这篇主要详述压缩双链表，普通双链表可以参看其他
在压缩双链表中，节省了前驱和后驱指针的空间，在 64 位机器上共节省了 8 个字节，这让数据在内存中更为紧凑。只要清晰的描述每个数据项的边界，就可以轻易得到前驱后驱数据项的位置，ziplist 就是这么做的。

1.ziplist 的数据格式

<zlbytes><zltail><zllen><entry>...<entry><zlend>

2. entry 的数据格式

|<prelen><<encoding+lensize><len>><data>|
|---1----------------2--------------3---|
prelen 1 或5 字节
<encoding+lensize><len> 1 或 5 字节

域 1）是前驱数据项的大小。因为不用描述前驱的数据类型，描述较为简单。
域 2）是此数据项的的类型和数据大小。为了节省空间，redis 预设定了多种长度的字符串和整数。
3 种长度的字符串：

#define ZIP_STR_06B (0 << 6)
#define ZIP_STR_14B (1 << 6)
#define ZIP_STR_32B (2 << 6)

5 种长度的整数：

#define ZIP_INT_16B (0xc0 | 0<<4)
#define ZIP_INT_32B (0xc0 | 1<<4)
#define ZIP_INT_64B (0xc0 | 2<<4)
#define ZIP_INT_24B (0xc0 | 3<<4)
#define ZIP_INT_8B 0xfe

头部信息中的另一个值记录着编码的方式，当编码的是字符串，头部的前2位为00,01,10共3种  
 * 如果编码的是整型数字的时候，则头部的前2位为11,代表的是整数编码，后面2位代表什么类型整型值将会在头部后面被编码  
 * 00-int16_t, 01-int32_t, 10-int64_t, 11-24 bit signed，还有比较特殊的2个，11111110-8 bit signed,  
 * 1111 0000 - 1111 1101,代表的是整型值0-12,头尾都已经存在，都不能使用，与传统的通过固定的指针表示长度，这么做的好处实现  
 * 可以更合理的分配内存  
 *  
 * String字符串编码的3种形式  
 * |00pppppp| - 1 byte  
 *      String value with length less than or equal to 63 bytes (6 bits).  
 * |01pppppp|qqqqqqqq| - 2 bytes  
 *      String value with length less than or equal to 16383 bytes (14 bits).  
 * |10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes  
 *      String value with length greater than or equal to 16384 bytes.  
 * |11000000| - 1 byte  
 *      Integer encoded as int16_t (2 bytes).  
 * |11010000| - 1 byte  
 *      Integer encoded as int32_t (4 bytes).  
 * |11100000| - 1 byte  
 *      Integer encoded as int64_t (8 bytes).  
 * |11110000| - 1 byte  
 *      Integer encoded as 24 bit signed (3 bytes).  
 * |11111110| - 1 byte  
 *      Integer encoded as 8 bit signed (1 byte).  
 * |1111xxxx| - (with xxxx between 0000 and 1101) immediate 4 bit integer.  
 *      Unsigned integer from 0 to 12. The encoded value is actually from  
 *      1 to 13 because 0000 and 1111 can not be used, so 1 should be  
 *      subtracted from the encoded 4 bit value to obtain the right value.  
 * |11111111| - End of ziplist.  
 *

3.插入操作

/* Insert item at "p". */
static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen;
    unsigned int prevlensize, prevlen = 0;
    size_t offset;
    int nextdiff = 0;
    unsigned char encoding = 0;
    long long value = 123456789; /* initialized to avoid warning. Using a value
                                    that is easy to see if for some reason
                                    we use it uninitialized. */
    zlentry tail;

    /* Find out prevlen for the entry that is inserted. */
    if (p[0] != ZIP_END) {
        ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
    } else {
        unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);//获取尾部数据
        if (ptail[0] != ZIP_END) {
            prevlen = zipRawEntryLength(ptail);
        }
    }

    /* See if the entry can be encoded */
    if (zipTryEncoding(s,slen,&value,&encoding)) {
        /* 'encoding' is set to the appropriate integer encoding */
        reqlen = zipIntSize(encoding);
    } else {
        /* 'encoding' is untouched, however zipEncodeLength will use the
         * string length to figure out how to encode it. */
        reqlen = slen;
    }
    /* We need space for both the length of the previous entry and
     * the length of the payload. */
    reqlen += zipPrevEncodeLength(NULL,prevlen);
    reqlen += zipEncodeLength(NULL,encoding,slen);

    /* When the insert position is not equal to the tail, we need to
     * make sure that the next entry can hold this entry's length in
     * its prevlen field. */
    // 确保下一个的prelen字段可以使用插入值
    nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;

    /* Store offset because a realloc may change the address of zl. */
    offset = p-zl;
    zl = ziplistResize(zl,curlen+reqlen+nextdiff);//改变大小
    p = zl+offset;//定位到p

    /* Apply memory move when necessary and update tail offset. */
    if (p[0] != ZIP_END) {
        /* Subtract one because of the ZIP_END bytes */
        //腾出位置
        //dest src len
        memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);

        /* Encode this entry's raw length in the next entry. */
        zipPrevEncodeLength(p+reqlen,reqlen);

        /* Update offset for tail */
        ZIPLIST_TAIL_OFFSET(zl) =
            intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);

        /* When the tail contains more than one entry, we need to take
         * "nextdiff" in account as well. Otherwise, a change in the
         * size of prevlen doesn't have an effect on the *tail* offset. */
        zipEntry(p+reqlen, &tail);
        if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
            ZIPLIST_TAIL_OFFSET(zl) =
                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
        }
    } else {
        /* This element will be the new tail. */
        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
    }

    /* When nextdiff != 0, the raw length of the next entry has changed, so
     * we need to cascade the update throughout the ziplist */
    if (nextdiff != 0) {
        offset = p-zl;
        zl = __ziplistCascadeUpdate(zl,p+reqlen);
        p = zl+offset;
    }

    /* Write the entry */
    //把前一个的lenth写入p，返回字节大小
    p += zipPrevEncodeLength(p,prevlen);
    //写入 slen,返回当前entry所占用的长度1,或5
    p += zipEncodeLength(p,encoding,slen);
    //写入数据，字符串直接拷贝，int需要单独判断长度
    if (ZIP_IS_STR(encoding)) {
        memcpy(p,s,slen);
    } else {
        zipSaveInteger(p,value,encoding);
    }
    ZIPLIST_INCR_LENGTH(zl,1);
    return zl;
}

4. 查找

/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries
 * between every comparison. Returns NULL when the field could not be found.
 * |<prelen><<encoding+lensize><len>><data>|
 * |---1----------------2--------------3---|*/
//skip 每隔skip个跳过一个
unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip) {
    int skipcnt = 0;
    unsigned char vencoding = 0;
    long long vll = 0;

    while (p[0] != ZIP_END) {//最后一个字节结束标识为111111111
        unsigned int prevlensize, encoding, lensize, len;
        unsigned char *q;
        // prevlensize 1或者5
        // |00pppppp| - 1 byte
        //*      String value with length less than or equal to 63 bytes (6 bits).
        //* |01pppppp|qqqqqqqq| - 2 bytes
        //*      String value with length less than or equal to 16383 bytes (14 bits).
        //* |10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes
        ZIP_DECODE_PREVLENSIZE(p, prevlensize);
        //encoding 编码方式1字节 lensize len的字节大小 len 数据的长度
        ZIP_DECODE_LENGTH(p + prevlensize, encoding, lensize, len);
        //指针移动到数据位置
        q = p + prevlensize + lensize;

        if (skipcnt == 0) {//可以取数据判断了
            /* Compare current entry with specified entry */
            if (ZIP_IS_STR(encoding)) {
                //字符串比较
                if (len == vlen && memcmp(q, vstr, vlen) == 0) {
                    return p;
                }
            } else {
                /* Find out if the searched field can be encoded. Note that
                 * we do it only the first time, once done vencoding is set
                 * to non-zero and vll is set to the integer value. */
                if (vencoding == 0) {
                    //长度vlen，获取整数的编码vencoding，以及值vll
                    if (!zipTryEncoding(vstr, vlen, &vll, &vencoding)) {
                        /* If the entry can't be encoded we set it to
                         * UCHAR_MAX so that we don't retry again the next
                         * time. */
                        vencoding = UCHAR_MAX;
                    }
                    /* Must be non-zero by now */
                    assert(vencoding);
                }

                /* Compare current entry with specified entry, do it only
                 * if vencoding != UCHAR_MAX because if there is no encoding
                 * possible for the field it can't be a valid integer. */
                if (vencoding != UCHAR_MAX) {
                    // 链表中的value
                    long long ll = zipLoadInteger(q, encoding);
                    if (ll == vll) {
                        return p;
                    }
                }
            }

            /* Reset skip count */
            skipcnt = skip;
        } else {
            /* Skip entry */
            skipcnt--;
        }

        /* Move to next entry */
        p = q + len;
    }

    return NULL;
}

    原文作者：lmem
    原文地址: https://www.jianshu.com/p/d23a7ff42f2b
    本文转自网络文章，转载此文章仅为分享知识，如有侵权，请联系博主进行删除。