BoltDB（五）元素的查找

作者: wayyyy | 来源:发表于2022-08-15 00:47 被阅读0次

BoltDB（五）元素的查找
BoltDB（六）元素的插入
2020 算法列表查找
js
JavaScript基础 --- DOM操作
2018.7.21
( థ ౪ థ)梅西踢走我一套房
boltdb源码阅读
查找元素
查找元素

一个 Bucket 就是一棵 B+ 树，从元素的查找从上到下分为：

首先找到 Bucket 的根节点。也就是 B+ 树的根节点的 page id
取对应的 page，转化为内存中的 node
如果是非叶子节点，则按照key查找合适的叶子节点的 page id
重复2 3 直到找到叶子节点，返回node中对应val

// Get retrieves the value for a key in the bucket.
// Returns a nil value if the key does not exist or if the key is a nested bucket.
// The returned value is only valid for the life of the transaction.
func (b *Bucket) Get(key []byte) []byte {
    k, v, flags := b.Cursor().seek(key)
    // Return nil if this is a bucket.
    if (flags & bucketLeafFlag) != 0 {
        return nil
    }
    // If our target node isn't the same key as what's passed in then return nil.
    if !bytes.Equal(key, k) {
        return nil
    }
    return v
}

Cursor

对Bucket这颗b+树的遍历工作由Cursor来执行。一个Bucket对象关联一个Cursor

type Cursor struct {
    bucket *Bucket
    stack []elemRef     // 保存遍历搜索的路径
}

// elemRef represents a reference to an element on a given page/node.
type elemRef struct {
    page  *page    // 记录page
    node  *node    //  page对应的node 
    index int      // 记录key在叶子节点中的key-value 切片中的索引
}

// isLeaf returns whether the ref is pointing at a leaf page/node.
func (r *elemRef) isLeaf() bool {
    if r.node != nil {
        return r.node.isLeaf
    }
    return (r.page.flags & leafPageFlag) != 0
}

// count returns the number of inodes or page elements.
func (r *elemRef) count() int {
    if r.node != nil {
        return len(r.node.inodes)
    }
    return int(r.page.count)
}

Cursor 查找接口分为3类：

定位到某一个元素的位置

// Seek moves the cursor to a given key and returns it.
// If the key does not exist then the next key is used. If no keys
// follow, a nil key is returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Seek(seek []byte) (key []byte, value []byte)

// First moves the cursor to the first item in the bucket and returns its key and value.
// If the bucket is empty then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) First() (key []byte, value []byte)

// Last moves the cursor to the last item in the bucket and returns its key and value.
// If the bucket is empty then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Last() (key []byte, value []byte)

在当前位置从前往后找

// Prev moves the cursor to the previous item in the bucket and returns its key and value.
// If the cursor is at the beginning of the bucket then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Prev() (key []byte, value []byte)

在当前位置从后往前找

// Next moves the cursor to the next item in the bucket and returns its key and value.
// If the cursor is at the end of the bucket then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Next() (key []byte, value []byte)

seek

// seek moves the cursor to a given key and returns it.
// If the key does not exist then the next key is used.
func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
    _assert(c.bucket.tx.db != nil, "tx closed")

    // Start from root page/node and traverse to correct page.
    c.stack = c.stack[:0]
    // 开始根据seek的key值搜索root
    c.search(seek, c.bucket.root)
    ref := &c.stack[len(c.stack)-1]

    // If the cursor is pointing to the end of page/node then return nil.
    if ref.index >= ref.count() {
        return nil, nil, 0
    }

    // If this is a bucket then return a nil value.
    return c.keyValue()
}

传入 c.serch 方法的参数就是要查找的 key 和 value

// 从根节点开始遍历
// search recursively performs a binary search against a given page/node until it finds a given key.
func (c *Cursor) search(key []byte, pgid pgid) {
    // root，3
    // 层层找page
    p, n := c.bucket.pageNode(pgid)
    if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
        panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
    }
    e := elemRef{page: p, node: n}
    // 记录遍历过的路径
    c.stack = append(c.stack, e)
    // If we're on a leaf page/node then find the specific node.
    // 如果是叶子节点，找具体的值node
    if e.isLeaf() {
        c.nsearch(key)
        return
    }
    if n != nil {
        // 先搜索node，因为node是加载到内存中的
        c.searchNode(key, n)
        return
    }
    // 其次再在page中搜索
    c.searchPage(key, p)
}

c (b *Bucket) pageNode(id pgid) (*page, *node) {
    // Inline buckets have a fake page embedded in their value so treat them
    // differently. We'll return the rootNode (if available) or the fake page.
    // 内联页的话，就直接返回其page了
    if b.root == 0 {
        if id != 0 {
            panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
        }
        if b.rootNode != nil {
            return nil, b.rootNode
        }
        return b.page, nil
    }
    // Check the node cache for non-inline buckets.
    if b.nodes != nil {
        if n := b.nodes[id]; n != nil {
            return nil, n
        }
    }
    // Finally lookup the page from the transaction if no node is materialized.
    return b.tx.page(id), nil
}

nsearch负责完成页内搜素，当key存在时，获取该key的index，当key不存在时，获取到第一个大于该key的index。

func (c *Cursor) nsearch(key []byte) {
    e := &c.stack[len(c.stack)-1]
    p, n := e.page, e.node
    // If we have a node then search its inodes.
    // 先搜索node
    if n != nil {
        //又是二分搜索
        index := sort.Search(len(n.inodes), func(i int) bool {
            return bytes.Compare(n.inodes[i].key, key) != -1
        })
        e.index = index
        return
    }
    // If we have a page then search its leaf elements.
    // 再搜索page
    inodes := p.leafPageElements()
    index := sort.Search(int(p.count), func(i int) bool {
        return bytes.Compare(inodes[i].key(), key) != -1
    })
    e.index = index
}

Cursor.stack 中保存了查找对应 key 的路径，栈顶保存了 key 所在的结点和位置:

func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
  //最后一个节点为叶子节点
    ref := &c.stack[len(c.stack)-1]
    if ref.count() == 0 || ref.index >= ref.count() {
        return nil, nil, 0
    }
    // Retrieve value from node.
    // 先从内存中取
    if ref.node != nil {
        inode := &ref.node.inodes[ref.index]
        return inode.key, inode.value, inode.flags
    }
    // 其次再从文件page中取
    // Or retrieve value from page.
    elem := ref.page.leafPageElement(uint16(ref.index))
    return elem.key(), elem.value(), elem.flags
}

为什么这里需要栈这一数据结构？
因为Boltdb中的B+树也不是传统意义上的B+树，即叶子节点并没有连接起来，所以实现下面的功能，需要stack。