一个 Bucket 就是一棵 B+ 树,从元素的查找从上到下分为:
- 首先找到 Bucket 的根节点。也就是 B+ 树的根节点的
page id
- 取对应的 page,转化为内存中的
node
- 如果是非叶子节点,则按照
key
查找合适的叶子节点的page id
- 重复2 3 直到找到叶子节点,返回
node
中对应val
// Get retrieves the value for a key in the bucket.
// Returns a nil value if the key does not exist or if the key is a nested bucket.
// The returned value is only valid for the life of the transaction.
func (b *Bucket) Get(key []byte) []byte {
k, v, flags := b.Cursor().seek(key)
// Return nil if this is a bucket.
if (flags & bucketLeafFlag) != 0 {
return nil
}
// If our target node isn't the same key as what's passed in then return nil.
if !bytes.Equal(key, k) {
return nil
}
return v
}
Cursor
对Bucket这颗b+树的遍历工作由Cursor来执行。一个Bucket对象关联一个Cursor
type Cursor struct {
bucket *Bucket
stack []elemRef // 保存遍历搜索的路径
}
// elemRef represents a reference to an element on a given page/node.
type elemRef struct {
page *page // 记录page
node *node // page对应的node
index int // 记录key在叶子节点中的key-value 切片中的索引
}
// isLeaf returns whether the ref is pointing at a leaf page/node.
func (r *elemRef) isLeaf() bool {
if r.node != nil {
return r.node.isLeaf
}
return (r.page.flags & leafPageFlag) != 0
}
// count returns the number of inodes or page elements.
func (r *elemRef) count() int {
if r.node != nil {
return len(r.node.inodes)
}
return int(r.page.count)
}
Cursor 查找接口 分为3类:
-
定位到某一个元素的位置
// Seek moves the cursor to a given key and returns it. // If the key does not exist then the next key is used. If no keys // follow, a nil key is returned. // The returned key and value are only valid for the life of the transaction. func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) // First moves the cursor to the first item in the bucket and returns its key and value. // If the bucket is empty then a nil key and value are returned. // The returned key and value are only valid for the life of the transaction. func (c *Cursor) First() (key []byte, value []byte) // Last moves the cursor to the last item in the bucket and returns its key and value. // If the bucket is empty then a nil key and value are returned. // The returned key and value are only valid for the life of the transaction. func (c *Cursor) Last() (key []byte, value []byte)
-
在当前位置从前往后找
// Prev moves the cursor to the previous item in the bucket and returns its key and value. // If the cursor is at the beginning of the bucket then a nil key and value are returned. // The returned key and value are only valid for the life of the transaction. func (c *Cursor) Prev() (key []byte, value []byte)
-
在当前位置从后往前找
// Next moves the cursor to the next item in the bucket and returns its key and value. // If the cursor is at the end of the bucket then a nil key and value are returned. // The returned key and value are only valid for the life of the transaction. func (c *Cursor) Next() (key []byte, value []byte)
seek
// seek moves the cursor to a given key and returns it.
// If the key does not exist then the next key is used.
func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
_assert(c.bucket.tx.db != nil, "tx closed")
// Start from root page/node and traverse to correct page.
c.stack = c.stack[:0]
// 开始根据seek的key值搜索root
c.search(seek, c.bucket.root)
ref := &c.stack[len(c.stack)-1]
// If the cursor is pointing to the end of page/node then return nil.
if ref.index >= ref.count() {
return nil, nil, 0
}
// If this is a bucket then return a nil value.
return c.keyValue()
}
传入 c.serch
方法的参数就是要查找的 key 和 value
// 从根节点开始遍历
// search recursively performs a binary search against a given page/node until it finds a given key.
func (c *Cursor) search(key []byte, pgid pgid) {
// root,3
// 层层找page
p, n := c.bucket.pageNode(pgid)
if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
}
e := elemRef{page: p, node: n}
// 记录遍历过的路径
c.stack = append(c.stack, e)
// If we're on a leaf page/node then find the specific node.
// 如果是叶子节点,找具体的值node
if e.isLeaf() {
c.nsearch(key)
return
}
if n != nil {
// 先搜索node,因为node是加载到内存中的
c.searchNode(key, n)
return
}
// 其次再在page中搜索
c.searchPage(key, p)
}
c (b *Bucket) pageNode(id pgid) (*page, *node) {
// Inline buckets have a fake page embedded in their value so treat them
// differently. We'll return the rootNode (if available) or the fake page.
// 内联页的话,就直接返回其page了
if b.root == 0 {
if id != 0 {
panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
}
if b.rootNode != nil {
return nil, b.rootNode
}
return b.page, nil
}
// Check the node cache for non-inline buckets.
if b.nodes != nil {
if n := b.nodes[id]; n != nil {
return nil, n
}
}
// Finally lookup the page from the transaction if no node is materialized.
return b.tx.page(id), nil
}
nsearch
负责完成页内搜素,当key存在时,获取该key的index,当key不存在时,获取到第一个大于该key的index。
func (c *Cursor) nsearch(key []byte) {
e := &c.stack[len(c.stack)-1]
p, n := e.page, e.node
// If we have a node then search its inodes.
// 先搜索node
if n != nil {
//又是二分搜索
index := sort.Search(len(n.inodes), func(i int) bool {
return bytes.Compare(n.inodes[i].key, key) != -1
})
e.index = index
return
}
// If we have a page then search its leaf elements.
// 再搜索page
inodes := p.leafPageElements()
index := sort.Search(int(p.count), func(i int) bool {
return bytes.Compare(inodes[i].key(), key) != -1
})
e.index = index
}
Cursor.stack
中保存了查找对应 key 的路径,栈顶保存了 key 所在的结点和位置:
func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
//最后一个节点为叶子节点
ref := &c.stack[len(c.stack)-1]
if ref.count() == 0 || ref.index >= ref.count() {
return nil, nil, 0
}
// Retrieve value from node.
// 先从内存中取
if ref.node != nil {
inode := &ref.node.inodes[ref.index]
return inode.key, inode.value, inode.flags
}
// 其次再从文件page中取
// Or retrieve value from page.
elem := ref.page.leafPageElement(uint16(ref.index))
return elem.key(), elem.value(), elem.flags
}
为什么这里需要栈这一数据结构?
因为Boltdb中的B+树也不是传统意义上的B+树,即叶子节点并没有连接起来,所以实现下面的功能,需要stack。
参考资料:
1、https://juejin.cn/post/6999262524411494430
2、https://youjiali1995.github.io/storage/boltdb/
网友评论