美文网首页MySQL
用Python实现一颗B树

用Python实现一颗B树

作者: RingKun | 来源:发表于2020-01-15 14:15 被阅读0次

    不了解B树的,可以先看下这边博客B树和B+树的插入、删除图文详解
    借一张图:

    834468-20180406232634472-395289491.png
    总体实现原理:
    • 插入:新增元素全部在叶子节点,个数达到阶数则分裂、进1到父节点,父节点满阶则继续分裂、进1,以此类推。总体上呈现为新增下沉,满阶上浮。
    • 删除:首先要弄清楚的是,关键字数=子节点数-1,每一个关键字对应有左右两个节点。找到要删除的节点关键字,取出对应该关键字的左右子节点,若左右节点包含的关键字总数量大于阶数则合并到一起重新分裂并上浮键值居中的关键字(和插入操作相同),否则随便上浮一个最接近的关键字到被删除位置即可。删除操作完全是插入的逆向过程,后续有时间再补上。

    实现:

    这里的阶数用的奇数,实际使用中,也不要用偶数恶心自己,毕竟数据库阶数一般都大于100.

    • 关键字
    class BKeyword(object):
        def __init__(self, key, data):
            self.key = key
            self.data = data
    
    • 结点
    class BNode(object):
        def __init__(self):
            self._parent: BNode = None
            self.keywords = []
            self.child_nodes = []
    
    • 完整代码
    
    from random import shuffle
    import random
    
    root_node = None
    
    _M = 5
    
    
    class Logger(object):
        @classmethod
        def tree(cls, node, child_name, dsc, depth):
            if depth == 0:
                head = "|   " * depth
                print(head + "+--" + dsc(node))
                depth = depth + 1
    
            for child in getattr(node, child_name):
                head = "|   " * depth
                print(head + "+--" + dsc(child))
                cls.tree(child, child_name, dsc, depth + 1)
    
    
    class BKeyword(object):
        def __init__(self, key, data):
            self.key = key
            self.data = data
    
    
    class BNode(object):
        def __init__(self):
            self._parent: BNode = None
            self.keywords = []
            self.child_nodes = []
    
        # 设置父节点
        def set_parent(self, node):
            self._parent = node
            if node.get_parent() is None:
                global root_node
                root_node = node.get_parent()
    
        # 获取父节点
        def get_parent(self):
            return self._parent
    
        # 添加子节点到指定位置
        def insert_child_node(self, index, add_node):
            add_node.set_parent(self)
            self.child_nodes.insert(index, add_node)
    
        # 添加子节点
        def append_child_node(self, add_node):
            add_node.set_parent(self)
            self.child_nodes.append(add_node)
    
        # 找到合适的插入点
        def find_add_index(self, add_word):
            if len(self.keywords) == 0:
                return 0
            index = 0
            while True:
                if index >= len(self.keywords):
                    break
                key = self.keywords[index].key
                if add_word.key < key:
                    break
                index = index + 1
            return index
    
        # 盲插,无论是否超出M值,先插入数据到合适位置
        def blind_add(self, word: BKeyword) -> int:
            index = self.find_add_index(word)
            self.keywords.insert(index, word)
    
        # 分裂
        def split(self):
            # 分裂节点
            parent, center_keyword, left_node, right_node = self.split_to_piece()
            # 添加分类后的两个新节点到父节点,并建立关系
            parent_add_index = parent.find_add_index(center_keyword)
            parent.insert_child_node(parent_add_index, right_node)
            parent.insert_child_node(parent_add_index, left_node)
            # 移除自身(包含M个关键字)
            if self in parent.child_nodes:
                parent.child_nodes.remove(self)
            # 注意:先解决子节点再解决父节点,否则会出问题
            parent.add_word(center_keyword, force=True)
            # 重新定义根结点
            root = self
            while root.get_parent() is not None:
                root = root.get_parent()
            global root_node
            root_node = root
    
        # 分裂成碎片
        def split_to_piece(self):
            center_keyword = self.keywords[int((_M-1)/2)]
            if self.get_parent() is None:
                self.set_parent(BNode())
            left_node = BNode()
            right_node = BNode()
            for keyword in self.keywords:
                if keyword.key < center_keyword.key:
                    left_node.keywords.append(keyword)
                elif keyword.key > center_keyword.key:
                    right_node.keywords.append(keyword)
            for i in range(len(self.child_nodes)):
                if i <= int((len(self.child_nodes) - 1)/2):
                    left_node.append_child_node(self.child_nodes[i])
                else:
                    right_node.append_child_node(self.child_nodes[i])
            return self.get_parent(), center_keyword, left_node, right_node
    
        # 新增关键字,force表示是否进位。默认添加到叶子结点,进位添加为直接添加,需要分裂则继续往父节点增加关键字。方向完全相反。
        def add_word(self, keyword, force=False):
            if keyword.key == 0:
                print("")
            # 叶子节点 或 进位添加
            if len(self.child_nodes) == 0 or force:
                if keyword.key == 20:
                    print("")
                self.blind_add(keyword)
                if len(self.keywords) == _M:
                    # 开始分裂
                    print("新增:" + str(keyword.key) + ", 达到m值,分裂")
                    self.split()
            else:
                # 非叶子节点
                index = self.find_add_index(keyword)
                if index >= len(self.child_nodes):
                    index = index - 1
                self.child_nodes[index].add_word(keyword)
    
    
    def print_tree(node):
    
        print("\n******************************")
    
        def dsc(node):
            s = ''
            for keyword in node.keywords:
                s = s + str(keyword.key) + ','
            # 打印内存地址
            # s = s + '[' + str(id(node)) + ']'
            s = s[:-1]
            return s
        Logger.tree(node, 'child_nodes', dsc,  0)
        print("******************************")
    
    
    def prepare():
        array = []
        number = 0
        for i in range(200):
            number = number + random.randint(1, 4)
            # number = number + 1
            array.append(number)
        shuffle(array)
        return array
    
    
    if __name__ == '__main__':
        root_node = BNode()
        array = prepare()
        for i in array:
            keyword = BKeyword(i, "数据" + str(i))
            root_node.add_word(keyword)
        print_tree(root_node)
    

    output:

    输出结果如下(横过来看,O(∩_∩)O哈哈~)

    +--97,202,341
    |   +--45,71
    |   |   +--6,14,27,35
    |   |   |   +--1,2
    |   |   |   +--10,12
    |   |   |   +--16,19,23
    |   |   |   +--31,34
    |   |   |   +--36,38,41
    |   |   +--51,61
    |   |   |   +--48,49,50
    |   |   |   +--53,54,58
    |   |   |   +--65,69
    |   |   +--79,89
    |   |   |   +--75,77
    |   |   |   +--83,87
    |   |   |   +--90,92,96
    |   +--126,158
    |   |   +--103,117
    |   |   |   +--98,99,101,102
    |   |   |   +--106,109,111,113
    |   |   |   +--121,125
    |   |   +--133,146
    |   |   |   +--127,129
    |   |   |   +--137,140,141,144
    |   |   |   +--148,152,156
    |   |   +--169,183,189
    |   |   |   +--159,162,166,167
    |   |   |   +--172,173,177,181
    |   |   |   +--186,188
    |   |   |   +--193,196,199
    |   +--247,292
    |   |   +--211,224,233,241
    |   |   |   +--205,206,208
    |   |   |   +--215,218,220
    |   |   |   +--225,226,227,231
    |   |   |   +--235,237
    |   |   |   +--245,246
    |   |   +--254,264,275,283
    |   |   |   +--251,252
    |   |   |   +--255,259,261
    |   |   |   +--268,272
    |   |   |   +--276,279
    |   |   |   +--285,288
    |   |   +--302,311,324,331
    |   |   |   +--293,297,298
    |   |   |   +--305,306,308
    |   |   |   +--314,317,320
    |   |   |   +--325,326,328
    |   |   |   +--335,337
    |   +--402,457,493
    |   |   +--353,364,372,388
    |   |   |   +--343,346,348,352
    |   |   |   +--356,359,363
    |   |   |   +--366,367,370
    |   |   |   +--376,380,382,386
    |   |   |   +--391,394,397,400
    |   |   +--409,420,429,447
    |   |   |   +--404,406,407
    |   |   |   +--411,415,416
    |   |   |   +--421,424,428
    |   |   |   +--433,437,441,443
    |   |   |   +--450,451,453
    |   |   +--465,474,481
    |   |   |   +--461,462
    |   |   |   +--467,468,470
    |   |   |   +--477,479
    |   |   |   +--485,489,491
    |   |   +--501,511
    |   |   |   +--494,497
    |   |   |   +--504,507
    |   |   |   +--514,518
    

    相关文章

      网友评论

        本文标题:用Python实现一颗B树

        本文链接:https://www.haomeiwen.com/subject/pfjtzctx.html