美文网首页
200行C++代码的Huffman编码实现

200行C++代码的Huffman编码实现

作者: Collie | 来源:发表于2019-08-15 18:04 被阅读0次

    好吧,其实这个说是C语言写的更合适,因为只是最后遍历Huffman编码树的时候用到了C++标准库的vector。是我学习《数据结构(C语言版)》(清华大学出版社,严蔚敏,吴伟民,1997.4)时编写的代码。如果从学习的角度,那么遍历生成编码时所用的栈,也应该自己实现。但是栈的存储结构一般用的就是链式线性表,所以没有必要自己再去实现,并且也非常易于实现。
    我不知道为什么该教材第148页可以只用不到30行就实现了。但是这本教材用的命名方式让我非常不舒服,于是我自己编写了包括KMP算法,Huffman编码的实现。

    #include <iostream>
    #include <vector>
    
    //考试的时候怎么可能足够时间去编写哈夫曼编码啊
    namespace huffman
    {
        typedef struct Node_s {
            struct {
                struct { struct Node_s *left, *right; } children;
                struct { struct Node_s *last, *next; } siblings;
            } relatedNodes;
    
            //dataFields
            double weight;//权重
            size_t symbolIndex;
        } Node_t;
    
        inline Node_t *initAsLinkedList(double *weights, size_t size)
        {
            if (size == 0 || weights == 0) return NULL;
            Node_t *current, *head;
            for (size_t i = 0; i < size; i++)
            {
                if (i == 0)
                {
                    //分配首个空间赋值给head
                    current = (Node_t *)malloc(sizeof(Node_t));
                    current->relatedNodes.siblings.last = NULL;
                    current->relatedNodes.siblings.next = NULL;
    
                    head = current;
                }
                else
                {
                    //创建并跳转到下一个结点
                    current->relatedNodes.siblings.next = (Node_t *)malloc(sizeof(Node_t));
                    current->relatedNodes.siblings.next->relatedNodes.siblings.last = current;
                    current = current->relatedNodes.siblings.next;
                }
                //在初始时,所有的结点都没有子树
                current->relatedNodes.children.left = current->relatedNodes.children.right = NULL;
                //赋值数据域
                current->weight = *(weights + i);
                current->symbolIndex = i;
            }
            current->relatedNodes.siblings.next = NULL;
            return head;
        }
    
        inline Node_t *combindNodes(Node_t *left, Node_t *right)
        {
            Node_t *newRoot = (Node_t *)malloc(sizeof(Node_t));
            newRoot->relatedNodes.siblings.last = newRoot->relatedNodes.siblings.next = NULL;
    
            left->relatedNodes.siblings.last = left->relatedNodes.siblings.next = NULL;
            right->relatedNodes.siblings.last = right->relatedNodes.siblings.next = NULL;
            newRoot->relatedNodes.children.left = left;
            newRoot->relatedNodes.children.right = right;
            newRoot->weight = left->weight + right->weight;
            newRoot->symbolIndex = -1;
            return newRoot;
        }
    
        inline Node_t *generateHuffmanTree(Node_t *initialLinkedList)
        {
            if (initialLinkedList == NULL) return NULL;
    
            if (initialLinkedList != NULL &&
                initialLinkedList->relatedNodes.siblings.next == NULL)
            {
                return initialLinkedList;
            }
            //main process
            Node_t *linkedList = initialLinkedList;
            while (true)
            {
                if (linkedList != NULL &&
                    linkedList->relatedNodes.siblings.next != NULL &&
                    linkedList->relatedNodes.siblings.next->relatedNodes.siblings.next == NULL)
                {
                    //the number of node in the linked list is 2
                    //just combind them together and return
                    return combindNodes(linkedList, linkedList->relatedNodes.siblings.next);
                }
                else
                {//the number of node in the linked list is more than 3 or equals 3
                    //搜索权值最小的两个结点
                    Node_t *smallest[2] = { linkedList,linkedList->relatedNodes.siblings.next };
                    for (Node_t *current = linkedList->relatedNodes.siblings.next->relatedNodes.siblings.next;
                        current != NULL; current = current->relatedNodes.siblings.next)
                    {
                        if (current->weight >= smallest[0]->weight &&
                            current->weight >= smallest[1]->weight) continue;
                        else
                        {
                            //用当前的替换掉最大的那一个
                            if (smallest[0]->weight < smallest[1]->weight)
                            {
                                smallest[1] = current;
                            }
                            else
                            {
                                smallest[0] = current;
                            }
                        }
                    }
    
                    //找到了最小的两个之后就先将两个结点从链表中移除
                    for (unsigned short int i = 0; i < 2; i++)
                    {
                        if (smallest[i]->relatedNodes.siblings.last == NULL)
                        {
                            //说明刚好是链表的头结点,那么需要改变链表的起始指针到下一个结点
                            smallest[i]->relatedNodes.siblings.next->relatedNodes.siblings.last = NULL;
                            linkedList = smallest[i]->relatedNodes.siblings.next;
                            smallest[i]->relatedNodes.siblings.last = 
                                smallest[i]->relatedNodes.siblings.next = NULL;
                        }
                        else if (smallest[i]->relatedNodes.siblings.next == NULL)
                        {
                            //说明刚好是链表的尾结点,那么需要将上一个置为尾结点
                            smallest[i]->relatedNodes.siblings.last->relatedNodes.siblings.next = NULL;
                            smallest[i]->relatedNodes.siblings.last =
                                smallest[i]->relatedNodes.siblings.next = NULL;
                        }
                        else
                        {
                            //既不是链表的头也不是链表的尾
                            smallest[i]->relatedNodes.siblings.last->relatedNodes.siblings.next =
                                smallest[i]->relatedNodes.siblings.next;
                            smallest[i]->relatedNodes.siblings.next->relatedNodes.siblings.last =
                                smallest[i]->relatedNodes.siblings.last;
                            smallest[i]->relatedNodes.siblings.last =
                                smallest[i]->relatedNodes.siblings.next = NULL;
                        }
                    }
                    //然后将生成的新结点加入链表(就加入链表的尾部吧)
                    for (Node_t *currentNode = linkedList;;
                        currentNode = currentNode->relatedNodes.siblings.next)
                    {
                        if (currentNode->relatedNodes.siblings.next == NULL)
                        {
                            //找到了最尾的一个结点
                            currentNode->relatedNodes.siblings.next = 
                                combindNodes(smallest[0], smallest[1]);
                            currentNode->relatedNodes.siblings.next->relatedNodes.siblings.last =
                                currentNode;
                            break;
                        }
                    }
                }
            }
        }
    
        //codePrefix相当于上层传下来的编码
        inline void encode(Node_t *node, std::vector<bool> &codePrefix)
        {
            //如果左子树和右子树都空,说明这是叶子结点
            if (node->relatedNodes.children.left == NULL&&
                node->relatedNodes.children.right == NULL)
            {
                std::cout << node->symbolIndex << " " << node->weight << "\t";
                for (std::vector<bool>::const_iterator bitIterator = codePrefix.begin();
                    bitIterator != codePrefix.end(); bitIterator++)
                {
                    std::cout << *bitIterator;
                }
                std::cout << std::endl << std::flush;
            }
            else
            {
                //说明这是内部结点
                //先处理左子树
                codePrefix.push_back(false);
                encode(node->relatedNodes.children.left, codePrefix);
                codePrefix.erase(codePrefix.end() - 1);
                codePrefix.push_back(true);
                encode(node->relatedNodes.children.right, codePrefix);
                codePrefix.erase(codePrefix.end() - 1);
            }
        }
    
        void test(Node_t *linkedList)
        {
            std::vector<bool> code;
            code.clear();
            encode(linkedList, code);
        }
    }
    

    相关文章

      网友评论

          本文标题:200行C++代码的Huffman编码实现

          本文链接:https://www.haomeiwen.com/subject/rmhtsctx.html