好吧,其实这个说是C语言写的更合适,因为只是最后遍历Huffman编码树的时候用到了C++标准库的vector。是我学习《数据结构(C语言版)》(清华大学出版社,严蔚敏,吴伟民,1997.4)时编写的代码。如果从学习的角度,那么遍历生成编码时所用的栈,也应该自己实现。但是栈的存储结构一般用的就是链式线性表,所以没有必要自己再去实现,并且也非常易于实现。
我不知道为什么该教材第148页可以只用不到30行就实现了。但是这本教材用的命名方式让我非常不舒服,于是我自己编写了包括KMP算法,Huffman编码的实现。
#include <iostream>
#include <vector>
//考试的时候怎么可能足够时间去编写哈夫曼编码啊
namespace huffman
{
typedef struct Node_s {
struct {
struct { struct Node_s *left, *right; } children;
struct { struct Node_s *last, *next; } siblings;
} relatedNodes;
//dataFields
double weight;//权重
size_t symbolIndex;
} Node_t;
inline Node_t *initAsLinkedList(double *weights, size_t size)
{
if (size == 0 || weights == 0) return NULL;
Node_t *current, *head;
for (size_t i = 0; i < size; i++)
{
if (i == 0)
{
//分配首个空间赋值给head
current = (Node_t *)malloc(sizeof(Node_t));
current->relatedNodes.siblings.last = NULL;
current->relatedNodes.siblings.next = NULL;
head = current;
}
else
{
//创建并跳转到下一个结点
current->relatedNodes.siblings.next = (Node_t *)malloc(sizeof(Node_t));
current->relatedNodes.siblings.next->relatedNodes.siblings.last = current;
current = current->relatedNodes.siblings.next;
}
//在初始时,所有的结点都没有子树
current->relatedNodes.children.left = current->relatedNodes.children.right = NULL;
//赋值数据域
current->weight = *(weights + i);
current->symbolIndex = i;
}
current->relatedNodes.siblings.next = NULL;
return head;
}
inline Node_t *combindNodes(Node_t *left, Node_t *right)
{
Node_t *newRoot = (Node_t *)malloc(sizeof(Node_t));
newRoot->relatedNodes.siblings.last = newRoot->relatedNodes.siblings.next = NULL;
left->relatedNodes.siblings.last = left->relatedNodes.siblings.next = NULL;
right->relatedNodes.siblings.last = right->relatedNodes.siblings.next = NULL;
newRoot->relatedNodes.children.left = left;
newRoot->relatedNodes.children.right = right;
newRoot->weight = left->weight + right->weight;
newRoot->symbolIndex = -1;
return newRoot;
}
inline Node_t *generateHuffmanTree(Node_t *initialLinkedList)
{
if (initialLinkedList == NULL) return NULL;
if (initialLinkedList != NULL &&
initialLinkedList->relatedNodes.siblings.next == NULL)
{
return initialLinkedList;
}
//main process
Node_t *linkedList = initialLinkedList;
while (true)
{
if (linkedList != NULL &&
linkedList->relatedNodes.siblings.next != NULL &&
linkedList->relatedNodes.siblings.next->relatedNodes.siblings.next == NULL)
{
//the number of node in the linked list is 2
//just combind them together and return
return combindNodes(linkedList, linkedList->relatedNodes.siblings.next);
}
else
{//the number of node in the linked list is more than 3 or equals 3
//搜索权值最小的两个结点
Node_t *smallest[2] = { linkedList,linkedList->relatedNodes.siblings.next };
for (Node_t *current = linkedList->relatedNodes.siblings.next->relatedNodes.siblings.next;
current != NULL; current = current->relatedNodes.siblings.next)
{
if (current->weight >= smallest[0]->weight &&
current->weight >= smallest[1]->weight) continue;
else
{
//用当前的替换掉最大的那一个
if (smallest[0]->weight < smallest[1]->weight)
{
smallest[1] = current;
}
else
{
smallest[0] = current;
}
}
}
//找到了最小的两个之后就先将两个结点从链表中移除
for (unsigned short int i = 0; i < 2; i++)
{
if (smallest[i]->relatedNodes.siblings.last == NULL)
{
//说明刚好是链表的头结点,那么需要改变链表的起始指针到下一个结点
smallest[i]->relatedNodes.siblings.next->relatedNodes.siblings.last = NULL;
linkedList = smallest[i]->relatedNodes.siblings.next;
smallest[i]->relatedNodes.siblings.last =
smallest[i]->relatedNodes.siblings.next = NULL;
}
else if (smallest[i]->relatedNodes.siblings.next == NULL)
{
//说明刚好是链表的尾结点,那么需要将上一个置为尾结点
smallest[i]->relatedNodes.siblings.last->relatedNodes.siblings.next = NULL;
smallest[i]->relatedNodes.siblings.last =
smallest[i]->relatedNodes.siblings.next = NULL;
}
else
{
//既不是链表的头也不是链表的尾
smallest[i]->relatedNodes.siblings.last->relatedNodes.siblings.next =
smallest[i]->relatedNodes.siblings.next;
smallest[i]->relatedNodes.siblings.next->relatedNodes.siblings.last =
smallest[i]->relatedNodes.siblings.last;
smallest[i]->relatedNodes.siblings.last =
smallest[i]->relatedNodes.siblings.next = NULL;
}
}
//然后将生成的新结点加入链表(就加入链表的尾部吧)
for (Node_t *currentNode = linkedList;;
currentNode = currentNode->relatedNodes.siblings.next)
{
if (currentNode->relatedNodes.siblings.next == NULL)
{
//找到了最尾的一个结点
currentNode->relatedNodes.siblings.next =
combindNodes(smallest[0], smallest[1]);
currentNode->relatedNodes.siblings.next->relatedNodes.siblings.last =
currentNode;
break;
}
}
}
}
}
//codePrefix相当于上层传下来的编码
inline void encode(Node_t *node, std::vector<bool> &codePrefix)
{
//如果左子树和右子树都空,说明这是叶子结点
if (node->relatedNodes.children.left == NULL&&
node->relatedNodes.children.right == NULL)
{
std::cout << node->symbolIndex << " " << node->weight << "\t";
for (std::vector<bool>::const_iterator bitIterator = codePrefix.begin();
bitIterator != codePrefix.end(); bitIterator++)
{
std::cout << *bitIterator;
}
std::cout << std::endl << std::flush;
}
else
{
//说明这是内部结点
//先处理左子树
codePrefix.push_back(false);
encode(node->relatedNodes.children.left, codePrefix);
codePrefix.erase(codePrefix.end() - 1);
codePrefix.push_back(true);
encode(node->relatedNodes.children.right, codePrefix);
codePrefix.erase(codePrefix.end() - 1);
}
}
void test(Node_t *linkedList)
{
std::vector<bool> code;
code.clear();
encode(linkedList, code);
}
}
网友评论