使用赫夫曼算法实现压缩
步骤:
-
压缩一个字符串 "i like like like java do you like a java"
-
将字符串变成一个 byte 数组 byte[]
byte[]
-
统计字符串中各各字母的个数,并返回一个List集合
public static List<Node> getNode(byte[] str){
//通过map来统计个数
HashMap<Byte, Integer> map = new HashMap<>();
for (byte b : str) {
Integer count = map.get(b);
if (count==null){
map.put(b,1);
}else {
map.put(b,count+1);
}
}
ArrayList<Node> nodes = new ArrayList<>();
for (Map.Entry<Byte, Integer> entry : map.entrySet()) {
nodes.add(new Node(entry.getKey(),entry.getValue()));
}
return nodes;
}
- 通过List集合构建赫夫曼树()需要一个Node节点
class Node implements Comparable<Node>{
Byte data;
int weight;
Node left;
Node right;
public Node() {
}
public Node(Byte data, int weight) {
this.data = data;
this.weight = weight;
}
public Node(int weight, Node left, Node right) {
this.weight = weight;
this.left = left;
this.right = right;
}
@Override
public int compareTo(Node o) {
return this.weight-o.weight;
}
@Override
public String toString() {
return "Node{" +
"data=" + data +
", weight=" + weight +
'}';
}
//前序遍历
public void preOrder(){
System.out.println(this);
if (this.left!=null){
this.left.preOrder();
}
if (this.right!=null){
this.right.preOrder();
}
}
}
- 创建赫夫曼树
//创建赫夫曼树
public static Node createHuffmanTree(List<Node> nodes){
while (nodes.size()>1){
//排序
Collections.sort(nodes);
//取出前两个节点,也就是最小的两个节点
Node left = nodes.get(0);
Node right = nodes.get(1);
//创建父节点
Node parent = new Node(left.weight+right.weight,left,right);
//将取出的两个节点在集合中删除
nodes.remove(left);
nodes.remove(right);
//将父加点加入集合
nodes.add(parent);
}
//返回集合中唯一的节点
return nodes.get(0);
}
- 获取赫夫曼编码
static Map<Byte,String> huffmanMap = new HashMap<Byte,String>();
static StringBuilder stringBuilder = new StringBuilder();
/**
*获取赫夫曼编码
* @param node 传入的节点
* @param code 路径:左:0 右:1
* @param stringBuilder 用于拼接路径
*/
private static void getCode(Node node,String code,StringBuilder stringBuilder){
StringBuilder stringBuilder2 = new StringBuilder(stringBuilder);
stringBuilder2.append(code);
if (node!=null){
if (node.data==null){
getCode(node.left,"0",stringBuilder2);
getCode(node.right,"1",stringBuilder2);
}else {
huffmanMap.put(node.data,stringBuilder2.toString());
}
}
}
//重构一下getCode
private static Map<Byte,String> getCode(Node root){
if (root==null){
return null;
}
getCode(root.left,"0",stringBuilder);
getCode(root.right,"1",stringBuilder);
return huffmanMap;
}
- 进行赫夫曼压缩
/**
* 进行赫夫曼压缩
* @param huffmanCode
* @param bytes
* @return
*/
private static byte[] zip(Map<Byte,String> huffmanCode,byte[] bytes){
StringBuilder stringBuilder = new StringBuilder();
//将原来的二进制的码转为赫夫曼压缩过的码
for (byte aByte : bytes) {
stringBuilder.append(huffmanCode.get(aByte));
}
//求出编码的长度,便于创建数组
int len = 0;
if (stringBuilder.length()%8==0){
len = stringBuilder.length()/8;
}else {
len = stringBuilder.length()/8+1;
}
byte[] by= new byte[len];
int index = 0;
for (int i = 0; i < stringBuilder.length(); i+=8) {
if (i+8>stringBuilder.length()){
by[index] = (byte) Integer.parseInt(stringBuilder.substring(i),2);
}else {
by[index] = (byte) Integer.parseInt(stringBuilder.substring(i,i+8),2);
}
index++;
}
return by;
}
- 对以上函数进行整合
private static byte[] huffmanZip(byte[] content){
// 将内容封装成一个集合
List<Node> nodes = getNode(content);
//将内容变成一个赫夫曼树
Node huffmanTree = createHuffmanTree(nodes);
//获取赫夫曼编码
Map<Byte, String> huffmanCode = getCode(huffmanTree);
//压缩
byte[] zip = zip(huffmanCode, content);
return zip;
}
- 压缩文件
/**
* 压缩文件
*/
private static void huffmanFileZip(String src,String dst){
InputStream is = null;
OutputStream os = null;
ObjectOutputStream oos = null;
try {
is = new FileInputStream(new File(src));
byte[] b = new byte[is.available()];
is.read(b);
byte[] bytes = huffmanZip(b);
os = new FileOutputStream(new File(dst));
oos = new ObjectOutputStream(os);
oos.writeObject(bytes);
oos.writeObject(huffmanMap);
}catch (Exception e) {
e.printStackTrace();
} finally {
try {
oos.close();
os.close();
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
解压:
- (准备)将一个比特转为一个二进制的字符串
//将一个比特转为一个二进制的字符串
private static String byteToString(boolean flag,byte b){
int temp = b;
if (flag){
temp |=256;
}
String str = Integer.toBinaryString(temp);
if (flag||temp<0){
return str.substring(str.length()-8);
}else {
return str;
}
}
- 对压缩数据的解码
/**
* 完成对压缩数据的解码
* @param huffmanCodes 赫夫曼编码
* @param huffmanBytes 压缩后的编码
* @return
*/
private static byte[] decode(Map<Byte,String> huffmanCodes,byte[] huffmanBytes){
// 将byte转为一个二进制的字符串
StringBuilder stringBuilder = new StringBuilder();
for (int i = 0; i < huffmanBytes.length; i++) {
//判断是否为最后一位
Boolean flag = (i==huffmanBytes.length-1);
String string = byteToString(!flag, huffmanBytes[i]);
stringBuilder.append(string);
}
// 获取反向的哈夫曼编码表
Map<String, Byte> map = new HashMap<String, Byte>();
for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
map.put(entry.getValue(),entry.getKey());
}
// 通过哈夫曼编码表将二进制字符串转为原来的二进制
List<Byte> list = new ArrayList<Byte>();
for (int i = 0; i < stringBuilder.length();) {
int count = 1;//小计数器
Byte temp;
while (true){
temp = map.get(stringBuilder.substring(i, i + count));
if (temp==null){
count++;
}else {
break;//推出循环
}
}
list.add(temp);
i += count;
}
byte[] b = new byte[list.size()];
for (int i = 0; i < list.size(); i++) {
b[i] = list.get(i);
}
return b;
}
- 解压文件
/**
* 解压文件
*/
private static void unZipFile(String src,String dst){
InputStream is = null;
ObjectInputStream ois = null;
OutputStream os = null;
try {
is = new FileInputStream(src);
ois = new ObjectInputStream(is);
byte[] bytes = (byte[]) ois.readObject();
Map<Byte,String> map = (Map<Byte, String>) ois.readObject();
byte[] decode = decode(map, bytes);
os = new FileOutputStream(dst);
os.write(decode);
} catch (Exception e) {
e.printStackTrace();
}finally {
try {
os.close();
ois.close();
is.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
网友评论