香农(Shannon)编码是一种常见的可变字长编码,其效率不高,实用性不大,但对其他编码方法有很好的理论指导意义。
实现步骤
- 将信源符号按概率从大到小顺序排列
- 计算第i个符号的累加概率
- 计算第i个符号对应的码字的码长(取整)
- 将累加概率变换成二进制小数,取小数点后k位数(k为对应的码长)作为第i个符号的码字。
源代码
类文件
import numpy as np
import math
import sys
class ShannonCoding:
""" 香农编码
parameters
----------
complement : int
编码进制
dim : int
信源维度
N : int
符号个数
symbols : list
符号列表
P : list
符号取值概率列表
cumulative_p : lsit
累加概率列表
L : list
码长列表
code : list
码字列表
H : float
信源熵
R : float
信息率
K : float
编码效率
----------
"""
def __init__(self, symbols, p, complement=2, dim=1):
p = np.array(p)
n = len(p)
if len(symbols) != n:
print('符号与取值概率个数不匹配!')
sys.exit(1)
# 按概率大小排序
for i in range(n):
for j in range(n - i - 1):
if p[j] <= p[j + 1]:
p[j], p[j + 1] = p[j + 1], p[j]
symbols[j], symbols[j + 1] = symbols[j + 1], symbols[j]
# 计算累加概率
cum_p = []
for i in range(n):
cum_p.append(0) if i == 0 else cum_p.append(cum_p[i - 1] + p[i - 1])
cum_p = np.array(cum_p)
# 计算码长序列
length = [int(math.ceil(math.log(1 / p[i], complement))) for i in range(n)]
# 编码
code = []
for i in range(n):
single_code = ''
t = cum_p[i]
for j in range(length[i]):
t = t * complement
t, z = math.modf(t)
single_code += str(int(z))
code.append(single_code)
hx = np.sum((-1) * np.log2(p) * p)
r = np.sum(np.array(length) * p) * math.log2(complement) / dim
k = hx / r
self.complement = complement # 编码进制
self.dim = dim # 信源维度
self.N = n # 符号个数
self.symbols = symbols # 符号列表
self.P = p # 符号取值概率
self.cumulative_p = cum_p # 累加概率
self.L = length # 码长列表
self.code = code # 码字列表
self.H = hx # 信源熵
self.R = r # 信息率
self.K = k # 编码效率
def encode(self, img, path='code.txt'):
""" 编码 """
c = ''
for point in list(img.flatten()):
for i in range(self.N):
if self.symbols[i] == point:
c += self.code[i]
f = open(path, 'w')
f.write(c)
f.close()
return c
def decode(self, c):
""" 解码 """
a = []
s = ''
loc = 0
while c != '':
s += c[loc]
loc += 1
for i in range(self.N):
if self.code[i] == s:
a.append(self.symbols[i])
c = c[loc:]
loc = 0
s = ''
break
return np.array(a)
def print_format(self, describe='Symbols'):
""" 格式化输出信息 """
print('{:<10}\t{:<20}\t{:<25}\t{:<10}\t{}'.
format(describe, 'Probability', 'Cumulative Probability', 'Length', 'Code'))
print('-' * 100)
if self.N > 15:
for i in range(5):
print('{:<10}\t{:<20}\t{:<25}\t{:<10}\t{}'.
format(self.symbols[i], self.P[i], self.cumulative_p[i], self.L[i], self.code[i]))
print('{:<10}\t{:<20}\t{:<25}\t{:<10}\t{}'.
format(' ...', ' ...', ' ...', ' ...', ' ...'))
for i in range(5):
print('{:<10}\t{:<20}\t{:<25}\t{:<10}\t{}'.
format(self.symbols[i-5], self.P[i-5], self.cumulative_p[i-5], self.L[i-5], self.code[i-5]))
else:
for i in range(self.N):
print('{:<10}\t{:<20}\t{:<25}\t{:<10}\t{}'.
format(self.symbols[i], self.P[i], self.cumulative_p[i], self.L[i], self.code[i]))
print('-' * 100)
print('编码效率\t', self.K)
print('\n\n')
测试文件
利用香农编码对一张黑白图片进行图像编码来测试刚刚编写的类文件。
import ShannonCoding
from skimage import data
import numpy as np
import matplotlib.pyplot as plt
import collections
plt.rcParams['font.sans-serif'] = ['SimHei']
img = data.moon()
# 统计色度频次
count = collections.Counter(list(img.flatten()))
# 色度列表
color = list(count.keys())
# 频次列表
number = list(count.values())
number = np.array(number)
# 计算概率列表
p = number / np.sum(number)
shannon = ShannonCoding.ShannonCoding(color, p)
# 图片的编码
total_code = shannon.encode(img)
# 获取图片高宽
height = img.shape[0]
weight = img.shape[1]
# 灰度列表
a = shannon.decode(total_code)
# 将列表还原为高x宽的矩阵
a = a.reshape(height, weight)
shannon.print_format('Gray')
print('压缩率:\t', len(total_code) / (height * weight * 8))
plt.subplot(121)
plt.title(u'原图像')
plt.imshow(img, cmap='gray')
plt.subplot(122)
plt.title(u'编码后还原图像')
plt.imshow(a, cmap='gray')
plt.show()
输出结果
Gray Probability Cumulative Probability Length Code
----------------------------------------------------------------------------------------------------
115 0.0888671875 0.0 4 0000
113 0.0818023681640625 0.0888671875 4 0001
112 0.0775299072265625 0.1706695556640625 4 0010
111 0.0677947998046875 0.248199462890625 4 0011
114 0.0666961669921875 0.3159942626953125 4 0101
... ... ... ... ...
204 3.0517578125e-05 0.999908447265625 15 111111111111101
255 1.52587890625e-05 0.99993896484375 16 1111111111111100
250 1.52587890625e-05 0.9999542236328125 16 1111111111111101
200 1.52587890625e-05 0.999969482421875 16 1111111111111110
13 1.52587890625e-05 0.9999847412109375 16 1111111111111111
----------------------------------------------------------------------------------------------------
编码效率 0.9130107091
压缩率: 0.6688022613525391
香农编码的编码效率比较低,但是一种无失真的熵编码,因此编码前后的图像是一样的。
编码前后图像的比较
网友评论