使用差分金字塔提取高频成分DOG(Difference of Gaussian)
- 差分金字塔,DOG(Difference of Gaussian)金字塔是在高斯金字塔的基础上构建起来的,其实生成高斯金字塔的目的就是为了构建DOG金字塔。
import cv2
import numpy as np
import matplotlib.pyplot as plt#照例导入这三个库
# Grayscale
def BGR2GRAY(img):
    """Convert a BGR image to a single-channel grayscale image.

    Args:
        img: Array-like whose last axis holds the channels in B, G, R order.

    Returns:
        Floating-point array with the channel axis removed, computed as
        ``0.2126 * R + 0.7152 * G + 0.0722 * B``.
    """
    # Accept nested lists as well as ndarrays.
    img = np.asarray(img)
    blue, green, red = img[..., 0], img[..., 1], img[..., 2]
    return 0.2126 * red + 0.7152 * green + 0.0722 * blue
# Bi-Linear interpolation
def bl_interpolate(img, ax=1., ay=1.):
    """Resize an image with bilinear interpolation.

    Args:
        img: 2-D ``(H, W)`` or 3-D ``(H, W, C)`` array of pixel values.
        ax: Horizontal scale factor; the output width is ``int(ax * W)``.
        ay: Vertical scale factor; the output height is ``int(ay * H)``.

    Returns:
        The resized image as ``np.uint8``, with values clipped to [0, 255].
    """
    img = np.asarray(img)
    if img.ndim > 2:
        H, W, C = img.shape
    else:
        H, W = img.shape
        C = 1

    # Size of the resized image.
    aH = int(ay * H)
    aW = int(ax * W)

    # Row and column index of every output pixel, both shaped (aH, aW).
    y, x = np.indices((aH, aW))

    # Map the output coordinates back into the source image.
    y = y / ay
    x = x / ax

    # Top-left neighbour, limited to W-2 / H-2 so that the "+1" neighbour
    # exists.  (np.int was removed from NumPy; the builtin int is used.)
    ix = np.clip(np.floor(x).astype(int), 0, max(W - 2, 0))
    iy = np.clip(np.floor(y).astype(int), 0, max(H - 2, 0))

    # Bottom-right neighbour.  The extra clamp only changes anything for
    # inputs that are a single pixel wide or high.
    ix1 = np.minimum(ix + 1, W - 1)
    iy1 = np.minimum(iy + 1, H - 1)

    # Fractional distance from the top-left neighbour.  Because ix / iy are
    # clamped, this can exceed 1 on the last column / row, in which case the
    # formula below extrapolates from the last two pixels before clipping.
    dx = x - ix
    dy = y - iy

    if C > 1:
        # Add a trailing axis so the weights broadcast over the channels.
        dx = dx[..., np.newaxis]
        dy = dy[..., np.newaxis]

    # Weighted sum of the four neighbours.
    out = ((1 - dx) * (1 - dy) * img[iy, ix]
           + dx * (1 - dy) * img[iy, ix1]
           + (1 - dx) * dy * img[iy1, ix]
           + dx * dy * img[iy1, ix1])

    out = np.clip(out, 0, 255)
    return out.astype(np.uint8)
# Read image
img = cv2.imread("img11.png")
if img is None:
    # cv2.imread signals a missing / unreadable file by returning None.
    raise FileNotFoundError("Cannot read image: img11.png")
# np.float was removed from NumPy; np.float64 is the type it aliased.
img = img.astype(np.float64)
gray = BGR2GRAY(img)

# Bilinear interpolation: shrink to half size ...
small = bl_interpolate(gray.astype(np.float32), ax=0.5, ay=0.5)

# ... then enlarge by 2 again, which discards the fine detail.
blurred = bl_interpolate(small, ax=2., ay=2.)

# For odd image sizes the round trip loses one row / column, so compare only
# the region both images cover.
gray = gray[:blurred.shape[0], :blurred.shape[1]]

# The difference to the original is the high-frequency (edge) component.
out = np.abs(blurred - gray)

# Stretch to [0, 255]; skip the division when the difference is all zero.
peak = out.max()
if peak > 0:
    out = out / peak * 255
out = out.astype(np.uint8)

# Save result
cv2.imshow("result", out)
cv2.waitKey(0)
cv2.imwrite("out.jpg", out)
高斯金字塔
- 高斯金字塔是用于生成不同分辨率图像的一种方法。基本原理是利用插值法将图像依次缩小为原来的 1/2、1/4、1/8……;之后还可以利用插值将缩小后的图像放大回原来的大小,再与原图做减法,这样图像的边缘部分就被提取出来了。高斯金字塔的方法也用于提高图像清晰度的超分辨率成像(Super-Resolution)深度学习方法。
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Grayscale
def BGR2GRAY(img):
    """Convert a BGR image to a single-channel grayscale image.

    Args:
        img: Array-like whose last axis holds the channels in B, G, R order.

    Returns:
        Floating-point array with the channel axis removed, computed as
        ``0.2126 * R + 0.7152 * G + 0.0722 * B``.
    """
    # Accept nested lists as well as ndarrays.
    img = np.asarray(img)
    blue, green, red = img[..., 0], img[..., 1], img[..., 2]
    return 0.2126 * red + 0.7152 * green + 0.0722 * blue
# Bi-Linear interpolation
def bl_interpolate(img, ax=1., ay=1.):
    """Resize an image with bilinear interpolation.

    Args:
        img: 2-D ``(H, W)`` or 3-D ``(H, W, C)`` array of pixel values.
        ax: Horizontal scale factor; the output width is ``int(ax * W)``.
        ay: Vertical scale factor; the output height is ``int(ay * H)``.

    Returns:
        The resized image as ``np.uint8``, with values clipped to [0, 255].
    """
    img = np.asarray(img)
    if img.ndim > 2:
        H, W, C = img.shape
    else:
        H, W = img.shape
        C = 1

    # Size of the resized image.
    aH = int(ay * H)
    aW = int(ax * W)

    # Row and column index of every output pixel, both shaped (aH, aW).
    y, x = np.indices((aH, aW))

    # Map the output coordinates back into the source image.
    y = y / ay
    x = x / ax

    # Top-left neighbour, limited to W-2 / H-2 so that the "+1" neighbour
    # exists.  (np.int was removed from NumPy; the builtin int is used.)
    ix = np.clip(np.floor(x).astype(int), 0, max(W - 2, 0))
    iy = np.clip(np.floor(y).astype(int), 0, max(H - 2, 0))

    # Bottom-right neighbour.  The extra clamp only changes anything for
    # inputs that are a single pixel wide or high.
    ix1 = np.minimum(ix + 1, W - 1)
    iy1 = np.minimum(iy + 1, H - 1)

    # Fractional distance from the top-left neighbour.  Because ix / iy are
    # clamped, this can exceed 1 on the last column / row, in which case the
    # formula below extrapolates from the last two pixels before clipping.
    dx = x - ix
    dy = y - iy

    if C > 1:
        # Add a trailing axis so the weights broadcast over the channels.
        dx = dx[..., np.newaxis]
        dy = dy[..., np.newaxis]

    # Weighted sum of the four neighbours.
    out = ((1 - dx) * (1 - dy) * img[iy, ix]
           + dx * (1 - dy) * img[iy, ix1]
           + (1 - dx) * dy * img[iy1, ix]
           + dx * dy * img[iy1, ix1])

    out = np.clip(out, 0, 255)
    return out.astype(np.uint8)
# make image pyramid制作图像金字塔
def make_pyramid(gray, levels=6):
    """Build an image pyramid by repeatedly down-scaling a grayscale image.

    Args:
        gray: 2-D grayscale image; it becomes level 0 unchanged.
        levels: Total number of levels including the original (default 6,
            i.e. the original plus five reduced images).

    Returns:
        List of ``levels`` images; level ``i`` (i >= 1) is ``gray`` resized
        by a factor of ``1 / 2**i`` with ``bl_interpolate``.
    """
    # First element is the original image.
    pyramid = [gray]
    for i in range(1, levels):
        # Scale denominator for this level: 2, 4, 8, ...
        a = 2. ** i
        # Down-scale straight from the original and store the level.
        pyramid.append(bl_interpolate(gray, ax=1. / a, ay=1. / a))
    return pyramid
# Read image
img = cv2.imread("img11.png")
if img is None:
    # cv2.imread signals a missing / unreadable file by returning None.
    raise FileNotFoundError("Cannot read image: img11.png")
# np.float was removed from NumPy; np.float64 is the type it aliased.
img = img.astype(np.float64)
gray = BGR2GRAY(img)

# pyramid
pyramid = make_pyramid(gray)

# Save every level and show all of them side by side in one figure.
for i, level in enumerate(pyramid):
    cv2.imwrite("out_{}.jpg".format(2 ** i), level.astype(np.uint8))
    plt.subplot(1, len(pyramid), i + 1)
    plt.imshow(level, cmap='gray')
    plt.axis('off')
    plt.xticks(color="None")
    plt.yticks(color="None")
plt.show()
显著图
- 显著图是将一幅图像中容易吸引人眼注意的部分突出表现出来的图像。
- 虽然现在通常使用深度学习的方法计算显著图,但是一开始人们用图像的RGB成分或者HSV成分创建高斯金字塔,并通过求差来得到显著图
算法原理
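- 我们使用双线性插值将图像依次缩小至 $\frac{1}{2}$、$\frac{1}{4}$、$\frac{1}{8}$、$\frac{1}{16}$、$\frac{1}{32}$(一开始是缩放至 $\frac{1}{2}$),再用双线性插值把每一层放大回原图大小。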
- 将得到的金字塔(我们将金字塔的各层分别编号为0,1,2,3,4,5)两两求差。
- 将第2步中求得的差分全部相加,并正规化至[0,255]。
需要注意的是,图像的宽和高最好是2的幂次的整数倍(本例中最好能被32整除),因为要层层除以2再乘以2,否则变换前后的尺寸会不一致。
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Grayscale
def BGR2GRAY(img):
    """Convert a BGR image to a single-channel grayscale image.

    Args:
        img: Array-like whose last axis holds the channels in B, G, R order.

    Returns:
        Floating-point array with the channel axis removed, computed as
        ``0.2126 * R + 0.7152 * G + 0.0722 * B``.
    """
    # Accept nested lists as well as ndarrays.
    img = np.asarray(img)
    blue, green, red = img[..., 0], img[..., 1], img[..., 2]
    return 0.2126 * red + 0.7152 * green + 0.0722 * blue
# Bi-Linear interpolation
def bl_interpolate(img, ax=1., ay=1.):
    """Resize an image with bilinear interpolation.

    Args:
        img: 2-D ``(H, W)`` or 3-D ``(H, W, C)`` array of pixel values.
        ax: Horizontal scale factor; the output width is ``int(ax * W)``.
        ay: Vertical scale factor; the output height is ``int(ay * H)``.

    Returns:
        The resized image as ``np.uint8``, with values clipped to [0, 255].
    """
    img = np.asarray(img)
    if img.ndim > 2:
        H, W, C = img.shape
    else:
        H, W = img.shape
        C = 1

    # Size of the resized image.
    aH = int(ay * H)
    aW = int(ax * W)

    # Row and column index of every output pixel, both shaped (aH, aW).
    y, x = np.indices((aH, aW))

    # Map the output coordinates back into the source image.
    y = y / ay
    x = x / ax

    # Top-left neighbour, limited to W-2 / H-2 so that the "+1" neighbour
    # exists.  (np.int was removed from NumPy; the builtin int is used.)
    ix = np.clip(np.floor(x).astype(int), 0, max(W - 2, 0))
    iy = np.clip(np.floor(y).astype(int), 0, max(H - 2, 0))

    # Bottom-right neighbour.  The extra clamp only changes anything for
    # inputs that are a single pixel wide or high.
    ix1 = np.minimum(ix + 1, W - 1)
    iy1 = np.minimum(iy + 1, H - 1)

    # Fractional distance from the top-left neighbour.  Because ix / iy are
    # clamped, this can exceed 1 on the last column / row, in which case the
    # formula below extrapolates from the last two pixels before clipping.
    dx = x - ix
    dy = y - iy

    if C > 1:
        # Add a trailing axis so the weights broadcast over the channels.
        dx = dx[..., np.newaxis]
        dy = dy[..., np.newaxis]

    # Weighted sum of the four neighbours.
    out = ((1 - dx) * (1 - dy) * img[iy, ix]
           + dx * (1 - dy) * img[iy, ix1]
           + (1 - dx) * dy * img[iy1, ix]
           + dx * dy * img[iy1, ix1])

    out = np.clip(out, 0, 255)
    return out.astype(np.uint8)
# make image pyramid
def make_pyramid(gray, levels=6):
    """Build a same-size pyramid of progressively blurrier images.

    Each level is produced by shrinking ``gray`` by ``1 / 2**i`` and then
    enlarging the result by ``2**i`` with ``bl_interpolate``.

    Args:
        gray: 2-D grayscale image; it becomes level 0 unchanged.
        levels: Total number of levels including the original (default 6).

    Returns:
        List of ``levels`` images; levels 1 and up are ``np.float32``.
    """
    # NOTE(review): the two int() truncations in the round trip probably only
    # restore the original size when both sides divide by 2**(levels - 1).
    # First element is the original image.
    pyramid = [gray]
    for i in range(1, levels):
        # Scale factor for this level: 2, 4, 8, ...
        a = 2. ** i
        # Down-scale, which throws away detail ...
        p = bl_interpolate(gray, ax=1. / a, ay=1. / a)
        # ... then up-scale again by the same factor.
        p = bl_interpolate(p, ax=a, ay=a)
        pyramid.append(p.astype(np.float32))
    return pyramid
# make saliency map
def saliency_map(pyramid):
    """Combine pyramid levels into a saliency map.

    Args:
        pyramid: Sequence of at least six 2-D arrays of identical shape
            (levels 0 to 5 are read).

    Returns:
        ``np.float32`` array of the same shape: the sum of absolute
        differences between selected level pairs, scaled so its maximum is
        255. An all-zero sum is returned unscaled.
    """
    H, W = pyramid[0].shape
    out = np.zeros((H, W), dtype=np.float32)

    # Level pairs whose absolute difference contributes to the map.
    pairs = ((0, 1), (0, 3), (0, 5), (1, 4), (2, 3), (3, 5))
    for a, b in pairs:
        out += np.abs(pyramid[a] - pyramid[b])

    # Normalise to [0, 255]; avoid 0 / 0 when every level is identical.
    peak = out.max()
    if peak > 0:
        out = out / peak * 255
    return out
# Read image
img = cv2.imread("img11.png")
if img is None:
    # cv2.imread signals a missing / unreadable file by returning None.
    raise FileNotFoundError("Cannot read image: img11.png")
# np.float was removed from NumPy; np.float64 is the type it aliased.
img = img.astype(np.float64)

# grayscale
gray = BGR2GRAY(img)

# pyramid
pyramid = make_pyramid(gray)

# pyramid -> saliency
out = saliency_map(pyramid)
out = out.astype(np.uint8)

# Save result
cv2.imshow("result", out)
cv2.waitKey(0)
cv2.imwrite("out.jpg", out)
网友评论