美文网首页python日常学习
【平均、感知、差异】哈希算法 +余弦+直方图距离筛选相似帧

【平均、感知、差异】哈希算法 +余弦+直方图距离筛选相似帧

作者: 小小杨树 | 来源:发表于2021-10-07 11:06 被阅读0次

    一.构造【平均、感知、差异】哈希算法 +余弦+直方图距离 特征获取方式

    1.计算平均哈希算法相似度(ahash)
    # 正则化图像
    def regularizeImage(img, size = (8, 8)):
        """Normalise an image for average hashing.

        Args:
            img: Object exposing PIL-style ``resize`` and ``convert`` methods.
            size: Target ``(width, height)`` passed to ``resize``.

        Returns:
            The result of resizing ``img`` and converting it to mode ``'L'``.
        """
        shrunk = img.resize(size)
        return shrunk.convert('L')
    
    # 计算hash值
    def getHashCode(img, size = (8, 8)):
        """Build the average-hash bit list of an image.

        Args:
            img: Object exposing ``getpixel((x, y))`` returning a number.
            size: ``(width, height)`` of the region to read.

        Returns:
            A list of 0/1 ints, one per pixel, where 1 means the pixel is
            strictly greater than the mean of all pixels read.
        """
        # Pixels are read with x as the outer index and y as the inner one.
        samples = [
            img.getpixel((x, y))
            for x in range(size[0])
            for y in range(size[1])
        ]
        threshold = sum(samples) / len(samples)
        return [1 if sample > threshold else 0 for sample in samples]
    
    # 比较hash值
    def compHashCode(hc1, hc2):
        """Count the positions at which two hash codes agree.

        Args:
            hc1: First sequence of hash bits.
            hc2: Second sequence of hash bits.

        Returns:
            Number of paired positions holding equal values; pairing stops at
            the shorter sequence.
        """
        return sum(1 for left, right in zip(hc1, hc2) if left == right)
    
    # 计算平均哈希算法相似度
    def calaHashSimilarity(img1, img2):
        """Return the number of matching average-hash bits of two images.

        Each image is normalised with ``regularizeImage``, hashed with
        ``getHashCode``, and the two codes are compared with ``compHashCode``.
        """
        first_code = getHashCode(regularizeImage(img1))
        second_code = getHashCode(regularizeImage(img2))
        return compHashCode(first_code, second_code)
    
    2.感知哈希计算(phash)
    import math
    import unittest
    
    # 正则化图像
    def regularizeImage(img, size = (32, 32)):
        """Normalise an image for perceptual hashing.

        Args:
            img: Object exposing PIL-style ``resize`` and ``convert`` methods.
            size: Target ``(width, height)`` passed to ``resize``.

        Returns:
            The result of resizing ``img`` and converting it to mode ``'L'``.
        """
        shrunk = img.resize(size)
        return shrunk.convert('L')
    
    # 获得图像像素矩阵
    def getMatrix(img):
        """Read an image into a row-major list of pixel rows.

        Args:
            img: Object exposing ``size`` as ``(width, height)`` and
                ``getpixel((x, y))``.

        Returns:
            A list with one inner list per y coordinate, each holding the
            pixel values for x = 0 .. width - 1.
        """
        width, height = img.size[0], img.size[1]
        return [
            [img.getpixel((x, y)) for x in range(width)]
            for y in range(height)
        ]
    
    # 计算系数矩阵
    def getCoefficient(length):
        """Build the ``length`` x ``length`` cosine coefficient matrix.

        Row 0 is constant ``1 / sqrt(length)``; row ``k`` (k >= 1) holds
        ``sqrt(2 / length) * cos(k * pi * (n + 0.5) / length)`` for column ``n``.

        Args:
            length: Side length of the square matrix.

        Returns:
            The matrix as a list of row lists.
        """
        rows = [[1.0 / math.sqrt(length)] * length]
        scale = math.sqrt(2.0 / length)
        for k in range(1, length):
            rows.append([
                scale * math.cos(k * math.pi * (n + 0.5) / length)
                for n in range(length)
            ])
        return rows
    
    # 计算矩阵转置
    def getTranspose(matrix):
        """Return the transpose of a matrix given as a list of rows.

        Works for rectangular as well as square input; an empty matrix yields
        an empty list.

        Args:
            matrix: List of equally long row lists.

        Returns:
            A new list of lists where element ``[c][r]`` equals
            ``matrix[r][c]``.
        """
        # zip(*rows) walks the columns, so the result has one row per input
        # column regardless of whether the matrix is square.
        return [list(column) for column in zip(*matrix)]
    
    # 计算矩阵乘法
    def getMultiply(matrix1, matrix2):
        """Return the matrix product ``matrix1 · matrix2``.

        Args:
            matrix1: Left operand, a list of row lists (R x K).
            matrix2: Right operand, a list of row lists (K x C).

        Returns:
            The R x C product as a list of row lists of floats.

        Raises:
            ValueError: If a row of ``matrix1`` does not have as many entries
                as ``matrix2`` has rows.
        """
        inner = len(matrix2)
        for row in matrix1:
            if len(row) != inner:
                raise ValueError(
                    'matrix1 column count must equal matrix2 row count')

        # The output width comes from matrix2's columns, not from a row of
        # matrix2 indexed by the current row of matrix1.
        columns = list(zip(*matrix2))
        return [
            [sum((a * b for a, b in zip(row, column)), 0.0) for column in columns]
            for row in matrix1
        ]
    
    # 计算DCT
    def DCT(matrix):
        """Return the 2-D cosine transform ``A · matrix · Aᵀ`` of a square matrix.

        ``A`` is the coefficient matrix produced by ``getCoefficient`` for the
        number of rows in ``matrix``.

        Args:
            matrix: Square list-of-rows matrix of numbers.

        Returns:
            The transformed matrix as a list of row lists.
        """
        length = len(matrix)
        A = getCoefficient(length)
        AT = getTranspose(A)
        # Both multiplications are required: the left one transforms along
        # one axis and the right one along the other. Using `matrix` instead
        # of the intermediate product would drop the left transform.
        temp = getMultiply(A, matrix)
        return getMultiply(temp, AT)
    
    # 计算左上角8*8并转化为list
    def submatrix_list(matrix, size = (8, 8)):
        """Flatten the top-left corner of a matrix into a single list.

        Args:
            matrix: List of row lists.
            size: ``(rows, columns)`` of the corner to take.

        Returns:
            The selected elements in row-major order.
        """
        return [
            matrix[r][c]
            for r in range(size[0])
            for c in range(size[1])
        ]
    
    # 计算hash值
    def getHashCode(sub_list):
        """Threshold a list of numbers against its own mean.

        Args:
            sub_list: Non-empty list of numbers.

        Returns:
            A list of 0/1 ints; 1 where the value is strictly greater than the
            mean of ``sub_list``.
        """
        threshold = sum(sub_list) / len(sub_list)
        return [1 if value > threshold else 0 for value in sub_list]
    
    # 比较hash值
    def compHashCode(hc1, hc2):
        """Count the positions at which two hash codes agree.

        Args:
            hc1: First sequence of hash bits.
            hc2: Second sequence of hash bits.

        Returns:
            Number of paired positions holding equal values; pairing stops at
            the shorter sequence.
        """
        agreeing = [pair for pair in zip(hc1, hc2) if pair[0] == pair[1]]
        return len(agreeing)
    
    # 计算感知哈希算法相似度
    def calpHashSimilarity(img1, img2):
        """Return the number of matching perceptual-hash bits of two images.

        For each image: normalise with ``regularizeImage``, read the pixel
        matrix with ``getMatrix``, transform it with ``DCT``, flatten the
        top-left corner with ``submatrix_list`` and threshold it with
        ``getHashCode``. The two codes are compared with ``compHashCode``.
        """
        codes = []
        for image in (img1, img2):
            pixels = getMatrix(regularizeImage(image))
            corner = submatrix_list(DCT(pixels))
            codes.append(getHashCode(corner))
        return compHashCode(codes[0], codes[1])
    

    3.获取图像直方图距离

    # 正则化图像
    def regularizeImage(img, size = (256, 256)):
        """Normalise an image for histogram comparison.

        Args:
            img: Object exposing PIL-style ``resize`` and ``convert`` methods.
            size: Target ``(width, height)`` passed to ``resize``.

        Returns:
            The result of resizing ``img`` and converting it to mode ``'RGB'``.
        """
        shrunk = img.resize(size)
        return shrunk.convert('RGB')
    
    # 分块图像4x4
    def splitImage(img, part_size = (64, 64)):
        """Cut an image into tiles of ``part_size``.

        Args:
            img: Object exposing ``size`` as ``(width, height)`` and a
                PIL-style ``crop(box)`` whose result has ``copy()``.
            part_size: ``(width, height)`` of each tile.

        Returns:
            List of tile copies, ordered by left edge first and top edge
            second.
        """
        width, height = img.size
        tile_w, tile_h = part_size
        return [
            img.crop((left, top, left + tile_w, top + tile_h)).copy()
            for left in range(0, width, tile_w)
            for top in range(0, height, tile_h)
        ]
    
    # 利用单块图片的直方图距离计算相似度
    def calSingleHistogramSimilarity(hg1, hg2):
        """Score how alike two histograms are, bin by bin.

        Equal bins score 1; unequal bins score
        ``1 - |x1 - x2| / max(x1, x2)``. The result is the mean bin score.

        Args:
            hg1: First histogram, a non-empty sequence of counts.
            hg2: Second histogram, same length as ``hg1``.

        Returns:
            The mean per-bin score as a float.

        Raises:
            ValueError: If the histograms differ in length.
            ZeroDivisionError: If the histograms are empty.
        """
        if len(hg1) != len(hg2):
            # ValueError is still an Exception, so existing broad handlers
            # keep working.
            raise ValueError('样本点个数不一样')

        # Named `total` so the builtin sum() is not shadowed.
        total = 0.0
        for x1, x2 in zip(hg1, hg2):
            if x1 == x2:
                # Also covers two empty bins, avoiding a 0 / 0 division.
                total += 1
            else:
                total += 1 - abs(x1 - x2) / max(x1, x2)
        return total / len(hg1)
    
    # 利用分块图片的直方图距离计算相似度
    def calMultipleHistogramSimilarity(img1, img2):
        """Average the histogram similarity over corresponding image tiles.

        Both images are tiled with ``splitImage`` and each pair of tiles is
        scored with ``calSingleHistogramSimilarity`` on their ``histogram()``
        output.

        Args:
            img1: First image, accepted by ``splitImage``.
            img2: Second image, accepted by ``splitImage``.

        Returns:
            The mean tile score as a float, or ``0.0`` when there are no tile
            pairs.
        """
        scores = [
            calSingleHistogramSimilarity(tile1.histogram(), tile2.histogram())
            for tile1, tile2 in zip(splitImage(img1), splitImage(img2))
        ]
        if not scores:
            return 0.0
        # Divide by the real tile count rather than a fixed 16, so the mean
        # stays correct for image or tile sizes other than 256x256 / 64x64.
        return float(sum(scores) / len(scores))
    

    4.差异哈希算法(dhash)

    # 正则化图像
    def regularizeImage(img, size=(9, 8)):
        """Normalise an image for difference hashing.

        Args:
            img: Object exposing PIL-style ``resize`` and ``convert`` methods.
            size: Target ``(width, height)`` passed to ``resize``.

        Returns:
            The result of resizing ``img`` and converting it to mode ``'L'``.
        """
        shrunk = img.resize(size)
        return shrunk.convert('L')
    
    # 计算hash值
    def getHashCode(img, size = (9, 8)):
        """Build the difference-hash bit list of an image.

        Each pixel is compared with its right-hand neighbour; the bit is 1
        when the left pixel is strictly greater.

        Args:
            img: Object exposing ``getpixel((x, y))`` returning a number.
            size: ``(width, height)`` of the region; ``width - 1`` columns of
                comparisons are produced.

        Returns:
            A list of 0/1 ints, ordered by x first and y second.
        """
        columns = range(size[0] - 1)
        rows = range(size[1])
        return [
            1 if img.getpixel((x, y)) > img.getpixel((x + 1, y)) else 0
            for x in columns
            for y in rows
        ]
    
    # 比较hash值
    def compHashCode(hc1, hc2):
        """Count the positions at which two hash codes agree.

        Args:
            hc1: First sequence of hash bits.
            hc2: Second sequence of hash bits.

        Returns:
            Number of paired positions holding equal values; pairing stops at
            the shorter sequence.
        """
        matches = 0
        for position in range(min(len(hc1), len(hc2))):
            if hc1[position] == hc2[position]:
                matches += 1
        return matches
    
    # 计算差异哈希算法相似度
    def caldHashSimilarity(img1, img2):
        """Return the number of matching difference-hash bits of two images.

        Each image is normalised with ``regularizeImage``, hashed with
        ``getHashCode``, and the two codes are compared with ``compHashCode``.
        """
        first_code = getHashCode(regularizeImage(img1))
        second_code = getHashCode(regularizeImage(img2))
        return compHashCode(first_code, second_code)
    

    5.余弦计算(co)

    from PIL import Image
    from numpy import average, linalg, dot
    
    
    def get_thumbnail(image, size=(608, 608), greyscale=False):
        """Resize an image and optionally convert it to greyscale.

        Args:
            image: PIL image to resize.
            size: Target ``(width, height)``.
            greyscale: When true, the resized image is converted to mode
                ``'L'``.

        Returns:
            The resized (and possibly converted) image.
        """
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the
        # name of the same resampling filter and exists in old and new
        # releases.
        image = image.resize(size, Image.LANCZOS)
        if greyscale:
            image = image.convert('L')
        return image
    
    
    def image_similarity_vectors_via_numpy(image1, image2):
        """Return the cosine similarity of two images' per-pixel averages.

        Both images are resized with ``get_thumbnail``; every pixel is reduced
        to the average of its value(s), and the two resulting vectors are
        normalised by their 2-norm before taking the dot product.

        Args:
            image1: First image, accepted by ``get_thumbnail``.
            image2: Second image, accepted by ``get_thumbnail``.

        Returns:
            The dot product of the two normalised vectors.
        """
        vectors = []
        norms = []
        for thumb in (get_thumbnail(image1), get_thumbnail(image2)):
            # One scalar per pixel: the mean over that pixel's value(s).
            brightness = [average(pixel_tuple) for pixel_tuple in thumb.getdata()]
            vectors.append(brightness)
            norms.append(linalg.norm(brightness, 2))
        (a, b), (a_norm, b_norm) = vectors, norms
        return dot(a / a_norm, b / b_norm)
    

    二.构造【平均、感知、差异】哈希算法 +余弦+直方图距离计算汉明距离方式。把上面五段代码分别保存为 aHash.py、pHash.py、histogram.py、dHash.py、co.py,这五大类计算方法构造完毕后可方便于我们后期调用。在这里为了后面的方便使用,我统一使用 PIL(Pillow)读取图像。

    import histogram as htg
    import aHash as ah
    import pHash as ph
    import dHash as dh
    import co
    
    
    def dsh(img1, img2):
        """Return the difference-hash similarity of two images.

        The value is ``dh.caldHashSimilarity(img1, img2)`` divided by 64 and
        rounded to three decimals.
        """
        matching_bits = dh.caldHashSimilarity(img1, img2)
        return round(matching_bits / 64, 3)
    
    
    def cin(img1, img2):
        """Return the cosine similarity of two images, rounded to 3 decimals.

        Delegates to ``co.image_similarity_vectors_via_numpy``.
        """
        return round(co.image_similarity_vectors_via_numpy(img1, img2), 3)
    
    
    def psh(img1, img2):
        """Return the perceptual-hash similarity of two images.

        The value is ``ph.calpHashSimilarity(img1, img2)`` divided by 64 and
        rounded to three decimals.
        """
        matching_bits = ph.calpHashSimilarity(img1, img2)
        return round(matching_bits / 64, 3)
    
    
    def ash(img1, img2):
        """Return the average-hash similarity of two images.

        The value is ``ah.calaHashSimilarity(img1, img2)`` divided by 64 and
        rounded to three decimals.
        """
        matching_bits = ah.calaHashSimilarity(img1, img2)
        return round(matching_bits / 64, 3)
    
    
    def ham(img1, img2):
        """Return the histogram similarity of two images, rounded to 3 decimals.

        Both images are first normalised with ``htg.regularizeImage`` and then
        scored with ``htg.calMultipleHistogramSimilarity``.
        """
        first = htg.regularizeImage(img1)
        second = htg.regularizeImage(img2)
        return round(htg.calMultipleHistogramSimilarity(first, second), 3)
    

    三.通过这五大类特征筛选相似度(第二部分的代码需保存为 adhp.py 以便导入),阈值可以任意设置,视自己情况而定。

    import adhp
    import os
    from PIL import Image
    import shutil
    
    def found_same_img(path, dest='D:/wys/image/1/', threshold=0.98):
        """Copy every frame that is near-identical to the frame after it.

        The files in ``path`` are taken in sorted name order and compared
        pairwise (file k with file k + 1). A frame is copied when all five
        ``adhp`` scores (dsh, cin, psh, ash, ham) exceed ``threshold``.

        Args:
            path: Folder containing the frames; a trailing separator is
                optional.
            dest: Folder that receives the copies.
            threshold: Minimum score each of the five metrics must exceed.

        Returns:
            None.
        """
        # os.listdir() returns names in arbitrary order; sort so that
        # "next file" really means the next frame. Sub-directories are
        # skipped because they cannot be opened as images.
        frames = [
            os.path.join(path, name)
            for name in sorted(os.listdir(path))
            if os.path.isfile(os.path.join(path, name))
        ]

        # Pairing through zip() yields nothing for zero or one file, so a
        # folder with a single frame no longer indexes past the list end.
        for current, following in zip(frames, frames[1:]):
            # Context managers release the file handles after each pair.
            with Image.open(current) as img1, Image.open(following) as img2:
                # Order: difference hash, cosine, perceptual hash,
                # average hash, histogram. all() stops at the first failure.
                is_duplicate = all(
                    metric(img1, img2) > threshold
                    for metric in (adhp.dsh, adhp.cin, adhp.psh, adhp.ash, adhp.ham)
                )
            if is_duplicate:
                shutil.copy(current, dest)
    
    
    # Script entry point: scan one hard-coded folder for near-duplicate frames.
    if __name__ == '__main__':
        path = 'D:/wys/image/2/'  # Edit this to the folder holding your frames.
        found_same_img(path)     # Run the scan.
    
    

    相关文章

      网友评论

        本文标题:【平均、感知、差异】哈希算法 +余弦+直方图距离筛选相似帧

        本文链接:https://www.haomeiwen.com/subject/oynailtx.html