# -
# https://zhuanlan.zhihu.com/p/46626607
# -
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.spatial.distance import cdist, mahalanobis
# Load the input Excel workbook and report its shape.
file_path = 'C:/Users/Aaron_zhang/Desktop/马氏距离数据.xlsx'
data = pd.read_excel(file_path)
print('数据集的形状:', data.shape)
# Extract the feature values, skipping the first column (treated as an
# identifier). Casting to float makes a non-numeric column fail right here
# instead of later in the numeric code.
numerical_data = data.iloc[:, 1:].to_numpy(dtype=float)
# Covariance of the features: rowvar=False means columns are variables and
# rows are observations. np.cov returns a 0-d array when there is only one
# feature column, so promote the result to 2-D before inverting it.
cov_matrix = np.atleast_2d(np.cov(numerical_data, rowvar=False))
# Use the Moore-Penrose pseudo-inverse rather than a plain inverse: it agrees
# with the inverse for a well-conditioned matrix, and still yields a usable
# result when the covariance is singular (e.g. collinear features or fewer
# samples than features). hermitian=True exploits the matrix's symmetry.
inv_cov_matrix = np.linalg.pinv(cov_matrix, hermitian=True)
# Number of samples (rows) in the data set.
n = numerical_data.shape[0]
# Pairwise Mahalanobis distances between every pair of rows, as an (n, n)
# matrix. A single cdist call replaces the nested Python loop that invoked
# mahalanobis() once per (i, j) pair; VI is the inverse covariance matrix.
mahalanobis_distances = cdist(
    numerical_data, numerical_data, metric='mahalanobis', VI=inv_cov_matrix
)
print('马氏距离矩阵的形状:', mahalanobis_distances.shape)
# Wrap the distance matrix in a DataFrame and write it to an Excel workbook,
# leaving out the row-index column.
mahalanobis_df = pd.DataFrame(data=mahalanobis_distances)
output_file_path = 'C:/Users/Aaron_zhang/Desktop/mahalanobis_distances.xlsx'
mahalanobis_df.to_excel(excel_writer=output_file_path, index=False)
# Draw the distance matrix as a heatmap on a 10x8 figure; tick labels are
# switched off on both axes.
plt.figure(figsize=(10, 8))
heatmap_ax = sns.heatmap(
    mahalanobis_distances,
    cmap="YlGnBu",
    xticklabels=False,
    yticklabels=False,
)
# Title and axis labels are set on the Axes returned by the heatmap call.
heatmap_ax.set_title("Mahalanobis Distance Matrix")
heatmap_ax.set_xlabel("Sample Index")
heatmap_ax.set_ylabel("Sample Index")
plt.show()
# 网友评论