美文网首页
芯片质量检测(混淆矩阵,不同n_neighbors)

芯片质量检测(混淆矩阵,不同n_neighbors)

作者: y_7539 | 来源:发表于2023-01-11 10:07 被阅读0次

    异常点检测

    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    %matplotlib inline
    
    # Load the raw chip-quality samples; later cells read the columns
    # "x1", "x2" (features) and "y" (label) from this frame.
    raw_data = pd.read_csv("datas/data_class_raw.csv")
    # Preview the first rows (rendered by the notebook when this is the
    # last expression of the cell).
    raw_data.head()
    
    image.png
    # Split the raw frame into the feature matrix, the two individual
    # feature columns (used for plotting) and the label column.
    x = raw_data.drop(columns="y")
    x1, x2 = raw_data["x1"], raw_data["x2"]
    y = raw_data["y"]
    
    image.png
    # Anomaly detection: fit an elliptic envelope on the y == 0 samples only
    # and score those same samples (contamination=0.02).
    from sklearn.covariance import EllipticEnvelope
    bad_samples = x[y == 0]
    ad_model = EllipticEnvelope(contamination=0.02)
    ad_model.fit(bad_samples)
    # Entries equal to -1 are the ones plotted as anomalies further down.
    bad_predict = ad_model.predict(bad_samples)
    
    # Show both classes and mark the detected anomalies of the y == 0 class.
    is_bad = y == 0
    is_good = y == 1
    is_outlier = bad_predict == -1
    bad_x1, bad_x2 = x1[is_bad], x2[is_bad]

    plt.figure()
    bad = plt.scatter(bad_x1, bad_x2)
    good = plt.scatter(x1[is_good], x2[is_good])
    # Large "x" markers drawn over the y == 0 points flagged with -1.
    anoma = plt.scatter(bad_x1[is_outlier], bad_x2[is_outlier], marker="x", s=150)
    plt.legend((bad, good, anoma), ("bad", "good", "anoma"))
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.show()
    
    image.png

    去除异常点的数据pca处理

    # PCA / modelling on the data set with the anomalies already removed.
    process_data = pd.read_csv("datas/data_class_processed.csv")
    # Define features and label from the *processed* frame. The original code
    # rebuilt them from raw_data, which left process_data unused and fed the
    # anomalous samples into every later step.
    # NOTE(review): assumes the processed CSV has the same "x1", "x2", "y"
    # columns as the raw one -- confirm against the file.
    x = process_data.drop("y", axis=1)
    y = process_data["y"]
    # Rebind the plotting columns as well, so later masks such as x1[y == 0]
    # index a Series that shares y's index.
    x1 = process_data["x1"]
    x2 = process_data["x2"]
    
    from sklearn.decomposition import PCA
    from sklearn.preprocessing import StandardScaler

    # Standardise the features before PCA.
    scaler = StandardScaler()
    x_stand = scaler.fit_transform(x)
    # PCA with 2 components (the original note calls this "same dimension").
    pca = PCA(n_components=2)
    x_pca = pca.fit_transform(x_stand)
    # Explained-variance ratio per component. Output recorded by the original
    # author: array([0.51664723, 0.48335277]) -- concluded that no
    # dimensionality reduction is needed.
    var_radio = pca.explained_variance_ratio_
    

    数据分离,knn计算准确率

    # Train/test split: 40% of the samples held out, fixed random_state=4.
    from sklearn.model_selection import train_test_split
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4, random_state=4)

    # KNN classifier with n_neighbors=10; report train and test accuracy.
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score

    knn = KNeighborsClassifier(n_neighbors=10)
    knn.fit(x_train, y_train)
    y_train_predict, y_test_predict = knn.predict(x_train), knn.predict(x_test)
    acc_train = accuracy_score(y_train, y_train_predict)
    acc_test = accuracy_score(y_test, y_test_predict)
    # Output recorded by the original author: 0.9047619047619048 0.5333333333333333
    print(acc_train, acc_test)
    
    # Visualise the classification boundary: predict every point of a dense
    # 2-D grid covering [0, 10) x [0, 10) with step 0.05.
    grid_axis = np.arange(0, 10, 0.05)
    xx, yy = np.meshgrid(grid_axis, grid_axis)
    # Flatten the grid into an (n_points, 2) array of (x1, x2) pairs.
    x_range = np.column_stack((xx.ravel(), yy.ravel()))

    y_range = knn.predict(x_range)

    # Grid points coloured by predicted class, with the labelled samples
    # drawn on top (scatter order kept so the default colours match).
    grid_x1, grid_x2 = x_range[:, 0], x_range[:, 1]
    pred_bad, pred_good = y_range == 0, y_range == 1
    plt.figure(figsize=(17, 8))
    knn_bad = plt.scatter(grid_x1[pred_bad], grid_x2[pred_bad])
    knn_good = plt.scatter(grid_x1[pred_good], grid_x2[pred_good])
    bad = plt.scatter(x1[y == 0], x2[y == 0])
    good = plt.scatter(x1[y == 1], x2[y == 1])
    plt.legend((bad, good, knn_bad, knn_good), ("bad", "good", "knn_bad", "knn_good"))
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.show()
    
    image.png

    混淆矩阵

    # Confusion matrix of the test-set predictions and the metrics derived
    # from it (rows = true label, columns = predicted label).
    from sklearn.metrics import confusion_matrix

    cm = confusion_matrix(y_test, y_test_predict)
    tn, fp = cm[0, 0], cm[0, 1]
    fn, tp = cm[1, 0], cm[1, 1]

    # Accuracy: (tp + tn) / (tp + tn + fp + fn)
    accuracy = (tp + tn) / (tp + tn + fp + fn)

    # Sensitivity (recall): tp / (tp + fn)
    recall = tp / (tp + fn)

    # Specificity: tn / (tn + fp)
    spec = tn / (tn + fp)

    # Precision: tp / (tp + fp)
    precision = tp / (tp + fp)

    # F1 score: 2 * precision * recall / (precision + recall)
    f1_score = 2 * precision * recall / (precision + recall)
    

    不同n_neighbors准确率

    # Refit KNN for every n_neighbors in 1..20 and collect the train/test
    # accuracy of each fit.
    acc_trains, acc_tests = [], []
    for i in range(1, 21):
        knn = KNeighborsClassifier(n_neighbors=i)
        knn.fit(x_train, y_train)
        y_train_predict, y_test_predict = knn.predict(x_train), knn.predict(x_test)
        acc_train = accuracy_score(y_train, y_train_predict)
        acc_test = accuracy_score(y_test, y_test_predict)
        acc_trains.append(acc_train)
        acc_tests.append(acc_test)
    
    # Plot train and test accuracy against n_neighbors (1..20).
    neighbor_counts = range(1, 21)
    plt.figure()
    train = plt.plot(neighbor_counts, acc_trains, label="train")
    test = plt.plot(neighbor_counts, acc_tests, label="test")
    plt.xlabel("n_neighbors")
    plt.ylabel("accuracy")
    plt.legend()
    plt.show()
    
    image.png

    相关文章

      网友评论

          本文标题:芯片质量检测(混淆矩阵,不同n_neighbors)

          本文链接:https://www.haomeiwen.com/subject/zxubcdtx.html