美文网首页
使用sklearn库:SVM

使用sklearn库:SVM

作者: 还闹不闹 | 来源:发表于2020-05-29 21:23 被阅读0次

    参考:
    scikit-learn (sklearn) 官方文档中文版
    多分类问题OvO,OvR,MvM
    在python sklearn使用 SVM做分类
    Sklearn之支持向量机分类
    SVM:利用sklearn 实现SVM分类 相关参数说明ING

    • 样本数据集
    hua_se  huaban_yeshu    huaban_type
    101 1   3
    102 1   3
    103 2   3
    104 1   3
    105 3   3
    106 1   3
    107 3   3
    109 4   3
    110 2   3
    101 27  3
    102 28  3
    103 28  3
    104 29  3
    105 2   3
    106 27  3
    107 29  3
    109 30  3
    110 30  3
    101 4   3
    102 4   3
    103 3   3
    104 2   3
    105 1   3
    106 1   3
    107 2   3
    109 2   3
    110 4   3
    101 29  3
    102 30  3
    103 30  3
    104 29  3
    105 27  3
    106 28  3
    107 29  3
    109 29  3
    110 30  3
    1   1   1
    2   2   1
    3   3   1
    4   1   1
    5   4   1
    6   3   1
    7   4   1
    9   2   1
    8   3   1
    10  1   1
    6   2   1
    7   3   1
    3   1   1
    5   3   1
    5   4   1
    2   3   1
    3   2   1
    2   3   1
    2   2   1
    10  1   1
    9   3   1
    7   1   1
    9   4   1
    4   3   1
    6   3   1
    3   1   1
    7   1   1
    1   2   1
    8   4   1
    10  4   1
    10  27  2
    9   29  2
    8   29  2
    7   30  2
    5   29  2
    6   27  2
    4   27  2
    3   28  2
    1   29  2
    2   29  2
    10  29  2
    9   29  2
    8   30  2
    7   30  2
    5   27  2
    6   28  2
    4   28  2
    3   29  2
    1   27  2
    2   30  2
    10  30  2
    9   29  2
    8   30  2
    7   28  2
    5   29  2
    6   30  2
    4   30  2
    3   30  2
    1   27  2
    2   29  2
    

    二分类(只使用 huaban_type 为 1 和 2 的样本,即代码中的 df[36:97])。

    import numpy as np
    import pandas as pd
    from sklearn.svm import SVC
    from sklearn.model_selection import train_test_split
    from sklearn.impute import SimpleImputer

    # ``sklearn.externals.joblib`` was removed in scikit-learn 0.23. Prefer the
    # standalone ``joblib`` package and only fall back to the bundled copy on
    # very old installations.
    try:
        import joblib
    except ImportError:
        from sklearn.externals import joblib


    # Print every column of a DataFrame instead of eliding the middle ones.
    pd.set_option('display.max_columns', None)
    # Print every row of a DataFrame.
    pd.set_option('display.max_rows', None)
    # Allow up to 10000 characters per printed line so wide frames do not wrap.
    pd.set_option('display.width', 10000)
    # Treat ambiguous/East Asian wide characters as double width so that
    # columns containing Chinese text stay aligned.
    pd.set_option('display.unicode.ambiguous_as_wide', True)
    pd.set_option('display.unicode.east_asian_width', True)
    # Let NumPy print long arrays on a single line as well.
    np.set_printoptions(linewidth=1000)

    # Raw string: the previous literal depended on the invalid escapes ``\s``
    # and ``\d``, which Python warns about. The resulting path is unchanged.
    df = pd.read_csv(r'G:\rasa_demo\stack\data\train.csv.txt', sep='\t', encoding='GBK', header=0)
    # Binary classification: keep only the rows from position 36 onwards, which
    # in the sample table are the huaban_type 1 and 2 samples (the type 3 rows
    # come first). The slice end may exceed the row count; slicing tolerates it.
    df = df[36:97]
    # print(df)
    # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # Optional: fill missing values with the most frequent value per column.
    # imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent', copy=True)
    # df = imp.fit_transform(df)
    # df = pd.DataFrame(df)
    # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # # Check whether the data contains missing values (either way works).
    # # False: the column has no missing value
    # # True: the column has missing values
    # print(df.isnull().any())
    # print(np.isnan(df).any())
    # print(np.isfinite(df).all())
    # # # Inspect the records that contain missing values
    # # df_null = pd.isnull(df)
    # # df_null = df[df_null == True]
    # # print(df_null)
    # # Handling missing values (either way works).
    # # Drop the rows that contain missing values
    # df.dropna(inplace=True)
    # # # Fill missing values
    # # df.fillna('10.0')
    # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # print(df.columns)
    # print(df[['hua_se','huaban_yeshu']])
    # print(df.iloc[:,[0,1]])
    print(df.iloc[0:3, [2]])
    # Features are the first two columns, the label is the third one.
    X = df.iloc[:, [0, 1]]
    Y = df.iloc[:, [2]]
    print(X.shape, Y.shape)
    # Y = Y.values.reshape(-1,1)
    # SVC.fit expects a 1-D label array, hence ravel() on the single-column frame.
    labels = Y.values.ravel()
    print(labels)
    X_train, X_test, Y_train, Y_test = train_test_split(X, labels, test_size=0.2, random_state=35)
    # print(type(X_test))

    svm_double_clf_model = SVC(
        C=1.0,
        cache_size=200,
        class_weight=None,
        coef0=0.0,
        decision_function_shape='ovr',
        degree=3,
        gamma='auto',
        kernel='rbf',
        max_iter=-1,
        probability=False,
        random_state=None,
        shrinking=True,
        tol=0.001,
        verbose=False,
    )
    svm_double_clf_model.fit(X_train, Y_train)

    result = svm_double_clf_model.predict(X_test)
    # Accuracy = fraction of test samples whose prediction equals the label.
    accuracy = float(np.mean(result == Y_test))
    print('准确率:', accuracy)

    # # Persist the trained model
    # joblib.dump(svm_double_clf_model, r'G:\rasa_demo\stack\model\svm_double_clf_model.pkl')
    # print('模型保存成功!')
    # # Load the previously trained model
    # model = joblib.load(r'G:\rasa_demo\stack\model\svm_double_clf_model.pkl')
    # pred_y = model.predict(X_test)

    # Predict a single new sample; the column names must match the training features.
    pred_x = {'hua_se': 12, 'huaban_yeshu': 7}
    tmp = pd.DataFrame(pred_x, index=[0])
    print(tmp)
    print(svm_double_clf_model.predict(tmp))
    

    多分类(使用全部三个 huaban_type 类别的样本)。

    import numpy as np
    import pandas as pd
    from sklearn.svm import SVC
    from sklearn.model_selection import train_test_split

    # ``sklearn.externals.joblib`` was removed in scikit-learn 0.23. Prefer the
    # standalone ``joblib`` package and only fall back to the bundled copy on
    # very old installations.
    try:
        import joblib
    except ImportError:
        from sklearn.externals import joblib

    # Print every column of a DataFrame instead of eliding the middle ones.
    pd.set_option('display.max_columns', None)
    # Print every row of a DataFrame.
    pd.set_option('display.max_rows', None)
    # Allow up to 10000 characters per printed line so wide frames do not wrap.
    pd.set_option('display.width', 10000)
    # Treat ambiguous/East Asian wide characters as double width so that
    # columns containing Chinese text stay aligned.
    pd.set_option('display.unicode.ambiguous_as_wide', True)
    pd.set_option('display.unicode.east_asian_width', True)
    # Let NumPy print long arrays on a single line as well.
    np.set_printoptions(linewidth=1000)

    # Raw strings: the previous literals depended on invalid escapes such as
    # ``\s``, ``\d`` and ``\m``, which Python warns about. The paths are unchanged.
    df = pd.read_csv(r'G:\rasa_demo\stack\data\train.csv.txt', sep='\t', encoding='GBK', header=0)
    model_path = r'G:\rasa_demo\stack\model\svm_multi_clf_model.pkl'
    # print(df)
    # print(df.columns)
    # print(df[['hua_se','huaban_yeshu']])
    # print(df.iloc[:,[0,1]])
    print(df.iloc[0:3, [2]])
    # Features are the first two columns, the label is the third one.
    X = df.iloc[:, [0, 1]]
    Y = df.iloc[:, [2]]
    print(X.shape, Y.shape)
    # Y = Y.values.reshape(-1,1)
    # SVC.fit expects a 1-D label array, hence ravel() on the single-column frame.
    labels = Y.values.ravel()
    print(labels)
    X_train, X_test, Y_train, Y_test = train_test_split(X, labels, test_size=0.2, random_state=35)
    # print(type(X_test))

    # svm_double_clf_model = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
    svm_multi_clf_model = SVC(
        C=1.0,
        cache_size=200,
        class_weight=None,
        coef0=0.0,
        decision_function_shape='ovr',
        degree=3,
        gamma='auto',
        kernel='rbf',
        max_iter=-1,
        probability=False,
        random_state=None,
        shrinking=True,
        tol=0.001,
        verbose=False,
    )
    # Alternative kernels to experiment with:
    # svm_multi_clf_model = SVC(kernel='linear')  # linear kernel
    # svm_multi_clf_model = SVC(kernel='poly', degree=3)  # polynomial kernel
    # svm_multi_clf_model = SVC(kernel='rbf', C=1)  # radial basis function kernel
    svm_multi_clf_model.fit(X_train, Y_train)

    result = svm_multi_clf_model.predict(X_test)
    # Accuracy = fraction of test samples whose prediction equals the label.
    accuracy = float(np.mean(result == Y_test))
    print('准确率:', accuracy)

    # Persist the trained model. joblib serialises with pickle; '.pkl' is only
    # the conventional file extension.
    joblib.dump(svm_multi_clf_model, model_path)
    print('模型保存成功!')
    # Load the trained model back. NOTE: unpickling can execute arbitrary code,
    # so only load model files that come from a trusted source.
    model = joblib.load(model_path)
    pred_y = model.predict(X_test)

    # Predict a single new sample; the column names must match the training features.
    pred_x = {'hua_se': 2, 'huaban_yeshu': 27}
    tmp = pd.DataFrame(pred_x, index=[0])
    print(tmp)
    print(svm_multi_clf_model.predict(tmp))
    
    盗图

    相关文章

      网友评论

          本文标题:使用sklearn库:SVM

          本文链接:https://www.haomeiwen.com/subject/vaafzhtx.html