ML Summary

Author: ckawyh | Published 2016-07-30 22:47 | Source: https://www.haomeiwen.com/subject/ebutsttx.html

    Commonly used scikit-learn classifiers. The script below trains eight classic classifiers on the iris dataset and reports each one's accuracy and classification report on a 30% held-out test set.

    # -*- coding: utf-8 -*-
    """
    Created on Fri Jul 29 21:51:11 2016

    A comparison of eight classic machine learning classifiers in scikit-learn.

    @author: ckawyh
    """

    import pickle
    import time

    from sklearn import metrics
    
    # Multinomial Naive Bayes Classifier
    # (MultinomialNB models non-negative, count-like features such as word
    # counts; see the note under the results for why it struggles on iris)
    def naive_bayes_classifier(train_x, train_y):
        from sklearn.naive_bayes import MultinomialNB
        model = MultinomialNB(alpha=0.01)
        model.fit(train_x, train_y)
        return model


    # KNN Classifier (k defaults to 5 neighbors)
    def knn_classifier(train_x, train_y):
        from sklearn.neighbors import KNeighborsClassifier
        model = KNeighborsClassifier()
        model.fit(train_x, train_y)
        return model


    # Logistic Regression Classifier
    def logistic_regression_classifier(train_x, train_y):
        from sklearn.linear_model import LogisticRegression
        model = LogisticRegression(penalty='l2')
        model.fit(train_x, train_y)
        return model


    # Random Forest Classifier
    def random_forest_classifier(train_x, train_y):
        from sklearn.ensemble import RandomForestClassifier
        model = RandomForestClassifier(n_estimators=8)
        model.fit(train_x, train_y)
        return model


    # Decision Tree Classifier
    def decision_tree_classifier(train_x, train_y):
        from sklearn import tree
        model = tree.DecisionTreeClassifier()
        model.fit(train_x, train_y)
        return model


    # GBDT (Gradient Boosting Decision Tree) Classifier
    def gradient_boosting_classifier(train_x, train_y):
        from sklearn.ensemble import GradientBoostingClassifier
        model = GradientBoostingClassifier(n_estimators=200)
        model.fit(train_x, train_y)
        return model


    # SVM Classifier
    def svm_classifier(train_x, train_y):
        from sklearn.svm import SVC
        model = SVC(kernel='rbf', probability=True)
        model.fit(train_x, train_y)
        return model


    # SVM Classifier with hyper-parameters tuned by cross-validated grid search
    def svm_cross_validation(train_x, train_y):
        from sklearn.model_selection import GridSearchCV
        from sklearn.svm import SVC
        model = SVC(kernel='rbf', probability=True)
        param_grid = {'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
                      'gamma': [0.001, 0.0001]}
        grid_search = GridSearchCV(model, param_grid, n_jobs=1, verbose=1)
        grid_search.fit(train_x, train_y)
        best_parameters = grid_search.best_estimator_.get_params()
        for para, val in best_parameters.items():
            print(para, val)
        # Refit on the full training set with the best C and gamma found above.
        model = SVC(kernel='rbf', C=best_parameters['C'],
                    gamma=best_parameters['gamma'], probability=True)
        model.fit(train_x, train_y)
        return model
    
        
    if __name__ == '__main__':
        from sklearn.datasets import load_iris
        from sklearn.model_selection import train_test_split

        # Load the iris data and hold out 30% of it for testing.
        data_dict = load_iris()
        data = data_dict.data
        label = data_dict.target
        train_x, test_x, train_y, test_y = train_test_split(
            data, label, test_size=0.3, random_state=0)

        model_save_file = None  # set to a file path to pickle the trained models
        model_save = {}
        test_classifiers = ['NB', 'KNN', 'LR', 'RF', 'DT', 'SVM', 'SVMCV', 'GBDT']
        classifiers = {'NB': naive_bayes_classifier,
                       'KNN': knn_classifier,
                       'LR': logistic_regression_classifier,
                       'RF': random_forest_classifier,
                       'DT': decision_tree_classifier,
                       'SVM': svm_classifier,
                       'SVMCV': svm_cross_validation,
                       'GBDT': gradient_boosting_classifier,
                       }
        num_train, num_feat = train_x.shape
        num_test, _ = test_x.shape
        print('******************** Data Info *********************')
        print('#training data: %d, #testing_data: %d, dimension: %d'
              % (num_train, num_test, num_feat))

        for classifier in test_classifiers:
            print('******************* %s ********************' % classifier)
            start_time = time.time()
            model = classifiers[classifier](train_x, train_y)
            print('training took %fs!' % (time.time() - start_time))
            predict = model.predict(test_x)
            if model_save_file is not None:
                model_save[classifier] = model
            accuracy = metrics.accuracy_score(test_y, predict)
            report = metrics.classification_report(test_y, predict)
            print('accuracy: %.2f%%' % (100 * accuracy))
            print(report)

        if model_save_file is not None:
            with open(model_save_file, 'wb') as f:
                pickle.dump(model_save, f)
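
    The models are only pickled when model_save_file is set to a path; with the
    default None nothing is written. A minimal sketch of loading the saved
    classifiers back in a later session (the file name 'models.pkl' is
    hypothetical; use whatever model_save_file pointed to):

    import pickle
    from sklearn.datasets import load_iris

    # 'models.pkl' is a hypothetical path, standing in for model_save_file.
    with open('models.pkl', 'rb') as f:
        model_save = pickle.load(f)

    # model_save maps the names in test_classifiers to fitted estimators.
    X = load_iris().data
    print(model_save['KNN'].predict(X[:5]))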
    
    Output:
    
    ******************** Data Info *********************
    #training data: 105, #testing_data: 45, dimension: 4
    ******************* NB ********************
    training took 0.001000s!
    accuracy: 60.00%
                 precision    recall  f1-score   support
    
            0.0       1.00      1.00      1.00        16
            1.0       0.00      0.00      0.00        18
            2.0       0.38      1.00      0.55        11
    
    avg / total       0.45      0.60      0.49        45
    
    ******************* KNN ********************
    training took 0.000000s!
    accuracy: 97.78%
                 precision    recall  f1-score   support
    
            0.0       1.00      1.00      1.00        16
            1.0       1.00      0.94      0.97        18
            2.0       0.92      1.00      0.96        11
    
    avg / total       0.98      0.98      0.98        45
    
    ******************* LR ********************
    training took 0.001000s!
    accuracy: 88.89%
                 precision    recall  f1-score   support
    
            0.0       1.00      1.00      1.00        16
            1.0       1.00      0.72      0.84        18
            2.0       0.69      1.00      0.81        11
    
    avg / total       0.92      0.89      0.89        45
    
    ******************* RF ********************
    training took 0.019000s!
    accuracy: 97.78%
                 precision    recall  f1-score   support
    
            0.0       1.00      1.00      1.00        16
            1.0       1.00      0.94      0.97        18
            2.0       0.92      1.00      0.96        11
    
    avg / total       0.98      0.98      0.98        45
    
    ******************* DT ********************
    training took 0.000000s!
    accuracy: 97.78%
                 precision    recall  f1-score   support
    
            0.0       1.00      1.00      1.00        16
            1.0       1.00      0.94      0.97        18
            2.0       0.92      1.00      0.96        11
    
    avg / total       0.98      0.98      0.98        45
    
    ******************* SVM ********************
    training took 0.001000s!
    accuracy: 97.78%
                 precision    recall  f1-score   support
    
            0.0       1.00      1.00      1.00        16
            1.0       1.00      0.94      0.97        18
            2.0       0.92      1.00      0.96        11
    
    avg / total       0.98      0.98      0.98        45
    
    ******************* SVMCV ********************
    Fitting 3 folds for each of 14 candidates, totalling 42 fits
    kernel rbf
    C 1000
    verbose False
    probability True
    degree 3
    shrinking True
    max_iter -1
    decision_function_shape None
    random_state None
    tol 0.001
    cache_size 200
    coef0 0.0
    gamma 0.001
    class_weight None
    training took 0.143000s!
    accuracy: 97.78%
                 precision    recall  f1-score   support
    
            0.0       1.00      1.00      1.00        16
            1.0       1.00      0.94      0.97        18
            2.0       0.92      1.00      0.96        11
    
    avg / total       0.98      0.98      0.98        45
    
    ******************* GBDT ********************
    [Parallel(n_jobs=1)]: Done  42 out of  42 | elapsed:    0.0s finished
    training took 0.176000s!
    accuracy: 97.78%
                 precision    recall  f1-score   support
    
            0.0       1.00      1.00      1.00        16
            1.0       1.00      0.94      0.97        18
            2.0       0.92      1.00      0.96        11
    
    avg / total       0.98      0.98      0.98        45
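
    A note on the numbers above: every classifier reaches 97.78% (44 of the 45
    test samples correct) except Multinomial Naive Bayes at 60%. MultinomialNB
    models non-negative, count-like features such as word frequencies, while
    the iris features are continuous measurements, so GaussianNB is the more
    natural naive Bayes variant here. A minimal sketch, not part of the
    original script:

    from sklearn import metrics
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB

    data, label = load_iris(return_X_y=True)
    train_x, test_x, train_y, test_y = train_test_split(
        data, label, test_size=0.3, random_state=0)

    # One normal distribution per feature and class: a reasonable model
    # for continuous measurements such as petal and sepal lengths.
    model = GaussianNB().fit(train_x, train_y)
    predict = model.predict(test_x)
    print('accuracy: %.2f%%' % (100 * metrics.accuracy_score(test_y, predict)))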
    

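    A single 70/30 split of 150 samples is also a fairly noisy benchmark, which
    is why so many of the classifiers tie at exactly one test error. k-fold
    cross validation scores every sample exactly once and gives a steadier
    comparison; a short sketch for two of the classifiers:

    from sklearn.datasets import load_iris
    from sklearn.model_selection import cross_val_score
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC

    data, label = load_iris(return_X_y=True)
    for name, estimator in [('KNN', KNeighborsClassifier()),
                            ('SVM', SVC(kernel='rbf'))]:
        # cross_val_score clones the estimator and refits it on each fold.
        scores = cross_val_score(estimator, data, label, cv=5)
        print('%s: %.3f +/- %.3f' % (name, scores.mean(), scores.std()))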