美文网首页机器学习与数据挖掘
林轩田机器学习基石课程 - Pocket PLA算法 python

林轩田机器学习基石课程 - Pocket PLA算法 python

作者: Spareribs | 来源:发表于2019-02-09 22:21 被阅读54次

    作业1:

    Q1. Run the pocket algorithm with a total of 50 updates on D, and verify the performance of w pocket using the test set.
    Please repeat your experiment for 2000 times, each with a different random seed.
    What is the average error rate on the test set? Plot a histogram to show error rate versus frequency.

    # calculate error count
    def calError(self, X, Y, W):
        """Count the samples that the linear classifier ``W`` gets wrong.

        A sample is predicted ``-1`` when its score ``np.dot(X, W)`` is
        strictly negative and ``+1`` otherwise, so a score of exactly zero
        counts as a ``+1`` prediction.

        Args:
            X: Feature matrix, one sample per row.
            Y: Labels, compared element-wise with the predictions
                (presumably +1 / -1 -- confirm against the data loader).
            W: Weight vector.

        Returns:
            Number of entries whose prediction differs from ``Y``.
        """
        # Begin with "+1 everywhere", borrowing Y's shape and dtype, then
        # flip the entries whose score is below zero.
        predictions = np.ones_like(Y)
        negative = np.dot(X, W) < 0
        predictions[negative] = -1
        return (predictions != Y).sum()
    
    def pocket_pla_1(self, X_train, Y_train, X_test, Y_test,
                     iterations=2000, updates=50):
        """Return the mean test error rate of the pocket algorithm.

        For every experiment ``k`` in ``range(iterations)`` the global NumPy
        RNG is seeded with ``k``, the training set is shuffled, and PLA is
        run by cycling through the shuffled samples.  Every misclassified
        sample (``score * label <= 0``) triggers one PLA correction; after
        each correction the new weights replace the pocket weights if they
        make strictly fewer training mistakes according to ``self.calError``.
        A run stops after ``updates`` corrections, or earlier once a full
        pass over the data finds no mistake.  The pocket weights are then
        scored on the test set.

        Args:
            X_train: Training features, one sample per row (2-D array).
            Y_train: Training labels, 1-D
                (presumably +1 / -1 -- confirm against the data loader).
            X_test: Test features with the same column layout as ``X_train``.
            Y_test: Test labels.
            iterations: Number of independent experiments (seeds).
            updates: Number of PLA corrections allowed per experiment.

        Returns:
            The test error rate averaged over all experiments.

        Raises:
            ValueError: If ``iterations < 1`` or ``updates < 0``.
        """
        if iterations < 1:
            raise ValueError("iterations must be at least 1")
        if updates < 0:
            raise ValueError("updates must be non-negative")

        n_samples = X_train.shape[0]
        error_rates = []

        for seed in range(iterations):
            np.random.seed(seed)  # one reproducible shuffle per experiment
            order = np.random.permutation(n_samples)
            # Shuffle into fresh names so the caller's arrays are never
            # rebound and each experiment depends on its own seed only.
            X_shuffled = X_train[order]
            Y_shuffled = Y_train[order]

            w = np.zeros(X_train.shape[1])  # running PLA weights
            pocket_w = w  # best weights seen so far
            pocket_err = self.calError(X_shuffled, Y_shuffled, pocket_w)

            n_updates = 0
            clean_streak = 0  # consecutive samples classified correctly
            idx = 0
            # Cycle through the data: the budget is the number of
            # corrections, not the number of samples visited.
            while n_updates < updates and clean_streak < n_samples:
                x_i = X_shuffled[idx]
                y_i = Y_shuffled[idx]
                if np.dot(x_i, w) * y_i <= 0:
                    # PLA always moves, even if the result is worse; only
                    # the pocket is protected from regressions.
                    w = w + y_i * x_i
                    n_updates += 1
                    clean_streak = 0
                    err = self.calError(X_shuffled, Y_shuffled, w)
                    if err < pocket_err:
                        pocket_w, pocket_err = w, err
                else:
                    clean_streak += 1
                idx = (idx + 1) % n_samples

            # A test score of exactly zero maps to 0 and therefore never
            # matches a label, i.e. it is counted as an error.
            test_scores = np.dot(X_test, pocket_w)
            error_rates.append(np.mean(np.sign(test_scores) != Y_test))

        return np.mean(error_rates)
    

    作业2

    Q2. Modify your algorithm to return $w_{50}$ (the PLA vector after 50 updates) instead of $w_{\text{pocket}}$ (the pocket vector) after 50 updates. Run the modified algorithm on D, and verify the performance using the test set. Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set? Plot a histogram to show error rate versus frequency. Compare your result to the previous problem and briefly discuss your findings.

    def pocket_pla_2(self, X_train, Y_train, X_test, Y_test,
                     iterations=2000, updates=50):
        """Return the mean test error rate of plain PLA after a fixed
        number of corrections (no pocket).

        For every experiment ``k`` in ``range(iterations)`` the global NumPy
        RNG is seeded with ``k``, the training set is shuffled, and PLA is
        run by cycling through the shuffled samples.  Every misclassified
        sample (``score * label <= 0``) triggers one correction.  A run
        stops after ``updates`` corrections, or earlier once a full pass
        over the data finds no mistake.  The final weights are then scored
        on the test set.

        Args:
            X_train: Training features, one sample per row (2-D array).
            Y_train: Training labels, 1-D
                (presumably +1 / -1 -- confirm against the data loader).
            X_test: Test features with the same column layout as ``X_train``.
            Y_test: Test labels.
            iterations: Number of independent experiments (seeds).
            updates: Number of PLA corrections allowed per experiment.

        Returns:
            The test error rate averaged over all experiments.

        Raises:
            ValueError: If ``iterations < 1`` or ``updates < 0``.
        """
        if iterations < 1:
            raise ValueError("iterations must be at least 1")
        if updates < 0:
            raise ValueError("updates must be non-negative")

        n_samples = X_train.shape[0]
        error_rates = []

        for seed in range(iterations):
            np.random.seed(seed)  # one reproducible shuffle per experiment
            order = np.random.permutation(n_samples)
            # Shuffle into fresh names so the caller's arrays are never
            # rebound and each experiment depends on its own seed only.
            X_shuffled = X_train[order]
            Y_shuffled = Y_train[order]

            w = np.zeros(X_train.shape[1])
            n_updates = 0
            clean_streak = 0  # consecutive samples classified correctly
            idx = 0
            # Cycle through the data: the budget is the number of
            # corrections, not the number of samples visited.
            while n_updates < updates and clean_streak < n_samples:
                x_i = X_shuffled[idx]
                y_i = Y_shuffled[idx]
                if np.dot(x_i, w) * y_i <= 0:
                    w = w + y_i * x_i
                    n_updates += 1
                    clean_streak = 0
                else:
                    clean_streak += 1
                idx = (idx + 1) % n_samples

            # A test score of exactly zero maps to 0 and therefore never
            # matches a label, i.e. it is counted as an error.
            test_scores = np.dot(X_test, w)
            error_rates.append(np.mean(np.sign(test_scores) != Y_test))

        return np.mean(error_rates)
    

    作业3

    Q3. Modify your algorithm in Problem 1 to run for 100 updates instead of 50, and verify the performance of w pocket using the test set.
    Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set? Plot a histogram to show error rate versus frequency. Compare your result to Problem 18 (Q1 above) and briefly discuss your findings.

    def pocket_pla_3(self, X_train, Y_train, X_test, Y_test,
                     iterations=2000, updates=100):
        """Return the mean test error rate of the pocket algorithm with a
        budget of 100 corrections per run by default.

        For every experiment ``k`` in ``range(iterations)`` the global NumPy
        RNG is seeded with ``k``, the training set is shuffled, and PLA is
        run by cycling through the shuffled samples.  Every misclassified
        sample (``score * label <= 0``) triggers one PLA correction; after
        each correction the new weights replace the pocket weights if they
        make strictly fewer training mistakes according to ``self.calError``.
        A run stops after ``updates`` corrections, or earlier once a full
        pass over the data finds no mistake.  The pocket weights are then
        scored on the test set.

        Args:
            X_train: Training features, one sample per row (2-D array).
            Y_train: Training labels, 1-D
                (presumably +1 / -1 -- confirm against the data loader).
            X_test: Test features with the same column layout as ``X_train``.
            Y_test: Test labels.
            iterations: Number of independent experiments (seeds).
            updates: Number of PLA corrections allowed per experiment.

        Returns:
            The test error rate averaged over all experiments.

        Raises:
            ValueError: If ``iterations < 1`` or ``updates < 0``.
        """
        if iterations < 1:
            raise ValueError("iterations must be at least 1")
        if updates < 0:
            raise ValueError("updates must be non-negative")

        n_samples = X_train.shape[0]
        error_rates = []

        for seed in range(iterations):
            np.random.seed(seed)  # one reproducible shuffle per experiment
            order = np.random.permutation(n_samples)
            # Shuffle into fresh names so the caller's arrays are never
            # rebound and each experiment depends on its own seed only.
            X_shuffled = X_train[order]
            Y_shuffled = Y_train[order]

            w = np.zeros(X_train.shape[1])  # running PLA weights
            pocket_w = w  # best weights seen so far
            pocket_err = self.calError(X_shuffled, Y_shuffled, pocket_w)

            n_updates = 0
            clean_streak = 0  # consecutive samples classified correctly
            idx = 0
            # Cycle through the data: the budget is the number of
            # corrections, not the number of samples visited.
            while n_updates < updates and clean_streak < n_samples:
                x_i = X_shuffled[idx]
                y_i = Y_shuffled[idx]
                if np.dot(x_i, w) * y_i <= 0:
                    # PLA always moves, even if the result is worse; only
                    # the pocket is protected from regressions.
                    w = w + y_i * x_i
                    n_updates += 1
                    clean_streak = 0
                    err = self.calError(X_shuffled, Y_shuffled, w)
                    if err < pocket_err:
                        pocket_w, pocket_err = w, err
                else:
                    clean_streak += 1
                idx = (idx + 1) % n_samples

            # A test score of exactly zero maps to 0 and therefore never
            # matches a label, i.e. it is counted as an error.
            test_scores = np.dot(X_test, pocket_w)
            error_rates.append(np.mean(np.sign(test_scores) != Y_test))

        return np.mean(error_rates)
    

    相关文章

      网友评论

        本文标题:林轩田机器学习基石课程 - Pocket PLA算法 python

        本文链接:https://www.haomeiwen.com/subject/iogzsqtx.html