
Logistic Regression Code in Python

Author: hufengreborn | Published 2018-08-09 16:28
from __future__ import print_function, division
import sys
import os
import math
from sklearn import datasets
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def shuffle_data(X, y, seed=None):
    # Shuffle X and y in unison; compare against None so that seed=0 is honored
    if seed is not None:
        np.random.seed(seed)
    idx = np.arange(X.shape[0])
    np.random.shuffle(idx)
    return X[idx], y[idx]

# Normalize dataset X so each sample has unit Lp norm (L2 by default)
def normalize(X, axis=-1, p=2):
    lp_norm = np.atleast_1d(np.linalg.norm(X, p, axis))
    lp_norm[lp_norm == 0] = 1
    return X / np.expand_dims(lp_norm, axis)
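
# Quick sanity check (added example, not in the original post): after normalize,
# every row has unit L2 norm, e.g.
#     normalize(np.array([[3.0, 4.0]]))  # -> [[0.6, 0.8]], which has norm 1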

# Standardize dataset X (zero mean, unit variance per column)
def standardize(X):
    X_std = np.zeros(X.shape)
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    # When dividing, always guard against a zero denominator: the one-liner
    # X_std = (X - X.mean(axis=0)) / X.std(axis=0) fails on constant columns
    for col in range(np.shape(X)[1]):
        if std[col]:
            X_std[:, col] = (X[:, col] - mean[col]) / std[col]
    return X_std
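
# Quick sanity check (added example, not in the original post): every column of
# the result has mean 0 and std 1; constant columns are left at 0, e.g.
#     standardize(np.array([[1.0, 2.0], [3.0, 2.0], [5.0, 2.0]]))
#     # -> column 0 becomes [-1.225, 0, 1.225]; column 1 stays [0, 0, 0]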

# Split the dataset into training and test sets
def train_test_split(X, y, test_size=0.3, shuffle=True, seed=None):
    if shuffle:
        X, y = shuffle_data(X, y, seed)
    n_train_samples = int(X.shape[0] * (1-test_size))
    x_train, x_test = X[:n_train_samples], X[n_train_samples:]
    y_train, y_test = y[:n_train_samples], y[n_train_samples:]
    return x_train, x_test, y_train, y_test

# Convert a vector into a diagonal matrix whose diagonal entries are the
# vector's elements (equivalent to np.diag)
def vec2diagonal(vec):
    vec_length = len(vec)
    diagonal = np.zeros((vec_length, vec_length))
    for i in range(vec_length):
        diagonal[i][i] = vec[i]
    return diagonal

def accuracy(y, y_pred):
    y = y.reshape(y.shape[0], -1)
    y_pred = y_pred.reshape(y_pred.shape[0], -1)
    return np.sum(y == y_pred)/len(y)

class Sigmoid:
    def function(self, x):
        return 1/(1 + np.exp(-x))

    def derivative(self, x):
        return self.function(x) * (1 - self.function(x))
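
# Numerical-stability note (added remark, not in the original post): for large
# negative x, np.exp(-x) overflows and NumPy prints a RuntimeWarning, although
# the result still saturates to 0. scipy.special.expit is a stable drop-in:
#     from scipy.special import expit
#     y = expit(x)  # same as 1 / (1 + np.exp(-x)), without the overflow warning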

class LogisticRegression:
    # Logistic regression classification model.
    def __init__(self, learning_rate=.1):
        self.w = None
        self.learning_rate = learning_rate
        self.sigmoid = Sigmoid()

    def fit(self, X, y, n_iterations=4000):
        # Prepend a bias column of ones to X
        X = np.insert(X, 0, 1, axis=1)
        X = X.reshape(X.shape[0], -1)
        y = y.reshape(y.shape[0], -1)
        n_samples, n_features = np.shape(X)
        # Initialize parameters uniformly in [-1/sqrt(n_features), 1/sqrt(n_features)]
        limit = 1 / math.sqrt(n_features)
        self.w = np.random.uniform(-limit, limit, (n_features, 1))
        for i in range(n_iterations):
            # Compute predictions with the current parameters w
            linear_output = X.dot(self.w)
            y_pred = self.sigmoid.function(linear_output)
            # Gradient descent update of w
            self.w -= self.learning_rate * X.T.dot(
                -(y - y_pred) * self.sigmoid.derivative(linear_output))
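
    # Note on the update rule (added remark, not from the original post): the
    # extra sigmoid-derivative factor makes this gradient descent on the
    # squared-error loss L = 1/2 * sum((y - y_pred)^2), not on the usual
    # cross-entropy. With cross-entropy the derivative factor cancels, and the
    # inner loop would simplify to:
    #     y_pred = self.sigmoid.function(X.dot(self.w))
    #     self.w -= self.learning_rate * X.T.dot(y_pred - y)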

    def predict(self, X):
        # The bias column was added during training, so add it for prediction too
        X = X.reshape(X.shape[0], -1)
        X = np.insert(X, 0, 1, axis=1)
        # Predict: np.round thresholds the probability at 0.5
        y_pred = np.round(self.sigmoid.function(X.dot(self.w))).astype(int)
        return y_pred


def main():
    # Load the iris dataset and keep only classes 1 and 2 (drop class 0)
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    # Relabel the two remaining classes as 0 and 1
    y[y == 1] = 0
    y[y == 2] = 1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)
    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accu = accuracy(y_test, y_pred)
    print("Accuracy:", accu)
    # Scatter plot of the first two (normalized) features, colored by class
    plt.figure(figsize=(12, 8))
    plt.scatter(X[y == 0][:, 0], X[y == 0][:, 1])
    plt.scatter(X[y == 1][:, 0], X[y == 1][:, 1])
    plt.show()

if __name__ == "__main__":
    main()
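
As an optional sanity check (my addition; it relies only on the scikit-learn dependency the script already imports), the same split can be scored with scikit-learn's built-in logistic regression by adding these lines at the end of main():

    # Baseline comparison (added sketch, not part of the original post);
    # the alias avoids clashing with the LogisticRegression class defined above
    from sklearn.linear_model import LogisticRegression as SkLogisticRegression
    sk_clf = SkLogisticRegression()
    sk_clf.fit(X_train, y_train)
    print("sklearn accuracy:", sk_clf.score(X_test, y_test))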
