数据清洗时，将 DataFrame 中的文本列转换为数值编码。
def Replace(X, columns):
    """Encode one column of a DataFrame as integer category codes, in place.

    Every distinct non-null value in ``X[columns]`` is replaced by its
    position (0, 1, 2, ...) among the sorted distinct values of that column.
    Missing values are not assigned a code and remain NaN.

    Args:
        X: pandas DataFrame; its ``columns`` column is overwritten.
        columns: Label of the single column to encode.

    Returns:
        The same DataFrame object ``X`` that was passed in.
    """
    # Build the value -> code table once and apply it in a single pass.
    # Replacing one value at a time can re-map a code that was just written
    # when the column already holds numbers (e.g. -1 -> 0, then 0 -> 1).
    categories = sorted(X[columns].dropna().unique())
    codes = {value: code for code, value in enumerate(categories)}
    X[columns] = X[columns].map(codes)
    return X
按 3:7 的比例划分测试集与训练集
def Data(X, columns):
    """Split a DataFrame into train/test features and target (70% / 30%).

    Args:
        X: pandas DataFrame holding both the feature columns and the target.
        columns: Label of the target column; it is removed from the features.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. The split uses
        ``test_size=0.3`` and ``random_state=0``, so it is reproducible.
    """
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Only very old scikit-learn releases lack model_selection; newer
        # ones no longer ship cross_validation, so it cannot be tried first.
        from sklearn.cross_validation import train_test_split

    target = X[columns]
    features = X.drop([columns], axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=0)
    return (X_train, X_test, y_train, y_test)
基础的机器学习代码，用于查看各模型的得分
# astype returns a converted copy instead of modifying the object, so the
# result must be bound back to the name for the integer cast to take effect.
# NOTE(review): y_train is not defined in the visible code; presumably it is
# the training target returned by Data() -- confirm against the caller.
y_train = y_train.astype('int')
def RF(X_train, X_test, y_train, y_test):  # random forest
    """Fit a 100-tree random forest classifier and score it on the test split.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels the predictions are compared against.

    Returns:
        Accuracy of the predictions on ``X_test``. No ``random_state`` is
        passed to the forest, so the value can differ between runs.
    """
    from sklearn.ensemble import RandomForestClassifier
    # Imported here like the estimator: nothing in the visible file brings
    # accuracy_score into scope.
    from sklearn.metrics import accuracy_score

    model = RandomForestClassifier(n_estimators=100)
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    return accuracy_score(y_test, predicted)
def LOR(X_train, X_test, y_train, y_test):  # logistic regression
    """Fit an L1-regularised logistic regression and score it on the test split.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels the predictions are compared against.

    Returns:
        Accuracy of the predictions on ``X_test``.
    """
    from sklearn.linear_model import LogisticRegression
    # Imported here like the estimator: nothing in the visible file brings
    # accuracy_score into scope.
    from sklearn.metrics import accuracy_score

    # The solver is pinned because penalty='l1' is rejected by the current
    # default solver (lbfgs). liblinear supports L1 and always trains
    # one-vs-rest, so the separate multi_class='ovr' argument (deprecated in
    # recent scikit-learn releases) is no longer needed.
    lor = LogisticRegression(penalty='l1', C=100, solver='liblinear')
    lor.fit(X_train, y_train)
    predicted = lor.predict(X_test)
    return accuracy_score(y_test, predicted)
def Svm(X_train, X_test, y_train, y_test):  # support vector machine
    """Fit an RBF-kernel support vector classifier and score it on the test split.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels the predictions are compared against.

    Returns:
        Accuracy of the predictions on ``X_test``.
    """
    from sklearn import svm
    # Imported here like the estimator: nothing in the visible file brings
    # accuracy_score into scope.
    from sklearn.metrics import accuracy_score

    model = svm.SVC(kernel='rbf')
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    return accuracy_score(y_test, predicted)
def LR(X_train, X_test, y_train, y_test):  # linear regression
    """Fit an ordinary least-squares linear regression and score it on the test split.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training target values.
        y_test: Test target values the predictions are compared against.

    Returns:
        Tuple ``(score, intercept, coef)``: the R^2 score of the predictions
        on ``X_test``, followed by the fitted intercept and coefficients.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score

    # The estimator gets its own name so it does not shadow this function.
    model = LinearRegression()
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    # Linear regression outputs continuous values, which accuracy_score (a
    # classification metric) rejects; R^2 is the regression counterpart.
    score = r2_score(y_test, predicted)
    return (score, model.intercept_, model.coef_)
网友评论