@官方
#!/usr/bin/python
# -*- coding:utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.font_manager
from sklearn import svm
# Evaluation grid over [-5, 5] x [-5, 5] with 500 samples per axis; the
# decision function is sampled on it at the end of this section.
xx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500))

# Training set: 100 Gaussian points (scaled by 0.3), duplicated with
# offsets +2 and -2 so that two clusters of 100 points each are produced.
X = 0.3 * np.random.randn(100, 2)
X_train = np.concatenate((X + 2, X - 2), axis=0)

# Regular novel observations, generated the same way (2 x 20 points).
X = 0.3 * np.random.randn(20, 2)
X_test = np.concatenate((X + 2, X - 2), axis=0)

# Abnormal novel observations: 20 points drawn uniformly from [-4, 4)^2.
X_outliers = np.random.uniform(low=-4, high=4, size=(20, 2))

# Fit the one-class SVM on the training set only.
clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)
clf.fit(X_train)

# predict() labels each sample +1 or -1; the counts below treat -1 as an
# error for the regular sets and +1 as an error for the outlier set.
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)
y_pred_outliers = clf.predict(X_outliers)
n_error_train = np.count_nonzero(y_pred_train == -1)
n_error_test = np.count_nonzero(y_pred_test == -1)
n_error_outliers = np.count_nonzero(y_pred_outliers == 1)

# Decision-function value at every grid node, reshaped back to the grid so
# it can be contoured.
Z = clf.decision_function(
    np.column_stack((xx.ravel(), yy.ravel()))
).reshape(xx.shape)
# Plot the decision surface, the learned frontier and the three point sets.
plt.title("Novelty Detection")
# Region where the decision function is negative (classified as abnormal).
plt.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7), cmap=plt.cm.PuBu)
# Zero level set: the boundary between normal and abnormal samples.
a = plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='darkred')
# Region where the decision function is positive (classified as normal).
plt.contourf(xx, yy, Z, levels=[0, Z.max()], colors='palevioletred')

s = 40  # marker size shared by all scatter plots
b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=s,
                 edgecolors='k')
b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='blueviolet', s=s,
                 edgecolors='k')
c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='gold', s=s,
                edgecolors='k')
plt.axis('tight')
plt.xlim((-5, 5))
plt.ylim((-5, 5))

# ContourSet.collections is deprecated in recent Matplotlib releases, so the
# legend handle for the frontier is obtained through legend_elements().
frontier_handle = a.legend_elements()[0][0]
plt.legend([frontier_handle, b1, b2, c],
           ["learned frontier", "training observations",
            "new regular observations", "new abnormal observations"],
           loc="upper left",
           prop=matplotlib.font_manager.FontProperties(size=11))

# Denominators come from the arrays themselves: the outlier set holds 20
# points, so a hard-coded "/40" would misreport the abnormal error rate.
plt.xlabel(
    "error train: %d/%d ; errors novel regular: %d/%d ; "
    "errors novel abnormal: %d/%d"
    % (n_error_train, len(X_train),
       n_error_test, len(X_test),
       n_error_outliers, len(X_outliers)))
plt.show()

author: usst2019zp_l@163.com
#####################OneClassSVM_NJ#####################
#################### Data preparation ##################
import pandas as pd

# Load both workbooks and stack their rows into one long table.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
df = pd.read_excel('4_NJ.xlsx', sheet_name='Sheet1')
data = pd.read_excel('OP011_NJ.xlsx', sheet_name='Sheet1')
df = pd.concat([df, data])

# The first column is read as the series identifier. The statements below
# address it by the name 'id', so the sheets are expected to use that header.
r_list = df.keys()[0]
clo_r = df[r_list]  # id column
# Distinct ids in order of first appearance; unlike list(set(...)) this
# order does not vary between runs.
class_l = list(pd.unique(clo_r))

# Make 'id' categorical with the category order given by class_l, so that
# sorting groups the rows id by id in that order. reorder_categories returns
# a new Series (its inplace= option no longer exists in pandas 2.0).
df['id'] = df['id'].astype('category')
df['id'] = df['id'].cat.reorder_categories(class_l)

# Stable sort: rows sharing an id keep their original relative order, which
# the 'time' index assigned below relies on.
df.sort_values('id', kind='mergesort', inplace=True)
# Renumber the index 0..n-1 after sorting.
df.reset_index(drop=True, inplace=True)

# 1-based position of each row within its id group; used as the sort column
# for feature extraction.
df['time'] = df.groupby('id', observed=True).cumcount() + 1
############### Feature extraction (tsfresh) ###########
import numpy as np
from tsfresh import extract_features

# One row of features per id; rows within an id are ordered by 'time'.
extracted_features = extract_features(df, column_id="id", column_sort="time")

# Going through nested lists yields a separate NumPy array, so later
# in-place edits of `b` do not touch `extracted_features`.
a = extracted_features.values.tolist()
b = np.array(a)
############## OneClassSVM anomaly detection ###########
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.font_manager
from sklearn import svm

# Replace every non-finite entry (inf, -inf, NaN) of the feature matrix
# with 0 so the estimator only sees finite values.
b[~np.isfinite(b)] = 0

# Fit on the feature matrix that is scored below. Fitting on X_train (the
# 2-D toy data of the demo above) would train on unrelated data with a
# different number of features than `b`.
# NOTE(review): the features are not rescaled before the RBF kernel is
# applied with a fixed gamma=0.1 -- confirm this is intended.
model = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)
model.fit(b)

# +1 = normal, -1 = anomaly; count the rows flagged as anomalies.
y_pred_train = model.predict(b)
n_error_train = np.count_nonzero(y_pred_train == -1)

# Store the detection results in a new table: one row per id with its
# decision-function score and predicted label, then write it to Excel.
order = ['id', 'scores', 'anomaly']
g = pd.DataFrame(
    {
        'id': extracted_features.index.values,
        'scores': model.decision_function(b),
        'anomaly': y_pred_train,
    },
    columns=order,
)
g.to_excel('NJ_OneClassSVM_tsfresh.xlsx')
参数
fit(X):训练,根据训练样本和上面两个参数探测边界。(注意是无监督)
predict(X):返回预测值,+1就是正常样本,-1就是异常样本。
decision_function(X):返回各样本点到超平面的函数距离(signed distance),正的为正常样本,负的为异常样本。
set_params(**params):设置这个评估器的参数,该方法适用于简单估计器以及嵌套对象(例如管道),后者的参数具有 <component>__<parameter> 的形式(双下划线),因此可以更新嵌套对象的每个组件。
get_params([deep]):获取这个评估器的参数。
fit_predict(X[, y]):在X上执行拟合并返回X的标签,对于异常值,返回 -1 ,对于内点,返回1。
网友评论