# IF (Isolation Forest)
#########################IF_JD##########################
# Isolation Forest anomaly detection on the JD data.
# Pipeline: read the two JD workbooks -> number the samples of every id
# -> extract tsfresh features per id -> fit IsolationForest -> write the
# per-id score / label table to 'JD_IF_tsfresh.xlsx'.
####################### Data preparation #######################
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import IsolationForest
from tsfresh import extract_features

df = pd.read_excel('4_JD.xlsx', sheet_name='Sheet1')
data = pd.read_excel('OP011_JD.xlsx', sheet_name='Sheet1')
# DataFrame.append was removed in pandas 2.0; concat is the replacement.
df = pd.concat([df, data], ignore_index=True)

# Number the rows of every id 1..n in the order they were read. This is
# done BEFORE any sorting so the counter reflects the recorded order
# (the previous version numbered rows after a non-stable sort, which
# could shuffle the samples inside one id).
df['time'] = df.groupby('id', sort=False).cumcount() + 1

# Keep the rows of one id together; mergesort is stable, so the
# within-id order established above is preserved.
df = df.sort_values('id', kind='mergesort').reset_index(drop=True)

###################### Feature extraction ######################
# Every column other than 'id' and 'time' is treated by tsfresh as a
# measured series; the result has one row per id.
extracted_features = extract_features(df, column_id="id", column_sort="time")

################ Isolation Forest anomaly detection ################
# 1 - replace inf / nan feature values with 0 so the estimator accepts them
b = extracted_features.to_numpy(dtype=float)
b = np.nan_to_num(b, nan=0.0, posinf=0.0, neginf=0.0)

# 2 - build and fit the forest (10 % of the ids are expected to be outliers)
model = IsolationForest(
    n_estimators=100,
    max_samples='auto',
    contamination=0.1,
    max_features=1.0,
)
model.fit(b)

# 3 - store the result: one row per id with its decision score and the
#     predicted label (scikit-learn uses -1 for outliers, 1 for inliers)
g = pd.DataFrame({
    'id': extracted_features.index.values,
    'scores': model.decision_function(b),
    'anomaly': model.predict(b),
})
g.to_excel('JD_IF_tsfresh.xlsx')
#########################IF_NJ##########################
# Isolation Forest anomaly detection on the NJ data.
# Pipeline: read the two NJ workbooks -> number the samples of every id
# -> extract tsfresh features per id -> fit IsolationForest -> write the
# per-id score / label table to 'NJ_IF_tsfresh.xlsx'.
####################### Data preparation #######################
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import IsolationForest
from tsfresh import extract_features

df = pd.read_excel('4_NJ.xlsx', sheet_name='Sheet1')
data = pd.read_excel('OP011_NJ.xlsx', sheet_name='Sheet1')
# DataFrame.append was removed in pandas 2.0; concat is the replacement.
df = pd.concat([df, data], ignore_index=True)

# Number the rows of every id 1..n in the order they were read. This is
# done BEFORE any sorting so the counter reflects the recorded order
# (the previous version numbered rows after a non-stable sort, which
# could shuffle the samples inside one id).
df['time'] = df.groupby('id', sort=False).cumcount() + 1

# Keep the rows of one id together; mergesort is stable, so the
# within-id order established above is preserved.
df = df.sort_values('id', kind='mergesort').reset_index(drop=True)

###################### Feature extraction ######################
# Every column other than 'id' and 'time' is treated by tsfresh as a
# measured series; the result has one row per id.
extracted_features = extract_features(df, column_id="id", column_sort="time")

################ Isolation Forest anomaly detection ################
# 1 - replace inf / nan feature values with 0 so the estimator accepts them
b = extracted_features.to_numpy(dtype=float)
b = np.nan_to_num(b, nan=0.0, posinf=0.0, neginf=0.0)

# 2 - build and fit the forest (5 % of the ids are expected to be outliers)
model = IsolationForest(
    n_estimators=100,
    max_samples='auto',
    contamination=0.05,
    max_features=1.0,
)
model.fit(b)

# 3 - store the result: one row per id with its decision score and the
#     predicted label (scikit-learn uses -1 for outliers, 1 for inliers)
g = pd.DataFrame({
    'id': extracted_features.index.values,
    'scores': model.decision_function(b),
    'anomaly': model.predict(b),
})
g.to_excel('NJ_IF_tsfresh.xlsx')
# LOF (Local Outlier Factor)
#########################LOF_JD##########################
# Local Outlier Factor anomaly detection on the JD data.
# Pipeline: read the two JD workbooks -> number the samples of every id
# -> extract tsfresh features per id -> fit LocalOutlierFactor -> write
# the per-id score / label table to 'JD_LOF_tsfresh.xlsx'.
####################### Data preparation #######################
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.neighbors import LocalOutlierFactor
from tsfresh import extract_features

df = pd.read_excel('4_JD.xlsx', sheet_name='Sheet1')
data = pd.read_excel('OP011_JD.xlsx', sheet_name='Sheet1')
# DataFrame.append was removed in pandas 2.0; concat is the replacement.
df = pd.concat([df, data], ignore_index=True)

# Number the rows of every id 1..n in the order they were read. This is
# done BEFORE any sorting so the counter reflects the recorded order
# (the previous version numbered rows after a non-stable sort, which
# could shuffle the samples inside one id).
df['time'] = df.groupby('id', sort=False).cumcount() + 1

# Keep the rows of one id together; mergesort is stable, so the
# within-id order established above is preserved.
df = df.sort_values('id', kind='mergesort').reset_index(drop=True)

###################### Feature extraction ######################
# Every column other than 'id' and 'time' is treated by tsfresh as a
# measured series; the result has one row per id.
extracted_features = extract_features(df, column_id="id", column_sort="time")

#################### LOF anomaly detection ####################
# 1 - replace inf / nan feature values with 0 so the estimator accepts them
b = extracted_features.to_numpy(dtype=float)
b = np.nan_to_num(b, nan=0.0, posinf=0.0, neginf=0.0)

# 2 - fit the model and label the training samples in one step.
# NOTE(review): n_neighbors=2518 is hard-coded; per the scikit-learn docs
# all samples are used when fewer than that are available - confirm the
# value still matches the size of the data set.
model = LocalOutlierFactor(n_neighbors=2518, contamination=0.1)
y_pred = model.fit_predict(b)
scores_pred = model.negative_outlier_factor_

# 3 - store the result: one row per id with its negative outlier factor
#     and the predicted label (scikit-learn uses -1 for outliers, 1 for inliers)
g = pd.DataFrame({
    'id': extracted_features.index.values,
    'scores': scores_pred,
    'anomaly': y_pred,
})
g.to_excel('JD_LOF_tsfresh.xlsx')
#########################LOF_NJ#########################
# Local Outlier Factor anomaly detection on the NJ data.
# Pipeline: read the two NJ workbooks -> number the samples of every id
# -> extract tsfresh features per id -> fit LocalOutlierFactor -> write
# the per-id score / label table to 'NJ_LOF_tsfresh.xlsx'.
####################### Data preparation #######################
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.neighbors import LocalOutlierFactor
from tsfresh import extract_features

df = pd.read_excel('4_NJ.xlsx', sheet_name='Sheet1')
data = pd.read_excel('OP011_NJ.xlsx', sheet_name='Sheet1')
# DataFrame.append was removed in pandas 2.0; concat is the replacement.
df = pd.concat([df, data], ignore_index=True)

# Number the rows of every id 1..n in the order they were read. This is
# done BEFORE any sorting so the counter reflects the recorded order
# (the previous version numbered rows after a non-stable sort, which
# could shuffle the samples inside one id).
df['time'] = df.groupby('id', sort=False).cumcount() + 1

# Keep the rows of one id together; mergesort is stable, so the
# within-id order established above is preserved.
df = df.sort_values('id', kind='mergesort').reset_index(drop=True)

###################### Feature extraction ######################
# Every column other than 'id' and 'time' is treated by tsfresh as a
# measured series; the result has one row per id.
extracted_features = extract_features(df, column_id="id", column_sort="time")

#################### LOF anomaly detection ####################
# 1 - replace inf / nan feature values with 0 so the estimator accepts them
b = extracted_features.to_numpy(dtype=float)
b = np.nan_to_num(b, nan=0.0, posinf=0.0, neginf=0.0)

# 2 - fit the model and label the training samples in one step.
# NOTE(review): n_neighbors=2518 is hard-coded; per the scikit-learn docs
# all samples are used when fewer than that are available - confirm the
# value still matches the size of the data set.
model = LocalOutlierFactor(n_neighbors=2518, contamination=0.1)
y_pred = model.fit_predict(b)
scores_pred = model.negative_outlier_factor_

# 3 - store the result: one row per id with its negative outlier factor
#     and the predicted label (scikit-learn uses -1 for outliers, 1 for inliers)
g = pd.DataFrame({
    'id': extracted_features.index.values,
    'scores': scores_pred,
    'anomaly': y_pred,
})
g.to_excel('NJ_LOF_tsfresh.xlsx')
# OneClassSVM (One-Class Support Vector Machine)
#####################OneClassSVM_JD#####################
# One-Class SVM anomaly detection on the JD data.
# Pipeline: read the two JD workbooks -> number the samples of every id
# -> extract tsfresh features per id -> fit OneClassSVM -> write the
# per-id score / label table to 'JD_OneClassSVM_tsfresh.xlsx'.
####################### Data preparation #######################
import matplotlib.font_manager
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import svm
from tsfresh import extract_features

df = pd.read_excel('4_JD.xlsx', sheet_name='Sheet1')
data = pd.read_excel('OP011_JD.xlsx', sheet_name='Sheet1')
# DataFrame.append was removed in pandas 2.0; concat is the replacement.
df = pd.concat([df, data], ignore_index=True)

# Number the rows of every id 1..n in the order they were read. This is
# done BEFORE any sorting so the counter reflects the recorded order
# (the previous version numbered rows after a non-stable sort, which
# could shuffle the samples inside one id).
df['time'] = df.groupby('id', sort=False).cumcount() + 1

# Keep the rows of one id together; mergesort is stable, so the
# within-id order established above is preserved.
df = df.sort_values('id', kind='mergesort').reset_index(drop=True)

###################### Feature extraction ######################
# Every column other than 'id' and 'time' is treated by tsfresh as a
# measured series; the result has one row per id.
extracted_features = extract_features(df, column_id="id", column_sort="time")

################# OneClassSVM anomaly detection #################
# Replace inf / nan feature values with 0 so the estimator accepts them.
b = extracted_features.to_numpy(dtype=float)
b = np.nan_to_num(b, nan=0.0, posinf=0.0, neginf=0.0)

model = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)
# The model is trained on the same feature matrix it later labels; the
# previous `model.fit(X_train)` referenced a name that was never defined.
model.fit(b)
y_pred_train = model.predict(b)
# Number of training samples flagged as outliers (label -1).
n_error_train = y_pred_train[y_pred_train == -1].size

# Store the result: one row per id with its decision score and the
# predicted label (scikit-learn uses -1 for outliers, 1 for inliers).
g = pd.DataFrame({
    'id': extracted_features.index.values,
    'scores': model.decision_function(b),
    'anomaly': y_pred_train,
})
g.to_excel('JD_OneClassSVM_tsfresh.xlsx')
#####################OneClassSVM_NJ#####################
# One-Class SVM anomaly detection on the NJ data.
# Pipeline: read the two NJ workbooks -> number the samples of every id
# -> extract tsfresh features per id -> fit OneClassSVM -> write the
# per-id score / label table to 'NJ_OneClassSVM_tsfresh.xlsx'.
####################### Data preparation #######################
import matplotlib.font_manager
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import svm
from tsfresh import extract_features

df = pd.read_excel('4_NJ.xlsx', sheet_name='Sheet1')
data = pd.read_excel('OP011_NJ.xlsx', sheet_name='Sheet1')
# DataFrame.append was removed in pandas 2.0; concat is the replacement.
df = pd.concat([df, data], ignore_index=True)

# Number the rows of every id 1..n in the order they were read. This is
# done BEFORE any sorting so the counter reflects the recorded order
# (the previous version numbered rows after a non-stable sort, which
# could shuffle the samples inside one id).
df['time'] = df.groupby('id', sort=False).cumcount() + 1

# Keep the rows of one id together; mergesort is stable, so the
# within-id order established above is preserved.
df = df.sort_values('id', kind='mergesort').reset_index(drop=True)

###################### Feature extraction ######################
# Every column other than 'id' and 'time' is treated by tsfresh as a
# measured series; the result has one row per id.
extracted_features = extract_features(df, column_id="id", column_sort="time")

################# OneClassSVM anomaly detection #################
# Replace inf / nan feature values with 0 so the estimator accepts them.
b = extracted_features.to_numpy(dtype=float)
b = np.nan_to_num(b, nan=0.0, posinf=0.0, neginf=0.0)

model = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)
# The model is trained on the same feature matrix it later labels; the
# previous `model.fit(X_train)` referenced a name that was never defined.
model.fit(b)
y_pred_train = model.predict(b)
# Number of training samples flagged as outliers (label -1).
n_error_train = y_pred_train[y_pred_train == -1].size

# Store the result: one row per id with its decision score and the
# predicted label (scikit-learn uses -1 for outliers, 1 for inliers).
g = pd.DataFrame({
    'id': extracted_features.index.values,
    'scores': model.decision_function(b),
    'anomaly': y_pred_train,
})
g.to_excel('NJ_OneClassSVM_tsfresh.xlsx')
# 网友评论 (reader comments) - leftover web-page heading, not part of the script