最开始我是按照方案①（即下文的 script 1：先手动构造完整的时间索引，再用 reindex 补全）来做的，其实这样很笨；方案②（即下文的 script 2）用 resample 一行即可。
有时候会发现数据的时间序列存在缺测，为了与其他数据一一对应，需要把缺测的时间行补全。
### script 1
import numpy as np
import pandas as pd
import matplotlib as mpl
import datetime
# Load the sheet; the column at position 2 is parsed as dates and used as the
# row index.
path = r"D:\python\data.xlsx"
data = pd.read_excel(
    path,
    sheet_name="筛选后小于等于0",
    parse_dates=[2],
    index_col=2,
    header=0,
)
# Complete 30-minute time axis for the whole period. "30min" is the same step
# as the former "0.5H", but avoids the "H" alias that newer pandas releases
# deprecate (FutureWarning).
t_index = pd.date_range("2019-12-10 18:00", "2020-01-01 7:00", freq="30min")
# reindex keeps rows whose timestamp is on the axis, inserts an all-NaN row for
# every timestamp that is missing, and drops rows that are not on the axis.
data = data.reindex(t_index)
data.to_csv("data_补缺测.csv")
### 数据处理
# Label every row with its hour of day and append that label as a second index
# level, so that unstacking the last level yields one column per hour.
row_times = pd.to_datetime(data.index)
data["hour"] = row_times.hour
data = data.set_index([data.index, data["hour"]])


def cal_q(x):
    """Return describe() statistics of *x* with 5/25/50/75/95 % percentiles."""
    return x.describe(percentiles=[0.05, 0.25, 0.5, 0.75, 0.95])


# Source column -> CSV file that receives its per-hour statistics.
# NOTE(review): "V_n_down.csv" lacks the "_describe" part the sibling names
# carry, and the mean(co2)/mean(ch4) columns are written to "BCm"/"BCn" files;
# both are reproduced unchanged here - confirm they are intended.
describe_outputs = [
    ("Fluxm", "Fluxm_describe_down.csv"),
    ("Fluxn", "Fluxn_describe_down.csv"),
    ("V_m", "V_m_describe_down.csv"),
    ("V_n", "V_n_down.csv"),
    ("mean(co2)", "BCm_describe_down.csv"),
    ("mean(ch4)", "BCn_describe_down.csv"),
]
for column_name, csv_name in describe_outputs:
    # Rows stay timestamps, columns become hours; describe each hour column.
    by_hour = data.loc[:, column_name].unstack()
    by_hour.apply(cal_q).to_csv(csv_name)
### script 2
# Alternative to building the time axis by hand: resample onto a 30-minute grid
# and let asfreq() insert NaN rows for the missing timestamps.
# resample/asfreq return a new object, so the result has to be assigned back;
# the bare expression had no effect on `data`.
# NOTE(review): this needs `data` to carry a DatetimeIndex (i.e. as loaded,
# before any extra index level is added), and the grid only spans the first to
# the last timestamp already present in `data` - confirm that range is enough.
data = data.resample("30min").asfreq()
网友评论