替换异常数据
# Replace +inf / -inf values with NaN (modifies df in place).
df.replace([np.inf,-np.inf],np.nan,inplace=True)
# Replace +inf, -inf and NaN values with 0 (modifies df in place).
df.replace([np.inf,-np.inf,np.nan],0,inplace=True)
合并多个df
# Left join when the key column has the same name ('id') in both frames.
df = pd.merge(df1, df2, how='left', on='id')
# Left join when the key column is named differently in each frame:
# 'id_1' in df1 is matched against 'id_2' in df2.
df = pd.merge(df1, df2, how='left', left_on='id_1', right_on='id_2')
多列判断
# Label each row '测试' when at least one of the listed columns contains the
# substring '测试', otherwise '正常'.
# Non-string cells (e.g. NaN for missing values) are skipped, because the
# `in` substring check raises TypeError on a float.
df[['商家名称', '商品名称', '一级类目', '二级类目', '供应商']].apply(
    lambda row: '测试'
    if any(isinstance(cell, str) and '测试' in cell for cell in row)
    else '正常',
    axis=1,
)
计算时间差
from dateutil.parser import parse


def datediff(start, end):
    """Return the number of seconds elapsed from ``start`` to ``end``.

    Both arguments are date/time strings; each is parsed with
    ``dateutil.parser.parse`` before subtracting.

    Returns:
        The difference ``end - start`` in seconds as a float. The value is
        negative when ``end`` is earlier than ``start``.
    """
    return (parse(end) - parse(start)).total_seconds()
转json
# Serialize df to a JSON string with each `orient` layout. The comment under
# each call shows the output for a sample frame whose index is 李四/王五 and
# whose columns are 宠物/年龄. force_ascii=False keeps non-ASCII characters
# unescaped in the output.

# columns: {column -> {index -> value}}
df.to_json(orient="columns", force_ascii=False)
# {"宠物":{"李四":"汪星人","王五":"喵星人"},"年龄":{"李四":25,"王五":23}}

# records: [{column -> value}, ...] (the index is dropped)
df.to_json(orient="records", force_ascii=False)
# [{"宠物":"汪星人","年龄":25},{"宠物":"喵星人","年龄":23}]

# split: {"columns": [...], "index": [...], "data": [[...], ...]}
df.to_json(orient="split", force_ascii=False)
# {"columns":["宠物","年龄"],"index":["李四","王五"],"data":[["汪星人",25],["喵星人",23]]}

# index: {index -> {column -> value}}
df.to_json(orient="index", force_ascii=False)
# {"李四":{"宠物":"汪星人","年龄":25},"王五":{"宠物":"喵星人","年龄":23}}

# values: just the 2-D array of values (no index or column labels)
df.to_json(orient="values", force_ascii=False)
# [["汪星人",25],["喵星人",23]]
网友评论