本章内容包括:更改时间字段
//input1
import pandas as pd
import numpy as np
columns = ['user_id','order_dt','order_products','order_amount']
df = pd.read_table('CDNOW_master.txt',names = columns,sep = '\s+')
//markdown user_id:用户ID
//markdown order_dt:购买日期
//markdown orderproducts:购买产品数
//markdown order_amount:购买金额
df.head()
//output1
user_id order_dt order_products order_amount
0 1 19970101 1 11.77
1 2 19970112 1 12.00
2 2 19970112 5 77.00
3 3 19970102 2 20.76
4 3 19970330 2 20.76
//input2
df.info()
//output2
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69659 entries, 0 to 69658
Data columns (total 4 columns):
user_id 69659 non-null int64
order_dt 69659 non-null int64
order_products 69659 non-null int64
order_amount 69659 non-null float64
dtypes: float64(1), int64(3)
memory usage: 2.1 MB
//input3
df['order_dt'] = pd.to_datetime(df.order_dt,format='%Y%m%d')
df['month'] = df.order_dt.values.astype('datetime64[M]')
df.info()
//output3
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69659 entries, 0 to 69658
Data columns (total 5 columns):
user_id 69659 non-null int64
order_dt 69659 non-null datetime64[ns]
order_products 69659 non-null int64
order_amount 69659 non-null float64
month 69659 non-null datetime64[ns]
dtypes: datetime64[ns](2), float64(1), int64(2)
memory usage: 2.7 MB
//input4
df.head()
//output4
user_id order_dt order_products order_amount month
0 1 1997-01-01 1 11.77 1997-01-01
1 2 1997-01-12 1 12.00 1997-01-01
2 2 1997-01-12 5 77.00 1997-01-01
3 3 1997-01-02 2 20.76 1997-01-01
4 3 1997-03-30 2 20.76 1997-03-01
网友评论