直接复制到编辑器里面学习就好了,哪个函数不清楚再去了解
例1
import pandas as pd
import numpy as np
# Example 1: DataFrame basics - inspection, selection, insertion and NaN filling.
# Build a 3x4 integer frame with string row labels '1'..'3' and columns a..d.
data = pd.DataFrame(np.arange(12).reshape(3, 4), index=list("123"), columns=list("abcd"))
print(data.head(1))
# NOTE: `data.info` without parentheses is the bound method itself, so this
# prints the method's repr (which embeds the frame), not the dtype/memory
# summary. Call `data.info()` to get that summary.
print(data.info)
print(data.describe())  # summary statistics of the columns
# `by` names the column to sort on; ascending=False sorts in descending order.
print(data.sort_values(by='c', ascending=False))
print("*"*100)
# Label-based lookup in a single .loc call (row label '1', column 'a').
print(data.loc['1', 'a'])
# Position-based lookup in a single .iloc call: second row, third column -> 6.
# (The chained form `iloc[1][2]` indexes a string-labelled Series with an int,
# which depends on a deprecated positional fallback.)
print(data.iloc[1, 2])
print("*"*100)
# ------------------------------------------------------
# Insert a column named 'e' at position 4, filled with 1.
data.insert(4, 'e', 1)
print(data)
# Read the first row.
print(data[:1])
# Read several rows by position.
print('读取多行\n', data.iloc[[1, 2]])
# Read several columns.
print(data[['a', 'b']])
# Read column 'a' and convert it to an array.
print(np.array(data.a))
# Take the rows from position 1 onwards and flatten them to a 1-D array.
print(np.array(data.iloc[1:]).flatten())
# Append a row. The label 4 is an int while the existing labels are strings,
# so the index holds mixed types afterwards (see the index listing below).
data.loc[4] = [1, 2, 3, 4, 5]
print('增加行\n', data)
# Append a column; the list needs one value per row.
data['f'] = [1, 2, 3, 4]
print('增加列\n', data)
print("*"*100)
# A column can be read directly by its name.
print(data['b'])
print("*"*100)
# Column 'c' of the rows from position 1 onwards.
print(data.iloc[1:]['c'])
print('data.index->******', list(data.index))
print("*"*100)
# Records with different key sets: keys missing from a record become NaN.
data2 = [{"name": 'zhangsan', 'age': 12}, {'age': 12, 'tel': 113}, {"name": 'zhangsan', 'age': 12, 'tel': 113}]
data2_dataframe = pd.DataFrame(data2)
print(data2_dataframe)
print("*"*100)
# Mean of a single column; NaN entries are skipped.
mean = data2_dataframe['tel'].mean()
print(mean)
# Fill each NaN with the mean of its own column. numeric_only=True restricts
# the mean to numeric columns; without it pandas 2.x raises TypeError on the
# string column 'name'. 'name' gets no fill value, so its NaN is kept.
data2_dataframe = data2_dataframe.fillna(data2_dataframe.mean(numeric_only=True))
print(data2_dataframe)
print("*"*100)
# Nested list -> ndarray -> flat array -> set of distinct values.
data3 = [[1, 2, 3], [4, 5, 6], [1, 2, 3], [2, 3, 4]]
data4 = np.array(data3)
print(type(data4))
array = np.array(data3).flatten()
# tolist() yields plain Python ints, so the set prints as {1, 2, ...} even on
# NumPy versions whose scalar repr is np.int64(1).
print(set(array.tolist()))
输出
E:\Python\python.exe E:/ideaproject/pythonProject/matplotlib/day04/pandas03.py
a b c d
1 0 1 2 3
<bound method DataFrame.info of a b c d
1 0 1 2 3
2 4 5 6 7
3 8 9 10 11>
a b c d
count 3.0 3.0 3.0 3.0
mean 4.0 5.0 6.0 7.0
std 4.0 4.0 4.0 4.0
min 0.0 1.0 2.0 3.0
25% 2.0 3.0 4.0 5.0
50% 4.0 5.0 6.0 7.0
75% 6.0 7.0 8.0 9.0
max 8.0 9.0 10.0 11.0
a b c d
3 8 9 10 11
2 4 5 6 7
1 0 1 2 3
****************************************************************************************************
0
6
****************************************************************************************************
a b c d e
1 0 1 2 3 1
2 4 5 6 7 1
3 8 9 10 11 1
a b c d e
1 0 1 2 3 1
读取多行
a b c d e
2 4 5 6 7 1
3 8 9 10 11 1
a b
1 0 1
2 4 5
3 8 9
[0 4 8]
[ 4 5 6 7 1 8 9 10 11 1]
增加行
a b c d e
1 0 1 2 3 1
2 4 5 6 7 1
3 8 9 10 11 1
4 1 2 3 4 5
增加列
a b c d e f
1 0 1 2 3 1 1
2 4 5 6 7 1 2
3 8 9 10 11 1 3
4 1 2 3 4 5 4
****************************************************************************************************
1 1
2 5
3 9
4 2
Name: b, dtype: int64
****************************************************************************************************
2 6
3 10
4 3
Name: c, dtype: int64
data.index->****** ['1', '2', '3', 4]
****************************************************************************************************
name age tel
0 zhangsan 12 NaN
1 NaN 12 113.0
2 zhangsan 12 113.0
****************************************************************************************************
113.0
name age tel
0 zhangsan 12 113.0
1 NaN 12 113.0
2 zhangsan 12 113.0
****************************************************************************************************
<class 'numpy.ndarray'>
{1, 2, 3, 4, 5, 6}
Process finished with exit code 0
例2
import pandas as pd
import numpy as np
# Example 2: index-aligned joins between two frames.
# df1 has rows A and B; df2 has rows A, B and C. Their column names do not overlap.
df1 = pd.DataFrame(
    np.ones(shape=(2, 4)),
    index=['A', 'B'],
    columns=['a', 'b', 'c', 'd'],
)
df2 = pd.DataFrame(
    np.zeros(shape=(3, 3)),
    index=list('ABC'),
    columns=['x', 'y', 'z'],
)
print('df1\n', df1)
print('df2\n', df2)

# join() aligns on the row index and keeps the rows of the frame it is called
# on (a left join, which is the default), so row C of df2 is dropped here ...
data1 = df1.join(other=df2, how='left')
print('data1\n', data1)

# ... and kept here, with NaN in the columns coming from df1.
data2 = df2.join(other=df1, how='left')
print('data2\n', data2)
输出
df1
a b c d
A 1.0 1.0 1.0 1.0
B 1.0 1.0 1.0 1.0
df2
x y z
A 0.0 0.0 0.0
B 0.0 0.0 0.0
C 0.0 0.0 0.0
data1
a b c d x y z
A 1.0 1.0 1.0 1.0 0.0 0.0 0.0
B 1.0 1.0 1.0 1.0 0.0 0.0 0.0
data2
x y z a b c d
A 0.0 0.0 0.0 1.0 1.0 1.0 1.0
B 0.0 0.0 0.0 1.0 1.0 1.0 1.0
C 0.0 0.0 0.0 NaN NaN NaN NaN
例3
import numpy as np
import pandas as pd
# Example 3: column-based merges (inner / outer / left / right) on column 'a'.
df1 = pd.DataFrame(np.ones((2, 4)), index=['A', 'B'], columns=list('abcd'))
print('df1\n', df1)
# df2 has columns f, a, x; its 'a' column holds 1, 4 and 7.
df2 = pd.DataFrame(np.arange(9).reshape(3, 3), columns=list("fax"))
print('df2\n', df2)
# Inner join on 'a' (the default `how`): only key values present in both
# frames survive, i.e. the intersection.
merge = df1.merge(df2, on='a')
print('merge\n', merge)
# Change one key so that only row B of df1 still matches df2.
# A single .loc[row, col] assignment writes into df1 itself; the chained form
# `df1.loc['A']['a'] = 100` assigns into a temporary object and is not
# guaranteed to reach df1 (under Copy-on-Write it never does).
df1.loc['A', 'a'] = 100
merge2 = df1.merge(df2, on='a')
print('merge2\n', merge2)
# Outer join: union of the keys from both frames, NaN where a side is missing.
merge_outer = df1.merge(df2, on='a', how='outer')
print('merge_outer\n', merge_outer)
# Left join: keep every row of df1.
merge_left = df1.merge(df2, on='a', how='left')
print('merge_left\n', merge_left)
# Right join: keep every row of df2.
merge_right = df1.merge(df2, on='a', how='right')
print('merge_right\n', merge_right)
输出
df1
a b c d
A 1.0 1.0 1.0 1.0
B 1.0 1.0 1.0 1.0
df2
f a x
0 0 1 2
1 3 4 5
2 6 7 8
merge
a b c d f x
0 1.0 1.0 1.0 1.0 0 2
1 1.0 1.0 1.0 1.0 0 2
merge2
a b c d f x
0 1.0 1.0 1.0 1.0 0 2
merge_outer
a b c d f x
0 100.0 1.0 1.0 1.0 NaN NaN
1 1.0 1.0 1.0 1.0 0.0 2.0
2 4.0 NaN NaN NaN 3.0 5.0
3 7.0 NaN NaN NaN 6.0 8.0
merge_left
a b c d f x
0 100.0 1.0 1.0 1.0 NaN NaN
1 1.0 1.0 1.0 1.0 0.0 2.0
merge_right
a b c d f x
0 1.0 1.0 1.0 1.0 0 2
1 4.0 NaN NaN NaN 3 5
2 7.0 NaN NaN NaN 6 8
Process finished with exit code 0
例4
import pandas as pd
import numpy as np
'''
分组和聚合
'''
# Example 4: grouping rows by a column and counting the members of each group.
# 4x6 frame holding 0..23, row labels a..d, columns q, w, e, r, t, y.
data1 = pd.DataFrame(np.arange(24).reshape(4, 6), index=['a', 'b', 'c', 'd'], columns=list("qwerty"))
print('data1\n', data1)
# Overwrite every cell of the rows at positions 1 and 2 with 100, so that two
# rows share the same value in column 'r' and end up in the same group.
data1.iloc[[1, 2]] = 100
print(data1)
# Group the rows by their value in column 'r'.
grouped = data1.groupby(by='r')
# Iterating a GroupBy yields (group key, sub-frame) pairs. The loop body must
# be indented; only the three prints below belong to it.
for key, group in grouped:
    print(key)
    print("*"*100)
    print(group)
# Separator printed once, after all groups have been shown.
print("*"*100)
# Number of rows in each group.
count = grouped['r'].count()
print('count\n', count)
输出
data1
q w e r t y
a 0 1 2 3 4 5
b 6 7 8 9 10 11
c 12 13 14 15 16 17
d 18 19 20 21 22 23
q w e r t y
a 0 1 2 3 4 5
b 100 100 100 100 100 100
c 100 100 100 100 100 100
d 18 19 20 21 22 23
3
****************************************************************************************************
q w e r t y
a 0 1 2 3 4 5
21
****************************************************************************************************
q w e r t y
d 18 19 20 21 22 23
100
****************************************************************************************************
q w e r t y
b 100 100 100 100 100 100
c 100 100 100 100 100 100
****************************************************************************************************
count
r
3 1
21 1
100 2
Name: r, dtype: int64
例5
import pandas as pd
import numpy as np
'''
分组聚合联系和总结
'''
# Example 5: building and navigating a two-level (hierarchical) row index.
data = pd.DataFrame(
    {
        'a': range(7),
        'b': range(7, 0, -1),
        'c': ['one'] * 3 + ['two'] * 4,
        'd': list("hjklmno"),
    }
)
print("data****\n", data)

# Use (c, d) as the row index; drop=False keeps c and d as regular columns too.
data2 = data.set_index(keys=['c', 'd'], drop=False)
print('data2****\n', data2)

# Column 'a' as a Series whose index levels are (d, c).
indexed_by_d_c = data.set_index(keys=['d', 'c'])
data3 = indexed_by_d_c['a']
print('data3*****\n', data3)

# Swap the two index levels, giving (c, d).
data4 = data3.swaplevel()
print('data4*****\n', data4)

# Pick the 'one' rows on the outer level first, then row 'h' on the level that remains.
rows_for_one = data2.loc['one']
data5 = rows_for_one.loc['h']
print('data5*****\n', data5)
输出
data****
a b c d
0 0 7 one h
1 1 6 one j
2 2 5 one k
3 3 4 two l
4 4 3 two m
5 5 2 two n
6 6 1 two o
data2****
a b c d
c d
one h 0 7 one h
j 1 6 one j
k 2 5 one k
two l 3 4 two l
m 4 3 two m
n 5 2 two n
o 6 1 two o
data3*****
d c
h one 0
j one 1
k one 2
l two 3
m two 4
n two 5
o two 6
Name: a, dtype: int64
data4*****
c d
one h 0
j 1
k 2
two l 3
m 4
n 5
o 6
Name: a, dtype: int64
data5*****
a 0
b 7
c one
d h
Name: h, dtype: object
Process finished with exit code 0
网友评论