For work I need to do data analysis with the third-party libraries numpy and pandas, so I'm keeping some notes here.
1. numpy notes
In [1]: import numpy as np
In [2]: data = np.array([1, 3, 4, 8])
In [3]: data
Out[3]: array([1, 3, 4, 8])
In [4]: data.shape
Out[4]: (4,)
In [5]: data.dtype
Out[5]: dtype('int32')
In [6]: data[1]
Out[6]: 3
In [7]: data[1] = 9
In [8]: data
Out[8]: array([1, 9, 4, 8])
In [9]: data = np.array([[1,2,3],[4,5,6]])
In [10]: data
Out[10]:
array([[1, 2, 3],
[4, 5, 6]])
In [11]: data.shape
Out[11]: (2, 3)
In [12]: data[0,1]
Out[12]: 2
In [13]: np.arange(10)
Out[13]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [14]: data[1,2] = 7
In [15]: data
Out[15]:
array([[1, 2, 3],
[4, 5, 7]])
In [18]: np.arange(5,15)
Out[18]: array([ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
In [19]: data = np.arange(10)
In [20]: data.reshape(2,5)
Out[20]:
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
In [22]: data2 = data.reshape(2,5)
In [23]: data2
Out[23]:
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
In [24]: data[4] = 10
In [25]: data
Out[25]: array([ 0, 1, 2, 3, 10, 5, 6, 7, 8, 9])
In [26]: data2
Out[26]:
array([[ 0, 1, 2, 3, 10],
[ 5, 6, 7, 8, 9]])
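Note why data2 picked up the change: reshape returns a view onto the same memory whenever it can, so a write through data is visible through data2. A minimal sketch of view vs. explicit copy (variable names here are just illustrative):

import numpy as np

data = np.arange(10)
view = data.reshape(2, 5)         # shares memory with data
copy = data.reshape(2, 5).copy()  # independent buffer

data[4] = 10
print(view[0, 4])                    # 10 -- the view sees the write
print(copy[0, 4])                    # 4  -- the copy does not
print(np.shares_memory(data, view))  # True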
In [28]: np.zeros((2,2))
Out[28]:
array([[ 0., 0.],
[ 0., 0.]])
In [29]: np.ones((2,3,3))
Out[29]:
array([[[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.]],
[[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.]]])
In [30]: np.eye(4)
Out[30]:
array([[ 1., 0., 0., 0.],
[ 0., 1., 0., 0.],
[ 0., 0., 1., 0.],
[ 0., 0., 0., 1.]])
In [31]: np.arange(16).reshape(4,4)
Out[31]:
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
In [37]: data = np.arange(100, step=10)
In [38]: data
Out[38]: array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
In [39]: data[2]
Out[39]: 20
In [40]: data[2:5]
Out[40]: array([20, 30, 40])
In [41]: data[:3]
Out[41]: array([ 0, 10, 20])
In [42]: data[5:] = -1
In [43]: data
Out[43]: array([ 0, 10, 20, 30, 40, -1, -1, -1, -1, -1])
In [44]: data = np.arange(16).reshape(4,4)
In [45]: data
Out[45]:
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
In [46]: data[1]
Out[46]: array([4, 5, 6, 7])
In [47]: data[1:3]
Out[47]:
array([[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
In [48]: data[:, 2:4]
Out[48]:
array([[ 2, 3],
[ 6, 7],
[10, 11],
[14, 15]])
In [49]: data[[1,3],[2,3]]
Out[49]: array([ 6, 15])
In [53]: print(data[1,2],data[3,3])
6 15
In [54]: data > 10
Out[54]:
array([[False, False, False, False],
[False, False, False, False],
[False, False, False, True],
[ True, True, True, True]], dtype=bool)
In [55]: data[data > 10]
Out[55]: array([11, 12, 13, 14, 15])
In [56]: data[data % 2 == 0]
Out[56]: array([ 0, 2, 4, 6, 8, 10, 12, 14])
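Boolean masks can be combined element-wise with & and | (parentheses are needed around each comparison). A small sketch:

import numpy as np

data = np.arange(16).reshape(4, 4)
mask = (data > 4) & (data % 2 == 0)  # even values greater than 4
print(data[mask])                    # [ 6  8 10 12 14]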
In [57]: x = np.arange(1,5).reshape(2,2)
In [58]: x
Out[58]:
array([[1, 2],
[3, 4]])
In [59]: y = np.arange(5,9).reshape(2,2)
In [60]: y
Out[60]:
array([[5, 6],
[7, 8]])
In [61]: x + y
Out[61]:
array([[ 6, 8],
[10, 12]])
In [62]: x - y
Out[62]:
array([[-4, -4],
[-4, -4]])
In [63]: x * y
Out[63]:
array([[ 5, 12],
[21, 32]])
In [65]: x.dot(y)
Out[65]:
array([[19, 22],
[43, 50]])
In [66]: x / y
Out[66]:
array([[ 0.2 , 0.33333333],
[ 0.42857143, 0.5 ]])
In [67]: np.sqrt(x)
Out[67]:
array([[ 1. , 1.41421356],
[ 1.73205081, 2. ]])
In [68]: x.T
Out[68]:
array([[1, 3],
[2, 4]])
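To keep the operators straight: x * y is element-wise, x.dot(y) is matrix multiplication, and on Python 3.5+ the @ operator is equivalent to dot. A quick sketch:

import numpy as np

x = np.arange(1, 5).reshape(2, 2)
y = np.arange(5, 9).reshape(2, 2)
print(x @ y)  # same as x.dot(y): [[19 22], [43 50]]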
In [69]: np.linspace(1,10)
Out[69]:
array([ 1. , 1.18367347, 1.36734694, 1.55102041,
1.73469388, 1.91836735, 2.10204082, 2.28571429,
2.46938776, 2.65306122, 2.83673469, 3.02040816,
3.20408163, 3.3877551 , 3.57142857, 3.75510204,
3.93877551, 4.12244898, 4.30612245, 4.48979592,
4.67346939, 4.85714286, 5.04081633, 5.2244898 ,
5.40816327, 5.59183673, 5.7755102 , 5.95918367,
6.14285714, 6.32653061, 6.51020408, 6.69387755,
6.87755102, 7.06122449, 7.24489796, 7.42857143,
7.6122449 , 7.79591837, 7.97959184, 8.16326531,
8.34693878, 8.53061224, 8.71428571, 8.89795918,
9.08163265, 9.26530612, 9.44897959, 9.63265306,
9.81632653, 10. ])
In [70]: np.linspace(1,10, num=200)
Out[70]:
array([ 1. , 1.04522613, 1.09045226, 1.13567839,
1.18090452, 1.22613065, 1.27135678, 1.31658291,
1.36180905, 1.40703518, 1.45226131, 1.49748744,
1.54271357, 1.5879397 , 1.63316583, 1.67839196,
1.72361809, 1.76884422, 1.81407035, 1.85929648,
1.90452261, 1.94974874, 1.99497487, 2.04020101,
2.08542714, 2.13065327, 2.1758794 , 2.22110553,
2.26633166, 2.31155779, 2.35678392, 2.40201005,
2.44723618, 2.49246231, 2.53768844, 2.58291457,
2.6281407 , 2.67336683, 2.71859296, 2.7638191 ,
2.80904523, 2.85427136, 2.89949749, 2.94472362,
2.98994975, 3.03517588, 3.08040201, 3.12562814,
3.17085427, 3.2160804 , 3.26130653, 3.30653266,
3.35175879, 3.39698492, 3.44221106, 3.48743719,
3.53266332, 3.57788945, 3.62311558, 3.66834171,
3.71356784, 3.75879397, 3.8040201 , 3.84924623,
3.89447236, 3.93969849, 3.98492462, 4.03015075,
4.07537688, 4.12060302, 4.16582915, 4.21105528,
4.25628141, 4.30150754, 4.34673367, 4.3919598 ,
4.43718593, 4.48241206, 4.52763819, 4.57286432,
4.61809045, 4.66331658, 4.70854271, 4.75376884,
4.79899497, 4.84422111, 4.88944724, 4.93467337,
4.9798995 , 5.02512563, 5.07035176, 5.11557789,
5.16080402, 5.20603015, 5.25125628, 5.29648241,
5.34170854, 5.38693467, 5.4321608 , 5.47738693,
5.52261307, 5.5678392 , 5.61306533, 5.65829146,
5.70351759, 5.74874372, 5.79396985, 5.83919598,
5.88442211, 5.92964824, 5.97487437, 6.0201005 ,
6.06532663, 6.11055276, 6.15577889, 6.20100503,
6.24623116, 6.29145729, 6.33668342, 6.38190955,
6.42713568, 6.47236181, 6.51758794, 6.56281407,
6.6080402 , 6.65326633, 6.69849246, 6.74371859,
6.78894472, 6.83417085, 6.87939698, 6.92462312,
6.96984925, 7.01507538, 7.06030151, 7.10552764,
7.15075377, 7.1959799 , 7.24120603, 7.28643216,
7.33165829, 7.37688442, 7.42211055, 7.46733668,
7.51256281, 7.55778894, 7.60301508, 7.64824121,
7.69346734, 7.73869347, 7.7839196 , 7.82914573,
7.87437186, 7.91959799, 7.96482412, 8.01005025,
8.05527638, 8.10050251, 8.14572864, 8.19095477,
8.2361809 , 8.28140704, 8.32663317, 8.3718593 ,
8.41708543, 8.46231156, 8.50753769, 8.55276382,
8.59798995, 8.64321608, 8.68844221, 8.73366834,
8.77889447, 8.8241206 , 8.86934673, 8.91457286,
8.95979899, 9.00502513, 9.05025126, 9.09547739,
9.14070352, 9.18592965, 9.23115578, 9.27638191,
9.32160804, 9.36683417, 9.4120603 , 9.45728643,
9.50251256, 9.54773869, 9.59296482, 9.63819095,
9.68341709, 9.72864322, 9.77386935, 9.81909548,
9.86432161, 9.90954774, 9.95477387, 10. ])
In [71]: x = np.linspace(0,2*np.pi,num=50)
In [72]: x
Out[72]:
array([ 0. , 0.12822827, 0.25645654, 0.38468481, 0.51291309,
0.64114136, 0.76936963, 0.8975979 , 1.02582617, 1.15405444,
1.28228272, 1.41051099, 1.53873926, 1.66696753, 1.7951958 ,
1.92342407, 2.05165235, 2.17988062, 2.30810889, 2.43633716,
2.56456543, 2.6927937 , 2.82102197, 2.94925025, 3.07747852,
3.20570679, 3.33393506, 3.46216333, 3.5903916 , 3.71861988,
3.84684815, 3.97507642, 4.10330469, 4.23153296, 4.35976123,
4.48798951, 4.61621778, 4.74444605, 4.87267432, 5.00090259,
5.12913086, 5.25735913, 5.38558741, 5.51381568, 5.64204395,
5.77027222, 5.89850049, 6.02672876, 6.15495704, 6.28318531])
In [74]: y
Out[74]:
array([ 0.00000000e+00, 1.27877162e-01, 2.53654584e-01,
3.75267005e-01, 4.90717552e-01, 5.98110530e-01,
6.95682551e-01, 7.81831482e-01, 8.55142763e-01,
9.14412623e-01, 9.58667853e-01, 9.87181783e-01,
9.99486216e-01, 9.95379113e-01, 9.74927912e-01,
9.38468422e-01, 8.86599306e-01, 8.20172255e-01,
7.40277997e-01, 6.48228395e-01, 5.45534901e-01,
4.33883739e-01, 3.15108218e-01, 1.91158629e-01,
6.40702200e-02, -6.40702200e-02, -1.91158629e-01,
-3.15108218e-01, -4.33883739e-01, -5.45534901e-01,
-6.48228395e-01, -7.40277997e-01, -8.20172255e-01,
-8.86599306e-01, -9.38468422e-01, -9.74927912e-01,
-9.95379113e-01, -9.99486216e-01, -9.87181783e-01,
-9.58667853e-01, -9.14412623e-01, -8.55142763e-01,
-7.81831482e-01, -6.95682551e-01, -5.98110530e-01,
-4.90717552e-01, -3.75267005e-01, -2.53654584e-01,
-1.27877162e-01, -2.44929360e-16])
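In [73] is not shown above; judging by the values, y is presumably np.sin(x) over the x just defined. The natural next step for these notes is to plot it, a sketch assuming matplotlib is installed:

import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 2 * np.pi, num=50)
y = np.sin(x)  # assumption: this is the y shown above

plt.plot(x, y)
plt.xlabel('x')
plt.ylabel('sin(x)')
plt.show()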
2. pandas notes
In [1]: import pandas as pd
In [2]: import numpy as np
In [3]: s = pd.Series([1, 3, 5, np.NaN, 8, 4])
In [4]: s
Out[4]:
0 1.0
1 3.0
2 5.0
3 NaN
4 8.0
5 4.0
dtype: float64
In [5]: dates = pd.date_range('20160301', periods=6)
In [6]: dates
Out[6]:
DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
'2016-03-05', '2016-03-06'],
dtype='datetime64[ns]', freq='D')
In [7]: data = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
In [8]: data
Out[8]:
A B C D
2016-03-01 2.027839 -0.292588 0.051875 -0.297805
2016-03-02 1.098002 -0.206494 0.038982 0.043550
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-04 -0.379631 0.165321 -2.121213 1.706808
2016-03-05 0.020574 -1.697299 1.230499 1.826499
2016-03-06 -0.727000 -0.072301 -1.511767 0.131407
In [9]: data.shape
Out[9]: (6, 4)
In [10]: data.values
Out[10]:
array([[ 2.02783946, -0.29258761, 0.05187483, -0.2978054 ],
[ 1.09800225, -0.20649433, 0.03898198, 0.04355011],
[ 1.15290424, 0.10370374, 0.39340982, -0.42141752],
[-0.37963074, 0.1653215 , -2.12121315, 1.70680791],
[ 0.02057416, -1.69729895, 1.23049898, 1.82649881],
[-0.72699977, -0.0723013 , -1.5117672 , 0.13140707]])
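data.values hands back the underlying numpy array; newer pandas documentation prefers the equivalent .to_numpy(). A quick sketch (assuming pandas 0.24 or later):

import numpy as np
import pandas as pd

dates = pd.date_range('20160301', periods=6)
data = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))

arr = data.to_numpy()        # same result as data.values for an all-float frame
print(type(arr), arr.shape)  # <class 'numpy.ndarray'> (6, 4)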
In [13]: d = {'A': 1, 'B': pd.Timestamp('20130301'), 'C': [i for i in range(4)], 'D': np.arange(4)}
In [14]: d
Out[14]:
{'A': 1,
'B': Timestamp('2013-03-01 00:00:00'),
'C': [0, 1, 2, 3],
'D': array([0, 1, 2, 3])}
In [16]: df = pd.DataFrame(d)
In [17]: df
Out[17]:
A B C D
0 1 2013-03-01 0 0
1 1 2013-03-01 1 1
2 1 2013-03-01 2 2
3 1 2013-03-01 3 3
In [18]: df.dtypes
Out[18]:
A int64
B datetime64[ns]
C int64
D int32
dtype: object
In [19]: df.A
Out[19]:
0 1
1 1
2 1
3 1
Name: A, dtype: int64
In [20]: df.C
Out[20]:
0 0
1 1
2 2
3 3
Name: C, dtype: int64
In [21]: df.B
Out[21]:
0 2013-03-01
1 2013-03-01
2 2013-03-01
3 2013-03-01
Name: B, dtype: datetime64[ns]
In [22]: df.D
Out[22]:
0 0
1 1
2 2
3 3
Name: D, dtype: int32
In [23]: type(df.B)
Out[23]: pandas.core.series.Series
In [24]: data.head()
Out[24]:
A B C D
2016-03-01 2.027839 -0.292588 0.051875 -0.297805
2016-03-02 1.098002 -0.206494 0.038982 0.043550
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-04 -0.379631 0.165321 -2.121213 1.706808
2016-03-05 0.020574 -1.697299 1.230499 1.826499
In [25]: data.tail()
Out[25]:
A B C D
2016-03-02 1.098002 -0.206494 0.038982 0.043550
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-04 -0.379631 0.165321 -2.121213 1.706808
2016-03-05 0.020574 -1.697299 1.230499 1.826499
2016-03-06 -0.727000 -0.072301 -1.511767 0.131407
In [26]: data.head(2)
Out[26]:
A B C D
2016-03-01 2.027839 -0.292588 0.051875 -0.297805
2016-03-02 1.098002 -0.206494 0.038982 0.043550
In [27]: data.index
Out[27]:
DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
'2016-03-05', '2016-03-06'],
dtype='datetime64[ns]', freq='D')
In [28]: data.describe()
Out[28]:
A B C D
count 6.000000 6.000000 6.000000 6.000000
mean 0.532115 -0.333276 -0.319702 0.498173
std 1.060295 0.690789 1.252584 1.004557
min -0.727000 -1.697299 -2.121213 -0.421418
25% -0.279580 -0.271064 -1.124080 -0.212467
50% 0.559288 -0.139398 0.045428 0.087479
75% 1.139179 0.059702 0.308026 1.312958
max 2.027839 0.165321 1.230499 1.826499
In [29]: data.T
Out[29]:
2016-03-01 2016-03-02 2016-03-03 2016-03-04 2016-03-05 2016-03-06
A 2.027839 1.098002 1.152904 -0.379631 0.020574 -0.727000
B -0.292588 -0.206494 0.103704 0.165321 -1.697299 -0.072301
C 0.051875 0.038982 0.393410 -2.121213 1.230499 -1.511767
D -0.297805 0.043550 -0.421418 1.706808 1.826499 0.131407
In [30]: data.T.shape
Out[30]: (4, 6)
In [31]: data.shape
Out[31]: (6, 4)
In [32]: data.sort_index(axis=1)
Out[32]:
A B C D
2016-03-01 2.027839 -0.292588 0.051875 -0.297805
2016-03-02 1.098002 -0.206494 0.038982 0.043550
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-04 -0.379631 0.165321 -2.121213 1.706808
2016-03-05 0.020574 -1.697299 1.230499 1.826499
2016-03-06 -0.727000 -0.072301 -1.511767 0.131407
In [33]: data.sort_index(axis=1, ascending=False)
Out[33]:
D C B A
2016-03-01 -0.297805 0.051875 -0.292588 2.027839
2016-03-02 0.043550 0.038982 -0.206494 1.098002
2016-03-03 -0.421418 0.393410 0.103704 1.152904
2016-03-04 1.706808 -2.121213 0.165321 -0.379631
2016-03-05 1.826499 1.230499 -1.697299 0.020574
2016-03-06 0.131407 -1.511767 -0.072301 -0.727000
In [34]: data.sort_index(axis=0, ascending=False)
Out[34]:
A B C D
2016-03-06 -0.727000 -0.072301 -1.511767 0.131407
2016-03-05 0.020574 -1.697299 1.230499 1.826499
2016-03-04 -0.379631 0.165321 -2.121213 1.706808
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-02 1.098002 -0.206494 0.038982 0.043550
2016-03-01 2.027839 -0.292588 0.051875 -0.297805
In [35]: data.sort_values(by='A')
Out[35]:
A B C D
2016-03-06 -0.727000 -0.072301 -1.511767 0.131407
2016-03-04 -0.379631 0.165321 -2.121213 1.706808
2016-03-05 0.020574 -1.697299 1.230499 1.826499
2016-03-02 1.098002 -0.206494 0.038982 0.043550
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-01 2.027839 -0.292588 0.051875 -0.297805
In [36]: data['A']
Out[36]:
2016-03-01 2.027839
2016-03-02 1.098002
2016-03-03 1.152904
2016-03-04 -0.379631
2016-03-05 0.020574
2016-03-06 -0.727000
Freq: D, Name: A, dtype: float64
In [37]: data.A
Out[37]:
2016-03-01 2.027839
2016-03-02 1.098002
2016-03-03 1.152904
2016-03-04 -0.379631
2016-03-05 0.020574
2016-03-06 -0.727000
Freq: D, Name: A, dtype: float64
In [39]: data[2:4]
Out[39]:
A B C D
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-04 -0.379631 0.165321 -2.121213 1.706808
In [41]: data['20160302':'20160305']
Out[41]:
A B C D
2016-03-02 1.098002 -0.206494 0.038982 0.043550
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-04 -0.379631 0.165321 -2.121213 1.706808
2016-03-05 0.020574 -1.697299 1.230499 1.826499
In [42]: data.loc['20160302':'20160305']
Out[42]:
A B C D
2016-03-02 1.098002 -0.206494 0.038982 0.043550
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-04 -0.379631 0.165321 -2.121213 1.706808
2016-03-05 0.020574 -1.697299 1.230499 1.826499
In [43]: data.iloc[2:4]
Out[43]:
A B C D
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-04 -0.379631 0.165321 -2.121213 1.706808
In [44]: data.iloc[:,1:3]
Out[44]:
B C
2016-03-01 -0.292588 0.051875
2016-03-02 -0.206494 0.038982
2016-03-03 0.103704 0.393410
2016-03-04 0.165321 -2.121213
2016-03-05 -1.697299 1.230499
2016-03-06 -0.072301 -1.511767
In [45]: data.loc[:,'B':'C']
Out[45]:
B C
2016-03-01 -0.292588 0.051875
2016-03-02 -0.206494 0.038982
2016-03-03 0.103704 0.393410
2016-03-04 0.165321 -2.121213
2016-03-05 -1.697299 1.230499
2016-03-06 -0.072301 -1.511767
In [46]: data.loc['20160302':'20160305', ['B','C']]
Out[46]:
B C
2016-03-02 -0.206494 0.038982
2016-03-03 0.103704 0.393410
2016-03-04 0.165321 -2.121213
2016-03-05 -1.697299 1.230499
In [48]: data.iloc[1:3, 1:3]
Out[48]:
B C
2016-03-02 -0.206494 0.038982
2016-03-03 0.103704 0.393410
In [49]: data.loc['20160302', 'B']
Out[49]: -0.20649432992272151
In [50]: data.at[pd.Timestamp('20160302'), 'B']
Out[50]: -0.20649432992272151
In [51]: data.iloc[1]
Out[51]:
A 1.098002
B -0.206494
C 0.038982
D 0.043550
Name: 2016-03-02 00:00:00, dtype: float64
In [52]: data.iloc[1:3]
Out[52]:
A B C D
2016-03-02 1.098002 -0.206494 0.038982 0.043550
2016-03-03 1.152904 0.103704 0.393410 -0.421418
In [53]: data.iloc[:,1:3]
Out[53]:
B C
2016-03-01 -0.292588 0.051875
2016-03-02 -0.206494 0.038982
2016-03-03 0.103704 0.393410
2016-03-04 0.165321 -2.121213
2016-03-05 -1.697299 1.230499
2016-03-06 -0.072301 -1.511767
In [54]: data.iloc[1,1]
Out[54]: -0.20649432992272151
In [55]: data.iat[1,1]
Out[55]: -0.20649432992272151
In [56]: %timeit data.iloc[1,1]
8.08 µs ± 17.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
In [57]: %timeit data.iat[1,1]
5.38 µs ± 10.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
In [58]: data[data.A > 0]
Out[58]:
A B C D
2016-03-01 2.027839 -0.292588 0.051875 -0.297805
2016-03-02 1.098002 -0.206494 0.038982 0.043550
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-05 0.020574 -1.697299 1.230499 1.826499
In [59]: data[data > 0]
Out[59]:
A B C D
2016-03-01 2.027839 NaN 0.051875 NaN
2016-03-02 1.098002 NaN 0.038982 0.043550
2016-03-03 1.152904 0.103704 0.393410 NaN
2016-03-04 NaN 0.165321 NaN 1.706808
2016-03-05 0.020574 NaN 1.230499 1.826499
2016-03-06 NaN NaN NaN 0.131407
In [60]: data2 = data.copy()
In [61]: data2
Out[61]:
A B C D
2016-03-01 2.027839 -0.292588 0.051875 -0.297805
2016-03-02 1.098002 -0.206494 0.038982 0.043550
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-04 -0.379631 0.165321 -2.121213 1.706808
2016-03-05 0.020574 -1.697299 1.230499 1.826499
2016-03-06 -0.727000 -0.072301 -1.511767 0.131407
In [62]: tag = ['a'] * 2 + ['b'] * 2 + ['c'] * 2
In [63]: data2['TAG'] = tag
In [64]: data2
Out[64]:
A B C D TAG
2016-03-01 2.027839 -0.292588 0.051875 -0.297805 a
2016-03-02 1.098002 -0.206494 0.038982 0.043550 a
2016-03-03 1.152904 0.103704 0.393410 -0.421418 b
2016-03-04 -0.379631 0.165321 -2.121213 1.706808 b
2016-03-05 0.020574 -1.697299 1.230499 1.826499 c
2016-03-06 -0.727000 -0.072301 -1.511767 0.131407 c
In [66]: data2[data2.TAG.isin(['a','c'])]
Out[66]:
A B C D TAG
2016-03-01 2.027839 -0.292588 0.051875 -0.297805 a
2016-03-02 1.098002 -0.206494 0.038982 0.043550 a
2016-03-05 0.020574 -1.697299 1.230499 1.826499 c
2016-03-06 -0.727000 -0.072301 -1.511767 0.131407 c
In [68]: data.iat[0,0] = 100
In [69]: data
Out[69]:
A B C D
2016-03-01 100.000000 -0.292588 0.051875 -0.297805
2016-03-02 1.098002 -0.206494 0.038982 0.043550
2016-03-03 1.152904 0.103704 0.393410 -0.421418
2016-03-04 -0.379631 0.165321 -2.121213 1.706808
2016-03-05 0.020574 -1.697299 1.230499 1.826499
2016-03-06 -0.727000 -0.072301 -1.511767 0.131407
In [70]: data.A = range(6)
In [71]: data
Out[71]:
A B C D
2016-03-01 0 -0.292588 0.051875 -0.297805
2016-03-02 1 -0.206494 0.038982 0.043550
2016-03-03 2 0.103704 0.393410 -0.421418
2016-03-04 3 0.165321 -2.121213 1.706808
2016-03-05 4 -1.697299 1.230499 1.826499
2016-03-06 5 -0.072301 -1.511767 0.131407
In [72]: data.B = 100
In [73]: data
Out[73]:
A B C D
2016-03-01 0 100 0.051875 -0.297805
2016-03-02 1 100 0.038982 0.043550
2016-03-03 2 100 0.393410 -0.421418
2016-03-04 3 100 -2.121213 1.706808
2016-03-05 4 100 1.230499 1.826499
2016-03-06 5 100 -1.511767 0.131407
In [74]: data.iloc[:,2:4] = 1000
In [75]: data
Out[75]:
A B C D
2016-03-01 0 100 1000 1000
2016-03-02 1 100 1000 1000
2016-03-03 2 100 1000 1000
2016-03-04 3 100 1000 1000
2016-03-05 4 100 1000 1000
2016-03-06 5 100 1000 1000
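Boolean masks also work on the left-hand side of an assignment, which is handy for clipping or zeroing values in place. A small sketch on a fresh all-numeric frame:

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(6, 4), columns=list('ABCD'))
df[df < 0] = 0                # zero out every negative entry in place
print((df >= 0).all().all())  # True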
3. pandas in IPython
In [2]: import pandas as pd
...: import numpy as np
...: import matplotlib.pyplot as plt
...:
...:
In [3]: dates = pd.date_range('20190501', periods=6)
...: df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
...: df
...:
...:
Out[3]:
A B C D
2019-05-01 -1.203323 -1.023017 0.994808 1.845361
2019-05-02 0.413818 0.350563 0.718125 -0.100747
2019-05-03 -0.976996 0.298922 -0.563673 1.431109
2019-05-04 -1.395960 -0.415227 -1.583835 0.022370
2019-05-05 -2.466317 -0.819741 -0.417125 -2.290065
2019-05-06 1.290431 -1.629373 -1.530487 -1.452107
In [4]: df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
...: df1
...:
...:
Out[4]:
A B C D E
2019-05-01 -1.203323 -1.023017 0.994808 1.845361 NaN
2019-05-02 0.413818 0.350563 0.718125 -0.100747 NaN
2019-05-03 -0.976996 0.298922 -0.563673 1.431109 NaN
2019-05-04 -1.395960 -0.415227 -1.583835 0.022370 NaN
In [5]: df1.iloc[1:3,4] = 2
...: df1
...:
...:
Out[5]:
A B C D E
2019-05-01 -1.203323 -1.023017 0.994808 1.845361 NaN
2019-05-02 0.413818 0.350563 0.718125 -0.100747 2.0
2019-05-03 -0.976996 0.298922 -0.563673 1.431109 2.0
2019-05-04 -1.395960 -0.415227 -1.583835 0.022370 NaN
In [6]: df1.dropna()
Out[6]:
A B C D E
2019-05-02 0.413818 0.350563 0.718125 -0.100747 2.0
2019-05-03 -0.976996 0.298922 -0.563673 1.431109 2.0
In [7]: df1.fillna(value=5)
Out[7]:
A B C D E
2019-05-01 -1.203323 -1.023017 0.994808 1.845361 5.0
2019-05-02 0.413818 0.350563 0.718125 -0.100747 2.0
2019-05-03 -0.976996 0.298922 -0.563673 1.431109 2.0
2019-05-04 -1.395960 -0.415227 -1.583835 0.022370 5.0
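dropna and fillna both take options worth remembering: how='all' only drops rows where every value is missing, and forward-filling copies the last valid value downward. A small sketch on a toy frame:

import numpy as np
import pandas as pd

df1 = pd.DataFrame({'A': [1.0, 2.0, np.nan, 4.0],
                    'E': [np.nan, 2.0, 2.0, np.nan]})

print(df1.dropna(how='all'))  # drops a row only if all of its columns are NaN (none here)
print(df1.ffill())            # forward fill: NaNs take the previous valid value in the column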
In [8]: pd.isnull(df1)
Out[8]:
A B C D E
2019-05-01 False False False False True
2019-05-02 False False False False False
2019-05-03 False False False False False
2019-05-04 False False False False True
In [9]: pd.isnull(df1).any().any()
Out[9]: True
In [10]: df1.mean()
Out[10]:
A -0.790615
B -0.197190
C -0.108644
D 0.799523
E 2.000000
dtype: float64
In [11]: df1.mean(axis=1)
Out[11]:
2019-05-01 0.153457
2019-05-02 0.676352
2019-05-03 0.437872
2019-05-04 -0.843163
Freq: D, dtype: float64
In [12]: df1.cumsum()
Out[12]:
A B C D E
2019-05-01 -1.203323 -1.023017 0.994808 1.845361 NaN
2019-05-02 -0.789505 -0.672454 1.712933 1.744614 2.0
2019-05-03 -1.766501 -0.373532 1.149259 3.175724 4.0
2019-05-04 -3.162461 -0.788759 -0.434575 3.198094 NaN
In [13]: s = pd.Series([1,3,5,np.nan,6,8],index=dates).shift(2)
...: s
...:
...:
Out[13]:
2019-05-01 NaN
2019-05-02 NaN
2019-05-03 1.0
2019-05-04 3.0
2019-05-05 5.0
2019-05-06 NaN
Freq: D, dtype: float64
In [14]: df
Out[14]:
A B C D
2019-05-01 -1.203323 -1.023017 0.994808 1.845361
2019-05-02 0.413818 0.350563 0.718125 -0.100747
2019-05-03 -0.976996 0.298922 -0.563673 1.431109
2019-05-04 -1.395960 -0.415227 -1.583835 0.022370
2019-05-05 -2.466317 -0.819741 -0.417125 -2.290065
2019-05-06 1.290431 -1.629373 -1.530487 -1.452107
In [15]: df.sub(s,axis='index')
Out[15]:
A B C D
2019-05-01 NaN NaN NaN NaN
2019-05-02 NaN NaN NaN NaN
2019-05-03 -1.976996 -0.701078 -1.563673 0.431109
2019-05-04 -4.395960 -3.415227 -4.583835 -2.977630
2019-05-05 -7.466317 -5.819741 -5.417125 -7.290065
2019-05-06 NaN NaN NaN NaN
In [16]: df.apply(np.cumsum)
Out[16]:
A B C D
2019-05-01 -1.203323 -1.023017 0.994808 1.845361
2019-05-02 -0.789505 -0.672454 1.712933 1.744614
2019-05-03 -1.766501 -0.373532 1.149259 3.175724
2019-05-04 -3.162461 -0.788759 -0.434575 3.198094
2019-05-05 -5.628777 -1.608500 -0.851700 0.908028
2019-05-06 -4.338346 -3.237874 -2.382187 -0.544078
In [17]: df.apply(lambda x : x.max() - x.min())
Out[17]:
A 3.756748
B 1.979937
C 2.578643
D 4.135427
dtype: float64
In [18]: def _sum(x):
...:     print(type(x))
...:     return x.sum()
...: df.apply(_sum)
...:
...:
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
Out[18]:
A -4.338346
B -3.237874
C -2.382187
D -0.544078
dtype: float64
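By default apply hands each column to the function as a Series (hence the four Series prints above); axis=1 applies the function row by row instead. A minimal sketch:

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(6, 4), columns=list('ABCD'))
col_range = df.apply(lambda col: col.max() - col.min())          # one value per column
row_range = df.apply(lambda row: row.max() - row.min(), axis=1)  # one value per row
print(col_range.shape, row_range.shape)                          # (4,) (6,)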
In [19]: s = pd.Series(np.random.randint(10,20,size=20))
...: s
...:
...:
Out[19]:
0 19
1 16
2 11
3 17
4 13
5 14
6 13
7 11
8 17
9 12
10 19
11 10
12 19
13 18
14 12
15 10
16 19
17 12
18 17
19 10
dtype: int32
In [20]: s.value_counts()
Out[20]:
19 4
17 3
12 3
10 3
13 2
11 2
18 1
16 1
14 1
dtype: int64
In [21]: s.mode()
Out[21]:
0 19
dtype: int32
In [22]: df = pd.DataFrame(np.random.randn(10,4), columns=list('ABCD'))
...: df
...:
...:
Out[22]:
A B C D
0 1.852904 0.224001 -0.873486 -0.098751
1 -0.005724 -0.433029 0.059684 -0.424876
2 0.382297 1.121435 -0.572000 0.624490
3 -1.304039 -0.523107 1.759417 0.367895
4 0.030497 1.768304 0.242685 -0.921089
5 -0.086144 -0.516301 0.704865 0.195875
6 -0.015493 -1.004401 0.775551 -0.349997
7 0.542791 -2.144951 0.208070 0.930271
8 1.709792 0.170925 -0.102421 0.544754
9 -1.135963 1.863820 -0.789279 -1.587587
In [23]: df.iloc[:3]
Out[23]:
A B C D
0 1.852904 0.224001 -0.873486 -0.098751
1 -0.005724 -0.433029 0.059684 -0.424876
2 0.382297 1.121435 -0.572000 0.624490
In [24]: df.iloc[3:7]
Out[24]:
A B C D
3 -1.304039 -0.523107 1.759417 0.367895
4 0.030497 1.768304 0.242685 -0.921089
5 -0.086144 -0.516301 0.704865 0.195875
6 -0.015493 -1.004401 0.775551 -0.349997
In [25]: df.iloc[7:]
Out[25]:
A B C D
7 0.542791 -2.144951 0.208070 0.930271
8 1.709792 0.170925 -0.102421 0.544754
9 -1.135963 1.863820 -0.789279 -1.587587
In [26]: df1 = pd.concat([df.iloc[:3], df.iloc[3:7], df.iloc[7:]])
...: df1
...:
...:
Out[26]:
A B C D
0 1.852904 0.224001 -0.873486 -0.098751
1 -0.005724 -0.433029 0.059684 -0.424876
2 0.382297 1.121435 -0.572000 0.624490
3 -1.304039 -0.523107 1.759417 0.367895
4 0.030497 1.768304 0.242685 -0.921089
5 -0.086144 -0.516301 0.704865 0.195875
6 -0.015493 -1.004401 0.775551 -0.349997
7 0.542791 -2.144951 0.208070 0.930271
8 1.709792 0.170925 -0.102421 0.544754
9 -1.135963 1.863820 -0.789279 -1.587587
In [27]: df == df1
Out[27]:
A B C D
0 True True True True
1 True True True True
2 True True True True
3 True True True True
4 True True True True
5 True True True True
6 True True True True
7 True True True True
8 True True True True
9 True True True True
In [28]: (df == df1).all().all()
Out[28]: True
In [29]: left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1,2]})
...: right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4,5]})
...:
...:
In [30]: left
Out[30]:
key lval
0 foo 1
1 foo 2
In [31]: right
Out[31]:
key rval
0 foo 4
1 foo 5
In [32]: pd.merge(left,right,on='key')
Out[32]:
key lval rval
0 foo 1 4
1 foo 1 5
2 foo 2 4
3 foo 2 5
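Because both frames repeat the key 'foo', the merge above is the Cartesian product of the matching rows (2 x 2 = 4 rows). With distinct keys and a how= argument it behaves like a SQL join; a small sketch:

import pandas as pd

left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo', 'baz'], 'rval': [4, 5]})

print(pd.merge(left, right, on='key'))              # inner join: only 'foo' survives
print(pd.merge(left, right, on='key', how='left'))  # keep all left keys; 'bar' gets NaN rval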
In [33]: s = pd.Series(np.random.randint(1,5,size=4), index=list('ABCD'))
...: s
...:
...:
Out[33]:
A 4
B 4
C 3
D 3
dtype: int32
In [34]: df.append(s, ignore_index=True)
Out[34]:
A B C D
0 1.852904 0.224001 -0.873486 -0.098751
1 -0.005724 -0.433029 0.059684 -0.424876
2 0.382297 1.121435 -0.572000 0.624490
3 -1.304039 -0.523107 1.759417 0.367895
4 0.030497 1.768304 0.242685 -0.921089
5 -0.086144 -0.516301 0.704865 0.195875
6 -0.015493 -1.004401 0.775551 -0.349997
7 0.542791 -2.144951 0.208070 0.930271
8 1.709792 0.170925 -0.102421 0.544754
9 -1.135963 1.863820 -0.789279 -1.587587
10 4.000000 4.000000 3.000000 3.000000
In [35]: s = pd.Series(np.random.randint(1,5,size=5), index=list('ABCDE'))
...: s
...:
...:
Out[35]:
A 1
B 2
C 3
D 3
E 1
dtype: int32
In [36]: df.append(s, ignore_index=True)
Out[36]:
A B C D E
0 1.852904 0.224001 -0.873486 -0.098751 NaN
1 -0.005724 -0.433029 0.059684 -0.424876 NaN
2 0.382297 1.121435 -0.572000 0.624490 NaN
3 -1.304039 -0.523107 1.759417 0.367895 NaN
4 0.030497 1.768304 0.242685 -0.921089 NaN
5 -0.086144 -0.516301 0.704865 0.195875 NaN
6 -0.015493 -1.004401 0.775551 -0.349997 NaN
7 0.542791 -2.144951 0.208070 0.930271 NaN
8 1.709792 0.170925 -0.102421 0.544754 NaN
9 -1.135963 1.863820 -0.789279 -1.587587 NaN
10 1.000000 2.000000 3.000000 3.000000 1.0
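df.append worked in the pandas used for these notes, but it was later deprecated (pandas 1.4) and removed (2.0); pd.concat covers the same case. A sketch assuming a recent pandas:

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(3, 4), columns=list('ABCD'))
s = pd.Series(np.random.randint(1, 5, size=4), index=list('ABCD'))

# same effect as df.append(s, ignore_index=True)
out = pd.concat([df, s.to_frame().T], ignore_index=True)
print(out.shape)  # (4, 4)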
In [37]: df = pd.DataFrame({
...: 'A': ['foo', 'bar', 'foo', 'bar', 'bar', 'foo', 'bar', 'foo'],
...: 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
...: 'C': np.random.randn(8),
...: 'D': np.random.randn(8)
...: })
...: df
...:
...:
Out[37]:
A B C D
0 foo one 0.833845 0.260860
1 bar one -1.066645 -0.240832
2 foo two 0.470603 -0.276248
3 bar three 2.288018 0.661833
4 bar two -1.213533 -1.602429
5 foo two -1.439622 0.518569
6 bar one 0.012314 0.789365
7 foo three 0.280255 0.611326
In [38]: df.groupby('A').sum()
Out[38]:
C D
A
bar 0.020155 -0.392063
foo 0.145082 1.114506
In [39]: df.groupby(['A', 'B']).sum()
Out[39]:
C D
A B
bar one -1.054330 0.548534
three 2.288018 0.661833
two -1.213533 -1.602429
foo one 0.833845 0.260860
three 0.280255 0.611326
two -0.969019 0.242320
In [40]: df.groupby(['B', 'A']).sum()
Out[40]:
C D
B A
one bar -1.054330 0.548534
foo 0.833845 0.260860
three bar 2.288018 0.661833
foo 0.280255 0.611326
two bar -1.213533 -1.602429
foo -0.969019 0.242320
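groupby followed by agg computes several statistics per group in one go. A small sketch with the same column names (the data values are made up):

import numpy as np
import pandas as pd

df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar'],
                   'C': np.random.randn(4),
                   'D': np.random.randn(4)})
print(df.groupby('A').agg(['sum', 'mean', 'count']))  # MultiIndex columns: one block per statistic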
4. pandas in IPython, part 2
In [1]: import pandas as pd
...: import numpy as np
...: import matplotlib.pyplot as plt
...:
...:
In [2]: tuples = list(zip(*[['bar','bar','baz','baz',
...: 'foo','foo','qux','qux'],
...: ['one','two','one','two',
...: 'one','two','one','two']]))
...: tuples
...:
Out[2]:
[('bar', 'one'),
('bar', 'two'),
('baz', 'one'),
('baz', 'two'),
('foo', 'one'),
('foo', 'two'),
('qux', 'one'),
('qux', 'two')]
In [3]: index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
...: index
...:
...:
Out[3]:
MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
names=['first', 'second'])
In [4]: df = pd.DataFrame(np.random.randn(8,2), index=index, columns=['A', 'B'])
...: df
...:
...:
Out[4]:
A B
first second
bar one -0.573821 0.448645
two -0.317258 0.253702
baz one -1.181118 1.788925
two -0.762551 0.226626
foo one 0.184454 0.118198
two 1.062119 1.506467
qux one -0.894020 -1.233768
two -0.005463 -1.275630
In [5]: stacked = df.stack()
...: stacked
...:
...:
Out[5]:
first second
bar one A -0.573821
B 0.448645
two A -0.317258
B 0.253702
baz one A -1.181118
B 1.788925
two A -0.762551
B 0.226626
foo one A 0.184454
B 0.118198
two A 1.062119
B 1.506467
qux one A -0.894020
B -1.233768
two A -0.005463
B -1.275630
dtype: float64
In [6]: stacked.index
Out[6]:
MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two'], ['A', 'B']],
labels=[[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3], [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1], [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]],
names=['first', 'second', None])
In [7]: stacked.unstack().unstack()
Out[7]:
A B
second one two one two
first
bar -0.573821 -0.317258 0.448645 0.253702
baz -1.181118 -0.762551 1.788925 0.226626
foo 0.184454 1.062119 0.118198 1.506467
qux -0.894020 -0.005463 -1.233768 -1.275630
In [8]: stacked.unstack()
Out[8]:
A B
first second
bar one -0.573821 0.448645
two -0.317258 0.253702
baz one -1.181118 1.788925
two -0.762551 0.226626
foo one 0.184454 0.118198
two 1.062119 1.506467
qux one -0.894020 -1.233768
two -0.005463 -1.275630
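unstack pivots the innermost index level into columns by default; passing a level name or number unstacks a different level. A small sketch reusing the level names above:

import numpy as np
import pandas as pd

index = pd.MultiIndex.from_tuples(
    [('bar', 'one'), ('bar', 'two'), ('baz', 'one'), ('baz', 'two')],
    names=['first', 'second'])
df = pd.DataFrame(np.random.randn(4, 2), index=index, columns=['A', 'B'])

stacked = df.stack()
print(stacked.unstack('second'))  # 'one'/'two' become columns instead of the innermost level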
In [9]: df = pd.DataFrame({'A':['one','one','two','three'] * 3,
...: 'B':['A','B','C'] * 4,
...: 'C':['foo','foo','foo','bar','bar','bar'] * 2,
...: 'D':np.random.randn(12),
...: 'E':np.random.randn(12)})
...: df
...:
Out[9]:
A B C D E
0 one A foo 1.095168 1.385659
1 one B foo 0.026642 1.454903
2 two C foo 0.835684 0.080938
3 three A bar -2.338476 0.920093
4 one B bar -0.383956 -1.063160
5 one C bar 1.708665 -0.021806
6 two A foo 0.725428 -0.031022
7 three B foo -0.386248 1.205046
8 one C foo -0.203927 -0.259308
9 one A bar 1.184749 0.368413
10 two B bar 1.602919 -1.816103
11 three C bar -0.419879 0.303990
In [10]: df.pivot_table(values=['D'], index=['A', 'B'], columns=['C'])
Out[10]:
D
C bar foo
A B
one A 1.184749 1.095168
B -0.383956 0.026642
C 1.708665 -0.203927
three A -2.338476 NaN
B NaN -0.386248
C -0.419879 NaN
two A NaN 0.725428
B 1.602919 NaN
C NaN 0.835684
In [11]: df.pivot_table(values=['E'],index=['A'], columns=['C'])
Out[11]:
E
C bar foo
A
one -0.238851 0.860418
three 0.612041 1.205046
two -1.816103 0.024958
In [12]: df[df.A == 'one']
Out[12]:
A B C D E
0 one A foo 1.095168 1.385659
1 one B foo 0.026642 1.454903
4 one B bar -0.383956 -1.063160
5 one C bar 1.708665 -0.021806
8 one C foo -0.203927 -0.259308
9 one A bar 1.184749 0.368413
In [13]: df[df.A == 'one'].groupby('C').mean()
Out[13]:
D E
C
bar 0.836486 -0.238851
foo 0.305961 0.860418
In [14]: rng = pd.date_range('20160301', periods=600, freq='s')
...: rng
...:
...:
Out[14]:
DatetimeIndex(['2016-03-01 00:00:00', '2016-03-01 00:00:01',
'2016-03-01 00:00:02', '2016-03-01 00:00:03',
'2016-03-01 00:00:04', '2016-03-01 00:00:05',
'2016-03-01 00:00:06', '2016-03-01 00:00:07',
'2016-03-01 00:00:08', '2016-03-01 00:00:09',
...
'2016-03-01 00:09:50', '2016-03-01 00:09:51',
'2016-03-01 00:09:52', '2016-03-01 00:09:53',
'2016-03-01 00:09:54', '2016-03-01 00:09:55',
'2016-03-01 00:09:56', '2016-03-01 00:09:57',
'2016-03-01 00:09:58', '2016-03-01 00:09:59'],
dtype='datetime64[ns]', length=600, freq='S')
In [15]: s = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
...: s
...:
...:
Out[15]:
2016-03-01 00:00:00 86
2016-03-01 00:00:01 393
2016-03-01 00:00:02 285
2016-03-01 00:00:03 330
2016-03-01 00:00:04 30
2016-03-01 00:00:05 325
2016-03-01 00:00:06 325
2016-03-01 00:00:07 442
2016-03-01 00:00:08 426
2016-03-01 00:00:09 82
2016-03-01 00:00:10 320
2016-03-01 00:00:11 334
2016-03-01 00:00:12 434
2016-03-01 00:00:13 102
2016-03-01 00:00:14 440
2016-03-01 00:00:15 263
2016-03-01 00:00:16 258
2016-03-01 00:00:17 338
2016-03-01 00:00:18 7
2016-03-01 00:00:19 126
2016-03-01 00:00:20 33
2016-03-01 00:00:21 405
2016-03-01 00:00:22 188
2016-03-01 00:00:23 484
2016-03-01 00:00:24 412
2016-03-01 00:00:25 127
2016-03-01 00:00:26 449
2016-03-01 00:00:27 260
2016-03-01 00:00:28 155
2016-03-01 00:00:29 155
...
2016-03-01 00:09:30 329
2016-03-01 00:09:31 30
2016-03-01 00:09:32 295
2016-03-01 00:09:33 181
2016-03-01 00:09:34 178
2016-03-01 00:09:35 22
2016-03-01 00:09:36 148
2016-03-01 00:09:37 166
2016-03-01 00:09:38 137
2016-03-01 00:09:39 238
2016-03-01 00:09:40 106
2016-03-01 00:09:41 442
2016-03-01 00:09:42 143
2016-03-01 00:09:43 180
2016-03-01 00:09:44 64
2016-03-01 00:09:45 98
2016-03-01 00:09:46 60
2016-03-01 00:09:47 211
2016-03-01 00:09:48 200
2016-03-01 00:09:49 458
2016-03-01 00:09:50 348
2016-03-01 00:09:51 353
2016-03-01 00:09:52 314
2016-03-01 00:09:53 191
2016-03-01 00:09:54 55
2016-03-01 00:09:55 320
2016-03-01 00:09:56 461
2016-03-01 00:09:57 223
2016-03-01 00:09:58 176
2016-03-01 00:09:59 325
Freq: S, Length: 600, dtype: int32
In [16]: s.resample('2Min', how='sum')
D:\python\Scripts\ipython:1: FutureWarning: how in .resample() is deprecated
the new syntax is .resample(...).sum()
Out[16]:
2016-03-01 00:00:00 30038
2016-03-01 00:02:00 31791
2016-03-01 00:04:00 29403
2016-03-01 00:06:00 29762
2016-03-01 00:08:00 30800
Freq: 2T, dtype: int32
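The FutureWarning above points at the current API: call .resample(...) and then an aggregation method. A sketch with the same shape of data:

import numpy as np
import pandas as pd

rng = pd.date_range('20160301', periods=600, freq='s')
s = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)

print(s.resample('2min').sum())  # replacement for s.resample('2Min', how='sum')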
In [17]: rng = pd.period_range('2000Q1', '2016Q1', freq='Q')
...: rng
...:
...:
Out[17]:
PeriodIndex(['2000Q1', '2000Q2', '2000Q3', '2000Q4', '2001Q1', '2001Q2',
'2001Q3', '2001Q4', '2002Q1', '2002Q2', '2002Q3', '2002Q4',
'2003Q1', '2003Q2', '2003Q3', '2003Q4', '2004Q1', '2004Q2',
'2004Q3', '2004Q4', '2005Q1', '2005Q2', '2005Q3', '2005Q4',
'2006Q1', '2006Q2', '2006Q3', '2006Q4', '2007Q1', '2007Q2',
'2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3', '2008Q4',
'2009Q1', '2009Q2', '2009Q3', '2009Q4', '2010Q1', '2010Q2',
'2010Q3', '2010Q4', '2011Q1', '2011Q2', '2011Q3', '2011Q4',
'2012Q1', '2012Q2', '2012Q3', '2012Q4', '2013Q1', '2013Q2',
'2013Q3', '2013Q4', '2014Q1', '2014Q2', '2014Q3', '2014Q4',
'2015Q1', '2015Q2', '2015Q3', '2015Q4', '2016Q1'],
dtype='period[Q-DEC]', freq='Q-DEC')
In [18]: rng.to_timestamp()
Out[18]:
DatetimeIndex(['2000-01-01', '2000-04-01', '2000-07-01', '2000-10-01',
'2001-01-01', '2001-04-01', '2001-07-01', '2001-10-01',
'2002-01-01', '2002-04-01', '2002-07-01', '2002-10-01',
'2003-01-01', '2003-04-01', '2003-07-01', '2003-10-01',
'2004-01-01', '2004-04-01', '2004-07-01', '2004-10-01',
'2005-01-01', '2005-04-01', '2005-07-01', '2005-10-01',
'2006-01-01', '2006-04-01', '2006-07-01', '2006-10-01',
'2007-01-01', '2007-04-01', '2007-07-01', '2007-10-01',
'2008-01-01', '2008-04-01', '2008-07-01', '2008-10-01',
'2009-01-01', '2009-04-01', '2009-07-01', '2009-10-01',
'2010-01-01', '2010-04-01', '2010-07-01', '2010-10-01',
'2011-01-01', '2011-04-01', '2011-07-01', '2011-10-01',
'2012-01-01', '2012-04-01', '2012-07-01', '2012-10-01',
'2013-01-01', '2013-04-01', '2013-07-01', '2013-10-01',
'2014-01-01', '2014-04-01', '2014-07-01', '2014-10-01',
'2015-01-01', '2015-04-01', '2015-07-01', '2015-10-01',
'2016-01-01'],
dtype='datetime64[ns]', freq='QS-OCT')
In [19]: pd.Timestamp('20160301') - pd.Timestamp('20160201')
Out[19]: Timedelta('29 days 00:00:00')
In [20]: pd.Timestamp('20160301') + pd.Timedelta(days=5)
Out[20]: Timestamp('2016-03-06 00:00:00')
In [21]: df = pd.DataFrame({'id': [1,2,3,4,5,6], 'raw_grade':['a', 'b', 'b', 'a', 'a', 'd']})
...: df
...:
...:
Out[21]:
id raw_grade
0 1 a
1 2 b
2 3 b
3 4 a
4 5 a
5 6 d
In [22]: df['grade'] = df.raw_grade.astype('category')
...: df
...:
...:
Out[22]:
id raw_grade grade
0 1 a a
1 2 b b
2 3 b b
3 4 a a
4 5 a a
5 6 d d
In [23]: df.grade.cat.categories
Out[23]: Index(['a', 'b', 'd'], dtype='object')
In [24]: df.grade.cat.categories = ['very good', 'good', 'bad']
...: df
...:
...:
Out[24]:
id raw_grade grade
0 1 a very good
1 2 b good
2 3 b good
3 4 a very good
4 5 a very good
5 6 d bad
In [25]: df.sort_values(by='grade', ascending=True)
Out[25]:
id raw_grade grade
0 1 a very good
3 4 a very good
4 5 a very good
1 2 b good
2 3 b good
5 6 d bad
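Assigning to df.grade.cat.categories works here, but that setter was later deprecated and removed; rename_categories is the replacement, and set_categories(..., ordered=True) makes the sort order explicit. A sketch assuming a recent pandas:

import pandas as pd

df = pd.DataFrame({'id': [1, 2, 3, 4, 5, 6],
                   'raw_grade': ['a', 'b', 'b', 'a', 'a', 'd']})
df['grade'] = df.raw_grade.astype('category')
df['grade'] = df.grade.cat.rename_categories(['very good', 'good', 'bad'])
df['grade'] = df.grade.cat.set_categories(['bad', 'good', 'very good'], ordered=True)

print(df.sort_values(by='grade'))  # sorts bad -> good -> very good, following category order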
In [26]: s = pd.Series(np.random.randn(1000), index=pd.date_range('20000101', periods=1000))
...: s
...:
...:
Out[26]:
2000-01-01 -0.141344
2000-01-02 -0.797249
2000-01-03 -2.464608
2000-01-04 -0.870485
2000-01-05 -1.210260
2000-01-06 1.192860
2000-01-07 0.642895
2000-01-08 -1.152103
2000-01-09 -1.313273
2000-01-10 0.027484
2000-01-11 -0.678573
2000-01-12 1.167240
2000-01-13 -1.650681
2000-01-14 0.578776
2000-01-15 -1.070705
2000-01-16 1.345511
2000-01-17 -0.161714
2000-01-18 -0.315464
2000-01-19 -1.189132
2000-01-20 -0.162142
2000-01-21 1.443795
2000-01-22 -0.547895
2000-01-23 -0.556073
2000-01-24 1.990200
2000-01-25 -0.215637
2000-01-26 1.048317
2000-01-27 -1.030935
2000-01-28 0.256619
2000-01-29 -0.130376
2000-01-30 1.286080
...
2002-08-28 -0.588474
2002-08-29 1.310814
2002-08-30 -0.386883
2002-08-31 -0.181065
2002-09-01 -1.756253
2002-09-02 0.305742
2002-09-03 -2.771434
2002-09-04 0.288447
2002-09-05 -0.056637
2002-09-06 -0.448806
2002-09-07 0.811163
2002-09-08 -0.205134
2002-09-09 0.786792
2002-09-10 1.951288
2002-09-11 0.736074
2002-09-12 -0.138304
2002-09-13 1.119185
2002-09-14 -0.037335
2002-09-15 0.218690
2002-09-16 -0.134962
2002-09-17 -2.203361
2002-09-18 0.177029
2002-09-19 1.161275
2002-09-20 -1.238382
2002-09-21 0.250562
2002-09-22 0.048922
2002-09-23 0.504966
2002-09-24 0.311811
2002-09-25 1.020513
2002-09-26 -0.975082
Freq: D, Length: 1000, dtype: float64
In [27]: s = s.cumsum()
In [28]: s
Out[28]:
2000-01-01 -0.141344
2000-01-02 -0.938593
2000-01-03 -3.403201
2000-01-04 -4.273685
2000-01-05 -5.483945
2000-01-06 -4.291085
2000-01-07 -3.648190
2000-01-08 -4.800293
2000-01-09 -6.113566
2000-01-10 -6.086082
2000-01-11 -6.764654
2000-01-12 -5.597414
2000-01-13 -7.248095
2000-01-14 -6.669319
2000-01-15 -7.740024
2000-01-16 -6.394512
2000-01-17 -6.556226
2000-01-18 -6.871690
2000-01-19 -8.060822
2000-01-20 -8.222964
2000-01-21 -6.779169
2000-01-22 -7.327065
2000-01-23 -7.883137
2000-01-24 -5.892937
2000-01-25 -6.108574
2000-01-26 -5.060258
2000-01-27 -6.091193
2000-01-28 -5.834574
2000-01-29 -5.964950
2000-01-30 -4.678870
...
2002-08-28 -26.069711
2002-08-29 -24.758897
2002-08-30 -25.145779
2002-08-31 -25.326844
2002-09-01 -27.083097
2002-09-02 -26.777355
2002-09-03 -29.548789
2002-09-04 -29.260342
2002-09-05 -29.316979
2002-09-06 -29.765785
2002-09-07 -28.954622
2002-09-08 -29.159755
2002-09-09 -28.372963
2002-09-10 -26.421675
2002-09-11 -25.685601
2002-09-12 -25.823905
2002-09-13 -24.704720
2002-09-14 -24.742055
2002-09-15 -24.523365
2002-09-16 -24.658327
2002-09-17 -26.861687
2002-09-18 -26.684658
2002-09-19 -25.523383
2002-09-20 -26.761766
2002-09-21 -26.511203
2002-09-22 -26.462281
2002-09-23 -25.957315
2002-09-24 -25.645504
2002-09-25 -24.624991
2002-09-26 -25.600073
Freq: D, Length: 1000, dtype: float64
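matplotlib.pyplot was imported at the top of this session, and this cumulative-sum series is the usual plotting example; a minimal sketch (assuming an interactive matplotlib backend):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

s = pd.Series(np.random.randn(1000),
              index=pd.date_range('20000101', periods=1000))
s = s.cumsum()
s.plot()    # Series.plot delegates to matplotlib
plt.show()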
In [29]: df = pd.DataFrame(np.random.randn(100, 4), columns=list('ABCD'))
...: df
...:
...:
Out[29]:
A B C D
0 0.267327 0.107506 0.080250 -0.621563
1 1.816829 1.175715 0.950130 0.836614
2 -1.442490 0.651116 0.474866 0.179345
3 0.410552 0.012790 -0.486893 0.034593
4 0.510322 2.030582 0.466503 -0.085239
5 0.191119 -1.055877 -0.520714 2.633334
6 0.094701 1.506650 -1.624039 -0.366824
7 -0.899925 2.669231 2.607940 -0.622080
8 1.953028 -0.610078 0.885680 -0.365108
9 0.306464 0.694631 -0.164848 -0.140056
10 -0.444048 0.135605 1.471948 -1.673677
11 0.635337 -0.922888 -1.242464 0.448070
12 -0.845890 0.881532 -1.182719 0.773362
13 1.051579 -0.477570 1.219806 -0.338791
14 -0.081794 0.319024 -0.566869 1.154736
15 -0.878720 1.746420 -0.217140 0.253803
16 0.178135 -0.042904 0.421445 0.325830
17 0.808898 -0.247453 1.611405 -0.451700
18 -0.098036 0.601209 0.726469 -0.520484
19 -0.642209 1.428606 -1.124756 -0.007182
20 -0.917643 -2.275002 -0.446514 -0.521532
21 0.709361 -0.735774 -0.474513 -0.133009
22 -0.470721 -0.280986 -0.221664 0.424061
23 0.068563 -0.998091 -1.417820 0.773527
24 -0.742220 -0.561952 1.072177 -1.363455
25 0.928466 -0.960329 -0.760199 -0.000401
26 1.528048 -0.026176 0.197139 0.646781
27 0.155977 0.238656 -0.799400 0.849429
28 1.388809 0.714155 -0.074333 0.663270
29 0.028229 0.887151 1.639230 -1.722949
.. ... ... ... ...
70 0.320943 0.204128 0.563068 1.005620
71 -0.016522 1.032978 -0.217907 -1.338971
72 0.772309 0.268404 -0.111950 0.567020
73 -0.522461 -0.410367 -0.329346 0.686058
74 -0.066561 1.331591 -0.869071 0.921186
75 -0.353796 0.102065 0.806607 0.750724
76 1.380541 0.626505 0.619084 -0.618170
77 -0.846997 0.227475 -1.855233 0.224078
78 -0.613626 -0.075322 -0.167048 0.600913
79 -1.047309 -0.322190 0.048969 -1.052706
80 0.772724 -0.464186 -0.930369 -0.320229
81 1.038498 0.927340 1.060027 -0.103949
82 -0.716448 -1.590410 0.538984 -0.189105
83 -0.710689 -0.321669 0.132338 -0.162068
84 1.364318 0.828088 1.280381 0.880573
85 -0.171488 1.197617 0.843253 -0.328299
86 0.326151 -0.820610 -1.629678 -0.088559
87 0.092089 -0.042380 1.824448 2.386188
88 0.209274 -0.903950 0.245931 2.023770
89 0.782739 -0.493215 -0.115856 -0.285229
90 -1.083658 0.336333 0.868388 0.444167
91 1.859865 -0.206399 0.287647 -0.298413
92 -0.677470 -0.059909 -0.347117 -0.672348
93 -0.708517 0.245301 -2.156514 0.023272
94 0.662887 -0.654867 0.575094 -1.501833
95 0.915414 -0.731354 -0.173915 -0.834434
96 0.358771 -0.983519 -0.537583 0.911525
97 -1.143764 0.202356 -0.450695 1.252933
98 0.417678 0.210289 0.472555 -0.363459
99 1.803065 0.588571 -0.459731 1.801414
[100 rows x 4 columns]
In [30]: df.to_csv('pandas.csv')
In [31]: %ls
 Volume in drive C has no label.
 Volume Serial Number is CA90-0532
 Directory of C:\Users\Jay
2019/05/28 20:50 <DIR> .
2019/05/28 20:50 <DIR> ..
2019/05/04 00:03 <DIR> .3T
2018/04/19 21:41 <DIR> .android
2017/11/04 23:27 <DIR> .AndroidStudio3.0
2018/09/09 21:18 8,250 .bash_history
2018/09/16 14:52 <DIR> .config
2019/05/03 23:49 14 .dbshell
2017/11/05 09:38 <DIR> .dnx
2019/03/11 21:55 <DIR> .docker
2019/05/10 12:21 <DIR> .dubbo
2018/01/23 23:15 16 .emulator_console_auth_token
2018/10/24 13:26 52 .gitconfig
2017/11/05 20:25 <DIR> .gradle
2018/04/08 22:47 <DIR> .idlerc
2019/03/12 14:07 <DIR> .IntelliJIdea2018.2
2018/04/24 21:49 <DIR> .ipython
2018/04/24 21:52 <DIR> .jupyter
2019/04/02 17:01 <DIR> .kafkatool2
2017/11/05 20:36 <DIR> .keras
2018/11/15 00:43 <DIR> .kube
2019/03/30 00:20 <DIR> .m2
2018/04/02 21:42 <DIR> .matplotlib
2018/09/16 14:05 <DIR> .minikube
2019/05/03 23:43 0 .mongorc.js
2018/04/29 15:47 <DIR> .nuget
2019/05/12 15:29 <DIR> .oracle_jre_usage
2017/12/11 22:33 <DIR> .PyCharm2017.3
2017/12/11 22:53 <DIR> .PyCharmCE2017.2
2019/04/15 17:16 0 .scala_history
2019/05/25 22:34 <DIR> .VirtualBox
2019/04/23 20:29 <DIR> .WebStorm2019.1
2019/05/25 13:11 <DIR> 3D Objects
2018/04/24 22:03 <DIR> Anaconda3
2018/06/23 02:01 <DIR> ansel
2018/09/06 19:13 <DIR> AppData
2019/03/14 11:46 <DIR> Contacts
2019/05/27 22:44 <DIR> Desktop
2019/05/19 16:56 <DIR> Documents
2019/03/14 11:46 <DIR> Downloads
2019/04/27 11:19 <DIR> Favorites
2018/05/03 20:36 46,251,864 heapDump-pycharm-1525350999967.hprof.zip
2018/05/03 20:38 46,925,852 heapDump-pycharm-1525351099190.hprof.zip
2019/04/27 11:15 39,983 java_error_in_idea_6940.log
2019/05/14 21:16 40,103 java_error_in_idea_8180.log
2018/04/27 19:21 144,319,266 java_error_in_pycharm.hprof
2018/05/05 13:31 34,521 java_error_in_pycharm_3564.log
2019/04/27 11:15 38,176 java_error_in_pycharm_7488.log
2018/05/03 20:53 34,156 java_error_in_pycharm_8968.log
2019/03/14 11:46 <DIR> Links
2019/03/14 11:46 <DIR> Music
2019/05/27 18:30 <DIR> OneDrive
2019/05/28 20:50 8,249 pandas.csv
2019/03/14 11:46 <DIR> Pictures
2019/03/14 11:46 <DIR> Saved Games
2019/03/14 11:46 <DIR> Searches
2019/04/26 09:12 <DIR> UIDowner
2019/03/14 11:46 <DIR> Videos
15 File(s)      237,700,502 bytes
43 Dir(s)  28,620,050,432 bytes free
In [32]: %more pandas.csv
UsageError: Line magic function `%more` not found.
In [33]: pd.read_csv('pandas.csv', index_col=0)
Out[33]:
A B C D
0 0.267327 0.107506 0.080250 -0.621563
1 1.816829 1.175715 0.950130 0.836614
2 -1.442490 0.651116 0.474866 0.179345
3 0.410552 0.012790 -0.486893 0.034593
4 0.510322 2.030582 0.466503 -0.085239
5 0.191119 -1.055877 -0.520714 2.633334
6 0.094701 1.506650 -1.624039 -0.366824
7 -0.899925 2.669231 2.607940 -0.622080
8 1.953028 -0.610078 0.885680 -0.365108
9 0.306464 0.694631 -0.164848 -0.140056
10 -0.444048 0.135605 1.471948 -1.673677
11 0.635337 -0.922888 -1.242464 0.448070
12 -0.845890 0.881532 -1.182719 0.773362
13 1.051579 -0.477570 1.219806 -0.338791
14 -0.081794 0.319024 -0.566869 1.154736
15 -0.878720 1.746420 -0.217140 0.253803
16 0.178135 -0.042904 0.421445 0.325830
17 0.808898 -0.247453 1.611405 -0.451700
18 -0.098036 0.601209 0.726469 -0.520484
19 -0.642209 1.428606 -1.124756 -0.007182
20 -0.917643 -2.275002 -0.446514 -0.521532
21 0.709361 -0.735774 -0.474513 -0.133009
22 -0.470721 -0.280986 -0.221664 0.424061
23 0.068563 -0.998091 -1.417820 0.773527
24 -0.742220 -0.561952 1.072177 -1.363455
25 0.928466 -0.960329 -0.760199 -0.000401
26 1.528048 -0.026176 0.197139 0.646781
27 0.155977 0.238656 -0.799400 0.849429
28 1.388809 0.714155 -0.074333 0.663270
29 0.028229 0.887151 1.639230 -1.722949
.. ... ... ... ...
70 0.320943 0.204128 0.563068 1.005620
71 -0.016522 1.032978 -0.217907 -1.338971
72 0.772309 0.268404 -0.111950 0.567020
73 -0.522461 -0.410367 -0.329346 0.686058
74 -0.066561 1.331591 -0.869071 0.921186
75 -0.353796 0.102065 0.806607 0.750724
76 1.380541 0.626505 0.619084 -0.618170
77 -0.846997 0.227475 -1.855233 0.224078
78 -0.613626 -0.075322 -0.167048 0.600913
79 -1.047309 -0.322190 0.048969 -1.052706
80 0.772724 -0.464186 -0.930369 -0.320229
81 1.038498 0.927340 1.060027 -0.103949
82 -0.716448 -1.590410 0.538984 -0.189105
83 -0.710689 -0.321669 0.132338 -0.162068
84 1.364318 0.828088 1.280381 0.880573
85 -0.171488 1.197617 0.843253 -0.328299
86 0.326151 -0.820610 -1.629678 -0.088559
87 0.092089 -0.042380 1.824448 2.386188
88 0.209274 -0.903950 0.245931 2.023770
89 0.782739 -0.493215 -0.115856 -0.285229
90 -1.083658 0.336333 0.868388 0.444167
91 1.859865 -0.206399 0.287647 -0.298413
92 -0.677470 -0.059909 -0.347117 -0.672348
93 -0.708517 0.245301 -2.156514 0.023272
94 0.662887 -0.654867 0.575094 -1.501833
95 0.915414 -0.731354 -0.173915 -0.834434
96 0.358771 -0.983519 -0.537583 0.911525
97 -1.143764 0.202356 -0.450695 1.252933
98 0.417678 0.210289 0.472555 -0.363459
99 1.803065 0.588571 -0.459731 1.801414
[100 rows x 4 columns]
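to_csv writes the RangeIndex as an unnamed first column, which is why read_csv needs index_col=0 to round-trip cleanly; writing without the index avoids that. A small sketch (pandas_noindex.csv is just an illustrative file name):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD'))
df.to_csv('pandas_noindex.csv', index=False)  # hypothetical file name, index not written
back = pd.read_csv('pandas_noindex.csv')
print(back.shape)  # (5, 4) and no extra 'Unnamed: 0' column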