布尔值数组的长度必须与它所索引的那个轴的长度一致。
In [1]: import numpy as np
In [2]: names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
In [3]: data = np.random.randn(7, 4)
In [4]: names
Out[4]: array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')
In [5]: data
Out[5]:
array([[ 1.1110709 , -0.47574111, -0.32405598, 1.68851732],
[ 0.18338712, 1.86099602, 0.16028324, -0.77338395],
[ 1.30208873, 0.05613606, 0.09993033, 0.9296732 ],
[-2.37345175, 0.7715262 , 0.30284201, -0.11449909],
[ 1.15309792, 0.57371153, -0.16297593, -0.922199 ],
[ 0.30940118, -0.75740561, -0.30268499, -1.49159479],
[ 1.30522225, -0.18000825, 0.40052368, -1.6624808 ]])
In [6]: names == 'Bob'
Out[6]: array([ True, False, False, True, False, False, False])
In [7]: data[names == 'Bob']
Out[7]:
array([[ 1.1110709 , -0.47574111, -0.32405598, 1.68851732],
[-2.37345175, 0.7715262 , 0.30284201, -0.11449909]])
In [8]: data[names == 'Bob', 2:]
Out[8]:
array([[-0.32405598, 1.68851732],
[ 0.30284201, -0.11449909]])
In [9]: data[names == 'Bob', 3]
Out[9]: array([ 1.68851732, -0.11449909])
In [10]: names != 'Bob'
Out[10]: array([False, True, True, False, True, True, True])
In [11]: data[~(names == 'Bob')]
Out[11]:
array([[ 0.18338712, 1.86099602, 0.16028324, -0.77338395],
[ 1.30208873, 0.05613606, 0.09993033, 0.9296732 ],
[ 1.15309792, 0.57371153, -0.16297593, -0.922199 ],
[ 0.30940118, -0.75740561, -0.30268499, -1.49159479],
[ 1.30522225, -0.18000825, 0.40052368, -1.6624808 ]])
In [12]: cond = names == 'Bob'
In [13]: cond
Out[13]: array([ True, False, False, True, False, False, False])
In [14]: data[~cond]
Out[14]:
array([[ 0.18338712, 1.86099602, 0.16028324, -0.77338395],
[ 1.30208873, 0.05613606, 0.09993033, 0.9296732 ],
[ 1.15309792, 0.57371153, -0.16297593, -0.922199 ],
[ 0.30940118, -0.75740561, -0.30268499, -1.49159479],
[ 1.30522225, -0.18000825, 0.40052368, -1.6624808 ]])
In [15]: mask = (names == 'Bob') | (names == 'Will')
In [16]: mask
Out[16]: array([ True, False, True, True, True, False, False])
In [17]: data[mask]
Out[17]:
array([[ 1.1110709 , -0.47574111, -0.32405598, 1.68851732],
[ 1.30208873, 0.05613606, 0.09993033, 0.9296732 ],
[-2.37345175, 0.7715262 , 0.30284201, -0.11449909],
[ 1.15309792, 0.57371153, -0.16297593, -0.922199 ]])
使用布尔值索引选择数据时,总是生成数据的拷贝,即使返回的数组没有任何变化。组合多个布尔条件时,要使用 &(与)和 |(或),因为Python中的关键字and和or对布尔值数组无效。与选择数据不同,通过布尔值索引赋值会就地修改原数组:
In [18]: data[data < 0] = 0
In [19]: data
Out[19]:
array([[1.1110709 , 0. , 0. , 1.68851732],
[0.18338712, 1.86099602, 0.16028324, 0. ],
[1.30208873, 0.05613606, 0.09993033, 0.9296732 ],
[0. , 0.7715262 , 0.30284201, 0. ],
[1.15309792, 0.57371153, 0. , 0. ],
[0.30940118, 0. , 0. , 0. ],
[1.30522225, 0. , 0.40052368, 0. ]])
In [20]: data[names != 'Joe'] = 7
In [21]: data
Out[21]:
array([[7. , 7. , 7. , 7. ],
[0.18338712, 1.86099602, 0.16028324, 0. ],
[7. , 7. , 7. , 7. ],
[7. , 7. , 7. , 7. ],
[7. , 7. , 7. , 7. ],
[0.30940118, 0. , 0. , 0. ],
[1.30522225, 0. , 0.40052368, 0. ]])
网友评论