numpy介绍2
import numpy
# Comparing an array with a scalar checks the scalar against every element.
# The result is a new Boolean array: True where the element equals the
# scalar, False everywhere else.
vector = numpy.arange(5, 25, 5)  # [5, 10, 15, 20]
vector == 10
array([False, True, False, False], dtype=bool)
# The element-wise comparison works the same way on a 2-D array.
# Rows: [5, 10, 15], [20, 25, 30], [35, 40, 45].
matrix = numpy.arange(5, 50, 5).reshape(3, 3)
matrix == 25
array([[False, False, False],
[False, True, False],
[False, False, False]], dtype=bool)
# Compare vector with the value 10; the resulting Boolean array
# [False, True, False, False] is stored in equal_to_ten.
vector = numpy.array([5, 10, 15, 20])
equal_to_ten = (vector == 10)
print(equal_to_ten)
# Indexing with the Boolean array keeps only the elements where it is True.
print(vector[equal_to_ten])
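# Using a Boolean array as an index acts as a mask: only the elements at positions where the mask is True are kept (this is selection, not matrix multiplication).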
[False True False False]
[10]
# A hand-written Boolean array can serve as the index (mask) as well.
vector2 = numpy.array([False, True, False, False])
vector1 = numpy.arange(5, 25, 5)  # [5, 10, 15, 20]
print(vector1[vector2])
[10]
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
# Boolean array marking the rows whose second column equals 25.
second_column_25 = (matrix[:, 1] == 25)
print(second_column_25)
# Use the Boolean array as the row index and take every column of those rows.
print(matrix[second_column_25, :])
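# The same applies to multi-dimensional arrays: compute the Boolean array first, then use it as a filter (mask) index.
# The mask selects the rows where it is True; no matrix multiplication is involved.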
[False True False]
[[20 25 30]]
# We can also perform comparisons with multiple conditions.
vector = numpy.array([5, 10, 15, 20])
# & is the element-wise AND; no element equals both 10 and 5,
# so every entry of the result is False.
equal_to_ten_and_five = (vector == 10) & (vector == 5)
print(equal_to_ten_and_five)
[False False False False]
vector = numpy.array([5, 10, 15, 20])
# | is the element-wise OR: True where the element is 10 or 5.
equal_to_ten_or_five = (vector == 10) | (vector == 5)
print(equal_to_ten_or_five)
[ True True False False]
vector = numpy.arange(5, 25, 5)  # [5, 10, 15, 20]
# Mask of the elements equal to 10 or to 5 (positions 0 and 1 here).
equal_to_ten_or_five = (vector == 10) | (vector == 5)
# Assigning through the mask overwrites only the selected elements with 50.
vector[equal_to_ten_or_five] = 50
print(vector)
[50 50 15 20]
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
# Boolean array marking the rows whose second column equals 25.
second_column_25 = matrix[:, 1] == 25
print(second_column_25)
# In the flagged rows, overwrite the second-column value with 10.
matrix[second_column_25, 1] = 10
print(matrix)
[False True False]
[[ 5 10 15]
[20 10 30]
[35 40 45]]
# We can convert the data type of an array with the ndarray.astype() method.
vector = numpy.array(["1", "2", "3"])
# Before the conversion the array holds strings.
print(vector.dtype)
print(vector)
# astype() returns a new array with the requested element type.
vector = vector.astype(float)
print(vector.dtype)
print(vector)
|S1
['1' '2' '3']
float64
[ 1. 2. 3.]
vector = numpy.arange(5, 25, 5)  # [5, 10, 15, 20]
# Aggregate over all elements; min() and max() are called the same way.
vector.sum()
# Use help(numpy.array) to display the documentation (help() prints the text itself, so wrapping it in print() is unnecessary).
50
# The axis argument selects the dimension the operation runs along:
# axis=1 produces one result per row, axis=0 one result per column.
# Rows: [5, 10, 15], [20, 25, 30], [35, 40, 45].
matrix = numpy.arange(5, 50, 5).reshape(3, 3)
matrix.sum(axis=1)  # sum of each row
array([ 30, 75, 120])
# Rows: [5, 10, 15], [20, 25, 30], [35, 40, 45].
matrix = numpy.arange(5, 50, 5).reshape(3, 3)
matrix.sum(axis=0)  # sum of each column
array([60, 75, 90])
如何来处理那些无法数值处理的数据?
# Replace nan values with 0 before aggregating.
# Fields that genfromtxt cannot parse as numbers are read as nan,
# so the loaded array contains many nan entries.
world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",")
# print(world_alcohol)
# Only the column at index 4 is of interest; flag its nan entries.
is_value_empty = numpy.isnan(world_alcohol[:, 4])
# print(is_value_empty)
# Overwrite every nan in that column with a numeric 0.
world_alcohol[is_value_empty, 4] = 0
# Take the fifth column (index 4).
alcohol_consumption = world_alcohol[:, 4]
# Convert explicitly to float.
alcohol_consumption = alcohol_consumption.astype(float)
total_alcohol = alcohol_consumption.sum()
average_alcohol = alcohol_consumption.mean()
print(total_alcohol)
print(average_alcohol)
1137.78
1.14006012024
网友评论