numpy介绍2

import numpy
# Comparing an array with a scalar applies the comparison to every element.
# The result is a new array of the same length whose elements are booleans:
# True where the element equals the scalar, False everywhere else.
vector = numpy.arange(5, 25, 5)  # [5, 10, 15, 20]
vector == 10

array([False,  True, False, False], dtype=bool)

# The same element-wise comparison works on a 2-D array: the result is a
# Boolean array with the same 3x3 shape as the input.
# matrix is [[5, 10, 15], [20, 25, 30], [35, 40, 45]].
matrix = numpy.arange(5, 50, 5).reshape(3, 3)
matrix == 25

array([[False, False, False],
       [False,  True, False],
       [False, False, False]], dtype=bool)

# Compare vector to the value 10; this produces the new Boolean array
# [False, True, False, False], which is stored in equal_to_ten.
vector = numpy.array([5, 10, 15, 20])
equal_to_ten = (vector == 10)
print(equal_to_ten)
# A Boolean array used as an index acts as a mask: only the elements whose
# mask entry is True are returned. This is element selection, not a matrix
# product.
print(vector[equal_to_ten])

[False  True False False]
[10]

# A hand-written Boolean array works as a mask just like one produced by a
# comparison: the element at each True position is kept.
vector1 = numpy.arange(5, 25, 5)  # [5, 10, 15, 20]
vector2 = numpy.array([False, True, False, False])
print(vector1[vector2])

[10]

matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
# Compare the column at index 1 with 25; the result has one Boolean per row.
second_column_25 = (matrix[:, 1] == 25)
print(second_column_25)
# The same masking idea applies to 2-D arrays: compute a Boolean array first,
# then use it as a row filter. Rows whose mask entry is True are kept, and
# ":" keeps every column of those rows. This is selection, not a matrix
# product.
print(matrix[second_column_25, :])

[False  True False]
[[20 25 30]]

# Comparisons can be combined to test several conditions at once.
vector = numpy.array([5, 10, 15, 20])
# "&" is the element-wise AND. Each comparison needs its own parentheses
# because "&" binds more tightly than "==".
# No element can equal both 10 and 5, so every entry of the result is False.
equal_to_ten_and_five = (vector == 10) & (vector == 5)
print(equal_to_ten_and_five)

[False False False False]

vector = numpy.array([5, 10, 15, 20])
# "|" is the element-wise OR: an entry is True where the element equals
# 10 or equals 5.
equal_to_ten_or_five = (vector == 10) | (vector == 5)
print(equal_to_ten_or_five)

[ True  True False False]

vector = numpy.arange(5, 25, 5)  # [5, 10, 15, 20]
# Mask of the elements that are either 5 or 10 (positions 0 and 1 here).
equal_to_ten_or_five = (vector == 5) | (vector == 10)
# Assigning through a Boolean mask overwrites only the selected elements,
# in place; the elements at positions 0 and 1 become 50.
vector[equal_to_ten_or_five] = 50
print(vector)

[50 50 15 20]

matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
# One Boolean per row: True where the value in column 1 equals 25.
second_column_25 = matrix[:, 1] == 25
print(second_column_25)
# Combine the row mask with a column index: in the selected rows, the entry
# in column 1 is overwritten with 10. The array is modified in place.
matrix[second_column_25, 1] = 10
print(matrix)

[False  True False]
[[ 5 10 15]
 [20 10 30]
 [35 40 45]]

# The data type of an array can be converted with the ndarray.astype() method.
# astype() returns a new array; the original is left unchanged, so the result
# is assigned back to the same name here.
vector = numpy.array(["1", "2", "3"])
# An array of strings gets a string dtype (shown as "<U1" on Python 3 and
# "|S1" on Python 2).
print(vector.dtype)
print(vector)
vector = vector.astype(float)
# After the cast the dtype is float64 and the elements are numbers.
print(vector.dtype)
print(vector)

|S1
['1' '2' '3']
float64
[ 1.  2.  3.]

# Aggregate methods such as sum(), min() and max() reduce an array to a
# single summary value.
# Call help(numpy.array) to display the built-in documentation.
vector = numpy.arange(5, 25, 5)  # [5, 10, 15, 20]
vector.sum()

# The axis argument selects the dimension along which the operation runs:
# axis=1 produces one result per row, axis=0 produces one result per column.
# matrix is [[5, 10, 15], [20, 25, 30], [35, 40, 45]].
matrix = numpy.arange(5, 50, 5).reshape(3, 3)
matrix.sum(axis=1)  # sum of each row

array([ 30,  75, 120])

# matrix is [[5, 10, 15], [20, 25, 30], [35, 40, 45]].
matrix = numpy.arange(5, 50, 5).reshape(3, 3)
matrix.sum(axis=0)  # sum of each column

array([60, 75, 90])

如何处理那些无法按数值解析的数据（即读取后变成 nan 的值）？

# Replace NaN values with 0 before computing aggregates.
# genfromtxt parses fields as floats by default; fields that cannot be parsed
# as numbers come back as NaN, so the loaded array contains many NaN values.
world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",")
# print(world_alcohol)
# Only the column at index 4 is needed. Build a Boolean mask that is True
# where that column holds NaN.
# NOTE(review): the [:, 4] indexing assumes the file loads as a 2-D array with
# at least five columns - confirm against the data file.
is_value_empty = numpy.isnan(world_alcohol[:, 4])
# print(is_value_empty)
# Overwrite every NaN entry of that column with a numeric zero.
world_alcohol[is_value_empty, 4] = 0
# Extract the column at index 4 (the fifth column).
alcohol_consumption = world_alcohol[:, 4]
# Explicit cast to float; astype() returns a new array.
alcohol_consumption = alcohol_consumption.astype(float)
# NOTE: the zero-filled entries are included in both the sum and the mean.
total_alcohol = alcohol_consumption.sum()
average_alcohol = alcohol_consumption.mean()
print(total_alcohol)
print(average_alcohol)