美文网首页
1. 日月光华 Python数据分析-Numpy

1. 日月光华 Python数据分析-Numpy

作者: 薛东弗斯 | 来源:发表于2023-06-28 08:34 被阅读0次
import numpy as np
import matplotlib.pyplot as plt   # 与绘图有关的包都在pyplot下面
# 魔法函数,将绘图显示出来,否则绘图将存储在一个对象中(注释需单独成行,行魔法命令后不能跟注释)
%matplotlib inline
import pandas as pd

创建数组

np.random.randn(3,4)
array([[-0.76863424,  1.71936986,  0.10624708,  1.34709104],
       [ 0.17115228,  2.03093016, -0.25014394,  1.44215241],
       [ 0.49469013,  0.64961492, -0.12764838,  0.45850221]])

计算代码执行时间

%timeit [x**2 for x in range(4000)]
967 µs ± 102 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
# Jupyter Notebook常用快捷键
# Tab键进行代码提示。查看方法,自动补全
# ?显示帮助文档。    
# shift+Enter执行代码
# %matplotlib inline  将绘图直接显示出来
# %timeit  计算一段代码的执行时间
# a = np.array([True,True,False,False])  创建bool类型的数组
# a.astype(np.float16)  # 更改数据类型
a = np.array([[1,2,3],
             [4,5,6]])
print(a.ndim)
print(a.shape)
print(a.dtype)
2
(2, 3)
int32
np.arange(36).reshape(3,3,4)   # 3个3*4的数组
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]],

       [[24, 25, 26, 27],
        [28, 29, 30, 31],
        [32, 33, 34, 35]]])
np.linspace(0,1,100)  # 从0-1 均匀生成100个数字。 常用于绘图
array([0.        , 0.01010101, 0.02020202, 0.03030303, 0.04040404,
       0.05050505, 0.06060606, 0.07070707, 0.08080808, 0.09090909,
       0.1010101 , 0.11111111, 0.12121212, 0.13131313, 0.14141414,
       0.15151515, 0.16161616, 0.17171717, 0.18181818, 0.19191919,
       0.2020202 , 0.21212121, 0.22222222, 0.23232323, 0.24242424,
       0.25252525, 0.26262626, 0.27272727, 0.28282828, 0.29292929,
       0.3030303 , 0.31313131, 0.32323232, 0.33333333, 0.34343434,
       0.35353535, 0.36363636, 0.37373737, 0.38383838, 0.39393939,
       0.4040404 , 0.41414141, 0.42424242, 0.43434343, 0.44444444,
       0.45454545, 0.46464646, 0.47474747, 0.48484848, 0.49494949,
       0.50505051, 0.51515152, 0.52525253, 0.53535354, 0.54545455,
       0.55555556, 0.56565657, 0.57575758, 0.58585859, 0.5959596 ,
       0.60606061, 0.61616162, 0.62626263, 0.63636364, 0.64646465,
       0.65656566, 0.66666667, 0.67676768, 0.68686869, 0.6969697 ,
       0.70707071, 0.71717172, 0.72727273, 0.73737374, 0.74747475,
       0.75757576, 0.76767677, 0.77777778, 0.78787879, 0.7979798 ,
       0.80808081, 0.81818182, 0.82828283, 0.83838384, 0.84848485,
       0.85858586, 0.86868687, 0.87878788, 0.88888889, 0.8989899 ,
       0.90909091, 0.91919192, 0.92929293, 0.93939394, 0.94949495,
       0.95959596, 0.96969697, 0.97979798, 0.98989899, 1.        ])
a = np.arange(20)  # 对a切片得到的是视图(浅拷贝),修改切片中的数据会改变原始数组
# a[2:8:2]  # 从第2位切分到第8位,每隔2位切分一个数字
# a[5]=100
# a
# b = a.copy() # 深拷贝
# b[0]=100   # 此时,对b的操作不会影响a数组
a > 10   # 生成一个bool值的array
array([False, False, False, False, False, False, False, False, False,
       False, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True])
a[ a > 10 ]  # 用bool值作为索引进行取值
array([11, 12, 13, 14, 15, 16, 17, 18, 19])
(a>5)&(a<15)  # 获取bool数组
array([False, False, False, False, False, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False, False, False,
       False, False])
a[(a>5)&(a<15)]  # 将a中位于5到15之间(不含5和15)的数字索引出来
# a[(a>5)&(a<15)&(a%2==0)]
array([ 6,  7,  8,  9, 10, 11, 12, 13, 14])
a[(a<3)|(a>15)] 
array([ 0,  1,  2, 16, 17, 18, 19])
b = np.arange(24).reshape(2,3,4)
b[(b>3)&(b%2!= 0)]
array([ 5,  7,  9, 11, 13, 15, 17, 19, 21, 23])
b[b<=10]
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])
b[~(b<=10)]   # ~ 取反,非运算
array([11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23])
a = np.arange(10).reshape(2,5)
a
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

广播

a+10   # 标量值广播,会广播到每一个元素.
# a*100
array([[10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])
a+a  # 相同位置对应元素相加
array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18]])
a*a   # 相同位置对应元素相乘
array([[ 0,  1,  4,  9, 16],
       [25, 36, 49, 64, 81]])
a1 = np.arange(5)
a1
array([0, 1, 2, 3, 4])
a = np.arange(10).reshape(2,5)
a
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
a1+a   # a1与a只是列数相同,就可以进行加法运算。  # 列维度相同时,按照列广播
array([[ 0,  2,  4,  6,  8],
       [ 5,  7,  9, 11, 13]])
b1 = np.arange(2).reshape(2,1)
b1
array([[0],
       [1]])
a
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
b1 + a    # 行维度相同时,按照行广播
array([[ 0,  1,  2,  3,  4],
       [ 6,  7,  8,  9, 10]])
a.T   # 转置
array([[0, 5],
       [1, 6],
       [2, 7],
       [3, 8],
       [4, 9]])
a1 = np.array([[1,2],[3,4]])
b1 = np.array([[5,6],[7,8]])
np.concatenate((a1,b1))
array([[1, 2],
       [3, 4],
       [5, 6],
       [7, 8]])
np.concatenate((a1,b1),axis=1)
array([[1, 2, 5, 6],
       [3, 4, 7, 8]])
a1 = np.array([1,2,3,4])
a2 = np.array([5,6,7,8])
np.concatenate((a1,a2))  # 组合以后,还是一维数组
array([1, 2, 3, 4, 5, 6, 7, 8])
np.stack((a1,a2))# 组合以后,变成高维数组
array([[1, 2, 3, 4],
       [5, 6, 7, 8]])
np.append(a1,5)
array([1, 2, 3, 4, 5])
b = np.stack((a1,a1))
b
array([[1, 2, 3, 4],
       [1, 2, 3, 4]])
np.append(b,[9,10])   # append之后展开为1维数组
array([ 1,  2,  3,  4,  1,  2,  3,  4,  9, 10])
m = np.array([[1,2],[3,4]])
np.append(m,[5,6])
array([1, 2, 3, 4, 5, 6])
np.append(m,[[9,10]],axis=0)
array([[ 1,  2],
       [ 3,  4],
       [ 9, 10]])
np.append(m,[[9],[10]],axis=1)  # 追加
array([[ 1,  2,  9],
       [ 3,  4, 10]])
x = np.array([1,2,3,4,2,1,2,1,1,1,2,3,3,2,3])
np.unique(x)   # 去重
array([1, 2, 3, 4])
y = np.arange(10)
np.square(y)
array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81], dtype=int32)
y1 = np.arange(10,0,-1)
y1
array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1])
np.maximum(y,y1)  # 取出对应位置最大的那个数值
array([10,  9,  8,  7,  6,  5,  6,  7,  8,  9])
np.mean(y)  # 顶级方法   顶级方法返回副本
# y.mean()  # 对象方法
4.5
m
array([[1, 2],
       [3, 4]])
m.mean(axis=1)  # 按照行取均值
array([1.5, 3.5])
m.mean(axis=0) # 按照列取均值
array([2., 3.])
b
array([[1, 2, 3, 4],
       [1, 2, 3, 4]])
b.max()  # 取全局最大值
4
b.max(axis=0)  # 取出每一列的最大值
array([1, 2, 3, 4])
b.max(axis=1)  # 取出每一行的最大值
array([4, 4])
a = np.random.randn(19)
a
array([-1.61081374, -0.91845914,  0.76720491,  0.20492141, -1.15177974,
        0.48665073, -0.68125193,  0.61895934,  0.87045498,  1.26682085,
       -1.35425483,  0.33467291,  0.68378407, -0.21076582, -0.99360988,
        0.11501658, -0.14558961, -1.13452916, -1.15467634])
a.sort()
a
# 如果用顶级方法,返回的是a排序后的副本,a本身不会发生变化
array([-1.61081374, -1.35425483, -1.15467634, -1.15177974, -1.13452916,
       -0.99360988, -0.91845914, -0.68125193, -0.21076582, -0.14558961,
        0.11501658,  0.20492141,  0.33467291,  0.48665073,  0.61895934,
        0.68378407,  0.76720491,  0.87045498,  1.26682085])
b = np.random.randn(10)
b.shape=2,5
print("before sort:",b)
b.sort(axis=1)  # 按照每一行进行排序
b

before sort: [[ 0.4619393 -0.30835792 0.55555801 0.1328978 0.13320337]
[-0.48944674 -1.50409789 -0.60189908 -1.19267168 -0.16516362]]

array([[-0.30835792,  0.1328978 ,  0.13320337,  0.4619393 ,  0.55555801],
       [-1.50409789, -1.19267168, -0.60189908, -0.48944674, -0.16516362]])
c = np.array([2,4,1,3])
np.argsort(c)
# 排第一位,最小值是索引为2的值
# 排第2位 是索引为0的值
# 排第3位,是索引为3的值
# 排第4位,是索引为1的值
array([2, 0, 3, 1], dtype=int64)
np.argmax(c) # 返回最大值的索引
1
np.where(c>2)   # 返回符合条件的索引
# 如果要找出这些值,直接用bool索引即可  c[c>2]
(array([1, 3], dtype=int64),)
d = np.array([9,5,6,7])
cond = np.array([True,False,False,False])
# 当condition为True时取c的值,False时取d的值
np.where(cond,c,d)
array([2, 5, 6, 7])
c
array([2, 4, 1, 3])
c>2
array([False,  True, False,  True])
(c>2).sum()  #统计符合条件的元素的个数
2
np.random.randn(3,4)
array([[ 0.57739186,  0.56219424, -1.61210981, -0.74358949],
       [ 0.34497086,  1.65096901,  0.14879807,  0.33655262],
       [-0.09287764,  1.23421095,  0.36261984, -1.06369641]])
 np.random.normal(size=(3,4))
array([[ 0.0284827 , -0.93254035,  0.41356179,  0.73310516],
       [ 1.92833853, -1.13892997,  1.25694962, -1.31025191],
       [ 0.13151099, -1.47289915,  0.45882324,  0.47906981]])
np.random.randint(1,10,(3,4))
array([[1, 2, 8, 4],
       [8, 4, 6, 5],
       [9, 8, 9, 9]])
c
array([2, 4, 1, 3])
np.random.permutation(len(c))  # 生成乱序的索引
array([0, 1, 2, 3])
c[np.random.permutation(len(c)) ] # 得到c的乱序的值
array([2, 1, 4, 3])
np.random.choice(c)  # 从列表c中随机取出1个值
2
np.random.choice(c,size=2)  # 从列表c中随机取出2个值
array([2, 2])
np.random.seed(5)  # seed只对其后的随机数序列生效;要得到相同结果需再次设置相同的seed
np.random.randn(3,4)
array([[ 0.44122749, -0.33087015,  2.43077119, -0.25209213],
       [ 0.10960984,  1.58248112, -0.9092324 , -0.59163666],
       [ 0.18760323, -0.32986996, -1.19276461, -0.20487651]])
np.random.seed(5)   # 只要指定随机种子,每次生成的随机数相同。 伪随机
np.random.randn(3,4)
array([[ 0.44122749, -0.33087015,  2.43077119, -0.25209213],
       [ 0.10960984,  1.58248112, -0.9092324 , -0.59163666],
       [ 0.18760323, -0.32986996, -1.19276461, -0.20487651]])
np.save('variable_name',c)  # 自动将c的值保存到variable_name文件中。默认文件后缀.npy
np.load('variable_name.npy')

array([2, 4, 1, 3])

np.savetxt('variable_name',c)  # 以文本形式保存
np.loadtxt('variable_name')  
array([2., 4., 1., 3.])

相关文章

网友评论

      本文标题:1. 日月光华 Python数据分析-Numpy

      本文链接:https://www.haomeiwen.com/subject/mdloydtx.html