美文网首页
numpy and pandas

numpy and pandas

作者: 拾柒丶_8257 | 来源:发表于2019-04-26 00:48 被阅读0次
    import numpy
    # Load the comma-delimited text file as strings. skip_header=1 drops the
    # first (title) line so only data rows are parsed.
    world_alcohol = numpy.genfromtxt(
        "dang.txt",
        delimiter=",",
        encoding='utf8',
        dtype=str,
        skip_header=1,
    )
    # The result is a numpy.ndarray.
    print(type(world_alcohol))
    print(world_alcohol)
    # Show the help text. help() prints the documentation itself and returns
    # None, so it is not wrapped in print() (that would emit an extra "None").
    help(numpy.genfromtxt)
    
    
    # One-dimensional array.
    vector = numpy.array([5, 10, 15, 20])
    # Two-dimensional array. Every row must have the same length: a ragged
    # nested list raises ValueError in NumPy >= 1.24 and produced a 1-D
    # object array (not a matrix) in older releases.
    matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
    print(vector)
    print(matrix)
    print(vector.shape)  # structure of the array: (4,)
    print(matrix.shape)  # (3, 3)
    print(vector.dtype)  # all elements of a numpy array share one dtype
    
    
    import numpy as np
    print(np.arange(15))
    # Turn the 15-element one-dimensional range into 3 rows x 5 columns.
    a = np.reshape(np.arange(15), (3, 5))
    a
    
    import pandas
    # Read the CSV file into a table and inspect it.
    food_info = pandas.read_csv('dangdang1.csv')
    print(type(food_info))
    print(food_info.dtypes)  # per-column dtypes
    # help() prints the documentation itself and returns None, so it is not
    # wrapped in print() (that would emit an extra "None").
    help(pandas.read_csv)
    print(food_info)
    
    food_info.head()  # first 5 rows by default
    food_info.tail()  # last 5 rows by default
    food_info.columns  # column labels
    food_info.shape  # dimensions: (rows, columns)

    food_info.loc[0]  # the row labelled 0 (the first record with a default index)
    # Dtype names shown by food_info.dtypes:
    #   object    for string values
    #   int       for integer values
    #   float     for float values
    #   datetime  for time values
    #   bool      for Boolean values

    food_info.loc[3:6]  # row slice by label
    ndb_col = food_info["rank"]  # select one column by its label

    # The label can also come from a variable:
    #   col_name = "rank"
    #   ndb_col = food_info[col_name]

    # Select several columns at once by passing a list of labels.
    columns = ["rank", "name"]
    zinc_copper = food_info[columns]
    print(zinc_copper)

    # All column labels as a plain list.
    col_names = food_info.columns.tolist()
    print(col_names)

    # Keep only the labels that end with 's', then select those columns.
    gram_columns = [label for label in col_names if label.endswith('s')]
    gram_df = food_info[gram_columns]
    print(gram_df.head())
    
    
    print(food_info["comments"])
    # Arithmetic on a column is applied to every element.
    div_1000 = food_info["comments"] / 1000
    print(div_1000)
    # Assigning to a new label creates the column.
    food_info["div_1000"] = div_1000

    # Largest value in the column.
    max_calories = food_info["comments"].max()
    print(max_calories)

    # Sort rows by the column; inplace=True reorders food_info itself.
    # The default order is ascending.
    food_info.sort_values(by="comments", inplace=True)
    print(food_info["comments"])
    food_info
    # For descending order pass ascending=False:
    #   food_info.sort_values('comments', inplace=True, ascending=False)

    # Missing-value handling, kept as a commented example:
    #   comments = food_info['comments']
    #   comments_is_null = pandas.isnull(comments)   # boolean mask of missing entries
    #   comments_is_null
    #   comments[comments_is_null]                   # only the missing entries
    #   sum  -> total, len -> length, mean -> average
    #   comments_nonull = food_info['comments'][comments_is_null == False]  # drop missing values
    
    

    相关文章

      网友评论

          本文标题:numpy and pandas

          本文链接:https://www.haomeiwen.com/subject/tzwtnqtx.html