美文网首页
numpy and pandas

numpy and pandas

作者: 拾柒丶_8257 | 来源:发表于2019-04-26 00:48 被阅读0次
import numpy 
# Load a comma-separated text file into an array: the arguments give the
# path, the delimiter, the text encoding and the element type (every field
# is kept as a string); skip_header=1 skips the title row.
world_alcohol = numpy.genfromtxt(
    "dang.txt",
    delimiter=",",
    encoding='utf8',
    dtype=str,
    skip_header=1,
)
# The loaded object is a numpy.ndarray.
print(type(world_alcohol))
print(world_alcohol)
# Show the help text. help() prints the documentation itself and returns
# None, so wrapping it in print() would only add a stray "None" line.
help(numpy.genfromtxt)


# A 1-D array built from a flat list.
vector = numpy.array([5, 10, 15, 20])
# A 2-D array needs rows of equal length. A ragged nested list raises
# ValueError on recent NumPy releases (and produced a 1-D object array on
# older ones), so every row here has exactly three elements.
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
print(vector)
print(matrix)  # 2-D array
print(vector.shape)  # inspect the structure: (4,)
print(matrix.shape)  # (3, 3)
print(vector.dtype)  # all elements of a NumPy array share a single dtype


import numpy as np
# The integers 0..14 as a flat, 1-D array.
print(np.arange(15))
# Turn the 1-D range into a 2-D array with 3 rows and 5 columns.
a = np.arange(15).reshape((3, 5))
a

import pandas
# Read the CSV file into a DataFrame.
food_info = pandas.read_csv('dangdang1.csv')
print(type(food_info))
# dtype of every column.
print(food_info.dtypes)
# help() prints the documentation itself and returns None, so it is called
# directly; print(help(...)) would add a stray "None" line.
help(pandas.read_csv)
print(food_info)

food_info.head()  # first 5 rows by default
food_info.tail()  # last 5 rows by default
food_info.columns  # column names
food_info.shape  # dimensions: (rows, columns)

# Row labelled 0, i.e. the first record of the freshly loaded frame.
food_info.loc[0]
# Values that food_info.dtypes can report:
#   object    for string values
#   int       for integer values
#   float     for float values
#   datetime  for time values
#   bool      for Boolean values


# Row slice by label.
food_info.loc[3:6]
# Pick a single column by its name.
ndb_col = food_info.loc[:, 'rank']

# The column name may also come from a variable:
#   col_name = "rank"
#   ndb_col = food_info[col_name]

# Several columns at once: collect the names in a list and select with it.
columns = ['rank', 'name']
zinc_copper = food_info.loc[:, columns]
print(zinc_copper)


# Every column name, as a plain Python list.
col_names = food_info.columns.tolist()
print(col_names)

# Keep only the names that end with the letter 's' ...
gram_columns = [name for name in col_names if name.endswith('s')]
# ... and select exactly those columns from the frame.
gram_df = food_info[gram_columns]
print(gram_df.head())


# Show the 'comments' column.
print(food_info.loc[:, 'comments'])
# Arithmetic on a column is applied element-wise.
div_1000 = food_info.loc[:, 'comments'] / 1000
print(div_1000)
# Assigning to a new column name creates that column and fills it.
food_info['div_1000'] = div_1000


# Largest value found in the 'comments' column.
max_calories = food_info['comments'].max()
print(max_calories)

# Sort the rows by 'comments'. inplace=True reorders food_info itself
# rather than returning a sorted copy.
food_info.sort_values(by='comments', inplace=True)
print(food_info['comments'])
food_info
# food_info.sort_values('comments', inplace=True, ascending=False)  # ascending=False sorts in descending order; the default (ascending=True) sorts in ascending order

# comments = food_info['comments']   判断缺失值处理
# comments_is_null = pandas.isnull(comments)
# comments_is_null
# comments[comments_is_null] 
# sum 求和
# len 长度
# mean 求均值
# comments_nonull = food_info['comments'][comments_is_null == False]  去除缺失值

相关文章

网友评论

      本文标题:numpy and pandas

      本文链接:https://www.haomeiwen.com/subject/tzwtnqtx.html