记录 Python 使用 pandas 和 csv 模块处理 CSV 文件的常用代码:
1. 读取 csv 文件,获取数据:
- 1. pandas
import pandas as pd
import csv
# Load the UTF-8 encoded CSV file into a DataFrame.
data = pd.read_csv('zi_202105071017.csv', encoding='utf-8')

# Preview both ends of the table. head()/tail() show 5 rows unless a row
# count is passed in the parentheses.
first_rows = data.head()
last_rows = data.tail()
print(first_rows)
print(last_rows)
>>
zi_id fanti struct zi_str freq
0 1 NaN NaN 一 1338743
1 2 NaN NaN 丁 11857
2 3 NaN NaN 七 14477
3 4 NaN NaN 万 28095
4 5 NaN NaN 丈 15697
zi_id fanti struct zi_str freq
4961 4962 NaN NaN 龋 27
4962 4963 NaN NaN 龙 7012
4963 4964 NaN NaN 龚 57
4964 4965 NaN NaN 龛 379
4965 4966 NaN NaN 龟 647
[4966 rows x 5 columns]
==================================================================================================================
# Select a single column by its name.
print(data['zi_str'])
# Get the column's values as a NumPy array (not a Python list); to_numpy()
# is the accessor pandas recommends over the older .values attribute.
print(data['zi_str'].to_numpy())
>>
0 一
1 丁
2 七
3 万
4 丈
Name: zi_str, Length: 4966, dtype: object
['一' '丁' '七' ... '龚' '龛' '龟']
======================================================================================================
# Remove the 'freq' and 'struct' columns from data in place, in one call.
data.drop(columns=['freq', 'struct'], inplace=True)
>>
zi_id fanti zi_str
0 1 NaN 一
1 2 NaN 丁
2 3 NaN 七
3 4 NaN 万
4 5 NaN 丈
[4966 rows x 3 columns]
- 2. csv.reader()
# Read the whole CSV file into a list of rows.
# newline='' is what the csv module asks for when opening files, so that
# line breaks embedded inside quoted fields are interpreted correctly.
with open("zi_202105071017.csv", 'r', encoding='utf-8', newline='') as f:
    rows = list(csv.reader(f))

# Show the first five rows (the header row included).
print(rows[:5])
>>
[['zi_id', 'fanti', 'struct', 'zi_str', 'freq'], ['1', '', '', '一', '1338743'], ['2', '', '', '丁', '11857'], ['3', '', '', '七', '14477'], ['4', '', '', '万', '28095']]
2. 读取 CSV 文件,取出指定行并写入新的 CSV 文件:
import csv
# Split out.csv by row position: rows 0..10000 are copied to train.csv,
# rows 10001..13000 are copied to valid.csv, and reading stops after that.
LAST_TRAIN_ROW = 10000
LAST_VALID_ROW = 13000

with open("out.csv", 'r', encoding='utf-8', newline='') as source, \
        open("train.csv", 'w', encoding='utf-8', newline='') as train, \
        open("valid.csv", 'w', encoding='utf-8', newline='') as valid:
    csvreader = csv.reader(source)
    train_csvwriter = csv.writer(train)
    valid_csvwriter = csv.writer(valid)

    # valid.csv gets an explicit header row.
    # NOTE(review): no header is written to train.csv here - presumably the
    # first row of out.csv already is the header; confirm.
    valid_csvwriter.writerow(['text', 'ner_tags'])

    for row_number, row in enumerate(csvreader):
        if row_number <= LAST_TRAIN_ROW:
            train_csvwriter.writerow(row)
        elif row_number <= LAST_VALID_ROW:
            valid_csvwriter.writerow(row)
        else:
            # Everything past the validation range is ignored.
            break
3. 数据写入 csv:
# Wrap the predictions in a DataFrame indexed 0..len(pred_label)-1 and
# save it to pred.csv.
row_index = range(len(pred_label))
pred_frame = pd.DataFrame(data=pred_label, index=row_index)
pred_frame.to_csv('pred.csv')
eg2:
# Write a small demo file: a header row followed by one sample record.
header = ['text', 'ner_tags']
sample = ['今天天气还行', 'B,I,O,I,I,O']
with open('test.csv', 'w', encoding="utf-8", errors='ignore', newline='') as f:
    writer = csv.writer(f, dialect='excel')
    writer.writerows([header, sample])
网友评论