9. Python下的文件读写操作

9.1 文件对象的声明及基本操作

另一种数据格式：文件 / 文档

文件的界定：
文件对象并不是文件本身，而是指向本地存储文件的一个链接（映射）
文件的声明：open 语句

#路径字符串编写
>>> f1 = 'C:\\Users\\Administrator\\Desktop\\test.txt'
>>> f2 = r'C:\Users\Administrator\Desktop\test.txt'
>>> f3 = 'test.txt' 
#模式：以文本文件为例 r w r+ a
>>> path = 'C:\\Users\\Administrator\\Desktop\\'
>>> f = open(path + 'test.txt', 'r')
>>> #f被申明为文件
>>> type(f)
<class '_io.TextIOWrapper'>
>>> print(f)
<_io.TextIOWrapper name='C:\\Users\\Administrator\\Desktop\\test.txt' mode='r' encoding='cp936'>
>>> f.read()
'冯轩\n学习\n数据\n'
>>> print(f.read())

>>> #读取过程中光标造成的问题
>>> f.read()
''
>>> f.seek(0)
0
>>> f.read()
'冯轩\n学习\n数据\n'
>>> f.seek(0)
0
>>> print(f.read())
冯轩
学习
数据

# Read the whole text file and print its contents.
path = 'C:\\Users\\Administrator\\Desktop\\test.txt'

# The with-block closes the file on exit (same effect as f.close()),
# even if reading raises.
with open(path, 'r') as f:
    print(f.read())
>>> 
============= RESTART: C:\Users\Administrator\Desktop\script.py =============
冯轩
学习
数据

>>>

关闭文件连接 f.close()

9.2 系统模块os下的路径操作

os模块：系统模块 - 常用命令：

os.name
输出字符串指示正在使用的平台
os.getcwd()
函数得到当前工作目录，即当前Python脚本工作的目录路径
os.listdir()
返回指定目录下的所有文件和目录名
os.chdir()
将当前工作目录切换到指定路径

>>> import os
>>> os.name
'nt'
>>> os.getcwd()
'C:\\Python3.5.2'
>>> os.listdir()
['DLLs', 'Doc', 'include', 'Lib', 'libs', 'LICENSE.txt', 'NEWS.txt', 'python.exe', 'python3.dll', 'python35.dll', 'pythonw.exe', 'README.txt', 'Scripts', 'tcl', 'Tools', 'vcruntime140.dll']
>>> os.chdir('C:\\Users\\Administrator\\Desktop')
>>> os.getcwd()
'C:\\Users\\Administrator\\Desktop'
>>> f = open('test.txt', 'r')
>>> f.read()
'冯轩\n学习\n数据\n'
>>>

os.remove()
删除一个文件
os.system()
运行shell命令

>>> f = open('test.txt', 'r')
>>> f.read()
'冯轩\n学习\n数据\n'
>>> os.remove('test.txt')
Traceback (most recent call last):
  File "<pyshell#10>", line 1, in <module>
    os.remove('test.txt')
PermissionError: [WinError 32] 另一个程序正在使用此文件，进程无法访问。: 'test.txt'
>>> f.close()
>>> os.remove('test.txt')
>>> os.system('cmd')
-1073741510
>>> os.system('python')
-1073741510

os.path.split()
函数返回一个路径的目录名和文件名
os.path.exists()
检验给出的路径是否真的存在

9.3 文件读取操作

文件读取：

open('路径' , '模式' , encoding = '编码')
f.read() f.seek(0) f.close()
f.read(n)：n 表示读取的字符数
f.readlines()
一次性读取全部内容，按行拆分后返回一个列表（每行为一个元素）
f.readline()
一次读取一行

>>> import os
>>> os.getcwd()
'C:\\Python3.5.2'
>>> os.chdir('C:\\Users\\Administrator\\Desktop')
>>> f = open('test.txt', 'r')
>>> f.read()
'冯轩\n数据\n学习\nBig Data\nMachine Learning'
>>> f.seek(0)
0
>>> for line in f.readlines():
    print(type(line))
    print(line)

    
<class 'str'>
冯轩

<class 'str'>
数据

<class 'str'>
学习

<class 'str'>
Big Data

<class 'str'>
Machine Learning
>>> 
>>> 
>>> for line in f:
    print(line)

    
>>> f.seek(0)
0
>>> for line in f:
    print(line)

    
冯轩

数据

学习

Big Data

Machine Learning

txt存储时编码设置：

ANSI：'gbk'
UTF-8：'utf8'

path = 'C:\\Users\\Administrator\\Desktop\\test.txt'

# Pass the encoding explicitly so a UTF-8 file is not decoded with the
# platform default codec. The with-block closes the file on exit.
with open(path, 'r', encoding='utf8') as f:
    print(f.read())

作业：
爬取百度POI数据，存为txt，再用Python读取并整理为类似json的形式（字典列表）

# Parse lines of the form "name:lng,lat,address" into a list of dicts.
path = 'C:\\Users\\Administrator\\Desktop\\cspoi.txt'

m = []
with open(path, 'r') as f:
    for line in f:
        if not line.strip():
            continue  # skip blank lines instead of failing on them
        # Split on the first colon only, so a colon later in the line is kept.
        name, rest = line.split(':', 1)
        fields = rest.split(',')
        m.append({
            'name': name,
            'lng': float(fields[0]),
            'lat': float(fields[1]),
            # strip() removes the trailing newline and surrounding whitespace
            'address': fields[2].strip(),
        })

print(m)
>>> 
============= RESTART: C:\Users\Administrator\Desktop\script.py =============
[{'lat': 28.213735, 'address': '地铁2号线', 'lng': 112.920419, 'name': '望城坡'}, {'lat': 28.256779, 'address': '地铁1号线', 'lng': 112.995326, 'name': '马厂'}, {'lat': 28.200609, 'address': '地铁2号线', 'lng': 112.990715, 'name': '芙蓉广场'}, {'lat': 28.14178, 'address': '地铁1号线', 'lng': 112.993968, 'name': '铁道学院'}, {'lat': 28.171698, 'address': '地铁1号线', 'lng': 112.992335, 'name': '南湖路'}, ...

9.4 文件写入操作

以 'w' 模式打开一个不存在的文件名，就会创建该文件

path = (r"C:\Users\Administrator\Desktop")

# Opening a non-existent file in 'w' mode creates it
# (an existing file would be truncated).
f = open(path + '\\test2.txt', 'w',  encoding = 'utf8')
f.close()  # release the handle so the file is not left open/locked

print('finished!')

>>> 
============= RESTART: C:\Users\Administrator\Desktop\script.py =============
finished!

f.write(str)写入str

path = (r"C:\Users\Administrator\Desktop")

f = open(path + '\\test2.txt', 'w',  encoding = 'utf8')

f.write('hello world !')
f.flush()  # force the buffered text to disk while the file stays open
f.close()  # closing also flushes; calling flush() after close() raises ValueError

print('finished!')

f.writelines(list)写入list：

path = (r"C:\Users\Administrator\Desktop")

lst = ['a', 'b', 'c', 'd', 'e']

# writelines() does not add line separators, so append '\n' to each item.
lst = [item + '\n' for item in lst]

# The with-block closes (and therefore flushes) the file on exit.
with open(path + '\\test2.txt', 'w', encoding='utf8') as f:
    f.writelines(lst)

print('finished ! ')

作业：

n = list(range(1,11))
m = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

# Write one "number,letter" line per pair; the with-block closes the file.
with open(r'C:\Users\Administrator\Desktop\write.txt', 'w', encoding='utf8') as f:
    f.writelines('{},{}\n'.format(num, letter) for num, letter in zip(n, m))

print('finished')