1. 文件操作
1.1 读文件
- 读文本文件
#打开文件
# -模式
# -rb 表示读取文件原始的二进制
# "rb" reads the file's raw bytes; the with-block closes the handle even if
# read() raises.
with open("common/info.txt", mode="rb") as file_object:
    data = file_object.read()
# Decode the raw bytes into text
data = data.decode("utf-8")
# Print the content
print(data)
# -rt 转码
# Check that the path exists, and actually act on the result
import os

if os.path.exists("common/info.txt"):
    # "rt" decodes the bytes to str while reading; the with-block closes the file
    with open("common/info.txt", mode="rt", encoding="utf-8") as file_object:
        data = file_object.read()
    # Print the content
    print(data)
else:
    print("文件不存在")
- 读图片
file_object=open("common/img.png",mode="rb")
data=file_object.read()
file_object.close()
print(data)
1.2 写文件
# "wb": write bytes; "w" truncates an existing file first
with open("common/t1.txt", "wb") as file_object:
    file_object.write("hello".encode("utf-8"))
# "wt": "t" means text mode. Without an explicit encoding, open() falls back to
# the platform's locale encoding (not necessarily UTF-8), so pass it explicitly.
with open("common/t2.txt", "wt", encoding="utf-8") as file_object:
    file_object.write("哈哈")
- 写图片文件
# Copy an image: read the source bytes, then write them to a new file
with open("img.png", mode="rb") as f1:
    content = f1.read()
# The destination must be opened with "wb"; a handle opened with "rb" is
# read-only and write() on it fails. A separate file name keeps the source intact.
with open("img_copy.png", mode="wb") as f2:
    f2.write(content)
案例
#1.去网上下载一点文本
import requests
res=requests.get(url="https://www.jianshu.com/p/3ed339aa3012",headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"})
print(res.content.decode("utf-8"))
file_object=open("note/log1.txt",mode='wb')
file_object.write(res.content)
file_object.close()
# 2. Download an image from the web
# NOTE(review): this snippet is identical to the text-download example: it
# requests the same article URL, decodes the body as UTF-8 and writes it to
# note/log1.txt. For a real image download the URL presumably should point at
# an image, the decode/print step should be dropped (image bytes are not UTF-8
# text), and the output file should have an image extension - TODO confirm the
# intended URL.
import requests
res=requests.get(url="https://www.jianshu.com/p/3ed339aa3012",headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"})
print(res.content.decode("utf-8"))
# "wb" writes the response body as raw bytes
file_object=open("note/log1.txt",mode='wb')
file_object.write(res.content)
file_object.close()
注意事项
- 绝对路径
- 相对路径
- 文件不存在,w新建,文件存在先清空
1.3 文件打开模式
- r
- w
- x 不存在创建写入;存在报错
- a 尾部追加
下面的 b、t 不能单独使用,需要与上面的 r/w/x/a 组合
- b 二进制
- t 文本模式(默认)
关于文件的打开模式常见应用有:
-
只读 r rt rb
- 存在 读
- 不存在 报错
-
只写 w wt wb
- 存在,清空再写
- 不存在,创建再写
-
只写 x xt xb
- 存在,报错
- 不存在,创建再写
-
只写 a at ab 尾部追加
- 存在,尾部追加
- 不存在,创建再写
-
读写(光标位置重置0,file_object.seek(0))
- r+ rb+ ,默认光标位置:起始位置
- w+ wb+ ,默认光标位置:起始位置(清空文件)
- x+ xb+ ,默认光标位置:起始位置(新文件)
- a+,ab+ ,默认光标位置:末尾
1.4 常见功能
在上述文件的操作中,只用了读写,其实还有其他功能辅助读写
- read
- 读所有
f = open("log1.txt",mode="r",encoding="utf-8")
data=f.read()
f.close()
f = open("log1.txt",mode="rb")
data=f.read()
f.close()
- 读n个字符
f = open("log1.txt",mode="r",encoding="utf-8")
#读一个字符
data=f.read(1)
f.close()
f = open("log1.txt",mode="rb")
#读一个字节
data=f.read(1)
f.close()
- readline,读一行
f = open("log1.txt",mode="r",encoding="utf-8")
data=f.readline()
f.close()
- readlines,所有行
f = open("log1.txt",mode="r",encoding="utf-8")
data=f.readlines()
f.close()
- 循环,读大文件(readline加强版)
f = open("note/log1.txt",mode="r",encoding="utf-8")
for line in f:
print(line.strip())
f.close()
- write 写
- flush,刷到硬盘
f=open("note/info.txt",mode="a",encoding="utf-8")
while True:
#不是实时的,写到了缓冲区
f.write("余小凤")
f.flush()
f.close()
- 移动光标位置
f=open("note/info.txt",mode="r+",encoding="utf-8")
#移动光标位置
#会覆盖
f.seek(3)
f.write("哈哈哈")
f.close()
f=open("note/info.txt",mode="a+",encoding="utf-8")
#移动光标位置,字节
#会永远在末尾写
f.seek(3)
f.write("哈哈哈")
f.close()
- 获取当前光标位置
f=open("note/info.txt",mode="a+",encoding="utf-8")
#移动光标位置
#tell,字节位置
f.seek(3)
f.tell()
f.write("哈哈哈")
f.close()
1.5 上下文管理
with open("note/info.txt",mode="a+",encoding="utf-8") as f, open("log1.txt",'r') as f2:
f.write("hello")
pass
练习题
# 1. Log counting: count how many times each user appears in the log.
# The first whitespace-separated field of every line is used as the key
# (stored in user_ip).
user_dict = {}
with open("log.txt", "r", encoding="utf-8") as file_object:
    for line in file_object:
        fields = line.split()
        # Skip blank lines so that "\n" is never counted as a user; split()
        # also drops the trailing newline on lines that hold a single field.
        if not fields:
            continue
        user_ip = fields[0]
        user_dict[user_ip] = user_dict.get(user_ip, 0) + 1
print(user_dict)
#重命名,若存在,会覆盖
import shutil
shutil.move("log.txt","log1.txt")
2. csv格式文件
import csv
import os

# Check and create the same relative path; exist_ok makes re-runs safe
# instead of raising FileExistsError.
os.makedirs("images", exist_ok=True)

# newline="" lets the csv module handle line endings and quoted fields itself
with open("gp.csv", mode="r", encoding="utf-8", newline="") as file_object:
    reader = csv.reader(file_object)
    # Skip the header row
    next(reader, None)
    for row in reader:
        # Ignore blank lines instead of failing on the unpack below
        if not row:
            continue
        name, price, race = (field.strip() for field in row)
        print(name, price, race)
3. ini格式
import configparser

config = configparser.ConfigParser()
config.read("cfg.ini", encoding="utf-8")

# List all section names
ret = config.sections()
print(ret)

# Read every key/value pair of a section
res = config.items("mysqld")
print(res)
for key, value in config.items("mysqld"):
    print(key, value)

# Read the value of one key in a section
val = config.get("mysqld", "max_connections")
print(val)

# Other operations
# Check whether a section exists
v1 = config.has_section("name")

# Add a section. set() raises NoSectionError when the section is missing and
# add_section() raises DuplicateSectionError when it already exists, so the
# section is created on demand.
if not config.has_section("group"):
    config.add_section("group")

# Add a key/value pair and persist it; the with-block closes the file handle
config.set("group", "name", "dzw")
with open("cfg.ini", mode="w", encoding="utf-8") as cfg_file:
    config.write(cfg_file)

# Remove the option first and the section afterwards: remove_option() raises
# NoSectionError once the section is gone.
config.remove_option("group", "name")
config.remove_section("group")
with open("cfg.ini", mode="w", encoding="utf-8") as cfg_file:
    config.write(cfg_file)
4. XML格式文件
可扩展标记语言,是一种简单的数据存储语言,XML被设计用来传输和存储数据
from xml.etree import ElementTree as et
# et去打开xml文件
tree = et.parse("test.XML")
# 获取根标签
root = tree.getroot()
# et打开字符串内容
# content = ""
# root = et.XML(content)
# print(root)
# 获取孩子标签
for child in root:
# print(child.tag, child.attrib, child.text)
for node in child:
pass
# print(node.tag, node.attrib, node.text)
name = root.find("food")
print(name.tag, name.attrib)
pr = name.find("price")
print(pr.tag, pr.attrib)
# 查找所有的food标签
for child in root.iter("food"):
print(child.tag)
v1 = root.findall("food")
print(v1)
v2 = root.find("food").find("name")
print(v2)
修改和删除节点
from xml.etree import ElementTree as et

# Parse the XML file
tree = et.parse("test.XML")
# Get the root element
root = tree.getroot()

# Change a node's text and attributes
rank = root.find("food").find("name")
# Element text must be a str: ElementTree cannot serialize an int on write()
rank.text = "999"
rank.set("update", '2020-11-11')
print(rank.text, rank.attrib)

# Remove a node. find() returns None when nothing matches, and passing None
# to remove() raises, so only remove what was actually found.
node_to_remove = root.find("hi")
if node_to_remove is not None:
    root.remove(node_to_remove)

# Save the result to a new file
tree = et.ElementTree(root)
tree.write("new.xml", encoding="utf-8")
构建文档
from xml.etree import ElementTree as et
# 创建根标签
root = et.Element("home")
# 创建节点大儿子
son1=et.Element('son',{"name":"儿1"})
son2=et.Element('son',{"name":"儿2"})
# 在大儿子中创建2个孙子
grandson1=et.Element("grandson",{"name":"儿11"})
grandson2=et.Element("grandson",{"name":"儿12"})
son1.append(grandson1)
son1.append(grandson2)
# 把儿子添加到根节点中
root.append(son1)
root.append(son2)
tree=et.ElementTree(root)
tree.write("new1.xml",encoding="utf-8",short_empty_elements=False)
from xml.etree import ElementTree as et
# 创建根标签
root = et.Element("family")
# 创建节点大儿子
son1=root.makeelement('son',{"name":"儿1"})
son2=root.makeelement('son',{"name":"儿2"})
# 在大儿子中创建2个孙子
grandson1=son1.makeelement("grandson",{"name":"儿11"})
grandson2=son1.makeelement("grandson",{"name":"儿12"})
son1.append(grandson1)
son1.append(grandson2)
# 把儿子添加到根节点中
root.append(son1)
root.append(son2)
tree=et.ElementTree(root)
tree.write("new2.xml",encoding="utf-8",short_empty_elements=False)
from xml.etree import ElementTree as et
# 创建根标签
root = et.Element("family")
# 创建节点大儿子
son1=et.SubElement(root,'son',{"name":"儿1"})
son2=et.SubElement(root,'son',{"name":"儿2"})
# 在大儿子中创建2个孙子
grandson1=et.SubElement(son1,"grandson",{"name":"儿11"})
grandson1.text="sunzi"
grandson2=et.SubElement(son1,"grandson",{"name":"儿12"})
tree=et.ElementTree(root)
tree.write("new3.xml",encoding="utf-8",short_empty_elements=False)
from xml.etree import ElementTree as et

# Create the root element
root = et.Element("user")
# A CDATA section is terminated by "]]>". Note that ElementTree escapes "<"
# and ">" in element text when serializing, so this is written out as escaped
# text rather than as a real CDATA section.
root.text = "<![CDATA[你好]]>"
# Build the document object and write it to disk
tree = et.ElementTree(root)
tree.write("new5.xml", encoding="utf-8", short_empty_elements=False)
5.excel格式文件
5.1 读excel
- 读sheet
from openpyxl import load_workbook
workbook = load_workbook("test.xlsx")
# sheet相关操作
# 1 获取Excel所有sheet
print(workbook.sheetnames)
# 2 选择sheet,基于sheet名称
sheet1 = workbook["bug"]
cell1 = sheet1.cell(1, 1)
cell2 = sheet1.cell(1, 2)
print(cell1.value)
print(cell2.value)
# 3 选择sheet 基于索引位置
sheet2 =workbook.worksheets[0]
cell3 = sheet2.cell(1, 1)
cell4 = sheet2.cell(1, 2)
print(cell3.value)
print(cell4.value)
# 4.循环所有的sheet
#方式1
for name in workbook.sheetnames:
sheet = workbook[name]
cell = sheet.cell(1,1)
print(cell.value)
#方式2
for name in workbook.worksheets:
cell = name.cell(1,1)
print(cell.value)
#方式3
for name in workbook:
cell = name.cell(1,1)
print(cell.value)
# 5. Get the cell at row n, column n (indexes start at 1)
sheet2 = workbook.worksheets[0]
cell3 = sheet2.cell(1, 1)
cell4 = sheet2.cell(1, 2)
print(cell3.value)
print(cell4.value)
# Inspect the formatting of the cell fetched above, not of the leftover loop
# variable `cell` from the sheet loops.
print(cell3.style)
print(cell3.font)
print(cell3.alignment)  # alignment
#6 获取某个单元格
c1 = sheet2["A1"]
print(c1.value)
#7.第n行所有的单元格
for cell in sheet2[1]:
print(cell.value)
#8 获取所有行的数据
for row in sheet2.rows:
print(row[0].value)
#9 获取所有列的数据
for col in sheet2.columns:
print(col[0].value)
# 10 读合并的单元格

5.2 写Excel
from openpyxl import load_workbook
from openpyxl import workbook
from openpyxl.styles import Alignment,Border,Side,Font,GradientFill,Color,PatternFill
wb = workbook.Workbook() #默认创建Sheet
Color(index=0) # 根据索引进行填充
#
Color(rgb='00000000') # 根据rgb值进行填充
# index
COLOR_INDEX = (
'00000000', '00FFFFFF', '00FF0000', '0000FF00', '000000FF', #0-4
'00FFFF00', '00FF00FF', '0000FFFF', '00000000', '00FFFFFF', #5-9
'00FF0000', '0000FF00', '000000FF', '00FFFF00', '00FF00FF', #10-14
'0000FFFF', '00800000', '00008000', '00000080', '00808000', #15-19
'00800080', '00008080', '00C0C0C0', '00808080', '009999FF', #20-24
'00993366', '00FFFFCC', '00CCFFFF', '00660066', '00FF8080', #25-29
'000066CC', '00CCCCFF', '00000080', '00FF00FF', '00FFFF00', #30-34
'0000FFFF', '00800080', '00800000', '00008080', '000000FF', #35-39
'0000CCFF', '00CCFFFF', '00CCFFCC', '00FFFF99', '0099CCFF', #40-44
'00FF99CC', '00CC99FF', '00FFCC99', '003366FF', '0033CCCC', #45-49
'0099CC00', '00FFCC00', '00FF9900', '00FF6600', '00666699', #50-54
'00969696', '00003366', '00339966', '00003300', '00333300', #55-59
'00993300', '00993366', '00333399', '00333333', #60-63
)
BLACK = COLOR_INDEX[0]
WHITE = COLOR_INDEX[1]
RED = COLOR_INDEX[2]
DARKRED = COLOR_INDEX[8]
BLUE = COLOR_INDEX[4]
DARKBLUE = COLOR_INDEX[12]
GREEN = COLOR_INDEX[3]
DARKGREEN = COLOR_INDEX[9]
YELLOW = COLOR_INDEX[5]
DARKYELLOW = COLOR_INDEX[19]
# 1 修改sheet名称
sheet = wb.worksheets[0]
sheet.title="数据集"
wb.save("p2.xlsx")
# 2 创建sheet,并设置sheet颜色
sheet=wb.create_sheet("工作计划",1)
sheet.sheet_properties.tabColor="1072BA"
wb.save("p2.xlsx")
#3 默认打开sheet
wb.active=1
wb.save("p2.xlsx")
#4 拷贝sheet
new_sheet = wb.copy_worksheet(wb["数据集"])
new_sheet.title="新数据集"
wb.save("p2.xlsx")
#5 删除sheet
# del wb["工作计划"]
# wb.save("p2.xlsx")
#6 获取某个单元格,修改值
sheet["B3"] = "666"
wb.save("p2.xlsx")
#7 获取某个单元格,修改值
cell =sheet.cell(1,1)
cell.value=888
wb.save("p2.xlsx")
#8 获取某个单元格,修改值
cell_list= sheet["B2":"C3"]#方形区域
for row in cell_list:
for cell in row:
cell.value = "新的值"
wb.save("p2.xlsx")
#7 对齐方式
# 参数可选项
horizontal = {'fill', 'distributed', 'centerContinuous', 'right',
'justify', 'center', 'left', 'general'}
vertical = {'distributed', 'justify', 'center', 'bottom', 'top'}
cell.alignment=Alignment(horizontal='center',vertical="distributed",text_rotation=45,wrap_text=True)
wb.save("p2.xlsx")
# 8 边框
# style可选项
style = ('dashDot','dashDotDot', 'dashed','dotted',
'double','hair', 'medium', 'mediumDashDot', 'mediumDashDotDot',
'mediumDashed', 'slantDashDot', 'thick', 'thin')
# 'medium' 中粗
# 'thin' 细
# 'thick' 粗
# 'dashed' 虚线
# 'dotted' 点线
cell1 =sheet.cell(9,2)
cell1.border=Border(
top=Side(style="thick",color='FFB6C1'),
bottom=Side(style="thick",color='FFB6C1'),
left=Side(style="thick",color='FFB6C1'),
right=Side(style="thick",color='FFB6C1',)
)
wb.save("p2.xlsx")
# 9 字体
cell1.font=Font(name='微软雅黑',size=45,color='ff0000',underline='single')
cell1.value="hello"
wb.save("p2.xlsx")
# 10 背景色
cell1.fill=PatternFill('solid',fgColor="99ccff")
wb.save("p2.xlsx")
#11 渐变背景色
cell2=sheet.cell(10,2)
cell2.fill=GradientFill('linear',stop=('FFFFFF','99ccff','000000'))
wb.save("p2.xlsx")
#12 宽度,索引从1开始
sheet.row_dimensions[1].height=50
sheet.column_dimensions["E"].width=100
wb.save("p2.xlsx")
#12 合并单元格
sheet.merge_cells("B2:D8")
sheet.merge_cells(start_row=15,start_column=3,end_row=18,end_column=8)
wb.save("p2.xlsx")
#13 写入公式
sheet['E1']='合计'
sheet['E2']='=B1*C1'
sheet['E3']='=SUM(B1,C1)'
wb.save("p2.xlsx")
#14 删除
#idx 要删除的索引位置
#amount,从索引位置开始要删除的个数,默认为1
sheet.delete_rows(idx=5,amount=0)
sheet.delete_cols(idx=5,amount=0)
wb.save("p2.xlsx")
#15 插入
sheet.insert_rows(idx=5,amount=0)
sheet.insert_cols(idx=5,amount=0)
wb.save("p2.xlsx")
#16 循环写内容
cell_range=sheet['B15:C16']
for row in cell_range:
for cell in row:
cell.value='xx'
for row in sheet.iter_rows(min_row=5,min_col=1,max_col=7,max_row=10):
for cell in row:
cell.value='oo'
wb.save("p2.xlsx")
#17 移动
sheet.move_range("H2:J10",rows=-1,cols=15,translate=True)#下右,自动翻译公式
wb.save("p2.xlsx")
# 18. Print area and repeated print titles
sheet.print_area = "A1:C50"
# Title rows take a row range ("1:3") and title columns take a column range
# ("A:D"); openpyxl validates these formats, so they must not be swapped.
sheet.print_title_rows = '1:3'
sheet.print_title_cols = 'A:D'
wb.save("p2.xlsx")
6.压缩文件
基于内置shutil模块
import shutil
#1 压缩文件
# base_name,压缩后的压缩包文件
# format,压缩的格式
# root_dir,要压缩的文件夹路径
# shutil.make_archive(base_name=r'测试压缩',format='zip',root_dir='压缩')
#2 解压文件
# filename,要解压的压缩包文件
# extract_dir,解压的路径
# format,解压的格式
shutil.unpack_archive(filename='测试压缩.zip',extract_dir='解压缩',format='zip')
7.路径相关
7.1 转义
Windows 路径使用的分隔符是 \,Linux 是 /
Windows 路径如果直接写成 'D:\nXXX\tXXX',其中的 \n、\t 会被当作转义字符(换行、制表符),导致路径出错
解决方法:对反斜杠进行转义,写成 'D:\\nXXX\\tXXX';或者在路径前加 r,写成 r'D:\nXXX\tXXX'
7.2
import os
import shutil
#找目录
base_dir=os.path.dirname(os.path.abspath(__file__))
print(base_dir)
# file_path=base_dir+r'/解压缩/info.txt'
file_path=os.path.join(base_dir,'解压缩','info.txt')
print(file_path)
if os.path.exists(file_path):
file_object= open(file_path,'r',encoding='utf-8')
content=file_object.read()
print(content)
file_object.close()
else:
print("文件不存在")
#创建文件夹
import os
base_dir=os.path.dirname(os.path.abspath(__file__))
print(base_dir)
file_path=os.path.join(base_dir,'hello','hi')
print(file_path)
if not os.path.exists(file_path):
os.makedirs(file_path)
#是否是文件夹
import os
base_dir=os.path.dirname(os.path.abspath(__file__))
print(base_dir)
file_path=os.path.join(base_dir,'hello','hi')
print(file_path)
is_dir = os.path.isdir(file_path)
# Delete a file or a folder
file_path = os.path.join(base_dir, 'xx')
if os.path.isdir(file_path):
    # rmtree only works on directories
    shutil.rmtree(file_path)
elif os.path.exists(file_path):
    # Plain files need os.remove
    os.remove(file_path)
# 拷贝文件
shutil.copy(r'H:\pyspace\learn\note\info.txt',r'H:\pyspace\learn\note\hello\hi\info.txt')
#拷贝文件夹
shutil.copytree(r'H:\pyspace\learn\note\解压缩',r'H:\pyspace\learn\note\hellobreeze')
#文件夹或文件重命名
shutil.move(r'H:\pyspace\learn\note\hello\hi\info.txt',r'H:\pyspace\learn\note\info9.txt')
其他知识点
# 1. The else clause of a for loop runs only when the loop finishes all
#    iterations without hitting break.
data_list = [1, 2, 3]
for item in data_list:
    print(item)
else:
    print("123")

# 2. Iterate over a list with a counter that starts at 5
data_list = [1, 2, 3]
for i, item in enumerate(data_list, 5):
    print(i, item)
练习题1 注册登录系统
import os

# The user database lives next to this script
base_dir = os.path.dirname(os.path.abspath(__file__))
db_file_path = os.path.join(base_dir, 'db.csv')

# User registration
while True:
    choice = input("是否要注册(Y/N)?")
    choice = choice.upper()
    if choice not in ('Y', 'N'):
        print("格式输入错误,请重新输入。")
        continue
    if choice == "N":
        break
    # Append mode keeps the users that were registered earlier
    with open(db_file_path, mode="a", encoding='utf-8') as file_object:
        while True:
            user = input('请输入用户名(Q/q退出):')
            if user.upper() == 'Q':
                break
            pwd = input("请输入密码:")
            file_object.write('{},{}\n'.format(user, pwd))
            # Push the buffered record out right away
            file_object.flush()
    break

# User login
print('欢迎使用xx系统,请登录!')
username = input('请输入用户名(Q/q退出):')
# Honour the Q/q exit that the prompt advertises
if username.upper() != 'Q':
    password = input("请输入密码:")
    if not os.path.exists(db_file_path):
        print("用户文件不存在")
    else:
        with open(db_file_path, mode='r', encoding='utf-8') as file_object:
            for line in file_object:
                # Split on the first comma only, so a password containing
                # commas still matches; skip blank or malformed lines.
                user, separator, pwd = line.strip().partition(',')
                if not separator:
                    continue
                if username == user and pwd == password:
                    print('登录成功!')
                    break
            else:
                # for/else: reached only when no line matched (no break)
                print("用户名或密码错误")
练习题2 保存天气信息到excel
import os
import requests
from xml.etree import ElementTree as et
from openpyxl import workbook

# Build the output path next to this script
base_dir = os.path.dirname(os.path.abspath(__file__))
db_file_path = os.path.join(base_dir, 'weather.xlsx')

# Create the workbook and drop the default sheet; one sheet per city is added below
wb = workbook.Workbook()
del wb['Sheet']

# Keep asking for cities until the user quits
while True:
    city = input("请输入城市(Q/q):")
    if city.upper() == 'Q':
        break
    url = 'http://ws.webxml.com.cn/WebServices/WeatherWS.asmx/getWeather'
    res = requests.post(
        url,
        data={'theCityCode': '{}'.format(city), 'theUserID': ''},
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'
        },
        # Do not hang forever when the service is unreachable
        timeout=10,
    )
    print(res.text)
    # Parse the XML response
    root = et.XML(res.text)
    # One sheet per city; the text of each child node goes into column 1
    sheet = wb.create_sheet(city)
    for row_index, node in enumerate(root, 1):
        text = node.text
        cell = sheet.cell(row_index, 1)
        cell.value = text

# Saving a workbook that has no sheet fails, so only save when at least one
# city was fetched.
if wb.worksheets:
    wb.save(db_file_path)
练习题3 读取ini文件,写入到excel
import os
import configparser
from openpyxl import workbook
from openpyxl.styles import Alignment, Border, Side, Font, PatternFill

# Resolve the input and output paths next to this script
base_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(base_dir, 'cfg.ini')
db_file_path = os.path.join(base_dir, 'config.xlsx')

# Create the workbook and drop the default sheet; one sheet per section is added below
wb = workbook.Workbook()
del wb['Sheet']

# Parse the ini file
config = configparser.ConfigParser()
config.read(file_path, encoding='utf-8')
print(config.sections())
# Debug output for two specific sections; guarded so that a file without them
# does not raise NoSectionError.
if config.has_section('mysqld'):
    for it in config.items('mysqld'):
        print(it)
if config.has_section('mysql'):
    print(config.items('mysql'))

# The styles are the same for every sheet, so build them once outside the loop
side = Side(style='thin', color='000000')
border = Border(top=side, bottom=side, left=side, right=side)
align = Alignment(horizontal='center', vertical='center')
header_fill = PatternFill('solid', fgColor='6495ED')
header_font = Font(name='微软雅黑', color='FFFFFF')
# Header cells and their captions
title_dict = {'A1': '键', 'B1': '值'}

# Walk through every section
for section in config.sections():
    # Create a sheet named after the ini section
    sheet = wb.create_sheet(section)

    # Header row: value, centering, background colour, font colour, border
    for position, text in title_dict.items():
        cell = sheet[position]
        cell.value = text
        cell.alignment = align
        cell.fill = header_fill
        cell.font = header_font
        cell.border = border

    # Data rows start at row 2; each (key, value) pair fills columns 1 and 2
    for row_index, group in enumerate(config.items(section), 2):
        for col, text in enumerate(group, 1):
            cell = sheet.cell(row_index, col)
            cell.alignment = align
            cell.border = border
            cell.value = text

# Saving a workbook that has no sheet fails, so only save when the ini file
# contained at least one section.
if wb.worksheets:
    wb.save(db_file_path)
练习题4 下载zip文件,并解压到指定路径
import os
import shutil
import requests

# Resolve the download folder next to this script
base_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(base_dir, 'files')
# exist_ok avoids a separate existence check before creating the folder
os.makedirs(file_path, exist_ok=True)

# Download the file
file_url = 'http://files.cnblogs.com/files/wupeiqi/HtmlStore.zip'
res = requests.get(url=file_url, timeout=30)
# Fail here on an HTTP error instead of saving an error page as a .zip
res.raise_for_status()

# Save the download into the folder, named after the last URL segment
file_name = file_url.split('/')[-1]
zip_file_path = os.path.join(file_path, file_name)
with open(zip_file_path, mode='wb') as file_object:
    file_object.write(res.content)

# Unpack the archive; the target directory is created if it does not exist
unpack_folder = os.path.join(base_dir, 'files', 'html')
shutil.unpack_archive(filename=zip_file_path, extract_dir=unpack_folder, format='zip')
网友评论