python第30课练习——文件搜索

作者: YoYoYoo | 来源:发表于2019-05-31 16:50 被阅读0次

python第30课练习——文件搜索
python模块搜索路径的方式
关于文本读取和写入时的编码错误问题
python django开发教程 & 机器学习
自己安装python包提示module 'xxx' has no
第二周-实习总结
2019-04-02--window 下练习python
Python的包导入
搜索大文件--python脚本
Vscode 配置为Python IDE

先了解一些os模块的扩展阅读：https://fishc.com.cn/forum.php?mod=viewthread&tid=45512&extra=page%3D1%26filter%3Dtypeid%26typeid%3D403

1、编写一个程序，统计当前目录下每个文件类型的文件数，程序实现如图：

image.png

参考答案：

import os

# Snapshot the names of every entry in the current working directory.
all_files = os.listdir(os.curdir)
type_dict = {}

# Directories are tallied under one fixed key; everything else is tallied
# under its file extension.
for entry_name in all_files:
    if os.path.isdir(entry_name):
        bucket = '文件夹'
    else:
        # splitext() returns (root, ext); ext is '' when there is no suffix.
        _, bucket = os.path.splitext(entry_name)
    type_dict[bucket] = type_dict.get(bucket, 0) + 1

for bucket, total in type_dict.items():
    print('该文件夹下共有类型为【%s】的文件 %d 个' % (bucket, total))

2、编写一个程序，计算当前文件夹下所有文件大小，大概如图：

image.png

参考答案：

import os

# Names of all entries in the current working directory.
all_files = os.listdir(os.curdir)

# Map each regular file to its size in bytes; anything that is not a
# regular file (e.g. a directory) is left out.
file_dict = {
    name: os.path.getsize(name)
    for name in all_files
    if os.path.isfile(name)
}

for name, size in file_dict.items():
    print('%s【%d Bytes】' % (name, size))

3、编写一个程序，用户输入文件名以及开始搜索的路径，搜索该文件是否存在。如遇到文件夹，则进入文件夹继续搜索，程序实现如图：

image.png

参考答案：

import os

def search_file(start_dir, target):
    """Recursively search ``start_dir`` for entries named ``target``.

    The full path of every match is printed, one per line.

    Side effect: the process working directory is changed to ``start_dir``
    and is left there when the call returns.

    Args:
        start_dir: Directory in which the search starts.
        target: Exact entry name to look for.
    """
    os.chdir(start_dir)
    # Remember the absolute location so it can be restored reliably after
    # each descent; going back via os.pardir lands in the wrong directory
    # when the sub-directory was reached through a symbolic link.
    current_dir = os.getcwd()

    for each_file in os.listdir(os.curdir):
        if each_file == target:
            # os.path.join avoids a doubled separator when the current
            # directory is a filesystem root.
            print(os.path.join(current_dir, each_file))
        if os.path.isdir(each_file):
            search_file(each_file, target)  # descend into the sub-directory
            os.chdir(current_dir)  # come back before examining the next entry
            
# Ask for the directory to start from, then for the file name to look for,
# and run the recursive search with both answers.
start_dir = input('请输入待查找的初始目录：')
target = input('请输入需要查找的目标文件：')
search_file(start_dir, target)

4、编写一个程序，用户输入开始搜索的路径，查找该路径下（包含子文件夹内）所有的视频格式文件（要求查找mp4、rmvb、avi的格式即可），并创建一个文件（VedioList.txt）存放所有找到的文件的路径，程序实现如图：

image.png

参考答案：

import os
# Kept so the module-level name still exists; search_file() builds and
# returns a fresh list on every call instead of accumulating into this one.
vedio_list = []


def search_file(start_dir):
    """Recursively collect the paths of video files below ``start_dir``.

    A file counts as a video when its extension is .mp4, .rmvb or .avi
    (compared case-insensitively).

    Side effect: the process working directory is changed to ``start_dir``
    and is left there when the call returns.

    Args:
        start_dir: Directory in which the search starts.

    Returns:
        A new list of full paths, each terminated by a newline so the list
        can be passed directly to ``writelines()`` on a text-mode file.
    """
    video_exts = ('.mp4', '.rmvb', '.avi')

    os.chdir(start_dir)
    # Absolute location to return to after each descent; going back via
    # os.pardir is wrong when the sub-directory is a symbolic link.
    current_dir = os.getcwd()
    found = []

    for each_file in os.listdir(os.curdir):
        if os.path.isfile(each_file):
            file_ext = os.path.splitext(each_file)[1].lower()
            if file_ext in video_exts:
                # Use '\n' rather than os.linesep: a text-mode file already
                # translates '\n' on write, so os.linesep would produce
                # '\r\r\n' on Windows.
                found.append(os.path.join(current_dir, each_file) + '\n')
        if os.path.isdir(each_file):
            found.extend(search_file(each_file))  # descend into the sub-directory
            os.chdir(current_dir)  # come back before examining the next entry

    return found

start_dir = input('请输入待查找的初始目录：')
vedio_list = search_file(start_dir)
# search_file() changes the working directory to start_dir, so the list file
# is created inside the directory that was searched. The with-statement
# closes the file even if writing fails.
with open(os.path.join(os.getcwd(), 'VedioList.txt'), 'w') as f:
    f.writelines(vedio_list)

5、编写一个程序，用户输入关键字，查找当前文件夹内（如果当前文件夹包含文件夹，则进入文件夹继续搜索）所有含有该关键字的文本文件（.txt后缀），要求显示该文件所在位置以及关键字在文件中的具体位置（第几行第几个字符），程序实现如图：

image.png

参考答案：

import os

def print_pos(key_dict):
    """Print one line per entry of ``key_dict``, in ascending key order.

    Each key is reported as the line number and its value as the position(s)
    of the keyword on that line.
    """
    # Sort the keys so the report runs in ascending line order.
    for line_no in sorted(key_dict):
        print('关键字出现在第 %s 行，第 %s 个位置。' % (line_no, str(key_dict[line_no])))

def pos_in_line(line, key):
    """Return the 1-based positions at which ``key`` starts inside ``line``.

    Overlapping occurrences are all reported, because the search resumes one
    character after the start of the previous hit.
    """
    positions = []
    offset = 0
    while True:
        hit = line.find(key, offset)
        if hit == -1:
            return positions
        # str.find() is 0-based; readers count characters from 1.
        positions.append(hit + 1)
        offset = hit + 1

def _scan_lines(lines, key):
    """Map each 1-based line number containing ``key`` to its positions."""
    key_dict = {}
    for line_no, each_line in enumerate(lines, 1):
        if key in each_line:
            key_dict[line_no] = pos_in_line(each_line, key)
    return key_dict


def search_in_file(file_name, key):
    """Find every occurrence of ``key`` in the text file ``file_name``.

    The file is decoded as UTF-8 first (a leading BOM is skipped), then with
    the platform's default encoding. If both fail, it is read as UTF-8 with
    undecodable bytes replaced, so an oddly encoded file is searched on a
    best-effort basis instead of aborting with UnicodeDecodeError.

    Args:
        file_name: Path of the text file to scan.
        key: Keyword to look for.

    Returns:
        A dict mapping 1-based line numbers to the list of 1-based positions
        of ``key`` on that line; empty when the keyword is not found.
    """
    # encoding=None makes open() use the platform default encoding.
    for encoding in ('utf-8-sig', None):
        try:
            with open(file_name, encoding=encoding) as f:
                # Decoding happens lazily while iterating, so the whole scan
                # must stay inside the try block.
                return _scan_lines(f, key)
        except UnicodeDecodeError:
            continue

    # Last resort: never fail on encoding; positions on lines that contain
    # replaced bytes may be approximate.
    with open(file_name, encoding='utf-8', errors='replace') as f:
        return _scan_lines(f, key)

def search_files(key, detail):
    """Report every .txt file below the working directory that contains ``key``.

    The tree rooted at os.getcwd() is walked first to collect all files whose
    extension is exactly '.txt'; each one is then searched. For every file
    with at least one hit a banner and the file path are printed, and the
    individual positions are printed as well when ``detail`` is one of
    'YES', 'Yes' or 'yes'.
    """
    # os.walk() yields (directory, sub-directory names, file names) triples.
    txt_files = [
        os.path.join(folder, file_name)
        for folder, _subdirs, file_names in os.walk(os.getcwd())
        for file_name in file_names
        if os.path.splitext(file_name)[1] == '.txt'
    ]
    show_positions = detail in ['YES', 'Yes', 'yes']

    for txt_path in txt_files:
        hits = search_in_file(txt_path, key)
        if not hits:
            continue
        print('===================================================')
        print('在文件【%s】中找到关键字【%s】' % (txt_path, key))
        if show_positions:
            print_pos(hits)

# Read the keyword first; the second prompt embeds it in its own text.
key = input('请将该脚本放在待查找的文件夹内，请输入关键字：')
detail_prompt = '请问是否需要打印关键字【%s】在文件夹中的具体位置（YES/NO）：' % key
detail = input(detail_prompt)
search_files(key, detail)

这题有些复杂，而且上面的参考答案运行时会报错（错误信息如下），继续思考.....

请将该脚本放在待查找的文件夹内，请输入关键字：小甲鱼
请问是否需要打印关键字【小甲鱼】在文件夹中的具体位置（YES/NO）：YES
Traceback (most recent call last):
  File "C:/Users/dybhh/Desktop/pypractice/030-5.py", line 50, in <module>
    search_files(key,detail)
  File "C:/Users/dybhh/Desktop/pypractice/030-5.py", line 41, in search_files
    key_dict = search_in_file(each_txt_file,key)
  File "C:/Users/dybhh/Desktop/pypractice/030-5.py", line 22, in search_in_file
    for each_line in f:
UnicodeDecodeError: 'gbk' codec can't decode byte 0xff in position 0: illegal multibyte sequence

经过检查，将 f = open(file_name) 改为 f = open(file_name,'r',encoding='utf-8') 即可；另外，被搜索的 .txt 文件本身需要以 utf-8 编码保存。