美文网首页知识管理
python按章节分割小说txt文件

python按章节分割小说txt文件

作者: AI视客 | 来源:发表于2018-10-15 14:40 被阅读572次
    # -*- coding: utf-8 -*-
    # __author__:'Administrator'
    # @Time    : 2018/8/31 14:19
    import os
    dst = "D:\\test"   # output directory for the generated files
    
    
    # Split one txt file into per-chapter files, using "第…章" headings as boundaries.
    def SplitFile(file_path1, dst):
        """Split a UTF-8 text file into one output file per chapter.

        A line counts as a chapter heading when it contains "第" and also
        contains "章 " or "章...", or ends with "章" (ignoring the line break).
        The lines that follow a heading are appended, byte for byte, to
        ``<dst>/<book>/<n>_<heading>.txt``, where ``<book>`` is the source file's
        base name up to its first dot and ``n`` counts headings from 1. The
        heading line itself is only used for the file name. Lines that appear
        before the first heading are skipped.

        Output files are opened in append mode, so running the function twice on
        the same input appends the chapter bodies a second time.

        Args:
            file_path1: Path of the source txt file.
            dst: Directory under which the per-book output folder is created.
        """
        # The source is read in binary mode, so match on UTF-8 encoded bytes.
        di = u"第".encode("utf-8")
        zhang = u"章".encode("utf-8")
        zhang_space = zhang + b" "
        zhang_dots = zhang + b"..."

        with open(file_path1, 'rb') as f1:
            # Folder name = base name up to its first dot. The __main__ block
            # derives the same name to inspect the result, so keep this expression.
            file_dir1 = file_path1.replace("\\", '/').split("/")[-1].split(".")[0]
            path1 = os.path.join(dst, file_dir1)
            if not os.path.exists(path1):
                os.makedirs(path1)

            chapter_no = 0
            chapter_path = None  # None until the first heading has been seen
            out = None           # opened lazily: a heading without body creates no file
            try:
                for line in f1:
                    is_heading = di in line and (
                        zhang_space in line
                        or zhang_dots in line
                        or line.rstrip(b"\r\n").endswith(zhang)
                    )
                    if is_heading:
                        try:
                            title = line.strip().decode('utf8')
                        except UnicodeDecodeError as err:
                            # Undecodable heading: report it, drop the line and
                            # keep writing to the current chapter.
                            print(err)
                            continue
                        # Characters that are not allowed in file names would make
                        # every write of this chapter fail.
                        for bad in u'\\/:*?"<>|':
                            title = title.replace(bad, u"_")
                        if out is not None:
                            out.close()
                            out = None
                        chapter_no += 1
                        chapter_path = os.path.join(
                            path1, "%s_%s.txt" % (chapter_no, title))
                        continue

                    if chapter_path is None:
                        # Text before the first heading belongs to no chapter.
                        continue
                    try:
                        if out is None:
                            out = open(chapter_path, 'ab')
                        out.write(line)
                    except (IOError, OSError) as err:
                        print(err)
                        # Give up on this chapter instead of failing on every line.
                        chapter_path = None
            finally:
                if out is not None:
                    out.close()
    
    
    # Split one txt file into numbered sections (1, 2, 3, ...) in order.
    def SplitFile_by_Num(file_path2, dst):
        """Split a UTF-8 text file at lines that contain the next section number.

        Starting with 1, the first line that contains the decimal digits of the
        expected number is taken as a heading and the expected number is then
        increased by one. The lines that follow a heading are appended, byte for
        byte, to ``<dst>/<book>/<n>_<heading>.txt``, where ``<book>`` is the
        source file's base name up to its first dot. The heading line itself is
        only used for the file name. Lines before the first heading are skipped.

        Output files are opened in append mode, so running the function twice on
        the same input appends the section bodies a second time.

        Args:
            file_path2: Path of the source txt file.
            dst: Directory under which the per-book output folder is created.
        """
        with open(file_path2, 'rb') as f2:
            # Folder name = base name up to its first dot. The __main__ block
            # derives the same name to inspect the result, so keep this expression.
            file_dir2 = file_path2.replace("\\", '/').split("/")[-1].split(".")[0]
            path2 = os.path.join(dst, file_dir2)
            if not os.path.exists(path2):
                os.makedirs(path2)

            next_no = 1
            # The source is read in binary mode, so search for the digits as bytes.
            needle = str(next_no).encode("ascii")
            section_path = None  # None until the first heading has been seen
            out = None           # opened lazily: a heading without body creates no file
            try:
                for line in f2:
                    # NOTE(review): the original tested "<n>\r\n", "<n>、", "<n> "
                    # and finally a bare substring match on "<n>"; the last test
                    # subsumes the others, so the digits anywhere in the line
                    # (e.g. "1" inside "2015") start a new section. Matching is
                    # kept as it was - confirm whether a stricter rule is wanted.
                    if needle in line:
                        try:
                            title = line.strip().decode('utf8')
                        except UnicodeDecodeError as err:
                            # Undecodable heading: report it, drop the line and
                            # keep waiting for the same number.
                            print(err)
                            continue
                        # Characters that are not allowed in file names would make
                        # every write of this section fail.
                        for bad in u'\\/:*?"<>|':
                            title = title.replace(bad, u"_")
                        if out is not None:
                            out.close()
                            out = None
                        section_path = os.path.join(
                            path2, "%s_%s.txt" % (next_no, title))
                        next_no += 1
                        needle = str(next_no).encode("ascii")
                        continue

                    if section_path is None:
                        # Text before the first heading belongs to no section.
                        continue
                    try:
                        if out is None:
                            out = open(section_path, 'ab')
                        out.write(line)
                    except (IOError, OSError) as err:
                        print(err)
                        # Give up on this section instead of failing on every line.
                        section_path = None
            finally:
                if out is not None:
                    out.close()
    
    
    # Collect every .txt file below a directory.
    def get_all_txt(path):
        """Return the paths of all files under ``path`` whose name ends in ".txt".

        The directory tree is walked recursively with ``os.walk``. Only the file
        name suffix is checked, so names that merely contain ".txt" (for example
        "notes.txt.bak") are not returned.

        Args:
            path: Root directory to search.

        Returns:
            A list of file paths, each built by joining the containing directory
            and the file name. The list is empty when nothing matches.
        """
        filepaths = []
        for root, _dirs, files in os.walk(path):
            filepaths.extend(
                os.path.join(root, name) for name in files if name.endswith('.txt')
            )
        return filepaths
    
    
    if __name__ == "__main__":
        # Unicode literal instead of unicode(file_dir, "utf8"): same value on
        # Python 2, and it does not rely on a builtin that Python 3 removed.
        file_dir = u"D:\\xiaoshuo"
        file_paths = get_all_txt(file_dir)
        for one in file_paths:
            # First try to split on chapter headings.
            SplitFile(one, dst)
            # Folder that SplitFile writes to for this source file.
            out_dir = os.path.join(dst, one.replace("\\", '/').split("/")[-1].split(".")[0])
            for _root, _dirs, files in os.walk(out_dir):
                # No chapter file was produced: fall back to splitting by number.
                if not files:
                    SplitFile_by_Num(one, dst)
                # Only the top level of the folder is inspected, so the fallback
                # runs at most once per source file.
                break
    
    
    

    相关文章

      网友评论

        本文标题:python按章节分割小说txt文件

        本文链接:https://www.haomeiwen.com/subject/jwyfzftx.html