文件夹 dataset 下有 cat、dog 等多个子文件夹,每个子文件夹下有若干个 bin 文件。现在将 cat 子文件夹中的文件按 6:2:2 的比例随机划分为训练集、测试集和验证集,并复制到指定的目录。
import os
import random
import shutil
def fileCopy(fileName, srcPath, dstPath):
    """Copy the named files from ``srcPath`` into a freshly created ``dstPath``.

    If ``dstPath`` already exists it is deleted together with its contents
    and then recreated, so after the call it holds only the copied files.
    The source files are copied, not moved.

    Args:
        fileName: Iterable of file names (relative to ``srcPath``) to copy.
        srcPath: Directory that holds the files.
        dstPath: Directory to (re)create and copy the files into.
    """
    # Start from an empty destination: drop any previous contents, recreate.
    if os.path.exists(dstPath):
        shutil.rmtree(dstPath)
    os.makedirs(dstPath)

    # Join directory and name properly so the paths are valid whether or not
    # the directory arguments end with a path separator.
    for name in fileName:
        shutil.copy(os.path.join(srcPath, name), os.path.join(dstPath, name))
# Count the files across all class folders
def sumFiles(root):
    """Print the number of class folders under ``root`` and their total file count.

    Each sub-directory of ``root`` is treated as one class, and every entry
    directly inside a class folder is counted as one file.

    Args:
        root: Directory whose sub-directories are the class folders.
    """
    # Only sub-directories are classes; listing a stray regular file in root
    # (e.g. a README) as if it were a folder would raise NotADirectoryError.
    class_dirs = [
        os.path.join(root, entry)
        for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    ]
    total = sum(len(os.listdir(class_dir)) for class_dir in class_dirs)
    print('{}个类, 所有类所有数据总和:{}'.format(len(class_dirs), total))
if __name__ == '__main__':
    # Source layout: <root>/<class folder>/<files>; one class folder is split.
    root = "/dataset/"
    fileName = "cat/"  # class sub-folder whose files get split
    filePath = root + fileName  # directory holding the files to split

    # Output directories for the three subsets.
    trainPath = "/data/training/"
    testPath = "/data/testing/"
    validPath = "/data/validation/"

    # Report the class and file totals under root before splitting.
    sumFiles(root)

    # Entry names (not full paths) of the selected class, in random order.
    allFile = os.listdir(filePath)
    random.shuffle(allFile)

    # 60% / 20% / 20% split by position in the shuffled list.
    total = len(allFile)
    trainEnd = int(0.6 * total)
    testEnd = int(0.8 * total)
    trainName = allFile[:trainEnd]
    testName = allFile[trainEnd:testEnd]
    validName = allFile[testEnd:]
    print("train:{}, test:{}, validation:{}".format(len(trainName), len(testName), len(validName)))

    # Copy each subset into its own destination directory.
    for subset, destination in (
        (trainName, trainPath),
        (testName, testPath),
        (validName, validPath),
    ):
        fileCopy(subset, filePath, destination)
网友评论