生成文件列表（list），并划分训练集与测试集
#!/bin/sh
# Directory the script is invoked from. The non-smoking/ and smoking/ input
# directories are looked up beneath it, so the generated lists contain
# paths prefixed with this directory.
CURDIR=$(pwd)
#######################################
# Print one "<path> <label>" line for every entry of a directory.
# Arguments:
#   $1 - directory whose entries are listed (dot-entries are skipped,
#        exactly as a plain `ls` would skip them)
#   $2 - label written after each path, separated by a single space
# Outputs:
#   "<$1>/<entry> <$2>" lines on stdout, in pathname-expansion order
# Returns:
#   0 on success (an empty directory yields no output),
#   1 if $1 is not a directory (a diagnostic goes to stderr)
#######################################
genlist(){
  if [ ! -d "$1" ]; then
    printf 'genlist: not a directory: %s\n' "$1" >&2
    return 1
  fi
  # Iterate with a quoted glob instead of parsing `ls`: this keeps paths that
  # contain whitespace or backslashes intact. The loop variable carries a
  # function-specific prefix because plain sh has no portable `local`.
  for _genlist_entry in "$1"/*; do
    # In an empty directory the pattern stays literal; skip it. The -L test
    # keeps dangling symlinks, which -e alone would reject.
    [ -e "${_genlist_entry}" ] || [ -L "${_genlist_entry}" ] || continue
    printf '%s %s\n' "${_genlist_entry}" "$2"
  done
}
# Build the labelled lists (non-smoking -> 0, smoking -> 1) and split each
# class into train and test parts, producing train.txt and test.txt in the
# current directory.
#
# Intermediate lists are kept in a private scratch directory rather than in
# the current directory, so files there named like temp__0.txt or train_0.txt
# are never overwritten or deleted, and the scratch data is removed on every
# exit path.
WORK_DIR=$(mktemp -d) || exit 1
trap 'rm -rf -- "${WORK_DIR}"' EXIT

genlist "${CURDIR}/non-smoking" 0 > "${WORK_DIR}/all_0.txt"
genlist "${CURDIR}/smoking" 1 > "${WORK_DIR}/all_1.txt"

# Number of entries per class. Reading via redirection avoids an extra `cat`;
# wrapping in $(( )) strips any padding wc may put around the count.
LEN_0=$(( $(wc -l < "${WORK_DIR}/all_0.txt") ))
LEN_1=$(( $(wc -l < "${WORK_DIR}/all_1.txt") ))

# One fifth of each class (integer division, rounded down) goes to the test
# set; the remainder goes to the training set.
LEN_0_TEST=$(( LEN_0 / 5 ))
LEN_0_TRAIN=$(( LEN_0 - LEN_0_TEST ))
LEN_1_TEST=$(( LEN_1 / 5 ))
LEN_1_TRAIN=$(( LEN_1 - LEN_1_TEST ))

# The split is positional, not shuffled: the first entries of each list are
# used for training and the last entries for testing. Class 0 lines come
# before class 1 lines in both output files.
{
  head -n "${LEN_0_TRAIN}" "${WORK_DIR}/all_0.txt"
  head -n "${LEN_1_TRAIN}" "${WORK_DIR}/all_1.txt"
} > train.txt
{
  tail -n "${LEN_0_TEST}" "${WORK_DIR}/all_0.txt"
  tail -n "${LEN_1_TEST}" "${WORK_DIR}/all_1.txt"
} > test.txt
网友评论