Multi-GPU parallel hyperparameter tuning is a joy; from now on I can finally tune hyperparameters at scale.
import os
import threading

# Every (model, action) pair has its own launch script, e.g. sst_adv_cnn_no_up.sh.
model_list = ['sst_adv_cnn', 'sst_adv_rnn', 'sst5_adv_cnn', 'sst5_adv_rnn']
action_list = ['no_up', 'no_down', 'no_same', 'no_updownsame']

run_list = []
for model in model_list:
    for action in action_list:
        run_list.append(model + '_' + action + '.sh')

# Spread the 16 scripts round-robin over 4 GPUs via CUDA_VISIBLE_DEVICES.
command_list = []
for idx, run in enumerate(run_list):
    command = 'CUDA_VISIBLE_DEVICES=' + str(idx % 4) + ' bash ' + run
    print(command)
    command_list.append(command)

class MyThread(threading.Thread):
    """Thread that runs one shell command to completion."""
    def __init__(self, command):
        threading.Thread.__init__(self)
        self.cmd = command

    def run(self):
        print("Starting " + self.cmd)
        os.system(self.cmd)
        print("Exiting " + self.cmd)

# Launch everything at once; each GPU ends up with four concurrent jobs.
for command in command_list:
    MyThread(command).start()
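As an aside, the thread wrapper around os.system is not strictly necessary. A minimal alternative sketch using subprocess instead of threading (my own variant, not part of the original scripts):

import subprocess

# Each Popen starts immediately and runs in the background, so no
# explicit threads are needed; wait() then blocks until each finishes.
procs = [subprocess.Popen(command, shell=True) for command in command_list]
for proc in procs:
    proc.wait()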
Parallel within a group, serial between groups: the scripts run in groups of 16 on 4 GPUs. Every three seconds the launcher checks whether every job in the current group has finished, and once they all have, it starts the next group.
import os
import threading
import time

# Template placeholders: {0} model (cnn/rnn); {1} action (no_up/no_down/no_same/
# no_updownsame/all); {2} dis warm-up steps; {3} gene warm-up steps; {4} action_type.
set_num = 16      # scripts launched per group
gpu_num = 4
sleep_time = 3    # seconds between completion checks
command_raw = r"""
#!/usr/bin/env sh
set -e
NAME=sst5_adv_cnn_{1}_{2}_{3}_{4}
TRAIN_DIR=./tmp/$NAME
INPUT_DIR=./data/sst5
mkdir $TRAIN_DIR || true
python ./src/sst_action.py \
--mode=train_adv \
--action_type={4} \
--model_type=cnn \
--action={1} \
--input_dir=$INPUT_DIR \
--train_dir=$TRAIN_DIR \
--save_steps=30 \
--test_steps=50 \
--least_freq=2 \
--num_classes=2 \
--every=1 \
--dis_warm_up_step={2} \
--gene_warm_up_step={3} \
--max_vocab_size=100000 \
--embedding_dims=300 \
--rnn_cell_size=300 \
--batch_size=64 \
--learning_rate=0.0001 \
--generator_learning_rate=0.001 \
--max_steps=25000 \
--max_grad_norm=1.0 \
--num_timesteps=100 \
--keep_prob_emb=0.6 \
--keep_prob_dense=0.9
python ./src/sst_action.py \
--mode=test \
--model_type=cnn \
--action_type={4} \
--action={1} \
--input_dir=$INPUT_DIR \
--train_dir=$TRAIN_DIR \
--save_steps=30 \
--test_steps=50 \
--least_freq=2 \
--num_classes=2 \
--every=1 \
--dis_warm_up_step={2} \
--gene_warm_up_step={3} \
--max_vocab_size=100000 \
--embedding_dims=300 \
--rnn_cell_size=300 \
--batch_size=64 \
--learning_rate=0.0001 \
--generator_learning_rate=0.001 \
--max_steps=25000 \
--max_grad_norm=1.0 \
--num_timesteps=100 \
--keep_prob_emb=0.6 \
--keep_prob_dense=0.9
"""
dis_warm_up_list = [500, 800, 1000, 1200]
gene_warm_up_list = [500, 800, 1000, 1200]
model_list = ['cnn', 'rnn']  # only 'cnn' is actually used below
action_list = ['no_up', 'no_down', 'no_same', 'no_updownsame', 'all']

# Write one .sh script per hyperparameter combination (4 * 4 * 5 = 80 scripts).
command_list = []
for dis_warm_up in dis_warm_up_list:
    for gene_warm_up in gene_warm_up_list:
        for action in action_list:
            action_type = 5 if action == 'all' else 4
            command = ('sst5_adv_cnn_' + str(action_type) + '_' + action + '_'
                       + str(dis_warm_up) + '_' + str(gene_warm_up) + '.sh')
            with open(command, 'w') as f:
                f.write(command_raw.format('cnn', action, dis_warm_up,
                                           gene_warm_up, action_type))
            command_list.append(command)

print(command_list)
print(len(command_list))
input('check out!')  # pause so the generated scripts can be inspected first

# Assign GPUs round-robin, then split the commands into groups of set_num.
run_list = []
for idx, command in enumerate(command_list):
    cuda_idx = idx % gpu_num
    run_list.append('CUDA_VISIBLE_DEVICES=' + str(cuda_idx) + ' bash ' + command)
run_list = [run_list[i:i + set_num] for i in range(0, len(run_list), set_num)]
class MyThread(threading.Thread):
    """Thread that runs one shell command to completion."""
    def __init__(self, command):
        threading.Thread.__init__(self)
        self.cmd = command

    def run(self):
        print("Starting " + self.cmd)
        os.system(self.cmd)
        print("Exiting " + self.cmd)

def all_done(thread_list):
    """True once every thread in the group has exited."""
    for thread in thread_list:
        if thread.is_alive():  # isAlive() was removed in Python 3.9
            return False
    return True

# Run one group at a time: start set_num threads, then poll every
# sleep_time seconds until the whole group has finished.
for tmp_list in run_list:
    thread_list = []
    for run in tmp_list:
        thread = MyThread(run)
        thread_list.append(thread)
        thread.start()
    while not all_done(thread_list):
        time.sleep(sleep_time)
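One caveat with fixed groups: every group waits for its slowest job, so GPUs can sit idle near the end of each group. A sketch of a greedier scheduler, assuming the same command_list of generated script names as above (this queue-based dispatch is my own variant, not part of the original post):

import queue
import subprocess
from concurrent.futures import ThreadPoolExecutor

# A queue of free GPU ids: a worker takes one, runs a script on it,
# and puts it back, so a GPU picks up a new job the moment it frees up.
gpu_queue = queue.Queue()
for gpu_id in range(gpu_num):
    gpu_queue.put(gpu_id)

def run_on_free_gpu(script):
    gpu_id = gpu_queue.get()   # blocks until a GPU is free
    try:
        cmd = 'CUDA_VISIBLE_DEVICES=' + str(gpu_id) + ' bash ' + script
        print('Starting ' + cmd)
        subprocess.call(cmd, shell=True)
        print('Exiting ' + cmd)
    finally:
        gpu_queue.put(gpu_id)  # hand the GPU back

with ThreadPoolExecutor(max_workers=gpu_num) as pool:
    list(pool.map(run_on_free_gpu, command_list))

This runs one job per card at a time; to match the four-jobs-per-card concurrency above, put each gpu_id into the queue four times and raise max_workers to set_num.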