from numba import cuda
# Number of CUDA cores per streaming multiprocessor (SM), keyed by the
# device's compute capability as a ``(major, minor)`` tuple.  The values
# follow the ``_ConvertSMVer2Cores`` table in NVIDIA's ``helper_cuda.h``;
# this figure cannot be queried from the driver, so new architectures must
# be added here by hand.
cc_cores_per_SM_dict = {
    (2, 0): 32,
    (2, 1): 48,
    (3, 0): 192,
    (3, 2): 192,
    (3, 5): 192,
    (3, 7): 192,
    (5, 0): 128,
    (5, 2): 128,
    (5, 3): 128,
    (6, 0): 64,
    (6, 1): 128,
    (6, 2): 128,
    (7, 0): 64,
    (7, 2): 64,
    (7, 5): 64,
    (8, 0): 64,
    (8, 6): 128,
    (8, 7): 128,
    (8, 9): 128,
    (9, 0): 128,
}

# Query the GPU bound to the current CUDA context.
d = cuda.get_current_device()
my_sms = d.MULTIPROCESSOR_COUNT
my_cc = d.compute_capability

cores_per_sm = cc_cores_per_SM_dict.get(my_cc)
if cores_per_sm is None:
    # Fail with an actionable message rather than the opaque
    # "unsupported operand type(s) for *: 'NoneType' and 'int'" that the
    # multiplication below would otherwise produce.
    raise RuntimeError(
        f"Unknown compute capability {my_cc}: add its cores-per-SM value "
        "to cc_cores_per_SM_dict"
    )

# Total CUDA cores on the device = cores per SM * number of SMs.
total_cores = cores_per_sm * my_sms
# "User comments" -- stray page heading (originally "网友评论"); not part of the script.