1、类包含不能序列化的属性时,多进程异步执行失败
import multiprocessing
import os
import random
import sys
class A:
    """Counter-example: submitting a bound method while ``self`` owns the pool.

    ``apply_async(self.execute, ...)`` has to pickle the bound method, and
    therefore the whole instance, to hand it to a worker process.  Because the
    instance stores the ``multiprocessing.Pool`` itself, pickling fails and no
    task ever runs.  The pool is kept on ``self`` on purpose here: this class
    exists to demonstrate that failure.
    """

    # Class-level placeholder; every instance replaces it in __init__.
    pool = None

    def __init__(self):
        # Keeping the pool as instance state is what makes ``self`` unpicklable.
        self.pool = multiprocessing.Pool(3)

    def execute(self, dirs):
        """Write a two-line log file named after the last component of *dirs*.

        Args:
            dirs: Backslash-separated path string; the text after the last
                backslash is appended to ``"log"`` to form the file name,
                which is created in the current working directory.

        Returns:
            The placeholder id ``0``.  Any exception is logged and printed
            instead of being propagated.
        """
        pid = 0
        try:
            log_name = "log" + dirs.split('\\')[-1]
            # The context manager closes the file even if a write fails.
            with open(log_name, "w") as log_file:
                print("aa", file=log_file)
                print("start to exec..." + str(pid), file=log_file)
        except Exception as e:
            import logging
            logging.exception(e)
            print("error:%s" % e)
        return pid

    def _callback(self, pid):
        """Print the current process id and the value returned by ``execute``."""
        print(str(os.getpid()) + " " + str(pid))

    def _error_callback(self, exc):
        """Print the exception that prevented a submitted task from completing."""
        print("error:%s" % exc)

    def start(self):
        """Submit one ``execute`` task per directory, then close and join the pool."""
        print(str(os.getpid()))
        # Raw strings keep the backslashes literal without invalid-escape warnings.
        dirs = [r"D:\Python2to3", r"D:\eclipse", r"D:\gitworkspace"]
        try:
            for path in dirs:
                try:
                    # The pickling failure is not raised here; it is delivered
                    # through error_callback, which makes it visible.
                    self.pool.apply_async(
                        self.execute,
                        (path,),
                        callback=self._callback,
                        error_callback=self._error_callback,
                    )
                except Exception as e:
                    import logging
                    logging.exception(e)
        finally:
            self.pool.close()
            self.pool.join()


if __name__ == "__main__":
    A().start()
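执行上述代码时,提交的任务不会被执行,因为 A 的实例属性中包含了无法序列化(pickle)的 pool(进程池)。通过 apply_async 调用 self.execute 时,绑定方法会连同整个实例(即 self 的全部属性)一起被序列化后传给另一个进程;pool 无法序列化,因此任务提交失败。注意该异常不会在调用 apply_async 的位置直接抛出,只有对返回的结果对象调用 get() 或设置 error_callback 才能看到。只需将 pool 的初始化放入 start 函数中(作为局部变量,而不是实例属性)即可正常执行。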
import multiprocessing
import os
import random
import sys
class A:
    """Run ``execute`` in a process pool that is local to ``start``.

    The instance holds no pool, so the bound method ``self.execute`` can be
    pickled and sent to the worker processes.
    """

    def execute(self, dirs):
        """Write a two-line log file named after the last component of *dirs*.

        Args:
            dirs: Backslash-separated path string; the text after the last
                backslash is appended to ``"log"`` to form the file name,
                which is created in the current working directory.

        Returns:
            The placeholder id ``0``.  Any exception is logged and printed
            instead of being propagated.
        """
        pid = 0
        try:
            log_name = "log" + dirs.split('\\')[-1]
            # The context manager closes the file even if a write fails.
            with open(log_name, "w") as log_file:
                print("aa", file=log_file)
                print("start to exec..." + str(pid), file=log_file)
        except Exception as e:
            import logging
            logging.exception(e)
            print("error:%s" % e)
        return pid

    def _callback(self, pid):
        """Print the current process id and the value returned by ``execute``."""
        print(str(os.getpid()) + " " + str(pid))

    def _error_callback(self, exc):
        """Print the exception raised while submitting or running a task."""
        print("error:%s" % exc)

    def start(self):
        """Submit one ``execute`` task per directory and wait for all of them."""
        print(str(os.getpid()))
        # Raw strings keep the backslashes literal without invalid-escape warnings.
        dirs = [r"D:\Python2to3", r"D:\eclipse", r"D:\gitworkspace"]
        # The pool is a local variable, so it is never part of the pickled instance.
        pool = multiprocessing.Pool(3)
        try:
            for path in dirs:
                try:
                    pool.apply_async(
                        self.execute,
                        (path,),
                        callback=self._callback,
                        error_callback=self._error_callback,
                    )
                except Exception as e:
                    import logging
                    logging.exception(e)
        finally:
            # Always stop accepting work and wait for the workers to finish.
            pool.close()
            pool.join()


if __name__ == "__main__":
    A().start()
2、多进程通信时,Manager().dict() 只同步第一层;第二层如果是普通 dict,在子进程中修改第二层不起作用
一层dict:
from multiprocessing import Pool, Manager
def chid_proc(test_dict, i):
    """Store the square of *i* under key *i* in *test_dict*.

    Args:
        test_dict: Mutable mapping written to; in this example it is a
            ``Manager().dict()`` proxy shared with the parent process.
        i: Number used both as the key and as the value to square.
    """
    test_dict[i] = i * i
if __name__ == '__main__':
    # A plain dict (e.g. td = {'a': 1}) would be pickled into each worker, so
    # writes made there would never reach the parent; use a Manager dict proxy.
    td = Manager().dict()
    td['a'] = 1
    pool = Pool(3)
    for i in range(0, 3):
        pool.apply_async(chid_proc, args=(td, i))
    # Stop accepting tasks, then wait for the submitted ones to finish.
    pool.close()
    pool.join()
    print(td)
输出为:
{'a': 1, 0: 0, 2: 4, 1: 1}
二层dict:
from multiprocessing import Pool, Manager
def chid_proc(test_dict, i):
    """Store the square of *i* under key *i* in the mapping at ``test_dict["a"]``.

    Args:
        test_dict: Mapping whose ``"a"`` entry is itself a mutable mapping.
        i: Number used both as the inner key and as the value to square.

    Note:
        When *test_dict* is a ``Manager().dict()`` proxy and the ``"a"`` entry
        is a plain dict (as in this example), reading ``test_dict["a"]``
        returns a copy, so the assignment below changes only that copy and is
        not propagated back to the shared dict.
    """
    test_dict["a"][i] = i * i
if __name__ == '__main__':
    # Only the outer dict is a Manager proxy; the value stored under 'a' is a
    # plain dict, so worker-side changes to it are not propagated back.
    td = Manager().dict()
    td['a'] = {"b": 1, "c": 1}
    pool = Pool(3)
    for i in range(0, 3):
        pool.apply_async(chid_proc, args=(td, i))
    # Stop accepting tasks, then wait for the submitted ones to finish.
    pool.close()
    pool.join()
    print(td)
输出为:
{'a': {'b': 1, 'c': 1}}
原因是第二层不是 Manager().dict():通过代理读取 td["a"] 得到的只是内层普通 dict 的一个副本,对副本的修改不会写回 Manager。因此将第二层也改为 Manager().dict()。
from multiprocessing import Pool, Manager
def chid_proc(test_dict, i):
    """Store the square of *i* under key *i* in the mapping at ``test_dict["a"]``.

    Args:
        test_dict: Mapping whose ``"a"`` entry is itself a mutable mapping; in
            this example both levels are ``Manager().dict()`` proxies, so the
            write goes through the inner proxy to the shared dict.
        i: Number used both as the inner key and as the value to square.
    """
    test_dict["a"][i] = i * i
if __name__ == '__main__':
    # Both levels are Manager dict proxies, so writes to the inner dict made
    # in the workers are visible to the parent.  Each Manager() call starts
    # its own manager process.
    td = Manager().dict()
    a = Manager().dict()
    a["b"] = 1
    td['a'] = a
    pool = Pool(3)
    for i in range(0, 3):
        pool.apply_async(chid_proc, args=(td, i))
    # Stop accepting tasks, then wait for the submitted ones to finish.
    pool.close()
    pool.join()
    # Printing the outer dict shows the inner proxy object; print the inner
    # dict separately to see its contents.
    print(td)
    print(td["a"])
输出为:
{'a': <DictProxy object, typeid 'dict' at 0x2970ac8>}
{'b': 1, 0: 0, 2: 4, 1: 1}
从上述例子可以看出,多进程间用多层 dict 通信时,每一层都要使用 Manager().dict();否则需要在修改内层普通 dict 之后,把它重新赋值回外层的代理对象(例如 d = td["a"]; d[i] = i * i; td["a"] = d),修改才会生效。
网友评论