用scrapy做爬虫的时候需要开多个爬虫子进程,为了定时开启和关闭爬虫子进程,需要对子进程做控制,而关闭进程有两种方法
-----要简单高效,直接看方法2吧-----
方法1:通过获取全部windows进程,获取增量进程方式
该方法先获取所有 Windows 进程,从中筛选出进程名为“python.exe”的进程;杀进程时,除主进程外全部杀掉
该方法存在的问题在于,如果杀进程的时候刚好有其他人的python在运行,就把其他的也杀了
主要方法:os.kill(pid,signal.SIGTERM)
注意:不要写成 os.kill(pid, signal.SIGKILL)——Windows 上的 signal 模块没有定义 SIGKILL,这样写会直接报错(AttributeError)
import os
import signal
import time
from multiprocessing import Process

import psutil
# PID of the process that imported this module (the scheduler). kill_spider()
# compares every candidate PID against it so the scheduler never kills itself.
main_proce_id = os.getpid()
def get_process_info(process_name='python.exe'):
    """Return the PIDs of all running processes with the given executable name.

    Args:
        process_name: Process name to match exactly. Defaults to
            ``'python.exe'``, the name the original implementation hard-coded.

    Returns:
        A list of integer PIDs, in the order reported by ``psutil.pids()``.
    """
    python_pids = []
    for pid in psutil.pids():
        try:
            pid_name = psutil.Process(pid).name()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # The process exited after pids() was taken, or it is a protected
            # system process we may not inspect; neither is a spider, skip it.
            continue
        if pid_name == process_name:
            python_pids.append(pid)
    return python_pids
def byProcess():
    """Create and start the downloader process plus one process per spider.

    The first process runs ``downApp.main('')``; every entry of
    ``presePage.MARKETS['something_info']`` adds a process that runs
    ``targ(entry['something_name'])``. Nothing is returned.
    """
    workers = [Process(target=downApp.main, args=('',))]
    workers.extend(
        Process(target=targ, args=(entry['something_name'],))
        for entry in presePage.MARKETS['something_info']
    )
    for worker in workers:
        worker.start()
    # Do NOT join() the workers here: join() blocks until each child exits,
    # so the caller would never get control back to stop them later.
def targ(spider_name):
    """Run one spider; used as the ``target`` of each child process.

    Args:
        spider_name: The ``'something_name'`` value that byProcess() passes
            for this child.

    This is a template stub: replace the body with the code that actually
    launches the spider.
    """
    # --- your code ---
    pass
def kill_spider():
    """Send SIGTERM to every process found by get_process_info() except this one.

    The PID stored in ``main_proce_id`` is skipped. Each PID is signalled in
    its own ``try`` so that a process which has already exited, or which we
    are not allowed to signal, does not stop the remaining ones from being
    terminated.

    Warning: candidates are matched by process name only, so unrelated
    Python processes running on the same machine are terminated as well.
    """
    for pid in get_process_info():
        if pid == main_proce_id:
            continue
        try:
            os.kill(pid, signal.SIGTERM)
        except OSError:
            # Process already gone or access denied; report it and move on.
            print('没有如此进程!!!')
        else:
            print('已关闭子进程')
def start_in_time():
    """Poll the local clock forever; run spiders from 20:00 until 08:00.

    When the local hour is 20 the spider processes are started via
    byProcess(); when it is 8 they are killed via kill_spider(). After either
    action the loop sleeps for twelve hours. Every pass ends with a status
    line and a two-minute sleep. This function never returns.
    """
    half_day = 60 * 60 * 12
    while True:
        hour = time.localtime().tm_hour
        if hour == 20:
            print('到点啦,开始')
            WriteLog.writeLog('time', '到点啦,开始:%s' % hour)
            byProcess()
            time.sleep(half_day)
        elif hour == 8:
            print('到点啦,结束')
            WriteLog.writeLog('time', '到点啦,结束:%s' % hour)
            kill_spider()
            time.sleep(half_day)
        print(WriteLog.localTime(), '没有到点,不操作')
        time.sleep(120)
# Script entry point. The guard matters on Windows: multiprocessing starts
# children by re-importing the main module, and without the guard every child
# would start its own scheduler loop.
if __name__ == '__main__':
    start_in_time()
方法2:通过子进程直接关闭
推荐用这个方法,又简单又准确
from multiprocessing import Process
import time
def byProcess():
    """Create and start the downloader and spider processes; return them.

    The first process runs ``downApp.main('')``; every entry of
    ``presePage.MARKETS['something_info']`` adds a process that runs
    ``targ(entry['something_name'])``.

    Returns:
        The list of started ``Process`` objects, so the caller can
        ``terminate()`` them later.
    """
    workers = [Process(target=downApp.main, args=('',))]
    workers.extend(
        Process(target=targ, args=(entry['something_name'],))
        for entry in presePage.MARKETS['something_info']
    )
    for worker in workers:
        worker.start()
    # Do NOT join() the workers here: join() blocks until each child exits,
    # so the handles would not be returned while the spiders are running.
    return workers
def targ(spider_name):
    """Run one spider; used as the ``target`` of each child process.

    Args:
        spider_name: The ``'something_name'`` value that byProcess() passes
            for this child.

    This is a template stub: replace the body with the code that actually
    launches the spider.
    """
    # --- your code ---
    pass
def start_in_time_new():
    """Start the child processes at 20:00 and terminate them at 08:00, forever.

    When the local hour is 20 the processes are started via byProcess() and
    their handles are kept; when it is 8 every kept handle is terminated.
    After either action the loop sleeps for twelve hours. Every pass ends
    with a status line and a two-minute sleep. This function never returns.
    """
    # Must be initialised OUTSIDE the loop: resetting it on every pass would
    # throw away the handles obtained at 20:00 before the 08:00 pass runs, so
    # nothing would ever be terminated.
    proce = None
    while True:
        this_time = int(time.strftime("%H", time.localtime()))
        if this_time == 20:
            print('到点啦,开始')
            WriteLog.writeLog('time', '到点啦,开始:%s' % this_time)
            proce = byProcess()
            time.sleep(60 * 60 * 12)
        if this_time == 8:
            print('到点啦,结束')
            WriteLog.writeLog('time', '到点啦,结束:%s' % this_time)
            if proce is not None:
                for item in proce:
                    item.terminate()
                # Forget the handles so a later 08:00 pass does not act on
                # processes that were already stopped.
                proce = None
            time.sleep(60 * 60 * 12)
        print(WriteLog.localTime(), '没有到点,不操作')
        time.sleep(120)
还有其他按进程名称杀进程的方式,这里就不写了,按方法1的思路(先按进程名筛选,再逐个 kill)就可以实现。
网友评论