#main.py
import requests
import json
import pymongo
from multiprocessing import Pool
import time
import random
# Endpoint that receives the search form via POST; the code below parses
# its response as JSON.
url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'

# Request headers sent with every POST to ``url``.
headers = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    # NOTE(review): fixed value, although the form body length varies with
    # the page number; the HTTP library presumably recomputes it -- confirm.
    'Content-Length': '65',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    # Left empty here; presumably a valid session cookie has to be pasted
    # in before running -- TODO confirm.
    'Cookie': '',
    'Host': 'www.lagou.com',
    'Origin': 'https://www.lagou.com',
    # The percent-encoded parts are the UTF-8 bytes of the search keyword
    # (the same text as the 'kd' form field) and of the city filter.
    'Referer': (
        'https://www.lagou.com/jobs/list_'
        '%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90%E5%B8%88'
        '?px=default&city=%E5%85%A8%E5%9B%BD'
    ),
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.2; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/43.0.2357.130 Safari/537.36'
    ),
    # The "Anit" spelling is kept as-is: these are runtime header names.
    'X-Anit-Forge-Code': '0',
    'X-Anit-Forge-Token': 'None',
    'X-Requested-With': 'XMLHttpRequest',
}
# Keys copied verbatim from each posting into the stored MongoDB document,
# in the order they appear in the document.
_POSITION_FIELDS = (
    'createTime',
    'positionId',
    'positionName',
    'firstType',
    'secondType',
    'education',
    'city',
    'salary',
    'jobNature',
    'workYear',
    'companyId',
    'companyFullName',
    'financeStage',
    'companySize',
    'industryField',
)

# Seconds to wait on the server before a page request is abandoned, so a
# stalled connection cannot block a worker process indefinitely.
_REQUEST_TIMEOUT = 30


def postonepage(pn):
    """Fetch one page of search results and store its postings in MongoDB.

    Posts the search form for page ``pn`` to the module-level ``url`` with
    the module-level ``headers``, then inserts one document per posting
    into the ``position`` collection of the ``Lagou`` database on
    ``localhost:27017``.

    This is a best-effort operation: any ordinary exception (network
    failure, unexpected response shape, database error) is reported on
    stdout and swallowed so that the remaining pages are still processed.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught.

    Args:
        pn: Page number sent as the ``pn`` form field.

    Returns:
        None.
    """
    print(pn)
    postdata = {
        'first': 'false',
        'pn': pn,
        'kd': '数据分析师',
    }
    # One client per call, so each worker process owns its own connection;
    # it is closed in ``finally`` so that connections are not leaked.
    client = pymongo.MongoClient('localhost', 27017)
    try:
        position = client['Lagou']['position']
        wbdata = requests.post(
            url,
            headers=headers,
            data=postdata,
            timeout=_REQUEST_TIMEOUT,
        )
        jdata = wbdata.json()
        # NOTE(review): assumes ``positionResult`` iterates over dicts that
        # each hold a 'position' dict -- confirm against a live response.
        positionResult = jdata['content']['positionResult']
        # Random pause between requests issued by this worker.
        time.sleep(random.randint(4, 8))
        for j in positionResult:
            i = j['position']
            adata = {field: i[field] for field in _POSITION_FIELDS}
            position.insert_one(adata)
        print('success')
    except Exception as exc:
        # Keep going on failure, but say which page failed and why.
        print('one error occurred: page {}: {!r}'.format(pn, exc))
    finally:
        client.close()
if __name__ == '__main__':
    # Page numbers 1 through 499, spread over four worker processes.
    pages = range(1, 500)
    # ``map`` blocks until every page has been processed; leaving the
    # ``with`` block then releases the pool's worker processes instead of
    # leaving them to interpreter shutdown.
    with Pool(processes=4) as pool:
        pool.map(postonepage, pages)
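# "Reader comments" -- heading left over from the web page this script was copied from; not part of the program.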