from urllib import request,parse
import json,pymysql,re
def zhilianSpider(url):
    """Fetch one Zhilian job-search API page and store every job posting.

    Downloads the JSON response at *url*; when the API reports success
    (``data['code'] == 200``) each job in ``data['data']['results']`` is
    flattened into a dict and persisted via ``save_data_to_db``.

    :param url: full Zhilian search-API URL (fe-api.zhaopin.com/c/i/sou?...)
    :returns: None; side effect is one DB insert per job.
    """
    response_data = load_page_data(url)
    data = json.loads(response_data)
    if data['code'] != 200:
        # API reported failure; nothing to store.
        return
    print('请求成功')
    for job in data['data']['results']:
        zhilian_job = {
            'jobName': job['jobName'],
            'salary': job['salary'],
            'city': job['city']['display'],
            'workingExp': job['workingExp']['name'],
            # NOTE(review): key spelled 'sduLevel' (likely a typo for
            # 'eduLevel') but kept as-is — it must match the existing
            # column name in the `zhilian` table. Confirm before renaming.
            'sduLevel': job['eduLevel']['name'],
            # welfare is a list of tags; stored as one space-joined string.
            'welfare': ' '.join(job['welfare']),
            'company': job['company']['name'],
            'companyType': job['company']['type']['name'],
            'people': job['company']['size']['name'],
        }
        save_data_to_db(zhilian_job)
        print('添加成功')
def company_page_data(html):
    """Extract company details from a Zhilian company-page HTML string.

    Scrapes the ``mian-company`` block (spelling kept as-is — it must
    match the site's actual class name) for the company URL, five
    ``<span>`` fields and the indented description ``<span>``.

    Fix over the original: the matches are now *returned* instead of
    only printed, so callers can use the data (printing is preserved
    for backward compatibility).

    :param html: raw page HTML.
    :returns: list of 7-tuples ``(url, span1..span5, description)``;
        empty list when the page does not match.
    """
    pattern = re.compile(
        '<div.*?class="mian-company">'
        '.*?<div.*?url.*?"(.*?)"'
        '.*?<span.*?>(.*?)</span>'
        '.*?<span.*?>(.*?)</span>'
        '.*?<span.*?>(.*?)</span>'
        '.*?<span.*?>(.*?)</span>'
        '.*?<span.*?>(.*?)</span>'
        '.*?<p.*?style="text-indent.*?>'
        '.*?<span.*?>(.*?)</span>',
        re.S,
    )
    result = pattern.findall(html)
    print(result)
    return result
def save_data_to_db(zhilian_job):
    """Insert one job record (a flat dict) into the `zhilian` table.

    Column names come from the dict keys; values travel as
    parameterized query arguments (never interpolated into the SQL).
    Commits on success, rolls back on any error. Relies on the
    module-level ``cursor`` and ``mysql_client`` created in __main__.
    """
    columns = ','.join(zhilian_job.keys())
    placeholders = ','.join(['%s'] * len(zhilian_job))
    sql = """
INSERT INTO zhilian(%s)
VALUE (%s)
""" % (columns, placeholders)
    try:
        cursor.execute(sql, list(zhilian_job.values()))
        mysql_client.commit()
    except Exception as err:
        # Best-effort insert: log the error and undo the partial work.
        print(err)
        mysql_client.rollback()
def load_page_data(url):
    """GET *url* and return the response body decoded as UTF-8.

    Sends a browser-like User-Agent header so the API does not reject
    the request as a bot.

    Fixes over the original: the response is closed via a context
    manager (the original leaked the connection), and the body is read
    only after the status check (the original read and decoded it even
    on non-200 responses).

    :param url: URL to fetch.
    :returns: decoded body on HTTP 200, otherwise None (unchanged
        behavior for callers).
    """
    req_header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0',
    }
    req = request.Request(url, headers=req_header)
    with request.urlopen(req) as response:
        if response.status == 200:
            return response.read().decode('utf-8')
    return None
if __name__ == '__main__':
    # Keyword arguments instead of positional: pymysql 1.0+ made the
    # connection parameters keyword-only, so the original positional
    # call breaks on current versions. Same values, same behavior.
    # NOTE(review): charset kept as 'utf8'; 'utf8mb4' would be needed
    # for full-range Unicode (emoji) — confirm against the table's
    # collation before changing.
    mysql_client = pymysql.Connect(
        host='127.0.0.1',
        user='root',
        password='18603503110',
        database='1712B',
        port=3306,
        charset='utf8',
    )
    # 创建游标(执行sql语句) — cursor shared with save_data_to_db().
    cursor = mysql_client.cursor()
    url = 'https://fe-api.zhaopin.com/c/i/sou?pageSize=90&cityId=489&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1&kw=%E6%8A%80%E6%9C%AF&kt=3&_v=0.41792226&x-zp-page-request-id=99f4ba4b537c448e831a297ae4de73f9-1545304025814-219164'
    zhilianSpider(url)
# 网友评论 — stray "netizen comments" text pasted in with the page source; commented out so the file parses.