美文网首页
智联详情简易爬取链接及代码

智联详情简易爬取链接及代码

作者: Meter_bulacn | 来源:发表于2019-03-14 16:02 被阅读0次
wer.png

import requests
import re
import json
from lxml import etree
import urllib.parse
import urllib
import pymysql,random,time
# Module-level MySQL connection and cursor used by the scraper function below.
# NOTE(review): host and credentials are hard-coded; consider reading them
# from configuration or environment variables instead.
conn = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='bc123',
    db='Zhizhi',
    charset='utf8',
)
cur = conn.cursor()

# HTTP headers sent with every request; only a desktop-browser User-Agent is set.
header = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
    ),
}

def qingqiu(url, category_id=35):
    """Fetch one page of job-search results and store each posting in MySQL.

    The response body is parsed as JSON; postings are read from
    ``data -> results``. For every posting one row is inserted into
    ``app_lian_details`` with these values:

    * city      <- ``city.display``
    * Ur_l      <- ``positionURL``
    * Yea_r     <- ``workingExp.name``
    * buzhu     <- last entry of ``welfare`` (empty string when there is none)
    * gongzi    <- ``salary``
    * gongsi    <- ``company.name``
    * xueli     <- ``eduLevel.name``
    * d_id_id   <- ``category_id``

    Args:
        url: Full URL of the search API request.
        category_id: Value written to the ``d_id_id`` column of every row.
            Defaults to 35, the value that used to be hard-coded.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the JSON payload lacks one of the fields listed above.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=header, timeout=10)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()
    payload = json.loads(response.text)
    postings = payload["data"]["results"]

    sql = '''insert into app_lian_details(id,city,Ur_l,Yea_r,buzhu,gongzi,gongsi,xueli,d_id_id) values(0,%s,%s,%s,%s,%s,%s,%s,%s)'''

    try:
        for posting in postings:
            # Reset per posting: without this, a posting with no welfare tags
            # would raise NameError (first row) or silently reuse the previous
            # posting's tag (later rows).
            last_welfare = ''
            for tag in posting["welfare"] or []:
                # As before, only the last welfare tag is kept.
                last_welfare = tag

            cur.execute(sql, (
                posting["city"]["display"],
                posting["positionURL"],
                posting["workingExp"]["name"],
                last_welfare,
                posting["salary"],
                posting["company"]["name"],
                posting["eduLevel"]["name"],
                category_id,
            ))
        # One transaction per page: either every posting is stored or none is.
        conn.commit()
    except Exception:
        # Leave the shared connection clean before propagating the error.
        conn.rollback()
        raise
               
 

if __name__ == '__main__':
    # Script entry point: scrape a single search-result page.
    # The query string asks for pageSize=90, cityId=530, salary=10001,15000 and
    # a URL-encoded keyword (kw); the remaining parameters are copied verbatim
    # from a captured browser request.
    search_url = "https://fe-api.zhaopin.com/c/i/sou?pageSize=90&cityId=530&salary=10001,15000&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1&kw=%E6%8A%95%E8%B5%84%E7%BB%8F%E7%90%86&kt=3&=10001&at=9a1286fe007343a3814416d6c85149d9&rt=e64ecc55cfa04c15bba0d12e000b930a&_v=0.72906389&userCode=1007625844&x-zp-page-request-id=6626a2b7e5b940dbb80a83b21762e320-1552458010604-278814"
    qingqiu(search_url)

存储和查询数据库的一些 SQL 语句

-- Schema for the scraped job data (MySQL dialect).
-- Tables are created parent-first (Lei -> TLei -> details) so that every
-- foreign key references a table that already exists.

-- Top-level category.
create table Lei(
    id int auto_increment not null primary key comment'ID',
    lei varchar(1000) null comment'类目'          -- category name
);

-- Second-level category; t_id references its parent row in Lei.
create table TLei(
    id int auto_increment not null primary key comment'ID',
    tlei varchar(1000) null comment'类目2',       -- second-level category name
    t_id int null,
    foreign key (t_id) references Lei(id)
);

-- One scraped job posting; d_id references its second-level category in TLei.
create table details(
    id int auto_increment not null primary key comment'ID',
    city varchar(1000) null comment'城市',        -- city
    Ur_l varchar(1000) null comment'链接',        -- posting link
    Yea_r varchar(1000) null comment'年限',       -- years of experience
    buzhu varchar(1000) null comment'补助',       -- welfare / benefits
    gongzi varchar(1000) null comment'工资',      -- salary
    gongsi varchar(1000) null comment'公司',      -- company
    zhiwei varchar(1000) null comment'职位',      -- position
    xueli varchar(1000) null comment'学历',       -- education level
    d_id int null,
    foreign key (d_id) references TLei(id)
);

-- Seed second-level categories, all attached to top-level category 1.
-- Columns are listed explicitly and the id is omitted so the auto-increment
-- key is assigned by the server, instead of relying on a literal 0 (which
-- depends on the server sql_mode) and on the physical column order.
insert into app_lian_tlei (tlei, t_id_id) values
    ('Java 开发', 1),
    ('UI设计师', 1),
    ('Web前端', 1),
    ('PHP', 1),
    ('Python', 1),
    ('Android', 1),
    ('美工', 1),
    ('深度学习', 1),
    ('算法工程师', 1),
    ('Hadoop', 1),
    ('Node.js', 1),
    ('数据开发', 1),
    ('数据分析师', 1),
    ('数据架构', 1),
    ('人工智能', 1),
    ('区块链', 1);

-- Second batch of categories, also attached to top-level category 1.
insert into app_lian_tlei (tlei, t_id_id) values
    ('UI电气工程师', 1),
    ('电子工程师', 1),
    ('PLC', 1),
    ('测试工程师', 1),
    ('设备工程师', 1),
    ('硬件工程师', 1),
    ('结构工程师', 1),
    ('工艺工程师', 1),
    ('产品经理', 1),
    ('新媒体运营', 1),
    ('运营专员', 1),
    ('淘宝运营', 1),
    ('天猫运营', 1),
    ('产品助理', 1),
    ('产品运营', 1),
    ('淘宝客服', 1),
    ('游戏运营', 1),
    ('编辑', 1);

-- Company, salary and second-level category name for every posting
-- that has a matching category.
select
    d.gongsi,
    d.gongzi,
    t.tlei
from details as d
inner join TLei as t
    on t.id = d.d_id;

-- Every second-level category together with its parent category.
-- The join column is t_id_id, matching the other app_lian_* query in this
-- file and the d_id_id naming used for app_lian_details.
-- NOTE(review): select * is kept because the full column list of the
-- app_lian_* tables is not shown here; confirm and list columns explicitly.
select *
from app_lian_tlei as t
inner join app_lian_lei as l
    on t.t_id_id = l.id;

-- Company and salary of each posting with its second-level and top-level
-- category names. Each join carries its own ON condition rather than leaving
-- the first join unconditioned and stacking both predicates on the last one.
select
    d.gongsi,
    d.gongzi,
    t.tlei,
    l.lei
from app_lian_details as d
inner join app_lian_tlei as t
    on d.d_id_id = t.id
inner join app_lian_lei as l
    on t.t_id_id = l.id;

相关文章

网友评论

      本文标题:智联详情简易爬取链接及代码

      本文链接:https://www.haomeiwen.com/subject/crhcmqtx.html