美文网首页Python
Python 简单抓取拉勾iOS职位,matplotlib展示

Python 简单抓取拉勾iOS职位,matplotlib展示

作者: 十一岁的加重 | 来源:发表于2018-06-14 23:30 被阅读4次
    # coding=utf-8
    import requests
    import time
    import sys
    # Python 2-only idiom: reload(sys) makes sys.setdefaultencoding callable
    # again so the interpreter-wide default string encoding can be switched
    # to UTF-8. `reload` is not a builtin on Python 3.
    # NOTE(review): implicit unicode/bytes conversions elsewhere in this
    # script may depend on this setting - verify before removing it.
    reload(sys)
    sys.setdefaultencoding('utf8')
    
    import requests
    import json
    
    # Request headers replayed on every POST to the search endpoint.
    # NOTE(review): the values look like a capture from one browser session -
    # confirm they are still accepted before relying on them.
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Connection': 'keep-alive',
        # NOTE(review): fixed at '23' although the form body varies with the
        # page number - confirm whether the HTTP library overrides it.
        'Content-Length': '23',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        # One "name=value" pair per literal; adjacent literals are joined by
        # the compiler into the single "; "-separated cookie string.
        # NOTE(review): session-specific tokens, presumably expired by now.
        'Cookie': (
            'user_trace_token=20180614213205-1ed85102-96b3-46b0-97c0-e73b711c8763; '
            'JSESSIONID=ABAAABAAAGFABEF5EE54015C5797042B80918E0FC34818B; '
            'X_HTTP_TOKEN=b95527a4550f8d93d47b619adada743c; '
            'LGUID=20180614213509-c1f72ef8-6fd7-11e8-a465-525400f775ce; '
            '_ga=GA1.2.1217499303.1528983310; '
            '_gid=GA1.2.1865314832.1528983311; '
            'TG-TRACK-CODE=index_search; '
            'index_location_city=%E5%8C%97%E4%BA%AC; '
            '_gat=1; '
            'Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1528983310,1528985325; '
            'LGSID=20180614220844-72ff3add-6fdc-11e8-a468-525400f775ce; '
            'PRE_UTM=; '
            'PRE_HOST=www.google.com.tw; '
            'PRE_SITE=https%3A%2F%2Fwww.google.com.tw%2F; '
            'PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; '
            'SEARCH_ID=2e6ad26430e14a729f89c145ed7e4965; '
            'Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1528985510; '
            'LGRID=20180614221149-e167c83b-6fdc-11e8-9642-5254005c3644'
        ),
        'DNT': '1',
        'Host': 'www.lagou.com',
        'Origin': 'https://www.lagou.com',
        'Referer': (
            'https://www.lagou.com/jobs/list_iOS'
            '?city=%E5%8C%97%E4%BA%AC&cl=false&fromSearch=true&labelWords=&suginput='
        ),
        'User-Agent': (
            'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/66.0.3359.181 Mobile Safari/537.36'
        ),
        # The "Anit" spelling in the next two header names is kept exactly as
        # written. NOTE(review): presumably what the site expects - confirm.
        'X-Anit-Forge-Code': '0',
        'X-Anit-Forge-Token': 'None',
        'X-Requested-With': 'XMLHttpRequest',
    }

    # Search endpoint; %E5%8C%97%E4%BA%AC is the URL-encoded city name 北京.
    ajax_url = (
        'https://www.lagou.com/jobs/positionAjax.json'
        '?px=default&city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false'
    )
    
    # Fetch result pages 31-34 of the search, collect every posting into
    # `out`, then save the combined list as one JSON file.
    # Local import: io.open accepts an explicit encoding on Python 2 and 3.
    import io

    out = []
    for i in range(31, 35):
        # Wait before every request (presumably to avoid being throttled).
        time.sleep(3)
        post_param = {"first": "false", "pn": str(i), "kd": "iOS"}
        try:
            # The timeout keeps a stalled connection from blocking forever.
            r = requests.post(ajax_url, headers=headers, data=post_param,
                              timeout=10)
        except requests.RequestException as e:
            print("page " + str(i) + ": request failed (" + str(e)
                  + "), stopping early")
            break
        try:
            result = json.loads(r.text)
        except ValueError:
            print("page " + str(i) + ": response is not JSON, stopping early")
            break
        print(str(i))
        print(result)
        # A reply without content.positionResult.result (e.g. an error
        # payload) must not abort the script: stop paging so the postings
        # gathered so far are still written below.
        try:
            ar = result["content"]["positionResult"]["result"]
        except (KeyError, TypeError):
            ar = None
        if not isinstance(ar, list):
            print("page " + str(i) + ": no position list in response, "
                  "stopping early")
            break
        # extend() appends in place instead of copying the list every page.
        out.extend(ar)
        print(len(out))
    outputFilePath = "/Users/dfpo/Desktop/postman22.json"
    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    jsonStr = json.dumps(out, ensure_ascii=False)
    if isinstance(jsonStr, bytes):
        # Python 2 returns a byte string when the data is pure ASCII;
        # io.open in text mode only accepts unicode.
        jsonStr = jsonStr.decode('utf-8')
    # Encode explicitly as UTF-8 rather than relying on the interpreter's
    # default encoding.
    with io.open(outputFilePath, 'w', encoding='utf-8') as f:
        f.write(jsonStr)
    
    # coding=utf-8
    import os
    import matplotlib.pyplot as plt
    import json
    def getOldArray(path="/Users/dfpo/Desktop/postman.json"):
        """Load and decode a JSON file.

        Args:
            path: JSON file to read. Defaults to the location that used to be
                hard-coded, so existing zero-argument calls behave as before.

        Returns:
            The decoded JSON content, or ``None`` (after printing a notice)
            when the file does not exist.
        """
        # Local import keeps the function self-contained; io.open accepts an
        # explicit encoding on both Python 2 and Python 3.
        import io

        if not os.path.exists(path):
            print(path + "文件不存在,不能执行合并操作")
            return None
        # Read as UTF-8 explicitly instead of the platform default encoding.
        with io.open(path, 'r', encoding='utf-8') as load_f:
            return json.load(load_f)
    
    # Tally the comma-separated "positionAdvantage" labels of every saved
    # posting and draw their distribution as a pie chart.
    array = getOldArray()
    if array is None:
        # getOldArray() returns None (after printing a notice) when the file
        # is missing; treat that as "no postings" instead of failing below.
        array = []

    names = []
    counts = []
    # Maps each label to its position in names/counts so a repeated label is
    # located without rescanning the list.
    nameIdxByLabel = {}

    # Other keys the author's earlier commented-out variants charted the same
    # way: workYear, education, financeStage, district, salary, companySize,
    # firstType, industryField (read as one value per posting) and
    # positionLables, businessZones, companyLabelList (iterated as lists).
    for posting in array:
        advantage = posting.get("positionAdvantage")
        if advantage is None:
            # Test for None before splitting: calling .split on None raises.
            continue
        for name in advantage.split(','):
            nameIdx = nameIdxByLabel.get(name)
            if nameIdx is None:
                # First sighting: labels keep their order of appearance.
                nameIdxByLabel[name] = len(names)
                names.append(name)
                counts.append(1)
            else:
                counts[nameIdx] += 1

    labels = names
    sizes = counts
    explode = [0] * len(names)
    if counts:
        # Pull the most frequent label's slice out of the pie.
        maxIdx = counts.index(max(counts))
        explode[maxIdx] = 0.1
        fig1, ax1 = plt.subplots()
        ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
                shadow=True, startangle=90)
        # Equal axis scaling so the pie is drawn as a circle.
        ax1.axis('equal')

        plt.show()
    else:
        # max() raises on an empty list, and there is nothing to draw.
        print("no labels to plot")
    

    效果


    image.png image.png image.png image.png image.png image.png image.png

    相关文章

      网友评论

        本文标题:Python 简单抓取拉勾iOS职位,matplotlib展示

        本文链接:https://www.haomeiwen.com/subject/izgueftx.html