爬取人人贷网站

作者: 异想派 | 来源:发表于2017-05-10 00:06 被阅读36次
    # -*- coding: UTF-8 -*- 
    # Scrapes the loan listing page and prints a table of loans.
    import bs4
    import sys 
    # Python 2 idiom: reload sys, then switch the process-wide default string
    # encoding to UTF-8. `reload` is not a builtin on Python 3.
    reload(sys) 
    sys.setdefaultencoding("utf-8") 
    import requests
    from bs4 import BeautifulSoup
    import json
    
    # Listing page to download.
    # NOTE(review): "#page-1" is a URL fragment, which HTTP clients normally do
    # not send to the server - confirm paging works as intended.
    url="https://www.renrendai.com/loan#page-1"
    def gethtml(url):
        """Download a page and return its body as text.

        Args:
            url: Address of the page to fetch.

        Returns:
            The response body as text, or None when the request fails
            (connection error, timeout, or an HTTP error status).
        """
        try:
            # A timeout keeps the script from hanging forever on a stalled server.
            r = requests.get(url, timeout=10)
            r.raise_for_status()
            return r.text
        except requests.RequestException:
            # Only network/HTTP failures are expected here; anything else
            # (e.g. KeyboardInterrupt) should propagate to the caller.
            print("连接失败")
            return None
    
    def parsehtml(html):
        """Extract loan records from the JSON embedded in the listing page.

        Every element with id "loan-list-rsp" is expected to hold JSON text;
        each entry under data -> loans becomes one output row.

        Args:
            html: Page markup as returned by gethtml(), or None/empty when the
                download failed.

        Returns:
            A list of [loanId, title, amount, interest, months, startTime]
            rows; empty when there is no markup or no matching element.

        Raises:
            ValueError: If an element's text is not valid JSON.
            KeyError: If the decoded JSON lacks an expected key.
        """
        if not html:
            # gethtml() returns None on failure; there is nothing to parse.
            return []
        soup = BeautifulSoup(html, "html.parser")
        fields = ("loanId", "title", "amount", "interest", "months", "startTime")
        mess = []
        # Calling the soup object is shorthand for find_all().
        for tag in soup(id="loan-list-rsp"):
            payload = tag.string
            if payload is None:
                # .string is None when the tag does not hold exactly one text
                # node, so there is no JSON to decode.
                continue
            # json.loads accepts text directly; no byte-string round trip needed.
            loans = json.loads(payload)["data"]["loans"]
            mess.extend([loan[key] for key in fields] for loan in loans)
        return mess
    
    def showinfo(num, mess):
        """Print a header line followed by at most ``num`` loan rows.

        Args:
            num: Maximum number of rows to print.
            mess: Sequence of rows; each row supplies six values in the same
                order as the header columns.
        """
        tplt = "{:^10}\t{:^10}\t{:^10}\t{:^10}\t{:^10}\t{:^10}"
        # Single-argument print(...) behaves the same as the print statement.
        print(tplt.format("订单标的", "借款标题", "金额", "利息", "期限", "募资时间"))
        # Slice instead of indexing so a short list does not raise IndexError;
        # max() keeps a negative num printing no rows, as range(num) did.
        for row in mess[:max(num, 0)]:
            print(tplt.format(row[0], row[1], row[2], row[3], row[4], row[5]))
    
    def main():
        """Fetch the loan listing, parse it, and print up to 20 loans."""
        html = gethtml(url)
        if html is None:
            # gethtml() has already reported the failure; nothing to parse.
            return
        mess = parsehtml(html)
        # Never request more rows than were actually parsed.
        showinfo(min(20, len(mess)), mess)
    
    # Runs the scraper unconditionally whenever this file is executed or imported.
    main()
    

    相关文章

      网友评论

        本文标题:爬取人人贷网站

        本文链接:https://www.haomeiwen.com/subject/bsqwtxtx.html