[Python Web Scraper] People's Daily Science & Technology

Author: d1b0f55d8efb | Published 2017-09-14 10:42
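The script below fetches the People's Daily Science & Technology channel (http://scitech.people.com.cn) with requests, parses the page with BeautifulSoup, and prints three things: the channel tab titles, each headline title together with its absolute article URL, and the text of the currently highlighted news block.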
    # coding: utf-8
    __author__ = 'cuiwnehao'
    from bs4 import BeautifulSoup
    import requests

    url = 'http://scitech.people.com.cn'
    req = requests.get(url)
    req.encoding = "GB2312"    # the page is served as GB2312, so set the encoding explicitly
    html = req.text
    soup = BeautifulSoup(html, 'lxml')

    # the <h2 class="qiehuan1 mt15"> element holds the channel tab titles in <i> tags
    h2_result = soup.find('h2', class_='qiehuan1 mt15')
    biaotis = h2_result.find_all('i')
    biaoti_list = []
    for biaot in biaotis:
        biaoti = biaot.text.split()[1]    # keep the second whitespace-separated token as the tab title
        biaoti_list.append(biaoti)
    print(biaoti_list)

    # the first <div class="headingNews qiehuan1_c"> holds the headline news list
    cibiaoti = soup.find_all('div', class_='headingNews qiehuan1_c')[0]
    h5_result = cibiaoti.find_all('h5')
    on = cibiaoti.find_all('div', class_='on')
    h5_list = []
    for h5 in h5_result:
        h5_biaoti = h5.text
        h5_list.append(h5_biaoti)
        h5_a = h5.find('a')    # the 'href' attribute is on the <a> tag; it cannot be read from the <h5> itself
        h5_url = h5_a['href']
        cibiaoti_url = url + h5_url    # article links are relative paths, so prepend the site root
        print(h5_biaoti, cibiaoti_url)

    # print the text of the currently highlighted ("on") news block
    for jie in on:
        jieguo = jie.text
        print(jieguo)

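The script above hard-codes the GB2312 encoding and builds article links by string concatenation, which only works while every href on the page is a site-relative path. Below is a minimal sketch of the same headline scrape, not from the original post, that lets requests guess the encoding via apparent_encoding, joins links with urllib.parse.urljoin, and writes the results to a CSV file; the class name headingNews qiehuan1_c is the same one the script above relies on.

    # coding: utf-8
    # A variant of the headline scrape above (a sketch, not the original author's code):
    # detect the page encoding automatically and build absolute article URLs with urljoin.
    import csv
    from urllib.parse import urljoin

    import requests
    from bs4 import BeautifulSoup

    url = 'http://scitech.people.com.cn'
    req = requests.get(url)
    req.encoding = req.apparent_encoding    # let requests guess GB2312/GBK from the page content
    soup = BeautifulSoup(req.text, 'lxml')

    rows = []
    heading_div = soup.find('div', class_='headingNews qiehuan1_c')    # same class as above
    if heading_div is not None:
        for h5 in heading_div.find_all('h5'):
            a = h5.find('a')
            if a is None or not a.get('href'):
                continue    # skip headings without a usable link
            rows.append((h5.get_text(strip=True), urljoin(url, a['href'])))

    # save the (title, absolute url) pairs to a CSV file
    with open('scitech_headlines.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['title', 'url'])
        writer.writerows(rows)

    print('saved %d headlines' % len(rows))

urljoin handles both relative paths and full http:// links, so this version keeps working even if the page switches to absolute hrefs.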