[Python Crawler] People's Daily Science & Technology (人民日报科技)

Author: d1b0f55d8efb | Published 2017-09-14 10:42

The script below requests the People's Daily science & technology channel homepage, reads the tab names in the headline block, prints the title and full URL of each headline under the first tab, and finally prints the text of the entries marked with class 'on'.

# __author__: 'cuiwnehao'
# coding: utf-8
from bs4 import BeautifulSoup
import requests
from urllib.parse import urljoin

url = 'http://scitech.people.com.cn'
req = requests.get(url)
req.encoding = "GB2312"          # the channel page is served as GB2312, so set the encoding explicitly
html = req.text
soup = BeautifulSoup(html, 'lxml')
# the tab bar of the headline block: an <h2> whose <i> children carry the tab names
h2_result = soup.find('h2', class_='qiehuan1 mt15')
# print(len(h2_result))
biaotis = h2_result.find_all('i')
# print(len(biaotis))
biaoti_list = []
for biaot in biaotis:
    biaoti = biaot.text.split()[1]   # take the second whitespace-separated token as the tab name
    # print(biaoti)
    biaoti_list.append(biaoti)
print(biaoti_list)

# the headline list under the first tab
cibiaoti = soup.find_all('div', class_='headingNews qiehuan1_c')[0]
# print(cibiaoti)
h5_result = cibiaoti.find_all('h5')
# print(len(h5_result))
on = cibiaoti.find_all('div', class_='on')
h5_list = []
for h5 in h5_result:
    h5_biaoti = h5.text
    # print(h5_biaoti)
    h5_list.append(h5_biaoti)
    h5_a = h5.find('a')              # the 'href' attribute is on the <a> tag; it cannot be read from <h5> directly
    # print(h5_a)
    h5_url = h5_a['href']
    # print(h5_url)
    cibiaoti_url = urljoin(url, h5_url)   # join relative hrefs against the channel URL
    print(h5_biaoti, cibiaoti_url)


# the entries marked with class 'on' (the currently highlighted ones): print their text
for jie in on:
    jieguo = jie.text
    print(jieguo)
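
If you want to keep the headlines and their links instead of only printing them, one possible follow-up is to write them to a CSV file with the standard csv module. This is a minimal sketch that reuses h5_result, url and urljoin from the script above; the output file name renmin_keji.csv is only an example.

import csv

# collect (title, full URL) pairs from the <h5> headlines gathered above
rows = []
for h5 in h5_result:
    a = h5.find('a')
    rows.append((h5.text.strip(), urljoin(url, a['href'])))

# write one row per headline; the file name is only illustrative
with open('renmin_keji.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['title', 'url'])
    writer.writerows(rows)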


