from bs4 import BeautifulSoup
# Scrape the locally saved product-listing page and print one record per
# product: image URL, title, price, review text and number of filled stars.
HTML_PATH = 'D:\\Python\\crawler\\网易云课堂作业\\第一周作业\\1_2\\1_2_homework_required\\index.html'

# Class list that identifies a filled star icon (as opposed to an empty one).
STAR_CLASSES = ['glyphicon', 'glyphicon-star']

# Open in binary mode so BeautifulSoup detects the document encoding itself
# instead of depending on the platform's default text encoding.
with open(HTML_PATH, 'rb') as wb_data:
    soup = BeautifulSoup(wb_data, 'lxml')

# The parse tree is complete once BeautifulSoup() returns, so everything
# below works on `soup` and no longer needs the file handle.
images = soup.select('body > div > div > div.col-md-9 > div > div > div[class="thumbnail"] > img')
titles = soup.select('body > div > div > div.col-md-9 > div > div > div > div.caption > h4 > a[href="#"]')
prices = soup.select('body > div > div > div.col-md-9 > div > div > div > div.caption > h4.pull-right')
reviews = soup.select('body > div > div > div.col-md-9 > div > div > div > div.ratings > p.pull-right')

# The star icons live inside <p> tags; class_=None is meant to pick the <p>
# tags that carry no class attribute (the review-count <p> has "pull-right").
stars = soup.find_all('p', class_=None)

# One entry per <p> that has child tags: the number of filled-star icons in it.
# <p> tags without any child tags are skipped entirely (no entry is added).
allStars = []
for paragraph in stars:
    descendants = paragraph.find_all()
    if not descendants:
        continue
    allStars.append(
        sum(1 for tag in descendants if tag.get('class') == STAR_CLASSES)
    )

# NOTE: zip() stops at the shortest list, so a selector that matches fewer
# elements than the others silently drops the trailing products.
all_data = [
    {
        # <img> elements expose their URL via `src`; `href` is always absent.
        "image": image.get('src'),
        "title": title.get_text(),
        'price': price.get_text(),
        'review': review.get_text(),
        'allStar': allStar,
    }
    for image, title, price, review, allStar
    in zip(images, titles, prices, reviews, allStars)
]
print(all_data)
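# 网友评论  ("User comments" -- leftover text from the web page this script was copied from; not part of the program)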