#coding:utf-8
import re
from bs4 import BeautifulSoup as bs
def _markup_at(tags, position):
    """Return ``str(tags[position])``, or ``None`` when *tags* is too short."""
    return str(tags[position]) if position < len(tags) else None


def _nth_match(pattern, markup, n=0):
    """Return match number *n* of *pattern* in *markup*, or ``''`` if absent.

    *markup* may be ``None`` (the tag that should have been searched was
    missing); that also yields ``''``.  This replaces the former bare
    ``except:`` clauses, which hid every error rather than only the
    "tag or match not present" case.
    """
    if markup is None:
        return ''
    found = re.findall(pattern, markup)
    return found[n] if n < len(found) else ''


# Read the saved page as bytes and decode it as UTF-8.
with open('dianping.html', 'rb') as f:
    html = f.read().decode('utf-8')

dianping = bs(html, 'lxml')

# Take the first <div> matching this class value; an IndexError here means
# the page contains no such container.
allshops = dianping.find_all(
    'div', attrs={'class': 'shop-list J_shop-list shop-all-list'})[0]
shops = allshops.find_all('li')

for eachshop in shops:
    name = eachshop.h4.string
    shopurl = eachshop.a["href"]

    # Look the tags up once per shop instead of once per extracted field.
    spans = eachshop.find_all('span')
    bolds = eachshop.find_all('b')
    all_spans_markup = str(spans)

    # Text of the title="..." attribute found in the first <span>'s markup.
    star = _nth_match('title="(.*)">', _markup_at(spans, 0))

    # NOTE(review): the pattern '(.*?)' matches the empty string, so every
    # field below always evaluates to ''.  The patterns presumably had
    # surrounding tag delimiters that were lost -- restore them against the
    # real page markup.  They are kept verbatim here so behaviour is unchanged.
    # `cls` and `addr` use the same expression and therefore always agree.
    cls = _nth_match('(.*?)', all_spans_markup, 0)
    area = _nth_match('(.*?)', all_spans_markup, 1)
    addr = _nth_match('(.*?)', all_spans_markup, 0)

    # One field per <b> tag, by position within the shop entry.
    comments = _nth_match('(.*?)', _markup_at(bolds, 0))
    mean = _nth_match('(.*?)', _markup_at(bolds, 1))
    taste = _nth_match('(.*?)', _markup_at(bolds, 2))
    envior = _nth_match('(.*?)', _markup_at(bolds, 3))
    service = _nth_match('(.*?)', _markup_at(bolds, 4))

    print(name, shopurl, star, cls, area, addr, mean, taste, envior, service, comments)
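# Stray page text ("网友评论", i.e. "User comments"); it appears to be residue from the web page this script was copied from and is not part of the program.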