美文网首页
大众点评页面抓取实例

大众点评页面抓取实例

作者: up_shang | 来源:发表于2017-01-01 21:13 被阅读0次

    #coding:utf-8

    import re

    from bs4 import BeautifulSoup as bs

    def _text_at(nodes, index):
        """Return the stripped text of ``nodes[index]``, or '' if there is no such node."""
        try:
            return nodes[index].get_text(strip=True)
        except IndexError:
            return ''


    # The saved page is read as bytes and decoded as UTF-8 (what a bare
    # bytes.decode() does on Python 3, spelled out here).
    with open('dianping.html', 'rb') as f:
        html = f.read().decode('utf-8')

    dianping = bs(html, 'lxml')

    # The first matching container holds the shop list; indexing raises
    # IndexError when the page contains no such <div>.
    allshops = dianping.find_all(
        'div', attrs={'class': 'shop-list J_shop-list shop-all-list'})[0]
    shops = allshops.find_all('li')

    for eachshop in shops:
        # Guard the heading and link lookups so a <li> without them yields
        # empty fields instead of aborting the whole run.
        heading = eachshop.h4
        name = heading.string if heading is not None else ''
        link = eachshop.a
        shopurl = link.get('href', '') if link is not None else ''

        # Collect the candidate elements once per shop rather than once per field.
        spans = eachshop.find_all('span')
        bolds = eachshop.find_all('b')

        # The rating label is the title attribute of the first <span>.
        star = spans[0].get('title', '') if spans else ''

        # NOTE(review): the published listing used the pattern '(.*?)' for the
        # fields below, which only ever matches the empty string -- the HTML
        # tags inside the pattern appear to have been stripped when the article
        # was rendered. The class names 'tag' and 'addr' are an assumption about
        # the intended markup -- TODO confirm against dianping.html. If they do
        # not match, the fields fall back to '' exactly as before.
        tags = eachshop.find_all('span', class_='tag')
        cls = _text_at(tags, 0)
        area = _text_at(tags, 1)
        addr = _text_at(eachshop.find_all('span', class_='addr'), 0)

        # The <b> elements are read by position, in the same order the original
        # script indexed them; a missing element yields ''.
        comments = _text_at(bolds, 0)
        mean = _text_at(bolds, 1)
        taste = _text_at(bolds, 2)
        envior = _text_at(bolds, 3)
        service = _text_at(bolds, 4)

        print(name, shopurl, star, cls, area, addr, mean, taste, envior, service, comments)

    相关文章

      网友评论

          本文标题:大众点评页面抓取实例

          本文链接:https://www.haomeiwen.com/subject/btbuvttx.html