美文网首页
第一周大作业58

第一周大作业58

作者: 木马音响积木 | 来源:发表于2016-12-04 16:56 被阅读0次

    总结:
    1、感觉 58 网站还在改,时间没有找到,但是浏览量一下搞定了。
    2、字符串的处理,或者说空值的处理,感觉 if else 还有些难度。
    3、find_all 还是感觉很难,需要看文档。

    成果抓图

    weekdazuoye.jpg

    代码

    #!C:\Python35\python.exe
    # coding=utf-8
    
    import re
    import time

    from bs4 import BeautifulSoup
    import requests
    '''
    url = 'http://zhuanzhuan.58.com/detail/764095267672850433z.shtml'
    wb_data = requests.get(url)
    soup = BeautifulSoup(wb_data.text,'lxml')
    
     #soup.find_all('body > div.content > div > div.box_left > div.info_lubotu.clearfix > div.info_massege.left > div.palce_li > span > i')
    
    kk=soup.select('body > div.content > div > div.box_left > div.info_lubotu.clearfix > div.info_massege.left > div.palce_li > span > i')
    vv=kk[0].text
    print(kk)
    print(vv)
    '''
    #soup.find_all nanduda
    
    #print(soup) #date come
    
    
    
    def get_links_from(who_sells):
        """Collect detail-page URLs from one 58.com listing page.

        Args:
            who_sells: listing-category path segment interpolated into
                the URL (0 for personal sellers, 1 for dealers).

        Returns:
            list[str]: detail-page URLs with any '?...' query string
            (tracking parameters) stripped off.
        """
        list_view = 'http://bj.58.com/pbdn/{}/pn2/'.format(str(who_sells))
        wb_data = requests.get(list_view)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        # Each listing row's title cell holds the detail link: td.t > a.t.
        urls = [link.get('href').split('?')[0]
                for link in soup.select('td.t a.t')]
        return urls
    
    def get_views_from(url):
        """Fetch the page-view count for one 58.com listing.

        The listing id is the leading digit run in the URL's last path
        segment (e.g. '.../764095267672850433z.shtml'); 58 exposes the
        view counter through a separate API endpoint keyed on that id.

        Args:
            url: detail-page URL of a listing.

        Returns:
            str: the view count reported by the counter API.
        """
        # NOTE: the original `strip('x.shtml')` removed a *character set*,
        # not the suffix, so a 'z.shtml' URL kept its trailing 'z' and the
        # API received a corrupted id. Extract the digit run explicitly.
        info_id = re.search(r'\d+', url.split('/')[-1]).group()
        api = 'http://jst1.58.com/counter?infoid={}'.format(info_id)
        js = requests.get(api)
        # The response body ends with '...=NNN'; the count is the text
        # after the last '='.
        return js.text.split('=')[-1]
    
    def get_item_info(who_sells=0):
        """Scrape and print title/price/area/date for every listing.

        Args:
            who_sells: 0 for personal sellers ('个人'), any other value
                for dealers ('商家'). Passed through to get_links_from.
        """
        for url in get_links_from(who_sells):
            # Throttle between requests (the original slept only once,
            # before the loop, so detail pages were fetched back-to-back).
            time.sleep(1)
            wb_data = requests.get(url)
            soup = BeautifulSoup(wb_data.text, 'lxml')
            data = {
                # <title> text is '\r\n'-padded; the second piece is the
                # listing title.
                'title': soup.select('title')[0].text.split('\r\n')[1].strip(),
                # Read the price from the <i> inside .price_now via .text
                # instead of string-splitting the tag's raw HTML.
                'price': soup.select('.price_now i')[0].text,
                'area': soup.select(
                    'body > div.content > div > div.box_left > '
                    'div.info_lubotu.clearfix > div.info_massege.left > '
                    'div.palce_li > span > i')[0].text,
                'date': soup.select('.look_time')[0].text,
                'cate': '个人' if who_sells == 0 else '商家',
            }
            print(data)
    
    if __name__ == '__main__':
        # Guard the entry point so importing this module does not start
        # a scrape as a side effect.
        get_item_info()
    
    
    

    相关文章

      网友评论

          本文标题:第一周大作业58

          本文链接:https://www.haomeiwen.com/subject/romhmttx.html