简单的Python爬虫应用_学习笔记003

作者: plectrum | 来源:发表于2015-01-25 11:17 被阅读632次

    从京东网站抓取_007

    接上篇内容 从京东网站抓取_006

    组织成列表的形式

    if __name__ == '__main__':
        # Download the search result page for "Bose" and parse the HTML.
        bose_info = get_content_from_jd('Bose', '1', '3')
        bose_content = BeautifulSoup(bose_info)
        # Every <div class="lh-wrap"> is handled as one product entry.
        bose_div = bose_content.find_all('div', class_='lh-wrap')

        # Product ids, read from the class of each entry's <strong> tag.
        bose_id = [entry.strong['class'][0] for entry in bose_div]

        # Collected product records, one dict per entry.
        info_results = []

        for product in bose_div:
            data = {
                # Product link
                'url': product.find('div', class_='p-img').a['href'],
                # Product image
                'img': product.find('img')['data-lazyload'],
                # Product title
                'title': product.find('div', class_='p-name').get_text().strip(),
                # Product id
                'id': product.find('div', class_='p-price').strong['class'][0],
            }
            info_results.append(data)

        # Any single record can be looked up by its index.
        print(info_results[0])
    

    返回内容

    {
    'url': 'http://item.jd.com/1150767.html', 
    'id': 'J_1150767', 
    'img': 'http://img12.360buyimg.com/n7/jfs/t160/263/2138317114/91577/2ad0b9a5/53c64152Nfa21b93d.jpg', 
    'title': u'Bose SoundLink Mini\u84dd\u7259\u626c\u58f0\u5668 \u84dd\u7259\u97f3\u7bb1'
    }
    

    商品价格的抓取
    京东网站的商品价格通过GET请求获取,先通过Chrome调试工具找到获取价格的网址

    Json.PNG
    http://p.3.cn/prices/mgets?skuids=J_1150767,J_1225287,J_1150768,J_1237665,J_1439929797,J_1150689,J_1225340,J_1264417,J_1153768,J_1253203,J_1150696,J_1150765,J_1237667,J_1150699,J_1253201,J_1253221,J_1253226,J_1253232,J_1237663,J_1237666,J_1253123,J_1253126,J_1222097,J_1150701,J_1150735,J_1230490,J_1217252,J_1237670,J_1255249,J_1150760&area=1_72_2799_0&type=1
    此网址由三个参数组成:skuids、area、type

    构建函数获取商品价格
    函数接受所有商品ID,之后构建如上链接,获取商品价格

    import json
    def get_price(all_id):
        """Fetch the prices for a batch of JD products in a single request.

        Builds the ``http://p.3.cn/prices/mgets`` query string from the
        given ids (with fixed ``area`` and ``type`` values), requests it,
        and decodes the JSON reply.

        Args:
            all_id: Iterable of product id strings such as ``'J_1150767'``.

        Returns:
            The decoded JSON response. The main script in this file treats
            it as a list of dicts carrying ``'id'`` and ``'p'`` keys. An
            empty list is returned, without any request being made, when
            no ids are given.
        """
        all_id = list(all_id)
        if not all_id:
            # Nothing to look up; skip the network round trip.
            return []

        params = {
            'skuids': ','.join(all_id),
            'area': '1_72_2799_0',
            'type': '1',
        }
        query = urllib.urlencode(params)
        response = urllib.urlopen('http://p.3.cn/prices/mgets?' + query)
        try:
            price_content = response.read()
        finally:
            # Release the connection even when reading the body fails.
            response.close()
        return json.loads(price_content)
    

    修改主程序增加商品价格字段

    if __name__ == '__main__':
        # Download the search result page for "Bose" and parse the HTML.
        bose_info = get_content_from_jd('Bose', '1', '3')
        bose_content = BeautifulSoup(bose_info)
        # Every <div class="lh-wrap"> is handled as one product entry.
        bose_div = bose_content.find_all('div', class_='lh-wrap')
        # The product id is the first class of the entry's <strong> tag,
        # e.g. <strong class="J_1150767"></strong>
        bose_id = [i.strong['class'][0] for i in bose_div]

        # One batched request for all prices.
        bose_price = get_price(bose_id)

        # Index the price records by product id once, so the loop below
        # does a direct lookup instead of re-scanning the whole price
        # list for every product.
        price_by_id = {}
        for price in bose_price:
            # setdefault keeps the first record seen for an id, matching
            # the earlier "first match wins" lookup.
            price_by_id.setdefault(price['id'], price)

        info_results = []

        for item in bose_div:
            data = {}

            data['url'] = item.find('div', class_='p-img').a['href']
            data['img'] = item.find('img')['data-lazyload']
            data['title'] = item.find('div', class_='p-name').get_text().strip()
            data['id'] = item.find('div', class_='p-price').strong['class'][0]

            # A product without a returned price gets None (printed as
            # null) instead of aborting the whole run.
            price_record = price_by_id.get(data['id'])
            data['price'] = price_record['p'] if price_record is not None else None

            info_results.append(data)

        # ensure_ascii=False keeps the Chinese titles readable in the output.
        for i in info_results:
            print(json.dumps(i, encoding="gb2312", ensure_ascii=False))
    

    返回结果

    {"url": "http://item.jd.com/1150767.html", "price": "1880.00", "id": "J_1150767", "img": "http://img12.360buyimg.com/n7/jfs/t160/263/2138317114/91577/2ad0b9a5/53c64152Nfa21b93d.jpg", "title": "Bose SoundLink Mini蓝牙扬声器 蓝牙音箱"}
    {"url": "http://item.jd.com/1225287.html", "price": "2680.00", "id": "J_1225287", "img": "http://img12.360buyimg.com/n7/jfs/t382/299/358410988/102067/28bc7fa/5417ef6eN1a471400.jpg", "title": "Bose QuietComfort25有源消噪耳机-黑色  QC25"}
    {"url": "http://item.jd.com/1150768.html", "price": "2600.00", "id": "J_1150768", "img": "http://img13.360buyimg.com/n7/jfs/t208/86/2195379170/121054/373a1635/53c6414cNa684fbeb.jpg", "title": "Bose SoundLink 蓝牙扬声器III 蓝牙音箱"}
    {"url": "http://item.jd.com/1237665.html", "price": "999.00", "id": "J_1237665", "img": "http://img10.360buyimg.com/n7/jfs/t340/338/1222059663/138599/83529a01/5434b619Nd24a3d4b.jpg", "title": "Bose SoundLink Colour蓝牙扬声器-红色"}
    {"url": "http://item.jd.com/1439929797.html", "price": "138.00", "id": "J_1439929797", "img": "http://img12.360buyimg.com/n7/jfs/t700/310/356730772/324161/83da6250/54a3bc7eNfa25365f.jpg", "title": "山水D11迷你音响便携式插卡收音机老人带mp3音乐播放器外放小音箱 红色"}
    {"url": "http://item.jd.com/1150689.html", "price": "1390.00", "id": "J_1150689", "img": "http://img14.360buyimg.com/n7/jfs/t208/317/2213636018/63579/6e4b116f/53c748efN7baca4a7.jpg", "title": "Bose SoundTrue 耳罩式耳机-薄荷黑"}
    {"url": "http://item.jd.com/1225340.html", "price": "2680.00", "id": "J_1225340", "img": "http://img10.360buyimg.com/n7/jfs/t223/352/2430825655/67306/a9994f48/5417ef44N0c7a8927.jpg", "title": "Bose QuietComfort25有源消噪耳机-白色  QC25"}
    {"url": "http://item.jd.com/1264417.html", "price": "230.00", "id": "J_1264417", "img": "http://img12.360buyimg.com/n7/jfs/t517/72/112548631/56020/8bda8115/544ef80cN9a010b1f.jpg", "title": "Bose SoundLink Mini蓝牙扬声器封套-橙色 蓝牙音箱配件"}
    {"url": "http://item.jd.com/1153768.html", "price": "2900.00", "id": "J_1153768", "img": "http://img13.360buyimg.com/n7/jfs/t166/272/2180604273/69161/fb60368e/53c6403dNee2eb466.jpg", "title": "Bose QC3有源消噪耳机"}
    {"url": "http://item.jd.com/1253203.html", "price": "860.00", "id": "J_1253203", "img": "http://img13.360buyimg.com/n7/jfs/t322/59/1919666393/48470/e8feae8a/5444db09N9675a5b6.jpg", "title": "Bose SoundTrue 耳塞式耳机-Audio白色"}
    {"url": "http://item.jd.com/1150696.html", "price": "1390.00", "id": "J_1150696", "img": "http://img11.360buyimg.com/n7/jfs/t148/316/2168034496/55985/33491bc7/53c748f6N18f6bba2.jpg", "title": "Bose SoundTrue 耳罩式耳机-白色"}
    {"url": "http://item.jd.com/1150765.html", "price": "1190.00", "id": "J_1150765", "img": "http://img10.360buyimg.com/n7/jfs/t301/274/605583376/53605/b339d875/541a76b2N5ccb18e3.jpg", "title": "Bose SoundTrue 贴耳式耳机-薄荷绿"}
    {"url": "http://item.jd.com/1237667.html", "price": "999.00", "id": "J_1237667", "img": "http://img12.360buyimg.com/n7/jfs/t289/182/1218390339/163662/9fe085a4/5434b621Naedfdec8.jpg", "title": "Bose SoundLink Colour蓝牙扬声器-蓝色"}
    {"url": "http://item.jd.com/1150699.html", "price": "1100.00", "id": "J_1150699", "img": "http://img14.360buyimg.com/n7/jfs/t181/31/2177504157/57468/1c4635a/53c748feN97183aa9.jpg", "title": "BOSE FreeStyle耳塞式耳机-靛蓝色"}
    {"url": "http://item.jd.com/1253201.html", "price": "860.00", "id": "J_1253201", "img": "http://img11.360buyimg.com/n7/jfs/t346/156/1911266815/54489/7c8e3997/5444db0dNa623d2b8.jpg", "title": "Bose SoundTrue 耳塞式耳机-Audio黑色"}
    {"url": "http://item.jd.com/1253221.html", "price": "1100.00", "id": "J_1253221", "img": "http://img11.360buyimg.com/n7/jfs/t289/140/1935678963/56537/582956b2/5444db00N572caa4a.jpg", "title": "Bose SoundTrue 耳塞式耳机-MFI黑色"}
    {"url": "http://item.jd.com/1253226.html", "price": "1100.00", "id": "J_1253226", "img": "http://img11.360buyimg.com/n7/jfs/t277/71/1930074315/50436/5acf672a/5444dafcN0c4791d5.jpg", "title": "Bose SoundTrue 耳塞式耳机-MFI白色"}
    {"url": "http://item.jd.com/1253232.html", "price": "1100.00", "id": "J_1253232", "img": "http://img12.360buyimg.com/n7/jfs/t328/196/1930225986/61739/bbec7731/5444daf9Nddbecfb6.jpg", "title": "Bose SoundTrue 耳塞式耳机-MFI红色"}
    {"url": "http://item.jd.com/1237663.html", "price": "999.00", "id": "J_1237663", "img": "http://img13.360buyimg.com/n7/jfs/t319/242/1280480196/152009/5fe7709a/5434b615Nbcfa5d7c.jpg", "title": "Bose SoundLink Colour蓝牙扬声器-白色"}
    {"url": "http://item.jd.com/1237666.html", "price": "999.00", "id": "J_1237666", "img": "http://img11.360buyimg.com/n7/jfs/t304/242/1241674581/146981/c06eaa1f/5434b61dN9c7696b3.jpg", "title": "Bose SoundLink Colour蓝牙扬声器-黑色"}
    {"url": "http://item.jd.com/1253123.html", "price": "1280.00", "id": "J_1253123", "img": "http://img13.360buyimg.com/n7/jfs/t292/198/1897986217/53974/f562c7e1/5444db17Ne54d0602.jpg", "title": "Bose SoundSport 耳塞式运动耳机-MFI蓝色"}
    {"url": "http://item.jd.com/1253126.html", "price": "1280.00", "id": "J_1253126", "img": "http://img11.360buyimg.com/n7/jfs/t292/189/1898519967/51313/51a8a428/5444db10Nf3ff2fd3.jpg", "title": "Bose SoundSport 耳塞式运动耳机-MFI绿色"}
    {"url": "http://item.jd.com/1222097.html", "price": "1300.00", "id": "J_1222097", "img": "http://img12.360buyimg.com/n7/jfs/t361/202/251727987/123343/f26572be/5412c577N18d44163.jpg", "title": "Bose SoundDock XT 扬声器-黄色"}
    {"url": "http://item.jd.com/1150701.html", "price": "1100.00", "id": "J_1150701", "img": "http://img11.360buyimg.com/n7/jfs/t190/337/2171347628/71312/aaee683/53c74901N1f1f18c6.jpg", "title": "BOSE FreeStyle耳塞式耳机-冰蓝色"}
    {"url": "http://item.jd.com/1150735.html", "price": "2100.00", "id": "J_1150735", "img": "http://img10.360buyimg.com/n7/jfs/t145/287/2185253825/90804/2249f972/53c748dfN2ddf2080.jpg", "title": "Bose AE2w蓝牙 音乐耳机"}
    {"url": "http://item.jd.com/1230490.html", "price": "3900.00", "id": "J_1230490", "img": "http://img10.360buyimg.com/n7/jfs/t271/122/797716279/40566/938f086d/54250485Nd234341a.jpg", "title": "Bose Solo 15 电视音响"}
    {"url": "http://item.jd.com/1217252.html", "price": "280.00", "id": "J_1217252", "img": "http://img12.360buyimg.com/n7/jfs/t391/286/362695776/68919/aa56f3ad/5417d812N5f28a56b.jpg", "title": "Bose SoundLink 蓝牙扬声器III封套-灰色"}
    {"url": "http://item.jd.com/1237670.html", "price": "200.00", "id": "J_1237670", "img": "http://img10.360buyimg.com/n7/jfs/t289/169/1495591883/259103/d0ae681e/543ceff6N239ff5f6.jpg", "title": "Bose SoundLink Colour蓝牙扬声器 便携包"}
    {"url": "http://item.jd.com/1255249.html", "price": "988.00", "id": "J_1255249", "img": "http://img14.360buyimg.com/n7/jfs/t280/299/1915466812/56681/67fa4c6b/5444dae9Na3c39a8c.jpg", "title": "Bose SoundTrue 耳塞式耳机-SMSG黑色"}
    {"url": "http://item.jd.com/1150760.html", "price": "1190.00", "id": "J_1150760", "img": "http://img10.360buyimg.com/n7/jfs/t172/238/2201679276/52515/cab70722/53c748baN815e7a20.jpg", "title": "Bose SoundTrue 贴耳式耳机-白色"}
    

    相关文章

      网友评论

        本文标题:简单的Python爬虫应用_学习笔记003

        本文链接:https://www.haomeiwen.com/subject/splkxttx.html