美文网首页
2-1homework

2-1homework

作者: OldSix1987 | 来源:发表于2016-08-16 00:58 被阅读18次

    结果


    Result.png

    我的代码


    from bs4 import BeautifulSoup
    import requests
    import time
    import pymongo
    
    __author__ = 'CP6'
    
    # Connection to the local MongoDB server, then the database and the
    # collection that the scraped listings are written to.
    client = pymongo.MongoClient(host='localhost', port=27017)
    xiaozhuDB = client['xiaozhuDB']
    xiaozhuTb = xiaozhuDB['xiaozhuTb']
    
    # Search-result pages 1 through 3.
    urls = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(page) for page in range(1, 4)]
    
    def somMethod():
        """Scrape every page in ``urls`` and store the listings in ``xiaozhuTb``.

        For each page, the title, price and link elements are selected with
        CSS selectors and paired positionally. One document per listing is
        inserted with the keys ``title`` (str), ``price`` (int) and ``link``.

        Raises:
            requests.exceptions.Timeout: if a page does not respond in time.
            ValueError: if a price element's text is not an integer.
        """
        for single_url in urls:
            # Without a timeout, requests waits indefinitely on a stalled
            # connection and the whole crawl hangs.
            wb_data = requests.get(single_url, timeout=10)
            soup = BeautifulSoup(wb_data.text, 'lxml')
            time.sleep(2)  # pause between page requests
            titles = soup.select('span.result_title')
            prices = soup.select('span.result_price > i')
            links = soup.select('#page_list > ul > li > a')
    
            # zip pairs the three selections by position and stops at the
            # shortest one.
            for title, price, link in zip(titles, prices, links):
                data = {
                    'title': title.get_text(),
                    'price': int(price.get_text()),
                    'link': link.get('href'),
                }
                xiaozhuTb.insert_one(data)
    
    
    def getResult():
        """Print every document in ``xiaozhuTb`` whose price is greater than 500."""
        price_filter = {'price': {'$gt': 500}}
        matches = xiaozhuTb.find(price_filter)
        for listing in matches:
            print(listing)
    
    # Script entry point: run the scrape (which inserts into the collection)
    # first, then print the documents matching the price filter.
    if __name__ == '__main__':
        somMethod()
        getResult()
    
    

    总结


    • 1. 数据库查询方法

    # $lt/$lte/$gt/$gte/$ne 分别等价于 less than / less than or equal / greater than / greater than or equal / not equal
    
    • 2. 打开文件,读取内容

    # Raw string: every backslash in the Windows path is taken literally, so
    # no separator is misread as (or flagged as an invalid) escape sequence.
    path = r'C:\Users\Queen\Desktop\MuggleCode\Mycode\week2\2-1\2-1code_video\walden.txt'
    
    with open(path, 'r') as f:
        lines = f.readlines()  # read every line of the file into a list
        for index, line in enumerate(lines):
            data = {
                'index': index,
                'line': line,
                'words': len(line.split())  # whitespace-separated word count
            }
            print(data)
    
    • 3. enumerate 函数: 用于遍历序列中的元素以及它们的下标

    # enumerate yields (index, element) pairs; print() is the Python 3 form
    # used by the rest of the code in this article.
    for i, j in enumerate(('a', 'b', 'c')):
        print(i, j)
     
    0 a
    1 b
    2 c
    
    • 4. 数据库操作步骤

    # Open the connection
    client = pymongo.MongoClient('localhost', 27017)
    # Select the database
    xiaozhuDB = client['xiaozhuDB']
    # Select the collection (MongoDB's counterpart of a table)
    xiaozhuTb = xiaozhuDB['xiaozhuTb']
    # Insert a document
    xiaozhuTb.insert_one(data)
    # Query documents
    for item in xiaozhuTb.find({'price': {'$gt': 500}}):
        print(item)
    
    • 5. Bs4 prettify

    print(soup.b.prettify())  # print the HTML in a normalized, indented layout
    

    相关文章

      网友评论

          本文标题:2-1homework

          本文链接:https://www.haomeiwen.com/subject/shqpsttx.html