美文网首页
day03 通过切片获得json数据爬取

day03 通过切片获得json数据爬取

作者: LittleBear_6c91 | 来源:发表于2019-04-11 17:58 被阅读0次

    mogujie.py(爬取数据)

    import requests
    import json
    from mogijie9_db_helper import *
    
    # Module-level connection and cursor, shared by parse_page() and main().
    # NOTE: both are created at import time, so importing this module
    # immediately calls get_connection().
    db = get_connection()
    cursor = get_cursor(db)
    
    
    # Fetch the raw JSONP text of one listing page
    def get_one_page(page):
        """Download one page of the Mogujie search listing.

        Args:
            page: Page number inserted into the ``page`` query parameter
                (converted with ``str``).

        Returns:
            The response body decoded as UTF-8, or ``None`` when the request
            fails or the server answers with a status other than 200.
        """
        url = (
            "https://list.mogujie.com/search?callback=jQuery21108297191095165726_1554948240451&_version=8193&ratio=3%3A4&cKey=15&page="
            + str(page)
            + "&sort=pop&ad=2&fcid=50270&action=clothing&mt=12.848.r123121.3253"
        )
        headers = {
            "User-Agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)"
        }
        try:
            # Without a timeout requests may block forever on a stalled server.
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as exc:
            # Report the failure and use the same "no data" value as a non-200 reply.
            print('request for page %s failed: %s' % (page, exc))
            return None
        if response.status_code != 200:
            return None
        return response.content.decode('utf-8')
    
    
    # Parse the JSONP payload and store every product it contains
    def parse_page(html):
        """Extract the JSON object from a JSONP response and save its products.

        The response looks like ``callbackName({...json...});``. The JSON is
        taken as everything between the first ``(`` and the last ``)``, so a
        missing semicolon or trailing whitespace does not corrupt it.

        Args:
            html: JSONP text as returned by ``get_one_page``.

        Returns:
            The value of ``result.wall.isEnd`` from the payload.

        Raises:
            ValueError: If ``html`` is empty/``None`` or has no ``(...)`` wrapper.
            KeyError: If the expected ``result``/``wall`` keys are missing.
        """
        if not html:
            raise ValueError('empty response, nothing to parse')
        start = html.index('(')
        end = html.rindex(')')
        json_data = json.loads(html[start + 1:end])
        wall = json_data['result']['wall']
        is_end = wall['isEnd']
        products = wall['docs']
        print(len(products))
        for product in products:
            execute_sql2(db, cursor, product)

        return is_end
    
    def main():
        """Crawl listing pages from page 1 until the site reports the end.

        Each page is fetched with ``get_one_page`` and handed to ``parse_page``
        exactly once; parsing it twice would insert every product twice. The
        loop stops when ``parse_page`` returns a truthy ``isEnd`` flag or when
        a page cannot be downloaded. The database connection is closed even if
        an error occurs.
        """
        page = 1
        try:
            while True:
                html = get_one_page(page)
                print('*'*20)
                print(page)
                page += 1

                if html is None:
                    # get_one_page returns None on failure; nothing to parse.
                    print('请求失败,停止爬取')
                    break

                if parse_page(html):
                    print('爬取结束')
                    break
        finally:
            close_connection(db)
    
    
    # Start the crawl only when this file is executed as a script.
    if __name__ == '__main__':
        main()
    

    /SQL/mogujie.sql(搭建数据库)

    
    -- Database used by the crawler; the helper module connects to it by this name.
    create database mogujie9 default character set=utf8;
    
    use mogujie9;
    
    
    -- One row per crawled product. The non-id columns are the ones filled by
    -- the insert statement in execute_sql2.
    create table product(
        id integer primary key auto_increment,  -- surrogate key, generated by MySQL
        trade_item_id varchar(32),              -- filled from the item's 'tradeItemId'
        img varchar(1024),                      -- filled from 'img'
        link varchar(1024),                     -- filled from 'link'
        title varchar(512),                     -- filled from 'title'
        org_price varchar(32),                  -- filled from 'orgPrice'; stored as text
        price varchar(32)                       -- filled from 'price'; stored as text
    );
    
    

    mogijie9_db_helper.py(连接数据库储存数据;文件名须与 mogujie.py 中的 import 一致)

    import pymysql
    
    # Open the database connection
    def get_connection():
        """Open a PyMySQL connection to the local ``mogujie9`` database.

        Keyword arguments are used because newer PyMySQL releases no longer
        accept positional arguments to ``connect``.

        Returns:
            The connection object returned by ``pymysql.connect``.
        """
        # NOTE(review): credentials are hard-coded in source; move them to
        # configuration or environment variables before sharing this code.
        host = '127.0.0.1'
        port = 3306
        user = 'root'
        password = 'DENG5rong2hua0!'
        database = 'mogujie9'
        db = pymysql.connect(
            host=host,
            port=port,
            user=user,
            password=password,
            database=database,
            charset='utf8',
        )
        return db
    
    # Obtain a cursor from the connection
    def get_cursor(db):
        """Return a new cursor created by calling ``db.cursor()``."""
        return db.cursor()
    
    # Close the database connection
    def close_connection(db):
        """Close the connection ``db`` by calling its ``close()`` method."""
        db.close()
    
    # Insert one movie record
    def execute_sql(db, cursor, item_dict):
        """Insert one row into the ``movie`` table and commit.

        The values are passed to ``cursor.execute`` as parameters instead of
        being formatted into the SQL text, so quotes inside a value cannot
        break the statement or inject SQL.

        Args:
            db: Connection on which ``commit()`` is called after the insert.
            cursor: Cursor used to execute the statement.
            item_dict: Mapping with the keys ``movie_name``, ``actor``,
                ``releasetime``, ``cover_url``, ``score``, ``rank`` and
                ``detail_url``.

        Raises:
            KeyError: If ``item_dict`` lacks one of the keys above.
        """
        sql = (
            'insert into movie '
            '(movie_name, actor, releasetime, cover_url, score, ranks, detail_url) '
            'values (%s, %s, %s, %s, %s, %s, %s)'
        )
        print(sql)
        params = (
            item_dict['movie_name'],
            item_dict['actor'],
            item_dict['releasetime'],
            item_dict['cover_url'],
            item_dict['score'],
            item_dict['rank'],  # stored in the column named `ranks`
            item_dict['detail_url'],
        )
        cursor.execute(sql, params)
        db.commit()
    
    # Insert one product record
    def execute_sql2(db, cursor, item_dict):
        """Insert one row into the ``product`` table and commit.

        The statement text is printed, then executed with the values taken
        from ``item_dict`` (keys ``tradeItemId``, ``img``, ``link``, ``title``,
        ``orgPrice``, ``price``) as parameters, and ``db.commit()`` is called.
        """
        statement = 'insert into product (trade_item_id, img, link, title, org_price, price) values (%s, %s, %s, %s, %s, %s)'
        print(statement)
        field_keys = ('tradeItemId', 'img', 'link', 'title', 'orgPrice', 'price')
        values = tuple(item_dict[key] for key in field_keys)
        cursor.execute(statement, values)
        db.commit()
    

    相关文章

      网友评论

          本文标题:day03 通过切片获得json数据爬取

          本文链接:https://www.haomeiwen.com/subject/ovsxwqtx.html