Crawler Assignment 3 (JD.com Products)

Author: 56f82a501045 | Published 2019-07-25 22:29
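
The script below pages through JD.com's productPageComments API for product 100004325476, extracts each comment's content, creation time, nickname, product color, client, and user level, and inserts the records into a MySQL table named comment_info via pymysql.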

    import requests
    import json
    import pymysql
    import time
    import re

    # connect to the MySQL instance that stores the scraped comments
    conn = pymysql.connect(host='192.168.112.157', user='python', passwd='Python.123456',
                           db='python', port=3306, charset='utf8')
    cursor = conn.cursor()

    # spoof a browser User-Agent and a product-page Referer so the comment API responds normally
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
        'referer': 'https://item.jd.com/100004325476.html'
    }

    # the first 50 pages of comments for product 100004325476
    urls = [
        'https://sclub.jd.com/comment/productPageComments.action?&productId=100004325476&score=0&sortType=5&page={}&pageSize=10'.format(
            str(i)) for i in range(0, 50)]

    for url in urls:
        res = requests.get(url, headers=headers)
        json_data = json.loads(res.text)
        comments = json_data['comments']
        for comment in comments:
            content = comment['content']
            creationTime = comment['creationTime']
            nickname = comment['nickname']
            productColor = comment['productColor']
            # userClientShow = re.findall('来自京东(.*?)客户端', comment['userClientShow'], re.S)[0]
            # strip the "来自京东…客户端" wrapper so only the client name remains
            userClientShow = re.sub('来自', '', comment['userClientShow']).replace('京东', '').replace('客户端', '')
            userLevelName = comment['userLevelName']
            # print(userClientShow, userLevelName, content, creationTime, nickname, productColor)
            cursor.execute(
                "insert into comment_info (userClientShow, userLevelName, content, creationTime, nickname, productColor) "
                "values(%s,%s,%s,%s,%s,%s)",
                (userClientShow, userLevelName, content, creationTime, nickname, productColor))
            conn.commit()
        time.sleep(5)  # pause between pages to avoid hammering the API

    conn.close()
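
The INSERT above assumes a comment_info table already exists in the python database. A minimal sketch of creating it with pymysql is shown below, assuming the same connection settings as the script; the column names come from the INSERT statement, but the column types and lengths are assumptions, not part of the original post.

    import pymysql

    conn = pymysql.connect(host='192.168.112.157', user='python', passwd='Python.123456',
                           db='python', port=3306, charset='utf8')
    with conn.cursor() as cursor:
        # column names mirror the INSERT; types/lengths are assumed
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS comment_info (
                id INT AUTO_INCREMENT PRIMARY KEY,
                userClientShow VARCHAR(64),
                userLevelName  VARCHAR(64),
                content        TEXT,
                creationTime   DATETIME,
                nickname       VARCHAR(64),
                productColor   VARCHAR(64)
            ) DEFAULT CHARSET=utf8mb4
        """)
    conn.commit()
    conn.close()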
