美文网首页
爬取京东商品评论

爬取京东商品评论

作者: 叁咪 | 来源:发表于2018-07-09 02:44 被阅读0次

    爬取京东商品评论

    # -*- coding: utf-8 -*-

    import requests

    import json

    import os

    import sys

    import random

    import time

    # Optional proxy settings, kept here as a disabled template. Uncomment and
    # pass ``proxies=proxies`` to ``requests.get`` to route traffic through it.
    # proxies = {
    #   "http": "proxy.xxcom:911",
    #   "https": "proxy.xx.com:911",
    # }

    # HTTP request headers used by the crawler: a desktop Chrome User-Agent,
    # a JD home-page Referer, and ``Connection: close``.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 '
            '(KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'
        ),
        'Accept': 'text/html;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Connection': 'close',
        'Referer': 'https://www.jd.com/',
    }

    # Cookie jar passed alongside the headers.
    cookie = {'__jdu': '10846'}

    # The comment endpoint takes the page number between these two URL
    # fragments; product id, sort order and page size are fixed in the query.
    url1='https://sclub.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv33573&productId=5118016&score=0&sortType=5&page='
    url2='&pageSize=10&isShadowSku=0&fold=1'

    # Visit pages 0-29 exactly once each, in a random order.
    ran_num=random.sample(range(30), 30)

    # Raw body of every page that was fetched successfully.
    pages = []

    # The context manager guarantees the output file is flushed and closed,
    # even if the crawl is interrupted part-way through.
    with open('c:/users/ffan2/desktop/jd.txt','a',encoding='utf-8') as f:
        for position, i in enumerate(ran_num):
            # Pause between consecutive requests (not before the first one).
            if position:
                time.sleep(5)

            url = url1 + str(i) + url2
            try:
                # A timeout keeps one stalled connection from hanging the run.
                r = requests.get(url=url, headers=headers, cookies=cookie,
                                 timeout=10)  # ,proxies=proxies
            except requests.RequestException as err:
                # Report the failure and carry on with the remaining pages.
                print("当前抓取页面:",url,"状态:",err)
                continue

            print("当前抓取页面:",url,"状态:",r)
            if r.status_code != 200:
                # Do not store error pages in the output.
                continue

            pages.append(r.content)
            # NOTE(review): the body is assumed to be GBK-encoded - confirm
            # against the live endpoint. Undecodable bytes are replaced so a
            # single bad character does not abort the crawl.
            f.write(r.content.decode('GBK', errors='replace') + '\n')

    # Keep the concatenated raw bytes available under the original name.
    html = b''.join(pages)

    print('done--------------------')

    相关文章

      网友评论

          本文标题:爬取京东商品评论

          本文链接:https://www.haomeiwen.com/subject/conauftx.html