美文网首页
Python亚马逊图书爬虫

Python亚马逊图书爬虫

作者: peng_js | 来源:发表于2017-07-02 23:06 被阅读0次

    # encoding=utf8

    import requests
    import time
    from requests.exceptions import RequestException
    import urllib
    from pyquery import PyQuery as pq
    import json
    import re
    from bs4 import BeautifulSoup
    from config import *
    def get_detail(asin='B00JZ96ZI8'):
        """Download the product-description HTML fragment for one Amazon.cn item.

        Args:
            asin: Value sent as the ``asin`` query parameter. Defaults to the
                item the script was originally written for.

        Returns:
            The response body as text when the server answers with HTTP 200,
            otherwise ``None`` (non-200 status, or the request itself failed).
        """
        params = {
            'ref_': 'dp_apl_pc_loaddesc',
            'asin': asin,
            # Current Unix timestamp in whole seconds.
            'cacheTime': int(time.time()),
            'merchantId': 'A1AJ19PSB66TGU',
            'deviceType': 'web',
        }
        # A request header must be supplied with this request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
        }
        # NOTE: the "Prouduct" spelling is part of the endpoint path; keep it as is.
        url = 'https://www.amazon.cn/gp/product-description/ajaxGetProuductDescription.html'
        try:
            # The call itself has to be inside the try block, otherwise
            # connection errors and timeouts escape the handler below.
            # requests encodes `params` into the query string.
            response = requests.get(url, params=params, headers=headers, timeout=10)
        except RequestException:
            print(u'请求索引页出错')
            return None
        if response.status_code == 200:
            return response.text
        return None
    def parse_detail(html):
        """Print the editorial-review paragraph found in a description fragment.

        Args:
            html: HTML text to parse. ``None`` or an empty string is accepted
                and results in nothing being printed.

        Returns:
            None. The first ``#s_content_0 > p`` element is printed with the
            '亚马逊编辑推荐:' label removed; nothing is printed when no such
            element exists.
        """
        # Nothing to parse when the download failed or returned an empty body.
        if not html:
            return
        # Build the BeautifulSoup tree using the lxml parser.
        soup = BeautifulSoup(html, 'lxml')
        # Editorial-review paragraph(s); the list is empty when the section is absent.
        matches = soup.select('#s_content_0 > p')
        if not matches:
            return
        article = str(matches[0]).replace('亚马逊编辑推荐:', '')
        print(article)
    def main():
        """Fetch the description fragment and print its editorial review."""
        html = get_detail()
        # get_detail() gives back None when the response is not HTTP 200;
        # there is nothing to parse in that case.
        if not html:
            return
        parse_detail(html)
    # Run the scraper only when this file is executed as a script.
    if __name__ == '__main__':
        main()

    相关文章

      网友评论

          本文标题:Python亚马逊图书爬虫

          本文链接:https://www.haomeiwen.com/subject/qpiocxtx.html