美文网首页
python3爬虫笔记

python3爬虫笔记

作者: JJJoeee | 来源:发表于2020-01-08 17:34 被阅读0次

    先上代码:
    GET请求爬虫示例:

    # Minimal GET example: download a page and print its body as text.
    import requests

    url = "https://www.baidu.com"
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=10)
    # `response.encoding` controls how `response.text` decodes the body; it has
    # no effect on the raw bytes in `response.content`, so print `.text` to make
    # the assignment actually take effect.
    response.encoding = "utf-8"
    print(response.text)
    

    POST请求爬虫示例:

    # Minimal POST example: submit a form to Baidu Translate and print the reply.
    import json

    import requests

    # Original author's note (translated): the unverified-SSL-context workaround
    # below is reported not to work on Python 2.6 / 3.6+ unless OpenSSL is
    # installed locally. It stays disabled here; enabling it would turn off
    # HTTPS certificate verification.
    # import ssl
    # ssl._create_default_https_context = ssl._create_unverified_context
    url = "https://fanyi.baidu.com/v2transapi"
    querystr = {"query":"人生苦短,我用python。",
        "from":"zh",
        "to":"en"}
    headrstr = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(url, data=querystr, headers=headrstr, timeout=10)
    # print(response)
    try:
        # The body carries \uXXXX escapes. Parsing it as JSON and re-serialising
        # with ensure_ascii=False yields readable Chinese. Decoding the bytes
        # with 'unicode_escape' instead would treat them as Latin-1 and garble
        # any character that arrives as raw UTF-8.
        print(json.dumps(response.json(), ensure_ascii=False))
    except ValueError:
        # Body is not JSON (e.g. an HTML error page): show it as plain text.
        print(response.text)
    

    爬取手机应用商店更新:

    # coding=UTF-8
    # Watch one app's page on the Xiaomi app store and log whether its
    # "update date" field differs from the last known value. The check repeats
    # every 600 seconds on a threading.Timer. Written for Python 3.
    import datetime
    import sys
    import time
    from threading import Timer

    import requests
    from lxml import html

    log_file = 'android_log.txt'
    # Raw string: in a normal literal "\a" is the BEL escape and "\U" starts a
    # Unicode escape (a SyntaxError on Python 3), so the path must not be
    # escape-processed.
    record_file = r'C:\Users\Miky\Desktop\android_record.txt'
    # Baseline to compare against.
    # TODO: instead of hard-coding, read these from the last two lines of
    # record_file.
    old_date_time = '2019-06-05'
    old_version = '5.9.2'
    url = 'http://app.mi.com/details?id=air.tv.douyu.android&ref=search'
    localtime = time.asctime(time.localtime(time.time()))


    def fetch_release_info():
        """Download the app page and return ``(date_time, version)``.

        Both values are the lists of text nodes matched by the two XPath
        queries; either list is empty when the page layout does not match.
        Network errors and timeouts propagate as ``requests`` exceptions.
        """
        page = requests.get(url, timeout=30)
        tree = html.fromstring(page.text)
        date_time = tree.xpath('//ul[@class=" cf"]//li[6]/text()')
        version = tree.xpath('//ul[@class=" cf"]//li[4]/text()')
        return date_time, version


    def check_once(ftime):
        """Fetch the page once and return the log-message suffix for this run.

        When the scraped date differs from the baseline, the new version is
        appended to ``record_file`` and the baseline is advanced so the same
        release is not recorded again on every later cycle.
        """
        global old_date_time, old_version

        date_time, version = fetch_release_info()
        for date in date_time:
            print('date_time: ' + date)
        for v in version:
            print('version: ' + v)

        # Guard against an empty XPath result instead of raising IndexError.
        if not date_time or not version:
            return " 执行爬取失败,未能从页面解析出更新日期或版本号\n"
        if date_time[0] == old_date_time:
            return " 成功执行爬取,但目标应用并未更新\n"

        with open(record_file, 'a', encoding='utf-8') as file_object:
            file_object.write("更新时间: " + ftime + "\n")
            file_object.write("版本号: " + version[0] + "\n")
        old_date_time = date_time[0]
        old_version = version[0]
        return " 成功执行爬取,目标应用【已更新】\n"


    def get():
        """Run one check, append its outcome to the log, schedule the next run.

        The page is fetched again on every call; checking data fetched once at
        start-up could never reveal a later update.
        """
        ftime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        try:
            try:
                message = check_once(ftime)
            except Exception as exc:
                # Boundary of the periodic task: record the failure rather than
                # letting the timer thread die without a trace.
                message = " 执行爬取失败: " + repr(exc) + "\n"
            with open(log_file, 'a', encoding='utf-8') as file_object:
                file_object.write(ftime + message)
        finally:
            # Re-arm the timer even if logging failed, so polling continues.
            print("程序正在执行" + ftime)
            t = Timer(600, get)
            t.start()


    get()
    

    相关文章

      网友评论

          本文标题:python3爬虫笔记

          本文链接:https://www.haomeiwen.com/subject/tyysoctx.html