先上代码:
GET请求爬虫示例:
import requests
# Minimal GET example: download the Baidu home page and print it as text.
url = "https://www.baidu.com"
# requests has no default timeout; without one an unresponsive server would
# block this script forever.
response = requests.get(url, timeout=10)
# `encoding` only influences `response.text`, so read `.text` (rather than
# decoding `.content` by hand) to make this assignment take effect.
response.encoding = "utf-8"
print(response.text)
POST请求爬虫示例:
import requests
# import ssl #python 2.6和3.6以上的版本就不能用这个库了,必须在本机下载openssl
# ssl._create_default_https_context = ssl._create_unverified_context
# Minimal POST example: submit a form-encoded translation request.
url = "https://fanyi.baidu.com/v2transapi"
# Form fields sent in the request body.
querystr = {
    "query": "人生苦短,我用python。",
    "from": "zh",
    "to": "en",
}
# Send a browser-like User-Agent header with the request.
headrstr = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
# requests has no default timeout; without one an unresponsive server would
# block this script forever.
response = requests.post(url, data=querystr, headers=headrstr, timeout=10)
# The body arrives as \uXXXX escape sequences; decoding with unicode_escape
# turns them into readable Chinese characters.
# NOTE(review): if the endpoint returns JSON, response.json() would be the
# more robust way to read it - confirm against the actual response.
print(response.content.decode('unicode_escape'))
爬取手机应用商店更新:
# coding=UTF-8
import time;
import datetime;
import requests;
from lxml import html;
from threading import Timer;
import sys;
# Python 2 only: make UTF-8 the implicit str/unicode codec so the Chinese log
# messages can be concatenated with scraped text. Neither `reload` (as a
# builtin) nor `sys.setdefaultencoding` exists on Python 3, so skip it there.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf8')

# Log of every crawl attempt (relative to the working directory).
log_file = 'android_log.txt'
# Raw string: in a normal literal the "\a" in "\android_record.txt" is the BEL
# control character (and "\U" is a syntax error on Python 3), which corrupts
# the path.
record_file = r'C:\Users\Miky\Desktop\android_record.txt'

# Last known release of the target app.
# TODO: avoid hard-coding these; they could be read from the last two lines
# of a text file instead (presumably the record file - confirm).
old_date_time = '2019-06-05'
old_version = '5.9.2'

url = 'http://app.mi.com/details?id=air.tv.douyu.android&ref=search'
localtime = time.asctime(time.localtime(time.time()))

# Fetch the app detail page once at start-up and show what was scraped.
# The timeout keeps an unresponsive server from blocking the script forever.
page = requests.get(url, timeout=10)
tree = html.fromstring(page.text)
# Text of the 6th and 4th <li> under the ul whose class is " cf"; the code
# below treats them as the update date and the version number.
date_time = tree.xpath('//ul[@class=" cf"]//li[6]/text()')
version = tree.xpath('//ul[@class=" cf"]//li[4]/text()')

# Single-argument print(...) behaves the same on Python 2 and Python 3.
for date in date_time:
    print('date_time: ' + date)
for v in version:
    print('version: ' + v)
def _append_line(path, line):
    """Append *line* to the text file at *path*."""
    with open(path, 'a') as file_object:
        file_object.write(line)


def get():
    """Crawl the app page once, log the outcome, and schedule the next run.

    The page is downloaded on every call so that each run compares fresh data
    against ``old_date_time``. When the scraped date equals ``old_date_time``
    a "not updated" line is appended to ``log_file``; otherwise the crawl time
    and scraped version are appended to ``record_file`` and an "updated" line
    is appended to ``log_file``. Download failures and pages where the
    expected elements are missing are logged instead of raising.

    The next run is always scheduled 600 seconds later, even if this run
    failed, so one bad request does not silently stop the periodic check.
    """
    ftime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    try:
        page = requests.get(url, timeout=10)
        tree = html.fromstring(page.text)
        dates = tree.xpath('//ul[@class=" cf"]//li[6]/text()')
        versions = tree.xpath('//ul[@class=" cf"]//li[4]/text()')

        if not dates or not versions:
            # Page layout changed or an error page was returned: indexing
            # [0] below would raise IndexError, so log and move on.
            _append_line(log_file, ftime + " 爬取失败,页面中未找到更新日期或版本号\n")
        elif dates[0] == old_date_time:
            _append_line(log_file, ftime + " 成功执行爬取,但目标应用并未更新\n")
        else:
            # NOTE(review): old_date_time is never advanced, so once an update
            # is detected it is recorded again on every later run - confirm
            # whether that is intended.
            _append_line(record_file, "更新时间: " + ftime + "\n")
            _append_line(record_file, "版本号: " + versions[0] + "\n")
            _append_line(log_file, ftime + " 成功执行爬取,目标应用【已更新】\n")
    except requests.RequestException as error:
        _append_line(log_file, ftime + " 爬取失败: " + str(error) + "\n")
    finally:
        # Run the task again every n seconds. Re-arming the timer in `finally`
        # matters: if the program dies you might not notice, so an error in
        # one run must not prevent the next one from being scheduled.
        print(u"程序正在执行" + ftime)
        t = Timer(600, get)
        t.start()


get()
网友评论