- 最基本网页抓取
import urllib2
response = urllib2.urlopen("http://www.baidu.com")
print response.read()
调用函数 urlopen(url, data, timeout),也可以只传 url:urlopen(url)
- url:要访问的URL
- data:访问URL时要传送的数据,默认为空(None)
- timeout:设置超时时间,默认为 socket._GLOBAL_DEFAULT_TIMEOUT
执行 urlopen,返回一个 response 对象;response.read() 返回获取到的网页内容。
import urllib2
request = urllib2.Request("http://www.baidu.com")//多加一行
response = urllib2.urlopen(request)
print response.read()
构建request,服务器响应请求得到应答,逻辑更清晰,但结果一样。
结果打印的是html代码
- POST GET
登录/注册
GET:以链接形式访问,链接中包含了所有参数(也就是data)
POST:与GET相反,参数(data)不出现在链接中,而是随请求单独发送
POST示例(下面两种写法等价):
import urllib
import urllib2
values = {"username":"1016903103@qq.com","password":"XXXX"}
data = urllib.urlencode(values)
url = "https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn"
request = urllib2.Request(url,data)
response = urllib2.urlopen(request)
print response.read()
import urllib
import urllib2
values = {}
values['username'] = "1016903103@qq.com"
values['password'] = "XXXX"
data = urllib.urlencode(values)
url = "http://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn"
request = urllib2.Request(url,data)
response = urllib2.urlopen(request)
print response.read()
GET:
import urllib
import urllib2
url = "https://www.baidu.com/"
value = {}
value['username'] = 'Alice'
value['password'] = '111'
data = urllib.urlencode(value)
geturl = url + "?" + data
req = urllib2.Request(geturl)
response = urllib2.urlopen(req)
print response.read()
网友评论