urllib库

作者: 小白快加油 | 来源:发表于2018-06-02 15:55 被阅读0次

    1 urlopen()

    把Python官网的首页爬取下来

    # urlopen(): fetch the python.org home page and print its HTML.
    import urllib.request

    # The response works as a context manager, so the underlying connection
    # is closed as soon as the body has been read.
    with urllib.request.urlopen('https://www.python.org') as response:
        print(response.read().decode('utf-8'))
    

    2 查看返回的类型

    # Inspect the type of the object returned by urlopen().
    import urllib.request

    # Close the response automatically once we are done with it.
    with urllib.request.urlopen('https://www.python.org') as response:
        print(type(response))

    # Output:
    # <class 'http.client.HTTPResponse'>
    

    这是一个HTTPResponse类型的对象,包含的方法有:
    read()
    readinto()
    getheader(name)
    getheaders()
    fileno() 等。
    包含的属性有:
    msg
    version
    status
    reason
    debuglevel
    closed

    3 再来看一个例子

    # Another example: print the status code, all response headers,
    # and the value of a single header.
    import urllib.request

    # Close the response automatically once the values have been printed.
    with urllib.request.urlopen('https://www.python.org') as response:
        print(response.status)
        print(response.getheaders())
        print(response.getheader('Server'))

    # Output:
    # 200
    # (the full header list is long, so it is omitted here)
    # nginx
    

    4 urlopen() 函数的API

    #urlopen() 函数的API
    urllib.request.urlopen(url, data=None, [timeout, ]*, 
        cafile=None, capath=None, cadefault=False, context=None)
    

    5 data参数

    # The data argument must be bytes: URL-encode the form fields, then
    # encode the resulting string as UTF-8 before passing it to urlopen().
    import urllib.parse
    import urllib.request

    data = urllib.parse.urlencode({'word': 'hello'}).encode('utf-8')
    # Close the response automatically once the body has been read.
    with urllib.request.urlopen('http://httpbin.org/post', data=data) as response:
        print(response.read())

    # Output omitted.
    

    6 timeout参数

    # timeout is given in seconds: if the server still has not responded
    # after 1 second, urlopen() raises URLError.
    # NOTE(review): a timeout that happens while reading the body may surface
    # as socket.timeout rather than URLError -- confirm for your Python version.
    import urllib.request

    # Close the response automatically once the body has been read.
    with urllib.request.urlopen('http://httpbin.org/get', timeout=1) as response:
        print(response.read())

    # Output omitted.
    
    # Handle a request timeout explicitly and report it as 'TIME OUT'.
    import socket
    import urllib.request
    import urllib.error

    try:
        # Close the response automatically once the body has been read.
        with urllib.request.urlopen('http://httpbin.org/get', timeout=0.1) as response:
            print(response.read())
    except socket.timeout:
        # A timeout raised while reading the body is not wrapped in URLError.
        print('TIME OUT')
    except urllib.error.URLError as e:
        if isinstance(e.reason, socket.timeout):
            print('TIME OUT')
        else:
            # Any other failure (DNS error, refused connection, ...) must not
            # be swallowed silently.
            raise
    

    7 Request

    class urllib.request.Request(url, data=None, headers={}, 
        origin_req_host=None, unverifiable=False, method=None)
    

    看一个例子

    传入多个参数构建请求

    # Build a Request from several arguments: URL, form data, headers, method.
    from urllib import request, parse

    url = 'http://httpbin.org/post'
    headers = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Host': 'httpbin.org',
    }
    # Named form_fields (not dict) so the built-in dict type is not shadowed.
    form_fields = {
        'name': 'Germey',
    }
    # Request data must be bytes: URL-encode the fields, then encode as UTF-8.
    data = parse.urlencode(form_fields).encode('utf-8')
    req = request.Request(url=url, data=data, headers=headers, method='POST')
    # Close the response automatically once the body has been read.
    with request.urlopen(req) as response:
        print(response.read().decode('utf-8'))
    

    相关文章

      网友评论

          本文标题:urllib库

          本文链接:https://www.haomeiwen.com/subject/galrsftx.html