美文网首页
linux内核tcp协议栈走读记录(一)

linux内核tcp协议栈走读记录(一)

作者: help_youself | 来源:发表于2019-07-18 15:33 被阅读0次

     先看[1]中的两段代码:

     /* Create an AF_INET / SOCK_STREAM socket from kernel code; the new socket is returned through &sock. */
     ret = sock_create_kern(&init_net, AF_INET, SOCK_STREAM, 0, &sock);
    /* Issue the connect through the socket's ops table, passing s_addr and its size as the address argument. */
    ret = sock->ops->connect(sock, (struct sockaddr *)&s_addr, sizeof(s_addr), 0);
    

     sock是怎么创建的?sock->ops中有多少操作?

    /*
     * Excerpt of inet_init(): only the step relevant to socket creation is
     * shown. It registers inet_family_ops through sock_register(); the
     * return value is explicitly discarded with the (void) cast.
     */
    int __init inet_init(void)
    {
    (void)sock_register(&inet_family_ops);
    }
    //af_inet.c
    /*
     * Protocol-family descriptor passed to sock_register() in inet_init():
     * for family PF_INET the create hook is inet_create.
     * The .owner field is commented out in this excerpt.
     */
    static const struct net_proto_family inet_family_ops = {
        .family = PF_INET,
        .create = inet_create,
    /*  .owner  = THIS_MODULE,*/
    };
    /*
     * Thin wrapper around __sock_create(): always uses &init_net as the
     * network namespace and passes 1 as the final (kern) argument.
     *
     * NOTE(review): the call quoted at the top of this article passes
     * &init_net as an extra first argument, which this four-parameter
     * signature does not accept. The two snippets presumably come from
     * different kernel versions - confirm which version is being walked
     * through.
     */
    int sock_create_kern(int family, int type, int protocol, struct socket **res)
    {
        return __sock_create(&init_net, family, type, protocol, res, 1);
    }
    /*
     * Excerpt of __sock_create(): only the dispatch to the protocol
     * family's create hook is shown. The lookup of pf, the allocation of
     * sock, the handling of err and the return are omitted here.
     */
    int __sock_create(struct net *net, int family, int type, int protocol,
                 struct socket **res, int kern)
    {
    err = pf->create(net, sock, protocol, kern);
    }
    // Here pf->create points to inet_create (see inet_family_ops.create).
    

     接下来就需要分析inet_create(af_inet.c)的处理流程。

    /*
     * Excerpt of inet_create(): copies the ops table of the selected
     * inetsw entry (answer) into sock->ops, allocates the struct sock with
     * answer->prot as its proto, then calls sock_init_data(sock, sk).
     * How answer is looked up is not shown in this excerpt.
     */
    static int inet_create(struct net *net, struct socket *sock, int protocol,
                   int kern)
    {
        /* Socket-level operations come from the matched inetsw entry. */
        sock->ops = answer->ops;
        answer_prot = answer->prot;
    /* The proto handed over here ends up in sk->sk_prot (see sk_alloc). */
    sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
    sock_init_data(sock, sk);
    }
    /*
     * Excerpt of sk_alloc(): records the given proto in both sk->sk_prot
     * and sk->sk_prot_creator. The allocation of sk and the return are
     * omitted in this excerpt.
     */
    struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                  struct proto *prot)
    {
        sk->sk_prot = sk->sk_prot_creator = prot;
    }
    

     type和protocol就是应用层调用socket函数时传入的参数。其中type对应三种数据包的处理方式,主要有SOCK_STREAM(TCP)、SOCK_DGRAM(UDP)、SOCK_RAW(原始socket)。代码中的answer指针,就是根据type和protocol从inetsw_array中选出的某一项。根据上面的代码可知:sock->ops = answer->ops。

    /*
     * Table of the built-in inet socket kinds. Each entry pairs a socket
     * type and protocol with a protocol-level table (.prot) and a
     * socket-level operation table (.ops).
     */
    static struct inet_protosw inetsw_array[] =
    {
        /* TCP: stream sockets use tcp_prot and inet_stream_ops. */
        {
            .type =       SOCK_STREAM,
            .protocol =   IPPROTO_TCP,
            .prot =       &tcp_prot,
            .ops =        &inet_stream_ops,
            .no_check =   0,
            .flags =      INET_PROTOSW_PERMANENT |
                      INET_PROTOSW_ICSK,
        },
    
        /* UDP: datagram sockets use udp_prot and inet_dgram_ops. */
        {
            .type =       SOCK_DGRAM,
            .protocol =   IPPROTO_UDP,
            .prot =       &udp_prot,
            .ops =        &inet_dgram_ops,
            .no_check =   UDP_CSUM_DEFAULT,
            .flags =      INET_PROTOSW_PERMANENT,
           },
    
           /* ICMP over datagram sockets: ping_prot, sharing inet_dgram_ops with UDP. */
           {
            .type =       SOCK_DGRAM,
            .protocol =   IPPROTO_ICMP,
            .prot =       &ping_prot,
            .ops =        &inet_dgram_ops,
            .no_check =   UDP_CSUM_DEFAULT,
            .flags =      INET_PROTOSW_REUSE,
           },
    
           /* Raw sockets: raw_prot and inet_sockraw_ops; IPPROTO_IP acts as a wild card. */
           {
               .type =       SOCK_RAW,
               .protocol =   IPPROTO_IP,    /* wild card */
               .prot =       &raw_prot,
               .ops =        &inet_sockraw_ops,
               .no_check =   UDP_CSUM_DEFAULT,
               .flags =      INET_PROTOSW_REUSE,
           }
    };
    

     以tcp为例,这里的ops指针指向的内容是:

    /*
     * Socket-level operation table referenced by the SOCK_STREAM entry of
     * inetsw_array (.ops = &inet_stream_ops). Its .connect hook is
     * inet_stream_connect, so sock->ops->connect on a TCP socket lands
     * there. The .owner, .poll, .mmap and .splice_read members are
     * commented out in this excerpt.
     */
    const struct proto_ops inet_stream_ops = {
        .family        = PF_INET,
        /*.owner           = THIS_MODULE,*/
        .release       = inet_release,
        .bind          = inet_bind,
        .connect       = inet_stream_connect,
        .socketpair    = sock_no_socketpair,
        .accept        = inet_accept,
        .getname       = inet_getname,
    //  .poll          = tcp_poll,
        .ioctl         = inet_ioctl,
        .listen        = inet_listen,
        .shutdown      = inet_shutdown,
        .setsockopt    = sock_common_setsockopt,
        .getsockopt    = sock_common_getsockopt,
        .sendmsg       = inet_sendmsg,
        .recvmsg       = inet_recvmsg,
    //  .mmap          = sock_no_mmap,
        .sendpage      = inet_sendpage,
    //  .splice_read       = tcp_splice_read,
    /* The compat_* hooks are only present when CONFIG_COMPAT is defined. */
    #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_sock_common_setsockopt,
        .compat_getsockopt = compat_sock_common_getsockopt,
        .compat_ioctl      = inet_compat_ioctl,
    #endif
    };
    

     回到文首提到的sock->ops->connect操作,实际执行的就是inet_stream_connect。而根据上面的sk_alloc函数,sk->sk_prot指向的就是answer->prot。

    /*
     * Excerpt of inet_stream_connect(): forwards its arguments unchanged
     * to __inet_stream_connect(). The handling of err and the return are
     * omitted in this excerpt.
     */
    int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                int addr_len, int flags)
    {
        err = __inet_stream_connect(sock, uaddr, addr_len, flags);
    }
    /*
     * Excerpt of __inet_stream_connect(): dispatches to the connect hook
     * of the sock's proto table (sk->sk_prot->connect). In the line shown,
     * flags is not forwarded to that hook. How sk is obtained from sock is
     * omitted in this excerpt.
     */
    int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                  int addr_len, int flags)
    {
    err = sk->sk_prot->connect(sk, uaddr, addr_len);
    }
    

     如果是tcp协议,answer->prot就是指向了tcp_prot。结构体tcp_prot(tcp_ipv4.c)中的内容:

    /*
     * Protocol-level table referenced by the SOCK_STREAM entry of
     * inetsw_array (.prot = &tcp_prot). Its .connect hook is
     * tcp_v4_connect, which is what sk->sk_prot->connect resolves to when
     * sk_prot points at this table. The .owner and .slab_flags members are
     * commented out in this excerpt.
     */
    struct proto tcp_prot = {
        .name           = "TCP",
        //.owner            = THIS_MODULE,
        .close          = tcp_close,
        .connect        = tcp_v4_connect,
        .disconnect     = tcp_disconnect,
        .accept         = inet_csk_accept,
        .ioctl          = tcp_ioctl,
        .init           = tcp_v4_init_sock,
        .destroy        = tcp_v4_destroy_sock,
        .shutdown       = tcp_shutdown,
        .setsockopt     = tcp_setsockopt,
        .getsockopt     = tcp_getsockopt,
        .recvmsg        = tcp_recvmsg,
        .sendmsg        = tcp_sendmsg,
        .sendpage       = tcp_sendpage,
        .backlog_rcv        = tcp_v4_do_rcv,
        .release_cb     = tcp_release_cb,
        .mtu_reduced        = tcp_v4_mtu_reduced,
        .hash           = inet_hash,
        .unhash         = inet_unhash,
        .get_port       = inet_csk_get_port,
        .enter_memory_pressure  = tcp_enter_memory_pressure,
        .stream_memory_free = tcp_stream_memory_free,
        .sockets_allocated  = &tcp_sockets_allocated,
        .orphan_count       = &tcp_orphan_count,
        .memory_allocated   = &tcp_memory_allocated,
        .memory_pressure    = &tcp_memory_pressure,
        .sysctl_mem     = sysctl_tcp_mem,
        .sysctl_wmem        = sysctl_tcp_wmem,
        .sysctl_rmem        = sysctl_tcp_rmem,
        .max_header     = MAX_TCP_HEADER,
        /* Object size is that of struct tcp_sock, not the generic struct sock. */
        .obj_size       = sizeof(struct tcp_sock),
    //  .slab_flags     = SLAB_DESTROY_BY_RCU,
        .twsk_prot      = &tcp_timewait_sock_ops,
        .rsk_prot       = &tcp_request_sock_ops,
        .h.hashinfo     = &tcp_hashinfo,
        .no_autobind        = true,
    /* The compat_* hooks are only present when CONFIG_COMPAT is defined. */
    #ifdef CONFIG_COMPAT
        .compat_setsockopt  = compat_tcp_setsockopt,
        .compat_getsockopt  = compat_tcp_getsockopt,
    #endif
    /* The cgroup hooks are only present when CONFIG_MEMCG_KMEM is defined. */
    #ifdef CONFIG_MEMCG_KMEM
        .init_cgroup        = tcp_init_cgroup,
        .destroy_cgroup     = tcp_destroy_cgroup,
        .proto_cgroup       = tcp_proto_cgroup,
    #endif
    };
    

     sk->sk_prot->connect实际执行的就是tcp_v4_connect函数。
     博客[5]继续分析了tcp_v4_connect之后的操作,比如源端口的分配。源端口的分配与函数inet_hash_connect相关,其选取原则见博客[6]:

    如果用户已经绑定了端口,就使用绑定的端口。
    如果用户没有绑定端口,则让系统自动选取,策略如下:

    1. 获取端口的取值区间,以及区间内端口的个数。
    2. 根据初始偏移量,从端口区间内的某个端口开始,遍历整个区间。
      2.1 如果端口是保留的,直接跳过。
      2.2 如果端口已经被使用了:
        2.2.1 不允许复用已经被bind()的端口。
        2.2.2 检查端口是否能被重用,可以的话就重用此端口。
      2.3 如果端口没有被使用过,就选择此端口。
    /*
     * Excerpt of tcp_v4_connect(): resolves a route with
     * ip_route_connect(), then calls inet_hash_connect() with
     * &tcp_death_row and sk. According to the surrounding article, the
     * inet_hash_connect() step is where the source port is assigned.
     * Declarations, error handling and the remaining steps are omitted in
     * this excerpt.
     */
    int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
    {
        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
                      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
                      IPPROTO_TCP,
                      orig_sport, orig_dport, sk);
    err = inet_hash_connect(&tcp_death_row, sk);
    }
    /*
     * Wrapper around __inet_hash_connect(): supplies
     * inet_sk_port_offset(sk) as the port offset,
     * __inet_check_established as the check_established callback and
     * __inet_hash_nolisten as the hash callback.
     */
    int inet_hash_connect(struct inet_timewait_death_row *death_row,
                  struct sock *sk)
    {
        return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
                __inet_check_established, __inet_hash_nolisten);
    }
    /*
     * Excerpt of __inet_hash_connect(): when snum is zero the
     * port-allocation branch runs (its body is elided here), and
     * inet_bind_hash(sk, tb, port) is called. snum, tb and port are not
     * declared in this excerpt.
     *
     * NOTE(review): as excerpted, inet_bind_hash() sits after the closing
     * brace of the if-block; verify against the kernel source whether it
     * actually belongs inside the port-allocation branch.
     */
    int __inet_hash_connect(struct inet_timewait_death_row *death_row,
            struct sock *sk, u32 port_offset,
            int (*check_established)(struct inet_timewait_death_row *,
                struct sock *, __u16, struct inet_timewait_sock **),
            int (*hash)(struct sock *sk, struct inet_timewait_sock *twp))
    {
        if (!snum) {
      // port allocation
    }
      inet_bind_hash(sk, tb, port);
    }
    

    reference:
    [1] 一个简单的内核Socket Client例子
    [2] 2.1 Socket系统调用
    [3] 第一个Linux网络设备驱动——最简虚拟网卡virnet
    [4] Linux串口网卡(一)——通用虚拟网卡的实现
    [5] Linux TCP/IP 协议栈之 Socket的实现分析(Connect客户端发起连接请求)
    [6] TCP连接建立系列 — 客户端的端口选取和重用
    [7] 路由表

    相关文章

      网友评论

          本文标题:linux内核tcp协议栈走读记录(一)

          本文链接:https://www.haomeiwen.com/subject/ktullctx.html