美文网首页
connect 如何连接远程 socket

connect 如何连接远程 socket

作者: 董泽润 | 来源:发表于2019-08-09 13:09 被阅读0次

    通过 man 可以看到,connect 对于 udp, tcp 是不同的

    SYNOPSIS
         #include <sys/types.h>
         #include <sys/socket.h>
    
         int
         connect(int socket, const struct sockaddr *address, socklen_t address_len);
    
    DESCRIPTION
         The parameter socket is a socket.  If it is of type SOCK_DGRAM, this call specifies the peer with which the socket is to be asso-
         ciated; this address is that to which datagrams are to be sent, and the only address from which datagrams are to be received.  If
         the socket is of type SOCK_STREAM, this call attempts to make a connection to another socket.  The other socket is specified by
         address, which is an address in the communications space of the socket
    

    udp 只是指定要连接的地址,而 tcp 要尝试三次握手建连,那具体流程呢?

    Syscall 入口

    和其它接口一样,系统调用 SYSCALL_DEFINE3, 最终调用 __sys_connect

    /*
     * connect(2) system-call entry point.
     *
     * Takes the socket descriptor, a user-space pointer to the peer address
     * and the address length, and passes all three unchanged to
     * __sys_connect(), returning whatever that function returns.
     */
    SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
            int, addrlen)
    {
        return __sys_connect(fd, uservaddr, addrlen);
    }
    

    整体实现

    /*
     * Protocol-independent part of connect(2).
     *
     * @fd:        descriptor identifying the local socket
     * @uservaddr: user-space pointer to the peer address
     * @addrlen:   length of the address at @uservaddr
     *
     * Resolves @fd to a socket, copies the peer address into a kernel
     * buffer, runs the security hook, and then dispatches to the socket's
     * own connect operation. Returns the error code of the first step that
     * fails, otherwise the result of sock->ops->connect().
     */
    int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
    {
        struct socket *sock;
        struct sockaddr_storage address;
        int err, fput_needed;
    
        /*
         * Look up the socket for this descriptor. On failure there is
         * nothing to release, so skip straight to the return; err was
         * passed by pointer and is presumably set by the lookup - verify
         * against sockfd_lookup_light().
         */
        sock = sockfd_lookup_light(fd, &err, &fput_needed);
        if (!sock)
            goto out;
        /* Copy the peer address from user space into the local buffer. */
        err = move_addr_to_kernel(uservaddr, addrlen, &address);
        if (err < 0)
            goto out_put;
    
        /* Security hook: any non-zero result aborts the connect. */
        err =
            security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
        if (err)
            goto out_put;
    
        /*
         * Hand over to the protocol-specific connect handler, passing the
         * file's flags along with the kernel copy of the address.
         */
        err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
                     sock->file->f_flags);
    out_put:
        /* Balance the successful sockfd_lookup_light() above. */
        fput_light(sock->file, fput_needed);
    out:
        return err;
    }
    

    参数是本机 socket fd, 对端要连接 socket 的地址 uservaddr. 先用 sockfd_lookup_light 根据 fd 找到 socket,然后有一个 move_addr_to_kernel 将用户空间地址拷贝到内核的过程,最后通过函数指针 ops->connect 回调具体协议的实现,tcp 对应 inet_stream_ops.inet_stream_connect, udp 对应 inet_dgram_ops.inet_dgram_connect

    udp实现

    /*
     * connect() handler for datagram sockets.
     *
     * @sock:     socket being connected
     * @uaddr:    kernel copy of the peer address
     * @addr_len: length of @uaddr
     * @flags:    flags forwarded to the protocol's disconnect handler
     *
     * Returns -EINVAL for an address too short to hold sa_family, -EAGAIN
     * when autobind fails, the hook's error when the pre-connect hook
     * rejects the call, and otherwise the result of the protocol's
     * disconnect() (AF_UNSPEC) or connect() handler.
     */
    int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
                   int addr_len, int flags)
    {
        struct sock *sk = sock->sk;
        int err;
    
        /* sa_family is read below, so the address must at least contain it. */
        if (addr_len < sizeof(uaddr->sa_family))
            return -EINVAL;
        /* An AF_UNSPEC address means "disconnect" rather than "connect". */
        if (uaddr->sa_family == AF_UNSPEC)
            return sk->sk_prot->disconnect(sk, flags);
    
        /* Optional BPF cgroup pre-connect hook; a non-zero result aborts. */
        if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
            err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
            if (err)
                return err;
        }
    
        /*
         * inet_num == 0 presumably means no local port is bound yet; in
         * that case try to autobind and report -EAGAIN if that fails.
         */
        if (!inet_sk(sk)->inet_num && inet_autobind(sk))
            return -EAGAIN;
        /* Delegate the actual association to the protocol's connect(). */
        return sk->sk_prot->connect(sk, uaddr, addr_len);
    }
    

    addr_len 判断长度,如果协议族是 AF_UNSPEC,直接断开连接。还有关于 BPF 的,暂时忽略。最后调用 sk_prot->connect,对应函数为 ip4_datagram_connect,它在持有 socket 锁的情况下调用下面的 __ip4_datagram_connect

    /*
     * Associate an IPv4 datagram socket with a peer address.
     *
     * @sk:       socket to associate
     * @uaddr:    peer address, interpreted as struct sockaddr_in
     * @addr_len: length of @uaddr
     *
     * Resolves a route to the peer, fills in any source address that is
     * still unset, records the destination address and port on the socket
     * and marks it TCP_ESTABLISHED. In the code shown here no packet is
     * sent. Returns 0 on success, the routing error if the lookup fails, or
     * -EACCES for a broadcast route on a socket without SOCK_BROADCAST.
     */
    int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
    {
        struct inet_sock *inet = inet_sk(sk);
        struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
        struct flowi4 *fl4;
        struct rtable *rt;
        __be32 saddr;
        int oif;
        int err;
          /* NOTE(review): the excerpt elides statements at this point. */
          ......
        /* Drop the destination currently cached on the socket. */
        sk_dst_reset(sk);
    
        oif = sk->sk_bound_dev_if;
        saddr = inet->inet_saddr;
        /*
         * For a multicast destination, fall back to the socket's multicast
         * interface index and multicast source address when the bound
         * device (or an L3 master device) / source address do not apply.
         */
        if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
            if (!oif || netif_index_is_l3_master(sock_net(sk), oif))
                oif = inet->mc_index;
            if (!saddr)
                saddr = inet->mc_addr;
        }
        /* Route lookup towards the peer, using the socket's flow storage. */
        fl4 = &inet->cork.fl.u.ip4;
        rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
                      RT_CONN_FLAGS(sk), oif,
                      sk->sk_protocol,
                      inet->inet_sport, usin->sin_port, sk);
        if (IS_ERR(rt)) {
            err = PTR_ERR(rt);
            /* Count "no route" failures in the per-netns IP statistics. */
            if (err == -ENETUNREACH)
                IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
            goto out;
        }
    
        /* Broadcast routes are only allowed when SOCK_BROADCAST is set. */
        if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
            ip_rt_put(rt);
            err = -EACCES;
            goto out;
        }
        if (!inet->inet_saddr)
            inet->inet_saddr = fl4->saddr;  /* Update source address */
        /* A newly assigned receive address triggers rehash() if provided. */
        if (!inet->inet_rcv_saddr) {
            inet->inet_rcv_saddr = fl4->saddr;
            if (sk->sk_prot->rehash)
                sk->sk_prot->rehash(sk);
        }
        /* Record the peer and mark the socket as connected. */
        inet->inet_daddr = fl4->daddr;
        inet->inet_dport = usin->sin_port;
        sk->sk_state = TCP_ESTABLISHED;
        sk_set_txhash(sk);
        inet->inet_id = jiffies;
    
        /* Cache the resolved route on the socket. */
        sk_dst_set(sk, &rt->dst);
        err = 0;
    out:
        return err;
    }
    
    1. sk_dst_reset 清空路由 cache
    2. ip_route_connect 查找路由相关,如果找不到报错返回,并更新统计
    3. 如果 saddr 未指定,还要根据路由信息来确定源地址
    4. 更新 socket 状态为 ESTABLISHED, 最后 sk_dst_set 设置下一跳路由

    从这可以看到,udp connect 不会真正的建连

    tcp实现

    tcp_v4_connect
    /* This will initiate an outgoing connection. */
    int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
    {
        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        __be16 orig_sport, orig_dport;
        __be32 daddr, nexthop;
        struct flowi4 *fl4;
        struct rtable *rt;
        int err;
        struct ip_options_rcu *inet_opt;
        struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
    
        nexthop = daddr = usin->sin_addr.s_addr;
        inet_opt = rcu_dereference_protected(inet->inet_opt,
                             lockdep_sock_is_held(sk));
        if (inet_opt && inet_opt->opt.srr) {
            if (!daddr)
                return -EINVAL;
            nexthop = inet_opt->opt.faddr;
        }
    
        orig_sport = inet->inet_sport;
        orig_dport = usin->sin_port;
        fl4 = &inet->cork.fl.u.ip4;
        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
                      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
                      IPPROTO_TCP,
                      orig_sport, orig_dport, sk);
        if (IS_ERR(rt)) {
            err = PTR_ERR(rt);
            if (err == -ENETUNREACH)
                IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
            return err;
        }
    
        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
            ip_rt_put(rt);
            return -ENETUNREACH;
        }
    
        if (!inet_opt || !inet_opt->opt.srr)
            daddr = fl4->daddr;
    
        if (!inet->inet_saddr)
            inet->inet_saddr = fl4->saddr;
        sk_rcv_saddr_set(sk, inet->inet_saddr);
    
        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
            /* Reset inherited state */
            tp->rx_opt.ts_recent       = 0;
            tp->rx_opt.ts_recent_stamp = 0;
            if (likely(!tp->repair))
                tp->write_seq      = 0;
        }
    
        inet->inet_dport = usin->sin_port;
        sk_daddr_set(sk, daddr);
    
        inet_csk(sk)->icsk_ext_hdr_len = 0;
        if (inet_opt)
            inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
    
        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
    
        /* Socket identity is still unknown (sport may be zero).
         * However we set state to SYN-SENT and not releasing socket
         * lock select source port, enter ourselves into the hash tables and
         * complete initialization after this.
         */
        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet_hash_connect(tcp_death_row, sk);
        if (err)
            goto failure;
    
        sk_set_txhash(sk);
    
        rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
                       inet->inet_sport, inet->inet_dport, sk);
        if (IS_ERR(rt)) {
            err = PTR_ERR(rt);
            rt = NULL;
            goto failure;
        }
        /* OK, now commit destination to socket.  */
        sk->sk_gso_type = SKB_GSO_TCPV4;
        sk_setup_caps(sk, &rt->dst);
        rt = NULL;
    
        if (likely(!tp->repair)) {
            if (!tp->write_seq)
                tp->write_seq = secure_tcp_seq(inet->inet_saddr,
                                   inet->inet_daddr,
                                   inet->inet_sport,
                                   usin->sin_port);
            tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
                             inet->inet_saddr,
                             inet->inet_daddr);
        }
    
        inet->inet_id = tp->write_seq ^ jiffies;
    
        if (tcp_fastopen_defer_connect(sk, &err))
            return err;
        if (err)
            goto failure;
    
        err = tcp_connect(sk);
    
        if (err)
            goto failure;
    
        return 0;
    }
    
    1. ip_route_connect 寻找路由,主要是找到下一跳,如果报错那么返回
    2. tcp_set_state 将 socket 状态设置为 SYN_SENT
    3. inet_hash_connect 将 socket 扔到 hash 表里,这里面会判断有没有指定源 port,如果没指定自动选择一个,涉及 reuseport, 暂时不看,以后有机会再分析
    4. 下面还有路由的,还有一段关于 SKB_GSO 分片的,暂时也不看了
    5. write_seq 初始化序列号,这里可以看到是安全的,具体算法也忽略
    6. fastopen 也忽略,最后调用 tcp_connect 来干活
    tcp_connect
    /*
     * Build a SYN and send it off.
     *
     * @sk: TCP socket being connected
     *
     * Initialises connection state, allocates an skb carrying the SYN flag,
     * queues it for retransmission, transmits it (with data when a Fast
     * Open request is pending) and arms the retransmit timer.
     *
     * Returns 0 on success, -EHOSTUNREACH if the header cannot be rebuilt,
     * -ENOBUFS if no skb can be allocated, or -ECONNREFUSED if the transmit
     * step reports it. Other transmit errors are not returned from here.
     */
    int tcp_connect(struct sock *sk)
    {
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *buff;
        int err;
    
        /* Notify BPF sock_ops programs that a connect is starting. */
        tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
    
        if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
            return -EHOSTUNREACH; /* Routing failure or similar. */
    
        tcp_connect_init(sk);
    
        /* In repair mode the connection is finished without sending a SYN. */
        if (unlikely(tp->repair)) {
            tcp_finish_connect(sk, NULL);
            return 0;
        }
    
        buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
        if (unlikely(!buff))
            return -ENOBUFS;
    
        /* The SYN takes the current write_seq, which is then advanced by one. */
        tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
        tcp_mstamp_refresh(tp);
        tp->retrans_stamp = tcp_time_stamp(tp);
        tcp_connect_queue_skb(sk, buff);
        tcp_ecn_send_syn(sk, buff);
        /* Keep the SYN in the retransmit queue until it is answered. */
        tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
    
        /* Send off SYN; include data in Fast Open. */
        err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
              tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
        /* Only -ECONNREFUSED aborts here; other errors fall through to the timer. */
        if (err == -ECONNREFUSED)
            return err;
    
        /* We change tp->snd_nxt after the tcp_transmit_skb() call
         * in order to make this packet get counted in tcpOutSegs.
         */
        tp->snd_nxt = tp->write_seq;
        tp->pushed_seq = tp->write_seq;
        /* If something is still waiting at the send head, restart from its seq. */
        buff = tcp_send_head(sk);
        if (unlikely(buff)) {
            tp->snd_nxt = TCP_SKB_CB(buff)->seq;
            tp->pushed_seq  = TCP_SKB_CB(buff)->seq;
        }
        TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
    
        /* Timer for repeating the SYN until an answer. */
        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                      inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
        return 0;
    }
    
    1. tcp_connect_init 初始,设置滑动窗口,超时时间杂七杂八的
    2. sk_stream_alloc_skb 生成 skb,这个很重要,四层协议栈的数据都写在这里发送出去。调用 tcp_init_nondata_skb 初始化 skb
    3. 判断是否是 fastopen,当前没有数据,调用 tcp_transmit_skb 发送数据,实际也是发到队列里
    4. 最后 inet_csk_reset_xmit_timer 注册 timer, 如果发送 SYN 超时继续重发

    小结

    暂时只看大致的流程,一些初始化的细节暂时没细看。一句话总结,udp connect 不会发送数据,状态直接设置 ESTABLISHED,而 tcp connect 要发送 SYN 包,并且设置各种初始窗口,序列号等等,状态标记为 SYN_SENT

    相关文章

      网友评论

          本文标题:connect 如何连接远程 socket

          本文链接:https://www.haomeiwen.com/subject/lufrrctx.html