美文网首页
源码分析 shutdown 与 close 的区别

源码分析 shutdown 与 close 的区别

作者: 董泽润 | 来源:发表于2019-08-13 19:03 被阅读0次

    我们知道,在 linux 中一切皆文件,socket 建立后需要关闭,可以使用通用的 close 函数关闭,也可以使用 socket 专有的 shutdown. 具体有什么差异呢?

    SYNOPSIS
         #include <sys/socket.h>
    
         int
         shutdown(int socket, int how);
    
    DESCRIPTION
         The shutdown() call causes all or part of a full-duplex connection on the socket associated with socket to be shut down.  If how
         is SHUT_RD, further receives will be disallowed.  If how is SHUT_WR, further sends will be disallowed.  If how is SHUT_RDWR, fur-
         ther sends and receives will be disallowed.
    
    SYNOPSIS
         #include <unistd.h>
    
         int
         close(int fildes);
    
    DESCRIPTION
         The close() call deletes a descriptor from the per-process object reference table.  If this is the last reference to the underly-
         ing object, the object will be deactivated.  For example, on the last close of a file the current seek pointer associated with the
         file is lost; on the last close of a socket(2) associated naming information and queued data are discarded; on the last close of a
         file holding an advisory lock the lock is released (see further flock(2)).
         ......
    
    1. shutdown 粒度比较细,可以控制读或者写,而 close 只是关闭文件
    2. close 涉及到文件的引用计数,只有计数降为 0 才是真正的关闭。一个进程 fork 时,子进程默认继承父进程所有打开的文件,此时计数加一(设置了 close_on_exec 的文件会在 exec 时被自动关闭);这种情况下某个进程调用 close 只是把计数减一
    3. 从影响上来看,如果一个 socket 文件被很多进程打开,那么 shutdown 影响所有进程。而 close 只是计数减一,并不影响其它进程
    4. close 属于更高层的抽象,属于 fs 文件系统级别的,shutdown 更底层一些

    从 man 手册可以看到如上区别,那么具体实现如何呢?去看内核源码好了。

    close 实现

    fs 接口层实现

    close 属于文件系统层面的操作,通过系统调用看具体实现。

    /*
     * close(2) entry point: close descriptor fd of the current process.
     *
     * Returns whatever __close_fd() returns, except that every "restart the
     * syscall" code is reported to the caller as -EINTR.
     */
    SYSCALL_DEFINE1(close, unsigned int, fd)
    {
        int rc = __close_fd(current->files, fd);
    
        /*
         * The file table entry has already been cleared, so restarting the
         * syscall is not possible; report an interruption instead.
         */
        switch (rc) {
        case -ERESTARTSYS:
        case -ERESTARTNOINTR:
        case -ERESTARTNOHAND:
        case -ERESTART_RESTARTBLOCK:
            return -EINTR;
        default:
            return rc;
        }
    }
    

    __close_fd 关闭 fd 入口,current->files 表示当前进程打开的文件

    /*
     * Close descriptor fd in the descriptor table files.
     *
     * With files->file_lock held, looks up the struct file stored in slot fd,
     * clears that slot and hands the descriptor number to __put_unused_fd();
     * the lock is then dropped and the file is passed to filp_close().
     *
     * Returns the result of filp_close(), or -EBADF when fd is beyond the
     * table (fd >= max_fds) or the slot is empty.
     */
    int __close_fd(struct files_struct *files, unsigned fd)
    {
        struct file *file;
        struct fdtable *fdt;
    
        spin_lock(&files->file_lock); // take the file-table lock
        fdt = files_fdtable(files);
        if (fd >= fdt->max_fds)
            goto out_unlock;
        file = fdt->fd[fd];
        if (!file)
            goto out_unlock;
        rcu_assign_pointer(fdt->fd[fd], NULL); // remove the file for fd from the process's file table
        __put_unused_fd(files, fd); // presumably makes the descriptor number reusable
        spin_unlock(&files->file_lock);
        return filp_close(file, files); // close it, after the lock has been dropped
    
    out_unlock:
        spin_unlock(&files->file_lock);
        return -EBADF;
    }
    

    __close_fd 重要的工作是从文件表里找到 fd 对应的 file,然后从列表里删除,顺便 __put_unused_fd 把 fd 释放出来。再调用 filp_close 关闭文件

    /*
     * Close one reference to filp on behalf of owner id.
     *
     * Runs the file's flush hook when it has one, removes dnotify state and
     * POSIX locks for non-FMODE_PATH files, and finally drops the reference
     * with fput().
     *
     * Returns the flush hook's result, or 0 when there is no flush hook or
     * the file count is already zero.
     */
    int filp_close(struct file *filp, fl_owner_t id)
    {
        int status = 0;
    
        /* A zero count means there is nothing left to close: log and bail out. */
        if (file_count(filp) == 0) {
            printk(KERN_ERR "VFS: Close: file count is 0\n");
            return 0;
        }
    
        /* Flush first, if the file type provides a flush operation. */
        if (filp->f_op->flush != NULL)
            status = filp->f_op->flush(filp, id);
    
        if (likely((filp->f_mode & FMODE_PATH) == 0)) {
            dnotify_flush(filp, id);
            locks_remove_posix(filp, id);
        }
    
        fput(filp);
        return status;
    }
    

    此时 file 已经不在进程打开文件列表里了,调用 fput 来异步关闭文件

    /* Delayed work item whose handler is delayed_fput(); scheduled by fput_many(). */
    static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
    
    /*
     * Drop refs references on file.
     *
     * Nothing else happens unless atomic_long_sub_and_test() reports true for
     * the subtraction from f_count (presumably: the count reached zero). In
     * that case the teardown is deferred rather than performed inline:
     *   - outside interrupt context and not in a kernel thread, ____fput is
     *     queued as task work on the current task;
     *   - otherwise, or when task_work_add() fails, the file is added to
     *     delayed_fput_list and delayed_fput_work is scheduled.
     */
    void fput_many(struct file *file, unsigned int refs)
    {
        if (atomic_long_sub_and_test(refs, &file->f_count)) {
            struct task_struct *task = current;
    
            if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
                init_task_work(&file->f_u.fu_rcuhead, ____fput);
                /* A zero return means the work was queued; nothing more to do here. */
                if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
                    return;
                /*
                 * After this task has run exit_task_work(),
                 * task_work_add() will fail.  Fall through to delayed
                 * fput to avoid leaking *file.
                 */
            }
    
            /*
             * Schedule the work only when llist_add() returns true
             * (presumably: the list was empty before this insertion).
             */
            if (llist_add(&file->f_u.fu_llist, &delayed_fput_list))
                schedule_delayed_work(&delayed_fput_work, 1);
        }
    }
    

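    首先 atomic_long_sub_and_test 将引用计数减去 refs(普通的 fput 就是减一),如果减完为 0 了,那么走后面关闭的逻辑,也就是说引用计数仍大于 0 时,本次操作什么也不做。真正的关闭并不在当前调用栈里同步完成:普通进程上下文中通过 task_work_add 把 ____fput 挂到当前进程上延后执行;只有在中断上下文、内核线程,或者 task_work_add 失败时,才会把 file 放进 delayed_fput_list,交给 delayed_fput_work。delayed_fput_work 是一个异步的 workqueue 任务,专职用于关闭文件,对应调用函数是 delayed_fput. workqueue 原理暂时不看了,反正就是异步队列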

    /*
     * Tear down a struct file and free it.
     *
     * NOTE(review): the callers are not shown here; presumably this runs from
     * ____fput / delayed_fput once the last reference has been dropped.
     *
     * For a file that was opened (FMODE_OPENED set) this runs the notification,
     * epoll, lock and fasync cleanup, calls the file's release hook, and then
     * drops the references held on the cdev, fops, owner pid, inode access
     * counts, dentry and mount. The struct file itself is always freed.
     */
    static void __fput(struct file *file)
    {
        /* Snapshot the fields that are still needed after release() has run. */
        struct dentry *dentry = file->f_path.dentry;
        struct vfsmount *mnt = file->f_path.mnt;
        struct inode *inode = file->f_inode;
        fmode_t mode = file->f_mode;
    
        /* Not marked as opened: skip the teardown and only free the struct. */
        if (unlikely(!(file->f_mode & FMODE_OPENED)))
            goto out;
    
        might_sleep();
    
        fsnotify_close(file);
        /*
         * The function eventpoll_release() should be the first called
         * in the file cleanup chain.
         */
        eventpoll_release(file);
        locks_remove_file(file);
    
        ima_file_free(file);
        /* FASYNC still set: call the fasync hook with fd -1 and on == 0. */
        if (unlikely(file->f_flags & FASYNC)) {
            if (file->f_op->fasync)
                file->f_op->fasync(-1, file, 0);
        }
        /* Type-specific close; socket_file_ops sets this to sock_close. */
        if (file->f_op->release)
            file->f_op->release(inode, file);
        if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
                 !(mode & FMODE_PATH))) {
            cdev_put(inode->i_cdev);
        }
        fops_put(file->f_op);
        put_pid(file->f_owner.pid);
        /* Opened for read but not for write: undo the inode read count. */
        if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
            i_readcount_dec(inode);
        if (mode & FMODE_WRITER) {
            put_write_access(inode);
            __mnt_drop_write(mnt);
        }
        dput(dentry);
        if (unlikely(mode & FMODE_NEED_UNMOUNT))
            dissolve_on_fput(mnt);
        mntput(mnt);
    out:
        file_free(file);
    }
    

    __fput 才是真正关闭 fd 的代码,首先它是一个泛型的操作,本身 linux 一切皆文件嘛,也好理解。fsnotify 消息通知,特殊处理 epoll file 等等,通用的代码就是回调 ops 函数指针 release 来关闭文件。由之前的源码分析我们知道, 创建 socket 时 sock_alloc_file 会将 socket_file_ops 关联到 ops 函数指针,那么最终 release 实际指向 sock_close

    /*
     * File operations table for socket files.
     *
     * The .release entry (sock_close) is what __fput() invokes through
     * f_op->release when a socket file is torn down.
     */
    static const struct file_operations socket_file_ops = {
        .owner =    THIS_MODULE,
        .llseek =   no_llseek,
        .read_iter =    sock_read_iter,
        .write_iter =   sock_write_iter,
        .poll =     sock_poll,
        .unlocked_ioctl = sock_ioctl,
    #ifdef CONFIG_COMPAT
        .compat_ioctl = compat_sock_ioctl,
    #endif
        .mmap =     sock_mmap,
        .release =  sock_close,
        .fasync =   sock_fasync,
        .sendpage = sock_sendpage,
        .splice_write = generic_splice_sendpage,
        .splice_read =  sock_splice_read,
    };
    

    inet 接口层实现

    内核的设计是分层的,file system 下面直接就是 sock 层。sock_close 会调用 __sock_release,而 __sock_release 具体调用哪些方法来关闭 sock,要看 socket 的具体类型,所以也是函数指针

    /*
     * Release a socket.
     *
     * If the socket still has an ops table, calls its release hook (under the
     * inode lock when an inode is supplied), clears sock->sk and sock->ops,
     * and drops the reference on the module that owns the ops table.
     * Afterwards: when the socket has no file attached, the socket's inode is
     * released with iput(); otherwise only the sock->file link is cleared.
     */
    static void __sock_release(struct socket *sock, struct inode *inode)
    {
        if (sock->ops) {
            /* Read the owner now: sock->ops is cleared before module_put(). */
            struct module *owner = sock->ops->owner;
    
            if (inode)
                inode_lock(inode);
            sock->ops->release(sock); // release the sock through the protocol family's hook
            sock->sk = NULL;
            if (inode)
                inode_unlock(inode);
            sock->ops = NULL;
            module_put(owner);
        }
    
        /* Diagnostic only: report a leftover fasync list, but continue. */
        if (sock->wq->fasync_list)
            pr_err("%s: fasync list not empty!\n", __func__);
    
        if (!sock->file) {
            iput(SOCK_INODE(sock));
            return;
        }
        sock->file = NULL;
    }
    

    由前面的分析知道,对于 tcp 来说 sock->ops 实际上是 inet_stream_ops, 对应 release 指针为 inet_release

    /*
     * Release hook for inet sockets.
     *
     * When a struct sock is attached, drops its multicast memberships, works
     * out the linger timeout, calls the protocol's close() and detaches the
     * sock from the socket. Always returns 0.
     */
    int inet_release(struct socket *sock)
    {
        struct sock *sk = sock->sk;
        long timeout = 0;
    
        if (!sk)
            return 0;
    
        /* Applications may exit without leaving their multicast groups. */
        ip_mc_drop_socket(sk);
    
        /*
         * With SO_LINGER set, close() does not return until the close has
         * completed; without it the call returns immediately. The closing
         * work itself is the same in both cases. A process that is exiting
         * never lingers.
         */
        if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
            timeout = sk->sk_lingertime;
    
        sk->sk_prot->close(sk, timeout);
        sock->sk = NULL;
        return 0;
    }
    

    inet_stream 这块也做了抽象,因为具体有 ipv4, ipv6 等等实现,所以 sk->sk_prot 还是函数指针,对应 tcp_prot 结构体, close 调用 tcp_close,走真正的 tcp 四次挥手逻辑,这里还细分主动与被动关闭,下一篇再详细分析

    shutdown 实现

    首先进入内核态的,肯定都是系统调用,shutdown 也是调用的 sock->ops->shutdown

    /* shutdown(2) entry point: forwards fd and how unchanged to __sys_shutdown(). */
    SYSCALL_DEFINE2(shutdown, int, fd, int, how)
    {
        return __sys_shutdown(fd, how);
    }
    
    /*
     * Shut down the socket behind descriptor fd according to how.
     *
     * Looks the socket up, lets the security hook veto the operation, and
     * otherwise calls the socket's shutdown operation. The file reference
     * taken by the lookup is dropped before returning.
     *
     * Returns the lookup error when fd does not resolve to a socket, the
     * security hook's error when it is non-zero, or else the result of
     * sock->ops->shutdown().
     */
    int __sys_shutdown(int fd, int how)
    {
        int err, fput_needed;
        struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
    
        /* Lookup failed: err is expected to have been filled in by the lookup. */
        if (sock == NULL)
            return err;
    
        err = security_socket_shutdown(sock, how);
        if (err == 0)
            err = sock->ops->shutdown(sock, how);
    
        fput_light(sock->file, fput_needed);
        return err;
    }
    

    调用 inet_stream_ops.shutdown 函数指针,指向 inet_shutdown

    /*
     * Shutdown handler for inet sockets: shut down the receive and/or send
     * side of sock.
     *
     * how arrives as 0, 1 or 2 and is incremented so that it can be used as a
     * bitmask (bit 1 = receive, bit 2 = send). A value with bits outside
     * SHUTDOWN_MASK, or zero after the increment, is rejected with -EINVAL.
     *
     * With the sock locked, the function
     *   - moves a socket out of SS_CONNECTING based on sk->sk_state,
     *   - dispatches on sk->sk_state (see the switch),
     *   - calls sk->sk_state_change() and unlocks.
     *
     * Returns 0, -EINVAL, -ENOTCONN (state TCP_CLOSE), or the result of
     * sk_prot->disconnect() for the TCP_LISTEN / TCP_SYN_SENT branches.
     */
    int inet_shutdown(struct socket *sock, int how)
    {
        struct sock *sk = sock->sk;
        int err = 0;
    
        /* This should really check to make sure
         * the socket is a TCP socket. (WHY AC...)
         */
        how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
                   1->2 bit 2 snds.
                   2->3 */
        if ((how & ~SHUTDOWN_MASK) || !how) /* MAXINT->0 */
            return -EINVAL;
    
        lock_sock(sk);
        /* Socket-level state (sock->state), distinct from sk->sk_state below. */
        if (sock->state == SS_CONNECTING) {
            if ((1 << sk->sk_state) &
                (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
                sock->state = SS_DISCONNECTING;
            else
                sock->state = SS_CONNECTED;
        }
    
        switch (sk->sk_state) {
        case TCP_CLOSE:
            /* Reported as an error, but the default branch below still runs. */
            err = -ENOTCONN;
            /* Hack to wake up other listeners, who can poll for
               EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
            /* fall through */
        default:
            /* Record the shutdown bits, then let the protocol act on them. */
            sk->sk_shutdown |= how;
            if (sk->sk_prot->shutdown)
                sk->sk_prot->shutdown(sk, how);
            break;
    
        /* Remaining two branches are temporary solution for missing
         * close() in multithreaded environment. It is _not_ a good idea,
         * but we have no choice until close() is repaired at VFS level.
         */
        case TCP_LISTEN:
            /* Listening socket: act only when the receive side is being shut down. */
            if (!(how & RCV_SHUTDOWN))
                break;
            /* fall through */
        case TCP_SYN_SENT:
            err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
            sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
            break;
        }
    
        /* Wake up anyone sleeping in poll. */
        sk->sk_state_change(sk);
        release_sock(sk); /* drops the lock taken by lock_sock() above */
        return err;
    }
    

    其实到这里,就看出 shutdown 与 close 的区别了

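    1. 首先判断 how 参数是否合法,只接受 SHUT_RD, SHUT_WR, SHUT_RDWR
    2. 设置 sock->state 状态,这个是 sock 层的,不是 tcp/udp 层
    3. 判断 tcp/udp 层的 sk->sk_state: 如果是 TCP_CLOSE, 说明连接已经关闭,err 置为 -ENOTCONN,但并不会直接返回,而是 fall through 到 default 分支;default 分支设置 sk->sk_shutdown 状态,并调用 sk->sk_prot->shutdown 走 shutdown 逻辑;TCP_LISTEN(how 包含 RCV_SHUTDOWN 时)和 TCP_SYN_SENT 则调用 disconnect
    4. release_sock 释放的是前面 lock_sock 加的锁,并不是释放 socket 资源。这里有个问题,后续有读写怎么办呢???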
    /*
     * TCP protocol shutdown hook: send a FIN when the send side is shut down.
     *
     * how is the bitmask built by inet_shutdown() (how++), so SEND_SHUTDOWN is
     * present for both SHUT_WR and SHUT_RDWR. Without that bit nothing is
     * done here. With it, and only in the ESTABLISHED, SYN_SENT, SYN_RECV or
     * CLOSE_WAIT states, a FIN is sent when tcp_close_state() returns
     * non-zero.
     */
    void tcp_shutdown(struct sock *sk, int how)
    {
        /*  We need to grab some memory, and put together a FIN,
         *  and then put it into the queue to be sent.
         *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
         */
        if (!(how & SEND_SHUTDOWN)) // send side not being shut down (SHUT_RD only): nothing to send
            return;
    
        /* If we've already sent a FIN, or it's a closed state, skip this. */
        if ((1 << sk->sk_state) &
            (TCPF_ESTABLISHED | TCPF_SYN_SENT |
             TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
            /* Clear out any half completed packets.  FIN if needed. */
            if (tcp_close_state(sk))
                tcp_send_fin(sk);
        }
    }
    
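    1. 如果 how 不包含 SEND_SHUTDOWN(也就是只有 SHUT_RD),那么返回,什么也不做
    2. 如果是 SHUT_WR 或 SHUT_RDWR, 并且连接处于 ESTABLISHED / SYN_SENT / SYN_RECV / CLOSE_WAIT 状态,那么 tcp_close_state 切换状态后,tcp_send_fin 发送 FIN 包走四次挥手逻辑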

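    那么,如果 how 是 SHUT_RD 如何生效呢?在哪里起作用呢?其实 inet_shutdown 已经把 how 记到 sk->sk_shutdown 里了:如果只是关闭读,那么 shutdown 只是做个标记而已;关闭写除了发送 FIN 之外,同样也留下了这个标记。具体 tcp_recvmsg、tcp_sendmsg 时会判断这个标记:关闭读之后 tcp_recvmsg 返回 0(EOF),关闭写之后 tcp_sendmsg 报错(EPIPE)。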

    小结

    分析的还是比较浅显,下一篇再看详细的 tcp 四次挥手逻辑

    相关文章

      网友评论

          本文标题:源码分析 shutdown 与 close 的区别

          本文链接:https://www.haomeiwen.com/subject/voavjctx.html