美文网首页
nfs-ganesha - Data Structure

nfs-ganesha - Data Structure

作者: 帆子_8c3a | 来源:发表于2019-02-13 18:21 被阅读0次

    常用缩写

    • FSAL - File System Abstraction Layer
    • DRC - Duplicate Request/Reply Cache
    • CMAL - Cluster Management Abstraction Layer
    • XDR - eXternal Data Representation
    • SAL - State Abstraction Layer
    • cih - Cache inode hashed dictionary
    • TI-RPC - transport-independent RPC
    • TS-RPC (transport-specific RPC)

    1. FSAL module

    每个FSAL module对应一个so文件,例如libfsalvfs.so。每个FSAL module描述了对某种文件系统的实现。有时也简称为fsal或module,对应struct fsal_module,变量名常用fsal或者fsal_hdl。

    常见的FSAL module

    • PSEUDO, 用于根目录,或其他pseudo fs
    • MDCACHE(比较特殊)
    • VFS
    • CEPH
    struct fsal_module {
        struct glist_head fsals;    //所有fsal通过它连成链表,存在`fsal_list`全局变量中
        struct glist_head exports;//这个fsal下所有export形成的链表头
        struct glist_head handles;//这个fsal下所有handle形成的链表头
        struct glist_head servers;//Data Servers链表头,pnfs使用用
        char *path; //so模块的path
        char *name; //module的名字
        void *dl_handle;//当用dlopen动态加载时候,返回的句柄
        struct fsal_ops m_ops;  这个模块提供的通用函数,如unload,create_export等等
        pthread_rwlock_t lock;//在使用上面的链表时候,用到的lock
        int32_t refcount;//Reference count 
        struct fsal_stats *stats;   /*< for storing the FSAL specific stats */
        struct fsal_staticfsinfo_t fs_info; /*< for storing FSAL static info */
    };
    

    相关函数:

    • load_fsal: 指定文件系统的名字,如VFS,加载相应模块,例如libfsalvfs.so,返回module。
    • unload_fsal
    • lookup_fsal: 指定文件系统的名字,从内存里找到module。
    • register_fsal:将module注册进系统,加入到fsal_list全局变量中。

    2. FSAL Export

    在配置文件中每描述一个export,ganesha就创建一个export,对应一个文件系统,每个export都有一个export id,都对应一个FSAL module。ganesha会自动创建一个根目录的export,其export id是0。 可以理解export是FSAL module的一个实例。

    # Example export definition: export id 1, backed by the FSAL module named VFS.
    EXPORT
    {
        Export_ID=1;
        Path = /tmp;
        Pseudo = /vfs;
        Access_Type = RW;
        Protocols = 4;
        Transports = TCP;
        FSAL
        {
            Name = VFS;
        }
    }
    

    2.1 gsh_export

    struct gsh_export是在解析配置文件中用到的数据,不是太重要。

    // Export data used while parsing the configuration file (fields elided with "...").
    struct gsh_export {
        struct glist_head exp_list; // the list head is the global variable exportlist
        struct avltree_node node_k; // node used to store this export in an AVL tree keyed by export_id
        char *fullpath;
        char *pseudopath; //PseudoFS path 
        uint16_t export_id; 
            struct fsal_export *fsal_export; // points to the fsal export
    ...
    };
    

    相关函数:

    • alloc_export
    • free_export
    • insert_gsh_export
    • get_gsh_export
    • get_gsh_export_by_path
    • mount_gsh_export
    • remove_gsh_export
    • foreach_gsh_export

    2.2 fsal_export

    struct fsal_export代表着export。

    // Represents one export; exports can be stacked through sub_export / super_export.
    struct fsal_export {
        struct glist_head exports;// fsal->exports is the list head for all exports of the same FSAL
        struct fsal_module *fsal;   // points to the fsal
        const struct fsal_up_vector *up_ops;    //Upcall operations
        struct export_ops exp_ops;// operations that do not need an inode, e.g. lookup_path, set_quota
        struct fsal_export *sub_export; // the export below this one
        struct fsal_export *super_export;// the export above this one
        uint16_t export_id; //export id
    };
    

    有意思的是export可以分层,形成一个stack的结构。其实一般分为两层,最上层是MDCACHE对应的export,下层是真正文件系统对应(如VFS)的export。这样的好处是,IO先进入到MDCACHE的export,如果能处理则直接返回,如果不能则调用下层export去处理。
    相关函数:

    • fsal_attach_export 将export插入到这个FSAL对应链表中
    • fsal_export_stack 将两个export黏在一起,形成上下层关系

    2.3 扩展的FSAL export

    对于不同FSAL来说,都会扩展export的含义。如FSAL VFS的export定义为:

    // FSAL VFS export: embeds the generic fsal_export and adds VFS-specific fields.
    struct vfs_fsal_export {
        struct fsal_export export; // the generic fsal_export this struct extends
        struct fsal_filesystem *root_fs;// fs of the root directory
        struct glist_head filesystems;// head of the list of all filesystems under this export
        int fsid_type;
        bool async_hsm_restore;
    };
    

    再如FSAL MDCACHE的export定义为:

    // FSAL MDCACHE export: embeds the generic fsal_export and adds MDCACHE-specific fields.
    struct mdcache_fsal_export {
        struct fsal_export mfe_exp; // the generic fsal_export this struct extends
        char *name;
        struct fsal_up_vector up_ops;
        struct fsal_up_vector super_up_ops;
        struct glist_head entry_list;
        pthread_rwlock_t mdc_exp_lock;
        uint8_t flags;
    };
    

    不同FSAL的export的创建是由不同函数实现的

    fsal->m_ops.create_export(...)
    

    例如对应VFS的create_export函数指针对应vfs_create_export()

    3. Object handle

    对应struct fsal_obj_handle,变量名常用obj。每个object handle对应一个文件或者目录。

    // Generic object handle; per the article, each one corresponds to a file or a directory.
    struct fsal_obj_handle {
        struct glist_head handles;// links all handles of the same fsal; the list head is fsal->handles
        struct fsal_filesystem *fs;// points to the FS this object belongs to
        struct fsal_module *fsal;   // points to the fsal module
        struct fsal_obj_ops *obj_ops;   // e.g. lookup, readdir, getattrs, read2, write2
        pthread_rwlock_t obj_lock;
        object_file_type_t type;    /*< Object file type */
        fsal_fsid_t fsid;   
        uint64_t fileid;    // ID that is unique within the same fsid, e.g. the inode number
        struct state_hdl *state_hdl;    // state related to this obj; see vfs_state_locate()
    };
    

    3.1 扩充的Object handle

    对于不同FSAL来说,需要扩充这个对象。例如VFS对应的Object handle:

    // VFS object handle: embeds the generic fsal_obj_handle and adds VFS-specific data.
    struct vfs_fsal_obj_handle {
        struct fsal_obj_handle obj_handle; // the generic fsal_obj_handle this struct extends
        fsal_dev_t dev;
        vfs_file_handle_t *handle;
        struct vfs_subfsal_obj_ops *sub_ops;    /*< Optional subfsal ops */
        const struct fsal_up_vector *up_ops;    /*< Upcall operations */
        // Variant data; the member names suggest one arm per kind of object.
        union {
            struct {
                struct fsal_share share;
                struct vfs_fd fd;
            } file;
            struct {
                unsigned char *link_content;
                int link_size;
            } symlink;
            struct {
                vfs_file_handle_t *dir;
                char *name;
            } unopenable;
        } u;
    };
    

    例如MDCACHE对应的Object handle:

    // MDCACHE object handle (remaining fields elided with "...").
    struct mdcache_fsal_obj_handle {
        struct fsal_obj_handle obj_handle; // the generic fsal_obj_handle this struct extends
        struct fsal_obj_handle *sub_handle; // handle of the more concrete fsal below mdcache, e.g. a VFS handle
    ...
    };
    

    4. 不同level的ops函数指针

    4.1 FSAL module的ops

    struct fsal_ops,FSAL module级别的ops

    // Module-level operation table: each struct fsal_ops member is set to the
    // function of the same name.
    struct fsal_ops def_fsal_ops = {
        .unload = unload_fsal,
        .init_config = init_config,
        .dump_config = dump_config,
        .create_export = create_export,
        .emergency_cleanup = emergency_cleanup,
        .getdeviceinfo = getdeviceinfo,
        .fs_da_addr_size = fs_da_addr_size,
        .fsal_pnfs_ds = fsal_pnfs_ds,
        .fsal_pnfs_ds_ops = fsal_pnfs_ds_ops,
        .fsal_extract_stats = fsal_extract_stats,
        .fsal_reset_stats = fsal_reset_stats,
    };
    

    4.2 export的ops

    struct export_ops,文件系统级别的ops

    // Fills the export-level operation table *ops with the function pointers
    // used by the VFS export.
    void vfs_export_ops_init(struct export_ops *ops)
    {
        ops->release = release;
        ops->lookup_path = vfs_lookup_path;
        ops->wire_to_host = wire_to_host;
        ops->create_handle = vfs_create_handle;
        ops->get_fs_dynamic_info = get_dynamic_info;
        ops->get_quota = get_quota;
        ops->set_quota = set_quota;
        ops->alloc_state = vfs_alloc_state;
        ops->free_state = vfs_free_state;
    }
    

    4.3 obj的ops

    struct fsal_obj_ops,文件对象层的ops。

    // Fills the object-level operation table *ops for VFS object handles.
    void vfs_handle_ops_init(struct fsal_obj_ops *ops)
    {
        // Runs first, so every assignment below overrides whatever it set
        // (presumably default implementations - confirm in fsal_default_obj_ops_init).
        fsal_default_obj_ops_init(ops);
    
        ops->release = release;
        ops->merge = vfs_merge;
        ops->lookup = lookup;
        ops->readdir = read_dirents;
        ops->mkdir = makedir;
        ops->mknode = makenode;
        ops->symlink = makesymlink;
        ops->readlink = readsymlink;
        ops->getattrs = vfs_getattr2;
        ops->link = linkfile;
        ops->rename = renamefile;
        ops->unlink = file_unlink;
        ops->close = vfs_close;
        ops->handle_to_wire = handle_to_wire;
        ops->handle_to_key = handle_to_key;
        ops->open2 = vfs_open2;
        ops->reopen2 = vfs_reopen2;
        ops->read2 = vfs_read2;
        ops->write2 = vfs_write2;
        ops->commit2 = vfs_commit2;
        ops->list_ext_attrs = vfs_list_ext_attrs;
        ops->getextattr_id_by_name = vfs_getextattr_id_by_name;
        ops->getextattr_value_by_name = vfs_getextattr_value_by_name;
        ops->getextattr_value_by_id = vfs_getextattr_value_by_id;
        ops->setextattr_value = vfs_setextattr_value;
        ops->setextattr_value_by_id = vfs_setextattr_value_by_id;
        ops->remove_extattr_by_id = vfs_remove_extattr_by_id;
        ops->remove_extattr_by_name = vfs_remove_extattr_by_name;
    }
    

    5. MDCACHE和其他FSAL的交互

    MDCACHE的export位于其他FSAL的上层。

    // Abridged excerpt ("..." marks omitted code): the read is forwarded to the
    // read2 operation of the sub-FSAL handle, wrapped in subcall().
    mdcache_read2()
    {
    ...
        subcall(
            entry->sub_handle->obj_ops->read2(entry->sub_handle, bypass,
                             mdc_read_cb, read_arg, arg)
               );
    }
    

    6. IO处理线程的op_ctx

    每个IO处理线程有个线程变量op_ctx,指向nfs_rpc_process_request()下的req_ctx局部变量。

    // Thread-local pointer to the request operation context of the current thread.
    __thread struct req_op_context *op_ctx;
    

    会在函数nfs_rpc_process_request的开头设置op_ctx,并在结尾清理op_ctx
    init_root_op_context初始化op_ctx
    release_root_op_context恢复op_ctx
    nfs4_mds_putfh()设置op_ctx->ctx_export和op_ctx->fsal_export
    在所有的IO处理线程中,可以方便的从op_ctx得到export信息。
    那么op_ctx都存了些什么东西呢,

    // Per-request operation context; reached through the thread-local op_ctx pointer.
    struct req_op_context {
        struct user_cred *creds;    /*< resolved user creds from request */
        struct user_cred original_creds;    /*< Saved creds */
        struct group_data *caller_gdata;
        gid_t *caller_garray_copy;  /*< Copied garray from AUTH_SYS */
        gid_t *managed_garray_copy; /*< Copied garray from managed gids */
        int cred_flags;     /* Various cred flags */
        sockaddr_t *caller_addr;    //IP connection info
        const uint64_t *clientid;   //Client ID 
        uint32_t nfs_vers;
        uint32_t nfs_minorvers;
        uint32_t req_type;  /*< request_type NFS | 9P */
        struct gsh_client *client;  // client host info; hides whether the connection is TCP, UDP or 9P
        struct gsh_export *ctx_export;// points to the gsh_export
        struct fsal_export *fsal_export;// current export, usually the MDCACHE export
        struct export_perms *export_perms;  /*< Effective export perms */
        nsecs_elapsed_t start_time; //start time of this op/request 
        nsecs_elapsed_t queue_wait; //time in wait queue 
        void *fsal_private;     /*< private for FSAL use */
        struct fsal_module *fsal_module;    // fsal module, usually MDCACHE
        struct fsal_pnfs_ds *fsal_pnfs_ds;  // pNFS related
    };
    

    7. Compound中Operation的处理函数

    // Table of NFSv4 operation descriptors, indexed by operation number.
    // Slots 0, 1 and 2 all route to nfs4_op_illegal. The excerpt is abridged:
    // the trailing "//..." stands for the remaining entries.
    static const struct nfs4_op_desc optabv4[] = {
        [0] = { /* all out of bounds illegals go here to die */
            .name = "OP_ILLEGAL",
            .funct = nfs4_op_illegal,
            .free_res = nfs4_op_illegal_Free,
            .resp_size = sizeof(ILLEGAL4res),
            .exp_perm_flags = 0},
        [1] = {
            .name = "OP_ILLEGAL",
            .funct = nfs4_op_illegal,
            .free_res = nfs4_op_illegal_Free,
            .resp_size = sizeof(ILLEGAL4res),
            .exp_perm_flags = 0},
        [2] = {
            .name = "OP_ILLEGAL",
            .funct = nfs4_op_illegal,
            .free_res = nfs4_op_illegal_Free,
            .resp_size = sizeof(ILLEGAL4res),
            .exp_perm_flags = 0},
        [NFS4_OP_ACCESS] = {
            .name = "OP_ACCESS",
            .funct = nfs4_op_access,
            .free_res = nfs4_op_access_Free,
            .resp_size = sizeof(ACCESS4res),
            .exp_perm_flags = EXPORT_OPTION_MD_READ_ACCESS},
        [NFS4_OP_CLOSE] = {
            .name = "OP_CLOSE",
            .funct = nfs4_op_close,
            .free_res = nfs4_op_close_Free,
            .resp_size = sizeof(CLOSE4res),
            .exp_perm_flags = EXPORT_OPTION_MD_READ_ACCESS},
        [NFS4_OP_COMMIT] = {
            .name = "OP_COMMIT",
            .funct = nfs4_op_commit,
            .free_res = nfs4_op_commit_Free,
            .resp_size = sizeof(COMMIT4res),
            .exp_perm_flags = EXPORT_OPTION_MD_WRITE_ACCESS}
        //...
    };
    

    相关文章

      网友评论

          本文标题:nfs-ganesha - Data Structure

          本文链接:https://www.haomeiwen.com/subject/seodeqtx.html