常用缩写
- FSAL - File System Abstract Layer
- DRC - Duplicate Request/Reply Cache
- CMAL - Cluster Management Abstraction Layer
- XDR - eXternal Data Representation
- SAL - State Abstraction Layer
- cih - Cache inode hashed dictionary
- TI-RPC (transport-independent RPC)
- TS-RPC (transport-specific RPC)
1. FSAL module
每个FSAL module对应一个so文件,例如libfsalvfs.so。每个FSAL module描述了对某种文件系统的实现。有时也简称为fsal或module,对应struct fsal_module,变量名常用fsal或者fsal_hdl。
常见的FSAL module
- PSEUDO, 用于根目录,或其他pseudo fs
- MDCACHE(比较特殊)
- VFS
- CEPH
struct fsal_module {
struct glist_head fsals; //所有fsal通过它连成链表,存在`fsal_list`全局变量中
struct glist_head exports;//这个fsal下所有export形成的链表头
struct glist_head handles;//这个fsal下所有handle形成的链表头
struct glist_head servers;//Data Servers链表头,pnfs使用用
char *path; //so模块的path
char *name; //module的名字
void *dl_handle;//当用dlopen动态加载时候,返回的句柄
struct fsal_ops m_ops; 这个模块提供的通用函数,如unload,create_export等等
pthread_rwlock_t lock;//在使用上面的链表时候,用到的lock
int32_t refcount;//Reference count
struct fsal_stats *stats; /*< for storing the FSAL specific stats */
struct fsal_staticfsinfo_t fs_info; /*< for storing FSAL static info */
};
相关函数:
- load_fsal: 指定文件系统的名字,如VFS,加载相应模块,例如libfsalvfs.so,返回module。
- unload_fsal
- lookup_fsal: 指定文件系统的名字,从内存里找到module。
- register_fsal: 将module注册进系统,加入到fsal_list全局变量中。
2. FSAL Export
在配置文件中每描述一个export,ganesha就创建一个export,对应一个文件系统,每个export都有一个export id,都对应一个FSAL module。ganesha会自动创建一个根目录的export,其export id是0。 可以理解export是FSAL module的一个实例。
# Example export definition: export id 1, backed by the VFS FSAL.
EXPORT
{
Export_ID=1;
Path = /tmp;
Pseudo = /vfs;
Access_Type = RW;
Protocols = 4;
Transports = TCP;
# The FSAL module that serves this export.
FSAL
{
Name = VFS;
}
}
2.1 gsh_export
struct gsh_export是在解析配置文件中用到的数据,不是太重要。
/* Export data used while parsing the configuration file. */
struct gsh_export {
struct glist_head exp_list; // list link; the list head is the global variable exportlist
struct avltree_node node_k; // node used to store this export in an AVL tree keyed by export_id
char *fullpath;
char *pseudopath; //PseudoFS path
uint16_t export_id;
struct fsal_export *fsal_export; // points to the FSAL export
... /* remaining fields elided in this excerpt */
};
相关函数:
- alloc_export
- free_export
- insert_gsh_export
- get_gsh_export
- get_gsh_export_by_path
- mount_gsh_export
- remove_gsh_export
- foreach_gsh_export
2.2 fsal_export
struct fsal_export代表着export。
/* An export as seen by an FSAL; exports can be layered through sub_export/super_export. */
struct fsal_export {
struct glist_head exports;// list link; fsal->exports is the head of the list of all exports of the same FSAL
struct fsal_module *fsal; // points to the FSAL module
const struct fsal_up_vector *up_ops; //Upcall operations
struct export_ops exp_ops;// operations that need no inode, e.g. lookup_path, set_quota
struct fsal_export *sub_export; // the export below this one
struct fsal_export *super_export;// the export above this one
uint16_t export_id; //export id
};
有意思的是export可以分层,形成一个stack的结构。其实一般分为两层,最上层是MDCACHE对应的export,下层是真正文件系统对应(如VFS)的export。这样的好处是,IO先进入到MDCACHE的export,如果能处理则直接返回,如果不能则调用下层export去处理。
相关函数:
- fsal_attach_export 将export插入到这个FSAL对应链表中
- fsal_export_stack 将两个export黏在一起,形成上下层关系
2.3 扩展的FSAL export
对于不同FSAL来说,都会扩展export的含义。如FSAL VFS的export定义为:
/* The VFS FSAL's export: struct fsal_export extended with VFS-specific fields. */
struct vfs_fsal_export {
struct fsal_export export; // the base fsal_export that this structure extends
struct fsal_filesystem *root_fs;// file system of the root directory
struct glist_head filesystems;// head of the list of all file systems under this export
int fsid_type;
bool async_hsm_restore;
};
再如FSAL MDCACHE的export定义为:
/* The MDCACHE FSAL's export: struct fsal_export extended with MDCACHE-specific fields. */
struct mdcache_fsal_export {
struct fsal_export mfe_exp; // the base fsal_export that this structure extends
char *name;
struct fsal_up_vector up_ops;
struct fsal_up_vector super_up_ops;
struct glist_head entry_list;
pthread_rwlock_t mdc_exp_lock;
uint8_t flags;
};
不同FSAL的export的创建是由不同函数实现的
fsal->m_ops.create_export(...)
例如对应VFS的create_export函数指针对应vfs_create_export()
3. Object handle
对应struct fsal_obj_handle,变量名常用obj。每个object handle对应一个文件或者目录。
/* Object handle: each one corresponds to a single file or directory. */
struct fsal_obj_handle {
struct glist_head handles;// links all handles of the same FSAL into a list whose head is fsal->handles
struct fsal_filesystem *fs;// the file system this object belongs to
struct fsal_module *fsal; // points to the FSAL module
struct fsal_obj_ops *obj_ops; // e.g. lookup, readdir, getattrs, read2, write2 and so on
pthread_rwlock_t obj_lock;
object_file_type_t type; /*< Object file type */
fsal_fsid_t fsid;
uint64_t fileid; // ID that is unique within the same fsid, e.g. the inode number
struct state_hdl *state_hdl; // state associated with the object; see vfs_state_locate()
};
3.1 扩充的Object handle
对于不同FSAL来说,需要扩充这个对象。例如VFS对应的Object handle:
/* The VFS FSAL's object handle: struct fsal_obj_handle extended with VFS-specific fields. */
struct vfs_fsal_obj_handle {
struct fsal_obj_handle obj_handle; // the base fsal_obj_handle that this structure extends
fsal_dev_t dev;
vfs_file_handle_t *handle;
struct vfs_subfsal_obj_ops *sub_ops; /*< Optional subfsal ops */
const struct fsal_up_vector *up_ops; /*< Upcall operations */
/* Per-kind data; the member names suggest file, symlink and unopenable objects. */
union {
struct {
struct fsal_share share;
struct vfs_fd fd;
} file;
struct {
unsigned char *link_content;
int link_size;
} symlink;
struct {
vfs_file_handle_t *dir;
char *name;
} unopenable;
} u;
};
例如MDCACHE对应的Object handle:
/* The MDCACHE FSAL's object handle: wraps the handle of the FSAL layered below it. */
struct mdcache_fsal_obj_handle {
struct fsal_obj_handle obj_handle; // the base fsal_obj_handle that this structure extends
struct fsal_obj_handle *sub_handle; // handle of the more concrete FSAL below mdcache, e.g. a VFS handle
... /* remaining fields elided in this excerpt */
}
4. 不同level的ops函数指针
4.1 FSAL module的ops
struct fsal_ops,FSAL module级别的ops
/*
 * Module-level operation table whose entries are the functions
 * named after each operation.
 */
struct fsal_ops def_fsal_ops = {
	/* module lifecycle and configuration */
	.init_config = init_config,
	.dump_config = dump_config,
	.unload = unload_fsal,
	.emergency_cleanup = emergency_cleanup,

	/* export creation */
	.create_export = create_export,

	/* pNFS-related entries */
	.getdeviceinfo = getdeviceinfo,
	.fs_da_addr_size = fs_da_addr_size,
	.fsal_pnfs_ds = fsal_pnfs_ds,
	.fsal_pnfs_ds_ops = fsal_pnfs_ds_ops,

	/* statistics */
	.fsal_extract_stats = fsal_extract_stats,
	.fsal_reset_stats = fsal_reset_stats,
};
4.2 export的ops
struct export_ops,文件系统级别的ops
/*
 * Install the VFS implementations into an export-level operation table.
 *
 * @param ops  operation table to fill in; every listed slot is overwritten.
 */
void vfs_export_ops_init(struct export_ops *ops)
{
	/* export teardown */
	ops->release = release;

	/* handle lookup and construction */
	ops->create_handle = vfs_create_handle;
	ops->lookup_path = vfs_lookup_path;
	ops->wire_to_host = wire_to_host;

	/* file-system information and quotas */
	ops->get_fs_dynamic_info = get_dynamic_info;
	ops->set_quota = set_quota;
	ops->get_quota = get_quota;

	/* state allocation */
	ops->free_state = vfs_free_state;
	ops->alloc_state = vfs_alloc_state;
}
4.3 obj的ops
struct fsal_obj_ops,文件对象层的ops。
void vfs_handle_ops_init(struct fsal_obj_ops *ops)
{
fsal_default_obj_ops_init(ops);
ops->release = release;
ops->merge = vfs_merge;
ops->lookup = lookup;
ops->readdir = read_dirents;
ops->mkdir = makedir;
ops->mknode = makenode;
ops->symlink = makesymlink;
ops->readlink = readsymlink;
ops->getattrs = vfs_getattr2;
ops->link = linkfile;
ops->rename = renamefile;
ops->unlink = file_unlink;
ops->close = vfs_close;
ops->handle_to_wire = handle_to_wire;
ops->handle_to_key = handle_to_key;
ops->open2 = vfs_open2;
ops->reopen2 = vfs_reopen2;
ops->read2 = vfs_read2;
ops->write2 = vfs_write2;
ops->commit2 = vfs_commit2;
ops->list_ext_attrs = vfs_list_ext_attrs;
ops->getextattr_id_by_name = vfs_getextattr_id_by_name;
ops->getextattr_value_by_name = vfs_getextattr_value_by_name;
ops->getextattr_value_by_id = vfs_getextattr_value_by_id;
ops->setextattr_value = vfs_setextattr_value;
ops->setextattr_value_by_id = vfs_setextattr_value_by_id;
ops->remove_extattr_by_id = vfs_remove_extattr_by_id;
ops->remove_extattr_by_name = vfs_remove_extattr_by_name;
}
5. MDCACHE和其他FSAL的交互
MDCACHE的export位于其他FSAL的上层。
/*
 * Abbreviated sketch (signature and surrounding code elided): the read is
 * passed to the read2 operation of the sub-FSAL handle, wrapped in subcall().
 */
mdcache_read2()
{
...
subcall(
entry->sub_handle->obj_ops->read2(entry->sub_handle, bypass,
mdc_read_cb, read_arg, arg)
);
}
6. IO处理线程的op_ctx
每个IO处理线程有个线程变量op_ctx,指向nfs_rpc_process_request()下的req_ctx局部变量。
__thread struct req_op_context *op_ctx;
- 会在函数nfs_rpc_process_request的开头设置op_ctx,并在结尾清理op_ctx
- 在init_root_op_context中初始化op_ctx
- 在release_root_op_context中恢复op_ctx
- nfs4_mds_putfh()设置op_ctx->ctx_export和op_ctx->fsal_export

在所有的IO处理线程中,可以方便地从op_ctx得到export信息。
那么op_ctx都存了些什么东西呢?
/* Per-request operation context; each I/O thread reaches it through the thread-local op_ctx pointer. */
struct req_op_context {
struct user_cred *creds; /*< resolved user creds from request */
struct user_cred original_creds; /*< Saved creds */
struct group_data *caller_gdata;
gid_t *caller_garray_copy; /*< Copied garray from AUTH_SYS */
gid_t *managed_garray_copy; /*< Copied garray from managed gids */
int cred_flags; /* Various cred flags */
sockaddr_t *caller_addr; //IP connection info
const uint64_t *clientid; //Client ID
uint32_t nfs_vers;
uint32_t nfs_minorvers;
uint32_t req_type; /*< request_type NFS | 9P */
struct gsh_client *client; // client host info; hides whether the connection is TCP, UDP or 9P
struct gsh_export *ctx_export;// points to the gsh_export
struct fsal_export *fsal_export;// current export, usually the MDCACHE export
struct export_perms *export_perms; /*< Effective export perms */
nsecs_elapsed_t start_time; //start time of this op/request
nsecs_elapsed_t queue_wait; //time in wait queue
void *fsal_private; /*< private for FSAL use */
struct fsal_module *fsal_module; // FSAL module, usually MDCACHE
struct fsal_pnfs_ds *fsal_pnfs_ds; // pNFS related
};
7. Compound中Operation的处理函数
/*
 * Table of NFSv4 compound operations, indexed by operation number.
 * Each entry gives the operation's name, its handler, the function that
 * frees its result, the result size, and the export permission flags.
 * Slots 0-2 are filled with the OP_ILLEGAL handler.
 */
static const struct nfs4_op_desc optabv4[] = {
[0] = { /* all out of bounds illegals go here to die */
.name = "OP_ILLEGAL",
.funct = nfs4_op_illegal,
.free_res = nfs4_op_illegal_Free,
.resp_size = sizeof(ILLEGAL4res),
.exp_perm_flags = 0},
[1] = {
.name = "OP_ILLEGAL",
.funct = nfs4_op_illegal,
.free_res = nfs4_op_illegal_Free,
.resp_size = sizeof(ILLEGAL4res),
.exp_perm_flags = 0},
[2] = {
.name = "OP_ILLEGAL",
.funct = nfs4_op_illegal,
.free_res = nfs4_op_illegal_Free,
.resp_size = sizeof(ILLEGAL4res),
.exp_perm_flags = 0},
[NFS4_OP_ACCESS] = {
.name = "OP_ACCESS",
.funct = nfs4_op_access,
.free_res = nfs4_op_access_Free,
.resp_size = sizeof(ACCESS4res),
.exp_perm_flags = EXPORT_OPTION_MD_READ_ACCESS},
[NFS4_OP_CLOSE] = {
.name = "OP_CLOSE",
.funct = nfs4_op_close,
.free_res = nfs4_op_close_Free,
.resp_size = sizeof(CLOSE4res),
.exp_perm_flags = EXPORT_OPTION_MD_READ_ACCESS},
[NFS4_OP_COMMIT] = {
.name = "OP_COMMIT",
.funct = nfs4_op_commit,
.free_res = nfs4_op_commit_Free,
.resp_size = sizeof(COMMIT4res),
.exp_perm_flags = EXPORT_OPTION_MD_WRITE_ACCESS}
// ... remaining operations omitted from this excerpt
}
网友评论