/*
* Library interface to io_uring
*/
/*
* Library-side bookkeeping for the submission queue (SQ) half of a ring.
*
* NOTE(review): the k-prefixed pointers presumably point into ring memory
* shared with the kernel (the mapping described by ring_ptr/ring_sz) --
* confirm against io_uring_queue_mmap().
*/
struct io_uring_sq {
unsigned *khead;
unsigned *ktail;
unsigned *kring_mask;
unsigned *kring_entries;
unsigned *kflags;
unsigned *kdropped;
unsigned *array;
/* Array of submission queue entries (see struct io_uring_sqe). */
struct io_uring_sqe *sqes;
/* NOTE(review): presumably library-local head/tail of not-yet-flushed sqes -- confirm. */
unsigned sqe_head;
unsigned sqe_tail;
/* NOTE(review): presumably the size and base address of the SQ ring mapping -- confirm. */
size_t ring_sz;
void *ring_ptr;
/* Padding words; no use of them is visible in this file. */
unsigned pad[4];
};
/*
* Library-side bookkeeping for the completion queue (CQ) half of a ring.
*
* NOTE(review): the k-prefixed pointers presumably point into ring memory
* shared with the kernel (the mapping described by ring_ptr/ring_sz) --
* confirm against io_uring_queue_mmap().
*/
struct io_uring_cq {
unsigned *khead;
unsigned *ktail;
unsigned *kring_mask;
unsigned *kring_entries;
unsigned *kflags;
unsigned *koverflow;
/* Array of completion queue entries. */
struct io_uring_cqe *cqes;
/* NOTE(review): presumably the size and base address of the CQ ring mapping -- confirm. */
size_t ring_sz;
void *ring_ptr;
/* Padding words; no use of them is visible in this file. */
unsigned pad[4];
};
/*
* Top-level handle for one io_uring instance: the submission queue state,
* the completion queue state, and the file descriptor used to enter the
* kernel.
*/
struct io_uring {
struct io_uring_sq sq;
struct io_uring_cq cq;
/* Setup flags; __io_uring_submit() tests this against IORING_SETUP_IOPOLL. */
unsigned flags;
/* File descriptor passed to __sys_io_uring_enter() when submitting. */
int ring_fd;
/* NOTE(review): presumably the feature bits reported at setup time -- confirm. */
unsigned features;
/* Padding words; no use of them is visible in this file. */
unsigned pad[3];
};
/*
* Library interface
*/
extern struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring);
extern struct io_uring_probe *io_uring_get_probe(void);
extern void io_uring_free_probe(struct io_uring_probe *probe);
static inline int io_uring_opcode_supported(struct io_uring_probe *p, int op);
extern int io_uring_queue_init_params(unsigned entries, struct io_uring *ring, struct io_uring_params *p);
/*
* Initializes a struct io_uring; internally it calls
* io_uring_queue_init_params(). An io_uring is a circular queue (ring
* buffer).
*   entries - queue size (the space actually allocated may be larger than
*             what the caller asked for)
*   ring    - pointer to the io_uring structure to initialize
*   flags   - flag argument; pass 0 unless there is a special need
*/
extern int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags);
extern int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring);
extern int io_uring_ring_dontfork(struct io_uring *ring);
extern void io_uring_queue_exit(struct io_uring *ring);
unsigned io_uring_peek_batch_cqe(struct io_uring *ring, struct io_uring_cqe **cqes, unsigned count);
extern int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, unsigned wait_nr, struct __kernel_timespec *ts, sigset_t *sigmask);
extern int io_uring_wait_cqe_timeout(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, struct __kernel_timespec *ts);
extern int io_uring_submit(struct io_uring *ring);
extern int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr);
extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
extern int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs, unsigned nr_iovecs);
extern int io_uring_unregister_buffers(struct io_uring *ring);
extern int io_uring_register_files(struct io_uring *ring, const int *files, unsigned nr_files);
extern int io_uring_unregister_files(struct io_uring *ring);
extern int io_uring_register_files_update(struct io_uring *ring, unsigned off, int *files, unsigned nr_files);
extern int io_uring_register_eventfd(struct io_uring *ring, int fd);
extern int io_uring_register_eventfd_async(struct io_uring *ring, int fd);
extern int io_uring_unregister_eventfd(struct io_uring *ring);
extern int io_uring_register_probe(struct io_uring *ring, struct io_uring_probe *p, unsigned nr);
extern int io_uring_register_personality(struct io_uring *ring);
extern int io_uring_unregister_personality(struct io_uring *ring, int id);
extern int io_uring_register_restrictions(struct io_uring *ring, struct io_uring_restriction *res, unsigned int nr_res);
extern int io_uring_enable_rings(struct io_uring *ring);
extern int __io_uring_sqring_wait(struct io_uring *ring);
extern int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, unsigned submit, unsigned wait_nr, sigset_t *sigmask);
#define io_uring_for_each_cqe(ring, head, cqe)
static inline void io_uring_cq_advance(struct io_uring *ring, unsigned nr)
static inline void io_uring_cqe_seen(struct io_uring *ring, struct io_uring_cqe *cqe)
static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
static inline void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe)
static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd, const void *addr, unsigned len, __u64 offset)
static inline void io_uring_prep_splice(struct io_uring_sqe *sqe, int fd_in, int64_t off_in, int fd_out, int64_t off_out, unsigned int nbytes, unsigned int splice_flags)
static inline void io_uring_prep_tee(struct io_uring_sqe *sqe, int fd_in, int fd_out, unsigned int nbytes, unsigned int splice_flags)
io_uring使用
1、首先需要创建io_uring struct
2、然后使用io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags);初始化io_uring struct,io_uring_queue_init函数内部会调用io_uring_queue_init_params。
io_uring struct在liburing中定义如下
/*
* Top-level handle for one io_uring instance: the submission queue state,
* the completion queue state, and the file descriptor used to enter the
* kernel.
*/
struct io_uring {
struct io_uring_sq sq;
struct io_uring_cq cq;
/* Setup flags; __io_uring_submit() tests this against IORING_SETUP_IOPOLL. */
unsigned flags;
/* File descriptor passed to __sys_io_uring_enter() when submitting. */
int ring_fd;
/* NOTE(review): presumably the feature bits reported at setup time -- confirm. */
unsigned features;
/* Padding words; no use of them is visible in this file. */
unsigned pad[3];
};
3、获取sqe struct
一个 sqe(submission queue entry)代表一次 IO 请求,占用循环队列一个空位。io_uring 队列满时 io_uring_get_sqe 会返回 NULL,注意错误处理。注意这里的队列是指未提交的请求,已提交的(但未完成)请求不占位置。
struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
4、初始化sqe struct
io_uring_prep_readv 或 io_uring_prep_writev 初始化 sqe struct。
io_uring_prep_readv(struct io_uring_sqe *sqe, int fd, const struct iovec *iovecs, unsigned nr_vecs, off_t offset)会调用io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset)
io_uring_prep_rw的定义
/*
 * Fill in an sqe for a generic read/write style request.
 *
 * op     - opcode stored in sqe->opcode
 * sqe    - entry to initialize
 * fd     - file descriptor the request operates on
 * addr   - buffer or iovec array address
 * len    - buffer size or number of iovecs
 * offset - offset stored in sqe->off
 *
 * Every other field written here is cleared, including user_data, so
 * callers that need user_data must set it after this call.
 */
static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
				    const void *addr, unsigned len,
				    __u64 offset)
{
	/* Values supplied by the caller. */
	sqe->opcode = op;
	sqe->fd = fd;
	sqe->off = offset;
	/* The pointer goes through unsigned long before landing in the 64-bit field. */
	sqe->addr = (unsigned long) addr;
	sqe->len = len;

	/* Everything else starts out cleared. */
	sqe->flags = 0;
	sqe->ioprio = 0;
	sqe->rw_flags = 0;
	sqe->user_data = 0;
	sqe->__pad2[0] = 0;
	sqe->__pad2[1] = 0;
	sqe->__pad2[2] = 0;
}
io_uring_prep_writev(struct io_uring_sqe *sqe, int fd, const struct iovec *iovecs, unsigned nr_vecs, off_t offset)会调用io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset)
io_uring_prep_rw的定义
/*
 * Fill in an sqe for a generic read/write style request.
 *
 * op     - opcode stored in sqe->opcode
 * sqe    - entry to initialize
 * fd     - file descriptor the request operates on
 * addr   - buffer or iovec array address
 * len    - buffer size or number of iovecs
 * offset - offset stored in sqe->off
 *
 * Every other field written here is cleared, including user_data, so
 * callers that need user_data must set it after this call.
 */
static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
				    const void *addr, unsigned len,
				    __u64 offset)
{
	/* Values supplied by the caller. */
	sqe->opcode = op;
	sqe->fd = fd;
	sqe->off = offset;
	/* The pointer goes through unsigned long before landing in the 64-bit field. */
	sqe->addr = (unsigned long) addr;
	sqe->len = len;

	/* Everything else starts out cleared. */
	sqe->flags = 0;
	sqe->ioprio = 0;
	sqe->rw_flags = 0;
	sqe->user_data = 0;
	sqe->__pad2[0] = 0;
	sqe->__pad2[1] = 0;
	sqe->__pad2[2] = 0;
}
sqe struct的定义
/*
* IO submission data structure (Submission Queue Entry)
*
* One entry describes a single IO request. Several members are anonymous
* unions, so every field inside one union shares the same storage.
* NOTE(review): which union member is meaningful presumably depends on
* the opcode -- confirm against the kernel io_uring documentation.
*/
struct io_uring_sqe {
__u8 opcode; /* type of operation for this sqe */
__u8 flags; /* IOSQE_ flags */
__u16 ioprio; /* ioprio for the request */
__s32 fd; /* file descriptor to do IO on */
union {
__u64 off; /* offset into file */
__u64 addr2;
};
union {
__u64 addr; /* pointer to buffer or iovecs */
__u64 splice_off_in;
};
__u32 len; /* buffer size or number of iovecs */
/* Per-operation flags word; io_uring_prep_rw() clears it through rw_flags. */
union {
__kernel_rwf_t rw_flags;
__u32 fsync_flags;
__u16 poll_events; /* compatibility */
__u32 poll32_events; /* word-reversed for BE */
__u32 sync_range_flags;
__u32 msg_flags;
__u32 timeout_flags;
__u32 accept_flags;
__u32 cancel_flags;
__u32 open_flags;
__u32 statx_flags;
__u32 fadvise_advice;
__u32 splice_flags;
__u32 rename_flags;
__u32 unlink_flags;
};
__u64 user_data; /* data to be passed back at completion time */
union {
struct {
/* pack this to avoid bogus arm OABI complaints */
union {
/* index into fixed buffers, if used */
__u16 buf_index;
/* for grouped buffer selection */
__u16 buf_group;
} __attribute__((packed));
/* personality to use, if used */
__u16 personality;
__s32 splice_fd_in;
};
/* Overlays the struct above; io_uring_prep_rw() zeroes all three words. */
__u64 __pad2[3];
};
};
5、提交sqe(也就是提交IO)
/*
 * Hand the sqes obtained through io_uring_get_sqe() over to the kernel,
 * asking for no completions to be waited on (wait_nr is 0).
 *
 * Returns the number of sqes submitted, or a negative errno value when
 * the underlying enter syscall fails.
 */
int io_uring_submit(struct io_uring *ring)
{
	const unsigned no_wait = 0;

	return __io_uring_submit_and_wait(ring, no_wait);
}
/*
 * Flush the pending sqes via __io_uring_flush_sq(), then pass the flush
 * result to __io_uring_submit() together with the number of completions
 * to wait for. Returns whatever __io_uring_submit() returns.
 */
static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
{
	unsigned flushed = __io_uring_flush_sq(ring);

	return __io_uring_submit(ring, flushed, wait_nr);
}
/*
 * Submit `submitted` already-flushed sqes, entering the kernel only when
 * that is required.
 *
 * The enter syscall is issued when sq_ring_needs_enter() asks for it or
 * when the caller wants to wait for `wait_nr` completions. Otherwise the
 * `submitted` count is returned unchanged and no syscall is made.
 *
 * Returns the syscall result (number of sqes submitted) or `submitted`
 * on success, and -errno when the syscall fails.
 */
static int __io_uring_submit(struct io_uring *ring, unsigned submitted,
			     unsigned wait_nr)
{
	unsigned enter_flags = 0;
	int rc;

	/* sq_ring_needs_enter() runs first and may add bits to enter_flags. */
	if (!sq_ring_needs_enter(ring, &enter_flags) && !wait_nr)
		return submitted;

	/* Ask for events when waiting, or when the ring was set up for IOPOLL. */
	if (wait_nr || (ring->flags & IORING_SETUP_IOPOLL))
		enter_flags |= IORING_ENTER_GETEVENTS;

	rc = __sys_io_uring_enter(ring->ring_fd, submitted, wait_nr,
				  enter_flags, NULL);
	return rc < 0 ? -errno : rc;
}
/*
 * Convenience wrapper around __sys_io_uring_enter2() that fills in the
 * signal-mask size argument as _NSIG / 8. All other arguments are
 * forwarded unchanged, and the wrapped call's result is returned.
 */
int __sys_io_uring_enter(int fd, unsigned to_submit, unsigned min_complete,
			 unsigned flags, sigset_t *sig)
{
	const int sigmask_sz = _NSIG / 8;

	return __sys_io_uring_enter2(fd, to_submit, min_complete, flags, sig,
				     sigmask_sz);
}
/*
 * Issue the raw io_uring_enter system call.
 *
 * The six arguments are forwarded in order to syscall() under
 * __NR_io_uring_enter; the syscall() result is returned as an int.
 */
int __sys_io_uring_enter2(int fd, unsigned to_submit, unsigned min_complete,
			  unsigned flags, sigset_t *sig, int sz)
{
	long rc;

	rc = syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
		     flags, sig, sz);
	return (int) rc;
}
6、提交sqe后,我们只需要等待io完成,例如调用io_uring_wait_cqes或io_uring_wait_cqe_timeout取得已完成的cqe,处理完后再调用io_uring_cqe_seen标记该cqe已被消费。
鸣谢:
https://segmentfault.com/a/1190000019300089
demo:
fio中的代码 https://github.com/lnsyyj/Codes/blob/master/iouring/io_uring-test.c
网友评论