本文主要关注中断如何从硬件上报到用户态,以及用户态开关中断的相关处理。
本文基于hns3 PMD driver,选取DPDK的Rx interrupt特性,使用DPDK工程中的l3fwd-power测试程序来进行说明。
一. 简短概括:
熟悉DPDK的小伙伴可以直接看本节下面的总结;如果需要了解更多细节,可以进一步查看第二节。
1)用户态执行设备驱动的.dev_start函数时,为网卡的每个硬件队列创建一个eventfd,使用epoll来关联监听所有的eventfd;
2)当网卡硬件中断上报到CPU后,内核态vfio-pci驱动进行中断处理,并在中断处理函数中调用eventfd_signal向用户态上报事件信息;
3)在用户态,当通过epoll_wait监听到有事件发生后,通过read从相应的eventfd读取相关事件,应用程序以此可以进一步做相关处理。
二. 详细处理:
以hns3 PMD driver为例,说明网卡PMD驱动部分处理:
1. 驱动初始化及.dev_start函数:
(.dev_init钩子函数)hns3_dev_init -> hns3_init_pf
/*
 * PF initialization, reached from hns3_dev_init (excerpt; <snip> marks
 * code elided by the article).
 *
 * The visible part calls hns3_clear_all_event_cause(), registers
 * hns3_interrupt_handler on the PCI device's interrupt handle, and then
 * enables the interrupt via rte_intr_enable() and hns3_pf_enable_irq0().
 * On registration failure it logs the error code and jumps to the
 * err_intr_callback_register cleanup label (not shown in this excerpt).
 */
static int
hns3_init_pf(struct rte_eth_dev *eth_dev)
{
<snip>
// Register the vector0 interrupt handler and enable the vector0 interrupt
hns3_clear_all_event_cause(hw);
<snip>
/* eth_dev is handed to the callback as its argument. */
ret = rte_intr_callback_register(&pci_dev->intr_handle,
hns3_interrupt_handler,
eth_dev);
if (ret) {
PMD_INIT_LOG(ERR, "Failed to register intr: %d", ret);
goto err_intr_callback_register;
}
/* Enable interrupt */
rte_intr_enable(&pci_dev->intr_handle);
hns3_pf_enable_irq0(hw);
<snip>
}
.dev_start函数
(.dev_start钩子函数)hns3_dev_start -> hns3_map_rx_interrupt
/*
 * Rx interrupt mapping step, reached from hns3_dev_start (excerpt; <snip>
 * marks code elided by the article).
 *
 * Visible flow: disable the interrupt handle, request one event fd per
 * Rx queue in use, bind each Rx queue to a vector, then re-enable the
 * interrupt handle. Returns -EINVAL when rte_intr_efd_enable() fails.
 */
static int
hns3_map_rx_interrupt(struct rte_eth_dev *dev)
{
<snip>
/* disable uio/vfio intr/eventfd mapping */
rte_intr_disable(intr_handle);
<snip>
/* One interrupt vector is requested per Rx queue in use. */
intr_vector = hw->used_rx_queues;
/* creates event fd for each intr vector when MSIX is used */
if (rte_intr_efd_enable(intr_handle, intr_vector))
return -EINVAL;
/*
 * NOTE(review): this brace presumably closes a conditional whose opening
 * line was elided above - confirm against the driver source.
 */
}
for (q_id = 0; q_id < hw->used_rx_queues; q_id++) {
/*
 * NOTE(review): the trailing arguments are missing in this excerpt
 * (",,"); check the hns3 driver source for the complete call.
 */
ret = hns3_bind_ring_with_vector(hw, vec, true,,);
<snip>
}
/* Re-enable the handle now that the queue/vector binding loop has run. */
rte_intr_enable(intr_handle);
<snip>
}
DPDK相关API实现:
/*
 * Create the per-vector event fds for an interrupt handle (excerpt;
 * <snip> marks code elided by the article).
 *
 * intr_handle: handle whose efds[] array receives the new descriptors.
 * nb_efd:      number of event fds requested by the caller.
 */
int
rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
{
/* Cap the request at RTE_MAX_RXTX_INTR_VEC_ID. */
uint32_t n = RTE_MIN(nb_efd,(uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
<snip>
if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) {
for (i = 0; i < n; i++) {
/* Counter starts at 0; the fd is non-blocking and close-on-exec. */
fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
<snip>
/* Slot i of efds[] holds the event fd for vector index i. */
intr_handle->efds[i] = fd;
}
}
<snip>
}
rte_intr_enable -> vfio_enable_msix
/*
 * Enable MSI-X interrupts through VFIO (excerpt; <snip> marks code elided
 * by the article).
 *
 * Fills a struct vfio_irq_set with the handle's file descriptors and hands
 * it to the VFIO device with the VFIO_DEVICE_SET_IRQS ioctl.
 */
static int
vfio_enable_msix(const struct rte_intr_handle *intr_handle) {
<snip>
irq_set = (struct vfio_irq_set *) irq_set_buf;
irq_set->argsz = len;
/*
 * count is 1 when max_intr is 0; otherwise it is max_intr capped at
 * RTE_MAX_RXTX_INTR_VEC_ID + 1 (the upper bound is inclusive).
 */
irq_set->count = intr_handle->max_intr ?
(intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ?
RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1;
/* The data payload is a list of eventfds used as the trigger action. */
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
VFIO_IRQ_SET_ACTION_TRIGGER;
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
irq_set->start = 0;
/* Treat the payload area as an array of int file descriptors. */
fd_ptr = (int *) &irq_set->data;
/* INTR vector offset 0 reserve for non-efds mapping */
fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd;
/* The nb_efd per-vector event fds follow, from RTE_INTR_VEC_RXTX_OFFSET. */
memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds,
sizeof(*intr_handle->efds) * intr_handle->nb_efd);
ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS,
irq_set);
<snip>
}
2. l3fwd-power中的用法
收发包处理:
/*
 * Main processing loop of an l3fwd-power lcore (excerpt; <snip> marks code
 * elided by the article).
 *
 * The visible part registers the lcore's Rx queues for interrupt events
 * and, inside the loop, enables the Rx interrupts, sleeps until one fires,
 * disables them again and jumps back to start_rx.
 */
static int
main_loop(__rte_unused void *dummy)
{
<snip>
/* add into event wait list */
/* intr_en is set only when event_register() returns 0. */
if (event_register(qconf) == 0)
intr_en = 1;
<snip>
while (1) {
<snip>
start_rx:
<snip>
// Turn on the NIC hardware interrupts
turn_on_off_intr(qconf, 1);
sleep_until_rx_interrupt(qconf->n_rx_queue);
// Turn off the NIC hardware interrupts
turn_on_off_intr(qconf, 0);
/*start receiving packets immediately*/
goto start_rx;
}
}
/*
 * Add every Rx queue in qconf to the per-thread epoll instance
 * (excerpt; <snip> marks declarations elided by the article).
 *
 * Returns 0 when all queues were added, or the first non-zero value
 * returned by rte_eth_dev_rx_intr_ctl_q().
 */
static int event_register(struct lcore_conf *qconf)
{
<snip>
for (i = 0; i < qconf->n_rx_queue; ++i) {
rx_queue = &(qconf->rx_queue_list[i]);
portid = rx_queue->port_id;
queueid = rx_queue->queue_id;
/*
 * Pack the port id above the low CHAR_BIT bits and the queue id into
 * them; this value is passed as the event's user data.
 */
data = portid << CHAR_BIT | queueid;
ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
RTE_EPOLL_PER_THREAD, RTE_INTR_EVENT_ADD,
(void*)((uintptr_t)data));
/* Stop at the first failure and report it to the caller. */
if (ret)
return ret;
}
return 0;
}
/*
 * Switch the Rx interrupt of every queue listed in qconf on or off.
 *
 * qconf: per-lcore configuration holding n_rx_queue entries in
 *        rx_queue_list.
 * on:    true enables the Rx interrupt of each queue, false disables it.
 *
 * Each enable/disable call is made while holding the spinlock indexed by
 * the queue's port id in locks[].
 */
static void turn_on_off_intr(struct lcore_conf *qconf, bool on)
{
	int idx = 0;

	while (idx < qconf->n_rx_queue) {
		struct lcore_rx_queue *rxq = &qconf->rx_queue_list[idx];
		uint16_t port = rxq->port_id;
		uint8_t queue = rxq->queue_id;

		rte_spinlock_lock(&locks[port]);
		if (!on)
			rte_eth_dev_rx_intr_disable(port, queue);
		else
			rte_eth_dev_rx_intr_enable(port, queue);
		rte_spinlock_unlock(&locks[port]);

		idx++;
	}
}
/**
 * force polling thread sleep until one-shot rx interrupt triggers
 *
 * Excerpt; <snip> marks declarations elided by the article.
 *
 * @param num
 *   Maximum number of events requested from rte_epoll_wait().
 * @return
 *   Always 0 in the code shown.
 */
static int
sleep_until_rx_interrupt(int num)
{
<snip>
/* Wait on the per-thread epoll instance with a timeout argument of -1. */
n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, -1);
for (i = 0; i < n; i++) {
data = event[i].epdata.data;
/* Unpack the user data: port id above the low CHAR_BIT bits, queue id in them. */
port_id = ((uintptr_t)data) >> CHAR_BIT;
queue_id = ((uintptr_t)data) &
RTE_LEN2MASK(CHAR_BIT, uint8_t);
RTE_LOG(INFO, L3FWD_POWER,
"lcore %u is waked up from rx interrupt on"
" port %d queue %d\n",
rte_lcore_id(), port_id, queue_id);
}
return 0;
}
DPDK相关API实现:
rte_eth_dev_rx_intr_ctl_q -> rte_intr_rx_ctl
eal_intr_proc_rxtx_intr -> read
rte_epoll_ctl -> epoll_ctl
/*
 * Add or change the epoll registration of one Rx interrupt vector
 * (excerpt; <snip> marks code elided by the article).
 *
 * NOTE(review): the return-type line is not shown in this excerpt - check
 * the DPDK EAL source for the full prototype.
 *
 * intr_handle: interrupt handle owning the efds[] and elist[] arrays.
 * epfd:        epoll instance passed through to rte_epoll_ctl().
 * op:          requested operation; only RTE_INTR_EVENT_ADD is shown.
 * vec:         interrupt vector number (its mapping to efd_idx is elided).
 * data:        opaque user data stored in the epoll event.
 */
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
int op, unsigned int vec, void *data)
{
<snip>
switch (op) {
case RTE_INTR_EVENT_ADD:
epfd_op = EPOLL_CTL_ADD;
rev = &intr_handle->elist[efd_idx];
<snip>
/* attach to intr vector fd */
epdata = &rev->epdata;
/* Edge-triggered (EPOLLET) notification for readable/priority data. */
epdata->event = EPOLLIN | EPOLLPRI | EPOLLET;
epdata->data = data;
/* eal_intr_proc_rxtx_intr is installed as the event callback. */
epdata->cb_fun =
(rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
epdata->cb_arg = (void *)intr_handle;
/* Register the vector's event fd with the epoll instance. */
rc = rte_epoll_ctl(epfd, epfd_op,
intr_handle->efds[efd_idx], rev);
<snip>
}
<snip>
}
3.内核态vfio-pci处理
略
网友评论