ovs qos


    OVS supports QoS and dispatches to different backends according to the datapath type. For the kernel datapath, QoS is implemented on top of the kernel's tc module, providing ingress policing (rate limiting) and egress traffic shaping. For the userspace datapath, QoS uses the metering APIs provided by DPDK to implement a simpler form of ingress and egress rate limiting.

    kernel qos

    The kernel's tc framework provides many queueing disciplines, but OVS currently supports only two of them: htb and hfsc. For the full set of QoS parameters, see the definitions of the QoS and Queue tables in the OVS database schema.

    You can also refer to this article, which is devoted specifically to QoS.
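
    As a quick illustration, a linux-htb QoS with one queue can be configured roughly as follows (the port name and the rates are hypothetical values, not taken from this article):

    # max-rate/min-rate are in bits per second (hypothetical values)
    ovs-vsctl set port eth0 qos=@newqos -- \
        --id=@newqos create qos type=linux-htb \
            other-config:max-rate=1000000000 queues:0=@q0 -- \
        --id=@q0 create queue other-config:min-rate=100000000 \
            other-config:max-rate=500000000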

    userspace qos

    First, let's look at where QoS is configured in the code. This configuration path does not distinguish between QoS flavors; it applies to both kernel and userspace QoS, the only difference being which API is invoked based on the type.

    Examples of userspace QoS configuration can be found in the official documentation.

    Regardless of the QoS flavor, the configuration is pushed down to the lowest-level packet receive/transmit layer of the interface; it is not applied in the datapath or the slow path.
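
    For reference, here is a sketch in the spirit of the official documentation (the port name vhost-user0 and all numeric values are hypothetical). Ingress policing is set directly on the Interface record in kbits, while egress policing is a QoS record of type egress-policer whose parameters are given in bytes:

    # Ingress: rate in kbit/s, burst in kbits (hypothetical values)
    ovs-vsctl set interface vhost-user0 ingress_policing_rate=10000
    ovs-vsctl set interface vhost-user0 ingress_policing_burst=1000

    # Egress: cir in bytes/s, cbs in bytes (hypothetical values)
    ovs-vsctl set port vhost-user0 qos=@qos -- \
        --id=@qos create qos type=egress-policer \
            other-config:cir=46000000 other-config:cbs=2048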

    Iterate over every interface of every port on every bridge and call iface_configure_qos:

    static void
    bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
    {
        ...
        HMAP_FOR_EACH (br, node, &all_bridges) {
            struct port *port;

            /* We need the datapath ID early to allow LACP ports to use it as the
             * default system ID. */
            bridge_configure_datapath_id(br);

            HMAP_FOR_EACH (port, hmap_node, &br->ports) {
                struct iface *iface;

                LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
                    ...
                    iface_configure_qos(iface, port->cfg->qos);
                    ...
                }
            }
            ...
        }
    }
    

    This function configures the QoS of one interface; the ingress, egress, and per-queue settings are all applied here. For userspace QoS, the DPDK-backed netdev APIs are invoked: the ingress configuration ends up calling netdev_dpdk_set_policing, and the egress configuration ends up calling netdev_dpdk_set_qos.

    static void
    iface_configure_qos(struct iface *iface, const struct ovsrec_qos *qos)
    {
        struct ofpbuf queues_buf;
    
        ofpbuf_init(&queues_buf, 0);
    
        if (!qos || qos->type[0] == '\0') {
            netdev_set_qos(iface->netdev, NULL, NULL);
        } else {
            const struct ovsdb_datum *queues;
            struct netdev_queue_dump dump;
            unsigned int queue_id;
            struct smap details;
            bool queue_zero;
            size_t i;
    
            /* Configure top-level Qos for 'iface'. */
            //class->set_qos(netdev, type, details) -- netdev_dpdk_set_qos
            netdev_set_qos(iface->netdev, qos->type, &qos->other_config);
    
            /* Deconfigure queues that were deleted. */
            queues = ovsrec_qos_get_queues(qos, OVSDB_TYPE_INTEGER, OVSDB_TYPE_UUID);
            smap_init(&details);
            NETDEV_QUEUE_FOR_EACH (&queue_id, &details, &dump, iface->netdev) {
                if (!queue_ids_include(queues, queue_id)) {
                    netdev_delete_queue(iface->netdev, queue_id);
                }
            }
            smap_destroy(&details);
    
            /* Configure queues for 'iface'. */
            queue_zero = false;
            for (i = 0; i < qos->n_queues; i++) {
                const struct ovsrec_queue *queue = qos->value_queues[i];
                queue_id = qos->key_queues[i];
    
                if (queue_id == 0) {
                    queue_zero = true;
                }
    
                if (queue->n_dscp == 1) {
                    struct ofproto_port_queue *port_queue;
    
                    port_queue = ofpbuf_put_uninit(&queues_buf,
                                                   sizeof *port_queue);
                    port_queue->queue = queue_id;
                    port_queue->dscp = queue->dscp[0];
                }
    
                //class->set_queue(netdev, queue_id, details)
                netdev_set_queue(iface->netdev, queue_id, &queue->other_config);
            }
            if (!queue_zero) {
                smap_init(&details);
                netdev_set_queue(iface->netdev, 0, &details);
                smap_destroy(&details);
            }
        }
    
        if (iface->ofp_port != OFPP_NONE) {
            const struct ofproto_port_queue *port_queues = queues_buf.data;
            size_t n_queues = queues_buf.size / sizeof *port_queues;
    
            ofproto_port_set_queues(iface->port->bridge->ofproto, iface->ofp_port,
                                    port_queues, n_queues);
        }
    
    //netdev->netdev_class->set_policing(netdev, kbits_rate, kbits_burst) -- netdev_dpdk_set_policing
        netdev_set_policing(iface->netdev,
                            MIN(UINT32_MAX, iface->cfg->ingress_policing_rate),
                            MIN(UINT32_MAX, iface->cfg->ingress_policing_burst));
    
        ofpbuf_uninit(&queues_buf);
    }
    

    Ingress setup: netdev_dpdk_set_policing

    static int
    netdev_dpdk_set_policing(struct netdev* netdev, uint32_t policer_rate,
                             uint32_t policer_burst)
    {
        struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
        struct ingress_policer *policer;
    
        /* Force to 0 if no rate specified,
         * default to 8000 kbits if burst is 0,
         * else stick with user-specified value.
         */
        policer_burst = (!policer_rate ? 0
                         : !policer_burst ? 8000
                         : policer_burst);
    
        ovs_mutex_lock(&dev->mutex);
    
        policer = ovsrcu_get_protected(struct ingress_policer *,
                                        &dev->ingress_policer);
    
        if (dev->policer_rate == policer_rate &&
            dev->policer_burst == policer_burst) {
            /* Assume that settings haven't changed since we last set them. */
            ovs_mutex_unlock(&dev->mutex);
            return 0;
        }
    
        /* Destroy any existing ingress policer for the device if one exists */
        if (policer) {
            ovsrcu_postpone(free, policer);
        }
    
        if (policer_rate != 0) {
            //Internally this calls the DPDK library API to do the configuration and returns the policer.
            policer = netdev_dpdk_policer_construct(policer_rate, policer_burst);
        } else {
            policer = NULL;
        }
        //Publish the policer on the device (dev->ingress_policer); packets received on this interface will from now on be metered against it.
        ovsrcu_set(&dev->ingress_policer, policer);
        dev->policer_rate = policer_rate;
        dev->policer_burst = policer_burst;
        ovs_mutex_unlock(&dev->mutex);
    
        return 0;
    }
    //rte_meter_srtcm_config is an API provided by DPDK
    static struct ingress_policer *
    netdev_dpdk_policer_construct(uint32_t rate, uint32_t burst)
    {
        struct ingress_policer *policer = NULL;
        uint64_t rate_bytes;
        uint64_t burst_bytes;
        int err = 0;
    
        policer = xmalloc(sizeof *policer);
        rte_spinlock_init(&policer->policer_lock);
    
        /* rte_meter requires bytes so convert kbits rate and burst to bytes. */
        rate_bytes = rate * 1000ULL / 8;
        burst_bytes = burst * 1000ULL / 8;
    
        policer->app_srtcm_params.cir = rate_bytes;
        policer->app_srtcm_params.cbs = burst_bytes;
        policer->app_srtcm_params.ebs = 0;
        err = rte_meter_srtcm_config(&policer->in_policer,
                                        &policer->app_srtcm_params);
        if (err) {
            VLOG_ERR("Could not create rte meter for ingress policer");
            free(policer);
            return NULL;
        }
    
        return policer;
    }
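
    To make the unit conversion concrete, here is a tiny standalone sketch (the input values are hypothetical) that reproduces the kbits-to-bytes arithmetic of netdev_dpdk_policer_construct:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t rate_kbits = 10000;   /* hypothetical ingress_policing_rate */
        uint32_t burst_kbits = 1000;   /* hypothetical ingress_policing_burst */

        /* Same arithmetic as netdev_dpdk_policer_construct(): rte_meter
         * wants bytes, while the OVSDB columns are in kbits. */
        uint64_t cir = rate_kbits * 1000ULL / 8;   /* 1250000 bytes/s */
        uint64_t cbs = burst_kbits * 1000ULL / 8;  /* 125000 bytes */

        printf("cir=%llu bytes/s, cbs=%llu bytes\n",
               (unsigned long long) cir, (unsigned long long) cbs);
        return 0;
    }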
    

    Egress setup: netdev_dpdk_set_qos
    The code examined here is OVS 2.8.2, which currently supports only one egress rate-limiting implementation, egress_policer_ops. It ultimately also calls the DPDK library API rte_meter_srtcm_config to do the configuration, the same function the ingress side uses.

    static const struct dpdk_qos_ops egress_policer_ops = {
        "egress-policer",    /* qos_name */
        egress_policer_qos_construct,
        egress_policer_qos_destruct,
        egress_policer_qos_get,
        egress_policer_qos_is_equal,
        egress_policer_run
    };
    
    static int
    netdev_dpdk_set_qos(struct netdev *netdev, const char *type,
                        const struct smap *details)
    {
        struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
        const struct dpdk_qos_ops *new_ops = NULL;
        struct qos_conf *qos_conf, *new_qos_conf = NULL;
        int error = 0;
    
        ovs_mutex_lock(&dev->mutex);
    
        qos_conf = ovsrcu_get_protected(struct qos_conf *, &dev->qos_conf);
    
        //Currently there is only one implementation: egress_policer_ops, whose name is egress-policer
        new_ops = qos_lookup_name(type);
    
        if (!new_ops || !new_ops->qos_construct) {
            new_qos_conf = NULL;
            if (type && type[0]) {
                error = EOPNOTSUPP;
            }
        } else if (qos_conf && qos_conf->ops == new_ops
                   && qos_conf->ops->qos_is_equal(qos_conf, details)) {
            new_qos_conf = qos_conf;
        } else {
            //egress_policer_qos_construct
            error = new_ops->qos_construct(details, &new_qos_conf);
        }
    
        if (error) {
            VLOG_ERR("Failed to set QoS type %s on port %s: %s",
                     type, netdev->name, rte_strerror(error));
        }
    
        if (new_qos_conf != qos_conf) {
            ovsrcu_set(&dev->qos_conf, new_qos_conf);
            if (qos_conf) {
                ovsrcu_postpone(qos_conf->ops->qos_destruct, qos_conf);
            }
        }
    
        ovs_mutex_unlock(&dev->mutex);
    
        return error;
    }
    
    static int
    egress_policer_qos_construct(const struct smap *details,
                                 struct qos_conf **conf)
    {
        struct egress_policer *policer;
        int err = 0;
    
        policer = xmalloc(sizeof *policer);
        qos_conf_init(&policer->qos_conf, &egress_policer_ops);
        //Parse the configuration into srTCM parameters
        egress_policer_details_to_param(details, &policer->app_srtcm_params);
        err = rte_meter_srtcm_config(&policer->egress_meter,
                                     &policer->app_srtcm_params);
        if (!err) {
            *conf = &policer->qos_conf;
        } else {
            free(policer);
            *conf = NULL;
            err = -err;
        }
    
        return err;
    }
    

    Packet processing

    Ingress QoS processing
    There are two receive functions in OVS-DPDK: vhostuser ports use netdev_dpdk_vhost_rxq_recv, and physical NIC ports use netdev_dpdk_rxq_recv.

    netdev_dpdk_vhost_rxq_recv
    netdev_dpdk_rxq_recv
        struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
        struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
    //Fetch the policer from the interface device
        struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
        int nb_rx;
        int dropped = 0;
    //Receive a batch of packets via the DPDK function rte_eth_rx_burst
        nb_rx = rte_eth_rx_burst(rx->port_id, rxq->queue_id,
                                 (struct rte_mbuf **) batch->packets,
                                 NETDEV_MAX_BURST);
    //If the policer is non-NULL, QoS is configured; meter the received packets.
        if (policer) {
            dropped = nb_rx;
            nb_rx = ingress_policer_run(policer, (struct rte_mbuf **) batch->packets, nb_rx);
                rte_spinlock_lock(&policer->policer_lock);
                cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
                    int i = 0;
                    int cnt = 0;
                    struct rte_mbuf *pkt = NULL;
                    uint64_t current_time = rte_rdtsc();
    
                    for (i = 0; i < pkt_cnt; i++) {
                        pkt = pkts[i];
                        /* Handle current packet */
                        if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) {
                            if (cnt != i) {
                                pkts[cnt] = pkt;
                            }
                            cnt++;
                        } else {
                            rte_pktmbuf_free(pkt);
                        }
                    }
    
                    return cnt;
                rte_spinlock_unlock(&policer->policer_lock);
            dropped -= nb_rx;
        }
    //The DPDK function rte_meter_srtcm_color_blind_check colors each packet: green means the packet may pass; yellow and red mean it exceeds the configured rate and must be dropped.
    static inline bool
    netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
                                   struct rte_mbuf *pkt, uint64_t time)
    {
        uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
    
        return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
                                                    e_RTE_METER_GREEN;
    }
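
    Note that OVS configures the ingress policer with ebs = 0, so the srTCM effectively degenerates into a single committed token bucket: a packet is green while enough tokens remain and red otherwise. The toy model below (a simplified sketch, not DPDK's actual implementation) captures that behavior:

    #include <stdbool.h>
    #include <stdint.h>

    struct toy_srtcm {
        uint64_t cir;     /* committed rate, bytes per second */
        uint64_t cbs;     /* committed burst size, bytes */
        uint64_t tc;      /* tokens currently in the bucket, bytes */
        uint64_t last_ns; /* time of the last refill, ns */
    };

    /* Toy color-blind check with ebs == 0 (not DPDK code): refill the
     * bucket at cir, cap it at cbs, and pass the packet only if enough
     * tokens are left. */
    bool
    toy_srtcm_is_green(struct toy_srtcm *m, uint64_t now_ns, uint32_t pkt_len)
    {
        uint64_t refill = (now_ns - m->last_ns) * m->cir / 1000000000ULL;

        m->tc = m->tc + refill > m->cbs ? m->cbs : m->tc + refill;
        m->last_ns = now_ns;

        if (pkt_len <= m->tc) {
            m->tc -= pkt_len;   /* green: consume tokens and pass */
            return true;
        }
        return false;           /* non-green: over the rate, caller drops */
    }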
    

    Egress QoS processing
    The egress path likewise calls rte_meter_srtcm_color_blind_check to color each packet and decides whether to drop it based on the color.

    netdev_dpdk_send__
    __netdev_dpdk_vhost_send
        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
            struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev->qos_conf);
    
            if (qos_conf) {
                rte_spinlock_lock(&qos_conf->lock);
                //egress_policer_run
                cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
                    int cnt = 0;
                    struct egress_policer *policer =
                        CONTAINER_OF(conf, struct egress_policer, qos_conf);
    
                    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
                        int i = 0;
                        int cnt = 0;
                        struct rte_mbuf *pkt = NULL;
                        uint64_t current_time = rte_rdtsc();
    
                        for (i = 0; i < pkt_cnt; i++) {
                            pkt = pkts[i];
                            /* Handle current packet */
                            if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) {
                                if (cnt != i) {
                                    pkts[cnt] = pkt;
                                }
                                cnt++;
                            } else {
                                rte_pktmbuf_free(pkt);
                            }
                        }
    
                        return cnt;
                    return cnt;
                rte_spinlock_unlock(&qos_conf->lock);
            }
    
            return cnt;
    
