BFD(Bidirectional Forwarding Detection,双向转发检测)用于快速检测、监控网络中链路或者IP路由的转发连通状况,保证邻居之间能够快速检测到通信故障,从而快速建立起备用通道恢复通信。
bfd协议详细信息可以参考RFC 5880,也可以参考这篇文章,这里只讲述在ovs中bfd的实现。
ovs支持的bfd的参数可从ovsdb定义查到,如下所示,每个字段有详细的解释。
下面挑几个重要的字段翻译一下
bfd:enable bfd是设置在interface上的,设置为true使能bfd,默认是关闭bfd的。
bfd:min_rx 本端bfd session期望接收bfd报文的最小间隔,单位毫秒,默认值1000。
bfd:min_tx 本端bfd session期望发送bfd报文的最小间隔,单位毫秒,默认值100。
bfd:bfd_local_src_mac 本端发送bfd报文的源mac地址,如果不指定,则默认使用interface的mac地址。
bfd:bfd_local_dst_mac 本端发送bfd报文的目的mac地址,如果不指定,则默认使用00:23:20:00:00:01。
bfd:bfd_remote_dst_mac 用于检测接收到的对端发送bfd报文的目的mac。如果配置了,则目的mac必须是配置的值,如果没配置,则不做检查。
bfd:bfd_src_ip 本端发送bfd报文的源ip,如果不指定,则默认使用169.254.1.1。
bfd:bfd_dst_ip 本端发送bfd报文的目的ip,如果不指定,则默认使用169.254.1.0。
bfd配置
使用如下命令使能bfd,查看bfd配置,查看bfd状态,抓取bfd报文
//查看当前ovs的配置
root@master:~# ovs-vsctl show
163a03bf-8b1b-4043-8d37-8b2287bf94fe
Bridge "br1"
Port "br1"
Interface "br1"
type: internal
Port "ens8"
Interface "ens8"
//在ens8接口上使能bfd
root@master:~# ovs-vsctl set interface ens8 bfd:enable=true
//查看bfd配置,注意bfd_status不是配置的,而是实时反映interface的状态
root@master:~# ovs-vsctl list interface ens8
_uuid : 1b7b107b-f54e-44a5-85d2-41b2ea337549
admin_state : up
bfd : {enable="true"}
bfd_status : {diagnostic="No Diagnostic", flap_count="0", forwarding="false", remote_diagnostic="No Diagnostic", remote_state=down, state=down}
//查看bfd session协议状态,包含本端和对端情况
root@master:~# ovs-appctl bfd/show
---- ens8 ----
Forwarding: false
Detect Multiplier: 3
Concatenated Path Down: false
TX Interval: Approx 1000ms
RX Interval: Approx 1000ms
Detect Time: now -1168912259ms
Next TX Time: now +478ms
Last TX Time: now -432ms
Local Flags: none
Local Session State: down
Local Diagnostic: No Diagnostic
Local Discriminator: 0x2ca43608
Local Minimum TX Interval: 1000ms
Local Minimum RX Interval: 1000ms
Remote Flags: none
Remote Session State: down
Remote Diagnostic: No Diagnostic
Remote Discriminator: 0x0
Remote Minimum TX Interval: 0ms
Remote Minimum RX Interval: 1ms
Remote Detect Multiplier: 0
//在interface ens8上可抓到bfd报文,源mac为ens8的mac,目的mac,源ip和目的ip都为默认值。
root@master:~# tcpdump -vne -i ens8
tcpdump: listening on ens8, link-type EN10MB (Ethernet), capture size 262144 bytes
10:16:18.348154 52:54:00:9e:98:20 > 00:23:20:00:00:01, ethertype IPv4 (0x0800), length 66: (tos 0x18, ttl 255, id 0, offset 0, flags [none], proto UDP (17), length 52)
169.254.1.1.49154 > 169.254.1.0.3784: BFDv1, length: 24
Control, State Down, Flags: [none], Diagnostic: No Diagnostic (0x00)
Detection Timer Multiplier: 3 (3000 ms Detection time), BFD Length: 24
My Discriminator: 0x2ca43608, Your Discriminator: 0x00000000
Desired min Tx Interval: 1000 ms
Required min Rx Interval: 1000 ms
Required min Echo Interval: 0 ms
下面尝试修改源ip和源mac,查看是否生效
root@master:~# ovs-vsctl set interface ens8 bfd:bfd_src_ip=10.10.10.1
root@master:~# ovs-vsctl set interface ens8 bfd:bfd_local_src_mac=00:00:00:00:00:01
root@master:~# ovs-vsctl list interface ens8
_uuid : 1b7b107b-f54e-44a5-85d2-41b2ea337549
admin_state : up
bfd : {bfd_local_src_mac="00:00:00:00:00:01", bfd_src_ip="10.10.10.1", bfd_src_mac="00:00:00:00:00:01", enable="true"}
bfd_status : {diagnostic="No Diagnostic", flap_count="0", forwarding="false", remote_diagnostic="No Diagnostic", remote_state=down, state=down}
//从抓包内容可看到,源ip和mac已经生效
root@master:~# tcpdump -vne -i ens8
tcpdump: listening on ens8, link-type EN10MB (Ethernet), capture size 262144 bytes
10:26:33.537711 00:00:00:00:00:01 > 00:23:20:00:00:01, ethertype IPv4 (0x0800), length 66: (tos 0x18, ttl 255, id 0, offset 0, flags [none], proto UDP (17), length 52)
10.10.10.1.49154 > 169.254.1.0.3784: BFDv1, length: 24
Control, State Down, Flags: [none], Diagnostic: No Diagnostic (0x00)
Detection Timer Multiplier: 3 (3000 ms Detection time), BFD Length: 24
My Discriminator: 0x2ca43608, Your Discriminator: 0x00000000
Desired min Tx Interval: 1000 ms
Required min Rx Interval: 1000 ms
Required min Echo Interval: 0 ms
将对端bfd也使能后,查看本端bfd状态,Forwarding已经变为true
root@master:~# ovs-appctl bfd/show
---- ens8 ----
Forwarding: true
Detect Multiplier: 3
Concatenated Path Down: false
TX Interval: Approx 1000ms
RX Interval: Approx 1000ms
Detect Time: now +2201ms
Next TX Time: now +276ms
Last TX Time: now -604ms
Local Flags: none
Local Session State: up
Local Diagnostic: No Diagnostic
Local Discriminator: 0x2ca43608
Local Minimum TX Interval: 100ms
Local Minimum RX Interval: 1000ms
Remote Flags: none
Remote Session State: up
Remote Diagnostic: No Diagnostic
Remote Discriminator: 0xb732dbb8
Remote Minimum TX Interval: 100ms
Remote Minimum RX Interval: 1000ms
Remote Detect Multiplier: 3
源码分析
bfd配置
配置bfd流程如下,在bfd_configure中会解析bfd配置。
ofproto_port_set_bfd -> set_bfd -> bfd_configure
解析配置后,会调用 ofproto_dpif_monitor_port_update,此函数会判断如果配置了bfd,启动新线程用来发送bfd报文。
//如果配置了bfd,lldp或者cfm,则需要启动 monitor 线程,执行bfd,lldp或者cfm相关操作
//如果 monitor 线程已经运行了,但是bfd,lldp和cfm的配置为空了,则需要删除monitor线程
/* Registers or unregisters 'ofport' with the monitor module and then
 * starts or stops the monitor thread as needed.
 *
 * If none of 'bfd', 'cfm' and 'lldp' is set, the port is unregistered;
 * otherwise it is registered/updated with the given sessions and 'hw_addr'.
 * The registration step runs with 'monitor_mutex' held.
 *
 * Afterwards the monitor thread is created when it is not running and
 * 'monitor_hmap' is non-empty, and is stopped and joined when it is running
 * and 'monitor_hmap' has become empty. */
void
ofproto_dpif_monitor_port_update(const struct ofport_dpif *ofport,
struct bfd *bfd, struct cfm *cfm,
struct lldp *lldp,
const struct eth_addr *hw_addr)
{
ovs_mutex_lock(&monitor_mutex);
if (!cfm && !bfd && !lldp) {
/* Nothing left to monitor on this port. */
mport_unregister(ofport);
} else {
mport_register(ofport, bfd, cfm, lldp, hw_addr);
/* NOTE(review): the statements from here to mport_update() look like the
 * body of mport_register() shown inline for illustration (find the mport,
 * allocate and insert it into 'monitor_hmap' and 'monitor_heap' if it is
 * missing, then update it).  mport_register() is not visible here, so
 * confirm against its definition whether this is a duplicate of the call
 * above. */
struct mport *mport = mport_find(ofport);
if (!mport) {
mport = xzalloc(sizeof *mport);
mport->ofport = ofport;
hmap_insert(&monitor_hmap, &mport->hmap_node, hash_pointer(ofport, 0));
heap_insert(&monitor_heap, &mport->heap_node, 0);
}
mport_update(mport, bfd, cfm, lldp, hw_addr);
}
ovs_mutex_unlock(&monitor_mutex);
/* If the monitor thread is not running and the hmap
 * is not empty, starts it. If it is and the hmap is empty,
 * terminates it.  Note that this check happens after 'monitor_mutex'
 * has been released. */
if (!monitor_running && !hmap_is_empty(&monitor_hmap)) {
latch_init(&monitor_exit_latch);
/* Start the monitor thread; its entry point is monitor_main(). */
monitor_tid = ovs_thread_create("monitor", monitor_main, NULL);
monitor_running = true;
} else if (monitor_running && hmap_is_empty(&monitor_hmap)) {
/* Set the exit latch (presumably what monitor_main() watches to know it
 * should exit -- confirm there), wait for the thread to finish, and only
 * then destroy the latch. */
latch_set(&monitor_exit_latch);
xpthread_join(monitor_tid, NULL);
latch_destroy(&monitor_exit_latch);
monitor_running = false;
}
}
发送bfd报文
发送bfd报文的工作由上面启动的monitor线程处理,可以通过 top -H -p <pid> 查看该线程,即下面输出中的monitor68
Threads: 7 total, 0 running, 7 sleeping, 0 stopped, 0 zombie
%Cpu(s): 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
MiB Mem : 7961.7 total, 325.6 free, 638.5 used, 6997.6 buff/cache
MiB Swap: 0.0 total, 0.0 free, 0.0 used. 7036.0 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
319 root 20 0 448212 5708 3416 S 0.0 0.1 4:08.92 ovs-vswitchd
346 root 20 0 448212 5708 3416 S 0.0 0.1 0:00.31 urcu4
518 root 20 0 448212 5708 3416 S 0.0 0.1 0:00.02 handler67
519 root 20 0 448212 5708 3416 S 0.0 0.1 0:02.11 handler64
520 root 20 0 448212 5708 3416 S 0.0 0.1 0:07.69 revalidator66
521 root 20 0 448212 5708 3416 S 0.0 0.1 0:05.06 revalidator65
1622 root 20 0 448212 5708 3416 S 0.0 0.1 0:10.25 monitor68
此线程首先调用 bfd_put_packet,根据配置构造bfd报文
/* Builds one BFD control packet for session 'bfd' and appends it to 'p'.
 *
 * The packet is laid out as Ethernet header, IPv4 header, UDP header and
 * BFD control message, in that order, using the addresses, ports and
 * session state stored in 'bfd'.  'eth_src' is the fallback Ethernet source
 * address used when the session has no local source MAC configured.
 * '*oam' is set to 'bfd->oam'.
 *
 * Besides filling 'p', this clears FLAG_FINAL from 'bfd->flags', records
 * the transmit time in 'bfd->last_tx' and calls bfd_set_next_tx().  The
 * whole body runs with 'mutex' held. */
void
bfd_put_packet(struct bfd *bfd, struct dp_packet *p,
const struct eth_addr eth_src, bool *oam) OVS_EXCLUDED(mutex)
{
long long int min_tx, min_rx;
struct udp_header *udp;
struct eth_header *eth;
struct ip_header *ip;
struct msg *msg;
ovs_mutex_lock(&mutex);
/* If a transmit deadline was set, log when this packet is being built more
 * than 1.5 transmit intervals after it. */
if (bfd->next_tx) {
long long int delay = time_msec() - bfd->next_tx;
long long int interval = bfd_tx_interval(bfd);
if (delay > interval * 3 / 2) {
VLOG_INFO("%s: long delay of %lldms (expected %lldms) sending BFD"
" control message", bfd->name, delay, interval);
}
}
/* RFC 5880 Section 6.5
 * A BFD Control packet MUST NOT have both the Poll (P) and Final (F) bits
 * set. */
ovs_assert(!(bfd->flags & FLAG_POLL) || !(bfd->flags & FLAG_FINAL));
/* Ethernet header.  A zero configured source/destination MAC means "not
 * configured": fall back to the caller's 'eth_src' and to 'eth_addr_bfd'
 * respectively. */
dp_packet_reserve(p, 2); /* Properly align after the ethernet header. */
eth = dp_packet_put_uninit(p, sizeof *eth);
eth->eth_src = eth_addr_is_zero(bfd->local_eth_src)
? eth_src : bfd->local_eth_src;
eth->eth_dst = eth_addr_is_zero(bfd->local_eth_dst)
? eth_addr_bfd : bfd->local_eth_dst;
eth->eth_type = htons(ETH_TYPE_IP);
/* IPv4 header.  The total length covers the IP header, the UDP header and
 * the BFD message. */
ip = dp_packet_put_zeros(p, sizeof *ip);
ip->ip_ihl_ver = IP_IHL_VER(5, 4);
ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg);
ip->ip_ttl = MAXTTL;
ip->ip_tos = IPTOS_LOWDELAY | IPTOS_THROUGHPUT;
ip->ip_proto = IPPROTO_UDP;
put_16aligned_be32(&ip->ip_src, bfd->ip_src);
put_16aligned_be32(&ip->ip_dst, bfd->ip_dst);
/* Checksum has already been zeroed by put_zeros call. */
ip->ip_csum = csum(ip, sizeof *ip);
/* UDP header.  Only the ports and length are written here; every other
 * byte of the header keeps the zero value from dp_packet_put_zeros(). */
udp = dp_packet_put_zeros(p, sizeof *udp);
udp->udp_src = htons(bfd->udp_src);
udp->udp_dst = htons(BFD_DEST_PORT);
udp->udp_len = htons(sizeof *udp + sizeof *msg);
/* BFD control message.  The buffer is uninitialized, so every field is
 * assigned explicitly below. */
msg = dp_packet_put_uninit(p, sizeof *msg);
msg->vers_diag = (BFD_VERSION << 5) | bfd->diag;
msg->flags = (bfd->state & STATE_MASK) | bfd->flags;
msg->mult = bfd->mult;
msg->length = BFD_PACKET_LEN;
msg->my_disc = htonl(bfd->disc);
msg->your_disc = htonl(bfd->rmt_disc);
msg->min_rx_echo = htonl(0);
/* While a poll sequence is in progress, advertise the intervals recorded
 * for the poll instead of the current ones. */
if (bfd_in_poll(bfd)) {
min_tx = bfd->poll_min_tx;
min_rx = bfd->poll_min_rx;
} else {
min_tx = bfd_min_tx(bfd);
min_rx = bfd->min_rx;
}
/* NOTE(review): the factor of 1000 presumably converts the millisecond
 * values kept in 'bfd' to the microsecond units RFC 5880 uses on the wire
 * -- confirm against the field definitions. */
msg->min_tx = htonl(min_tx * 1000);
msg->min_rx = htonl(min_rx * 1000);
/* The Final flag, if set, has been copied into this message above; clear
 * it so it is not repeated in the next packet. */
bfd->flags &= ~FLAG_FINAL;
*oam = bfd->oam;
log_msg(VLL_DBG, msg, "Sending BFD Message", bfd);
bfd->last_tx = time_msec();
bfd_set_next_tx(bfd);
ovs_mutex_unlock(&mutex);
}
再调用 ofproto_dpif_send_packet 将bfd报文发送出去,流程如下:
ofproto_dpif_send_packet -> xlate_send_packet -> ofproto_dpif_execute_actions -> ofproto_dpif_execute_actions__ -> dpif_execute -> dpif_operate
dpif_operate根据datapath类型调用不同的函数,将报文最终发送出去。
接收bfd报文
bfd等协议报文都是由ovs的slow path处理的。
第一个bfd报文查找fast path流表失败后,会上送slow path进行处理,代码流程如下:
#第一个bfd报文处理路径
dp_netdev_process_rxq_port
//接收报文
netdev_rxq_recv
//处理报文
dp_netdev_input
dp_netdev_input__
//查找emc失败后,查fast path
emc_processing
//查fast path失败后,查slow path
fast_path_processing
//slow path的处理
handle_packet_upcall
dp_netdev_upcall
upcall_cb
process_upcall
upcall_xlate
xlate_actions
//处理bfd报文
process_special
//对于bfd报文,需要安装flow,action为userspace,表示需要slow path处理
compose_slow_path
cookie.type = USER_ACTION_COOKIE_SLOW_PATH;
cookie.slow_path.unused = 0;
cookie.slow_path.reason = xout->slow;
//OVS_ACTION_ATTR_USERSPACE
odp_put_userspace_action
//此处只有正常action的处理,不包含slow-path的处理
dp_netdev_execute_actions
//将flow安装到fast path
dp_netdev_flow_add
//将flow安装到emc
emc_probabilistic_insert(pmd, key, netdev_flow);
bfd首包处理完后,就会安装流表到emc和fast path,可通过如下命令查询(只能查到fast path流表,emc的看不到)
root@master:~# ovs-appctl dpctl/dump-flows
recirc_id(0),in_port(2),eth_type(0x0800),ipv4(proto=17,frag=no),udp(dst=3784), packets:15818, bytes:1043988, used:0.004s, actions:userspace(pid=2898884630,slow_path(bfd))
后续bfd报文都会匹配上面的流表,执行action,即上送slow path处理,代码流程如下
#后续bfd报文处理路径
dp_netdev_process_rxq_port
//接收报文
netdev_rxq_recv
//处理报文
dp_netdev_input
dp_netdev_input__
//查找emc流表,成功,不用再查fast path
emc_processing
//不用查fast path。但是emc流表一般只有8k,如果流很多的话,可能还得查找fast path
fast_path_processing
//执行流表的action
packet_batch_per_flow_execute
dp_netdev_execute_actions
odp_execute_actions
//上送userspace
dp_execute_cb
OVS_ACTION_ATTR_USERSPACE
dp_execute_userspace_action
//userdata不为空
dp_netdev_upcall
upcall_cb
process_upcall
upcall_xlate
xlate_actions
//最终也是调用到process_special处理bfd报文
process_special
从上面流程可知,不管bfd报文走的是slow path,还是后面的fast path,最终都会调用 process_special处理bfd报文。
首先调用bfd_should_process_flow判断是否是bfd报文,只有满足条件了才认为是bfd报文。
/* Decides whether 'flow' is a BFD control packet that session 'bfd_' should
 * handle.
 *
 * A flow qualifies when all of the following hold:
 *   - if a remote destination MAC is configured on the session (non-zero
 *     'rmt_eth_dst'), the flow's destination MAC equals it;
 *   - the Ethernet type is IPv4;
 *   - the IP protocol is UDP and the flow is not a later fragment;
 *   - the UDP destination port is BFD_DEST_PORT;
 *   - if the session has 'check_tnl_key' set, the tunnel ID is zero.
 *
 * Every field that is actually examined (other than the Ethernet type) is
 * also un-wildcarded in 'wc' before it is compared, so the masks written
 * depend on how far the checks progress.
 *
 * Returns true if the flow should be processed as BFD, false otherwise. */
bool
bfd_should_process_flow(const struct bfd *bfd_, const struct flow *flow,
                        struct flow_wildcards *wc)
{
    struct bfd *bfd = CONST_CAST(struct bfd *, bfd_);
    bool require_zero_tun_id;

    /* Optional filter on the destination MAC of received BFD packets. */
    if (!eth_addr_is_zero(bfd->rmt_eth_dst)) {
        memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
        if (!eth_addr_equals(bfd->rmt_eth_dst, flow->dl_dst)) {
            return false;
        }
    }

    /* Only IPv4 packets are considered. */
    if (flow->dl_type != htons(ETH_TYPE_IP)) {
        return false;
    }

    /* Must be UDP, and not a later fragment. */
    memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
    if (flow->nw_proto != IPPROTO_UDP
        || (flow->nw_frag & FLOW_NW_FRAG_LATER)) {
        return false;
    }

    /* Must be addressed to the BFD control port. */
    memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
    if (flow->tp_dst != htons(BFD_DEST_PORT)) {
        return false;
    }

    /* When 'check_tnl_key' is enabled, additionally require tunnel ID 0. */
    atomic_read_relaxed(&bfd->check_tnl_key, &require_zero_tun_id);
    if (!require_zero_tun_id) {
        return true;
    }

    memset(&wc->masks.tunnel.tun_id, 0xff, sizeof wc->masks.tunnel.tun_id);
    return flow->tunnel.tun_id == htonll(0);
}
认定是bfd报文后,调用 bfd_process_packet 处理bfd报文。
网友评论