
The veth virtual NIC

Author: 分享放大价值 | Published 2020-09-22 21:55

    veth is a virtual network device provided by the kernel, and it always comes in pairs. Data transmitted on one end is received on the other, much like a network cable. How is this implemented? Let's take a look.

    veth creation

    A pair can be created with the ip command:

    [root@localhost ~]# ip link add vetha type veth peer name vethb
    

    On the kernel side, the veth module must be loaded beforehand. The module itself does something very simple: it registers an rtnl_link_ops.

    [root@localhost ~]# modprobe veth
    [root@localhost ~]# lsmod | grep veth
    veth                   13410  0
    
    //veth.ko initialization
    #define DRV_NAME    "veth"
    static struct rtnl_link_ops veth_link_ops = {
        .kind       = DRV_NAME,
        .priv_size  = sizeof(struct veth_priv),
        .setup      = veth_setup,
        .validate   = veth_validate,
        .newlink    = veth_newlink,
        .dellink    = veth_dellink,
        .policy     = veth_policy,
        .maxtype    = VETH_INFO_MAX,
    };
    
    /*
     * init/fini
     */
    
    static __init int veth_init(void)
    {
        return rtnl_link_register(&veth_link_ops);
    }
    int rtnl_link_register(struct rtnl_link_ops *ops)
    {
        int err;
    
        rtnl_lock();
        err = __rtnl_link_register(ops);
        rtnl_unlock();
        return err;
    }
    int __rtnl_link_register(struct rtnl_link_ops *ops)
    {
        if (rtnl_link_ops_get(ops->kind))
            return -EEXIST;
    
        /* The check for setup is here because if ops
         * does not have that filled up, it is not possible
         * to use the ops for creating device. So do not
         * fill up dellink as well. That disables rtnl_dellink.
         */
        if (ops->setup && !ops->dellink)
            ops->dellink = unregister_netdevice_queue;
    
        list_add_tail(&ops->list, &link_ops);
        return 0;
    }
    

    When a veth pair is created with ip link add vetha type veth peer name vethb, the kernel calls rtnl_newlink, which looks up the rtnl_link_ops matching the supplied type and then invokes the ops' newlink callback to create the veth peer; each end is stored in the other end's private data.

    rtnl_newlink
        if (linkinfo[IFLA_INFO_KIND]) {
            nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
            ops = rtnl_link_ops_get(kind);
    
        struct net_device *dev;
        dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb);
        
        if (ops->newlink) {
            err = ops->newlink(net, dev, tb, data);
    
    static int veth_newlink(struct net *src_net, struct net_device *dev,
                 struct nlattr *tb[], struct nlattr *data[])
    
        struct net_device *peer;
        peer = rtnl_create_link(net, ifname, name_assign_type,
                    &veth_link_ops, tbp);
    
        register_netdevice(peer);
        register_netdevice(dev);
    
    /*
     * tie the devices together
     */
    //This is the key step: peer is stored in dev's priv and dev in peer's
    //priv, binding the two virtual devices together.
        priv = netdev_priv(dev);
        rcu_assign_pointer(priv->peer, peer);
    
        priv = netdev_priv(peer);
        rcu_assign_pointer(priv->peer, dev);
    
    static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
        //fetch the peer device
        struct veth_priv *priv = netdev_priv(dev);
        rcv = rcu_dereference(priv->peer);
        //by the time dev_forward_skb is called, the skb has been handed to the
        //peer device rcv, so transmit on dev is effectively receive on the peer
        if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
            struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
            u64_stats_update_begin(&stats->syncp);
            stats->bytes += length;
            stats->packets++;
            u64_stats_update_end(&stats->syncp);
        } else {
    drop:
            atomic64_inc(&priv->dropped);
        }
        rcu_read_unlock();
        return NETDEV_TX_OK;
    }
    //if the packet is forwardable, netif_rx_internal pushes it into the host network stack
    int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
    {
        return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
    }
    int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
    {
        if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
            if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
                atomic_long_inc(&dev->rx_dropped);
                kfree_skb(skb);
                return NET_RX_DROP;
            }
        }
    
        if (unlikely(!is_skb_forwardable(dev, skb))) {
            atomic_long_inc(&dev->rx_dropped);
            kfree_skb(skb);
            return NET_RX_DROP;
        }
    
        skb_scrub_packet(skb, true);
        skb->protocol = eth_type_trans(skb, dev);
        skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
    
        return 0;
    }
    //set skb->pkt_type according to the destination MAC
    __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
    {
        unsigned short _service_access_point;
        const unsigned short *sap;
        const struct ethhdr *eth;
    
        skb->dev = dev;
        skb_reset_mac_header(skb);
        skb_pull_inline(skb, ETH_HLEN);
        eth = eth_hdr(skb);
    
        if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
            if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
                skb->pkt_type = PACKET_BROADCAST;
            else
                skb->pkt_type = PACKET_MULTICAST;
        }
        else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
                               dev->dev_addr)))
            skb->pkt_type = PACKET_OTHERHOST;
      ....
    }
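
    Back in userspace, the "transmit on one end, receive on the other" behavior is easy to confirm: the TX counters of one end should track the RX counters of its peer. A quick check, assuming the pair created above:

    //TX packets/bytes on vetha should match RX packets/bytes on vethb
    [root@localhost ~]# ip -s link show vetha
    [root@localhost ~]# ip -s link show vethb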
    

    veth usage

    I have seen veth pairs used in two ways:

    a. One end in the root namespace and the other end in another namespace, connecting the two namespaces. In k8s, CNIs such as calico, cilium and ovs are implemented this way.
    b. The two ends attached to two different bridges, connecting the bridges.

    The second case is straightforward (a minimal sketch is shown below); the rest of this article focuses on several problems encountered in the first case.
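
    A sketch of case b, connecting two bridges with a veth pair; br0, br1, veth0 and veth1 are assumed names and not part of the experiment that follows:

    [root@localhost ~]# ip link add br0 type bridge
    [root@localhost ~]# ip link add br1 type bridge
    [root@localhost ~]# ip link add veth0 type veth peer name veth1
    [root@localhost ~]# ip link set veth0 master br0
    [root@localhost ~]# ip link set veth1 master br1
    [root@localhost ~]# ip link set br0 up && ip link set br1 up
    [root@localhost ~]# ip link set veth0 up && ip link set veth1 up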

    The experiment goes as follows:
    create a veth pair, vetha and vethb;
    create a namespace named test;
    move vetha into namespace test;
    bring both vetha and vethb up;
    assign IP 1.1.1.2 to vetha.

    [root@localhost ~]# ip link add vetha type veth peer name vethb
    [root@localhost ~]# ip link set dev vethb up
    [root@localhost ~]# ip netns add test
    [root@localhost ~]# ip link set dev vetha netns test
    [root@localhost ~]# ip netns exec test ip link set dev vetha up
    [root@localhost ~]# ip netns exec test ip address add dev vetha 1.1.1.2/24
    
    [root@localhost ~]# ip netns exec test ip a
    1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN qlen 1
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    30: vetha@if29: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP qlen 1000
        link/ether 96:7f:a6:ea:93:23 brd ff:ff:ff:ff:ff:ff link-netnsid 0
        inet 1.1.1.2/24 scope global vetha
           valid_lft forever preferred_lft forever
        inet6 fe80::74ef:e3ff:fe5d:2db0/64 scope link
           valid_lft forever preferred_lft forever
    
    [root@localhost ~]# ip netns exec test ip r
    1.1.1.0/24 dev vetha proto kernel scope link src 1.1.1.2
    
    [root@localhost ~]# ip netns exec test arp -n
    Address                  HWtype  HWaddress           Flags Mask            Iface
    1.1.1.4                          (incomplete)                              vetha
    
    [root@localhost ~]# ifconfig vethb
    vethb: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
            inet6 fe80::947f:a6ff:feea:9321  prefixlen 64  scopeid 0x20<link>
            ether 96:7f:a6:ea:93:21  txqueuelen 1000  (Ethernet)
            RX packets 208784  bytes 12836250 (12.2 MiB)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 1143  bytes 62469 (61.0 KiB)
            TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
    

    Problem 1: in the test namespace, configure a static ARP entry and ping a nonexistent address, 1.1.1.4.

    [root@localhost ~]# ip netns exec test arp -s 1.1.1.4 00:00:00:00:00:01
    [root@localhost ~]# ip netns exec test arp -n
    Address                  HWtype  HWaddress           Flags Mask            Iface
    1.1.1.4                  ether   00:00:00:00:00:01   CM                    vetha
    

    After the ICMP packet arrives at vethb it should traverse the host stack, match the host's default route, and leave via em1. In practice, though, the ICMP packets can be captured on vethb while em1 sees nothing.

    [root@localhost ~]# ip r
    default via 10.164.129.1 dev em1 proto static metric 100
    10.10.10.0/24 dev gre10 proto kernel scope link src 10.10.10.1
    10.164.129.0/24 dev em1 proto kernel scope link src 10.164.129.16 metric 100
    
    [root@localhost ~]# tcpdump -vne -i vethb icmp
    tcpdump: listening on vethb, link-type EN10MB (Ethernet), capture size 262144 bytes
    08:04:49.386991 f6:d4:d2:de:20:be > 00:00:00:00:00:01, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 64, id 48297, offset 0, flags [DF], proto ICMP (1), length 84)
        1.1.1.2 > 1.1.1.4: ICMP echo request, id 30782, seq 21, length 64
    08:04:50.386985 f6:d4:d2:de:20:be > 00:00:00:00:00:01, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 64, id 48427, offset 0, flags [DF], proto ICMP (1), length 84)
        1.1.1.2 > 1.1.1.4: ICMP echo request, id 30782, seq 22, length 64
    ^C
    2 packets captured
    2 packets received by filter
    0 packets dropped by kernel
    [root@localhost ~]# tcpdump -vne -i em1 icmp
    tcpdump: listening on em1, link-type EN10MB (Ethernet), capture size 262144 bytes
    ^C
    0 packets captured
    0 packets received by filter
    0 packets dropped by kernel
    

    The reason: the ICMP packet's destination MAC is 00:00:00:00:00:01. When vethb receives the packet, eth_type_trans assigns skb->pkt_type based on the destination MAC; since that MAC is not vethb's, skb->pkt_type is set to PACKET_OTHERHOST.

    veth_xmit ->dev_forward_skb -> __dev_forward_skb -> eth_type_trans
        if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
            if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
                skb->pkt_type = PACKET_BROADCAST;
            else
                skb->pkt_type = PACKET_MULTICAST;
        }
        else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
                               dev->dev_addr)))
            skb->pkt_type = PACKET_OTHERHOST;
    

    The packet is then pushed into the host stack, where ip_rcv drops it outright if skb->pkt_type == PACKET_OTHERHOST. What makes this painful is that the drop is not recorded in any statistics counter.
    It seems the MAC for 1.1.1.4 in the test namespace needs to be set to vethb's MAC address.

    netif_rx_internal -> enqueue_to_backlog -> process_backlog ->__netif_receive_skb -> __netif_receive_skb_core -> ip_rcv
    int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
    {
        const struct iphdr *iph;
        u32 len;
    
        /* When the interface is in promisc. mode, drop all the crap
         * that it receives, do not try to analyse it.
         */
        if (skb->pkt_type == PACKET_OTHERHOST)
            goto drop;
    drop:
        kfree_skb(skb);
    out:
        return NET_RX_DROP;
    }
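
    Although this drop increments no counter, generic drop-tracing can still catch it. For example, on kernels exposing the skb:kfree_skb tracepoint, perf can record where skbs are freed while the ping is running (a sketch; the perf tool must be installed):

    [root@localhost ~]# perf record -g -e skb:kfree_skb -a -- sleep 10
    [root@localhost ~]# perf report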
    

    Problem 2: as shown below, the MAC for 1.1.1.4 has been changed to vethb's MAC, but the problem persists: vethb sees the packets while em1 still does not.

    [root@localhost ~]# ip link show dev vethb
    33: vethb@if34: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT qlen 1000
        link/ether 96:ec:6a:a8:67:ed brd ff:ff:ff:ff:ff:ff link-netnsid 0
    [root@localhost ~]# ip netns exec test arp -d 1.1.1.4
    [root@localhost ~]# ip netns exec test arp -s 1.1.1.4 96:ec:6a:a8:67:ed
    [root@localhost ~]# ip netns exec test arp -n
    Address                  HWtype  HWaddress           Flags Mask            Iface
    1.1.1.4                  ether   96:ec:6a:a8:67:ed   CM                    vetha
    
    [root@localhost ~]# ip link show dev vethb
    33: vethb@if34: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT qlen 1000
        link/ether 96:ec:6a:a8:67:ed brd ff:ff:ff:ff:ff:ff link-netnsid 0
    [root@localhost ~]# tcpdump -vne -i vethb icmp
    tcpdump: listening on vethb, link-type EN10MB (Ethernet), capture size 262144 bytes
    08:15:21.495979 f6:d4:d2:de:20:be > 96:ec:6a:a8:67:ed, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 64, id 23666, offset 0, flags [DF], proto ICMP (1), length 84)
        1.1.1.2 > 1.1.1.4: ICMP echo request, id 31770, seq 32, length 64
    ^C
    1 packet captured
    1 packet received by filter
    0 packets dropped by kernel
    [root@localhost ~]# tcpdump -vne -i em1 icmp
    tcpdump: listening on em1, link-type EN10MB (Ethernet), capture size 262144 bytes
    ^C
    0 packets captured
    0 packets received by filter
    0 packets dropped by kernel
    

    Problem 3: this turns out to be yet another issue. When ip_route_input_noref performs the route lookup, the default route does match, but the lookup still fails because forwarding is not enabled on vethb.

    ip_route_input_noref -> ip_route_input_slow
        fl4.flowi4_oif = 0;
        fl4.flowi4_iif = dev->ifindex;
        fl4.flowi4_mark = skb->mark;
        fl4.flowi4_tos = tos;
        fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
        fl4.daddr = daddr;
        fl4.saddr = saddr;
        err = fib_lookup(net, &fl4, &res);
    
        if (!IN_DEV_FORWARD(in_dev)) {
            err = -EHOSTUNREACH;
            goto no_route;
        }
    no_route:
        RT_CACHE_STAT_INC(in_no_route);
        res.type = RTN_UNREACHABLE;
        res.fi = NULL;
    

    The in_no_route drop counter can be viewed with the following command:

    cat /proc/net/stat/rt_cache | awk -F " " '{print $5}'
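
    Note that the fields in /proc/net/stat/rt_cache are printed in hexadecimal, with a header line followed by one line per CPU, so summing them as decimal takes slightly more work. A sketch, assuming GNU awk for strtonum:

    awk 'NR > 1 {sum += strtonum("0x" $5)} END {print sum}' /proc/net/stat/rt_cache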
    

    Next, enable forwarding on vethb and try again:

    [root@localhost ~]# echo 1 > /proc/sys/net/ipv4/conf/vethb/forwarding
    [root@localhost ~]# cat /proc/sys/net/ipv4/conf/vethb/forwarding
    1
    
    Note: if /proc/sys/net/ipv4/conf/all/forwarding is enabled, forwarding is enabled by default on newly created interfaces.
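
    The same knob can also be set through sysctl, which makes it easy to persist in /etc/sysctl.conf:

    [root@localhost ~]# sysctl -w net.ipv4.conf.vethb.forwarding=1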
    

    Problem 4: pinging again still fails, this time due to a failed reverse path check. fib_validate_source looks up the routing table using the packet's source IP as the destination; only the default route matches, and since the default route's output interface is not the packet's input interface, the check fails. The idea is that the same device should both send and receive a given flow, which is known as symmetric routing.

    ip_route_input_slow -> __mkroute_input
    /* Ignore rp_filter for packets protected by IPsec. */
        err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
                      in_dev->dev, in_dev, &itag);
        if (err < 0) {
            ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
                         saddr);
    
            goto cleanup;
        }
    
    int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
                u8 tos, int oif, struct net_device *dev,
                struct in_device *idev, u32 *itag)
    {
        int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
        //reverse path filter switch; if 0, the check is skipped
        if (!r && !fib_num_tclassid_users(dev_net(dev)) &&
            IN_DEV_ACCEPT_LOCAL(idev) &&
            (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
            *itag = 0;
            return 0;
        }
        return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
    }
    
    static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
                     u8 tos, int oif, struct net_device *dev,
                     int rpf, struct in_device *idev, u32 *itag)
    {
        int ret, no_addr;
        struct fib_result res;
        struct flowi4 fl4;
        struct net *net;
        bool dev_match;
    
        fl4.flowi4_oif = 0;
        fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
        fl4.daddr = src;
        fl4.saddr = dst;
        fl4.flowi4_tos = tos;
        fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
    
        no_addr = idev->ifa_list == NULL;
    
        fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
    
        net = dev_net(dev);
        if (fib_lookup(net, &fl4, &res))
            goto last_resort;
        if (res.type != RTN_UNICAST &&
            (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
            goto e_inval;
        if (!rpf && !fib_num_tclassid_users(dev_net(dev)) &&
            (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev)))
            goto last_resort;
        fib_combine_itag(itag, &res);
        dev_match = false;
    
    #ifdef CONFIG_IP_ROUTE_MULTIPATH
        for (ret = 0; ret < res.fi->fib_nhs; ret++) {
            struct fib_nh *nh = &res.fi->fib_nh[ret];
    
            if (nh->nh_dev == dev) {
                dev_match = true;
                break;
            }
        }
    #else
        //succeeds only if the route's output device equals the input device;
        //otherwise the reverse path check fails
        if (FIB_RES_DEV(res) == dev)
            dev_match = true;
    #endif
        if (dev_match) {
            ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
            return ret;
        }
        if (no_addr)
            goto last_resort;
        if (rpf == 1)
            goto e_rpf;
        fl4.flowi4_oif = dev->ifindex;
    
        ret = 0;
        if (fib_lookup(net, &fl4, &res) == 0) {
            if (res.type == RTN_UNICAST)
                ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
        }
        return ret;
    
    last_resort:
        if (rpf)
            goto e_rpf;
        *itag = 0;
        return 0;
    
    e_inval:
        return -EINVAL;
    e_rpf:
        return -EXDEV;
    }
    

    The in_martian_src drop counter can be viewed with the following command:

    [root@localhost ~]# cat /proc/net/stat/rt_cache | awk -F " " '{print $8}'
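
    Besides the counter, enabling log_martians makes the kernel log a message for each such dropped packet, which is handy when debugging:

    [root@localhost ~]# echo 1 > /proc/sys/net/ipv4/conf/all/log_martians
    [root@localhost ~]# dmesg | tail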
    

    There are two ways to solve this problem:

    a. add a symmetric route
    b. disable the reverse path check

    a. Add a symmetric route. After adding the following route in the root namespace (pointing 1.1.1.2 back at vethb), the ICMP packets go out em1:

    [root@localhost ~]# ip route add 1.1.1.2 dev vethb
    [root@localhost ~]# ip r
    default via 10.164.129.1 dev em1 proto static metric 100
    1.1.1.2 dev vethb scope link
    10.10.10.0/24 dev gre10 proto kernel scope link src 10.10.10.1
    10.164.129.0/24 dev em1 proto kernel scope link src 10.164.129.16 metric 100
    169.254.0.0/16 dev provisioning_nw scope link metric 1016
    169.254.0.0/16 dev idrac_nw scope link metric 1017
    192.168.0.0/24 dev provisioning_nw proto kernel scope link src 192.168.0.253
    192.168.10.0/24 dev idrac_nw proto kernel scope link src 192.168.10.13
    192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1
    [root@localhost ~]# tcpdump -vne -i em1 icmp or arp
    tcpdump: listening on em1, link-type EN10MB (Ethernet), capture size 262144 bytes
    08:56:10.509045 90:b1:1c:55:37:1e > 00:00:0c:07:ac:02, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 63, id 28970, offset 0, flags [DF], proto ICMP (1), length 84)
        1.1.1.2 > 1.1.1.4: ICMP echo request, id 1009, seq 1664, length 64
    08:56:11.509051 90:b1:1c:55:37:1e > 00:00:0c:07:ac:02, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 63, id 29200, offset 0, flags [DF], proto ICMP (1), length 84)
        1.1.1.2 > 1.1.1.4: ICMP echo request, id 1009, seq 1665, length 64
    

    b. Disable the reverse path check.
    The effective rp_filter is the maximum of the per-device value and the all value, so both must be turned off:

    #define IN_DEV_RPFILTER(in_dev)     IN_DEV_MAXCONF((in_dev), RP_FILTER)
    
    #define IN_DEV_MAXCONF(in_dev, attr) \
        (max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \
             IN_DEV_CONF_GET((in_dev), attr)))
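
    Both values can be checked (before and after changing them) with sysctl:

    [root@localhost ~]# sysctl net.ipv4.conf.all.rp_filter net.ipv4.conf.vethb.rp_filter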
    
    [root@localhost ~]# ip route del 1.1.1.2 dev vethb
    [root@localhost ~]# echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
    [root@localhost ~]# echo 0 > /proc/sys/net/ipv4/conf/vethb/rp_filter
    
    [root@localhost ~]# tcpdump -vne -i em1 icmp or arp
    tcpdump: listening on em1, link-type EN10MB (Ethernet), capture size 262144 bytes
    09:01:22.555047 90:b1:1c:55:37:1e > 00:00:0c:07:ac:02, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 63, id 58344, offset 0, flags [DF], proto ICMP (1), length 84)
        1.1.1.2 > 1.1.1.4: ICMP echo request, id 1009, seq 1976, length 64
    09:01:23.555046 90:b1:1c:55:37:1e > 00:00:0c:07:ac:02, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 63, id 58481, offset 0, flags [DF], proto ICMP (1), length 84)
        1.1.1.2 > 1.1.1.4: ICMP echo request, id 1009, seq 1977, length 64
    

    proxy_arp

    At this point the ICMP packets finally make it out, but the MAC address for 1.1.1.4 in the test namespace was set by hand, which is inflexible. The device's proxy_arp feature can take care of this instead.

    //enable proxy_arp
    [root@localhost ~]# echo 1 >  /proc/sys/net/ipv4/conf/vethb/proxy_arp
    
    [root@localhost ~]# taskset -c 3 ip netns exec test ping 1.1.1.4
    PING 1.1.1.4 (1.1.1.4) 56(84) bytes of data.
    ^C
    --- 1.1.1.4 ping statistics ---
    1 packets transmitted, 0 received, 100% packet loss, time 0ms
    
    //vethb's MAC address has been learned
    [root@localhost ~]# ip netns exec test arp -n
    Address                  HWtype  HWaddress           Flags Mask            Iface
    1.1.1.4                  ether   96:ec:6a:a8:67:ed   C                     vetha
    

    However, ARP processing also involves a route lookup, the reverse path check, and so on, so all of the problems above apply to ARP as well; the settings described earlier fix them.

    arp_process
        if (arp->ar_op == htons(ARPOP_REQUEST) &&
            ip_route_input_noref(skb, tip, sip, 0, dev) == 0) {
            rt = skb_rtable(skb);
            addr_type = rt->rt_type;
    
            if (addr_type == RTN_LOCAL) {
            ...
          } else if (IN_DEV_FORWARD(in_dev)) {
                if (addr_type == RTN_UNICAST  &&
                    (arp_fwd_proxy(in_dev, dev, rt) ||
                     arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
                     (rt->dst.dev != dev &&
                      pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) {
                    n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
                    if (n)
                        neigh_release(n);
    
                    if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
                        skb->pkt_type == PACKET_HOST ||
                        NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) {
                        arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
                             dev, tip, sha, dev->dev_addr,
                             sha);
                    } else {
                        pneigh_enqueue(&arp_tbl,
                                   in_dev->arp_parms, skb);
                        return 0;
                    }
                    goto out;
                }
    

    Summary

    a. If one end of a veth pair is in another namespace and the other end is in the root namespace without being attached to a bridge, then a unicast packet sent through the veth from the test namespace must carry the MAC of the veth's peer device as its destination. This can be configured statically in the test namespace, or handled with proxy ARP.
    b. To get past the reverse path check, either disable it or add a symmetric route.
    c. The device's forwarding feature must be enabled.

    The commands involved are listed below; in fact, the calico CNI in k8s boils down to essentially these few settings.

    echo 1 >  /proc/sys/net/ipv4/conf/vethb/proxy_arp
    echo 1 > /proc/sys/net/ipv4/conf/vethb/forwarding
    echo 0 > /proc/sys/net/ipv4/conf/vethb/rp_filter
    ip route add 1.1.1.2 dev vethb
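
    Putting it all together, the whole setup from this article can be reproduced with the following sketch (em1 and its default route are assumed to exist, as in the captures above):

    ip link add vetha type veth peer name vethb
    ip link set dev vethb up
    ip netns add test
    ip link set dev vetha netns test
    ip netns exec test ip link set dev vetha up
    ip netns exec test ip address add dev vetha 1.1.1.2/24
    echo 1 > /proc/sys/net/ipv4/conf/vethb/proxy_arp
    echo 1 > /proc/sys/net/ipv4/conf/vethb/forwarding
    # rp_filter is max(all, device), so both must be 0
    echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
    echo 0 > /proc/sys/net/ipv4/conf/vethb/rp_filter
    ip route add 1.1.1.2 dev vethb
    ip netns exec test ping 1.1.1.4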
    
