美文网首页
[docker 网络][flannel] 背后操作

[docker 网络][flannel] 背后操作

作者: nicktming | 来源:发表于2019-11-03 09:30 被阅读0次

    1. 前言

    上文 [docker 网络][flannel] 配置安装测试 利用flannel vxlan实现了容器跨主机相互访问. 本文将模拟flannel vxlan看看flannel到底做了些什么操作可以让容器跨主机相互访问. 关于 vxlan的原理可以参考 VXLAN详解, 本文将会注重实际操作过程.

    环境如下:

    Machine 1 : 172.21.0.16 主机名:master
    Machine 2 : 172.21.0.12 主机名:worker
    

    flannel
    1. [docker 网络][flannel] 配置安装测试
    2. [docker 网络][flannel] 背后操作
    3. [docker 网络][flannel] 源码简单分析

    2. 增加vxlan节点

    2.1 master(172.21.0.16)

    add-vxlan.sh脚本增加一个vxlan类型的vxlan.1, 地址为10.0.1.250/32.

    [root@master vxlan]# 
    [root@master vxlan]# cat add-vxlan.sh 
    ip link delete vxlan.1
    
    ip link add vxlan.1 type vxlan id 1 dev eth0 local 172.21.0.16 dstport 4789 nolearning
    ip addr add 10.0.1.250/32 dev vxlan.1
    ip link set vxlan.1 up
    
    [root@master vxlan]# ./add-vxlan.sh 
    [root@master vxlan]# ifconfig vxlan.1
    vxlan.1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
            inet 10.0.1.250  netmask 255.255.255.255  broadcast 0.0.0.0
            inet6 fe80::c0d4:cfff:feb5:8612  prefixlen 64  scopeid 0x20<link>
            ether c2:d4:cf:b5:86:12  txqueuelen 1000  (Ethernet)
            RX packets 0  bytes 0 (0.0 B)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 0  bytes 0 (0.0 B)
            TX errors 0  dropped 6 overruns 0  carrier 0  collisions 0
    
    [root@master vxlan]# 
    

    2.2 worker(172.21.0.12)

    [root@worker vxlan]# cat add-vxlan.sh 
    ip link delete vxlan.1
    
    ip link add vxlan.1 type vxlan id 1 dev eth0 local 172.21.0.12 dstport 4789 nolearning
    ip addr add 10.0.2.250/32 dev vxlan.1
    ip link set vxlan.1 up
    
    [root@worker vxlan]# ./add-vxlan.sh 
    [root@worker vxlan]# ifconfig vxlan.1
    vxlan.1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
            inet 10.0.2.250  netmask 255.255.255.255  broadcast 0.0.0.0
            inet6 fe80::bcba:39ff:fe2e:a8ed  prefixlen 64  scopeid 0x20<link>
            ether be:ba:39:2e:a8:ed  txqueuelen 1000  (Ethernet)
            RX packets 0  bytes 0 (0.0 B)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 0  bytes 0 (0.0 B)
            TX errors 0  dropped 6 overruns 0  carrier 0  collisions 0
    
    [root@worker vxlan]# 
    

    3. 增加fdb, neighbors 和 route

    3.1 master(172.21.0.16)

    需要知道worker(172.21.0.12)上vxlan.1的mac地址(be:ba:39:2e:a8:ed)以及vxlan.1的ip地址(10.0.2.250/32).

    [root@master vxlan]# cat add-fdb-arp-route.sh 
    #ip route add 10.0.2.0/24 dev vxlan.1 onlink
    ip route add 10.0.2.0/24 via 10.0.2.250 dev vxlan.1 onlink
    bridge fdb add $1 dev vxlan.1 dst 172.21.0.12
    ip neighbor add 10.0.2.250 lladdr $1 dev vxlan.1
    
    [root@master vxlan]# ./add-fdb-arp-route.sh be:ba:39:2e:a8:ed
    [root@master vxlan]# route -n
    Kernel IP routing table
    Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
    0.0.0.0         172.21.0.1      0.0.0.0         UG    0      0        0 eth0
    10.0.2.0        10.0.2.250      255.255.255.0   UG    0      0        0 vxlan.1
    169.254.0.0     0.0.0.0         255.255.0.0     U     1002   0        0 eth0
    172.21.0.0      0.0.0.0         255.255.240.0   U     0      0        0 eth0
    [root@master vxlan]# bridge fdb show
    ...
    be:ba:39:2e:a8:ed dev vxlan.1 dst 172.21.0.12 self permanent
    ...
    [root@master vxlan]# ip neighbor show
    ...
    10.0.2.250 dev vxlan.1 lladdr be:ba:39:2e:a8:ed PERMANENT
    ...
    [root@master vxlan]# 
    

    3.2 worker(172.21.0.12)

    需要知道master(172.21.0.16)上vxlan.1的mac地址(c2:d4:cf:b5:86:12)以及vxlan.1的ip地址(10.0.1.250/32).

    [root@worker vxlan]# cat add-fdb-arp-route.sh 
    ip route add 10.0.1.0/24 via 10.0.1.250 dev vxlan.1 onlink
    bridge fdb add $1 dev vxlan.1 dst 172.21.0.16
    ip neighbor add 10.0.1.250 lladdr $1 dev vxlan.1
    
    [root@worker vxlan]# ./add-fdb-arp-route.sh c2:d4:cf:b5:86:12
    [root@worker vxlan]# route -n
    Kernel IP routing table
    Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
    0.0.0.0         172.21.0.1      0.0.0.0         UG    0      0        0 eth0
    10.0.1.0        10.0.1.250      255.255.255.0   UG    0      0        0 vxlan.1
    169.254.0.0     0.0.0.0         255.255.0.0     U     1002   0        0 eth0
    172.21.0.0      0.0.0.0         255.255.240.0   U     0      0        0 eth0
    [root@worker vxlan]# bridge fdb show
    ...
    c2:d4:cf:b5:86:12 dev vxlan.1 dst 172.21.0.16 self permanent
    ...
    [root@worker vxlan]# ip neighbor show
    ...
    10.0.1.250 dev vxlan.1 lladdr c2:d4:cf:b5:86:12 PERMANENT
    ...
    [root@worker vxlan]# 
    

    3.3 测试vxlan之间相互访问

    ===> master vxlan.1 -> worker vxlan.1
    [root@master vxlan]# ping -c 1 10.0.2.250
    PING 10.0.2.250 (10.0.2.250) 56(84) bytes of data.
    64 bytes from 10.0.2.250: icmp_seq=1 ttl=64 time=0.454 ms
    
    --- 10.0.2.250 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.454/0.454/0.454/0.000 ms
    [root@master vxlan]# 
    
    ===> worker vxlan.1 -> master vxlan.1
    [root@worker vxlan]# ping -c 1 10.0.1.250
    PING 10.0.1.250 (10.0.1.250) 56(84) bytes of data.
    64 bytes from 10.0.1.250: icmp_seq=1 ttl=64 time=0.437 ms
    
    --- 10.0.1.250 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.437/0.437/0.437/0.000 ms
    [root@worker vxlan]# 
    

    可以看到两个vxlan设备已经可以相互访问.

    4. 增加network namespace (模拟docker)

    4.1 master(172.21.0.16)

    [root@master vxlan]# ip netns ls
    [root@master vxlan]# cat add-ns.sh 
    ip link delete veth1 type veth
    ip netns delete ns1
    ip link delete docker0 type bridge 
    iptables -t nat -F
    iptables -F
    
    ip link add veth1 type veth peer name veth2
    ip link set veth1 up
    ip link add docker0 type bridge
    ifconfig docker0 10.0.1.1/24
    #brctl addif docker0 veth1
    ip link set veth1 master docker0
    ip netns add ns1
    ip link set veth2 netns ns1
    
    ip netns exec ns1 ip addr add 10.0.1.2/24 dev veth2
    ip netns exec ns1 ip link set lo up
    ip netns exec ns1 ip link set veth2 up
    ip netns exec ns1 route add default gw 10.0.1.1
    
    iptables -P FORWARD ACCEPT
    iptables -t nat -A POSTROUTING -s 10.0.1.0/24 -o eth0 -j MASQUERADE
    iptables -t filter -A FORWARD -s 10.0.0.0/16 -j ACCEPT
    iptables -t filter -A FORWARD -d 10.0.0.0/16 -j ACCEPT
    
    [root@master vxlan]# ./add-ns.sh 
    Cannot find device "docker0"
    [root@master vxlan]# ./add-ns.sh 
    [root@master vxlan]# ip netns ls
    ns1 (id: 0)
    [root@master vxlan]# ip netns exec ns1 sh
    sh-4.2# ifconfig
    lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
            inet 127.0.0.1  netmask 255.0.0.0
            inet6 ::1  prefixlen 128  scopeid 0x10<host>
            loop  txqueuelen 1000  (Local Loopback)
            RX packets 0  bytes 0 (0.0 B)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 0  bytes 0 (0.0 B)
            TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
    
    veth2: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
            inet 10.0.1.2  netmask 255.255.255.0  broadcast 0.0.0.0
            inet6 fe80::48e8:88ff:fe95:945c  prefixlen 64  scopeid 0x20<link>
            ether 4a:e8:88:95:94:5c  txqueuelen 1000  (Ethernet)
            RX packets 14  bytes 1116 (1.0 KiB)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 8  bytes 648 (648.0 B)
            TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
    sh-4.2# route -n
    Kernel IP routing table
    Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
    0.0.0.0         10.0.1.1        0.0.0.0         UG    0      0        0 veth2
    10.0.1.0        0.0.0.0         255.255.255.0   U     0      0        0 veth2
    sh-4.2# 
    

    可以看到network namespace已经创建成功了, 并且相关配置已经设置完成.

    4.2 worker(172.21.0.12)

    [root@worker vxlan]# ip netns ls
    [root@worker vxlan]# cat add-ns.sh
    ip link delete veth1 type veth
    ip netns delete ns1
    ip link delete docker0 type bridge 
    iptables -t nat -F
    iptables -F
    
    ip link add veth1 type veth peer name veth2
    ip link set veth1 up
    ip link add docker0 type bridge
    ifconfig docker0 10.0.2.1/24
    #brctl addif docker0 veth1
    ip link set veth1 master docker0
    ip netns add ns1
    ip link set veth2 netns ns1
    
    ip netns exec ns1 ip addr add 10.0.2.2/24 dev veth2
    ip netns exec ns1 ip link set lo up
    ip netns exec ns1 ip link set veth2 up
    ip netns exec ns1 route add default gw 10.0.2.1
    
    iptables -P FORWARD ACCEPT
    iptables -t nat -A POSTROUTING -s 10.0.2.0/24 -o eth0 -j MASQUERADE
    iptables -t filter -A FORWARD -s 10.0.0.0/16 -j ACCEPT
    iptables -t filter -A FORWARD -d 10.0.0.0/16 -j ACCEPT
    
    [root@worker vxlan]# ./add-ns.sh 
    Cannot find device "veth1"
    Cannot remove namespace file "/var/run/netns/ns1": No such file or directory
    Cannot find device "docker0"
    [root@worker vxlan]# ./add-ns.sh 
    [root@worker vxlan]# ip netns ls
    ns1 (id: 1)
    [root@worker vxlan]# ip netns exec ns1 sh
    sh-4.2# ifconfig 
    lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
            inet 127.0.0.1  netmask 255.0.0.0
            inet6 ::1  prefixlen 128  scopeid 0x10<host>
            loop  txqueuelen 1000  (Local Loopback)
            RX packets 0  bytes 0 (0.0 B)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 0  bytes 0 (0.0 B)
            TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
    
    veth2: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
            inet 10.0.2.2  netmask 255.255.255.0  broadcast 0.0.0.0
            inet6 fe80::6c11:71ff:feb8:3a6c  prefixlen 64  scopeid 0x20<link>
            ether 6e:11:71:b8:3a:6c  txqueuelen 1000  (Ethernet)
            RX packets 15  bytes 1206 (1.1 KiB)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 8  bytes 648 (648.0 B)
            TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
    
    sh-4.2# route -n
    Kernel IP routing table
    Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
    0.0.0.0         10.0.2.1        0.0.0.0         UG    0      0        0 veth2
    10.0.2.0        0.0.0.0         255.255.255.0   U     0      0        0 veth2
    sh-4.2# 
    

    worker中模拟的容器也创建成功了.

    4.3 测试跨主机访问

    ns1.png

    master中的容器访问worker中的容器, docker0, vxlan.1 以及主机.

    [root@master vxlan]# ip netns exec ns1 sh
    ===> 访问worker 的ns1
    sh-4.2# ping -c 1 10.0.2.2
    PING 10.0.2.2 (10.0.2.2) 56(84) bytes of data.
    64 bytes from 10.0.2.2: icmp_seq=1 ttl=62 time=0.411 ms
    
    --- 10.0.2.2 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.411/0.411/0.411/0.000 ms
    
    ===> 访问worker 的docker0
    sh-4.2# ping -c 1 10.0.2.1
    PING 10.0.2.1 (10.0.2.1) 56(84) bytes of data.
    64 bytes from 10.0.2.1: icmp_seq=1 ttl=63 time=0.389 ms
    
    --- 10.0.2.1 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.389/0.389/0.389/0.000 ms
    
    ===> 访问worker 的vxlan.1
    sh-4.2# ping -c 1 10.0.2.250
    PING 10.0.2.250 (10.0.2.250) 56(84) bytes of data.
    64 bytes from 10.0.2.250: icmp_seq=1 ttl=63 time=0.394 ms
    
    --- 10.0.2.250 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.394/0.394/0.394/0.000 ms
    
    ===> 访问worker
    sh-4.2# ping -c 1 172.21.0.12
    PING 172.21.0.12 (172.21.0.12) 56(84) bytes of data.
    64 bytes from 172.21.0.12: icmp_seq=1 ttl=63 time=0.351 ms
    
    --- 172.21.0.12 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.351/0.351/0.351/0.000 ms
    
    

    worker容器访问master中的容器, docker0, vxlan.1 以及主机.

    [root@worker vxlan]# ip netns exec ns1 sh
    ===> 访问master的ns1
    sh-4.2# ping -c 1 10.0.1.2
    PING 10.0.1.2 (10.0.1.2) 56(84) bytes of data.
    64 bytes from 10.0.1.2: icmp_seq=1 ttl=62 time=0.449 ms
    
    --- 10.0.1.2 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.449/0.449/0.449/0.000 ms
    
    ===> 访问master的docker0
    sh-4.2# ping -c 1 10.0.1.1
    PING 10.0.1.1 (10.0.1.1) 56(84) bytes of data.
    64 bytes from 10.0.1.1: icmp_seq=1 ttl=63 time=0.408 ms
    
    --- 10.0.1.1 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.408/0.408/0.408/0.000 ms
    
    ===> 访问master的vxlan.1
    sh-4.2# ping -c 1 10.0.1.250
    PING 10.0.1.250 (10.0.1.250) 56(84) bytes of data.
    64 bytes from 10.0.1.250: icmp_seq=1 ttl=63 time=0.409 ms
    
    --- 10.0.1.250 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.409/0.409/0.409/0.000 ms
    
    ===> 访问master
    sh-4.2# ping -c 1 172.21.0.16
    PING 172.21.0.16 (172.21.0.16) 56(84) bytes of data.
    64 bytes from 172.21.0.16: icmp_seq=1 ttl=63 time=0.348 ms
    
    --- 172.21.0.16 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.348/0.348/0.348/0.000 ms
    
    

    可以看到两个容器(network namespace)实现了跨主机访问.

    5. 在master中增加一个network namespace

    ns-2.png
    [root@master vxlan]# cat add-another-ns.sh 
    ip link delete veth5 type veth
    ip netns delete ns2
    
    ip link add veth5 type veth peer name veth6
    ip link set veth5 up
    ip link set veth5 master docker0
    ip netns add ns2
    ip link set veth6 netns ns2
    
    ip netns exec ns2 ip addr add 10.0.1.3/24 dev veth6
    ip netns exec ns2 ip link set lo up
    ip netns exec ns2 ip link set veth6 up
    ip netns exec ns2 route add default gw 10.0.1.1
    
    
    [root@master vxlan]# ./add-another-ns.sh 
    Cannot find device "veth5"
    Cannot remove namespace file "/var/run/netns/ns2": No such file or directory
    [root@master vxlan]# ./add-another-ns.sh 
    [root@master vxlan]# ip netns ls
    ns2 (id: 1)
    ns1 (id: 0)
    [root@master vxlan]# 
    [root@master vxlan]# ip netns exec ns2 sh
    sh-4.2# ifconfig
    lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
            inet 127.0.0.1  netmask 255.0.0.0
            inet6 ::1  prefixlen 128  scopeid 0x10<host>
            loop  txqueuelen 1000  (Local Loopback)
            RX packets 0  bytes 0 (0.0 B)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 0  bytes 0 (0.0 B)
            TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
    
    veth6: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
            inet 10.0.1.3  netmask 255.255.255.0  broadcast 0.0.0.0
            inet6 fe80::5ca9:72ff:fe81:24d3  prefixlen 64  scopeid 0x20<link>
            ether 5e:a9:72:81:24:d3  txqueuelen 1000  (Ethernet)
            RX packets 8  bytes 648 (648.0 B)
            RX errors 0  dropped 0  overruns 0  frame 0
            TX packets 8  bytes 648 (648.0 B)
            TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
    
    sh-4.2# route -n
    Kernel IP routing table
    Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
    0.0.0.0         10.0.1.1        0.0.0.0         UG    0      0        0 veth6
    10.0.1.0        0.0.0.0         255.255.255.0   U     0      0        0 veth6
    ===> 访问本机ns1
    sh-4.2# ping -c 1 10.0.1.2
    PING 10.0.1.2 (10.0.1.2) 56(84) bytes of data.
    64 bytes from 10.0.1.2: icmp_seq=1 ttl=64 time=0.071 ms
    
    --- 10.0.1.2 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.071/0.071/0.071/0.000 ms
    
    ===> 访问本机docker0
    sh-4.2# ping -c 1 10.0.1.1
    PING 10.0.1.1 (10.0.1.1) 56(84) bytes of data.
    64 bytes from 10.0.1.1: icmp_seq=1 ttl=64 time=0.067 ms
    
    --- 10.0.1.1 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.067/0.067/0.067/0.000 ms
    
    ===> 访问本机vxlan.1
    sh-4.2# ping -c 1 10.0.1.250
    PING 10.0.1.250 (10.0.1.250) 56(84) bytes of data.
    64 bytes from 10.0.1.250: icmp_seq=1 ttl=64 time=0.066 ms
    
    --- 10.0.1.250 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.066/0.066/0.066/0.000 ms
    
    ===> 访问本机
    sh-4.2# ping -c 1 172.21.0.16
    PING 172.21.0.16 (172.21.0.16) 56(84) bytes of data.
    64 bytes from 172.21.0.16: icmp_seq=1 ttl=64 time=0.044 ms
    
    --- 172.21.0.16 ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 0.044/0.044/0.044/0.000 ms
    
    ===> 访问互联网
    sh-4.2# ping -c 1 www.baidu.com
    PING www.a.shifen.com (220.181.38.149) 56(84) bytes of data.
    64 bytes from 220.181.38.149 (220.181.38.149): icmp_seq=1 ttl=249 time=6.13 ms
    
    --- www.a.shifen.com ping statistics ---
    1 packets transmitted, 1 received, 0% packet loss, time 0ms
    rtt min/avg/max/mdev = 6.132/6.132/6.132/0.000 ms
    sh-4.2# exit
    exit
    [root@master vxlan]# 
    

    可以看到主机内部的容器(network namespace)之间已经可以相互访问.

    相关文章

      网友评论

          本文标题:[docker 网络][flannel] 背后操作

          本文链接:https://www.haomeiwen.com/subject/wpdhbctx.html