terway是阿里云下的CNI实现。
github链接
terway 的设计文档详细地说明了 terway 的实现原理。本文以该文档为指导,结合实际的 host 和 pod 网络情况,分析网络配置流程的代码。
cni入口
按照部署和调用方式划分,Terway包含daemon和binary两部分(../plugin/terway)。
binary部分就是CNI的入口了。
plugin/terway/cni.go
func main() {
skel.PluginMain(cmdAdd, cmdCheck, cmdDel, version.PluginSupports("0.3.0", "0.3.1", "0.4.0"), bv.BuildString("terway"))
}
main函数通过 skel.PluginMain 注册了 cmdAdd、cmdCheck、cmdDel 三个回调,这是 CNI 插件的标准写法。我们重点关注 cmdAdd 函数。
//plugin/terway/cni.go
func cmdAdd(args *skel.CmdArgs) error {
client, conn, err := getNetworkClient(ctx)
containerIPNet, gatewayIPSet, err = doCmdAdd(ctx, logger, client, cmdArgs)
//plugin/terway/cni_linux.go
func doCmdAdd(ctx context.Context, logger *logrus.Entry, client rpc.TerwayBackendClient, cmdArgs *cniCmdArgs) (containerIPNet *terwayTypes.IPNetSet, gatewayIPSet *terwayTypes.IPSet, err error) {
var conf, cniNetns, k8sConfig, args = cmdArgs.conf, cmdArgs.netNS, cmdArgs.k8sArgs, cmdArgs.inputArgs
allocResult, err := client.AllocIP(ctx, &rpc.AllocIPRequest{
Netns: args.Netns,
K8SPodName: string(k8sConfig.K8S_POD_NAME),
K8SPodNamespace: string(k8sConfig.K8S_POD_NAMESPACE),
K8SPodInfraContainerId: string(k8sConfig.K8S_POD_INFRA_CONTAINER_ID),
IfName: args.IfName,
})
err = datapath.NewPolicyRoute().Setup(setupCfg, cniNetns)
主要分为2大步骤:
1:调用 client.AllocIP 请求daemon分配网络资源。
2:根据daemon返回的网络资源配置网络( datapath.NewPolicyRoute().Setup )。
本文主要讲解第2步:配置网络。
//plugin/datapath/policy_router_linux.go # Setup 函数
Setup 的第二个参数 netNS 是网络命名空间(network namespace)。
func (d *PolicyRoute) Setup(cfg *types.SetupConfig, netNS ns.NetNS) error {
vethCfg := &veth.Veth{
IfName: cfg.ContainerIfName,
PeerName: cfg.HostVETHName,
MTU: cfg.MTU,
}
//veth.Setup创建Veth对,一端在容器(ContainerIfName),一端在Host(HostVETHName),
err := veth.Setup(vethCfg, netNS)
if err != nil {
return err
}
//创建完成后 查询出 hostVETH,确保存在。
hostVETH, err := netlink.LinkByName(cfg.HostVETHName)
if err != nil {
return err
}
err = netNS.Do(func(_ ns.NetNS) error {
//容器端veth网络配置
contCfg := generateContCfgForPolicy(cfg, contLink, hostVETH.Attrs().HardwareAddr)
//配置网络
err = nic.Setup(contLink, contCfg)
})
//eni网络配置
eniCfg := generateENICfgForPolicy(cfg, eni, table)
//配置网络
err = nic.Setup(eni, eniCfg)
if err != nil {
return fmt.Errorf("setup eni config, %w", err)
}
//host端veth 网络配置
hostVETHCfg := generateHostPeerCfgForPolicy(cfg, hostVETH, table)
//配置网络
err = nic.Setup(hostVETH, hostVETHCfg)
}
上面分别生成了三份不同的网络配置(容器端网络配置、eni网络配置、host端veth网络配置),
然后每份配置都会调用 nic.Setup 来配置网络,区别只是操作的目标网卡和配置内容不同。
func Setup(link netlink.Link, conf *Conf) error {
//设置link 的 ip,最终调用 netlink.AddrReplace
_, err = utils.EnsureAddr(link, addr)
//设置arp缓存,最终调用 netlink.NeighSet(neigh)
_, err = utils.EnsureNeigh(neigh)
//设置路由,最终调用 netlink.RouteReplace(route)
_, err = utils.EnsureRoute(route)
}
1:容器端网络配置
//link 是 ContainerIfName端的
func generateContCfgForPolicy(cfg *types.SetupConfig, link netlink.Link, mac net.HardwareAddr) *nic.Conf {
//POD
//Destination Gateway Genmask Flags Metric Ref Use Iface
//0.0.0.0 169.254.1.1 0.0.0.0 UG 0 0 0 eth0
//添加如注释的默认路由
routes = append(routes, &netlink.Route{
LinkIndex: link.Attrs().Index,
Scope: netlink.SCOPE_UNIVERSE,
Dst: defaultRoute,// 0.0.0.0/0
Gw: LinkIPNet.IP,//169, 254, 1, 1
Flags: int(netlink.FLAG_ONLINK),
})
//169.254.1.1 dev eth0 lladdr c6:15:62:c3:32:0a PERMANENT , 网关ip的对应的mac是 host端的mac
//Linux ARP 缓存
neighs = append(neighs, &netlink.Neigh{
LinkIndex: link.Attrs().Index,
IP: LinkIPNet.IP,//169, 254, 1, 1
HardwareAddr: mac, //mac = hostVETH.Attrs().HardwareAddr ,host端的mac
State: netlink.NUD_PERMANENT,
})
contCfg := &nic.Conf{
IfName: cfg.ContainerIfName,
MTU: cfg.MTU,
Addrs: utils.NewIPNetToMaxMask(cfg.ContainerIPNet),
Routes: routes,
Rules: rules,
Neighs: neighs,
SysCtl: sysctl,
}
容器端主要添加了默认路由和网关(169.254.1.1)的永久ARP缓存(示例见代码注释)。
2:eni网络配置
//生成route tableId,id就是eni的link id +1000
table := utils.GetRouteTableID(eni.Attrs().Index)
eniCfg := generateENICfgForPolicy(cfg, eni, table){
//default via 192.168.175.253 dev eth2 onlink
routes = append(routes, &netlink.Route{
LinkIndex: link.Attrs().Index,
Scope: netlink.SCOPE_UNIVERSE,
Table: table,//1004
Dst: defaultRoute,//0.0.0.0/0
Gw: cfg.GatewayIP.IPv4, //default via 192.168.175.253 dev eth2 onlink ,eni的网关
Flags: int(netlink.FLAG_ONLINK),
})
}
eni 主要是在 ID 为(1000 + eni 的 link index)的路由表中添加了一条默认路由(示例见代码注释)。
3:host端veth网络配置
// add route to container
//192.168.170.254 0.0.0.0 255.255.255.255 UH 0 0 0 calif9f9e3e5e80
routes = append(routes, &netlink.Route{
LinkIndex: link.Attrs().Index, //host
Scope: netlink.SCOPE_LINK,
Dst: utils.NewIPNetWithMaxMask(cfg.ContainerIPNet.IPv4), //192.168.170.254/32 (255.255.255.255)
})
// 2. add host to container rule
//512: from all to 192.168.170.254 lookup main
toContainerRule := netlink.NewRule()
toContainerRule.Dst = v4
toContainerRule.Table = mainRouteTable
toContainerRule.Priority = toContainerPriority
//2048: from 192.168.170.254 iif calif9f9e3e5e80 lookup 1004
fromContainerRule := netlink.NewRule()
fromContainerRule.IifName = link.Attrs().Name
fromContainerRule.Src = v4
fromContainerRule.Table = table
fromContainerRule.Priority = fromContainerPriority
rules = append(rules, toContainerRule, fromContainerRule)
host端主要是添加了到容器的路由,以及两条策略路由 rule:到容器方向(to container,查 main 表)和从容器发出方向(from container,查 eni 对应的路由表)(示例见代码注释)。
网络配置如图
image.png
附录
附录:pod 网络信息示例
# route
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
default gateway 0.0.0.0 UG 0 0 0 eth0
# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 169.254.1.1 0.0.0.0 UG 0 0 0 eth0
#ip neigh
169.254.1.1 dev eth0 lladdr c6:15:62:c3:32:0a PERMANENT
192.168.43.46 dev eth0 lladdr c6:15:62:c3:32:0a STALE
#ifconfig
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.170.254 netmask 255.255.255.255 broadcast 0.0.0.0
ether da:a5:24:73:d3:de txqueuelen 0 (Ethernet)
RX packets 11932 bytes 24480656 (23.3 MiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 11219 bytes 6789494 (6.4 MiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
loop txqueuelen 1000 (Local Loopback)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
#cat /sys/class/net/eth0/iflink
625
# ip link
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
3: eth0@if625: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether da:a5:24:73:d3:de brd ff:ff:ff:ff:ff:ff link-netnsid 0
# ip rule show
0: from all lookup local
32766: from all lookup main
32767: from all lookup default
# ip link
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
3: eth0@if625: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether da:a5:24:73:d3:de brd ff:ff:ff:ff:ff:ff link-netnsid 0
[root@memall-nginx-76ccf5c975-cj6qg /]#
附录:host主机信息示例
ip:192.168.43.46
# ip link
625: calif9f9e3e5e80@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether c6:15:62:c3:32:0a brd ff:ff:ff:ff:ff:ff link-netnsid 4
#ifconfig
calif9f9e3e5e80: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet6 fe80::c415:62ff:fec3:320a prefixlen 64 scopeid 0x20<link>
ether c6:15:62:c3:32:0a txqueuelen 0 (Ethernet)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
docker0: flags=4099<UP,BROADCAST,MULTICAST> mtu 1500
inet 169.254.123.1 netmask 255.255.255.0 broadcast 169.254.123.255
ether 02:42:34:86:58:6b txqueuelen 0 (Ethernet)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.43.46 netmask 255.255.240.0 broadcast 192.168.47.255
inet6 fe80::216:3eff:fe18:53da prefixlen 64 scopeid 0x20<link>
ether 00:16:3e:18:53:da txqueuelen 1000 (Ethernet)
RX packets 186201329 bytes 113505422156 (105.7 GiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 240777442 bytes 202667357325 (188.7 GiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
eth1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.43.46 netmask 255.255.255.255 broadcast 0.0.0.0
ether 00:16:3e:17:ae:b0 txqueuelen 1000 (Ethernet)
RX packets 606714847 bytes 461987239275 (430.2 GiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 399366100 bytes 604784954265 (563.2 GiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
eth2: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.43.46 netmask 255.255.255.255 broadcast 0.0.0.0
ether 00:16:3e:0e:30:52 txqueuelen 1000 (Ethernet)
RX packets 157336675 bytes 59235357552 (55.1 GiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 99301837 bytes 66331261453 (61.7 GiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
# ip rule show
512: from all to 192.168.170.254 lookup main
......
2048: from 192.168.170.254 iif calif9f9e3e5e80 lookup 1004
#ip route list table 1004
default via 192.168.175.253 dev eth2 onlink
# route
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
default gateway 0.0.0.0 UG 1024 0 0 eth0
169.254.123.0 0.0.0.0 255.255.255.0 U 0 0 0 docker0
192.168.32.0 0.0.0.0 255.255.240.0 U 0 0 0 eth0
gateway 0.0.0.0 255.255.255.255 UH 1024 0 0 eth0
192.168.169.33 0.0.0.0 255.255.255.255 UH 0 0 0 calidf22c452212
192.168.169.97 0.0.0.0 255.255.255.255 UH 0 0 0 cali9d1bc1dae22
192.168.169.154 0.0.0.0 255.255.255.255 UH 0 0 0 cali64ae98c6602
192.168.170.19 0.0.0.0 255.255.255.255 UH 0 0 0 califfc6fb6b044
192.168.170.26 0.0.0.0 255.255.255.255 UH 0 0 0 calibba4974ea33
192.168.170.169 0.0.0.0 255.255.255.255 UH 0 0 0 calib7e2241b1b5
192.168.170.213 0.0.0.0 255.255.255.255 UH 0 0 0 caliddda34c6b3a
192.168.170.215 0.0.0.0 255.255.255.255 UH 0 0 0 caliec49c8e9a0a
192.168.170.227 0.0.0.0 255.255.255.255 UH 0 0 0 cali3fa7232d4b7
192.168.170.228 0.0.0.0 255.255.255.255 UH 0 0 0 cali68c99518d33
192.168.170.252 0.0.0.0 255.255.255.255 UH 0 0 0 cali6fb976e2433
192.168.170.253 0.0.0.0 255.255.255.255 UH 0 0 0 cali2d557118fe9
192.168.170.254 0.0.0.0 255.255.255.255 UH 0 0 0 calif9f9e3e5e80
192.168.170.255 0.0.0.0 255.255.255.255 UH 0 0 0 calic13d5fb7fb0
192.168.171.9 0.0.0.0 255.255.255.255 UH 0 0 0 cali04ac618d704
192.168.171.10 0.0.0.0 255.255.255.255 UH 0 0 0 cali9aabe1cf15f
# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 192.168.47.253 0.0.0.0 UG 1024 0 0 eth0
169.254.123.0 0.0.0.0 255.255.255.0 U 0 0 0 docker0
192.168.32.0 0.0.0.0 255.255.240.0 U 0 0 0 eth0
192.168.47.253 0.0.0.0 255.255.255.255 UH 1024 0 0 eth0
192.168.169.33 0.0.0.0 255.255.255.255 UH 0 0 0 calidf22c452212
192.168.169.97 0.0.0.0 255.255.255.255 UH 0 0 0 cali9d1bc1dae22
192.168.169.154 0.0.0.0 255.255.255.255 UH 0 0 0 cali64ae98c6602
192.168.170.19 0.0.0.0 255.255.255.255 UH 0 0 0 califfc6fb6b044
192.168.170.26 0.0.0.0 255.255.255.255 UH 0 0 0 calibba4974ea33
192.168.170.169 0.0.0.0 255.255.255.255 UH 0 0 0 calib7e2241b1b5
192.168.170.213 0.0.0.0 255.255.255.255 UH 0 0 0 caliddda34c6b3a
192.168.170.215 0.0.0.0 255.255.255.255 UH 0 0 0 caliec49c8e9a0a
192.168.170.227 0.0.0.0 255.255.255.255 UH 0 0 0 cali3fa7232d4b7
192.168.170.228 0.0.0.0 255.255.255.255 UH 0 0 0 cali68c99518d33
192.168.170.252 0.0.0.0 255.255.255.255 UH 0 0 0 cali6fb976e2433
192.168.170.253 0.0.0.0 255.255.255.255 UH 0 0 0 cali2d557118fe9
192.168.170.254 0.0.0.0 255.255.255.255 UH 0 0 0 calif9f9e3e5e80
192.168.170.255 0.0.0.0 255.255.255.255 UH 0 0 0 calic13d5fb7fb0
192.168.171.9 0.0.0.0 255.255.255.255 UH 0 0 0 cali04ac618d704
192.168.171.10 0.0.0.0 255.255.255.255 UH 0 0 0 cali9aabe1cf15f
# ip link
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000
link/ether 00:16:3e:18:53:da brd ff:ff:ff:ff:ff:ff
515: caliddda34c6b3a@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 6a:fe:35:0a:ac:c9 brd ff:ff:ff:ff:ff:ff link-netnsid 12
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000
link/ether 00:16:3e:17:ae:b0 brd ff:ff:ff:ff:ff:ff
4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000
link/ether 00:16:3e:0e:30:52 brd ff:ff:ff:ff:ff:ff
5: docker0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc noqueue state DOWN mode DEFAULT group default
link/ether 02:42:34:86:58:6b brd ff:ff:ff:ff:ff:ff
6: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 9a:62:6f:59:bf:30 brd ff:ff:ff:ff:ff:ff
7: kube-ipvs0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default
link/ether 5e:6f:9d:87:4c:4c brd ff:ff:ff:ff:ff:ff
8: cali64ae98c6602@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 66:bf:92:98:3a:5e brd ff:ff:ff:ff:ff:ff link-netnsid 0
9: califfc6fb6b044@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 72:7b:bd:d6:ec:25 brd ff:ff:ff:ff:ff:ff link-netnsid 1
18: calibba4974ea33@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether ae:13:b5:a6:5d:a8 brd ff:ff:ff:ff:ff:ff link-netnsid 10
22: caliec49c8e9a0a@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 46:83:18:45:e7:a7 brd ff:ff:ff:ff:ff:ff link-netnsid 13
805: cali2d557118fe9@if2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether b2:7a:66:74:59:8b brd ff:ff:ff:ff:ff:ff link-netnsid 9
583: cali04ac618d704@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 72:63:87:fc:22:2c brd ff:ff:ff:ff:ff:ff link-netnsid 15
598: cali68c99518d33@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether d6:84:de:fe:7c:ac brd ff:ff:ff:ff:ff:ff link-netnsid 17
625: calif9f9e3e5e80@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether c6:15:62:c3:32:0a brd ff:ff:ff:ff:ff:ff link-netnsid 4
626: cali9aabe1cf15f@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 72:a7:7f:ab:ec:86 brd ff:ff:ff:ff:ff:ff link-netnsid 6
125: cali6fb976e2433@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 8a:4e:b8:19:c2:fe brd ff:ff:ff:ff:ff:ff link-netnsid 16
638: calic13d5fb7fb0@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 8e:25:cd:33:29:ba brd ff:ff:ff:ff:ff:ff link-netnsid 2
158: cali9d1bc1dae22@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether aa:a9:c9:12:a4:ff brd ff:ff:ff:ff:ff:ff link-netnsid 3
442: calib7e2241b1b5@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether c6:0a:b1:79:bf:0c brd ff:ff:ff:ff:ff:ff link-netnsid 7
217: cali3fa7232d4b7@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 1e:1f:3d:a0:68:6b brd ff:ff:ff:ff:ff:ff link-netnsid 5
504: calidf22c452212@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 3a:d2:f6:2f:dc:42 brd ff:ff:ff:ff:ff:ff link-netnsid 8
//https://help.aliyun.com/document_detail/56955.html?spm=5176.2020520101.help.dexternal.438f4df55Wcqjp
# curl http://100.100.100.200/latest/meta-data/network/interfaces/macs/00:16:3e:0e:30:52/gateway
192.168.175.253
image.png
网友评论