美文网首页
Cni terway-ENI独占模式源码详解

Cni terway-ENI独占模式源码详解

作者: Teddy_b | 来源:发表于2024-02-22 10:49 被阅读0次

    Terway

    ENI独占模式

    源码分析

    // podNetworkType maps the daemon mode (and, in VPC mode, the pod's own
    // settings) to the pod network type.
    //
    // ENI multi-IP mode always yields PodNetworkTypeENIMultiIP and ENI-only mode
    // always yields PodNetworkTypeVPCENI. In VPC mode the pod gets
    // PodNetworkTypeVPCENI when it opts in through the podNeedEni annotation or
    // through a container resource request for deviceplugin.ENIResName;
    // otherwise it gets PodNetworkTypeVPCIP.
    //
    // NOTE(review): no return follows the switch as shown here, so an unmatched
    // daemonMode has no result. The excerpt is presumably abridged; confirm
    // against the full source.
    func podNetworkType(daemonMode string, pod *corev1.Pod) string {
        switch daemonMode {
        case daemon.ModeENIMultiIP:
            return daemon.PodNetworkTypeENIMultiIP
        case daemon.ModeVPC:
            podAnnotation := pod.GetAnnotations()
            useENI := false
            // The annotation opts in unless its value is empty, ConditionFalse or "0".
            if needEni, ok := podAnnotation[podNeedEni]; ok && (needEni != "" && needEni != ConditionFalse && needEni != "0") {
                useENI = true
            }
    
            // A request for the ENI resource on any container also opts in.
            for _, c := range pod.Spec.Containers {
                if _, ok := c.Resources.Requests[deviceplugin.ENIResName]; ok {
                    useENI = true
                    break
                }
            }
    
            if useENI {
                return daemon.PodNetworkTypeVPCENI
            }
            return daemon.PodNetworkTypeVPCIP
        case daemon.ModeENIOnly:
            return daemon.PodNetworkTypeVPCENI
        }
    
    }
    

    ENI独占模式时,对应的POD网络模式是VPC-ENI,此时的网络资源请求类型就不一样了

    switch pod.PodNetworkType {
    
        case daemon.PodNetworkTypeVPCENI:
            reply.IPType = rpc.IPType_TypeVPCENI
    
            else {
                req := &eni.LocalIPRequest{}
    
                resourceRequests = append(resourceRequests, req)
            }
    
        }
    

    对于VPC-ENI类型,可以看到此时的网络资源请求类型是LocalIPRequest

    // Allocate starts an asynchronous IP allocation for the pod described by cni.
    //
    // With IPv4 enabled it first peeks for an available address. If there is none
    // and len(l.ipv4)+l.allocatingV4 has already reached l.cap, it returns a nil
    // channel together with a Full trace. If there is none but capacity remains,
    // one more IPv4 address is added to the pending demand in l.allocatingV4.
    // It then broadcasts on l.cond and starts allocWorker in a new goroutine,
    // returning the response channel that goroutine was given.
    //
    // NOTE(review): `lo` and the body of the callback are not shown in this
    // excerpt, and expectV6 is never updated here. The excerpt is presumably
    // abridged; confirm against the full source.
    func (l *Local) Allocate(ctx context.Context, cni *daemon.CNI, request ResourceRequest) (chan *AllocResp, []Trace) {
        
        expectV4 := 0
        expectV6 := 0
    
        if l.enableIPv4 {
            ipv4 := l.ipv4.PeekAvailable(cni.PodID, lo.IPv4)
            // No free address and no room to create another: report Full.
            if ipv4 == nil && len(l.ipv4)+l.allocatingV4 >= l.cap {
                return nil, []Trace{{Condition: Full}}
            } else if ipv4 == nil {
                // No free address but capacity remains: ask for one more.
                expectV4 = 1
            }
        }
    
        // Record the extra demand so factoryAllocWorker sees it.
        l.allocatingV4 += expectV4
    
        // Wake every goroutine waiting on l.cond.
        l.cond.Broadcast()
    
        respCh := make(chan *AllocResp)
    
        go l.allocWorker(ctx, cni, lo, respCh, func() {
                ...
        })
    
        return respCh, nil
    }
    

    LocalIPRequest这种类型分配IP的流程相对复杂一点,这里它会维护一个IP可用集合,分配IP的时候就是遍历这个集合,从中获取可用的IP

    所谓可用的IP就是集合中还没绑定POD的那些IP

    // PeekAvailable returns an entry of the set that is in the ipStatusValid
    // state and is not yet bound to any pod (empty podID), or nil when the set
    // holds no such entry.
    //
    // podID and prefer are accepted but not consulted by this implementation.
    func (s Set) PeekAvailable(podID string, prefer netip.Addr) *IP {
        for _, candidate := range s {
            // Skip entries that are already bound to a pod or are not valid.
            if candidate.podID != "" || candidate.status != ipStatusValid {
                continue
            }
            return candidate
        }
        return nil
    }
    

    如果集合中没有可用IP,它会通过条件变量l.cond通知其它协程帮它分配IP,然后自己等待IP分配好了之后,再遍历集合去获取可用的IP

    // allocWorker loops until an address is available for the request.
    //
    // With IPv4 enabled, each iteration peeks for an available address. When
    // there is none it blocks in l.cond.Wait() and retries after being woken.
    // Once an address is found it is copied into ip.IPv4 and the function
    // returns.
    //
    // NOTE(review): in this excerpt ipv4 is never declared; resp, respCh and
    // onErrLocked are never used; and the locking around l.cond.Wait() is not
    // shown. The excerpt is presumably abridged; confirm against the full source.
    func (l *Local) allocWorker(ctx context.Context, cni *daemon.CNI, request *LocalIPRequest, respCh chan *AllocResp, onErrLocked func()) {
    
        for {
            resp := &AllocResp{}
    
            var ip types.IPSet2
            if l.enableIPv4 {
                ipv4 = l.ipv4.PeekAvailable(cni.PodID, request.IPv4)
                if ipv4 == nil {
                    // Nothing available yet: sleep until another goroutine
                    // signals l.cond, then check the set again.
                    l.cond.Wait()
                    continue
                }
                ip.IPv4 = ipv4.ip
            }
    
            return
        }
    }
    

    这里没有可用IP时它会通过l.cond.Broadcast()去唤醒协程帮它分配IP,在其它协程帮它分配好IP之前它通过l.cond.Wait()将自己挂起,等待其它协程唤醒自己

    可见真正干活的是另外的协程

    // factoryAllocWorker is the loop that creates the ENI on demand.
    //
    // It takes l.cond.L once up front and releases it only around the sleep and
    // the calls that create the interface. While neither l.allocatingV4 nor
    // l.allocatingV6 is positive it waits on l.cond. When demand appears and no
    // ENI exists yet, it creates one through l.factory, stores it in l.eni,
    // lowers the pending counters, adds the returned IPv4 addresses to l.ipv4,
    // and finally broadcasts on l.cond.
    //
    // NOTE(review): the err values from l.rateLimitEni.Wait and
    // CreateNetworkInterface are not checked as shown, ipv6Set is unused, and
    // the loop has no exit. The excerpt is presumably abridged; confirm against
    // the full source.
    func (l *Local) factoryAllocWorker(ctx context.Context) {
        l.cond.L.Lock()
    
        log := logf.FromContext(ctx)
        for {
    
            // Nothing requested: wait until another goroutine signals l.cond.
            if l.allocatingV4 <= 0 && l.allocatingV6 <= 0 {
                l.cond.Wait()
                continue
            }
    
            // wait a small period
            // The lock is released for the duration of the sleep.
            l.cond.L.Unlock()
            time.Sleep(300 * time.Millisecond)
            l.cond.L.Lock()
    
            if l.eni == nil {
                // create eni
                // Request at least one IPv4 address and at most batchSize of each family.
                v4Count := min(l.batchSize, max(l.allocatingV4, 1))
                v6Count := min(l.batchSize, l.allocatingV6)
    
                l.status = statusCreating
                // The lock is released while waiting on the rate limiter and
                // while the factory creates the interface.
                l.cond.L.Unlock()
    
                err := l.rateLimitEni.Wait(ctx)
                
                eni, ipv4Set, ipv6Set, err := l.factory.CreateNetworkInterface(v4Count, v6Count, l.eniType)
                
                l.cond.L.Lock()
    
                l.eni = eni
    
                // Subtract what was just requested, then clamp at zero.
                l.allocatingV4 -= v4Count
                l.allocatingV6 -= v6Count
    
                l.allocatingV4 = max(l.allocatingV4, 0)
                l.allocatingV6 = max(l.allocatingV6, 0)
    
                primary, err := netip.ParseAddr(eni.PrimaryIP.IPv4.String())
                if err == nil {
                    // Add every returned IPv4 address to the set; the second
                    // argument is true only for the ENI's primary address.
                    for _, v := range ipv4Set {
                        l.ipv4.Add(NewValidIP(v, netip.MustParseAddr(v.String()) == primary))
                    }
                }
    
                l.status = statusInUse
            } 
    
            // Wake every goroutine waiting on l.cond.
            l.cond.Broadcast()
        }
    }
    

    这个协程就是真正分配IP的了,在不需要分配IP的时候,即l.allocatingV4 <= 0 且 l.allocatingV6 <= 0,它会一直挂起,等待被需要分配IP的协程唤醒

    上述有了分配IP的需求进来了,它就会被唤醒干活了

    // CreateNetworkInterface creates an ENI through the OpenAPI client, attaches
    // it to the instance a.instanceID, and fills a daemon.ENI from the result.
    //
    // Steps shown: pick a vSwitch and create the ENI inside an exponential
    // backoff; copy ID, MAC, vSwitch ID, type and primary IP into the result;
    // parse the private IPs into v4Set; attach the ENI; validate v4Set against
    // the addresses metadata reports for the ENI's MAC; then read the vSwitch
    // CIDR and gateway address from metadata.
    //
    // It returns the ENI description plus the IPv4 and IPv6 address lists.
    //
    // NOTE(review): as shown, trunk and v6Set are never declared, vswID is never
    // assigned, and none of the err/innerErr values are checked. The excerpt is
    // presumably abridged; confirm against the full source.
    func (a *Aliyun) CreateNetworkInterface(ipv4, ipv6 int, eniType string) (*daemon.ENI, []netip.Addr, []netip.Addr, error) {
        // ctx carries a 60-second timeout derived from a.ctx.
        ctx, cancel := context.WithTimeout(a.ctx, time.Second*60)
        defer cancel()
    
        // 1. create eni
        var eni *client.NetworkInterface
        var vswID string
    
        // The backoff is driven by a.ctx; the closure parameter ctx shadows the
        // timeout context declared above.
        err := wait.ExponentialBackoffWithContext(a.ctx, backoff.Backoff(backoff.ENICreate), func(ctx context.Context) (bool, error) {
            vsw, innerErr := a.vsw.GetOne(ctx, a.openAPI, a.zoneID, a.vSwitchOptions)
            
            eni, innerErr = a.openAPI.CreateNetworkInterface(ctx, trunk, vswID, a.securityGroupIDs, a.resourceGroupID, ipv4, ipv6, a.eniTags)
            
            return true, nil
        })
    
        r := &daemon.ENI{
            ID:        eni.NetworkInterfaceID,
            MAC:       eni.MacAddress,
            VSwitchID: eni.VSwitchID,
            Type:      eni.Type,
        }
    
        r.PrimaryIP.SetIP(eni.PrivateIPAddress)
    
        // Convert each private IP string of the ENI into a netip.Addr.
        v4Set, err := func() ([]netip.Addr, error) {
            var ips []netip.Addr
            for _, v := range eni.PrivateIPSets {
                addr, err := netip.ParseAddr(v.PrivateIpAddress)
                ips = append(ips, addr)
            }
            return ips, nil
        }()
    
    
        // 2. attach eni
        err = a.openAPI.AttachNetworkInterface(ctx, eni.NetworkInterfaceID, a.instanceID, "")
    
        // 3. wait metadata ready & update cidr
        // The callback supplies the IPv4 addresses metadata reports for this MAC.
        err = validateIPInMetadata(ctx, v4Set, func() []netip.Addr {
            exists, err := metadata.GetIPv4ByMac(r.MAC)
            
            return exists
        })
    
    
        prefix, err := metadata.GetVSwitchCIDR(eni.MacAddress)
        r.VSwitchCIDR.SetIPNet(prefix.String())
    
        gw, err := metadata.GetENIGatewayAddr(eni.MacAddress)
        r.GatewayIP.SetIP(gw.String())
    
    
        return r, v4Set, v6Set, nil
    }
    

    这里主要就是和阿里云 云主机相关的一些交互了

    • 首先查询vswitch,vswitch id 就是当前云主机所在的vswitch,可以通过metadata获取到
    curl http://100.100.100.200/latest/meta-data/vswitch-id
    
    vsw-8vbddxzcxxxxxxp1evxd6r
    
    • 然后通过ECS客户端开通ENI,关联的vswitch就是上面的这个

    • 然后将ENI绑定到当前云主机,当前云主机有一个唯一的实例ID,也是通过metadata获取

    curl http://100.100.100.200/latest/meta-data/instance-id
    
    i-8vb4cxxxxxxxxxxxxxzahaxyv
    
    • 然后确保这个ENI已经绑定到了当前云主机上,并且IP也分配到了,也是通过metadata获取
    curl http://100.100.100.200/latest/meta-data/network/interfaces/macs/00:11:22:33:44:55/private-ipv4s
    
    192.168.128.15
    
    • 然后查询ENI所属的vswitch的CIDR,也是通过metadata获取
    curl http://100.100.100.200/latest/meta-data/network/interfaces/macs/00:11:22:33:44:55/vswitch-cidr-block
    
    192.168.128.0/24
    
    • 然后查询ENI的网关,也是通过metadata获取
    curl http://100.100.100.200/latest/meta-data/network/interfaces/macs/00:11:22:33:44:55/gateway
    
    192.168.128.253
    

    上述ENI准备好了之后,就会把对应的IP地址加入到集合里,然后唤醒需要分配IP的协程即可

    有了IP之后,就会转换为网络配置

    // ToRPC converts the allocated IP and its ENI into a single-element slice of
    // RPC network configurations.
    //
    // The pod IP comes from l.IP; the pod CIDR, gateway and MAC come from l.ENI.
    // DefaultRoute is enabled. ServiceCIDR, Pod, IfName and ExtraRoutes are left
    // at their zero values here, and the ENI is reported as non-trunk with VLAN
    // id 0.
    func (l *LocalIPResource) ToRPC() []*rpc.NetConf {
        basic := &rpc.BasicInfo{
            PodIP:     l.IP.ToRPC(),
            PodCIDR:   l.ENI.VSwitchCIDR.ToRPC(),
            GatewayIP: l.ENI.GatewayIP.ToRPC(),
        }

        eniInfo := &rpc.ENIInfo{
            MAC:       l.ENI.MAC,
            GatewayIP: l.ENI.GatewayIP.ToRPC(),
        }

        return []*rpc.NetConf{
            {
                BasicInfo:    basic,
                ENIInfo:      eniInfo,
                DefaultRoute: true,
            },
        }
    }
    

    然后补充Service CIDR,获取方式和前面VPC模式是一样的

    c.BasicInfo.ServiceCIDR = n.k8s.GetServiceCIDR().ToRPC()
    

    有了网络配置后,就可以开始配置网卡了,由于此时的ipType 对应的是VPC-ENI,所以对应的网卡配置类型为独占ENI

    // getDatePath selects the data path for the given IP type.
    //
    // The only case shown maps rpc.IPType_TypeVPCENI to types.ExclusiveENI.
    // vlanStripType and trunk are not consulted in the code shown.
    //
    // NOTE(review): there is no return after the switch as shown. Other cases
    // and the fallback are presumably elided; confirm against the full source.
    func getDatePath(ipType rpc.IPType, vlanStripType types.VlanStripType, trunk bool) types.DataPath {
        switch ipType {
        case rpc.IPType_TypeVPCENI:
            return types.ExclusiveENI
        }
    }
    

    因为已经分配好了IP地址,所以这里就不需要IPAM插件了,直接使用分配好的IP地址即可

    switch setupCfg.DP {
            case types.ExclusiveENI:
                
                if setupCfg.ContainerIfName == args.IfName {
                    containerIPNet = setupCfg.ContainerIPNet
                    gatewayIPSet = setupCfg.GatewayIP
                }
    
                err = datapath.NewExclusiveENIDriver().Setup(setupCfg, cniNetns)
    

    最后再看下网卡配置过程

    // Setup wires an exclusive ENI into the container network namespace netNS.
    //
    // Steps shown:
    //  1. Look up the ENI link by cfg.ENIIndex and move it into netNS.
    //  2. Inside netNS, configure that link from generateContCfgForExclusiveENI.
    //     Then, unless cfg.DisableCreatePeer is set or the container interface
    //     is not "eth0", create a veth pair whose other end is placed in the
    //     host namespace and configure the container end from generateVeth1Cfg.
    //  3. Back in the calling namespace, configure the host end of the veth
    //     pair from generateHostSlaveCfg.
    //
    // NOTE(review): most err values are assigned but never checked as shown, and
    // the host peer is looked up even when the veth pair was not created. The
    // excerpt is presumably abridged; confirm against the full source.
    func (r *ExclusiveENI) Setup(cfg *types.SetupConfig, netNS ns.NetNS) error {
        // 1. move link in
        nicLink, err := netlink.LinkByIndex(cfg.ENIIndex)
        
        // Keep a handle on the current namespace; it is passed to veth.Setup
        // and used for the hostNetNS.Do call below.
        hostNetNS, err := ns.GetCurrentNS()
        
        defer hostNetNS.Close()
    
        err = utils.LinkSetNsFd(nicLink, netNS)
    
        // 2. setup addr and default route
        err = netNS.Do(func(netNS ns.NetNS) error {
            // 2.1 setup addr
            // Look the link up again by name, now from inside the container namespace.
            contLink, err := netlink.LinkByName(nicLink.Attrs().Name)
    
            contCfg := generateContCfgForExclusiveENI(cfg, contLink)
            err = nic.Setup(contLink, contCfg)
    
            // for now we only create slave link for eth0
            if !cfg.DisableCreatePeer && cfg.ContainerIfName == "eth0" {
                err = veth.Setup(&veth.Veth{
                    IfName:   cfg.HostVETHName, // name for host ns side
                    PeerName: defaultVethForENI,
                }, hostNetNS)
    
                // Read the host-side peer's MAC from the host namespace; it is
                // passed to generateVeth1Cfg below.
                var mac net.HardwareAddr
                err = hostNetNS.Do(func(netNS ns.NetNS) error {
                    hostPeer, innerErr := netlink.LinkByName(cfg.HostVETHName)
                    mac = hostPeer.Attrs().HardwareAddr
                    return innerErr
                })
    
                veth1, err := netlink.LinkByName(defaultVethForENI)
    
                veth1Cfg := generateVeth1Cfg(cfg, veth1, mac)
                return nic.Setup(veth1, veth1Cfg)
            }
            return nil
        })
    
    
        // Configure the host-side end of the veth pair.
        hostPeer, err := netlink.LinkByName(cfg.HostVETHName)
    
        hostPeerCfg := generateHostSlaveCfg(cfg, hostPeer)
        err = nic.Setup(hostPeer, hostPeerCfg)
    
        return nil
    }
    

    容器内的网卡配置时, 首先直接将ENI设备移到容器命名空间内,可见这种模式下容器是直接分配的ENI网卡

    然后配置容器ENI网卡名称、设置ENI网卡的IP地址、默认路由

    // generateContCfgForExclusiveENI builds the nic.Conf applied to the ENI link
    // inside the container.
    //
    // The result uses cfg.ContainerIfName as the interface name and cfg.MTU as
    // the MTU. The addresses come from utils.NewIPNetToMaxMask applied to
    // cfg.ContainerIPNet. When the container has an IPv4 address and
    // cfg.DefaultRoute is set, an on-link route via cfg.GatewayIP.IPv4 over this
    // link is added. rules and sysctl are never populated in the code shown.
    //
    // NOTE(review): the `else {` below has no matching `if` in this excerpt, and
    // Dst is written as a string literal. The excerpt is presumably abridged and
    // simplified; confirm against the full source.
    func generateContCfgForExclusiveENI(cfg *types.SetupConfig, link netlink.Link) *nic.Conf {
        var addrs []*netlink.Addr
        var routes []*netlink.Route
        var rules []*netlink.Rule
        var sysctl map[string][]string
    
            else {
            addrs = utils.NewIPNetToMaxMask(cfg.ContainerIPNet)
        }
    
        if cfg.ContainerIPNet.IPv4 != nil {
            // add default route
            if cfg.DefaultRoute {
                // FLAG_ONLINK marks the gateway as directly reachable on this link.
                routes = append(routes, &netlink.Route{
                    LinkIndex: link.Attrs().Index,
                    Scope:     netlink.SCOPE_UNIVERSE,
                    Dst:       "0.0.0.0/0",
                    Gw:        cfg.GatewayIP.IPv4,
                    Flags:     int(netlink.FLAG_ONLINK),
                })
            }
        }
    
        contCfg := &nic.Conf{
            IfName: cfg.ContainerIfName,
            MTU:    cfg.MTU,
            Addrs:  addrs,
            Routes: routes,
            Rules:  rules,
            SysCtl: sysctl,
        }
        return contCfg
    }
    

    设置ENI网卡名称为eth0、然后设置的IP地址就是ENI的IP地址、然后添加默认路由,注意这里默认路由的网关设备就是ENI的网关地址

    default via  192.168.128.253  dev eth0 onlink
    

    如此,ENI网卡就配置好了,但是还需要一个veth网卡

    err = veth.Setup(&veth.Veth{
                    IfName:   cfg.HostVETHName, // name for host ns side
                    PeerName: "veth1",
                }, hostNetNS)
    

    veth网卡在容器内的网卡名称就是veth1,在宿主机上的名称就是calixxxxxxxxxxx

    然后配置容器内veth网卡的名称、配置veth网卡的IP地址、配置veth网卡的默认路由、配置veth网卡的静态ARP

    // generateVeth1Cfg builds the nic.Conf for the container-side end of the
    // veth pair, which is named "veth1".
    //
    // When the container has an IPv4 address it adds a link-scope route to
    // 169.254.1.1 over this link and a permanent neighbour entry that maps
    // 169.254.1.1 to peerMAC. When an IPv4 service CIDR is configured it also
    // adds an on-link route for that range via 169.254.1.1.
    //
    // NOTE(review): the addresses and CIDRs below are written as literals (and
    // `IP: 169.254.1.1` is not valid Go), so they look like illustrative
    // placeholders for values computed in the real code. link and cfg are the
    // only inputs actually read besides peerMAC; confirm against the full source.
    func generateVeth1Cfg(cfg *types.SetupConfig, link netlink.Link, peerMAC net.HardwareAddr) *nic.Conf {
        var routes []*netlink.Route
        var neighs []*netlink.Neigh
        var sysctl map[string][]string
    
        if cfg.ContainerIPNet.IPv4 != nil {
            // 169.254.1.1 dev veth1
            routes = append(routes, &netlink.Route{
                LinkIndex: link.Attrs().Index,
                Scope:     netlink.SCOPE_LINK,
                Dst:       "169.254.1.1",
            })
    
            // Send service traffic through 169.254.1.1 on this link.
            if cfg.ServiceCIDR != nil && cfg.ServiceCIDR.IPv4 != nil {
                routes = append(routes, &netlink.Route{
                    LinkIndex: link.Attrs().Index,
                    Dst:       "10.96.0.0/12",
                    Gw:        "169.254.1.1/32",
                    Flags:     int(netlink.FLAG_ONLINK),
                })
            }
            // Permanent neighbour entry binding 169.254.1.1 to peerMAC.
            neighs = append(neighs, &netlink.Neigh{
                LinkIndex:    link.Attrs().Index,
                IP:           169.254.1.1,
                HardwareAddr: peerMAC,
                State:        netlink.NUD_PERMANENT,
            })
        }
    
        contCfg := &nic.Conf{
            IfName: "veth1",
            MTU:    cfg.MTU,
            Addrs:  "192.168.128.15/32",
            Routes: routes,
            Neighs: neighs,
            SysCtl: sysctl,
        }
        return contCfg
    }
    

    设置容器内veth网卡的名称为veth1

    veth1网卡的IP地址仍为ENI的IP地址

    然后是veth1上的路由(到169.254.1.1的链路路由以及Service CIDR的路由)

    169.254.1.1 dev veth1 scope link
    10.96.0.0/12 via 169.254.1.1 dev veth1 onlink
    

    然后是静态ARP,对应的MAC地址就是宿主机上calixxxxxxxxxx设备的MAC地址

    ? (169.254.1.1) at da:44:55:66:77:88 [ether] PERM on veth1
    

    最后是宿主机上的veth网卡配置

    // generateHostSlaveCfg builds the nic.Conf for the host-side end of the veth
    // pair, named cfg.HostVETHName.
    //
    // When the container has an IPv4 address, the host end is given the address
    // 169.254.1.1/32 and a link-scope route to the container address over this
    // link.
    //
    // NOTE(review): the address and route destination are written as string
    // literals and sysctl is never declared in this excerpt, so the listing
    // looks simplified; confirm against the full source.
    func generateHostSlaveCfg(cfg *types.SetupConfig, link netlink.Link) *nic.Conf {
        var addrs []*netlink.Addr
        var routes []*netlink.Route
    
        if cfg.ContainerIPNet.IPv4 != nil {
            addrs = append(addrs, &netlink.Addr{
                IPNet: "169.254.1.1/32",
            })
    
            // add route to container
            routes = append(routes, &netlink.Route{
                LinkIndex: link.Attrs().Index,
                Scope:     netlink.SCOPE_LINK,
                Dst:       "192.168.128.15/32",
            })
        }
        contCfg := &nic.Conf{
            IfName: cfg.HostVETHName,
            MTU:    cfg.MTU,
            Addrs:  addrs,
            Routes: routes,
            SysCtl: sysctl,
        }
    
        return contCfg
    }
    

    首先设置宿主机上的veth网卡名称为calixxxxxxxxxxx

    设置calixxxxxxxxxxxxxx网卡IP地址为169.254.1.1/32

    设置calixxxxxxxxxxxxxx网卡上指向容器IP的路由

    192.168.128.15/32 dev calixxxxxxxxxxxxxx scope link
    

    可以看到这种模式下,容器内是有两个网卡的,其中ENI网卡直连的是VPC;另外的veth网卡是处理Service请求的

    参考

    相关文章

      网友评论

          本文标题:Cni terway-ENI独占模式源码详解

          本文链接:https://www.haomeiwen.com/subject/jyowadtx.html