美文网首页k8s那点事儿Docker容器k8s入门
[k8s源码分析][kube-scheduler]schedul

[k8s源码分析][kube-scheduler]schedul

作者: nicktming | 来源:发表于2019-10-13 10:14 被阅读0次

    1. 前言

    转载请说明原文出处, 尊重他人劳动成果!

    本文将分析调度器中的优选方法, 主要涉及pkg/scheduler/algorithm/priorities下面的一些文件和pkg/scheduler/algorithm/type.go
    源码位置: https://github.com/nicktming/kubernetes
    分支: tming-v1.13 (基于v1.13版本)

    2. 优选方法定义

    // PriorityMetadataProducer computes, once per pod, shared metadata that all
    // priority functions can reuse during scoring.
    type PriorityMetadataProducer func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{}

    // New-style priority: a per-node map function, optionally followed by a reduce step.
    // PriorityMapFunction scores a single node for the given pod.
    type PriorityMapFunction func(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error)
    // PriorityReduceFunction post-processes the complete HostPriorityList produced by a map function.
    type PriorityReduceFunction func(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error

    // Old-style priority: scores every node in a single call.
    type PriorityFunction func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error)
    
    // HostPriority represents the priority of scheduling to a particular host, higher priority is better.
    type HostPriority struct {
        // Name of the host
        Host string
        // Score associated with the host
        Score int
    }
    
    // HostPriorityList declares a []HostPriority type.
    type HostPriorityList []HostPriority
    
    func (h HostPriorityList) Len() int {
        return len(h)
    }
    
    func (h HostPriorityList) Less(i, j int) bool {
        if h[i].Score == h[j].Score {
            return h[i].Host < h[j].Host
        }
        return h[i].Score < h[j].Score
    }
    
    func (h HostPriorityList) Swap(i, j int) {
        h[i], h[j] = h[j], h[i]
    }
    

    可以看到优选方法会返回一个HostPriority, 代表了该节点在此优选方法中获得了多少分.

    3. 优选方法

    3.1 EqualPriority

    先看一下最简单的EqualPriority. 就是所有节点一视同仁全部都给1分.

    // pkg/scheduler/core/generic_scheduler.go
    
    // EqualPriorityMap gives every node the same score (1), effectively
    // expressing no preference among nodes.
    func EqualPriorityMap(_ *v1.Pod, _ interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
        if node := nodeInfo.Node(); node != nil {
            return schedulerapi.HostPriority{Host: node.Name, Score: 1}, nil
        }
        return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
    }
    

    3.2 resource_allocation

    ResourceAllocationPriority定义了优选名称Name和打分策略scorer. 然后PriorityMap方法中会调用scorer方法进行打分. 其中就包括了三个优选方法(MostRequestedPriority, LeastRequestedPriority, BalancedResourceAllocation), 它们都属于ResourceAllocationPriority

    // pkg/scheduler/algorithm/priorities/resource_allocation.go
    
    // ResourceAllocationPriority is a cpu/memory based scoring strategy.
    // Name identifies the priority; scorer is the scoring function that
    // PriorityMap delegates to when computing a node's score.
    type ResourceAllocationPriority struct {
        Name   string
        scorer func(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64
    }
    
    // PriorityMap priorities nodes according to the resource allocations on the node.
    // It will use `scorer` function to calculate the score.
    //
    // The score reflects the node's cpu/memory (and optionally volume) usage as
    // it would be after placing the pod; the concrete policy lives in r.scorer.
    func (r *ResourceAllocationPriority) PriorityMap(
        pod *v1.Pod,
        meta interface{},
        nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
        node := nodeInfo.Node()
        if node == nil {
            return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
        }
        allocatable := nodeInfo.AllocatableResource()

        // Prefer the request precomputed in the priority metadata; fall back to
        // recomputing it from the pod spec when the metadata is absent or of an
        // unexpected type.
        var requested schedulercache.Resource
        if priorityMeta, ok := meta.(*priorityMetadata); ok {
            requested = *priorityMeta.nonZeroRequest
        } else {
            // We couldn't parse metadata - fallback to computing it.
            requested = *getNonZeroRequests(pod)
        }

        // Add what the node has already committed to, so the score reflects the
        // state after this pod would be scheduled here.
        requested.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
        requested.Memory += nodeInfo.NonZeroRequest().Memory

        // Fix: the original condition checked len(pod.Spec.Volumes) >= 0, which is
        // always true; volumes should only influence the score when the pod
        // actually has some. Hoisting the condition also removes the duplicated
        // expression in the logging branch below.
        includeVolumes := len(pod.Spec.Volumes) > 0 &&
            utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) &&
            nodeInfo.TransientInfo != nil

        var score int64
        if includeVolumes {
            score = r.scorer(&requested, &allocatable, true, nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes, nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount)
        } else {
            score = r.scorer(&requested, &allocatable, false, 0, 0)
        }

        if klog.V(10) {
            if includeVolumes {
                klog.Infof(
                    "%v -> %v: %v, capacity %d millicores %d memory bytes, %d volumes, total request %d millicores %d memory bytes %d volumes, score %d",
                    pod.Name, node.Name, r.Name,
                    allocatable.MilliCPU, allocatable.Memory, nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount,
                    requested.MilliCPU, requested.Memory,
                    nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes,
                    score,
                )
            } else {
                klog.Infof(
                    "%v -> %v: %v, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
                    pod.Name, node.Name, r.Name,
                    allocatable.MilliCPU, allocatable.Memory,
                    requested.MilliCPU, requested.Memory,
                    score,
                )
            }
        }

        return schedulerapi.HostPriority{
            Host:  node.Name,
            Score: int(score),
        }, nil
    }
    
    // getNonZeroRequests sums the (non-zero-defaulted) cpu and memory requests
    // of every container in the pod.
    func getNonZeroRequests(pod *v1.Pod) *schedulercache.Resource {
        var total schedulercache.Resource
        for i := range pod.Spec.Containers {
            c := &pod.Spec.Containers[i]
            cpu, mem := priorityutil.GetNonzeroRequests(&c.Resources.Requests)
            total.MilliCPU += cpu
            total.Memory += mem
        }
        return &total
    }
    
    3.2.1 LeastRequestedPriority

    从注册方法(factory.RegisterPriorityFunction2("LeastRequestedPriority", priorities.LeastRequestedPriorityMap, nil, 1),)中可以看到优选方法LeastRequestedPriority使用的Map方法为LeastRequestedPriorityMap, 也就是resource_allocation.go中的PriorityMap, 然后在PriorityMap方法调用LeastRequestedPriority自己的算分方法leastResourceScorer.

    从字面上看, request越少得分越多, 比较倾向于让pod尽量分到空闲的机器.

    // pkg/scheduler/algorithm/priorities/least_requested.go
    
    var (
        leastResourcePriority = &ResourceAllocationPriority{"LeastResourceAllocation", leastResourceScorer}
        // Registered in defaults.go via factory.RegisterPriorityFunction2("LeastRequestedPriority", priorities.LeastRequestedPriorityMap, nil, 1),
        // i.e. scoring goes through the PriorityMap method in resource_allocation.go
        // with leastResourceScorer as the scoring policy.
        LeastRequestedPriorityMap = leastResourcePriority.PriorityMap
    )
    // leastResourceScorer averages the cpu and memory "free capacity" scores:
    // the less a node is requested, the higher it scores.
    func leastResourceScorer(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
        cpuScore := leastRequestedScore(requested.MilliCPU, allocable.MilliCPU)
        memScore := leastRequestedScore(requested.Memory, allocable.Memory)
        return (cpuScore + memScore) / 2
    }
    // leastRequestedScore maps (requested, capacity) to a 0..MaxPriority score
    // proportional to the unused capacity; zero capacity or over-commitment
    // scores 0.
    func leastRequestedScore(requested, capacity int64) int64 {
        if capacity == 0 || requested > capacity {
            return 0
        }
        unused := capacity - requested
        return unused * int64(schedulerapi.MaxPriority) / capacity
    }
    
    3.2.2 MostRequestedPriority

    MostRequestedPriority与LeastRequestedPriority逻辑一样, 只是用自己的算分规则mostResourceScorer.

    从字面上看, request越多得分越多, 比较倾向于尽量压满一台机器, 避免造成过多碎片化.

    // pkg/scheduler/algorithm/priorities/most_requested.go
    
    var (
        // mostResourcePriority scores with mostResourceScorer: it favors packing
        // pods onto already-utilized nodes.
        mostResourcePriority = &ResourceAllocationPriority{"MostResourceAllocation", mostResourceScorer}
        // MostRequestedPriorityMap is the map function registered for the
        // MostRequestedPriority priority; it is resource_allocation.go's PriorityMap.
        MostRequestedPriorityMap = mostResourcePriority.PriorityMap
    )
    
    // mostResourceScorer averages the cpu and memory utilization scores:
    // the more of a node's capacity is requested, the higher it scores.
    func mostResourceScorer(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
        sum := mostRequestedScore(requested.MilliCPU, allocable.MilliCPU)
        sum += mostRequestedScore(requested.Memory, allocable.Memory)
        return sum / 2
    }
    // mostRequestedScore maps (requested, capacity) to a 0..MaxPriority score
    // proportional to utilization; zero capacity or over-commitment scores 0.
    func mostRequestedScore(requested, capacity int64) int64 {
        switch {
        case capacity == 0, requested > capacity:
            return 0
        default:
            return (requested * schedulerapi.MaxPriority) / capacity
        }
    }
    
    3.2.3 例子

    这里用个例子来理解一下LeastRequestedPriority和MostRequestedPriority的算分规则.

    这里有两个节点分别为Machine1和Machine2, 并且Machine1中已经有了一个pod1, Machine2中已经有了一个pod2. 信息如下

    Machine1:
    milliCPU: 10000  Memory: 20000
    
    Machine2:
    milliCPU: 10000 Memory: 20000
    
    pod1:
    request.cpu: 3000
    request.mem: 0
    
    pod2:
    request.cpu: 3000
    request.mem: 5000
    

    现在有一个pod3请求cpu=3000, memory=5000, 可以来看看分别用LeastRequestedPriority和MostRequestedPriority可以得分多少?

    按照LeastRequestedPriority的算法:

    Machine1 (0~10)
    CPU Score: ((10000 - 6000) *10) / 10000 = 4
    Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
    Machine1 Score: (4 + 7.5) / 2 = 5
    
    Machine2 scores on 0-10 scale
    CPU Score: ((10000 - 6000) *10) / 10000 = 4
    Memory Score: ((20000 - 10000) *10) / 20000 = 5
    Machine2 Score: (4 + 5) / 2 = 4
    

    按照MostRequestedPriority的算法:

    Machine1 (0~10)
    CPU Score: (6000 *10) / 10000 = 6
    Memory Score: ((5000 *10) / 20000 = 2.5
    Machine1 Score: (6 + 2.5) / 2 = 4
    
    Machine2 scores on 0-10 scale
    CPU Score: (6000 *10) / 10000 = 6
    Memory Score: (10000 *10) / 20000 = 5
    Machine2 Score: (6 + 5) / 2 = 5
    

    4. 总结

    简单介绍了几个常见的优选方法EqualPriority, LeastRequestedPriority和MostRequestedPriority. 主要是为了能理解优选方法是如何工作的.

    相关文章

      网友评论

        本文标题:[k8s源码分析][kube-scheduler]schedul

        本文链接:https://www.haomeiwen.com/subject/tjncmctx.html