上一篇分析了kubelet如何选取挂载哪些gpu,那么对应的就会有删除gpu使用记录的操作。
每次分配的时候,都是用所有健康的设备减去已分配的设备,得到当前可用的设备:
// Devices already allocated for this resource.
devicesInUse := m.allocatedDevices[resource]
// Healthy devices that are not in use are the ones still available.
available := m.healthyDevices[resource].Difference(devicesInUse)
删除已分配设备,需要先删除Pod,而Pod和Dev的关系是通过`podDevices`维护的。`allocatedDevices`会根据`podDevices`重新生成,所以删除已分配设备时不需要直接处理`allocatedDevices`,而是关注`podDevices`。
// ManagerImpl is the structure in charge of managing Device Plugins.
// NOTE(review): this listing appears to be an excerpt showing only the fields
// involved in device allocation bookkeeping — confirm against the full type.
type ManagerImpl struct {
// healthyDevices contains all of the registered healthy resourceNames and their exported device IDs.
healthyDevices map[string]sets.String
// allocatedDevices contains allocated deviceIds, keyed by resourceName.
// updateAllocatedDevices rebuilds it from podDevices.devices().
allocatedDevices map[string]sets.String
// podDevices contains pod to allocated device mapping.
podDevices podDevices
}
// delete removes the entries for the given pod UIDs from pdev. A UID that has
// no entry is skipped silently, because the built-in delete is a no-op for
// missing keys.
func (pdev podDevices) delete(pods []string) {
	for i := range pods {
		delete(pdev, pods[i])
	}
}
// updateAllocatedDevices frees the devices held by pods that are no longer
// active. Every pod recorded in m.podDevices but absent from activePods is
// removed from m.podDevices, and m.allocatedDevices is then rebuilt from the
// entries that remain. The function has no return value.
func (m *ManagerImpl) updateAllocatedDevices(activePods []*v1.Pod) {
	// Collect the UIDs of all pods that are still active.
	liveUIDs := make([]string, 0, len(activePods))
	for i := range activePods {
		liveUIDs = append(liveUIDs, string(activePods[i].UID))
	}

	// Pods that hold devices but are not active any more are stale.
	stale := m.podDevices.pods().Difference(sets.NewString(liveUIDs...))
	staleUIDs := stale.List()
	klog.V(3).Infof("pods to be removed: %v", staleUIDs)
	m.podDevices.delete(staleUIDs)

	// Rebuild allocatedDevices from the updated pod allocation information.
	m.allocatedDevices = m.podDevices.devices()
}
以`podDevices`为切入点,找到对应的`delete`方法,再找到`updateAllocatedDevices`,这里一切就明了了。
- 比较active和allocate pod,找到removed pod
- 删除removed pod devices
- 根据pod devices更新allocated devices
网友评论