volcano.sh/volcano@v1.9.0/pkg/scheduler/util/predicate_helper.go (about) 1 package util 2 3 import ( 4 "context" 5 "fmt" 6 "strings" 7 "sync" 8 "sync/atomic" 9 10 "k8s.io/client-go/util/workqueue" 11 "k8s.io/klog/v2" 12 13 "volcano.sh/volcano/pkg/scheduler/api" 14 ) 15 16 type PredicateHelper interface { 17 PredicateNodes(task *api.TaskInfo, nodes []*api.NodeInfo, fn api.PredicateFn, enableErrorCache bool) ([]*api.NodeInfo, *api.FitErrors) 18 } 19 20 type predicateHelper struct { 21 taskPredicateErrorCache map[string]map[string]error 22 } 23 24 // PredicateNodes returns the specified number of nodes that fit a task 25 func (ph *predicateHelper) PredicateNodes(task *api.TaskInfo, nodes []*api.NodeInfo, fn api.PredicateFn, enableErrorCache bool) ([]*api.NodeInfo, *api.FitErrors) { 26 var errorLock sync.RWMutex 27 fe := api.NewFitErrors() 28 29 allNodes := len(nodes) 30 if allNodes == 0 { 31 return make([]*api.NodeInfo, 0), fe 32 } 33 numNodesToFind := CalculateNumOfFeasibleNodesToFind(int32(allNodes)) 34 35 //allocate enough space to avoid growing it 36 predicateNodes := make([]*api.NodeInfo, numNodesToFind) 37 38 numFoundNodes := int32(0) 39 processedNodes := int32(0) 40 41 taskGroupid := taskGroupID(task) 42 nodeErrorCache, taskFailedBefore := ph.taskPredicateErrorCache[taskGroupid] 43 if nodeErrorCache == nil { 44 nodeErrorCache = map[string]error{} 45 } 46 47 //create a context with cancellation 48 ctx, cancel := context.WithCancel(context.Background()) 49 50 checkNode := func(index int) { 51 // Check the nodes starting from where is left off in the previous scheduling cycle, 52 // to make sure all nodes have the same chance of being examined across pods. 53 node := nodes[(lastProcessedNodeIndex+index)%allNodes] 54 atomic.AddInt32(&processedNodes, 1) 55 klog.V(4).Infof("Considering Task <%v/%v> on node <%v>: <%v> vs. <%v>", 56 task.Namespace, task.Name, node.Name, task.Resreq, node.Idle) 57 58 // Check if the task had "predicate" failure before. 59 // And then check if the task failed to predict on this node before. 60 if enableErrorCache && taskFailedBefore { 61 errorLock.RLock() 62 errC, ok := nodeErrorCache[node.Name] 63 errorLock.RUnlock() 64 65 if ok { 66 errorLock.Lock() 67 fe.SetNodeError(node.Name, errC) 68 errorLock.Unlock() 69 return 70 } 71 } 72 73 // TODO (k82cn): Enable eCache for performance improvement. 74 if _, err := fn(task, node); err != nil { 75 klog.V(3).Infof("Predicates failed: %v", err) 76 errorLock.Lock() 77 nodeErrorCache[node.Name] = err 78 ph.taskPredicateErrorCache[taskGroupid] = nodeErrorCache 79 fe.SetNodeError(node.Name, err) 80 errorLock.Unlock() 81 return 82 } 83 84 //check if the number of found nodes is more than the numNodesTofind 85 length := atomic.AddInt32(&numFoundNodes, 1) 86 if length > numNodesToFind { 87 cancel() 88 atomic.AddInt32(&numFoundNodes, -1) 89 } else { 90 predicateNodes[length-1] = node 91 } 92 } 93 94 //workqueue.ParallelizeUntil(context.TODO(), 16, len(nodes), checkNode) 95 workqueue.ParallelizeUntil(ctx, 16, allNodes, checkNode) 96 97 //processedNodes := int(numFoundNodes) + len(filteredNodesStatuses) + len(failedPredicateMap) 98 lastProcessedNodeIndex = (lastProcessedNodeIndex + int(processedNodes)) % allNodes 99 predicateNodes = predicateNodes[:numFoundNodes] 100 return predicateNodes, fe 101 } 102 103 func taskGroupID(task *api.TaskInfo) string { 104 return fmt.Sprintf("%s/%s", task.Job, task.GetTaskSpecKey()) 105 } 106 107 func NewPredicateHelper() PredicateHelper { 108 return &predicateHelper{taskPredicateErrorCache: map[string]map[string]error{}} 109 } 110 111 type StatusSets []*api.Status 112 113 func (s StatusSets) ContainsUnschedulable() bool { 114 for _, status := range s { 115 if status == nil { 116 continue 117 } 118 if status.Code == api.Unschedulable { 119 return true 120 } 121 } 122 return false 123 } 124 125 func (s StatusSets) ContainsUnschedulableAndUnresolvable() bool { 126 for _, status := range s { 127 if status == nil { 128 continue 129 } 130 if status.Code == api.UnschedulableAndUnresolvable { 131 return true 132 } 133 } 134 return false 135 } 136 137 func (s StatusSets) ContainsErrorSkipOrWait() bool { 138 for _, status := range s { 139 if status == nil { 140 continue 141 } 142 if status.Code == api.Error || status.Code == api.Skip || status.Code == api.Wait { 143 return true 144 } 145 } 146 return false 147 } 148 149 // Message return the message generated from StatusSets 150 func (s StatusSets) Message() string { 151 if s == nil { 152 return "" 153 } 154 all := make([]string, 0, len(s)) 155 for _, status := range s { 156 if status.Reason == "" { 157 continue 158 } 159 all = append(all, status.Reason) 160 } 161 return strings.Join(all, ",") 162 } 163 164 // Reasons return the reasons list 165 func (s StatusSets) Reasons() []string { 166 if s == nil { 167 return nil 168 } 169 all := make([]string, 0, len(s)) 170 for _, status := range s { 171 if status.Reason == "" { 172 continue 173 } 174 all = append(all, status.Reason) 175 } 176 return all 177 }