github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/constraintenforcer/constraint_enforcer.go

package constraintenforcer

import (
	"time"

	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/api/genericresource"
	"github.com/docker/swarmkit/log"
	"github.com/docker/swarmkit/manager/constraint"
	"github.com/docker/swarmkit/manager/state"
	"github.com/docker/swarmkit/manager/state/store"
	"github.com/docker/swarmkit/protobuf/ptypes"
)

// ConstraintEnforcer watches for updates to nodes and shuts down tasks that no
// longer satisfy scheduling constraints or resource limits.
type ConstraintEnforcer struct {
	store    *store.MemoryStore
	stopChan chan struct{}
	doneChan chan struct{}
}

// New creates a new ConstraintEnforcer.
func New(store *store.MemoryStore) *ConstraintEnforcer {
	return &ConstraintEnforcer{
		store:    store,
		stopChan: make(chan struct{}),
		doneChan: make(chan struct{}),
	}
}

// Run is the ConstraintEnforcer's main loop.
func (ce *ConstraintEnforcer) Run() {
	defer close(ce.doneChan)

	watcher, cancelWatch := state.Watch(ce.store.WatchQueue(), api.EventUpdateNode{})
	defer cancelWatch()

	var (
		nodes []*api.Node
		err   error
	)
	ce.store.View(func(readTx store.ReadTx) {
		nodes, err = store.FindNodes(readTx, store.All)
	})
	if err != nil {
		log.L.WithError(err).Error("failed to check nodes for noncompliant tasks")
	} else {
		for _, node := range nodes {
			ce.rejectNoncompliantTasks(node)
		}
	}

	for {
		select {
		case event := <-watcher:
			node := event.(api.EventUpdateNode).Node
			ce.rejectNoncompliantTasks(node)
		case <-ce.stopChan:
			return
		}
	}
}

func (ce *ConstraintEnforcer) rejectNoncompliantTasks(node *api.Node) {
	// If the availability is "drain", the orchestrator will
	// shut down all tasks.
	// If the availability is "pause", we shouldn't touch
	// the tasks on this node.
	if node.Spec.Availability != api.NodeAvailabilityActive {
		return
	}

	var (
		tasks []*api.Task
		err   error
	)

	services := map[string]*api.Service{}
	ce.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByNodeID(node.ID))
		if err != nil {
			return
		}

		// Deduplicate service IDs using the services map. It's okay for the
		// values to be nil for now; we will look them up from the store next.
		for _, task := range tasks {
			services[task.ServiceID] = nil
		}

		for serviceID := range services {
			services[serviceID] = store.GetService(tx, serviceID)
		}
	})

	if err != nil {
		log.L.WithError(err).Errorf("failed to list tasks for node ID %s", node.ID)
	}

	available := &api.Resources{}
	var fakeStore []*api.GenericResource

	if node.Description != nil && node.Description.Resources != nil {
		available = node.Description.Resources.Copy()
	}

	removeTasks := make(map[string]*api.Task)

	// TODO(aaronl): The set of tasks removed will be
	// nondeterministic because it depends on the order of
	// the slice returned from FindTasks. We could do
	// a separate pass over the tasks for each type of
	// resource, and sort by the size of the reservation
	// to remove the most resource-intensive tasks.
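
	// Walk the tasks on this node while tracking how much of the node's
	// capacity remains: tasks that are kept have their reservations
	// subtracted from "available", so later tasks are checked against what
	// is left rather than the node's full capacity. "fakeStore" only
	// exists to receive the generic resources claimed via
	// genericresource.ClaimResources; it is never read in this function.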
loop:
	for _, t := range tasks {
		if t.DesiredState < api.TaskStateAssigned || t.DesiredState > api.TaskStateCompleted {
			continue
		}

		// Ensure that the node still satisfies placement constraints.
		// NOTE: If the task is associated with a service then we must use the
		// constraints from the current service spec rather than the
		// constraints from the task spec because they may be outdated. This
		// will happen if the service was previously updated in a way which
		// only changes the placement constraints and the node matched the
		// placement constraints both before and after that update. In the case
		// of such updates, the tasks are not considered "dirty" and are not
		// restarted, but it will mean that the task spec's placement
		// constraints are outdated. Consider this example:
		// - A service is created with no constraints and a task is scheduled
		//   to a node.
		// - The node is updated to add a label; this doesn't affect the task
		//   on that node because it has no constraints.
		// - The service is updated to add a node label constraint which
		//   matches the label which was just added to the node. The updater
		//   does not shut down the task because only the constraints have
		//   changed and the node still matches the updated constraints.
		// - The node is updated to remove the node label. The node no longer
		//   satisfies the placement constraints of the service, so the task
		//   should be shut down. However, the task's spec still has the
		//   original and outdated constraints (that are still satisfied by
		//   the node). If we used those original constraints then the task
		//   would incorrectly not be removed. This is why the constraints
		//   from the service spec should be used instead.
		var placement *api.Placement
		if service := services[t.ServiceID]; service != nil {
			// This task is associated with a service, so we use the service's
			// current placement constraints.
			placement = service.Spec.Task.Placement
		} else {
			// This task is not associated with a service (or the service no
			// longer exists), so we use the placement constraints from the
			// original task spec.
			placement = t.Spec.Placement
		}
		if placement != nil && len(placement.Constraints) > 0 {
			constraints, _ := constraint.Parse(placement.Constraints)
			if !constraint.NodeMatches(constraints, node) {
				removeTasks[t.ID] = t
				continue
			}
		}

		// Ensure that the task assigned to the node
		// still satisfies the resource limits.
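		// Tasks with no explicit reservations skip this block entirely:
		// they are never rejected for resource reasons and do not count
		// against the node's remaining capacity.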
		if t.Spec.Resources != nil && t.Spec.Resources.Reservations != nil {
			if t.Spec.Resources.Reservations.MemoryBytes > available.MemoryBytes {
				removeTasks[t.ID] = t
				continue
			}
			if t.Spec.Resources.Reservations.NanoCPUs > available.NanoCPUs {
				removeTasks[t.ID] = t
				continue
			}
			for _, ta := range t.AssignedGenericResources {
				// Type change or no longer available: reject the task and
				// move on to the next one without claiming its resources.
				if !genericresource.HasResource(ta, available.Generic) {
					removeTasks[t.ID] = t
					continue loop
				}
			}

			available.MemoryBytes -= t.Spec.Resources.Reservations.MemoryBytes
			available.NanoCPUs -= t.Spec.Resources.Reservations.NanoCPUs

			genericresource.ClaimResources(&available.Generic,
				&fakeStore, t.AssignedGenericResources)
		}
	}

	if len(removeTasks) != 0 {
		err := ce.store.Batch(func(batch *store.Batch) error {
			for _, t := range removeTasks {
				err := batch.Update(func(tx store.Tx) error {
					t = store.GetTask(tx, t.ID)
					if t == nil || t.DesiredState > api.TaskStateCompleted {
						return nil
					}

					// We set the observed state to
					// REJECTED, rather than the desired
					// state. Desired state is owned by the
					// orchestrator, and setting it directly
					// will bypass actions such as
					// restarting the task on another node
					// (if applicable).
					t.Status.State = api.TaskStateRejected
					t.Status.Message = "task rejected by constraint enforcer"
					t.Status.Err = "assigned node no longer meets constraints"
					t.Status.Timestamp = ptypes.MustTimestampProto(time.Now())
					return store.UpdateTask(tx, t)
				})
				if err != nil {
					log.L.WithError(err).Errorf("failed to shut down task %s", t.ID)
				}
			}
			return nil
		})

		if err != nil {
			log.L.WithError(err).Errorf("failed to shut down tasks")
		}
	}
}

// Stop stops the ConstraintEnforcer and waits for the main loop to exit.
func (ce *ConstraintEnforcer) Stop() {
	close(ce.stopChan)
	<-ce.doneChan
}
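
// Usage sketch (illustrative, not part of the original file): a caller that
// owns a *store.MemoryStore would construct the enforcer around it, run it in
// its own goroutine since Run blocks until Stop is called, and call Stop
// during shutdown to wait for the loop to exit. "memoryStore" below is an
// assumed store owned by the caller.
//
//	ce := constraintenforcer.New(memoryStore)
//	go ce.Run()
//	// ... later, during shutdown:
//	ce.Stop() // closes stopChan and waits for Run to return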