github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/global/global.go

package global

import (
	"context"

	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/log"
	"github.com/docker/swarmkit/manager/constraint"
	"github.com/docker/swarmkit/manager/orchestrator"
	"github.com/docker/swarmkit/manager/orchestrator/restart"
	"github.com/docker/swarmkit/manager/orchestrator/taskinit"
	"github.com/docker/swarmkit/manager/orchestrator/update"
	"github.com/docker/swarmkit/manager/state/store"
)

type globalService struct {
	*api.Service

	// Compiled constraints
	constraints []constraint.Constraint
}

// Orchestrator runs a reconciliation loop to create and destroy tasks as
// necessary for global services.
type Orchestrator struct {
	store *store.MemoryStore
	// nodes is the set of non-drained nodes in the cluster, indexed by node ID
	nodes map[string]*api.Node
	// globalServices has all the global services in the cluster, indexed by ServiceID
	globalServices map[string]globalService
	restartTasks   map[string]struct{}

	// stopChan signals to the state machine to stop running.
	stopChan chan struct{}
	// doneChan is closed when the state machine terminates.
	doneChan chan struct{}

	updater  *update.Supervisor
	restarts *restart.Supervisor

	cluster *api.Cluster // local instance of the cluster
}

// NewGlobalOrchestrator creates a new global Orchestrator
func NewGlobalOrchestrator(store *store.MemoryStore) *Orchestrator {
	restartSupervisor := restart.NewSupervisor(store)
	updater := update.NewSupervisor(store, restartSupervisor)
	return &Orchestrator{
		store:          store,
		nodes:          make(map[string]*api.Node),
		globalServices: make(map[string]globalService),
		stopChan:       make(chan struct{}),
		doneChan:       make(chan struct{}),
		updater:        updater,
		restarts:       restartSupervisor,
		restartTasks:   make(map[string]struct{}),
	}
}
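// initTasks inspects the tasks already in the store at startup and fixes any
// that no longer conform to node state or service constraints, delegating to
// taskinit.CheckTasks with this orchestrator as the handler.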
func (g *Orchestrator) initTasks(ctx context.Context, readTx store.ReadTx) error {
	return taskinit.CheckTasks(ctx, g.store, readTx, g, g.restarts)
}

// Run contains the global orchestrator event loop
func (g *Orchestrator) Run(ctx context.Context) error {
	defer close(g.doneChan)

	// Watch changes to services and tasks
	queue := g.store.WatchQueue()
	watcher, cancel := queue.Watch()
	defer cancel()

	// lookup the cluster
	var err error
	g.store.View(func(readTx store.ReadTx) {
		var clusters []*api.Cluster
		clusters, err = store.FindClusters(readTx, store.ByName(store.DefaultClusterName))

		if len(clusters) != 1 {
			return // just pick up the cluster when it is created.
		}
		g.cluster = clusters[0]
	})
	if err != nil {
		return err
	}

	// Get list of nodes
	var nodes []*api.Node
	g.store.View(func(readTx store.ReadTx) {
		nodes, err = store.FindNodes(readTx, store.All)
	})
	if err != nil {
		return err
	}
	for _, n := range nodes {
		g.updateNode(n)
	}

	// Lookup global services
	var existingServices []*api.Service
	g.store.View(func(readTx store.ReadTx) {
		existingServices, err = store.FindServices(readTx, store.All)
	})
	if err != nil {
		return err
	}

	var reconcileServiceIDs []string
	for _, s := range existingServices {
		if orchestrator.IsGlobalService(s) {
			g.updateService(s)
			reconcileServiceIDs = append(reconcileServiceIDs, s.ID)
		}
	}

	// fix tasks in store before reconciliation loop
	g.store.View(func(readTx store.ReadTx) {
		err = g.initTasks(ctx, readTx)
	})
	if err != nil {
		return err
	}

	g.tickTasks(ctx)
	g.reconcileServices(ctx, reconcileServiceIDs)

	for {
		select {
		case event := <-watcher:
			// TODO(stevvooe): Use ctx to limit running time of operation.
			switch v := event.(type) {
			case api.EventUpdateCluster:
				g.cluster = v.Cluster
			case api.EventCreateService:
				if !orchestrator.IsGlobalService(v.Service) {
					continue
				}
				g.updateService(v.Service)
				g.reconcileServices(ctx, []string{v.Service.ID})
			case api.EventUpdateService:
				if !orchestrator.IsGlobalService(v.Service) {
					continue
				}
				g.updateService(v.Service)
				g.reconcileServices(ctx, []string{v.Service.ID})
			case api.EventDeleteService:
				if !orchestrator.IsGlobalService(v.Service) {
					continue
				}
				orchestrator.SetServiceTasksRemove(ctx, g.store, v.Service)
				// delete the service from service map
				delete(g.globalServices, v.Service.ID)
				g.restarts.ClearServiceHistory(v.Service.ID)
			case api.EventCreateNode:
				g.updateNode(v.Node)
				g.reconcileOneNode(ctx, v.Node)
			case api.EventUpdateNode:
				g.updateNode(v.Node)
				g.reconcileOneNode(ctx, v.Node)
			case api.EventDeleteNode:
				g.foreachTaskFromNode(ctx, v.Node, g.deleteTask)
				delete(g.nodes, v.Node.ID)
			case api.EventUpdateTask:
				g.handleTaskChange(ctx, v.Task)
			}
		case <-g.stopChan:
			return nil
		}
		g.tickTasks(ctx)
	}
}
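// runGlobalOrchestrator is an illustrative sketch, not part of the original
// swarmkit source: it shows how a caller might typically start the
// orchestrator in its own goroutine and shut it down. The MemoryStore s is
// assumed to be created and owned by the manager.
func runGlobalOrchestrator(ctx context.Context, s *store.MemoryStore) {
	o := NewGlobalOrchestrator(s)

	done := make(chan error, 1)
	go func() {
		// Run blocks until Stop is called or the watch loop fails.
		done <- o.Run(ctx)
	}()

	// ... manager serves requests ...

	// Stop closes stopChan, waits for Run to return, and cancels any
	// in-flight updates and restarts.
	o.Stop()
	if err := <-done; err != nil {
		log.G(ctx).WithError(err).Error("global orchestrator exited with error")
	}
}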
// FixTask validates a task against the current cluster settings and takes
// action to make it conform to node state and service constraints.
// It is called at orchestrator initialization.
func (g *Orchestrator) FixTask(ctx context.Context, batch *store.Batch, t *api.Task) {
	if _, exists := g.globalServices[t.ServiceID]; !exists {
		return
	}
	// if a task's DesiredState has passed running, the task has been processed
	if t.DesiredState > api.TaskStateRunning {
		return
	}

	var node *api.Node
	if t.NodeID != "" {
		node = g.nodes[t.NodeID]
	}
	// if the node is no longer valid, remove the task
	if t.NodeID == "" || orchestrator.InvalidNode(node) {
		g.shutdownTask(ctx, batch, t)
		return
	}

	// restart a task if it fails
	if t.Status.State > api.TaskStateRunning {
		g.restartTasks[t.ID] = struct{}{}
	}
}

// handleTaskChange defines what the orchestrator does when a task is updated by an agent
func (g *Orchestrator) handleTaskChange(ctx context.Context, t *api.Task) {
	if _, exists := g.globalServices[t.ServiceID]; !exists {
		return
	}
	// if a task's DesiredState has passed running, it
	// means the task has been processed
	if t.DesiredState > api.TaskStateRunning {
		return
	}

	// if a task has passed running, restart it
	if t.Status.State > api.TaskStateRunning {
		g.restartTasks[t.ID] = struct{}{}
	}
}

// Stop stops the orchestrator.
func (g *Orchestrator) Stop() {
	close(g.stopChan)
	<-g.doneChan
	g.updater.CancelAll()
	g.restarts.CancelAll()
}

func (g *Orchestrator) foreachTaskFromNode(ctx context.Context, node *api.Node, cb func(context.Context, *store.Batch, *api.Task)) {
	var (
		tasks []*api.Task
		err   error
	)
	g.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByNodeID(node.ID))
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: foreachTaskFromNode failed finding tasks")
		return
	}

	err = g.store.Batch(func(batch *store.Batch) error {
		for _, t := range tasks {
			// Global orchestrator only removes tasks from globalServices
			if _, exists := g.globalServices[t.ServiceID]; exists {
				cb(ctx, batch, t)
			}
		}
		return nil
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: foreachTaskFromNode failed batching tasks")
	}
}
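// reconcileServices brings the task set of each given global service in line
// with the current node set: every eligible (non-paused, constraint-matching)
// node gets exactly one runnable task, tasks on ineligible or unknown nodes
// are shut down, and out-of-date tasks are handed to the update supervisor.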
func (g *Orchestrator) reconcileServices(ctx context.Context, serviceIDs []string) {
	nodeTasks := make(map[string]map[string][]*api.Task)

	g.store.View(func(tx store.ReadTx) {
		for _, serviceID := range serviceIDs {
			service := g.globalServices[serviceID].Service
			if service == nil {
				continue
			}

			tasks, err := store.FindTasks(tx, store.ByServiceID(serviceID))
			if err != nil {
				log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServices failed finding tasks for service %s", serviceID)
				continue
			}

			// nodeID -> task list
			nodeTasks[serviceID] = make(map[string][]*api.Task)

			for _, t := range tasks {
				nodeTasks[serviceID][t.NodeID] = append(nodeTasks[serviceID][t.NodeID], t)
			}

			// Keep all runnable instances of this service,
			// and instances that were not restarted due
			// to the restart policy but may be updated if the
			// service spec changed.
			for nodeID, slot := range nodeTasks[serviceID] {
				updatable := g.restarts.UpdatableTasksInSlot(ctx, slot, g.globalServices[serviceID].Service)
				if len(updatable) != 0 {
					nodeTasks[serviceID][nodeID] = updatable
				} else {
					delete(nodeTasks[serviceID], nodeID)
				}
			}
		}
	})

	updates := make(map[*api.Service][]orchestrator.Slot)

	err := g.store.Batch(func(batch *store.Batch) error {
		for _, serviceID := range serviceIDs {
			var updateTasks []orchestrator.Slot

			if _, exists := nodeTasks[serviceID]; !exists {
				continue
			}

			service := g.globalServices[serviceID]

			for nodeID, node := range g.nodes {
				meetsConstraints := constraint.NodeMatches(service.constraints, node)
				ntasks := nodeTasks[serviceID][nodeID]
				delete(nodeTasks[serviceID], nodeID)

				if !meetsConstraints {
					g.shutdownTasks(ctx, batch, ntasks)
					continue
				}

				if node.Spec.Availability == api.NodeAvailabilityPause {
					// the node is paused, so we won't add or update
					// any tasks
					continue
				}

				// this node needs to run 1 copy of the task
				if len(ntasks) == 0 {
					g.addTask(ctx, batch, service.Service, nodeID)
				} else {
					updateTasks = append(updateTasks, ntasks)
				}
			}

			if len(updateTasks) > 0 {
				updates[service.Service] = updateTasks
			}

			// Remove any tasks assigned to nodes not found in g.nodes.
			// These must be associated with nodes that are drained, or
			// nodes that no longer exist.
			for _, ntasks := range nodeTasks[serviceID] {
				g.shutdownTasks(ctx, batch, ntasks)
			}
		}
		return nil
	})

	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServices transaction failed")
	}

	for service, updateTasks := range updates {
		g.updater.Update(ctx, g.cluster, service, updateTasks)
	}
}

// updateNode updates g.nodes based on the current node value
func (g *Orchestrator) updateNode(node *api.Node) {
	if node.Spec.Availability == api.NodeAvailabilityDrain || node.Status.State == api.NodeStatus_DOWN {
		delete(g.nodes, node.ID)
	} else {
		g.nodes[node.ID] = node
	}
}

// updateService updates g.globalServices based on the current service value
func (g *Orchestrator) updateService(service *api.Service) {
	var constraints []constraint.Constraint

	if service.Spec.Task.Placement != nil && len(service.Spec.Task.Placement.Constraints) != 0 {
		constraints, _ = constraint.Parse(service.Spec.Task.Placement.Constraints)
	}

	g.globalServices[service.ID] = globalService{
		Service:     service,
		constraints: constraints,
	}
}
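// parsePlacement is an illustrative sketch, not part of the original source:
// it shows the constraint.Parse / constraint.NodeMatches pattern that
// updateService and the reconcile paths above rely on. The expressions slice
// stands in for service.Spec.Task.Placement.Constraints.
func parsePlacement(expressions []string, node *api.Node) (bool, error) {
	constraints, err := constraint.Parse(expressions)
	if err != nil {
		return false, err
	}
	// NodeMatches reports whether the node satisfies every parsed constraint.
	return constraint.NodeMatches(constraints, node), nil
}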
// reconcileOneNode checks all global services on one node
func (g *Orchestrator) reconcileOneNode(ctx context.Context, node *api.Node) {
	if node.Spec.Availability == api.NodeAvailabilityDrain {
		log.G(ctx).Debugf("global orchestrator: node %s in drain state, shutting down its tasks", node.ID)
		g.foreachTaskFromNode(ctx, node, g.shutdownTask)
		return
	}

	if node.Status.State == api.NodeStatus_DOWN {
		log.G(ctx).Debugf("global orchestrator: node %s is down, shutting down its tasks", node.ID)
		g.foreachTaskFromNode(ctx, node, g.shutdownTask)
		return
	}

	if node.Spec.Availability == api.NodeAvailabilityPause {
		// the node is paused, so we won't add or update tasks
		return
	}

	node, exists := g.nodes[node.ID]
	if !exists {
		return
	}

	// tasks by service
	tasks := make(map[string][]*api.Task)

	var (
		tasksOnNode []*api.Task
		err         error
	)

	g.store.View(func(tx store.ReadTx) {
		tasksOnNode, err = store.FindTasks(tx, store.ByNodeID(node.ID))
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcile failed finding tasks on node %s", node.ID)
		return
	}

	for serviceID, service := range g.globalServices {
		for _, t := range tasksOnNode {
			if t.ServiceID != serviceID {
				continue
			}
			tasks[serviceID] = append(tasks[serviceID], t)
		}

		// Keep all runnable instances of this service,
		// and instances that were not restarted due
		// to the restart policy but may be updated if the
		// service spec changed.
		for serviceID, slot := range tasks {
			updatable := g.restarts.UpdatableTasksInSlot(ctx, slot, service.Service)

			if len(updatable) != 0 {
				tasks[serviceID] = updatable
			} else {
				delete(tasks, serviceID)
			}
		}
	}

	err = g.store.Batch(func(batch *store.Batch) error {
		for serviceID, service := range g.globalServices {
			if !constraint.NodeMatches(service.constraints, node) {
				continue
			}

			if len(tasks) == 0 {
				g.addTask(ctx, batch, service.Service, node.ID)
			} else {
				// If a task is out of date, update it. This can happen
				// on node reconciliation if, for example, we pause a
				// node, update the service, and then activate the node
				// later.

				// We don't use g.updater here for two reasons:
				// - This is not a rolling update. Since it was not
				//   triggered directly by updating the service, it
				//   should not observe the rolling update parameters
				//   or show status in UpdateStatus.
				// - Calling Update cancels any current rolling updates
				//   for the service, such as one triggered by service
				//   reconciliation.

				var (
					dirtyTasks []*api.Task
					cleanTasks []*api.Task
				)

				for _, t := range tasks[serviceID] {
					if orchestrator.IsTaskDirty(service.Service, t, node) {
						dirtyTasks = append(dirtyTasks, t)
					} else {
						cleanTasks = append(cleanTasks, t)
					}
				}

				if len(cleanTasks) == 0 {
					g.addTask(ctx, batch, service.Service, node.ID)
				} else {
					dirtyTasks = append(dirtyTasks, cleanTasks[1:]...)
				}
				g.shutdownTasks(ctx, batch, dirtyTasks)
			}
		}
		return nil
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServiceOneNode batch failed")
	}
}
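// tickTasks drains the restartTasks set populated by FixTask and
// handleTaskChange: each recorded task is either marked for shutdown (when
// its node is paused or no longer matches the service constraints) or handed
// to the restart supervisor.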
func (g *Orchestrator) tickTasks(ctx context.Context) {
	if len(g.restartTasks) == 0 {
		return
	}
	err := g.store.Batch(func(batch *store.Batch) error {
		for taskID := range g.restartTasks {
			err := batch.Update(func(tx store.Tx) error {
				t := store.GetTask(tx, taskID)
				if t == nil || t.DesiredState > api.TaskStateRunning {
					return nil
				}

				service := store.GetService(tx, t.ServiceID)
				if service == nil {
					return nil
				}

				node, nodeExists := g.nodes[t.NodeID]
				serviceEntry, serviceExists := g.globalServices[t.ServiceID]
				if !nodeExists || !serviceExists {
					return nil
				}

				if node.Spec.Availability == api.NodeAvailabilityPause ||
					!constraint.NodeMatches(serviceEntry.constraints, node) {
					t.DesiredState = api.TaskStateShutdown
					return store.UpdateTask(tx, t)
				}

				return g.restarts.Restart(ctx, tx, g.cluster, service, *t)
			})
			if err != nil {
				log.G(ctx).WithError(err).Errorf("orchestrator restartTask transaction failed")
			}
		}
		return nil
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: restartTask transaction failed")
	}
	g.restartTasks = make(map[string]struct{})
}

func (g *Orchestrator) shutdownTask(ctx context.Context, batch *store.Batch, t *api.Task) {
	// set existing task DesiredState to TaskStateShutdown
	// TODO(aaronl): optimistic update?
	err := batch.Update(func(tx store.Tx) error {
		t = store.GetTask(tx, t.ID)
		if t != nil && t.DesiredState < api.TaskStateShutdown {
			t.DesiredState = api.TaskStateShutdown
			return store.UpdateTask(tx, t)
		}
		return nil
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: shutdownTask failed to shut down %s", t.ID)
	}
}

func (g *Orchestrator) addTask(ctx context.Context, batch *store.Batch, service *api.Service, nodeID string) {
	task := orchestrator.NewTask(g.cluster, service, 0, nodeID)

	err := batch.Update(func(tx store.Tx) error {
		if store.GetService(tx, service.ID) == nil {
			return nil
		}
		return store.CreateTask(tx, task)
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: failed to create task")
	}
}

func (g *Orchestrator) shutdownTasks(ctx context.Context, batch *store.Batch, tasks []*api.Task) {
	for _, t := range tasks {
		g.shutdownTask(ctx, batch, t)
	}
}

func (g *Orchestrator) deleteTask(ctx context.Context, batch *store.Batch, t *api.Task) {
	err := batch.Update(func(tx store.Tx) error {
		return store.DeleteTask(tx, t.ID)
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: deleteTask failed to delete %s", t.ID)
	}
}

// IsRelatedService returns true if the service should be governed by this orchestrator
func (g *Orchestrator) IsRelatedService(service *api.Service) bool {
	return orchestrator.IsGlobalService(service)
}

// SlotTuple returns a slot tuple for the global service task.
func (g *Orchestrator) SlotTuple(t *api.Task) orchestrator.SlotTuple {
	return orchestrator.SlotTuple{
		ServiceID: t.ServiceID,
		NodeID:    t.NodeID,
	}
}
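// Compile-time assertion (an added sketch, not in the original file):
// taskinit.CheckTasks, called from initTasks above, takes the orchestrator as
// its handler; IsRelatedService, FixTask, and SlotTuple are assumed to
// satisfy the taskinit.InitHandler interface defined in upstream swarmkit.
var _ taskinit.InitHandler = (*Orchestrator)(nil)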