github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/replicated/tasks.go (about)

     1  package replicated
     2  
     3  import (
     4  	"context"
     5  
     6  	"github.com/docker/go-events"
     7  	"github.com/docker/swarmkit/api"
     8  	"github.com/docker/swarmkit/log"
     9  	"github.com/docker/swarmkit/manager/orchestrator"
    10  	"github.com/docker/swarmkit/manager/orchestrator/taskinit"
    11  	"github.com/docker/swarmkit/manager/state/store"
    12  )
    13  
    14  // This file provides task-level orchestration. It observes changes to task
    15  // and node state and kills/recreates tasks if necessary. This is distinct from
    16  // service-level reconciliation, which observes changes to services and creates
    17  // and/or kills tasks to match the service definition.
    18  
    19  func (r *Orchestrator) initTasks(ctx context.Context, readTx store.ReadTx) error {
    20  	return taskinit.CheckTasks(ctx, r.store, readTx, r, r.restarts)
    21  }
    22  
    23  func (r *Orchestrator) handleTaskEvent(ctx context.Context, event events.Event) {
    24  	switch v := event.(type) {
    25  	case api.EventDeleteNode:
    26  		r.restartTasksByNodeID(ctx, v.Node.ID)
    27  	case api.EventCreateNode:
    28  		r.handleNodeChange(ctx, v.Node)
    29  	case api.EventUpdateNode:
    30  		r.handleNodeChange(ctx, v.Node)
    31  	case api.EventDeleteTask:
    32  		if v.Task.DesiredState <= api.TaskStateRunning {
    33  			service := r.resolveService(ctx, v.Task)
    34  			if !orchestrator.IsReplicatedService(service) {
    35  				return
    36  			}
    37  			r.reconcileServices[service.ID] = service
    38  		}
    39  		r.restarts.Cancel(v.Task.ID)
    40  	case api.EventUpdateTask:
    41  		r.handleTaskChange(ctx, v.Task)
    42  	case api.EventCreateTask:
    43  		r.handleTaskChange(ctx, v.Task)
    44  	}
    45  }
    46  
    47  func (r *Orchestrator) tickTasks(ctx context.Context) {
    48  	if len(r.restartTasks) > 0 {
    49  		err := r.store.Batch(func(batch *store.Batch) error {
    50  			for taskID := range r.restartTasks {
    51  				err := batch.Update(func(tx store.Tx) error {
    52  					// TODO(aaronl): optimistic update?
    53  					t := store.GetTask(tx, taskID)
    54  					if t != nil {
    55  						if t.DesiredState > api.TaskStateRunning {
    56  							return nil
    57  						}
    58  
    59  						service := store.GetService(tx, t.ServiceID)
    60  						if !orchestrator.IsReplicatedService(service) {
    61  							return nil
    62  						}
    63  
    64  						// Restart task if applicable
    65  						if err := r.restarts.Restart(ctx, tx, r.cluster, service, *t); err != nil {
    66  							return err
    67  						}
    68  					}
    69  					return nil
    70  				})
    71  				if err != nil {
    72  					log.G(ctx).WithError(err).Errorf("Orchestrator task reaping transaction failed")
    73  				}
    74  			}
    75  			return nil
    76  		})
    77  
    78  		if err != nil {
    79  			log.G(ctx).WithError(err).Errorf("orchestrator task removal batch failed")
    80  		}
    81  
    82  		r.restartTasks = make(map[string]struct{})
    83  	}
    84  }
    85  
    86  func (r *Orchestrator) restartTasksByNodeID(ctx context.Context, nodeID string) {
    87  	var err error
    88  	r.store.View(func(tx store.ReadTx) {
    89  		var tasks []*api.Task
    90  		tasks, err = store.FindTasks(tx, store.ByNodeID(nodeID))
    91  		if err != nil {
    92  			return
    93  		}
    94  
    95  		for _, t := range tasks {
    96  			if t.DesiredState > api.TaskStateRunning {
    97  				continue
    98  			}
    99  			service := store.GetService(tx, t.ServiceID)
   100  			if orchestrator.IsReplicatedService(service) {
   101  				r.restartTasks[t.ID] = struct{}{}
   102  			}
   103  		}
   104  	})
   105  	if err != nil {
   106  		log.G(ctx).WithError(err).Errorf("failed to list tasks to remove")
   107  	}
   108  }
   109  
   110  func (r *Orchestrator) handleNodeChange(ctx context.Context, n *api.Node) {
   111  	if !orchestrator.InvalidNode(n) {
   112  		return
   113  	}
   114  
   115  	r.restartTasksByNodeID(ctx, n.ID)
   116  }
   117  
   118  // handleTaskChange defines what orchestrator does when a task is updated by agent.
   119  func (r *Orchestrator) handleTaskChange(ctx context.Context, t *api.Task) {
   120  	// If we already set the desired state past TaskStateRunning, there is no
   121  	// further action necessary.
   122  	if t.DesiredState > api.TaskStateRunning {
   123  		return
   124  	}
   125  
   126  	var (
   127  		n       *api.Node
   128  		service *api.Service
   129  	)
   130  	r.store.View(func(tx store.ReadTx) {
   131  		if t.NodeID != "" {
   132  			n = store.GetNode(tx, t.NodeID)
   133  		}
   134  		if t.ServiceID != "" {
   135  			service = store.GetService(tx, t.ServiceID)
   136  		}
   137  	})
   138  
   139  	if !orchestrator.IsReplicatedService(service) {
   140  		return
   141  	}
   142  
   143  	if t.Status.State > api.TaskStateRunning ||
   144  		(t.NodeID != "" && orchestrator.InvalidNode(n)) {
   145  		r.restartTasks[t.ID] = struct{}{}
   146  	}
   147  }
   148  
   149  // FixTask validates a task with the current cluster settings, and takes
   150  // action to make it conformant. it's called at orchestrator initialization.
   151  func (r *Orchestrator) FixTask(ctx context.Context, batch *store.Batch, t *api.Task) {
   152  	// If we already set the desired state past TaskStateRunning, there is no
   153  	// further action necessary.
   154  	if t.DesiredState > api.TaskStateRunning {
   155  		return
   156  	}
   157  
   158  	var (
   159  		n       *api.Node
   160  		service *api.Service
   161  	)
   162  	batch.Update(func(tx store.Tx) error {
   163  		if t.NodeID != "" {
   164  			n = store.GetNode(tx, t.NodeID)
   165  		}
   166  		if t.ServiceID != "" {
   167  			service = store.GetService(tx, t.ServiceID)
   168  		}
   169  		return nil
   170  	})
   171  
   172  	if !orchestrator.IsReplicatedService(service) {
   173  		return
   174  	}
   175  
   176  	if t.Status.State > api.TaskStateRunning ||
   177  		(t.NodeID != "" && orchestrator.InvalidNode(n)) {
   178  		r.restartTasks[t.ID] = struct{}{}
   179  		return
   180  	}
   181  }