github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/jobs/global/reconciler.go (about)

     1  package global
     2  
     3  import (
     4  	"context"
     5  
     6  	"github.com/docker/swarmkit/api"
     7  	"github.com/docker/swarmkit/manager/constraint"
     8  	"github.com/docker/swarmkit/manager/orchestrator"
     9  	"github.com/docker/swarmkit/manager/state/store"
    10  )
    11  
    12  // restartSupervisor is an interface representing the methods from the
    13  // restart.SupervisorInterface that are actually needed by the reconciler. This
    14  // more limited interface allows us to write a less ugly fake for unit testing.
    15  type restartSupervisor interface {
    16  	Restart(context.Context, store.Tx, *api.Cluster, *api.Service, api.Task) error
    17  }
    18  
    19  // Reconciler is an object that manages reconciliation of global jobs. It is
    20  // blocking and non-asynchronous, for ease of testing. It implements the
    21  // Reconciler interface from the orchestrator package above it, and the
    22  // taskinit.InitHandler interface.
    23  type Reconciler struct {
    24  	store *store.MemoryStore
    25  
    26  	restart restartSupervisor
    27  }
    28  
    29  // NewReconciler creates a new global job reconciler.
    30  func NewReconciler(store *store.MemoryStore, restart restartSupervisor) *Reconciler {
    31  	return &Reconciler{
    32  		store:   store,
    33  		restart: restart,
    34  	}
    35  }
    36  
    37  // ReconcileService reconciles one global job service.
    38  func (r *Reconciler) ReconcileService(id string) error {
    39  	var (
    40  		service *api.Service
    41  		cluster *api.Cluster
    42  		tasks   []*api.Task
    43  		nodes   []*api.Node
    44  		viewErr error
    45  	)
    46  
    47  	// we need to first get the latest iteration of the service, its tasks, and
    48  	// the nodes in the cluster.
    49  	r.store.View(func(tx store.ReadTx) {
    50  		service = store.GetService(tx, id)
    51  		if service == nil {
    52  			return
    53  		}
    54  
    55  		// getting tasks with FindTasks should only return an error if we've
    56  		// made a mistake coding; there's no user-input or even reasonable
    57  		// system state that can cause it. If it returns an error, we'll just
    58  		// panic and crash.
    59  		tasks, viewErr = store.FindTasks(tx, store.ByServiceID(id))
    60  		if viewErr != nil {
    61  			return
    62  		}
    63  
    64  		// same as with FindTasks
    65  		nodes, viewErr = store.FindNodes(tx, store.All)
    66  		if viewErr != nil {
    67  			return
    68  		}
    69  
    70  		clusters, _ := store.FindClusters(tx, store.All)
    71  		if len(clusters) == 1 {
    72  			cluster = clusters[0]
    73  		} else if len(clusters) > 1 {
    74  			panic("there should never be more than one cluster object")
    75  		}
    76  	})
    77  
    78  	if viewErr != nil {
    79  		return viewErr
    80  	}
    81  
    82  	// the service may be nil if the service has been deleted before we entered
    83  	// the View.
    84  	if service == nil {
    85  		return nil
    86  	}
    87  
    88  	if service.JobStatus == nil {
    89  		service.JobStatus = &api.JobStatus{}
    90  	}
    91  
    92  	// we need to compute the constraints on the service so we know which nodes
    93  	// to schedule it on
    94  	var constraints []constraint.Constraint
    95  	if service.Spec.Task.Placement != nil && len(service.Spec.Task.Placement.Constraints) != 0 {
    96  		// constraint.Parse does return an error, but we don't need to check
    97  		// it, because it was already checked when the service was created or
    98  		// updated.
    99  		constraints, _ = constraint.Parse(service.Spec.Task.Placement.Constraints)
   100  	}
   101  
   102  	var candidateNodes []string
   103  	var invalidNodes []string
   104  	for _, node := range nodes {
   105  		// instead of having a big ugly multi-line boolean expression in the
   106  		// if-statement, we'll have several if-statements, and bail out of
   107  		// this loop iteration with continue if the node is not acceptable
   108  		if !constraint.NodeMatches(constraints, node) {
   109  			continue
   110  		}
   111  
   112  		// if a node is invalid, we should remove any tasks that might be on it
   113  		if orchestrator.InvalidNode(node) {
   114  			invalidNodes = append(invalidNodes, node.ID)
   115  			continue
   116  		}
   117  
   118  		if node.Spec.Availability != api.NodeAvailabilityActive {
   119  			continue
   120  		}
   121  		if node.Status.State != api.NodeStatus_READY {
   122  			continue
   123  		}
   124  		// you can append to a nil slice and get a non-nil slice, which is
   125  		// pretty slick.
   126  		candidateNodes = append(candidateNodes, node.ID)
   127  	}
   128  
   129  	// now, we have a list of all nodes that match constraints. it's time to
   130  	// match running tasks to the nodes. we need to identify all nodes that
   131  	// need new tasks, which is any node that doesn't have a task of this job
   132  	// iteration. trade some space for some time by building a node ID to task
   133  	// ID mapping, so that we're just doing 2x linear operation, instead of a
   134  	// quadratic operation.
   135  	nodeToTask := map[string]string{}
   136  	// additionally, while we're iterating through tasks, if any of those tasks
   137  	// are failed, we'll hand them to the restart supervisor to handle
   138  	restartTasks := []string{}
   139  	// and if there are any tasks belonging to old job iterations, set them to
   140  	// be removed
   141  	removeTasks := []string{}
   142  	for _, task := range tasks {
   143  		// match all tasks belonging to this job iteration which are in desired
   144  		// state completed, including failed tasks. We only want to create
   145  		// tasks for nodes on which there are no existing tasks.
   146  		if task.JobIteration != nil {
   147  			if task.JobIteration.Index == service.JobStatus.JobIteration.Index &&
   148  				task.DesiredState <= api.TaskStateCompleted {
   149  				// we already know the task is desired to be executing (because its
   150  				// desired state is Completed). Check here to see if it's already
   151  				// failed, so we can restart it
   152  				if task.Status.State > api.TaskStateCompleted {
   153  					restartTasks = append(restartTasks, task.ID)
   154  				}
   155  				nodeToTask[task.NodeID] = task.ID
   156  			}
   157  
   158  			if task.JobIteration.Index != service.JobStatus.JobIteration.Index {
   159  				if task.DesiredState != api.TaskStateRemove {
   160  					removeTasks = append(removeTasks, task.ID)
   161  				}
   162  			}
   163  		}
   164  	}
   165  
   166  	return r.store.Batch(func(batch *store.Batch) error {
   167  		// first, create any new tasks required.
   168  		for _, node := range candidateNodes {
   169  			// check if there is a task for this node ID. If not, then we need
   170  			// to create one.
   171  			if _, ok := nodeToTask[node]; !ok {
   172  				if err := batch.Update(func(tx store.Tx) error {
   173  					// if the node does not already have a running or completed
   174  					// task, create a task for this node.
   175  					task := orchestrator.NewTask(cluster, service, 0, node)
   176  					task.JobIteration = &service.JobStatus.JobIteration
   177  					task.DesiredState = api.TaskStateCompleted
   178  					return store.CreateTask(tx, task)
   179  				}); err != nil {
   180  					return err
   181  				}
   182  			}
   183  		}
   184  
   185  		// then, restart any tasks that are failed
   186  		for _, taskID := range restartTasks {
   187  			if err := batch.Update(func(tx store.Tx) error {
   188  				// get the latest version of the task for the restart
   189  				t := store.GetTask(tx, taskID)
   190  				// if it's deleted, nothing to do
   191  				if t == nil {
   192  					return nil
   193  				}
   194  
   195  				// if it's not still desired to be running, then don't restart
   196  				// it.
   197  				if t.DesiredState > api.TaskStateCompleted {
   198  					return nil
   199  				}
   200  
   201  				// Finally, restart it
   202  				// TODO(dperny): pass in context to ReconcileService, so we can
   203  				// pass it in here.
   204  				return r.restart.Restart(context.Background(), tx, cluster, service, *t)
   205  			}); err != nil {
   206  				// TODO(dperny): probably should log like in the other
   207  				// orchestrators instead of returning here.
   208  				return err
   209  			}
   210  		}
   211  
   212  		// remove tasks that need to be removed
   213  		for _, taskID := range removeTasks {
   214  			if err := batch.Update(func(tx store.Tx) error {
   215  				t := store.GetTask(tx, taskID)
   216  				if t == nil {
   217  					return nil
   218  				}
   219  
   220  				if t.DesiredState == api.TaskStateRemove {
   221  					return nil
   222  				}
   223  
   224  				t.DesiredState = api.TaskStateRemove
   225  				return store.UpdateTask(tx, t)
   226  			}); err != nil {
   227  				return err
   228  			}
   229  		}
   230  
   231  		// finally, shut down any tasks on invalid nodes
   232  		for _, nodeID := range invalidNodes {
   233  			if taskID, ok := nodeToTask[nodeID]; ok {
   234  				if err := batch.Update(func(tx store.Tx) error {
   235  					t := store.GetTask(tx, taskID)
   236  					if t == nil {
   237  						return nil
   238  					}
   239  					// if the task is still desired to be running, and is still
   240  					// actually, running, then it still needs to be shut down.
   241  					if t.DesiredState > api.TaskStateCompleted || t.Status.State <= api.TaskStateRunning {
   242  						t.DesiredState = api.TaskStateShutdown
   243  						return store.UpdateTask(tx, t)
   244  					}
   245  					return nil
   246  				}); err != nil {
   247  					return err
   248  				}
   249  			}
   250  		}
   251  		return nil
   252  	})
   253  }
   254  
   255  // IsRelatedService returns true if the task is a global job. This method
   256  // fulfills the taskinit.InitHandler interface. Because it is just a wrapper
   257  // around a well-tested function call, it has no tests of its own.
   258  func (r *Reconciler) IsRelatedService(service *api.Service) bool {
   259  	return orchestrator.IsGlobalJob(service)
   260  }
   261  
   262  // FixTask validates that a task is compliant with the rest of the cluster
   263  // state, and fixes it if it's not. This covers some main scenarios:
   264  //
   265  // * The node that the task is running on is now paused or drained. we do not
   266  //   need to check if the node still meets constraints -- that is the purview
   267  //   of the constraint enforcer.
   268  // * The task has failed and needs to be restarted.
   269  //
   270  // This implements the FixTask method of the taskinit.InitHandler interface.
   271  func (r *Reconciler) FixTask(ctx context.Context, batch *store.Batch, t *api.Task) {
   272  	// tasks already desired to be shut down need no action.
   273  	if t.DesiredState > api.TaskStateCompleted {
   274  		return
   275  	}
   276  
   277  	batch.Update(func(tx store.Tx) error {
   278  		node := store.GetNode(tx, t.NodeID)
   279  		// if the node is no longer a valid node for this task, we need to shut
   280  		// it down
   281  		if orchestrator.InvalidNode(node) {
   282  			task := store.GetTask(tx, t.ID)
   283  			if task != nil && task.DesiredState < api.TaskStateShutdown {
   284  				task.DesiredState = api.TaskStateShutdown
   285  				return store.UpdateTask(tx, task)
   286  			}
   287  		}
   288  		// we will reconcile all services after fixing the tasks, so we don't
   289  		// need to restart tasks right now; we'll do so after this.
   290  		return nil
   291  	})
   292  }
   293  
   294  // SlotTuple returns a slot tuple representing this task. It implements the
   295  // taskinit.InitHandler interface.
   296  func (r *Reconciler) SlotTuple(t *api.Task) orchestrator.SlotTuple {
   297  	return orchestrator.SlotTuple{
   298  		ServiceID: t.ServiceID,
   299  		NodeID:    t.NodeID,
   300  	}
   301  }