
     1  package global
     3  import (
     4  	"context"
     6  	""
     7  	""
     8  	""
     9  	""
    10  	""
    11  	""
    12  	""
    13  	""
    14  )
// globalService pairs a global service with the placement constraints
// compiled from its spec, so the constraints are parsed once (in
// updateService) and reused on every reconciliation.
type globalService struct {
	// Service is the service object as last seen by the orchestrator.
	*api.Service

	// Compiled constraints; left nil when the service spec declares no
	// placement constraints.
	constraints []constraint.Constraint
}
    23  // Orchestrator runs a reconciliation loop to create and destroy tasks as
    24  // necessary for global services.
    25  type Orchestrator struct {
    26  	store *store.MemoryStore
    27  	// nodes is the set of non-drained nodes in the cluster, indexed by node ID
    28  	nodes map[string]*api.Node
    29  	// globalServices has all the global services in the cluster, indexed by ServiceID
    30  	globalServices map[string]globalService
    31  	restartTasks   map[string]struct{}
    33  	// stopChan signals to the state machine to stop running.
    34  	stopChan chan struct{}
    35  	// doneChan is closed when the state machine terminates.
    36  	doneChan chan struct{}
    38  	updater  *update.Supervisor
    39  	restarts *restart.Supervisor
    41  	cluster *api.Cluster // local instance of the cluster
    42  }
    44  // NewGlobalOrchestrator creates a new global Orchestrator
    45  func NewGlobalOrchestrator(store *store.MemoryStore) *Orchestrator {
    46  	restartSupervisor := restart.NewSupervisor(store)
    47  	updater := update.NewSupervisor(store, restartSupervisor)
    48  	return &Orchestrator{
    49  		store:          store,
    50  		nodes:          make(map[string]*api.Node),
    51  		globalServices: make(map[string]globalService),
    52  		stopChan:       make(chan struct{}),
    53  		doneChan:       make(chan struct{}),
    54  		updater:        updater,
    55  		restarts:       restartSupervisor,
    56  		restartTasks:   make(map[string]struct{}),
    57  	}
    58  }
    60  func (g *Orchestrator) initTasks(ctx context.Context, readTx store.ReadTx) error {
    61  	return taskinit.CheckTasks(ctx,, readTx, g, g.restarts)
    62  }
    64  // Run contains the global orchestrator event loop
    65  func (g *Orchestrator) Run(ctx context.Context) error {
    66  	defer close(g.doneChan)
    68  	// Watch changes to services and tasks
    69  	queue :=
    70  	watcher, cancel := queue.Watch()
    71  	defer cancel()
    73  	// lookup the cluster
    74  	var err error
    75 store.ReadTx) {
    76  		var clusters []*api.Cluster
    77  		clusters, err = store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
    79  		if len(clusters) != 1 {
    80  			return // just pick up the cluster when it is created.
    81  		}
    82  		g.cluster = clusters[0]
    83  	})
    84  	if err != nil {
    85  		return err
    86  	}
    88  	// Get list of nodes
    89  	var nodes []*api.Node
    90 store.ReadTx) {
    91  		nodes, err = store.FindNodes(readTx, store.All)
    92  	})
    93  	if err != nil {
    94  		return err
    95  	}
    96  	for _, n := range nodes {
    97  		g.updateNode(n)
    98  	}
   100  	// Lookup global services
   101  	var existingServices []*api.Service
   102 store.ReadTx) {
   103  		existingServices, err = store.FindServices(readTx, store.All)
   104  	})
   105  	if err != nil {
   106  		return err
   107  	}
   109  	var reconcileServiceIDs []string
   110  	for _, s := range existingServices {
   111  		if orchestrator.IsGlobalService(s) {
   112  			g.updateService(s)
   113  			reconcileServiceIDs = append(reconcileServiceIDs, s.ID)
   114  		}
   115  	}
   117  	// fix tasks in store before reconciliation loop
   118 store.ReadTx) {
   119  		err = g.initTasks(ctx, readTx)
   120  	})
   121  	if err != nil {
   122  		return err
   123  	}
   125  	g.tickTasks(ctx)
   126  	g.reconcileServices(ctx, reconcileServiceIDs)
   128  	for {
   129  		select {
   130  		case event := <-watcher:
   131  			// TODO(stevvooe): Use ctx to limit running time of operation.
   132  			switch v := event.(type) {
   133  			case api.EventUpdateCluster:
   134  				g.cluster = v.Cluster
   135  			case api.EventCreateService:
   136  				if !orchestrator.IsGlobalService(v.Service) {
   137  					continue
   138  				}
   139  				g.updateService(v.Service)
   140  				g.reconcileServices(ctx, []string{v.Service.ID})
   141  			case api.EventUpdateService:
   142  				if !orchestrator.IsGlobalService(v.Service) {
   143  					continue
   144  				}
   145  				g.updateService(v.Service)
   146  				g.reconcileServices(ctx, []string{v.Service.ID})
   147  			case api.EventDeleteService:
   148  				if !orchestrator.IsGlobalService(v.Service) {
   149  					continue
   150  				}
   151  				orchestrator.SetServiceTasksRemove(ctx,, v.Service)
   152  				// delete the service from service map
   153  				delete(g.globalServices, v.Service.ID)
   154  				g.restarts.ClearServiceHistory(v.Service.ID)
   155  			case api.EventCreateNode:
   156  				g.updateNode(v.Node)
   157  				g.reconcileOneNode(ctx, v.Node)
   158  			case api.EventUpdateNode:
   159  				g.updateNode(v.Node)
   160  				g.reconcileOneNode(ctx, v.Node)
   161  			case api.EventDeleteNode:
   162  				g.foreachTaskFromNode(ctx, v.Node, g.deleteTask)
   163  				delete(g.nodes, v.Node.ID)
   164  			case api.EventUpdateTask:
   165  				g.handleTaskChange(ctx, v.Task)
   166  			}
   167  		case <-g.stopChan:
   168  			return nil
   169  		}
   170  		g.tickTasks(ctx)
   171  	}
   172  }
   174  // FixTask validates a task with the current cluster settings, and takes
   175  // action to make it conformant to node state and service constraint
   176  // it's called at orchestrator initialization
   177  func (g *Orchestrator) FixTask(ctx context.Context, batch *store.Batch, t *api.Task) {
   178  	if _, exists := g.globalServices[t.ServiceID]; !exists {
   179  		return
   180  	}
   181  	// if a task's DesiredState has past running, the task has been processed
   182  	if t.DesiredState > api.TaskStateRunning {
   183  		return
   184  	}
   186  	var node *api.Node
   187  	if t.NodeID != "" {
   188  		node = g.nodes[t.NodeID]
   189  	}
   190  	// if the node no longer valid, remove the task
   191  	if t.NodeID == "" || orchestrator.InvalidNode(node) {
   192  		g.shutdownTask(ctx, batch, t)
   193  		return
   194  	}
   196  	// restart a task if it fails
   197  	if t.Status.State > api.TaskStateRunning {
   198  		g.restartTasks[t.ID] = struct{}{}
   199  	}
   200  }
   202  // handleTaskChange defines what orchestrator does when a task is updated by agent
   203  func (g *Orchestrator) handleTaskChange(ctx context.Context, t *api.Task) {
   204  	if _, exists := g.globalServices[t.ServiceID]; !exists {
   205  		return
   206  	}
   207  	// if a task's DesiredState has passed running, it
   208  	// means the task has been processed
   209  	if t.DesiredState > api.TaskStateRunning {
   210  		return
   211  	}
   213  	// if a task has passed running, restart it
   214  	if t.Status.State > api.TaskStateRunning {
   215  		g.restartTasks[t.ID] = struct{}{}
   216  	}
   217  }
// Stop stops the orchestrator. It signals the event loop through stopChan,
// blocks until Run has closed doneChan, and only then cancels the work
// tracked by the update and restart supervisors.
//
// Stop closes stopChan, so calling it more than once panics.
func (g *Orchestrator) Stop() {
	close(g.stopChan)
	<-g.doneChan
	g.updater.CancelAll()
	g.restarts.CancelAll()
}
   227  func (g *Orchestrator) foreachTaskFromNode(ctx context.Context, node *api.Node, cb func(context.Context, *store.Batch, *api.Task)) {
   228  	var (
   229  		tasks []*api.Task
   230  		err   error
   231  	)
   232 store.ReadTx) {
   233  		tasks, err = store.FindTasks(tx, store.ByNodeID(node.ID))
   234  	})
   235  	if err != nil {
   236  		log.G(ctx).WithError(err).Errorf("global orchestrator: foreachTaskFromNode failed finding tasks")
   237  		return
   238  	}
   240  	err = *store.Batch) error {
   241  		for _, t := range tasks {
   242  			// Global orchestrator only removes tasks from globalServices
   243  			if _, exists := g.globalServices[t.ServiceID]; exists {
   244  				cb(ctx, batch, t)
   245  			}
   246  		}
   247  		return nil
   248  	})
   249  	if err != nil {
   250  		log.G(ctx).WithError(err).Errorf("global orchestrator: foreachTaskFromNode failed batching tasks")
   251  	}
   252  }
   254  func (g *Orchestrator) reconcileServices(ctx context.Context, serviceIDs []string) {
   255  	nodeTasks := make(map[string]map[string][]*api.Task)
   257 store.ReadTx) {
   258  		for _, serviceID := range serviceIDs {
   259  			service := g.globalServices[serviceID].Service
   260  			if service == nil {
   261  				continue
   262  			}
   264  			tasks, err := store.FindTasks(tx, store.ByServiceID(serviceID))
   265  			if err != nil {
   266  				log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServices failed finding tasks for service %s", serviceID)
   267  				continue
   268  			}
   270  			// nodeID -> task list
   271  			nodeTasks[serviceID] = make(map[string][]*api.Task)
   273  			for _, t := range tasks {
   274  				nodeTasks[serviceID][t.NodeID] = append(nodeTasks[serviceID][t.NodeID], t)
   275  			}
   277  			// Keep all runnable instances of this service,
   278  			// and instances that were not be restarted due
   279  			// to restart policy but may be updated if the
   280  			// service spec changed.
   281  			for nodeID, slot := range nodeTasks[serviceID] {
   282  				updatable := g.restarts.UpdatableTasksInSlot(ctx, slot, g.globalServices[serviceID].Service)
   283  				if len(updatable) != 0 {
   284  					nodeTasks[serviceID][nodeID] = updatable
   285  				} else {
   286  					delete(nodeTasks[serviceID], nodeID)
   287  				}
   288  			}
   290  		}
   291  	})
   293  	updates := make(map[*api.Service][]orchestrator.Slot)
   295  	err := *store.Batch) error {
   296  		for _, serviceID := range serviceIDs {
   297  			var updateTasks []orchestrator.Slot
   299  			if _, exists := nodeTasks[serviceID]; !exists {
   300  				continue
   301  			}
   303  			service := g.globalServices[serviceID]
   305  			for nodeID, node := range g.nodes {
   306  				meetsConstraints := constraint.NodeMatches(service.constraints, node)
   307  				ntasks := nodeTasks[serviceID][nodeID]
   308  				delete(nodeTasks[serviceID], nodeID)
   310  				if !meetsConstraints {
   311  					g.shutdownTasks(ctx, batch, ntasks)
   312  					continue
   313  				}
   315  				if node.Spec.Availability == api.NodeAvailabilityPause {
   316  					// the node is paused, so we won't add or update
   317  					// any tasks
   318  					continue
   319  				}
   321  				// this node needs to run 1 copy of the task
   322  				if len(ntasks) == 0 {
   323  					g.addTask(ctx, batch, service.Service, nodeID)
   324  				} else {
   325  					updateTasks = append(updateTasks, ntasks)
   326  				}
   327  			}
   329  			if len(updateTasks) > 0 {
   330  				updates[service.Service] = updateTasks
   331  			}
   333  			// Remove any tasks assigned to nodes not found in g.nodes.
   334  			// These must be associated with nodes that are drained, or
   335  			// nodes that no longer exist.
   336  			for _, ntasks := range nodeTasks[serviceID] {
   337  				g.shutdownTasks(ctx, batch, ntasks)
   338  			}
   339  		}
   340  		return nil
   341  	})
   343  	if err != nil {
   344  		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServices transaction failed")
   345  	}
   347  	for service, updateTasks := range updates {
   348  		g.updater.Update(ctx, g.cluster, service, updateTasks)
   349  	}
   350  }
   352  // updateNode updates g.nodes based on the current node value
   353  func (g *Orchestrator) updateNode(node *api.Node) {
   354  	if node.Spec.Availability == api.NodeAvailabilityDrain || node.Status.State == api.NodeStatus_DOWN {
   355  		delete(g.nodes, node.ID)
   356  	} else {
   357  		g.nodes[node.ID] = node
   358  	}
   359  }
   361  // updateService updates g.globalServices based on the current service value
   362  func (g *Orchestrator) updateService(service *api.Service) {
   363  	var constraints []constraint.Constraint
   365  	if service.Spec.Task.Placement != nil && len(service.Spec.Task.Placement.Constraints) != 0 {
   366  		constraints, _ = constraint.Parse(service.Spec.Task.Placement.Constraints)
   367  	}
   369  	g.globalServices[service.ID] = globalService{
   370  		Service:     service,
   371  		constraints: constraints,
   372  	}
   373  }
   375  // reconcileOneNode checks all global services on one node
   376  func (g *Orchestrator) reconcileOneNode(ctx context.Context, node *api.Node) {
   377  	if node.Spec.Availability == api.NodeAvailabilityDrain {
   378  		log.G(ctx).Debugf("global orchestrator: node %s in drain state, shutting down its tasks", node.ID)
   379  		g.foreachTaskFromNode(ctx, node, g.shutdownTask)
   380  		return
   381  	}
   383  	if node.Status.State == api.NodeStatus_DOWN {
   384  		log.G(ctx).Debugf("global orchestrator: node %s is down, shutting down its tasks", node.ID)
   385  		g.foreachTaskFromNode(ctx, node, g.shutdownTask)
   386  		return
   387  	}
   389  	if node.Spec.Availability == api.NodeAvailabilityPause {
   390  		// the node is paused, so we won't add or update tasks
   391  		return
   392  	}
   394  	node, exists := g.nodes[node.ID]
   395  	if !exists {
   396  		return
   397  	}
   399  	// tasks by service
   400  	tasks := make(map[string][]*api.Task)
   402  	var (
   403  		tasksOnNode []*api.Task
   404  		err         error
   405  	)
   407 store.ReadTx) {
   408  		tasksOnNode, err = store.FindTasks(tx, store.ByNodeID(node.ID))
   409  	})
   410  	if err != nil {
   411  		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcile failed finding tasks on node %s", node.ID)
   412  		return
   413  	}
   415  	for serviceID, service := range g.globalServices {
   416  		for _, t := range tasksOnNode {
   417  			if t.ServiceID != serviceID {
   418  				continue
   419  			}
   420  			tasks[serviceID] = append(tasks[serviceID], t)
   421  		}
   423  		// Keep all runnable instances of this service,
   424  		// and instances that were not be restarted due
   425  		// to restart policy but may be updated if the
   426  		// service spec changed.
   427  		for serviceID, slot := range tasks {
   428  			updatable := g.restarts.UpdatableTasksInSlot(ctx, slot, service.Service)
   430  			if len(updatable) != 0 {
   431  				tasks[serviceID] = updatable
   432  			} else {
   433  				delete(tasks, serviceID)
   434  			}
   435  		}
   436  	}
   438  	err = *store.Batch) error {
   439  		for serviceID, service := range g.globalServices {
   440  			if !constraint.NodeMatches(service.constraints, node) {
   441  				continue
   442  			}
   444  			if len(tasks) == 0 {
   445  				g.addTask(ctx, batch, service.Service, node.ID)
   446  			} else {
   447  				// If task is out of date, update it. This can happen
   448  				// on node reconciliation if, for example, we pause a
   449  				// node, update the service, and then activate the node
   450  				// later.
   452  				// We don't use g.updater here for two reasons:
   453  				// - This is not a rolling update. Since it was not
   454  				//   triggered directly by updating the service, it
   455  				//   should not observe the rolling update parameters
   456  				//   or show status in UpdateStatus.
   457  				// - Calling Update cancels any current rolling updates
   458  				//   for the service, such as one triggered by service
   459  				//   reconciliation.
   461  				var (
   462  					dirtyTasks []*api.Task
   463  					cleanTasks []*api.Task
   464  				)
   466  				for _, t := range tasks[serviceID] {
   467  					if orchestrator.IsTaskDirty(service.Service, t, node) {
   468  						dirtyTasks = append(dirtyTasks, t)
   469  					} else {
   470  						cleanTasks = append(cleanTasks, t)
   471  					}
   472  				}
   474  				if len(cleanTasks) == 0 {
   475  					g.addTask(ctx, batch, service.Service, node.ID)
   476  				} else {
   477  					dirtyTasks = append(dirtyTasks, cleanTasks[1:]...)
   478  				}
   479  				g.shutdownTasks(ctx, batch, dirtyTasks)
   480  			}
   481  		}
   482  		return nil
   483  	})
   484  	if err != nil {
   485  		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServiceOneNode batch failed")
   486  	}
   487  }
   489  func (g *Orchestrator) tickTasks(ctx context.Context) {
   490  	if len(g.restartTasks) == 0 {
   491  		return
   492  	}
   493  	err := *store.Batch) error {
   494  		for taskID := range g.restartTasks {
   495  			err := batch.Update(func(tx store.Tx) error {
   496  				t := store.GetTask(tx, taskID)
   497  				if t == nil || t.DesiredState > api.TaskStateRunning {
   498  					return nil
   499  				}
   501  				service := store.GetService(tx, t.ServiceID)
   502  				if service == nil {
   503  					return nil
   504  				}
   506  				node, nodeExists := g.nodes[t.NodeID]
   507  				serviceEntry, serviceExists := g.globalServices[t.ServiceID]
   508  				if !nodeExists || !serviceExists {
   509  					return nil
   510  				}
   512  				if node.Spec.Availability == api.NodeAvailabilityPause ||
   513  					!constraint.NodeMatches(serviceEntry.constraints, node) {
   514  					t.DesiredState = api.TaskStateShutdown
   515  					return store.UpdateTask(tx, t)
   516  				}
   518  				return g.restarts.Restart(ctx, tx, g.cluster, service, *t)
   519  			})
   520  			if err != nil {
   521  				log.G(ctx).WithError(err).Errorf("orchestrator restartTask transaction failed")
   522  			}
   523  		}
   524  		return nil
   525  	})
   526  	if err != nil {
   527  		log.G(ctx).WithError(err).Errorf("global orchestrator: restartTask transaction failed")
   528  	}
   529  	g.restartTasks = make(map[string]struct{})
   530  }
   532  func (g *Orchestrator) shutdownTask(ctx context.Context, batch *store.Batch, t *api.Task) {
   533  	// set existing task DesiredState to TaskStateShutdown
   534  	// TODO(aaronl): optimistic update?
   535  	err := batch.Update(func(tx store.Tx) error {
   536  		t = store.GetTask(tx, t.ID)
   537  		if t != nil && t.DesiredState < api.TaskStateShutdown {
   538  			t.DesiredState = api.TaskStateShutdown
   539  			return store.UpdateTask(tx, t)
   540  		}
   541  		return nil
   542  	})
   543  	if err != nil {
   544  		log.G(ctx).WithError(err).Errorf("global orchestrator: shutdownTask failed to shut down %s", t.ID)
   545  	}
   546  }
   548  func (g *Orchestrator) addTask(ctx context.Context, batch *store.Batch, service *api.Service, nodeID string) {
   549  	task := orchestrator.NewTask(g.cluster, service, 0, nodeID)
   551  	err := batch.Update(func(tx store.Tx) error {
   552  		if store.GetService(tx, service.ID) == nil {
   553  			return nil
   554  		}
   555  		return store.CreateTask(tx, task)
   556  	})
   557  	if err != nil {
   558  		log.G(ctx).WithError(err).Errorf("global orchestrator: failed to create task")
   559  	}
   560  }
   562  func (g *Orchestrator) shutdownTasks(ctx context.Context, batch *store.Batch, tasks []*api.Task) {
   563  	for _, t := range tasks {
   564  		g.shutdownTask(ctx, batch, t)
   565  	}
   566  }
   568  func (g *Orchestrator) deleteTask(ctx context.Context, batch *store.Batch, t *api.Task) {
   569  	err := batch.Update(func(tx store.Tx) error {
   570  		return store.DeleteTask(tx, t.ID)
   571  	})
   572  	if err != nil {
   573  		log.G(ctx).WithError(err).Errorf("global orchestrator: deleteTask failed to delete %s", t.ID)
   574  	}
   575  }
   577  // IsRelatedService returns true if the service should be governed by this orchestrator
   578  func (g *Orchestrator) IsRelatedService(service *api.Service) bool {
   579  	return orchestrator.IsGlobalService(service)
   580  }
   582  // SlotTuple returns a slot tuple for the global service task.
   583  func (g *Orchestrator) SlotTuple(t *api.Task) orchestrator.SlotTuple {
   584  	return orchestrator.SlotTuple{
   585  		ServiceID: t.ServiceID,
   586  		NodeID:    t.NodeID,
   587  	}
   588  }