github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/constraintenforcer/constraint_enforcer.go

package constraintenforcer

import (
	"time"

	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/api/genericresource"
	"github.com/docker/swarmkit/log"
	"github.com/docker/swarmkit/manager/constraint"
	"github.com/docker/swarmkit/manager/state"
	"github.com/docker/swarmkit/manager/state/store"
	"github.com/docker/swarmkit/protobuf/ptypes"
)

// ConstraintEnforcer watches for updates to nodes and shuts down tasks that no
// longer satisfy scheduling constraints or resource limits.
type ConstraintEnforcer struct {
	store    *store.MemoryStore
	stopChan chan struct{}
	doneChan chan struct{}
}

// New creates a new ConstraintEnforcer.
func New(store *store.MemoryStore) *ConstraintEnforcer {
	return &ConstraintEnforcer{
		store:    store,
		stopChan: make(chan struct{}),
		doneChan: make(chan struct{}),
	}
}

// Run is the ConstraintEnforcer's main loop.
func (ce *ConstraintEnforcer) Run() {
	defer close(ce.doneChan)

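	// Start watching for node updates before the initial pass over existing
	// nodes; updates that arrive while that pass runs are handled by the
	// event loop below.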
	watcher, cancelWatch := state.Watch(ce.store.WatchQueue(), api.EventUpdateNode{})
	defer cancelWatch()

	var (
		nodes []*api.Node
		err   error
	)
	ce.store.View(func(readTx store.ReadTx) {
		nodes, err = store.FindNodes(readTx, store.All)
	})
	if err != nil {
		log.L.WithError(err).Error("failed to check nodes for noncompliant tasks")
	} else {
		for _, node := range nodes {
			ce.rejectNoncompliantTasks(node)
		}
	}

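	// Main loop: re-check a node's tasks every time that node is updated,
	// until Stop closes stopChan.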
	for {
		select {
		case event := <-watcher:
			node := event.(api.EventUpdateNode).Node
			ce.rejectNoncompliantTasks(node)
		case <-ce.stopChan:
			return
		}
	}
}

func (ce *ConstraintEnforcer) rejectNoncompliantTasks(node *api.Node) {
	// If the availability is "drain", the orchestrator will
	// shut down all tasks.
	// If the availability is "pause", we shouldn't touch
	// the tasks on this node.
	if node.Spec.Availability != api.NodeAvailabilityActive {
		return
	}

	var (
		tasks []*api.Task
		err   error
	)

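	// In a single read transaction, list the tasks assigned to this node and
	// look up the current spec of every service those tasks belong to.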
	services := map[string]*api.Service{}
	ce.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByNodeID(node.ID))
		if err != nil {
			return
		}

		// Deduplicate service IDs using the services map. It's okay for the
		// values to be nil for now; we will look them up from the store next.
		for _, task := range tasks {
			services[task.ServiceID] = nil
		}

		for serviceID := range services {
			services[serviceID] = store.GetService(tx, serviceID)
		}
	})

	if err != nil {
		log.L.WithError(err).Errorf("failed to list tasks for node ID %s", node.ID)
	}

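	// available tracks what remains of the node's resources as its tasks are
	// examined below. fakeStore is only a scratch argument for ClaimResources;
	// nothing reads it in this function.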
	available := &api.Resources{}
	var fakeStore []*api.GenericResource

	if node.Description != nil && node.Description.Resources != nil {
		available = node.Description.Resources.Copy()
	}

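	// Tasks that no longer fit on this node, keyed by task ID.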
	removeTasks := make(map[string]*api.Task)

	// TODO(aaronl): The set of tasks removed will be
	// nondeterministic because it depends on the order of
	// the slice returned from FindTasks. We could do
	// a separate pass over the tasks for each type of
	// resource, and sort by the size of the reservation
	// to remove the most resource-intensive tasks.
loop:
	for _, t := range tasks {
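		// Only consider tasks that are meant to run on this node: desired
		// state at least ASSIGNED and not past COMPLETED.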
		if t.DesiredState < api.TaskStateAssigned || t.DesiredState > api.TaskStateCompleted {
			continue
		}

		// Ensure that the node still satisfies placement constraints.
		// NOTE: If the task is associated with a service then we must use the
		// constraints from the current service spec rather than the
		// constraints from the task spec, because they may be outdated. This
		// will happen if the service was previously updated in a way that
		// only changes the placement constraints and the node matched the
		// placement constraints both before and after that update. In the case
		// of such updates, the tasks are not considered "dirty" and are not
		// restarted, but it does mean that the task spec's placement
		// constraints are outdated. Consider this example:
		// - A service is created with no constraints and a task is scheduled
		//   to a node.
		// - The node is updated to add a label; this doesn't affect the task
		//   on that node because it has no constraints.
		// - The service is updated to add a node label constraint which
		//   matches the label that was just added to the node. The updater
		//   does not shut down the task because only the constraints have
		//   changed and the node still matches the updated constraints.
		// - The node is updated to remove the node label. The node no longer
		//   satisfies the placement constraints of the service, so the task
		//   should be shut down. However, the task's spec still has the
		//   original and outdated constraints (that are still satisfied by
		//   the node). If we used those original constraints then the task
		//   would incorrectly not be removed. This is why the constraints
		//   from the service spec should be used instead.
		var placement *api.Placement
		if service := services[t.ServiceID]; service != nil {
			// This task is associated with a service, so we use the service's
			// current placement constraints.
			placement = service.Spec.Task.Placement
		} else {
			// This task is not associated with a service (or the service no
			// longer exists), so we use the placement constraints from the
			// original task spec.
			placement = t.Spec.Placement
		}
		if placement != nil && len(placement.Constraints) > 0 {
			constraints, _ := constraint.Parse(placement.Constraints)
			if !constraint.NodeMatches(constraints, node) {
				removeTasks[t.ID] = t
				continue
			}
		}

		// Ensure that the task assigned to the node
		// still satisfies the resource limits.
		if t.Spec.Resources != nil && t.Spec.Resources.Reservations != nil {
			if t.Spec.Resources.Reservations.MemoryBytes > available.MemoryBytes {
				removeTasks[t.ID] = t
				continue
			}
			if t.Spec.Resources.Reservations.NanoCPUs > available.NanoCPUs {
				removeTasks[t.ID] = t
				continue
			}
			for _, ta := range t.AssignedGenericResources {
				// Reject the task if one of its assigned generic resources is
				// no longer available on the node (type change or removal).
				if !genericresource.HasResource(ta, available.Generic) {
					removeTasks[t.ID] = t
					continue loop
				}
			}

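			// The task stays on the node, so deduct its reservations from the
			// resources available to the tasks checked after it.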
			available.MemoryBytes -= t.Spec.Resources.Reservations.MemoryBytes
			available.NanoCPUs -= t.Spec.Resources.Reservations.NanoCPUs

			genericresource.ClaimResources(&available.Generic,
				&fakeStore, t.AssignedGenericResources)
		}
	}

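	// Mark the noncompliant tasks as rejected, batching the store updates.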
	if len(removeTasks) != 0 {
		err := ce.store.Batch(func(batch *store.Batch) error {
			for _, t := range removeTasks {
				err := batch.Update(func(tx store.Tx) error {
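					// Re-read the task inside the write transaction; it may
					// have been removed or moved past COMPLETED since the
					// read-only pass above.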
					t = store.GetTask(tx, t.ID)
					if t == nil || t.DesiredState > api.TaskStateCompleted {
						return nil
					}

					// We set the observed state to
					// REJECTED, rather than the desired
					// state. Desired state is owned by the
					// orchestrator, and setting it directly
					// will bypass actions such as
					// restarting the task on another node
					// (if applicable).
					t.Status.State = api.TaskStateRejected
					t.Status.Message = "task rejected by constraint enforcer"
					t.Status.Err = "assigned node no longer meets constraints"
					t.Status.Timestamp = ptypes.MustTimestampProto(time.Now())
					return store.UpdateTask(tx, t)
				})
				if err != nil {
					log.L.WithError(err).Errorf("failed to shut down task %s", t.ID)
				}
			}
			return nil
		})

		if err != nil {
			log.L.WithError(err).Errorf("failed to shut down tasks")
		}
	}
}

// Stop stops the ConstraintEnforcer and waits for the main loop to exit.
func (ce *ConstraintEnforcer) Stop() {
	close(ce.stopChan)
	<-ce.doneChan
}