github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/replicated/services.go

package replicated

import (
	"context"
	"sort"

	"github.com/docker/go-events"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/log"
	"github.com/docker/swarmkit/manager/orchestrator"
	"github.com/docker/swarmkit/manager/state/store"
)

// This file provides service-level orchestration. It observes changes to
// services and creates and destroys tasks as necessary to match the service
// specifications. This is different from task-level orchestration, which
// responds to changes in individual tasks (or nodes which run them).

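// initCluster looks up the default cluster object and caches it on the
// orchestrator so it can be passed along when new tasks are created. If the
// cluster is not found yet, that is not an error; it will be picked up once
// it is created.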
func (r *Orchestrator) initCluster(readTx store.ReadTx) error {
	clusters, err := store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
	if err != nil {
		return err
	}

	if len(clusters) != 1 {
		// we'll just pick it up when it is created.
		return nil
	}

	r.cluster = clusters[0]
	return nil
}

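// initServices queues every existing replicated service for reconciliation so
// that the first orchestration tick brings its tasks in line with its spec.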
func (r *Orchestrator) initServices(readTx store.ReadTx) error {
	services, err := store.FindServices(readTx, store.All)
	if err != nil {
		return err
	}
	for _, s := range services {
		if orchestrator.IsReplicatedService(s) {
			r.reconcileServices[s.ID] = s
		}
	}
	return nil
}

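// handleServiceEvent maintains the set of services that need reconciliation.
// On delete, the service's tasks are marked for removal, its restart history
// is cleared, and it is dropped from the reconciliation set; on create and
// update, the service is queued for the next reconciliation tick.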
func (r *Orchestrator) handleServiceEvent(ctx context.Context, event events.Event) {
	switch v := event.(type) {
	case api.EventDeleteService:
		if !orchestrator.IsReplicatedService(v.Service) {
			return
		}
		orchestrator.SetServiceTasksRemove(ctx, r.store, v.Service)
		r.restarts.ClearServiceHistory(v.Service.ID)
		delete(r.reconcileServices, v.Service.ID)
	case api.EventCreateService:
		if !orchestrator.IsReplicatedService(v.Service) {
			return
		}
		r.reconcileServices[v.Service.ID] = v.Service
	case api.EventUpdateService:
		if !orchestrator.IsReplicatedService(v.Service) {
			return
		}
		r.reconcileServices[v.Service.ID] = v.Service
	}
}

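// tickServices reconciles every queued service and then resets the queue.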
func (r *Orchestrator) tickServices(ctx context.Context) {
	if len(r.reconcileServices) > 0 {
		for _, s := range r.reconcileServices {
			r.reconcile(ctx, s)
		}
		r.reconcileServices = make(map[string]*api.Service)
	}
}

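// resolveService looks up the service that owns the given task. It returns nil
// if the task has no service ID or the service no longer exists in the store.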
func (r *Orchestrator) resolveService(ctx context.Context, task *api.Task) *api.Service {
	if task.ServiceID == "" {
		return nil
	}
	var service *api.Service
	r.store.View(func(tx store.ReadTx) {
		service = store.GetService(tx, task.ServiceID)
	})
	return service
}

// reconcile decides what actions must be taken depending on the number of
// specified slots and actual running slots. If the actual running slots are
// fewer than what is requested, it creates new tasks. If the actual running
// slots are more than requested, then it decides which slots must be removed
// and sets the desired state of those tasks to REMOVE (the actual removal is
// handled by the task reaper, after the agent shuts the tasks down).
func (r *Orchestrator) reconcile(ctx context.Context, service *api.Service) {
	runningSlots, deadSlots, err := r.updatableAndDeadSlots(ctx, service)
	if err != nil {
		log.G(ctx).WithError(err).Errorf("reconcile failed finding tasks")
		return
	}

	numSlots := len(runningSlots)

	slotsSlice := make([]orchestrator.Slot, 0, numSlots)
	for _, slot := range runningSlots {
		slotsSlice = append(slotsSlice, slot)
	}

	deploy := service.Spec.GetMode().(*api.ServiceSpec_Replicated)
	specifiedSlots := deploy.Replicated.Replicas

	switch {
	case specifiedSlots > uint64(numSlots):
		log.G(ctx).Debugf("Service %s was scaled up from %d to %d instances", service.ID, numSlots, specifiedSlots)
		// Update all current tasks then add missing tasks
		r.updater.Update(ctx, r.cluster, service, slotsSlice)
		err = r.store.Batch(func(batch *store.Batch) error {
			r.addTasks(ctx, batch, service, runningSlots, deadSlots, specifiedSlots-uint64(numSlots))
			r.deleteTasksMap(ctx, batch, deadSlots)
			return nil
		})
		if err != nil {
			log.G(ctx).WithError(err).Errorf("reconcile batch failed")
		}

	case specifiedSlots < uint64(numSlots):
		// Update up to N tasks then remove the extra
		log.G(ctx).Debugf("Service %s was scaled down from %d to %d instances", service.ID, numSlots, specifiedSlots)

		// Preferentially remove tasks on the nodes that have the most
		// copies of this service, to leave a more balanced result.

		// First sort tasks such that tasks which are currently running
		// (in terms of observed state) appear before non-running tasks.
		// This will cause us to prefer to remove non-running tasks, all
		// other things being equal in terms of node balance.

		sort.Sort(slotsByRunningState(slotsSlice))

		// Assign each task an index that counts it as the nth copy of
		// the service on its node (1, 2, 3, ...), and sort the tasks by
		// this counter value.
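		// For example, if node A runs three copies of this service and
		// node B runs one, A's copies get indices 1, 2, and 3 while B's
		// copy gets index 1, so A's surplus copy sorts toward the end of
		// the slice, which is the portion removed below.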

		slotsByNode := make(map[string]int)
		slotsWithIndices := make(slotsByIndex, 0, numSlots)

		for _, slot := range slotsSlice {
			if len(slot) == 1 && slot[0].NodeID != "" {
				slotsByNode[slot[0].NodeID]++
				slotsWithIndices = append(slotsWithIndices, slotWithIndex{slot: slot, index: slotsByNode[slot[0].NodeID]})
			} else {
				slotsWithIndices = append(slotsWithIndices, slotWithIndex{slot: slot, index: -1})
			}
		}

		sort.Sort(slotsWithIndices)

		sortedSlots := make([]orchestrator.Slot, 0, numSlots)
		for _, slot := range slotsWithIndices {
			sortedSlots = append(sortedSlots, slot.slot)
		}

		r.updater.Update(ctx, r.cluster, service, sortedSlots[:specifiedSlots])
		err = r.store.Batch(func(batch *store.Batch) error {
			r.deleteTasksMap(ctx, batch, deadSlots)
			// for all slots that we are removing, we set the desired state of those tasks
			// to REMOVE. Then, the agent is responsible for shutting them down, and the
			// task reaper is responsible for actually removing them from the store after
			// shutdown.
			r.setTasksDesiredState(ctx, batch, sortedSlots[specifiedSlots:], api.TaskStateRemove)
			return nil
		})
		if err != nil {
			log.G(ctx).WithError(err).Errorf("reconcile batch failed")
		}

	case specifiedSlots == uint64(numSlots):
		err = r.store.Batch(func(batch *store.Batch) error {
			r.deleteTasksMap(ctx, batch, deadSlots)
			return nil
		})
		if err != nil {
			log.G(ctx).WithError(err).Errorf("reconcile batch failed")
		}
		// Simple update, no scaling - update all tasks.
		r.updater.Update(ctx, r.cluster, service, slotsSlice)
	}
}

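// addTasks creates count new tasks for service, walking the slot numbers
// upward and placing each new task in the next slot that has no running task.
// A slot reused this way is removed from deadSlots before the task is created.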
func (r *Orchestrator) addTasks(ctx context.Context, batch *store.Batch, service *api.Service, runningSlots map[uint64]orchestrator.Slot, deadSlots map[uint64]orchestrator.Slot, count uint64) {
	slot := uint64(0)
	for i := uint64(0); i < count; i++ {
		// Find a slot number that is missing a running task
		for {
			slot++
			if _, ok := runningSlots[slot]; !ok {
				break
			}
		}

		delete(deadSlots, slot)
		err := batch.Update(func(tx store.Tx) error {
			return store.CreateTask(tx, orchestrator.NewTask(r.cluster, service, slot, ""))
		})
		if err != nil {
			log.G(ctx).Errorf("Failed to create task: %v", err)
		}
	}
}

// setTasksDesiredState sets the desired state of all tasks in the given slots
// to the requested state.
func (r *Orchestrator) setTasksDesiredState(ctx context.Context, batch *store.Batch, slots []orchestrator.Slot, newDesiredState api.TaskState) {
	for _, slot := range slots {
		for _, t := range slot {
			err := batch.Update(func(tx store.Tx) error {
				// Time travel is not allowed. If the current desired state is
				// already past the one we're trying to set, we can't go
				// backwards; there is nothing to do, so skip to the next task.
				if t.DesiredState > newDesiredState {
					// Log a warning, though; we shouldn't be trying to
					// rewrite a state to an earlier state.
					log.G(ctx).Warnf(
						"cannot update task %v in desired state %v to an earlier desired state %v",
						t.ID, t.DesiredState, newDesiredState,
					)
					return nil
				}
				// update desired state
				t.DesiredState = newDesiredState

				return store.UpdateTask(tx, t)
			})

			// log an error if we get one
			if err != nil {
				log.G(ctx).WithError(err).Errorf("failed to update task to %v", newDesiredState.String())
			}
		}
	}
}

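// deleteTasksMap deletes every task in the given slots from the store as part
// of the given batch.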
func (r *Orchestrator) deleteTasksMap(ctx context.Context, batch *store.Batch, slots map[uint64]orchestrator.Slot) {
	for _, slot := range slots {
		for _, t := range slot {
			r.deleteTask(ctx, batch, t)
		}
	}
}

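// deleteTask deletes a single task from the store as part of the given batch,
// logging any error that occurs.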
func (r *Orchestrator) deleteTask(ctx context.Context, batch *store.Batch, t *api.Task) {
	err := batch.Update(func(tx store.Tx) error {
		return store.DeleteTask(tx, t.ID)
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("deleting task %s failed", t.ID)
	}
}

// IsRelatedService returns true if the service should be governed by this
// orchestrator.
func (r *Orchestrator) IsRelatedService(service *api.Service) bool {
	return orchestrator.IsReplicatedService(service)
}