github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/group_service_hook.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"context"
     5  	"sync"
     6  	"time"
     7  
     8  	log "github.com/hashicorp/go-hclog"
     9  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    10  	"github.com/hashicorp/nomad/client/serviceregistration"
    11  	"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
    12  	"github.com/hashicorp/nomad/client/taskenv"
    13  	"github.com/hashicorp/nomad/helper"
    14  	"github.com/hashicorp/nomad/nomad/structs"
    15  )
    16  
    17  const (
    18  	groupServiceHookName = "group_services"
    19  )
    20  
    21  // groupServiceHook manages task group Consul service registration and
    22  // deregistration.
    23  type groupServiceHook struct {
    24  	allocID          string
    25  	jobID            string
    26  	group            string
    27  	namespace        string
    28  	restarter        serviceregistration.WorkloadRestarter
    29  	prerun           bool
    30  	deregistered     bool
    31  	networkStatus    structs.NetworkStatus
    32  	shutdownDelayCtx context.Context
    33  
    34  	// providerNamespace is the Nomad or Consul namespace in which service
    35  	// registrations will be made. This field may be updated.
    36  	providerNamespace string
    37  
    38  	// serviceRegWrapper is the handler wrapper that is used to perform service
    39  	// and check registration and deregistration.
    40  	serviceRegWrapper *wrapper.HandlerWrapper
    41  
    42  	logger log.Logger
    43  
    44  	// The following fields may be updated
    45  	canary         bool
    46  	services       []*structs.Service
    47  	networks       structs.Networks
    48  	ports          structs.AllocatedPorts
    49  	taskEnvBuilder *taskenv.Builder
    50  	delay          time.Duration
    51  
    52  	// Since Update() may be called concurrently with any other hook all
    53  	// hook methods must be fully serialized
    54  	mu sync.Mutex
    55  }
    56  
    57  type groupServiceHookConfig struct {
    58  	alloc            *structs.Allocation
    59  	restarter        serviceregistration.WorkloadRestarter
    60  	taskEnvBuilder   *taskenv.Builder
    61  	networkStatus    structs.NetworkStatus
    62  	shutdownDelayCtx context.Context
    63  	logger           log.Logger
    64  
    65  	// providerNamespace is the Nomad or Consul namespace in which service
    66  	// registrations will be made.
    67  	providerNamespace string
    68  
    69  	// serviceRegWrapper is the handler wrapper that is used to perform service
    70  	// and check registration and deregistration.
    71  	serviceRegWrapper *wrapper.HandlerWrapper
    72  }
    73  
    74  func newGroupServiceHook(cfg groupServiceHookConfig) *groupServiceHook {
    75  	var shutdownDelay time.Duration
    76  	tg := cfg.alloc.Job.LookupTaskGroup(cfg.alloc.TaskGroup)
    77  
    78  	if tg.ShutdownDelay != nil {
    79  		shutdownDelay = *tg.ShutdownDelay
    80  	}
    81  
    82  	h := &groupServiceHook{
    83  		allocID:           cfg.alloc.ID,
    84  		jobID:             cfg.alloc.JobID,
    85  		group:             cfg.alloc.TaskGroup,
    86  		namespace:         cfg.alloc.Namespace,
    87  		restarter:         cfg.restarter,
    88  		providerNamespace: cfg.providerNamespace,
    89  		taskEnvBuilder:    cfg.taskEnvBuilder,
    90  		delay:             shutdownDelay,
    91  		networkStatus:     cfg.networkStatus,
    92  		logger:            cfg.logger.Named(groupServiceHookName),
    93  		serviceRegWrapper: cfg.serviceRegWrapper,
    94  		services:          tg.Services,
    95  		shutdownDelayCtx:  cfg.shutdownDelayCtx,
    96  	}
    97  
    98  	if cfg.alloc.AllocatedResources != nil {
    99  		h.networks = cfg.alloc.AllocatedResources.Shared.Networks
   100  		h.ports = cfg.alloc.AllocatedResources.Shared.Ports
   101  	}
   102  
   103  	if cfg.alloc.DeploymentStatus != nil {
   104  		h.canary = cfg.alloc.DeploymentStatus.Canary
   105  	}
   106  
   107  	return h
   108  }
   109  
   110  func (*groupServiceHook) Name() string {
   111  	return groupServiceHookName
   112  }
   113  
   114  func (h *groupServiceHook) Prerun() error {
   115  	h.mu.Lock()
   116  	defer func() {
   117  		// Mark prerun as true to unblock Updates
   118  		h.prerun = true
   119  		h.mu.Unlock()
   120  	}()
   121  	return h.prerunLocked()
   122  }
   123  
   124  func (h *groupServiceHook) prerunLocked() error {
   125  	if len(h.services) == 0 {
   126  		return nil
   127  	}
   128  
   129  	services := h.getWorkloadServices()
   130  	return h.serviceRegWrapper.RegisterWorkload(services)
   131  }
   132  
   133  func (h *groupServiceHook) Update(req *interfaces.RunnerUpdateRequest) error {
   134  	h.mu.Lock()
   135  	defer h.mu.Unlock()
   136  
   137  	oldWorkloadServices := h.getWorkloadServices()
   138  
   139  	// Store new updated values out of request
   140  	canary := false
   141  	if req.Alloc.DeploymentStatus != nil {
   142  		canary = req.Alloc.DeploymentStatus.Canary
   143  	}
   144  
   145  	var networks structs.Networks
   146  	if req.Alloc.AllocatedResources != nil {
   147  		networks = req.Alloc.AllocatedResources.Shared.Networks
   148  		h.ports = req.Alloc.AllocatedResources.Shared.Ports
   149  	}
   150  
   151  	tg := req.Alloc.Job.LookupTaskGroup(h.group)
   152  	var shutdown time.Duration
   153  	if tg.ShutdownDelay != nil {
   154  		shutdown = *tg.ShutdownDelay
   155  	}
   156  
   157  	// Update group service hook fields
   158  	h.networks = networks
   159  	h.services = tg.Services
   160  	h.canary = canary
   161  	h.delay = shutdown
   162  	h.taskEnvBuilder.UpdateTask(req.Alloc, nil)
   163  
   164  	// An update may change the service provider, therefore we need to account
   165  	// for how namespaces work across providers also.
   166  	h.providerNamespace = req.Alloc.ServiceProviderNamespace()
   167  
   168  	// Create new task services struct with those new values
   169  	newWorkloadServices := h.getWorkloadServices()
   170  
   171  	if !h.prerun {
   172  		// Update called before Prerun. Update alloc and exit to allow
   173  		// Prerun to do initial registration.
   174  		return nil
   175  	}
   176  
   177  	return h.serviceRegWrapper.UpdateWorkload(oldWorkloadServices, newWorkloadServices)
   178  }
   179  
   180  func (h *groupServiceHook) PreTaskRestart() error {
   181  	h.mu.Lock()
   182  	defer func() {
   183  		// Mark prerun as true to unblock Updates
   184  		h.prerun = true
   185  		h.mu.Unlock()
   186  	}()
   187  
   188  	h.preKillLocked()
   189  	return h.prerunLocked()
   190  }
   191  
   192  func (h *groupServiceHook) PreKill() {
   193  	h.mu.Lock()
   194  	defer h.mu.Unlock()
   195  	h.preKillLocked()
   196  }
   197  
   198  // implements the PreKill hook but requires the caller hold the lock
   199  func (h *groupServiceHook) preKillLocked() {
   200  	// If we have a shutdown delay deregister group services and then wait
   201  	// before continuing to kill tasks.
   202  	h.deregister()
   203  	h.deregistered = true
   204  
   205  	if h.delay == 0 {
   206  		return
   207  	}
   208  
   209  	h.logger.Debug("delay before killing tasks", "group", h.group, "shutdown_delay", h.delay)
   210  
   211  	timer, cancel := helper.NewSafeTimer(h.delay)
   212  	defer cancel()
   213  
   214  	select {
   215  	// Wait for specified shutdown_delay unless ignored
   216  	// This will block an agent from shutting down.
   217  	case <-timer.C:
   218  	case <-h.shutdownDelayCtx.Done():
   219  	}
   220  }
   221  
   222  func (h *groupServiceHook) Postrun() error {
   223  	h.mu.Lock()
   224  	defer h.mu.Unlock()
   225  
   226  	if !h.deregistered {
   227  		h.deregister()
   228  	}
   229  	return nil
   230  }
   231  
   232  // deregister services from Consul.
   233  func (h *groupServiceHook) deregister() {
   234  	if len(h.services) > 0 {
   235  		workloadServices := h.getWorkloadServices()
   236  		h.serviceRegWrapper.RemoveWorkload(workloadServices)
   237  	}
   238  }
   239  
   240  func (h *groupServiceHook) getWorkloadServices() *serviceregistration.WorkloadServices {
   241  	// Interpolate with the task's environment
   242  	interpolatedServices := taskenv.InterpolateServices(h.taskEnvBuilder.Build(), h.services)
   243  
   244  	var netStatus *structs.AllocNetworkStatus
   245  	if h.networkStatus != nil {
   246  		netStatus = h.networkStatus.NetworkStatus()
   247  	}
   248  
   249  	info := structs.AllocInfo{
   250  		AllocID:   h.allocID,
   251  		JobID:     h.jobID,
   252  		Group:     h.group,
   253  		Namespace: h.namespace,
   254  	}
   255  
   256  	// Create task services struct with request's driver metadata
   257  	return &serviceregistration.WorkloadServices{
   258  		AllocInfo:         info,
   259  		ProviderNamespace: h.providerNamespace,
   260  		Restarter:         h.restarter,
   261  		Services:          interpolatedServices,
   262  		Networks:          h.networks,
   263  		NetworkStatus:     netStatus,
   264  		Ports:             h.ports,
   265  		Canary:            h.canary,
   266  	}
   267  }