github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/allocrunner/groupservice_hook.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"sync"
     5  	"time"
     6  
     7  	log "github.com/hashicorp/go-hclog"
     8  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
     9  	"github.com/hashicorp/nomad/client/consul"
    10  	"github.com/hashicorp/nomad/client/taskenv"
    11  	agentconsul "github.com/hashicorp/nomad/command/agent/consul"
    12  	"github.com/hashicorp/nomad/nomad/structs"
    13  )
    14  
// networkStatusGetter is the source the group service hook queries for an
// allocation's network status each time it builds the set of workload
// services to hand to Consul.
type networkStatusGetter interface {
	// NetworkStatus returns the allocation's network status. The hook passes
	// the returned value through to Consul unmodified.
	NetworkStatus() *structs.AllocNetworkStatus
}
    18  
// groupServiceHook manages task group Consul service registration and
// deregistration.
//
// Field notes:
//   - allocID and group identify the allocation and its task group.
//   - restarter is passed through to Consul with every workload.
//   - consulClient is the API used to register, update and remove workloads.
//   - prerun is set once Prerun (or PreTaskRestart) has finished; until then
//     Update only records new values and does not call Consul.
//   - delay is the task group's shutdown delay; after deregistering, the
//     pre-kill path blocks for this long when it is non-zero.
//   - deregistered is set once the pre-kill path has removed the services and
//     is checked by Postrun to avoid removing them a second time.
//   - networkStatusGetter is optional; when nil no network status is attached
//     to the workload.
type groupServiceHook struct {
	allocID             string
	group               string
	restarter           agentconsul.WorkloadRestarter
	consulClient        consul.ConsulServiceAPI
	prerun              bool
	delay               time.Duration
	deregistered        bool
	networkStatusGetter networkStatusGetter

	logger log.Logger

	// The following fields may be changed by Update after construction.
	canary         bool
	services       []*structs.Service
	networks       structs.Networks
	ports          structs.AllocatedPorts
	taskEnvBuilder *taskenv.Builder

	// Since Update() may be called concurrently with any other hook all
	// hook methods must be fully serialized
	mu sync.Mutex
}
    44  
// groupServiceHookConfig bundles the dependencies newGroupServiceHook needs to
// build a groupServiceHook.
//
// alloc supplies the allocation ID, task group name, job definition, shared
// allocated resources and deployment status. consul, restarter,
// taskEnvBuilder and networkStatusGetter are stored on the hook as-is. logger
// is used as the parent of the hook's own named logger.
type groupServiceHookConfig struct {
	alloc               *structs.Allocation
	consul              consul.ConsulServiceAPI
	restarter           agentconsul.WorkloadRestarter
	taskEnvBuilder      *taskenv.Builder
	networkStatusGetter networkStatusGetter
	logger              log.Logger
}
    53  
    54  func newGroupServiceHook(cfg groupServiceHookConfig) *groupServiceHook {
    55  	var shutdownDelay time.Duration
    56  	tg := cfg.alloc.Job.LookupTaskGroup(cfg.alloc.TaskGroup)
    57  
    58  	if tg.ShutdownDelay != nil {
    59  		shutdownDelay = *tg.ShutdownDelay
    60  	}
    61  
    62  	h := &groupServiceHook{
    63  		allocID:             cfg.alloc.ID,
    64  		group:               cfg.alloc.TaskGroup,
    65  		restarter:           cfg.restarter,
    66  		consulClient:        cfg.consul,
    67  		taskEnvBuilder:      cfg.taskEnvBuilder,
    68  		delay:               shutdownDelay,
    69  		networkStatusGetter: cfg.networkStatusGetter,
    70  	}
    71  	h.logger = cfg.logger.Named(h.Name())
    72  	h.services = cfg.alloc.Job.LookupTaskGroup(h.group).Services
    73  
    74  	if cfg.alloc.AllocatedResources != nil {
    75  		h.networks = cfg.alloc.AllocatedResources.Shared.Networks
    76  		h.ports = cfg.alloc.AllocatedResources.Shared.Ports
    77  	}
    78  
    79  	if cfg.alloc.DeploymentStatus != nil {
    80  		h.canary = cfg.alloc.DeploymentStatus.Canary
    81  	}
    82  
    83  	return h
    84  }
    85  
    86  func (*groupServiceHook) Name() string {
    87  	return "group_services"
    88  }
    89  
    90  func (h *groupServiceHook) Prerun() error {
    91  	h.mu.Lock()
    92  	defer func() {
    93  		// Mark prerun as true to unblock Updates
    94  		h.prerun = true
    95  		h.mu.Unlock()
    96  	}()
    97  	return h.prerunLocked()
    98  }
    99  
   100  func (h *groupServiceHook) prerunLocked() error {
   101  	if len(h.services) == 0 {
   102  		return nil
   103  	}
   104  
   105  	services := h.getWorkloadServices()
   106  	return h.consulClient.RegisterWorkload(services)
   107  }
   108  
   109  func (h *groupServiceHook) Update(req *interfaces.RunnerUpdateRequest) error {
   110  	h.mu.Lock()
   111  	defer h.mu.Unlock()
   112  
   113  	oldWorkloadServices := h.getWorkloadServices()
   114  
   115  	// Store new updated values out of request
   116  	canary := false
   117  	if req.Alloc.DeploymentStatus != nil {
   118  		canary = req.Alloc.DeploymentStatus.Canary
   119  	}
   120  
   121  	var networks structs.Networks
   122  	if req.Alloc.AllocatedResources != nil {
   123  		networks = req.Alloc.AllocatedResources.Shared.Networks
   124  		h.ports = req.Alloc.AllocatedResources.Shared.Ports
   125  	}
   126  
   127  	tg := req.Alloc.Job.LookupTaskGroup(h.group)
   128  	var shutdown time.Duration
   129  	if tg.ShutdownDelay != nil {
   130  		shutdown = *tg.ShutdownDelay
   131  	}
   132  
   133  	// Update group service hook fields
   134  	h.networks = networks
   135  	h.services = tg.Services
   136  	h.canary = canary
   137  	h.delay = shutdown
   138  	h.taskEnvBuilder.UpdateTask(req.Alloc, nil)
   139  
   140  	// Create new task services struct with those new values
   141  	newWorkloadServices := h.getWorkloadServices()
   142  
   143  	if !h.prerun {
   144  		// Update called before Prerun. Update alloc and exit to allow
   145  		// Prerun to do initial registration.
   146  		return nil
   147  	}
   148  
   149  	return h.consulClient.UpdateWorkload(oldWorkloadServices, newWorkloadServices)
   150  }
   151  
   152  func (h *groupServiceHook) PreTaskRestart() error {
   153  	h.mu.Lock()
   154  	defer func() {
   155  		// Mark prerun as true to unblock Updates
   156  		h.prerun = true
   157  		h.mu.Unlock()
   158  	}()
   159  
   160  	h.preKillLocked()
   161  	return h.prerunLocked()
   162  }
   163  
// PreKill acquires the hook lock and runs the pre-kill logic: the group's
// services are deregistered and, when a shutdown delay is set, the call blocks
// for that long before returning.
func (h *groupServiceHook) PreKill() {
	h.mu.Lock()
	defer h.mu.Unlock()
	h.preKillLocked()
}
   169  
   170  // implements the PreKill hook but requires the caller hold the lock
   171  func (h *groupServiceHook) preKillLocked() {
   172  	// If we have a shutdown delay deregister
   173  	// group services and then wait
   174  	// before continuing to kill tasks
   175  	h.deregister()
   176  	h.deregistered = true
   177  
   178  	if h.delay == 0 {
   179  		return
   180  	}
   181  
   182  	h.logger.Debug("waiting before removing group service", "shutdown_delay", h.delay)
   183  
   184  	// Wait for specified shutdown_delay
   185  	// this will block an agent from shutting down
   186  	<-time.After(h.delay)
   187  }
   188  
   189  func (h *groupServiceHook) Postrun() error {
   190  	h.mu.Lock()
   191  	defer h.mu.Unlock()
   192  
   193  	if !h.deregistered {
   194  		h.deregister()
   195  	}
   196  	return nil
   197  }
   198  
   199  // deregister services from Consul.
   200  func (h *groupServiceHook) deregister() {
   201  	if len(h.services) > 0 {
   202  		workloadServices := h.getWorkloadServices()
   203  		h.consulClient.RemoveWorkload(workloadServices)
   204  
   205  		// Canary flag may be getting flipped when the alloc is being
   206  		// destroyed, so remove both variations of the service
   207  		workloadServices.Canary = !workloadServices.Canary
   208  		h.consulClient.RemoveWorkload(workloadServices)
   209  	}
   210  }
   211  
   212  func (h *groupServiceHook) getWorkloadServices() *agentconsul.WorkloadServices {
   213  	// Interpolate with the task's environment
   214  	interpolatedServices := taskenv.InterpolateServices(h.taskEnvBuilder.Build(), h.services)
   215  
   216  	var netStatus *structs.AllocNetworkStatus
   217  	if h.networkStatusGetter != nil {
   218  		netStatus = h.networkStatusGetter.NetworkStatus()
   219  	}
   220  
   221  	// Create task services struct with request's driver metadata
   222  	return &agentconsul.WorkloadServices{
   223  		AllocID:       h.allocID,
   224  		Group:         h.group,
   225  		Restarter:     h.restarter,
   226  		Services:      interpolatedServices,
   227  		Networks:      h.networks,
   228  		NetworkStatus: netStatus,
   229  		Ports:         h.ports,
   230  		Canary:        h.canary,
   231  	}
   232  }