github.com/manicqin/nomad@v0.9.5/client/allocrunner/groupservice_hook.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"sync"
     5  	"time"
     6  
     7  	log "github.com/hashicorp/go-hclog"
     8  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
     9  	"github.com/hashicorp/nomad/client/consul"
    10  	"github.com/hashicorp/nomad/client/taskenv"
    11  	agentconsul "github.com/hashicorp/nomad/command/agent/consul"
    12  	"github.com/hashicorp/nomad/nomad/structs"
    13  	"github.com/hashicorp/nomad/plugins/drivers"
    14  )
    15  
    16  // groupServiceHook manages task group Consul service registration and
    17  // deregistration.
    18  type groupServiceHook struct {
    19  	allocID      string
    20  	group        string
    21  	restarter    agentconsul.WorkloadRestarter
    22  	consulClient consul.ConsulServiceAPI
    23  	prerun       bool
    24  	delay        time.Duration
    25  	deregistered bool
    26  
    27  	logger log.Logger
    28  
    29  	// The following fields may be updated
    30  	canary         bool
    31  	services       []*structs.Service
    32  	networks       structs.Networks
    33  	taskEnvBuilder *taskenv.Builder
    34  
    35  	// Since Update() may be called concurrently with any other hook all
    36  	// hook methods must be fully serialized
    37  	mu sync.Mutex
    38  }
    39  
    40  type groupServiceHookConfig struct {
    41  	alloc          *structs.Allocation
    42  	consul         consul.ConsulServiceAPI
    43  	restarter      agentconsul.WorkloadRestarter
    44  	taskEnvBuilder *taskenv.Builder
    45  	logger         log.Logger
    46  }
    47  
    48  func newGroupServiceHook(cfg groupServiceHookConfig) *groupServiceHook {
    49  	var shutdownDelay time.Duration
    50  	tg := cfg.alloc.Job.LookupTaskGroup(cfg.alloc.TaskGroup)
    51  
    52  	if tg.ShutdownDelay != nil {
    53  		shutdownDelay = *tg.ShutdownDelay
    54  	}
    55  
    56  	h := &groupServiceHook{
    57  		allocID:        cfg.alloc.ID,
    58  		group:          cfg.alloc.TaskGroup,
    59  		restarter:      cfg.restarter,
    60  		consulClient:   cfg.consul,
    61  		taskEnvBuilder: cfg.taskEnvBuilder,
    62  		delay:          shutdownDelay,
    63  	}
    64  	h.logger = cfg.logger.Named(h.Name())
    65  	h.services = cfg.alloc.Job.LookupTaskGroup(h.group).Services
    66  
    67  	if cfg.alloc.AllocatedResources != nil {
    68  		h.networks = cfg.alloc.AllocatedResources.Shared.Networks
    69  	}
    70  
    71  	if cfg.alloc.DeploymentStatus != nil {
    72  		h.canary = cfg.alloc.DeploymentStatus.Canary
    73  	}
    74  	return h
    75  }
    76  
    77  func (*groupServiceHook) Name() string {
    78  	return "group_services"
    79  }
    80  
    81  func (h *groupServiceHook) Prerun() error {
    82  	h.mu.Lock()
    83  	defer func() {
    84  		// Mark prerun as true to unblock Updates
    85  		h.prerun = true
    86  		h.mu.Unlock()
    87  	}()
    88  
    89  	if len(h.services) == 0 {
    90  		return nil
    91  	}
    92  
    93  	services := h.getWorkloadServices()
    94  	return h.consulClient.RegisterWorkload(services)
    95  }
    96  
    97  func (h *groupServiceHook) Update(req *interfaces.RunnerUpdateRequest) error {
    98  	h.mu.Lock()
    99  	defer h.mu.Unlock()
   100  	oldWorkloadServices := h.getWorkloadServices()
   101  
   102  	// Store new updated values out of request
   103  	canary := false
   104  	if req.Alloc.DeploymentStatus != nil {
   105  		canary = req.Alloc.DeploymentStatus.Canary
   106  	}
   107  
   108  	var networks structs.Networks
   109  	if req.Alloc.AllocatedResources != nil {
   110  		networks = req.Alloc.AllocatedResources.Shared.Networks
   111  	}
   112  
   113  	// Update group service hook fields
   114  	h.networks = networks
   115  	h.services = req.Alloc.Job.LookupTaskGroup(h.group).Services
   116  	h.canary = canary
   117  	h.taskEnvBuilder.UpdateTask(req.Alloc, nil)
   118  
   119  	// Create new task services struct with those new values
   120  	newWorkloadServices := h.getWorkloadServices()
   121  
   122  	if !h.prerun {
   123  		// Update called before Prerun. Update alloc and exit to allow
   124  		// Prerun to do initial registration.
   125  		return nil
   126  	}
   127  
   128  	return h.consulClient.UpdateWorkload(oldWorkloadServices, newWorkloadServices)
   129  }
   130  
   131  func (h *groupServiceHook) PreKill() {
   132  	h.mu.Lock()
   133  	defer h.mu.Unlock()
   134  
   135  	// If we have a shutdown delay deregister
   136  	// group services and then wait
   137  	// before continuing to kill tasks
   138  	h.deregister()
   139  	h.deregistered = true
   140  
   141  	if h.delay == 0 {
   142  		return
   143  	}
   144  
   145  	h.logger.Debug("waiting before removing group service", "shutdown_delay", h.delay)
   146  
   147  	// Wait for specified shutdown_delay
   148  	// this will block an agent from shutting down
   149  	<-time.After(h.delay)
   150  }
   151  
   152  func (h *groupServiceHook) Postrun() error {
   153  	h.mu.Lock()
   154  	defer h.mu.Unlock()
   155  
   156  	if !h.deregistered {
   157  		h.deregister()
   158  	}
   159  	return nil
   160  }
   161  
   162  func (h *groupServiceHook) driverNet() *drivers.DriverNetwork {
   163  	if len(h.networks) == 0 {
   164  		return nil
   165  	}
   166  
   167  	//TODO(schmichael) only support one network for now
   168  	net := h.networks[0]
   169  	//TODO(schmichael) there's probably a better way than hacking driver network
   170  	return &drivers.DriverNetwork{
   171  		AutoAdvertise: true,
   172  		IP:            net.IP,
   173  		// Copy PortLabels from group network
   174  		PortMap: net.PortLabels(),
   175  	}
   176  }
   177  
   178  // deregister services from Consul.
   179  func (h *groupServiceHook) deregister() {
   180  	if len(h.services) > 0 {
   181  		workloadServices := h.getWorkloadServices()
   182  		h.consulClient.RemoveWorkload(workloadServices)
   183  
   184  		// Canary flag may be getting flipped when the alloc is being
   185  		// destroyed, so remove both variations of the service
   186  		workloadServices.Canary = !workloadServices.Canary
   187  		h.consulClient.RemoveWorkload(workloadServices)
   188  	}
   189  }
   190  
   191  func (h *groupServiceHook) getWorkloadServices() *agentconsul.WorkloadServices {
   192  	// Interpolate with the task's environment
   193  	interpolatedServices := taskenv.InterpolateServices(h.taskEnvBuilder.Build(), h.services)
   194  
   195  	// Create task services struct with request's driver metadata
   196  	return &agentconsul.WorkloadServices{
   197  		AllocID:       h.allocID,
   198  		Group:         h.group,
   199  		Restarter:     h.restarter,
   200  		Services:      interpolatedServices,
   201  		DriverNetwork: h.driverNet(),
   202  		Networks:      h.networks,
   203  		Canary:        h.canary,
   204  	}
   205  }