github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/group_service_hook.go (about) 1 package allocrunner 2 3 import ( 4 "context" 5 "sync" 6 "time" 7 8 log "github.com/hashicorp/go-hclog" 9 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 10 "github.com/hashicorp/nomad/client/serviceregistration" 11 "github.com/hashicorp/nomad/client/serviceregistration/wrapper" 12 "github.com/hashicorp/nomad/client/taskenv" 13 "github.com/hashicorp/nomad/helper" 14 "github.com/hashicorp/nomad/nomad/structs" 15 ) 16 17 const ( 18 groupServiceHookName = "group_services" 19 ) 20 21 // groupServiceHook manages task group Consul service registration and 22 // deregistration. 23 type groupServiceHook struct { 24 allocID string 25 jobID string 26 group string 27 namespace string 28 restarter serviceregistration.WorkloadRestarter 29 prerun bool 30 deregistered bool 31 networkStatus structs.NetworkStatus 32 shutdownDelayCtx context.Context 33 34 // providerNamespace is the Nomad or Consul namespace in which service 35 // registrations will be made. This field may be updated. 36 providerNamespace string 37 38 // serviceRegWrapper is the handler wrapper that is used to perform service 39 // and check registration and deregistration. 40 serviceRegWrapper *wrapper.HandlerWrapper 41 42 logger log.Logger 43 44 // The following fields may be updated 45 canary bool 46 services []*structs.Service 47 networks structs.Networks 48 ports structs.AllocatedPorts 49 taskEnvBuilder *taskenv.Builder 50 delay time.Duration 51 52 // Since Update() may be called concurrently with any other hook all 53 // hook methods must be fully serialized 54 mu sync.Mutex 55 } 56 57 type groupServiceHookConfig struct { 58 alloc *structs.Allocation 59 restarter serviceregistration.WorkloadRestarter 60 taskEnvBuilder *taskenv.Builder 61 networkStatus structs.NetworkStatus 62 shutdownDelayCtx context.Context 63 logger log.Logger 64 65 // providerNamespace is the Nomad or Consul namespace in which service 66 // registrations will be made. 67 providerNamespace string 68 69 // serviceRegWrapper is the handler wrapper that is used to perform service 70 // and check registration and deregistration. 71 serviceRegWrapper *wrapper.HandlerWrapper 72 } 73 74 func newGroupServiceHook(cfg groupServiceHookConfig) *groupServiceHook { 75 var shutdownDelay time.Duration 76 tg := cfg.alloc.Job.LookupTaskGroup(cfg.alloc.TaskGroup) 77 78 if tg.ShutdownDelay != nil { 79 shutdownDelay = *tg.ShutdownDelay 80 } 81 82 h := &groupServiceHook{ 83 allocID: cfg.alloc.ID, 84 jobID: cfg.alloc.JobID, 85 group: cfg.alloc.TaskGroup, 86 namespace: cfg.alloc.Namespace, 87 restarter: cfg.restarter, 88 providerNamespace: cfg.providerNamespace, 89 taskEnvBuilder: cfg.taskEnvBuilder, 90 delay: shutdownDelay, 91 networkStatus: cfg.networkStatus, 92 logger: cfg.logger.Named(groupServiceHookName), 93 serviceRegWrapper: cfg.serviceRegWrapper, 94 services: tg.Services, 95 shutdownDelayCtx: cfg.shutdownDelayCtx, 96 } 97 98 if cfg.alloc.AllocatedResources != nil { 99 h.networks = cfg.alloc.AllocatedResources.Shared.Networks 100 h.ports = cfg.alloc.AllocatedResources.Shared.Ports 101 } 102 103 if cfg.alloc.DeploymentStatus != nil { 104 h.canary = cfg.alloc.DeploymentStatus.Canary 105 } 106 107 return h 108 } 109 110 func (*groupServiceHook) Name() string { 111 return groupServiceHookName 112 } 113 114 func (h *groupServiceHook) Prerun() error { 115 h.mu.Lock() 116 defer func() { 117 // Mark prerun as true to unblock Updates 118 h.prerun = true 119 h.mu.Unlock() 120 }() 121 return h.prerunLocked() 122 } 123 124 func (h *groupServiceHook) prerunLocked() error { 125 if len(h.services) == 0 { 126 return nil 127 } 128 129 services := h.getWorkloadServices() 130 return h.serviceRegWrapper.RegisterWorkload(services) 131 } 132 133 func (h *groupServiceHook) Update(req *interfaces.RunnerUpdateRequest) error { 134 h.mu.Lock() 135 defer h.mu.Unlock() 136 137 oldWorkloadServices := h.getWorkloadServices() 138 139 // Store new updated values out of request 140 canary := false 141 if req.Alloc.DeploymentStatus != nil { 142 canary = req.Alloc.DeploymentStatus.Canary 143 } 144 145 var networks structs.Networks 146 if req.Alloc.AllocatedResources != nil { 147 networks = req.Alloc.AllocatedResources.Shared.Networks 148 h.ports = req.Alloc.AllocatedResources.Shared.Ports 149 } 150 151 tg := req.Alloc.Job.LookupTaskGroup(h.group) 152 var shutdown time.Duration 153 if tg.ShutdownDelay != nil { 154 shutdown = *tg.ShutdownDelay 155 } 156 157 // Update group service hook fields 158 h.networks = networks 159 h.services = tg.Services 160 h.canary = canary 161 h.delay = shutdown 162 h.taskEnvBuilder.UpdateTask(req.Alloc, nil) 163 164 // An update may change the service provider, therefore we need to account 165 // for how namespaces work across providers also. 166 h.providerNamespace = req.Alloc.ServiceProviderNamespace() 167 168 // Create new task services struct with those new values 169 newWorkloadServices := h.getWorkloadServices() 170 171 if !h.prerun { 172 // Update called before Prerun. Update alloc and exit to allow 173 // Prerun to do initial registration. 174 return nil 175 } 176 177 return h.serviceRegWrapper.UpdateWorkload(oldWorkloadServices, newWorkloadServices) 178 } 179 180 func (h *groupServiceHook) PreTaskRestart() error { 181 h.mu.Lock() 182 defer func() { 183 // Mark prerun as true to unblock Updates 184 h.prerun = true 185 h.mu.Unlock() 186 }() 187 188 h.preKillLocked() 189 return h.prerunLocked() 190 } 191 192 func (h *groupServiceHook) PreKill() { 193 h.mu.Lock() 194 defer h.mu.Unlock() 195 h.preKillLocked() 196 } 197 198 // implements the PreKill hook but requires the caller hold the lock 199 func (h *groupServiceHook) preKillLocked() { 200 // If we have a shutdown delay deregister group services and then wait 201 // before continuing to kill tasks. 202 h.deregister() 203 h.deregistered = true 204 205 if h.delay == 0 { 206 return 207 } 208 209 h.logger.Debug("delay before killing tasks", "group", h.group, "shutdown_delay", h.delay) 210 211 timer, cancel := helper.NewSafeTimer(h.delay) 212 defer cancel() 213 214 select { 215 // Wait for specified shutdown_delay unless ignored 216 // This will block an agent from shutting down. 217 case <-timer.C: 218 case <-h.shutdownDelayCtx.Done(): 219 } 220 } 221 222 func (h *groupServiceHook) Postrun() error { 223 h.mu.Lock() 224 defer h.mu.Unlock() 225 226 if !h.deregistered { 227 h.deregister() 228 } 229 return nil 230 } 231 232 // deregister services from Consul. 233 func (h *groupServiceHook) deregister() { 234 if len(h.services) > 0 { 235 workloadServices := h.getWorkloadServices() 236 h.serviceRegWrapper.RemoveWorkload(workloadServices) 237 } 238 } 239 240 func (h *groupServiceHook) getWorkloadServices() *serviceregistration.WorkloadServices { 241 // Interpolate with the task's environment 242 interpolatedServices := taskenv.InterpolateServices(h.taskEnvBuilder.Build(), h.services) 243 244 var netStatus *structs.AllocNetworkStatus 245 if h.networkStatus != nil { 246 netStatus = h.networkStatus.NetworkStatus() 247 } 248 249 info := structs.AllocInfo{ 250 AllocID: h.allocID, 251 JobID: h.jobID, 252 Group: h.group, 253 Namespace: h.namespace, 254 } 255 256 // Create task services struct with request's driver metadata 257 return &serviceregistration.WorkloadServices{ 258 AllocInfo: info, 259 ProviderNamespace: h.providerNamespace, 260 Restarter: h.restarter, 261 Services: interpolatedServices, 262 Networks: h.networks, 263 NetworkStatus: netStatus, 264 Ports: h.ports, 265 Canary: h.canary, 266 } 267 }