github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/envoy_bootstrap_hook.go (about) 1 package taskrunner 2 3 import ( 4 "context" 5 "encoding/json" 6 "errors" 7 "fmt" 8 "io" 9 "io/ioutil" 10 "net" 11 "os" 12 "os/exec" 13 "path/filepath" 14 "strconv" 15 "strings" 16 "time" 17 18 "github.com/hashicorp/go-hclog" 19 "github.com/hashicorp/nomad/client/allocdir" 20 ifs "github.com/hashicorp/nomad/client/allocrunner/interfaces" 21 "github.com/hashicorp/nomad/client/serviceregistration" 22 "github.com/hashicorp/nomad/client/taskenv" 23 "github.com/hashicorp/nomad/helper" 24 "github.com/hashicorp/nomad/nomad/structs" 25 "github.com/hashicorp/nomad/nomad/structs/config" 26 "oss.indeed.com/go/libtime/decay" 27 ) 28 29 const envoyBootstrapHookName = "envoy_bootstrap" 30 31 const ( 32 // envoyBootstrapWaitTime is the amount of time this hook should wait on Consul 33 // objects to be created before giving up. 34 envoyBootstrapWaitTime = 60 * time.Second 35 36 // envoyBootstrapInitialGap is the initial amount of time the envoy bootstrap 37 // retry loop will wait, exponentially increasing each iteration, not including 38 // jitter. 39 envoyBoostrapInitialGap = 1 * time.Second 40 41 // envoyBootstrapMaxJitter is the maximum amount of jitter applied to the 42 // wait gap each iteration of the envoy bootstrap retry loop. 43 envoyBootstrapMaxJitter = 500 * time.Millisecond 44 ) 45 46 type consulTransportConfig struct { 47 HTTPAddr string // required 48 Auth string // optional, env CONSUL_HTTP_AUTH 49 SSL string // optional, env CONSUL_HTTP_SSL 50 VerifySSL string // optional, env CONSUL_HTTP_SSL_VERIFY 51 CAFile string // optional, arg -ca-file 52 CertFile string // optional, arg -client-cert 53 KeyFile string // optional, arg -client-key 54 Namespace string // optional, only consul Enterprise, env CONSUL_NAMESPACE 55 // CAPath (dir) not supported by Nomad's config object 56 } 57 58 func newConsulTransportConfig(consul *config.ConsulConfig) consulTransportConfig { 59 return consulTransportConfig{ 60 HTTPAddr: consul.Addr, 61 Auth: consul.Auth, 62 SSL: decodeTriState(consul.EnableSSL), 63 VerifySSL: decodeTriState(consul.VerifySSL), 64 CAFile: consul.CAFile, 65 CertFile: consul.CertFile, 66 KeyFile: consul.KeyFile, 67 Namespace: consul.Namespace, 68 } 69 } 70 71 type envoyBootstrapHookConfig struct { 72 alloc *structs.Allocation 73 consul consulTransportConfig 74 consulNamespace string 75 logger hclog.Logger 76 } 77 78 func decodeTriState(b *bool) string { 79 switch { 80 case b == nil: 81 return "" 82 case *b: 83 return "true" 84 default: 85 return "false" 86 } 87 } 88 89 func newEnvoyBootstrapHookConfig(alloc *structs.Allocation, consul *config.ConsulConfig, consulNamespace string, logger hclog.Logger) *envoyBootstrapHookConfig { 90 return &envoyBootstrapHookConfig{ 91 alloc: alloc, 92 consul: newConsulTransportConfig(consul), 93 consulNamespace: consulNamespace, 94 logger: logger, 95 } 96 } 97 98 const ( 99 envoyBaseAdminPort = 19000 // Consul default (bridge only) 100 envoyBaseReadyPort = 19100 // Consul default (bridge only) 101 envoyAdminBindEnvPrefix = "NOMAD_ENVOY_ADMIN_ADDR_" 102 envoyReadyBindEnvPrefix = "NOMAD_ENVOY_READY_ADDR_" 103 ) 104 105 const ( 106 grpcConsulVariable = "CONSUL_GRPC_ADDR" 107 grpcDefaultAddress = "127.0.0.1:8502" 108 ) 109 110 // envoyBootstrapHook writes the bootstrap config for the Connect Envoy proxy 111 // sidecar. 112 type envoyBootstrapHook struct { 113 // alloc is the allocation with the envoy task being bootstrapped. 114 alloc *structs.Allocation 115 116 // Bootstrapping Envoy requires talking directly to Consul to generate 117 // the bootstrap.json config. Runtime Envoy configuration is done via 118 // Consul's gRPC endpoint. There are many security parameters to configure 119 // before contacting Consul. 120 consulConfig consulTransportConfig 121 122 // consulNamespace is the Consul namespace as set by in the job 123 consulNamespace string 124 125 // envoyBootstrapWaitTime is the total amount of time hook will wait for Consul 126 envoyBootstrapWaitTime time.Duration 127 128 // envoyBootstrapInitialGap is the initial wait gap when retyring 129 envoyBoostrapInitialGap time.Duration 130 131 // envoyBootstrapMaxJitter is the maximum amount of jitter applied to retries 132 envoyBootstrapMaxJitter time.Duration 133 134 // envoyBootstrapExpSleep controls exponential waiting 135 envoyBootstrapExpSleep func(time.Duration) 136 137 // logger is used to log things 138 logger hclog.Logger 139 } 140 141 func newEnvoyBootstrapHook(c *envoyBootstrapHookConfig) *envoyBootstrapHook { 142 return &envoyBootstrapHook{ 143 alloc: c.alloc, 144 consulConfig: c.consul, 145 consulNamespace: c.consulNamespace, 146 envoyBootstrapWaitTime: envoyBootstrapWaitTime, 147 envoyBoostrapInitialGap: envoyBoostrapInitialGap, 148 envoyBootstrapMaxJitter: envoyBootstrapMaxJitter, 149 envoyBootstrapExpSleep: time.Sleep, 150 logger: c.logger.Named(envoyBootstrapHookName), 151 } 152 } 153 154 // getConsulNamespace will resolve the Consul namespace, choosing between 155 // - agent config (low precedence) 156 // - task group config (high precedence) 157 func (h *envoyBootstrapHook) getConsulNamespace() string { 158 var namespace string 159 if h.consulConfig.Namespace != "" { 160 namespace = h.consulConfig.Namespace 161 } 162 if h.consulNamespace != "" { 163 namespace = h.consulNamespace 164 } 165 return namespace 166 } 167 168 func (envoyBootstrapHook) Name() string { 169 return envoyBootstrapHookName 170 } 171 172 func isConnectKind(kind string) bool { 173 switch kind { 174 case structs.ConnectProxyPrefix: 175 return true 176 case structs.ConnectIngressPrefix: 177 return true 178 case structs.ConnectTerminatingPrefix: 179 return true 180 case structs.ConnectMeshPrefix: 181 return true 182 default: 183 return false 184 } 185 } 186 187 func (_ *envoyBootstrapHook) extractNameAndKind(kind structs.TaskKind) (string, string, error) { 188 serviceName := kind.Value() 189 serviceKind := kind.Name() 190 191 if !isConnectKind(serviceKind) { 192 return "", "", errors.New("envoy must be used as connect sidecar or gateway") 193 } 194 195 if serviceName == "" { 196 return "", "", errors.New("envoy must be configured with a service name") 197 } 198 199 return serviceKind, serviceName, nil 200 } 201 202 func (h *envoyBootstrapHook) lookupService(svcKind, svcName string, taskEnv *taskenv.TaskEnv) (*structs.Service, error) { 203 tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup) 204 interpolatedServices := taskenv.InterpolateServices(taskEnv, tg.Services) 205 206 var service *structs.Service 207 for _, s := range interpolatedServices { 208 if s.Name == svcName { 209 service = s 210 break 211 } 212 } 213 214 if service == nil { 215 if svcKind == structs.ConnectProxyPrefix { 216 return nil, errors.New("connect proxy sidecar task exists but no services configured with a sidecar") 217 } else { 218 return nil, errors.New("connect gateway task exists but no service associated") 219 } 220 } 221 222 return service, nil 223 } 224 225 // Prestart creates an envoy bootstrap config file. 226 // 227 // Must be aware of both launching envoy as a sidecar proxy, as well as a connect gateway. 228 func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestartRequest, resp *ifs.TaskPrestartResponse) error { 229 if !req.Task.Kind.IsConnectProxy() && !req.Task.Kind.IsAnyConnectGateway() { 230 // Not a Connect proxy sidecar 231 resp.Done = true 232 return nil 233 } 234 235 serviceKind, serviceName, err := h.extractNameAndKind(req.Task.Kind) 236 if err != nil { 237 return err 238 } 239 240 service, err := h.lookupService(serviceKind, serviceName, req.TaskEnv) 241 if err != nil { 242 return err 243 } 244 245 grpcAddr := h.grpcAddress(req.TaskEnv.EnvMap) 246 247 h.logger.Debug("bootstrapping Consul "+serviceKind, "task", req.Task.Name, "service", serviceName) 248 249 // Envoy runs an administrative listener. There is no way to turn this feature off. 250 // https://github.com/envoyproxy/envoy/issues/1297 251 envoyAdminBind := buildEnvoyAdminBind(h.alloc, serviceName, req.Task.Name, req.TaskEnv) 252 253 // Consul configures a ready listener. There is no way to turn this feature off. 254 envoyReadyBind := buildEnvoyReadyBind(h.alloc, serviceName, req.Task.Name, req.TaskEnv) 255 256 // Set runtime environment variables for the envoy admin and ready listeners. 257 resp.Env = map[string]string{ 258 helper.CleanEnvVar(envoyAdminBindEnvPrefix+serviceName, '_'): envoyAdminBind, 259 helper.CleanEnvVar(envoyReadyBindEnvPrefix+serviceName, '_'): envoyReadyBind, 260 } 261 262 // Envoy bootstrap configuration may contain a Consul token, so write 263 // it to the secrets directory like Vault tokens. 264 bootstrapFilePath := filepath.Join(req.TaskDir.SecretsDir, "envoy_bootstrap.json") 265 266 // Write everything related to the command to enable debugging 267 bootstrapStderrPath := filepath.Join(req.TaskDir.LogDir, "envoy_bootstrap.stderr.0") 268 bootstrapEnvPath := filepath.Join(req.TaskDir.SecretsDir, ".envoy_bootstrap.env") 269 bootstrapCmdPath := filepath.Join(req.TaskDir.SecretsDir, ".envoy_bootstrap.cmd") 270 271 siToken, err := h.maybeLoadSIToken(req.Task.Name, req.TaskDir.SecretsDir) 272 if err != nil { 273 h.logger.Error("failed to generate envoy bootstrap config", "sidecar_for", service.Name) 274 return fmt.Errorf("failed to generate envoy bootstrap config: %w", err) 275 } 276 h.logger.Debug("check for SI token for task", "task", req.Task.Name, "exists", siToken != "") 277 278 bootstrap := h.newEnvoyBootstrapArgs(h.alloc.TaskGroup, service, grpcAddr, envoyAdminBind, envoyReadyBind, siToken, bootstrapFilePath) 279 280 // Create command line arguments 281 bootstrapArgs := bootstrap.args() 282 283 // Write args to file for debugging 284 argsFile, err := os.Create(bootstrapCmdPath) 285 if err != nil { 286 return fmt.Errorf("failed to write bootstrap command line: %w", err) 287 } 288 defer argsFile.Close() 289 if _, err := io.WriteString(argsFile, strings.Join(bootstrapArgs, " ")+"\n"); err != nil { 290 return fmt.Errorf("failed to encode bootstrap command line: %w", err) 291 } 292 293 // Create environment 294 bootstrapEnv := bootstrap.env(os.Environ()) 295 // append nomad environment variables to the bootstrap environment 296 bootstrapEnv = append(bootstrapEnv, h.groupEnv()...) 297 298 // Write env to file for debugging 299 envFile, err := os.Create(bootstrapEnvPath) 300 if err != nil { 301 return fmt.Errorf("failed to write bootstrap environment: %w", err) 302 } 303 defer envFile.Close() 304 envEnc := json.NewEncoder(envFile) 305 envEnc.SetIndent("", " ") 306 if err := envEnc.Encode(bootstrapEnv); err != nil { 307 return fmt.Errorf("failed to encode bootstrap environment: %w", err) 308 } 309 310 // keep track of latest error returned from exec-ing consul envoy bootstrap 311 var cmdErr error 312 313 // Since Consul services are registered asynchronously with this task 314 // hook running, retry until timeout or success. 315 backoffOpts := decay.BackoffOptions{ 316 MaxSleepTime: h.envoyBootstrapWaitTime, 317 InitialGapSize: h.envoyBoostrapInitialGap, 318 MaxJitterSize: h.envoyBootstrapMaxJitter, 319 } 320 backoffErr := decay.Backoff(func() (bool, error) { 321 // If hook is killed, just stop. 322 select { 323 case <-ctx.Done(): 324 return false, nil 325 default: 326 } 327 328 // Prepare bootstrap command to run. 329 cmd := exec.CommandContext(ctx, "consul", bootstrapArgs...) 330 cmd.Env = bootstrapEnv 331 332 // Redirect stdout to secrets/envoy_bootstrap.json. 333 stdout, fileErr := os.Create(bootstrapFilePath) 334 if fileErr != nil { 335 return false, fmt.Errorf("failed to create secrets/envoy_bootstrap.json for envoy: %w", fileErr) 336 } 337 defer stdout.Close() 338 cmd.Stdout = stdout 339 340 // Redirect stderr into another file for later debugging. 341 stderr, fileErr := os.OpenFile(bootstrapStderrPath, os.O_RDWR|os.O_CREATE, 0644) 342 if fileErr != nil { 343 return false, fmt.Errorf("failed to create alloc/logs/envoy_bootstrap.stderr.0 for envoy: %w", fileErr) 344 } 345 defer stderr.Close() 346 cmd.Stderr = stderr 347 348 // Generate bootstrap 349 cmdErr = cmd.Run() 350 351 // Command succeeded, exit. 352 if cmdErr == nil { 353 // Bootstrap written. Mark as done and move on. 354 resp.Done = true 355 return false, nil 356 } 357 358 // Command failed, prepare for retry 359 // 360 // Cleanup the bootstrap file. An errors here is not 361 // important as (a) we test to ensure the deletion 362 // occurs, and (b) the file will either be rewritten on 363 // retry or eventually garbage collected if the task 364 // fails. 365 _ = os.Remove(bootstrapFilePath) 366 367 return true, cmdErr 368 }, backoffOpts) 369 370 if backoffErr != nil { 371 // Wrap the last error from Consul and set that as our status. 372 _, recoverable := cmdErr.(*exec.ExitError) 373 return structs.NewRecoverableError( 374 fmt.Errorf("error creating bootstrap configuration for Connect proxy sidecar: %v", cmdErr), 375 recoverable, 376 ) 377 } 378 379 return nil 380 } 381 382 func (h *envoyBootstrapHook) groupEnv() []string { 383 return []string{ 384 fmt.Sprintf("%s=%s", taskenv.AllocID, h.alloc.ID), 385 fmt.Sprintf("%s=%s", taskenv.ShortAllocID, h.alloc.ID[:8]), 386 fmt.Sprintf("%s=%s", taskenv.AllocName, h.alloc.Name), 387 fmt.Sprintf("%s=%s", taskenv.GroupName, h.alloc.TaskGroup), 388 fmt.Sprintf("%s=%s", taskenv.JobName, h.alloc.Job.Name), 389 fmt.Sprintf("%s=%s", taskenv.JobID, h.alloc.Job.ID), 390 fmt.Sprintf("%s=%s", taskenv.Namespace, h.alloc.Namespace), 391 fmt.Sprintf("%s=%s", taskenv.Region, h.alloc.Job.Region), 392 } 393 } 394 395 // buildEnvoyAdminBind determines a unique port for use by the envoy admin listener. 396 // 397 // This listener will be bound to 127.0.0.2. 398 func buildEnvoyAdminBind(alloc *structs.Allocation, service, task string, env *taskenv.TaskEnv) string { 399 return buildEnvoyBind(alloc, "127.0.0.2", service, task, env, envoyBaseAdminPort) 400 } 401 402 // buildEnvoyAdminBind determines a unique port for use by the envoy ready listener. 403 // 404 // This listener will be bound to 127.0.0.1. 405 func buildEnvoyReadyBind(alloc *structs.Allocation, service, task string, env *taskenv.TaskEnv) string { 406 return buildEnvoyBind(alloc, "127.0.0.1", service, task, env, envoyBaseReadyPort) 407 } 408 409 // buildEnvoyBind is used to determine a unique port for an envoy listener. 410 // 411 // In bridge mode, if multiple sidecars are running, the bind addresses need 412 // to be unique within the namespace, so we simply start at basePort and increment 413 // by the index of the task. 414 // 415 // In host mode, use the port provided through the service definition, which can 416 // be a port chosen by Nomad. 417 func buildEnvoyBind(alloc *structs.Allocation, ifce, service, task string, taskEnv *taskenv.TaskEnv, basePort int) string { 418 tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 419 port := basePort 420 switch tg.Networks[0].Mode { 421 case "host": 422 interpolatedServices := taskenv.InterpolateServices(taskEnv, tg.Services) 423 for _, svc := range interpolatedServices { 424 if svc.Name == service { 425 mapping := tg.Networks.Port(svc.PortLabel) 426 port = mapping.Value 427 break 428 } 429 } 430 default: 431 for idx, tgTask := range tg.Tasks { 432 if tgTask.Name == task { 433 port += idx 434 break 435 } 436 } 437 } 438 return net.JoinHostPort(ifce, strconv.Itoa(port)) 439 } 440 441 func (h *envoyBootstrapHook) writeConfig(filename, config string) error { 442 if err := ioutil.WriteFile(filename, []byte(config), 0440); err != nil { 443 _ = os.Remove(filename) 444 return err 445 } 446 return nil 447 } 448 449 // grpcAddress determines the Consul gRPC endpoint address to use. 450 // 451 // In host networking this will default to 127.0.0.1:8502. 452 // In bridge/cni networking this will default to unix://<socket>. 453 // In either case, CONSUL_GRPC_ADDR will override the default. 454 func (h *envoyBootstrapHook) grpcAddress(env map[string]string) string { 455 if address := env[grpcConsulVariable]; address != "" { 456 return address 457 } 458 459 tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup) 460 switch tg.Networks[0].Mode { 461 case "host": 462 return grpcDefaultAddress 463 default: 464 return "unix://" + allocdir.AllocGRPCSocket 465 } 466 } 467 468 func (h *envoyBootstrapHook) proxyServiceID(group string, service *structs.Service) string { 469 // Note, it is critical the ID here matches what is actually registered in 470 // Consul. See: WorkloadServices.Name in serviceregistration/workload.go. 471 return serviceregistration.MakeAllocServiceID(h.alloc.ID, "group-"+group, service) 472 } 473 474 // newEnvoyBootstrapArgs is used to prepare for the invocation of the 475 // 'consul connect envoy' command with arguments which will bootstrap the connect 476 // proxy or gateway. 477 // 478 // https://www.consul.io/commands/connect/envoy#consul-connect-envoy 479 func (h *envoyBootstrapHook) newEnvoyBootstrapArgs( 480 group string, service *structs.Service, 481 grpcAddr, envoyAdminBind, envoyReadyBind, siToken, filepath string, 482 ) envoyBootstrapArgs { 483 484 namespace := h.getConsulNamespace() 485 proxyID := h.proxyServiceID(group, service) 486 487 var gateway string 488 switch { 489 case service.Connect.HasSidecar(): 490 proxyID += "-sidecar-proxy" 491 case service.Connect.IsIngress(): 492 gateway = "ingress" 493 case service.Connect.IsTerminating(): 494 gateway = "terminating" 495 case service.Connect.IsMesh(): 496 gateway = "mesh" 497 } 498 499 h.logger.Info("bootstrapping envoy", 500 "namespace", namespace, "proxy_id", proxyID, "service", service.Name, 501 "gateway", gateway, "bootstrap_file", filepath, "grpc_addr", grpcAddr, 502 "admin_bind", envoyAdminBind, "ready_bind", envoyReadyBind, 503 ) 504 505 return envoyBootstrapArgs{ 506 consulConfig: h.consulConfig, 507 grpcAddr: grpcAddr, 508 envoyAdminBind: envoyAdminBind, 509 envoyReadyBind: envoyReadyBind, 510 siToken: siToken, 511 gateway: gateway, 512 proxyID: proxyID, 513 namespace: namespace, 514 } 515 } 516 517 // envoyBootstrapArgs is used to accumulate CLI arguments that will be passed 518 // along to the exec invocation of consul which will then generate the bootstrap 519 // configuration file for envoy. 520 type envoyBootstrapArgs struct { 521 consulConfig consulTransportConfig 522 grpcAddr string 523 envoyAdminBind string 524 envoyReadyBind string 525 siToken string 526 gateway string // gateways only 527 proxyID string // gateways and sidecars 528 namespace string 529 } 530 531 // args returns the CLI arguments consul needs in the correct order, with the 532 // -token argument present or not present depending on whether it is set. 533 func (e envoyBootstrapArgs) args() []string { 534 arguments := []string{ 535 "connect", 536 "envoy", 537 "-grpc-addr", e.grpcAddr, 538 "-http-addr", e.consulConfig.HTTPAddr, 539 "-admin-bind", e.envoyAdminBind, 540 "-address", e.envoyReadyBind, 541 "-proxy-id", e.proxyID, 542 "-bootstrap", 543 } 544 545 if v := e.gateway; v != "" { 546 arguments = append(arguments, "-gateway", v) 547 } 548 549 if v := e.siToken; v != "" { 550 arguments = append(arguments, "-token", v) 551 } 552 553 if v := e.consulConfig.CAFile; v != "" { 554 arguments = append(arguments, "-ca-file", v) 555 } 556 557 if v := e.consulConfig.CertFile; v != "" { 558 arguments = append(arguments, "-client-cert", v) 559 } 560 561 if v := e.consulConfig.KeyFile; v != "" { 562 arguments = append(arguments, "-client-key", v) 563 } 564 565 if v := e.namespace; v != "" { 566 arguments = append(arguments, "-namespace", v) 567 } 568 569 return arguments 570 } 571 572 // env creates the context of environment variables to be used when exec-ing 573 // the consul command for generating the envoy bootstrap config. It is expected 574 // the value of os.Environ() is passed in to be appended to. Because these are 575 // appended at the end of what will be passed into Cmd.Env, they will override 576 // any pre-existing values (i.e. what the Nomad agent was launched with). 577 // https://golang.org/pkg/os/exec/#Cmd 578 func (e envoyBootstrapArgs) env(env []string) []string { 579 if v := e.consulConfig.Auth; v != "" { 580 env = append(env, fmt.Sprintf("%s=%s", "CONSUL_HTTP_AUTH", v)) 581 } 582 if v := e.consulConfig.SSL; v != "" { 583 env = append(env, fmt.Sprintf("%s=%s", "CONSUL_HTTP_SSL", v)) 584 } 585 if v := e.consulConfig.VerifySSL; v != "" { 586 env = append(env, fmt.Sprintf("%s=%s", "CONSUL_HTTP_SSL_VERIFY", v)) 587 } 588 if v := e.namespace; v != "" { 589 env = append(env, fmt.Sprintf("%s=%s", "CONSUL_NAMESPACE", v)) 590 } 591 return env 592 } 593 594 // maybeLoadSIToken reads the SI token saved to disk in the secrets directory 595 // by the service identities prestart hook. This envoy bootstrap hook blocks 596 // until the sids hook completes, so if the SI token is required to exist (i.e. 597 // Consul ACLs are enabled), it will be in place by the time we try to read it. 598 func (h *envoyBootstrapHook) maybeLoadSIToken(task, dir string) (string, error) { 599 tokenPath := filepath.Join(dir, sidsTokenFile) 600 token, err := ioutil.ReadFile(tokenPath) 601 if err != nil { 602 if !os.IsNotExist(err) { 603 h.logger.Error("failed to load SI token", "task", task, "error", err) 604 return "", fmt.Errorf("failed to load SI token for %s: %w", task, err) 605 } 606 h.logger.Trace("no SI token to load", "task", task) 607 return "", nil // token file does not exist 608 } 609 h.logger.Trace("recovered pre-existing SI token", "task", task) 610 return string(token), nil 611 }