github.com/hernad/nomad@v1.6.112/nomad/consul.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package nomad 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 "strings" 11 "sync" 12 "time" 13 14 "github.com/armon/go-metrics" 15 "github.com/hashicorp/consul/api" 16 "github.com/hashicorp/go-hclog" 17 "github.com/hernad/nomad/command/agent/consul" 18 "github.com/hernad/nomad/helper" 19 "github.com/hernad/nomad/nomad/structs" 20 "golang.org/x/exp/slices" 21 "golang.org/x/sync/errgroup" 22 "golang.org/x/time/rate" 23 ) 24 25 const ( 26 // siTokenDescriptionFmt is the format for the .Description field of 27 // service identity tokens generated on behalf of Nomad. 28 siTokenDescriptionFmt = "_nomad_si [%s] [%s] [%s]" 29 30 // siTokenRequestRateLimit is the maximum number of requests per second Nomad 31 // will make against Consul for requesting SI tokens. 32 siTokenRequestRateLimit rate.Limit = 500 33 34 // siTokenMaxParallelRevokes is the maximum number of parallel SI token 35 // revocation requests Nomad will make against Consul. 36 siTokenMaxParallelRevokes = 64 37 38 // siTokenRevocationInterval is the interval at which SI tokens that failed 39 // initial revocation are retried. 40 siTokenRevocationInterval = 5 * time.Minute 41 ) 42 43 const ( 44 // configEntriesRequestRateLimit is the maximum number of requests per second 45 // Nomad will make against Consul for operations on global Configuration Entry 46 // objects. 47 configEntriesRequestRateLimit rate.Limit = 10 48 ) 49 50 const ( 51 // ConsulPolicyWrite is the literal text of the policy field of a Consul Policy 52 // Rule that we check when validating an Operator Consul token against the 53 // necessary permissions for creating a Service Identity token for a given 54 // service. 55 // 56 // The rule may be: 57 // - service.<exact> 58 // - service."*" (wildcard) 59 // - service_prefix.<matching> (including empty string) 60 // 61 // e.g. 62 // service "web" { policy = "write" } 63 // service_prefix "" { policy = "write" } 64 ConsulPolicyWrite = "write" 65 66 // ConsulPolicyRead is the literal text of the policy field of a Consul Policy 67 // Rule that we check when validating a job-submitter Consul token against the 68 // necessary permissions for reading the key-value store. 69 // 70 // The only acceptable rule is 71 // - service_prefix "" { policy = "read|write" } 72 ConsulPolicyRead = "read" 73 ) 74 75 type ServiceIdentityRequest struct { 76 ConsulNamespace string 77 TaskKind structs.TaskKind 78 TaskName string 79 ClusterID string 80 AllocID string 81 } 82 83 func (sir ServiceIdentityRequest) Validate() error { 84 switch { 85 case sir.ClusterID == "": 86 return errors.New("cluster id not set") 87 case sir.AllocID == "": 88 return errors.New("alloc id not set") 89 case sir.TaskName == "": 90 return errors.New("task name not set") 91 case sir.TaskKind == "": 92 return errors.New("task kind not set") 93 default: 94 return nil 95 } 96 } 97 98 func (sir ServiceIdentityRequest) Description() string { 99 return fmt.Sprintf(siTokenDescriptionFmt, sir.ClusterID, sir.AllocID, sir.TaskName) 100 } 101 102 // ConsulACLsAPI is an abstraction over the consul/api.ACL API used by Nomad 103 // Server. 104 // 105 // ACL requirements 106 // - acl:write (transitive through ACLsAPI) 107 type ConsulACLsAPI interface { 108 // CheckPermissions checks that the given Consul token has the necessary ACL 109 // permissions for each way that Consul is used as indicated by usage, 110 // returning an error if not. 111 CheckPermissions(ctx context.Context, namespace string, usage *structs.ConsulUsage, secretID string) error 112 113 // Create instructs Consul to create a Service Identity token. 114 CreateToken(context.Context, ServiceIdentityRequest) (*structs.SIToken, error) 115 116 // RevokeTokens instructs Consul to revoke the given token accessors. 117 RevokeTokens(context.Context, []*structs.SITokenAccessor, bool) bool 118 119 // MarkForRevocation marks the tokens for background revocation 120 MarkForRevocation([]*structs.SITokenAccessor) 121 122 // Stop is used to stop background token revocations. Intended to be used 123 // on Nomad Server shutdown. 124 Stop() 125 } 126 127 // PurgeSITokenAccessorFunc is called to remove SI Token accessors from the 128 // system (i.e. raft). If the function returns an error, the token will still 129 // be tracked and revocation attempts will retry in the background until there 130 // is a success. 131 type PurgeSITokenAccessorFunc func([]*structs.SITokenAccessor) error 132 133 type SITokenStats struct { 134 TrackedForRevoke int 135 } 136 137 type consulACLsAPI struct { 138 // aclClient is the API subset of the real consul client we need for 139 // managing Service Identity tokens 140 aclClient consul.ACLsAPI 141 142 // limiter is used to rate limit requests to consul 143 limiter *rate.Limiter 144 145 bgRevokeLock sync.Mutex 146 // Track accessors that must have their revocation retried in the background. 147 bgRetryRevocation []*structs.SITokenAccessor 148 // Track whether the background revocations have been stopped, to avoid 149 // creating tokens we would no longer be able to revoke. Expected to be used 150 // on a Server shutdown. 151 bgRevokeStopped bool 152 153 // purgeFunc is the Nomad Server function that removes the reference to the 154 // SI token accessor from the persistent raft store 155 purgeFunc PurgeSITokenAccessorFunc 156 157 // stopC is used to signal the client is shutting down and token revocation 158 // background goroutine should stop 159 stopC chan struct{} 160 161 // logger is used to log messages 162 logger hclog.Logger 163 } 164 165 func NewConsulACLsAPI(aclClient consul.ACLsAPI, logger hclog.Logger, purgeFunc PurgeSITokenAccessorFunc) *consulACLsAPI { 166 if purgeFunc == nil { 167 purgeFunc = func([]*structs.SITokenAccessor) error { return nil } 168 } 169 170 c := &consulACLsAPI{ 171 aclClient: aclClient, 172 limiter: rate.NewLimiter(siTokenRequestRateLimit, int(siTokenRequestRateLimit)), 173 stopC: make(chan struct{}), 174 purgeFunc: purgeFunc, 175 logger: logger.Named("consul_acl"), 176 } 177 178 go c.bgRetryRevokeDaemon() 179 180 return c 181 } 182 183 // Stop stops background token revocations from happening. Once stopped, tokens 184 // may no longer be created. 185 func (c *consulACLsAPI) Stop() { 186 c.bgRevokeLock.Lock() 187 defer c.bgRevokeLock.Unlock() 188 189 c.stopC <- struct{}{} 190 c.bgRevokeStopped = true 191 } 192 193 func (c *consulACLsAPI) readToken(ctx context.Context, secretID string) (*api.ACLToken, error) { 194 defer metrics.MeasureSince([]string{"nomad", "consul", "read_token"}, time.Now()) 195 196 if id := strings.TrimSpace(secretID); !helper.IsUUID(id) { 197 return nil, errors.New("missing consul token") 198 } 199 200 // Ensure we are under our rate limit. 201 if err := c.limiter.Wait(ctx); err != nil { 202 return nil, fmt.Errorf("unable to read consul token: %w", err) 203 } 204 205 consulToken, _, err := c.aclClient.TokenReadSelf(&api.QueryOptions{ 206 AllowStale: false, 207 Token: secretID, 208 }) 209 if err != nil { 210 return nil, fmt.Errorf("unable to read consul token: %w", err) 211 } 212 213 return consulToken, nil 214 } 215 216 func (c *consulACLsAPI) CheckPermissions(ctx context.Context, namespace string, usage *structs.ConsulUsage, secretID string) error { 217 // consul not used, nothing to check 218 if !usage.Used() { 219 return nil 220 } 221 222 // lookup the token from consul 223 token, readErr := c.readToken(ctx, secretID) 224 if readErr != nil { 225 return readErr 226 } 227 228 // if the token is a global-management token, it has unrestricted privileges 229 if c.isManagementToken(token) { 230 return nil 231 } 232 233 // if the token cannot possibly be used to act on objects in the desired 234 // namespace, reject it immediately 235 if err := namespaceCheck(namespace, token); err != nil { 236 return err 237 } 238 239 // verify token has keystore read permission, if using template 240 if usage.KV { 241 allowable, err := c.canReadKeystore(namespace, token) 242 if err != nil { 243 return err 244 } else if !allowable { 245 return errors.New("insufficient Consul ACL permissions to use template") 246 } 247 } 248 249 // verify token has service write permission for group+task services 250 for _, service := range usage.Services { 251 allowable, err := c.canWriteService(namespace, service, token) 252 if err != nil { 253 return err 254 } else if !allowable { 255 return fmt.Errorf("insufficient Consul ACL permissions to write service %q", service) 256 } 257 } 258 259 return nil 260 } 261 262 func (c *consulACLsAPI) CreateToken(ctx context.Context, sir ServiceIdentityRequest) (*structs.SIToken, error) { 263 defer metrics.MeasureSince([]string{"nomad", "consul", "create_token"}, time.Now()) 264 265 // make sure the background token revocations have not been stopped 266 c.bgRevokeLock.Lock() 267 stopped := c.bgRevokeStopped 268 c.bgRevokeLock.Unlock() 269 270 if stopped { 271 return nil, errors.New("client stopped and may no longer create tokens") 272 } 273 274 // Check the metadata for the token we want 275 if err := sir.Validate(); err != nil { 276 return nil, err 277 } 278 279 // the SI token created must be for the service, not the sidecar of the service 280 // https://www.consul.io/docs/acl/acl-system.html#acl-service-identities 281 service := sir.TaskKind.Value() 282 partial := &api.ACLToken{ 283 Description: sir.Description(), 284 ServiceIdentities: []*api.ACLServiceIdentity{{ServiceName: service}}, 285 Namespace: sir.ConsulNamespace, 286 Local: true, 287 } 288 289 // Ensure we are under our rate limit. 290 if err := c.limiter.Wait(ctx); err != nil { 291 return nil, err 292 } 293 294 token, _, err := c.aclClient.TokenCreate(partial, nil) 295 if err != nil { 296 return nil, err 297 } 298 299 return &structs.SIToken{ 300 ConsulNamespace: token.Namespace, 301 AccessorID: token.AccessorID, 302 SecretID: token.SecretID, 303 TaskName: sir.TaskName, 304 }, nil 305 } 306 307 // RevokeTokens revokes the passed set of SI token accessors. If committed is set, 308 // the client's purge function is called (which purges the tokens from the Server's 309 // persistent store). If there is an error purging either because of Consul failures 310 // or because of the purge function, the revocation is retried in the background. 311 // 312 // The revocation of an SI token accessor is idempotent. 313 // 314 // A return value of true indicates one or more accessors were stored for 315 // a revocation retry attempt in the background (intended for tests). 316 func (c *consulACLsAPI) RevokeTokens(ctx context.Context, accessors []*structs.SITokenAccessor, committed bool) bool { 317 defer metrics.MeasureSince([]string{"nomad", "consul", "revoke_tokens"}, time.Now()) 318 319 nTokens := float32(len(accessors)) 320 321 if err := c.parallelRevoke(ctx, accessors); err != nil { 322 // If these tokens were uncommitted into raft, it is a best effort to 323 // revoke them now. If this immediate revocation does not work, Nomad loses 324 // track of them and will need to do a brute reconciliation later. This 325 // should happen rarely, and will be implemented soon. 326 if !committed { 327 metrics.IncrCounter([]string{"nomad", "consul", "undistributed_si_tokens_abandoned"}, nTokens) 328 } 329 330 c.logger.Warn("failed to revoke tokens, will reattempt later", "error", err) 331 c.storeForRevocation(accessors) 332 return true 333 } 334 335 if !committed { 336 // Un-committed tokens were revoked without incident (nothing to purge) 337 metrics.IncrCounter([]string{"nomad", "consul", "undistributed_si_tokens_revoked"}, nTokens) 338 return false 339 } 340 341 // Committed tokens were revoked without incident, now purge them 342 if err := c.purgeFunc(accessors); err != nil { 343 c.logger.Error("failed to purge SI token accessors", "error", err) 344 c.storeForRevocation(accessors) 345 return true 346 } 347 348 // Track that the SI tokens were revoked and purged successfully 349 metrics.IncrCounter([]string{"nomad", "consul", "distributed_si_tokens_revoked"}, nTokens) 350 return false 351 } 352 353 func (c *consulACLsAPI) MarkForRevocation(accessors []*structs.SITokenAccessor) { 354 c.storeForRevocation(accessors) 355 } 356 357 func (c *consulACLsAPI) storeForRevocation(accessors []*structs.SITokenAccessor) { 358 c.bgRevokeLock.Lock() 359 defer c.bgRevokeLock.Unlock() 360 361 // copy / append the set of accessors we must track for revocation in the 362 // background 363 c.bgRetryRevocation = append(c.bgRetryRevocation, accessors...) 364 } 365 366 func (c *consulACLsAPI) parallelRevoke(ctx context.Context, accessors []*structs.SITokenAccessor) error { 367 g, pCtx := errgroup.WithContext(ctx) 368 369 // Cap the handlers 370 handlers := len(accessors) 371 if handlers > siTokenMaxParallelRevokes { 372 handlers = siTokenMaxParallelRevokes 373 } 374 375 // Revoke the SI Token Accessors 376 input := make(chan *structs.SITokenAccessor, handlers) 377 for i := 0; i < handlers; i++ { 378 g.Go(func() error { 379 for { 380 select { 381 case accessor, ok := <-input: 382 if !ok { 383 return nil 384 } 385 if err := c.singleRevoke(ctx, accessor); err != nil { 386 return fmt.Errorf( 387 "failed to revoke SI token accessor (alloc %q, node %q, task %q): %w", 388 accessor.AllocID, accessor.NodeID, accessor.TaskName, err, 389 ) 390 } 391 case <-pCtx.Done(): 392 return nil 393 } 394 } 395 }) 396 } 397 398 // Send the input 399 go func() { 400 defer close(input) 401 for _, accessor := range accessors { 402 select { 403 case <-pCtx.Done(): 404 return 405 case input <- accessor: 406 } 407 } 408 }() 409 410 // Wait for everything to complete 411 return g.Wait() 412 } 413 414 func (c *consulACLsAPI) singleRevoke(ctx context.Context, accessor *structs.SITokenAccessor) error { 415 c.logger.Trace("revoke SI token", "task", accessor.TaskName, "alloc_id", accessor.AllocID, "node_id", accessor.NodeID) 416 417 // Ensure we are under our rate limit. 418 if err := c.limiter.Wait(ctx); err != nil { 419 return err 420 } 421 422 _, err := c.aclClient.TokenDelete(accessor.AccessorID, &api.WriteOptions{Namespace: accessor.ConsulNamespace}) 423 if err != nil && strings.Contains(err.Error(), "Cannot find token to delete") { 424 return nil // Consul will error when deleting a non-existent token 425 } 426 return err 427 } 428 429 func (c *consulACLsAPI) bgRetryRevokeDaemon() { 430 ticker := time.NewTicker(siTokenRevocationInterval) 431 defer ticker.Stop() 432 433 for { 434 select { 435 case <-c.stopC: 436 return 437 case <-ticker.C: 438 c.bgRetryRevoke() 439 } 440 } 441 } 442 443 // maxConsulRevocationBatchSize is the maximum tokens a bgRetryRevoke should revoke 444 // at any given time. 445 const maxConsulRevocationBatchSize = 1000 446 447 func (c *consulACLsAPI) bgRetryRevoke() { 448 c.bgRevokeLock.Lock() 449 defer c.bgRevokeLock.Unlock() 450 451 // fast path, nothing to do 452 if len(c.bgRetryRevocation) == 0 { 453 return 454 } 455 456 // unlike vault tokens, SI tokens do not have a TTL, and so we must try to 457 // remove all SI token accessors, every time, until they're gone 458 toRevoke := len(c.bgRetryRevocation) 459 if toRevoke > maxConsulRevocationBatchSize { 460 toRevoke = maxConsulRevocationBatchSize 461 } 462 toPurge := make([]*structs.SITokenAccessor, toRevoke) 463 copy(toPurge, c.bgRetryRevocation) 464 465 if err := c.parallelRevoke(context.Background(), toPurge); err != nil { 466 c.logger.Warn("background SI token revocation failed", "error", err) 467 return 468 } 469 470 // Call the revocation function 471 if err := c.purgeFunc(toPurge); err != nil { 472 // Just try again later (revocation is idempotent) 473 c.logger.Error("background SI token purge failed", "error", err) 474 return 475 } 476 477 // Track that the SI tokens were revoked successfully 478 nTokens := float32(len(toPurge)) 479 metrics.IncrCounter([]string{"nomad", "consul", "distributed_tokens_revoked"}, nTokens) 480 481 // Reset the list of accessors to retry, since we just removed them all. 482 c.bgRetryRevocation = nil 483 } 484 485 func (c *consulACLsAPI) ListTokens() ([]string, error) { 486 // defer metrics.MeasureSince([]string{"nomad", "consul", "list_tokens"}, time.Now()) 487 return nil, errors.New("not yet implemented") 488 } 489 490 // purgeSITokenAccessors is the Nomad Server method which will remove the set 491 // of SI token accessors from the persistent raft store. 492 func (s *Server) purgeSITokenAccessors(accessors []*structs.SITokenAccessor) error { 493 // Commit this update via Raft 494 request := structs.SITokenAccessorsRequest{Accessors: accessors} 495 _, _, err := s.raftApply(structs.ServiceIdentityAccessorDeregisterRequestType, request) 496 return err 497 } 498 499 // ConsulConfigsAPI is an abstraction over the consul/api.ConfigEntries API used by 500 // Nomad Server. 501 // 502 // Nomad will only perform write operations on Consul Ingress/Terminating Gateway 503 // Configuration Entries. Removing the entries is not yet safe, given that multiple 504 // Nomad clusters may be writing to the same config entries, which are global in 505 // the Consul scope. There was a Meta field introduced which Nomad can leverage 506 // in the future, when Consul no longer supports versions that do not contain the 507 // field. The Meta field would be used to track which Nomad "owns" the CE. 508 // https://github.com/hernad/nomad/issues/8971 509 type ConsulConfigsAPI interface { 510 // SetIngressCE adds the given ConfigEntry to Consul, overwriting 511 // the previous entry if set. 512 SetIngressCE(ctx context.Context, namespace, service string, entry *structs.ConsulIngressConfigEntry) error 513 514 // SetTerminatingCE adds the given ConfigEntry to Consul, overwriting 515 // the previous entry if set. 516 SetTerminatingCE(ctx context.Context, namespace, service string, entry *structs.ConsulTerminatingConfigEntry) error 517 518 // Stop is used to stop additional creations of Configuration Entries. Intended to 519 // be used on Nomad Server shutdown. 520 Stop() 521 } 522 523 type consulConfigsAPI struct { 524 // configsClient is the API subset of the real Consul client we need for 525 // managing Configuration Entries. 526 configsClient consul.ConfigAPI 527 528 // limiter is used to rate limit requests to Consul 529 limiter *rate.Limiter 530 531 // logger is used to log messages 532 logger hclog.Logger 533 534 // lock protects the stopped flag, which prevents use of the consul configs API 535 // client after shutdown. 536 lock sync.Mutex 537 stopped bool 538 } 539 540 func NewConsulConfigsAPI(configsClient consul.ConfigAPI, logger hclog.Logger) *consulConfigsAPI { 541 return &consulConfigsAPI{ 542 configsClient: configsClient, 543 limiter: rate.NewLimiter(configEntriesRequestRateLimit, int(configEntriesRequestRateLimit)), 544 logger: logger, 545 } 546 } 547 548 func (c *consulConfigsAPI) Stop() { 549 c.lock.Lock() 550 defer c.lock.Unlock() 551 c.stopped = true 552 } 553 554 func (c *consulConfigsAPI) SetIngressCE(ctx context.Context, namespace, service string, entry *structs.ConsulIngressConfigEntry) error { 555 return c.setCE(ctx, convertIngressCE(namespace, service, entry)) 556 } 557 558 func (c *consulConfigsAPI) SetTerminatingCE(ctx context.Context, namespace, service string, entry *structs.ConsulTerminatingConfigEntry) error { 559 return c.setCE(ctx, convertTerminatingCE(namespace, service, entry)) 560 } 561 562 // setCE will set the Configuration Entry of any type Consul supports. 563 func (c *consulConfigsAPI) setCE(ctx context.Context, entry api.ConfigEntry) error { 564 defer metrics.MeasureSince([]string{"nomad", "consul", "create_config_entry"}, time.Now()) 565 566 // make sure the background deletion goroutine has not been stopped 567 c.lock.Lock() 568 stopped := c.stopped 569 c.lock.Unlock() 570 571 if stopped { 572 return errors.New("client stopped and may not longer create config entries") 573 } 574 575 // ensure we are under our wait limit 576 if err := c.limiter.Wait(ctx); err != nil { 577 return err 578 } 579 580 _, _, err := c.configsClient.Set(entry, &api.WriteOptions{Namespace: entry.GetNamespace()}) 581 return err 582 } 583 584 func convertIngressCE(namespace, service string, entry *structs.ConsulIngressConfigEntry) api.ConfigEntry { 585 var listeners []api.IngressListener = nil 586 for _, listener := range entry.Listeners { 587 var services []api.IngressService = nil 588 for _, s := range listener.Services { 589 services = append(services, api.IngressService{ 590 Name: s.Name, 591 Hosts: slices.Clone(s.Hosts), 592 }) 593 } 594 listeners = append(listeners, api.IngressListener{ 595 Port: listener.Port, 596 Protocol: listener.Protocol, 597 Services: services, 598 }) 599 } 600 601 tls := api.GatewayTLSConfig{} 602 if entry.TLS != nil { 603 tls.Enabled = entry.TLS.Enabled 604 tls.TLSMinVersion = entry.TLS.TLSMinVersion 605 tls.TLSMaxVersion = entry.TLS.TLSMaxVersion 606 tls.CipherSuites = slices.Clone(entry.TLS.CipherSuites) 607 } 608 609 return &api.IngressGatewayConfigEntry{ 610 Namespace: namespace, 611 Kind: api.IngressGateway, 612 Name: service, 613 TLS: tls, 614 Listeners: listeners, 615 } 616 } 617 618 func convertTerminatingCE(namespace, service string, entry *structs.ConsulTerminatingConfigEntry) api.ConfigEntry { 619 var linked []api.LinkedService = nil 620 for _, s := range entry.Services { 621 linked = append(linked, api.LinkedService{ 622 Name: s.Name, 623 CAFile: s.CAFile, 624 CertFile: s.CertFile, 625 KeyFile: s.KeyFile, 626 SNI: s.SNI, 627 }) 628 } 629 return &api.TerminatingGatewayConfigEntry{ 630 Namespace: namespace, 631 Kind: api.TerminatingGateway, 632 Name: service, 633 Services: linked, 634 } 635 }