github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/consul.go (about) 1 package nomad 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "strings" 8 "sync" 9 "time" 10 11 "github.com/armon/go-metrics" 12 "github.com/hashicorp/consul/api" 13 "github.com/hashicorp/go-hclog" 14 "github.com/hashicorp/nomad/command/agent/consul" 15 "github.com/hashicorp/nomad/helper" 16 "github.com/hashicorp/nomad/nomad/structs" 17 "golang.org/x/exp/slices" 18 "golang.org/x/sync/errgroup" 19 "golang.org/x/time/rate" 20 ) 21 22 const ( 23 // siTokenDescriptionFmt is the format for the .Description field of 24 // service identity tokens generated on behalf of Nomad. 25 siTokenDescriptionFmt = "_nomad_si [%s] [%s] [%s]" 26 27 // siTokenRequestRateLimit is the maximum number of requests per second Nomad 28 // will make against Consul for requesting SI tokens. 29 siTokenRequestRateLimit rate.Limit = 500 30 31 // siTokenMaxParallelRevokes is the maximum number of parallel SI token 32 // revocation requests Nomad will make against Consul. 33 siTokenMaxParallelRevokes = 64 34 35 // siTokenRevocationInterval is the interval at which SI tokens that failed 36 // initial revocation are retried. 37 siTokenRevocationInterval = 5 * time.Minute 38 ) 39 40 const ( 41 // configEntriesRequestRateLimit is the maximum number of requests per second 42 // Nomad will make against Consul for operations on global Configuration Entry 43 // objects. 44 configEntriesRequestRateLimit rate.Limit = 10 45 ) 46 47 const ( 48 // ConsulPolicyWrite is the literal text of the policy field of a Consul Policy 49 // Rule that we check when validating an Operator Consul token against the 50 // necessary permissions for creating a Service Identity token for a given 51 // service. 52 // 53 // The rule may be: 54 // - service.<exact> 55 // - service."*" (wildcard) 56 // - service_prefix.<matching> (including empty string) 57 // 58 // e.g. 59 // service "web" { policy = "write" } 60 // service_prefix "" { policy = "write" } 61 ConsulPolicyWrite = "write" 62 63 // ConsulPolicyRead is the literal text of the policy field of a Consul Policy 64 // Rule that we check when validating a job-submitter Consul token against the 65 // necessary permissions for reading the key-value store. 66 // 67 // The only acceptable rule is 68 // - service_prefix "" { policy = "read|write" } 69 ConsulPolicyRead = "read" 70 ) 71 72 type ServiceIdentityRequest struct { 73 ConsulNamespace string 74 TaskKind structs.TaskKind 75 TaskName string 76 ClusterID string 77 AllocID string 78 } 79 80 func (sir ServiceIdentityRequest) Validate() error { 81 switch { 82 case sir.ClusterID == "": 83 return errors.New("cluster id not set") 84 case sir.AllocID == "": 85 return errors.New("alloc id not set") 86 case sir.TaskName == "": 87 return errors.New("task name not set") 88 case sir.TaskKind == "": 89 return errors.New("task kind not set") 90 default: 91 return nil 92 } 93 } 94 95 func (sir ServiceIdentityRequest) Description() string { 96 return fmt.Sprintf(siTokenDescriptionFmt, sir.ClusterID, sir.AllocID, sir.TaskName) 97 } 98 99 // ConsulACLsAPI is an abstraction over the consul/api.ACL API used by Nomad 100 // Server. 101 // 102 // ACL requirements 103 // - acl:write (transitive through ACLsAPI) 104 type ConsulACLsAPI interface { 105 // CheckPermissions checks that the given Consul token has the necessary ACL 106 // permissions for each way that Consul is used as indicated by usage, 107 // returning an error if not. 108 CheckPermissions(ctx context.Context, namespace string, usage *structs.ConsulUsage, secretID string) error 109 110 // Create instructs Consul to create a Service Identity token. 111 CreateToken(context.Context, ServiceIdentityRequest) (*structs.SIToken, error) 112 113 // RevokeTokens instructs Consul to revoke the given token accessors. 114 RevokeTokens(context.Context, []*structs.SITokenAccessor, bool) bool 115 116 // MarkForRevocation marks the tokens for background revocation 117 MarkForRevocation([]*structs.SITokenAccessor) 118 119 // Stop is used to stop background token revocations. Intended to be used 120 // on Nomad Server shutdown. 121 Stop() 122 } 123 124 // PurgeSITokenAccessorFunc is called to remove SI Token accessors from the 125 // system (i.e. raft). If the function returns an error, the token will still 126 // be tracked and revocation attempts will retry in the background until there 127 // is a success. 128 type PurgeSITokenAccessorFunc func([]*structs.SITokenAccessor) error 129 130 type SITokenStats struct { 131 TrackedForRevoke int 132 } 133 134 type consulACLsAPI struct { 135 // aclClient is the API subset of the real consul client we need for 136 // managing Service Identity tokens 137 aclClient consul.ACLsAPI 138 139 // limiter is used to rate limit requests to consul 140 limiter *rate.Limiter 141 142 bgRevokeLock sync.Mutex 143 // Track accessors that must have their revocation retried in the background. 144 bgRetryRevocation []*structs.SITokenAccessor 145 // Track whether the background revocations have been stopped, to avoid 146 // creating tokens we would no longer be able to revoke. Expected to be used 147 // on a Server shutdown. 148 bgRevokeStopped bool 149 150 // purgeFunc is the Nomad Server function that removes the reference to the 151 // SI token accessor from the persistent raft store 152 purgeFunc PurgeSITokenAccessorFunc 153 154 // stopC is used to signal the client is shutting down and token revocation 155 // background goroutine should stop 156 stopC chan struct{} 157 158 // logger is used to log messages 159 logger hclog.Logger 160 } 161 162 func NewConsulACLsAPI(aclClient consul.ACLsAPI, logger hclog.Logger, purgeFunc PurgeSITokenAccessorFunc) *consulACLsAPI { 163 if purgeFunc == nil { 164 purgeFunc = func([]*structs.SITokenAccessor) error { return nil } 165 } 166 167 c := &consulACLsAPI{ 168 aclClient: aclClient, 169 limiter: rate.NewLimiter(siTokenRequestRateLimit, int(siTokenRequestRateLimit)), 170 stopC: make(chan struct{}), 171 purgeFunc: purgeFunc, 172 logger: logger.Named("consul_acl"), 173 } 174 175 go c.bgRetryRevokeDaemon() 176 177 return c 178 } 179 180 // Stop stops background token revocations from happening. Once stopped, tokens 181 // may no longer be created. 182 func (c *consulACLsAPI) Stop() { 183 c.bgRevokeLock.Lock() 184 defer c.bgRevokeLock.Unlock() 185 186 c.stopC <- struct{}{} 187 c.bgRevokeStopped = true 188 } 189 190 func (c *consulACLsAPI) readToken(ctx context.Context, secretID string) (*api.ACLToken, error) { 191 defer metrics.MeasureSince([]string{"nomad", "consul", "read_token"}, time.Now()) 192 193 if id := strings.TrimSpace(secretID); !helper.IsUUID(id) { 194 return nil, errors.New("missing consul token") 195 } 196 197 // Ensure we are under our rate limit. 198 if err := c.limiter.Wait(ctx); err != nil { 199 return nil, fmt.Errorf("unable to read consul token: %w", err) 200 } 201 202 consulToken, _, err := c.aclClient.TokenReadSelf(&api.QueryOptions{ 203 AllowStale: false, 204 Token: secretID, 205 }) 206 if err != nil { 207 return nil, fmt.Errorf("unable to read consul token: %w", err) 208 } 209 210 return consulToken, nil 211 } 212 213 func (c *consulACLsAPI) CheckPermissions(ctx context.Context, namespace string, usage *structs.ConsulUsage, secretID string) error { 214 // consul not used, nothing to check 215 if !usage.Used() { 216 return nil 217 } 218 219 // lookup the token from consul 220 token, readErr := c.readToken(ctx, secretID) 221 if readErr != nil { 222 return readErr 223 } 224 225 // if the token is a global-management token, it has unrestricted privileges 226 if c.isManagementToken(token) { 227 return nil 228 } 229 230 // if the token cannot possibly be used to act on objects in the desired 231 // namespace, reject it immediately 232 if err := namespaceCheck(namespace, token); err != nil { 233 return err 234 } 235 236 // verify token has keystore read permission, if using template 237 if usage.KV { 238 allowable, err := c.canReadKeystore(namespace, token) 239 if err != nil { 240 return err 241 } else if !allowable { 242 return errors.New("insufficient Consul ACL permissions to use template") 243 } 244 } 245 246 // verify token has service write permission for group+task services 247 for _, service := range usage.Services { 248 allowable, err := c.canWriteService(namespace, service, token) 249 if err != nil { 250 return err 251 } else if !allowable { 252 return fmt.Errorf("insufficient Consul ACL permissions to write service %q", service) 253 } 254 } 255 256 return nil 257 } 258 259 func (c *consulACLsAPI) CreateToken(ctx context.Context, sir ServiceIdentityRequest) (*structs.SIToken, error) { 260 defer metrics.MeasureSince([]string{"nomad", "consul", "create_token"}, time.Now()) 261 262 // make sure the background token revocations have not been stopped 263 c.bgRevokeLock.Lock() 264 stopped := c.bgRevokeStopped 265 c.bgRevokeLock.Unlock() 266 267 if stopped { 268 return nil, errors.New("client stopped and may no longer create tokens") 269 } 270 271 // Check the metadata for the token we want 272 if err := sir.Validate(); err != nil { 273 return nil, err 274 } 275 276 // the SI token created must be for the service, not the sidecar of the service 277 // https://www.consul.io/docs/acl/acl-system.html#acl-service-identities 278 service := sir.TaskKind.Value() 279 partial := &api.ACLToken{ 280 Description: sir.Description(), 281 ServiceIdentities: []*api.ACLServiceIdentity{{ServiceName: service}}, 282 Namespace: sir.ConsulNamespace, 283 Local: true, 284 } 285 286 // Ensure we are under our rate limit. 287 if err := c.limiter.Wait(ctx); err != nil { 288 return nil, err 289 } 290 291 token, _, err := c.aclClient.TokenCreate(partial, nil) 292 if err != nil { 293 return nil, err 294 } 295 296 return &structs.SIToken{ 297 ConsulNamespace: token.Namespace, 298 AccessorID: token.AccessorID, 299 SecretID: token.SecretID, 300 TaskName: sir.TaskName, 301 }, nil 302 } 303 304 // RevokeTokens revokes the passed set of SI token accessors. If committed is set, 305 // the client's purge function is called (which purges the tokens from the Server's 306 // persistent store). If there is an error purging either because of Consul failures 307 // or because of the purge function, the revocation is retried in the background. 308 // 309 // The revocation of an SI token accessor is idempotent. 310 // 311 // A return value of true indicates one or more accessors were stored for 312 // a revocation retry attempt in the background (intended for tests). 313 func (c *consulACLsAPI) RevokeTokens(ctx context.Context, accessors []*structs.SITokenAccessor, committed bool) bool { 314 defer metrics.MeasureSince([]string{"nomad", "consul", "revoke_tokens"}, time.Now()) 315 316 nTokens := float32(len(accessors)) 317 318 if err := c.parallelRevoke(ctx, accessors); err != nil { 319 // If these tokens were uncommitted into raft, it is a best effort to 320 // revoke them now. If this immediate revocation does not work, Nomad loses 321 // track of them and will need to do a brute reconciliation later. This 322 // should happen rarely, and will be implemented soon. 323 if !committed { 324 metrics.IncrCounter([]string{"nomad", "consul", "undistributed_si_tokens_abandoned"}, nTokens) 325 } 326 327 c.logger.Warn("failed to revoke tokens, will reattempt later", "error", err) 328 c.storeForRevocation(accessors) 329 return true 330 } 331 332 if !committed { 333 // Un-committed tokens were revoked without incident (nothing to purge) 334 metrics.IncrCounter([]string{"nomad", "consul", "undistributed_si_tokens_revoked"}, nTokens) 335 return false 336 } 337 338 // Committed tokens were revoked without incident, now purge them 339 if err := c.purgeFunc(accessors); err != nil { 340 c.logger.Error("failed to purge SI token accessors", "error", err) 341 c.storeForRevocation(accessors) 342 return true 343 } 344 345 // Track that the SI tokens were revoked and purged successfully 346 metrics.IncrCounter([]string{"nomad", "consul", "distributed_si_tokens_revoked"}, nTokens) 347 return false 348 } 349 350 func (c *consulACLsAPI) MarkForRevocation(accessors []*structs.SITokenAccessor) { 351 c.storeForRevocation(accessors) 352 } 353 354 func (c *consulACLsAPI) storeForRevocation(accessors []*structs.SITokenAccessor) { 355 c.bgRevokeLock.Lock() 356 defer c.bgRevokeLock.Unlock() 357 358 // copy / append the set of accessors we must track for revocation in the 359 // background 360 c.bgRetryRevocation = append(c.bgRetryRevocation, accessors...) 361 } 362 363 func (c *consulACLsAPI) parallelRevoke(ctx context.Context, accessors []*structs.SITokenAccessor) error { 364 g, pCtx := errgroup.WithContext(ctx) 365 366 // Cap the handlers 367 handlers := len(accessors) 368 if handlers > siTokenMaxParallelRevokes { 369 handlers = siTokenMaxParallelRevokes 370 } 371 372 // Revoke the SI Token Accessors 373 input := make(chan *structs.SITokenAccessor, handlers) 374 for i := 0; i < handlers; i++ { 375 g.Go(func() error { 376 for { 377 select { 378 case accessor, ok := <-input: 379 if !ok { 380 return nil 381 } 382 if err := c.singleRevoke(ctx, accessor); err != nil { 383 return fmt.Errorf( 384 "failed to revoke SI token accessor (alloc %q, node %q, task %q): %w", 385 accessor.AllocID, accessor.NodeID, accessor.TaskName, err, 386 ) 387 } 388 case <-pCtx.Done(): 389 return nil 390 } 391 } 392 }) 393 } 394 395 // Send the input 396 go func() { 397 defer close(input) 398 for _, accessor := range accessors { 399 select { 400 case <-pCtx.Done(): 401 return 402 case input <- accessor: 403 } 404 } 405 }() 406 407 // Wait for everything to complete 408 return g.Wait() 409 } 410 411 func (c *consulACLsAPI) singleRevoke(ctx context.Context, accessor *structs.SITokenAccessor) error { 412 c.logger.Trace("revoke SI token", "task", accessor.TaskName, "alloc_id", accessor.AllocID, "node_id", accessor.NodeID) 413 414 // Ensure we are under our rate limit. 415 if err := c.limiter.Wait(ctx); err != nil { 416 return err 417 } 418 419 // Consul will no-op the deletion of a non-existent token (no error) 420 _, err := c.aclClient.TokenDelete(accessor.AccessorID, &api.WriteOptions{Namespace: accessor.ConsulNamespace}) 421 return err 422 } 423 424 func (c *consulACLsAPI) bgRetryRevokeDaemon() { 425 ticker := time.NewTicker(siTokenRevocationInterval) 426 defer ticker.Stop() 427 428 for { 429 select { 430 case <-c.stopC: 431 return 432 case <-ticker.C: 433 c.bgRetryRevoke() 434 } 435 } 436 } 437 438 // maxConsulRevocationBatchSize is the maximum tokens a bgRetryRevoke should revoke 439 // at any given time. 440 const maxConsulRevocationBatchSize = 1000 441 442 func (c *consulACLsAPI) bgRetryRevoke() { 443 c.bgRevokeLock.Lock() 444 defer c.bgRevokeLock.Unlock() 445 446 // fast path, nothing to do 447 if len(c.bgRetryRevocation) == 0 { 448 return 449 } 450 451 // unlike vault tokens, SI tokens do not have a TTL, and so we must try to 452 // remove all SI token accessors, every time, until they're gone 453 toRevoke := len(c.bgRetryRevocation) 454 if toRevoke > maxConsulRevocationBatchSize { 455 toRevoke = maxConsulRevocationBatchSize 456 } 457 toPurge := make([]*structs.SITokenAccessor, toRevoke) 458 copy(toPurge, c.bgRetryRevocation) 459 460 if err := c.parallelRevoke(context.Background(), toPurge); err != nil { 461 c.logger.Warn("background SI token revocation failed", "error", err) 462 return 463 } 464 465 // Call the revocation function 466 if err := c.purgeFunc(toPurge); err != nil { 467 // Just try again later (revocation is idempotent) 468 c.logger.Error("background SI token purge failed", "error", err) 469 return 470 } 471 472 // Track that the SI tokens were revoked successfully 473 nTokens := float32(len(toPurge)) 474 metrics.IncrCounter([]string{"nomad", "consul", "distributed_tokens_revoked"}, nTokens) 475 476 // Reset the list of accessors to retry, since we just removed them all. 477 c.bgRetryRevocation = nil 478 } 479 480 func (c *consulACLsAPI) ListTokens() ([]string, error) { 481 // defer metrics.MeasureSince([]string{"nomad", "consul", "list_tokens"}, time.Now()) 482 return nil, errors.New("not yet implemented") 483 } 484 485 // purgeSITokenAccessors is the Nomad Server method which will remove the set 486 // of SI token accessors from the persistent raft store. 487 func (s *Server) purgeSITokenAccessors(accessors []*structs.SITokenAccessor) error { 488 // Commit this update via Raft 489 request := structs.SITokenAccessorsRequest{Accessors: accessors} 490 _, _, err := s.raftApply(structs.ServiceIdentityAccessorDeregisterRequestType, request) 491 return err 492 } 493 494 // ConsulConfigsAPI is an abstraction over the consul/api.ConfigEntries API used by 495 // Nomad Server. 496 // 497 // Nomad will only perform write operations on Consul Ingress/Terminating Gateway 498 // Configuration Entries. Removing the entries is not yet safe, given that multiple 499 // Nomad clusters may be writing to the same config entries, which are global in 500 // the Consul scope. There was a Meta field introduced which Nomad can leverage 501 // in the future, when Consul no longer supports versions that do not contain the 502 // field. The Meta field would be used to track which Nomad "owns" the CE. 503 // https://github.com/hashicorp/nomad/issues/8971 504 type ConsulConfigsAPI interface { 505 // SetIngressCE adds the given ConfigEntry to Consul, overwriting 506 // the previous entry if set. 507 SetIngressCE(ctx context.Context, namespace, service string, entry *structs.ConsulIngressConfigEntry) error 508 509 // SetTerminatingCE adds the given ConfigEntry to Consul, overwriting 510 // the previous entry if set. 511 SetTerminatingCE(ctx context.Context, namespace, service string, entry *structs.ConsulTerminatingConfigEntry) error 512 513 // Stop is used to stop additional creations of Configuration Entries. Intended to 514 // be used on Nomad Server shutdown. 515 Stop() 516 } 517 518 type consulConfigsAPI struct { 519 // configsClient is the API subset of the real Consul client we need for 520 // managing Configuration Entries. 521 configsClient consul.ConfigAPI 522 523 // limiter is used to rate limit requests to Consul 524 limiter *rate.Limiter 525 526 // logger is used to log messages 527 logger hclog.Logger 528 529 // lock protects the stopped flag, which prevents use of the consul configs API 530 // client after shutdown. 531 lock sync.Mutex 532 stopped bool 533 } 534 535 func NewConsulConfigsAPI(configsClient consul.ConfigAPI, logger hclog.Logger) *consulConfigsAPI { 536 return &consulConfigsAPI{ 537 configsClient: configsClient, 538 limiter: rate.NewLimiter(configEntriesRequestRateLimit, int(configEntriesRequestRateLimit)), 539 logger: logger, 540 } 541 } 542 543 func (c *consulConfigsAPI) Stop() { 544 c.lock.Lock() 545 defer c.lock.Unlock() 546 c.stopped = true 547 } 548 549 func (c *consulConfigsAPI) SetIngressCE(ctx context.Context, namespace, service string, entry *structs.ConsulIngressConfigEntry) error { 550 return c.setCE(ctx, convertIngressCE(namespace, service, entry)) 551 } 552 553 func (c *consulConfigsAPI) SetTerminatingCE(ctx context.Context, namespace, service string, entry *structs.ConsulTerminatingConfigEntry) error { 554 return c.setCE(ctx, convertTerminatingCE(namespace, service, entry)) 555 } 556 557 // setCE will set the Configuration Entry of any type Consul supports. 558 func (c *consulConfigsAPI) setCE(ctx context.Context, entry api.ConfigEntry) error { 559 defer metrics.MeasureSince([]string{"nomad", "consul", "create_config_entry"}, time.Now()) 560 561 // make sure the background deletion goroutine has not been stopped 562 c.lock.Lock() 563 stopped := c.stopped 564 c.lock.Unlock() 565 566 if stopped { 567 return errors.New("client stopped and may not longer create config entries") 568 } 569 570 // ensure we are under our wait limit 571 if err := c.limiter.Wait(ctx); err != nil { 572 return err 573 } 574 575 _, _, err := c.configsClient.Set(entry, &api.WriteOptions{Namespace: entry.GetNamespace()}) 576 return err 577 } 578 579 func convertIngressCE(namespace, service string, entry *structs.ConsulIngressConfigEntry) api.ConfigEntry { 580 var listeners []api.IngressListener = nil 581 for _, listener := range entry.Listeners { 582 var services []api.IngressService = nil 583 for _, s := range listener.Services { 584 services = append(services, api.IngressService{ 585 Name: s.Name, 586 Hosts: slices.Clone(s.Hosts), 587 }) 588 } 589 listeners = append(listeners, api.IngressListener{ 590 Port: listener.Port, 591 Protocol: listener.Protocol, 592 Services: services, 593 }) 594 } 595 596 tls := api.GatewayTLSConfig{} 597 if entry.TLS != nil { 598 tls.Enabled = entry.TLS.Enabled 599 tls.TLSMinVersion = entry.TLS.TLSMinVersion 600 tls.TLSMaxVersion = entry.TLS.TLSMaxVersion 601 tls.CipherSuites = slices.Clone(entry.TLS.CipherSuites) 602 } 603 604 return &api.IngressGatewayConfigEntry{ 605 Namespace: namespace, 606 Kind: api.IngressGateway, 607 Name: service, 608 TLS: tls, 609 Listeners: listeners, 610 } 611 } 612 613 func convertTerminatingCE(namespace, service string, entry *structs.ConsulTerminatingConfigEntry) api.ConfigEntry { 614 var linked []api.LinkedService = nil 615 for _, s := range entry.Services { 616 linked = append(linked, api.LinkedService{ 617 Name: s.Name, 618 CAFile: s.CAFile, 619 CertFile: s.CertFile, 620 KeyFile: s.KeyFile, 621 SNI: s.SNI, 622 }) 623 } 624 return &api.TerminatingGatewayConfigEntry{ 625 Namespace: namespace, 626 Kind: api.TerminatingGateway, 627 Name: service, 628 Services: linked, 629 } 630 }