github.com/hernad/nomad@v1.6.112/nomad/stream/event_broker.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package stream 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 "sync" 11 "sync/atomic" 12 "time" 13 14 "github.com/armon/go-metrics" 15 "github.com/hashicorp/go-memdb" 16 "github.com/hernad/nomad/acl" 17 "github.com/hernad/nomad/nomad/structs" 18 19 "github.com/hashicorp/go-hclog" 20 ) 21 22 const ( 23 ACLCheckNodeRead = "node-read" 24 ACLCheckManagement = "management" 25 aclCacheSize = 32 26 ) 27 28 type EventBrokerCfg struct { 29 EventBufferSize int64 30 Logger hclog.Logger 31 } 32 33 type EventBroker struct { 34 // mu protects subscriptions 35 mu sync.Mutex 36 subscriptions *subscriptions 37 38 // eventBuf stores a configurable amount of events in memory 39 eventBuf *eventBuffer 40 41 // publishCh is used to send messages from an active txn to a goroutine which 42 // publishes events, so that publishing can happen asynchronously from 43 // the Commit call in the FSM hot path. 44 publishCh chan *structs.Events 45 46 aclDelegate ACLDelegate 47 aclCache *structs.ACLCache[*acl.ACL] 48 49 aclCh chan structs.Event 50 51 logger hclog.Logger 52 } 53 54 // NewEventBroker returns an EventBroker for publishing change events. 55 // A goroutine is run in the background to publish events to an event buffer. 56 // Cancelling the context will shutdown the goroutine to free resources, and stop 57 // all publishing. 58 func NewEventBroker(ctx context.Context, aclDelegate ACLDelegate, cfg EventBrokerCfg) (*EventBroker, error) { 59 if cfg.Logger == nil { 60 cfg.Logger = hclog.NewNullLogger() 61 } 62 63 // Set the event buffer size to a minimum 64 if cfg.EventBufferSize == 0 { 65 cfg.EventBufferSize = 100 66 } 67 68 buffer := newEventBuffer(cfg.EventBufferSize) 69 e := &EventBroker{ 70 logger: cfg.Logger.Named("event_broker"), 71 eventBuf: buffer, 72 publishCh: make(chan *structs.Events, 64), 73 aclCh: make(chan structs.Event, 10), 74 aclDelegate: aclDelegate, 75 aclCache: structs.NewACLCache[*acl.ACL](aclCacheSize), 76 subscriptions: &subscriptions{ 77 byToken: make(map[string]map[*SubscribeRequest]*Subscription), 78 }, 79 } 80 81 go e.handleUpdates(ctx) 82 go e.handleACLUpdates(ctx) 83 84 return e, nil 85 } 86 87 // Len returns the current length of the event buffer. 88 func (e *EventBroker) Len() int { 89 return e.eventBuf.Len() 90 } 91 92 // Publish events to all subscribers of the event Topic. 93 func (e *EventBroker) Publish(events *structs.Events) { 94 if len(events.Events) == 0 { 95 return 96 } 97 98 // Notify the broker to check running subscriptions against potentially 99 // updated ACL Token or Policy 100 for _, event := range events.Events { 101 if event.Topic == structs.TopicACLToken || event.Topic == structs.TopicACLPolicy { 102 e.aclCh <- event 103 } 104 } 105 106 e.publishCh <- events 107 } 108 109 // SubscribeWithACLCheck validates the SubscribeRequest's token and requested 110 // topics to ensure that the tokens privileges are sufficient. It will also 111 // return the token expiry time, if any. It is the callers responsibility to 112 // check this before publishing events to the caller. 113 func (e *EventBroker) SubscribeWithACLCheck(req *SubscribeRequest) (*Subscription, *time.Time, error) { 114 aclObj, expiryTime, err := aclObjFromSnapshotForTokenSecretID(e.aclDelegate.TokenProvider(), e.aclCache, req.Token) 115 if err != nil { 116 return nil, nil, structs.ErrPermissionDenied 117 } 118 119 if allowed := aclAllowsSubscription(aclObj, req); !allowed { 120 return nil, nil, structs.ErrPermissionDenied 121 } 122 123 sub, err := e.Subscribe(req) 124 if err != nil { 125 return nil, nil, err 126 } 127 return sub, expiryTime, nil 128 } 129 130 // Subscribe returns a new Subscription for a given request. A Subscription 131 // will receive an initial empty currentItem value which points to the first item 132 // in the buffer. This allows the new subscription to call Next() without first checking 133 // for the current Item. 134 // 135 // A Subscription will start at the requested index, or as close as possible to 136 // the requested index if it is no longer in the buffer. If StartExactlyAtIndex is 137 // set and the index is no longer in the buffer or not yet in the buffer an error 138 // will be returned. 139 // 140 // When a caller is finished with the subscription it must call Subscription.Unsubscribe 141 // to free ACL tracking resources. 142 func (e *EventBroker) Subscribe(req *SubscribeRequest) (*Subscription, error) { 143 e.mu.Lock() 144 defer e.mu.Unlock() 145 146 var head *bufferItem 147 var offset int 148 if req.Index != 0 { 149 head, offset = e.eventBuf.StartAtClosest(req.Index) 150 } else { 151 head = e.eventBuf.Head() 152 } 153 if offset > 0 && req.StartExactlyAtIndex { 154 return nil, fmt.Errorf("requested index not in buffer") 155 } else if offset > 0 { 156 metrics.SetGauge([]string{"nomad", "event_broker", "subscription", "request_offset"}, float32(offset)) 157 e.logger.Debug("requested index no longer in buffer", "requsted", int(req.Index), "closest", int(head.Events.Index)) 158 } 159 160 // Empty head so that calling Next on sub 161 start := newBufferItem(&structs.Events{Index: req.Index}) 162 start.link.next.Store(head) 163 close(start.link.nextCh) 164 165 sub := newSubscription(req, start, e.subscriptions.unsubscribeFn(req)) 166 167 e.subscriptions.add(req, sub) 168 return sub, nil 169 } 170 171 // CloseAll closes all subscriptions 172 func (e *EventBroker) CloseAll() { 173 e.subscriptions.closeAll() 174 } 175 176 func (e *EventBroker) handleUpdates(ctx context.Context) { 177 for { 178 select { 179 case <-ctx.Done(): 180 e.subscriptions.closeAll() 181 return 182 case update := <-e.publishCh: 183 e.eventBuf.Append(update) 184 } 185 } 186 } 187 188 func (e *EventBroker) handleACLUpdates(ctx context.Context) { 189 for { 190 select { 191 case <-ctx.Done(): 192 return 193 case update := <-e.aclCh: 194 switch payload := update.Payload.(type) { 195 case *structs.ACLTokenEvent: 196 tokenSecretID := payload.SecretID() 197 198 // Token was deleted 199 if update.Type == structs.TypeACLTokenDeleted { 200 e.subscriptions.closeSubscriptionsForTokens([]string{tokenSecretID}) 201 continue 202 } 203 204 // If broker cannot fetch state there is nothing more to do 205 if e.aclDelegate == nil { 206 continue 207 } 208 209 aclObj, expiryTime, err := aclObjFromSnapshotForTokenSecretID(e.aclDelegate.TokenProvider(), e.aclCache, tokenSecretID) 210 if err != nil || aclObj == nil { 211 e.logger.Error("failed resolving ACL for secretID, closing subscriptions", "error", err) 212 e.subscriptions.closeSubscriptionsForTokens([]string{tokenSecretID}) 213 continue 214 } 215 216 if expiryTime != nil && expiryTime.Before(time.Now().UTC()) { 217 e.logger.Info("ACL token is expired, closing subscriptions") 218 e.subscriptions.closeSubscriptionsForTokens([]string{tokenSecretID}) 219 continue 220 } 221 222 e.subscriptions.closeSubscriptionFunc(tokenSecretID, func(sub *Subscription) bool { 223 return !aclAllowsSubscription(aclObj, sub.req) 224 }) 225 226 case *structs.ACLPolicyEvent, *structs.ACLRoleStreamEvent: 227 // Re-evaluate each subscription permission since a policy or 228 // role change may alter the permissions of the token being 229 // used for the subscription. 230 e.checkSubscriptionsAgainstACLChange() 231 } 232 } 233 } 234 } 235 236 // checkSubscriptionsAgainstACLChange iterates over the brokers subscriptions 237 // and evaluates whether the token used for the subscription is still valid. A 238 // token may become invalid is the assigned policies or roles have been updated 239 // which removed the required permission. If the token is no long valid, the 240 // subscription is closed. 241 func (e *EventBroker) checkSubscriptionsAgainstACLChange() { 242 e.mu.Lock() 243 defer e.mu.Unlock() 244 245 // If broker cannot fetch state there is nothing more to do 246 if e.aclDelegate == nil { 247 return 248 } 249 250 aclSnapshot := e.aclDelegate.TokenProvider() 251 for tokenSecretID := range e.subscriptions.byToken { 252 // if tokenSecretID is empty ACLs were disabled at time of subscribing 253 if tokenSecretID == "" { 254 continue 255 } 256 257 aclObj, expiryTime, err := aclObjFromSnapshotForTokenSecretID(aclSnapshot, e.aclCache, tokenSecretID) 258 if err != nil || aclObj == nil { 259 e.logger.Debug("failed resolving ACL for secretID, closing subscriptions", "error", err) 260 e.subscriptions.closeSubscriptionsForTokens([]string{tokenSecretID}) 261 continue 262 } 263 264 if expiryTime != nil && expiryTime.Before(time.Now().UTC()) { 265 e.logger.Info("ACL token is expired, closing subscriptions") 266 e.subscriptions.closeSubscriptionsForTokens([]string{tokenSecretID}) 267 continue 268 } 269 270 e.subscriptions.closeSubscriptionFunc(tokenSecretID, func(sub *Subscription) bool { 271 return !aclAllowsSubscription(aclObj, sub.req) 272 }) 273 } 274 } 275 276 func aclObjFromSnapshotForTokenSecretID( 277 aclSnapshot ACLTokenProvider, aclCache *structs.ACLCache[*acl.ACL], tokenSecretID string) ( 278 *acl.ACL, *time.Time, error) { 279 280 aclToken, err := aclSnapshot.ACLTokenBySecretID(nil, tokenSecretID) 281 if err != nil { 282 return nil, nil, err 283 } 284 285 if aclToken == nil { 286 return nil, nil, structs.ErrTokenNotFound 287 } 288 if aclToken.IsExpired(time.Now().UTC()) { 289 return nil, nil, structs.ErrTokenExpired 290 } 291 292 // Check if this is a management token 293 if aclToken.Type == structs.ACLManagementToken { 294 return acl.ManagementACL, aclToken.ExpirationTime, nil 295 } 296 297 aclPolicies := make([]*structs.ACLPolicy, 0, len(aclToken.Policies)+len(aclToken.Roles)) 298 299 for _, policyName := range aclToken.Policies { 300 policy, err := aclSnapshot.ACLPolicyByName(nil, policyName) 301 if err != nil { 302 return nil, nil, errors.New("error finding acl policy") 303 } 304 if policy == nil { 305 // Ignore policies that don't exist, since they don't grant any 306 // more privilege. 307 continue 308 } 309 aclPolicies = append(aclPolicies, policy) 310 } 311 312 // Iterate all the token role links, so we can unpack these and identify 313 // the ACL policies. 314 for _, roleLink := range aclToken.Roles { 315 316 role, err := aclSnapshot.GetACLRoleByID(nil, roleLink.ID) 317 if err != nil { 318 return nil, nil, err 319 } 320 if role == nil { 321 continue 322 } 323 324 for _, policyLink := range role.Policies { 325 policy, err := aclSnapshot.ACLPolicyByName(nil, policyLink.Name) 326 if err != nil { 327 return nil, nil, errors.New("error finding acl policy") 328 } 329 if policy == nil { 330 // Ignore policies that don't exist, since they don't grant any 331 // more privilege. 332 continue 333 } 334 aclPolicies = append(aclPolicies, policy) 335 } 336 } 337 338 aclObj, err := structs.CompileACLObject(aclCache, aclPolicies) 339 if err != nil { 340 return nil, nil, err 341 } 342 return aclObj, aclToken.ExpirationTime, nil 343 } 344 345 type ACLTokenProvider interface { 346 ACLTokenBySecretID(ws memdb.WatchSet, secretID string) (*structs.ACLToken, error) 347 ACLPolicyByName(ws memdb.WatchSet, policyName string) (*structs.ACLPolicy, error) 348 GetACLRoleByID(ws memdb.WatchSet, roleID string) (*structs.ACLRole, error) 349 } 350 351 type ACLDelegate interface { 352 TokenProvider() ACLTokenProvider 353 } 354 355 func aclAllowsSubscription(aclObj *acl.ACL, subReq *SubscribeRequest) bool { 356 for topic := range subReq.Topics { 357 switch topic { 358 case structs.TopicDeployment, 359 structs.TopicEvaluation, 360 structs.TopicAllocation, 361 structs.TopicJob, 362 structs.TopicService: 363 if ok := aclObj.AllowNsOp(subReq.Namespace, acl.NamespaceCapabilityReadJob); !ok { 364 return false 365 } 366 case structs.TopicNode: 367 if ok := aclObj.AllowNodeRead(); !ok { 368 return false 369 } 370 case structs.TopicNodePool: 371 // Require management token for node pools since we can't filter 372 // out node pools the token doesn't have access to. 373 if ok := aclObj.IsManagement(); !ok { 374 return false 375 } 376 default: 377 if ok := aclObj.IsManagement(); !ok { 378 return false 379 } 380 } 381 } 382 383 return true 384 } 385 386 func (s *Subscription) forceClose() { 387 if atomic.CompareAndSwapUint32(&s.state, subscriptionStateOpen, subscriptionStateClosed) { 388 close(s.forceClosed) 389 } 390 } 391 392 type subscriptions struct { 393 // mu for byToken. If both subscription.mu and EventBroker.mu need 394 // to be held, EventBroker mutex MUST always be acquired first. 395 mu sync.RWMutex 396 397 // byToken is an mapping of active Subscriptions indexed by a token and 398 // a pointer to the request. 399 // When the token is modified all subscriptions under that token will be 400 // reloaded. 401 // A subscription may be unsubscribed by using the pointer to the request. 402 byToken map[string]map[*SubscribeRequest]*Subscription 403 } 404 405 func (s *subscriptions) add(req *SubscribeRequest, sub *Subscription) { 406 s.mu.Lock() 407 defer s.mu.Unlock() 408 409 subsByToken, ok := s.byToken[req.Token] 410 if !ok { 411 subsByToken = make(map[*SubscribeRequest]*Subscription) 412 s.byToken[req.Token] = subsByToken 413 } 414 subsByToken[req] = sub 415 } 416 417 func (s *subscriptions) closeSubscriptionsForTokens(tokenSecretIDs []string) { 418 s.mu.RLock() 419 defer s.mu.RUnlock() 420 421 for _, secretID := range tokenSecretIDs { 422 if subs, ok := s.byToken[secretID]; ok { 423 for _, sub := range subs { 424 sub.forceClose() 425 } 426 } 427 } 428 } 429 430 func (s *subscriptions) closeSubscriptionFunc(tokenSecretID string, fn func(*Subscription) bool) { 431 s.mu.RLock() 432 defer s.mu.RUnlock() 433 434 for _, sub := range s.byToken[tokenSecretID] { 435 if fn(sub) { 436 sub.forceClose() 437 } 438 } 439 } 440 441 // unsubscribeFn returns a function that the subscription will call to remove 442 // itself from the subsByToken. 443 // This function is returned as a closure so that the caller doesn't need to keep 444 // track of the SubscriptionRequest, and can not accidentally call unsubscribeFn with the 445 // wrong pointer. 446 func (s *subscriptions) unsubscribeFn(req *SubscribeRequest) func() { 447 return func() { 448 s.mu.Lock() 449 defer s.mu.Unlock() 450 451 subsByToken, ok := s.byToken[req.Token] 452 if !ok { 453 return 454 } 455 456 sub := subsByToken[req] 457 if sub == nil { 458 return 459 } 460 461 // close the subscription 462 sub.forceClose() 463 464 delete(subsByToken, req) 465 if len(subsByToken) == 0 { 466 delete(s.byToken, req.Token) 467 } 468 } 469 } 470 471 func (s *subscriptions) closeAll() { 472 s.mu.Lock() 473 defer s.mu.Unlock() 474 475 for _, byRequest := range s.byToken { 476 for _, sub := range byRequest { 477 sub.forceClose() 478 } 479 } 480 }