github.com/hernad/nomad@v1.6.112/nomad/stream/event_broker.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package stream
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"sync"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	"github.com/armon/go-metrics"
    15  	"github.com/hashicorp/go-memdb"
    16  	"github.com/hernad/nomad/acl"
    17  	"github.com/hernad/nomad/nomad/structs"
    18  
    19  	"github.com/hashicorp/go-hclog"
    20  )
    21  
const (
	// ACLCheckNodeRead and ACLCheckManagement are exported string
	// identifiers; neither is referenced anywhere in this file.
	// NOTE(review): presumably they label the node-read and management
	// permission checks for callers elsewhere — confirm against usages.
	ACLCheckNodeRead   = "node-read"
	ACLCheckManagement = "management"
	// aclCacheSize is the size passed to structs.NewACLCache when
	// NewEventBroker builds the broker's ACL cache.
	aclCacheSize       = 32
)
    27  
// EventBrokerCfg holds the settings consumed by NewEventBroker.
type EventBrokerCfg struct {
	// EventBufferSize is the size handed to newEventBuffer. NewEventBroker
	// replaces a zero value with 100.
	EventBufferSize int64
	// Logger is the parent logger for the broker. NewEventBroker substitutes
	// a null logger when it is nil.
	Logger          hclog.Logger
}
    32  
// EventBroker buffers published events in memory and tracks the
// subscriptions that read from that buffer. Instances are created by
// NewEventBroker, which also starts the goroutines draining publishCh and
// aclCh.
type EventBroker struct {
	// mu protects subscriptions
	mu            sync.Mutex
	subscriptions *subscriptions

	// eventBuf stores a configurable amount of events in memory
	eventBuf *eventBuffer

	// publishCh is used to send messages from an active txn to a goroutine which
	// publishes events, so that publishing can happen asynchronously from
	// the Commit call in the FSM hot path.
	publishCh chan *structs.Events

	// aclDelegate supplies the ACLTokenProvider used to resolve tokens,
	// policies and roles. The ACL update handlers treat a nil delegate as
	// "state cannot be fetched" and skip re-evaluation.
	aclDelegate ACLDelegate
	// aclCache is passed to aclObjFromSnapshotForTokenSecretID whenever an
	// ACL object is resolved for a token.
	aclCache    *structs.ACLCache[*acl.ACL]

	// aclCh carries ACL events from Publish to handleACLUpdates.
	aclCh chan structs.Event

	// logger is the broker's logger, named "event_broker" by NewEventBroker.
	logger hclog.Logger
}
    53  
    54  // NewEventBroker returns an EventBroker for publishing change events.
    55  // A goroutine is run in the background to publish events to an event buffer.
    56  // Cancelling the context will shutdown the goroutine to free resources, and stop
    57  // all publishing.
    58  func NewEventBroker(ctx context.Context, aclDelegate ACLDelegate, cfg EventBrokerCfg) (*EventBroker, error) {
    59  	if cfg.Logger == nil {
    60  		cfg.Logger = hclog.NewNullLogger()
    61  	}
    62  
    63  	// Set the event buffer size to a minimum
    64  	if cfg.EventBufferSize == 0 {
    65  		cfg.EventBufferSize = 100
    66  	}
    67  
    68  	buffer := newEventBuffer(cfg.EventBufferSize)
    69  	e := &EventBroker{
    70  		logger:      cfg.Logger.Named("event_broker"),
    71  		eventBuf:    buffer,
    72  		publishCh:   make(chan *structs.Events, 64),
    73  		aclCh:       make(chan structs.Event, 10),
    74  		aclDelegate: aclDelegate,
    75  		aclCache:    structs.NewACLCache[*acl.ACL](aclCacheSize),
    76  		subscriptions: &subscriptions{
    77  			byToken: make(map[string]map[*SubscribeRequest]*Subscription),
    78  		},
    79  	}
    80  
    81  	go e.handleUpdates(ctx)
    82  	go e.handleACLUpdates(ctx)
    83  
    84  	return e, nil
    85  }
    86  
// Len returns the current length of the event buffer, as reported by the
// underlying eventBuffer.
func (e *EventBroker) Len() int {
	return e.eventBuf.Len()
}
    91  
    92  // Publish events to all subscribers of the event Topic.
    93  func (e *EventBroker) Publish(events *structs.Events) {
    94  	if len(events.Events) == 0 {
    95  		return
    96  	}
    97  
    98  	// Notify the broker to check running subscriptions against potentially
    99  	// updated ACL Token or Policy
   100  	for _, event := range events.Events {
   101  		if event.Topic == structs.TopicACLToken || event.Topic == structs.TopicACLPolicy {
   102  			e.aclCh <- event
   103  		}
   104  	}
   105  
   106  	e.publishCh <- events
   107  }
   108  
   109  // SubscribeWithACLCheck validates the SubscribeRequest's token and requested
   110  // topics to ensure that the tokens privileges are sufficient. It will also
   111  // return the token expiry time, if any. It is the callers responsibility to
   112  // check this before publishing events to the caller.
   113  func (e *EventBroker) SubscribeWithACLCheck(req *SubscribeRequest) (*Subscription, *time.Time, error) {
   114  	aclObj, expiryTime, err := aclObjFromSnapshotForTokenSecretID(e.aclDelegate.TokenProvider(), e.aclCache, req.Token)
   115  	if err != nil {
   116  		return nil, nil, structs.ErrPermissionDenied
   117  	}
   118  
   119  	if allowed := aclAllowsSubscription(aclObj, req); !allowed {
   120  		return nil, nil, structs.ErrPermissionDenied
   121  	}
   122  
   123  	sub, err := e.Subscribe(req)
   124  	if err != nil {
   125  		return nil, nil, err
   126  	}
   127  	return sub, expiryTime, nil
   128  }
   129  
   130  // Subscribe returns a new Subscription for a given request. A Subscription
   131  // will receive an initial empty currentItem value which points to the first item
   132  // in the buffer. This allows the new subscription to call Next() without first checking
   133  // for the current Item.
   134  //
   135  // A Subscription will start at the requested index, or as close as possible to
   136  // the requested index if it is no longer in the buffer. If StartExactlyAtIndex is
   137  // set and the index is no longer in the buffer or not yet in the buffer an error
   138  // will be returned.
   139  //
   140  // When a caller is finished with the subscription it must call Subscription.Unsubscribe
   141  // to free ACL tracking resources.
   142  func (e *EventBroker) Subscribe(req *SubscribeRequest) (*Subscription, error) {
   143  	e.mu.Lock()
   144  	defer e.mu.Unlock()
   145  
   146  	var head *bufferItem
   147  	var offset int
   148  	if req.Index != 0 {
   149  		head, offset = e.eventBuf.StartAtClosest(req.Index)
   150  	} else {
   151  		head = e.eventBuf.Head()
   152  	}
   153  	if offset > 0 && req.StartExactlyAtIndex {
   154  		return nil, fmt.Errorf("requested index not in buffer")
   155  	} else if offset > 0 {
   156  		metrics.SetGauge([]string{"nomad", "event_broker", "subscription", "request_offset"}, float32(offset))
   157  		e.logger.Debug("requested index no longer in buffer", "requsted", int(req.Index), "closest", int(head.Events.Index))
   158  	}
   159  
   160  	// Empty head so that calling Next on sub
   161  	start := newBufferItem(&structs.Events{Index: req.Index})
   162  	start.link.next.Store(head)
   163  	close(start.link.nextCh)
   164  
   165  	sub := newSubscription(req, start, e.subscriptions.unsubscribeFn(req))
   166  
   167  	e.subscriptions.add(req, sub)
   168  	return sub, nil
   169  }
   170  
// CloseAll closes all subscriptions by force-closing every subscription
// currently tracked by the broker. The subscriptions are not removed from
// the tracking map here.
func (e *EventBroker) CloseAll() {
	e.subscriptions.closeAll()
}
   175  
   176  func (e *EventBroker) handleUpdates(ctx context.Context) {
   177  	for {
   178  		select {
   179  		case <-ctx.Done():
   180  			e.subscriptions.closeAll()
   181  			return
   182  		case update := <-e.publishCh:
   183  			e.eventBuf.Append(update)
   184  		}
   185  	}
   186  }
   187  
   188  func (e *EventBroker) handleACLUpdates(ctx context.Context) {
   189  	for {
   190  		select {
   191  		case <-ctx.Done():
   192  			return
   193  		case update := <-e.aclCh:
   194  			switch payload := update.Payload.(type) {
   195  			case *structs.ACLTokenEvent:
   196  				tokenSecretID := payload.SecretID()
   197  
   198  				// Token was deleted
   199  				if update.Type == structs.TypeACLTokenDeleted {
   200  					e.subscriptions.closeSubscriptionsForTokens([]string{tokenSecretID})
   201  					continue
   202  				}
   203  
   204  				// If broker cannot fetch state there is nothing more to do
   205  				if e.aclDelegate == nil {
   206  					continue
   207  				}
   208  
   209  				aclObj, expiryTime, err := aclObjFromSnapshotForTokenSecretID(e.aclDelegate.TokenProvider(), e.aclCache, tokenSecretID)
   210  				if err != nil || aclObj == nil {
   211  					e.logger.Error("failed resolving ACL for secretID, closing subscriptions", "error", err)
   212  					e.subscriptions.closeSubscriptionsForTokens([]string{tokenSecretID})
   213  					continue
   214  				}
   215  
   216  				if expiryTime != nil && expiryTime.Before(time.Now().UTC()) {
   217  					e.logger.Info("ACL token is expired, closing subscriptions")
   218  					e.subscriptions.closeSubscriptionsForTokens([]string{tokenSecretID})
   219  					continue
   220  				}
   221  
   222  				e.subscriptions.closeSubscriptionFunc(tokenSecretID, func(sub *Subscription) bool {
   223  					return !aclAllowsSubscription(aclObj, sub.req)
   224  				})
   225  
   226  			case *structs.ACLPolicyEvent, *structs.ACLRoleStreamEvent:
   227  				// Re-evaluate each subscription permission since a policy or
   228  				// role change may alter the permissions of the token being
   229  				// used for the subscription.
   230  				e.checkSubscriptionsAgainstACLChange()
   231  			}
   232  		}
   233  	}
   234  }
   235  
   236  // checkSubscriptionsAgainstACLChange iterates over the brokers subscriptions
   237  // and evaluates whether the token used for the subscription is still valid. A
   238  // token may become invalid is the assigned policies or roles have been updated
   239  // which removed the required permission. If the token is no long valid, the
   240  // subscription is closed.
   241  func (e *EventBroker) checkSubscriptionsAgainstACLChange() {
   242  	e.mu.Lock()
   243  	defer e.mu.Unlock()
   244  
   245  	// If broker cannot fetch state there is nothing more to do
   246  	if e.aclDelegate == nil {
   247  		return
   248  	}
   249  
   250  	aclSnapshot := e.aclDelegate.TokenProvider()
   251  	for tokenSecretID := range e.subscriptions.byToken {
   252  		// if tokenSecretID is empty ACLs were disabled at time of subscribing
   253  		if tokenSecretID == "" {
   254  			continue
   255  		}
   256  
   257  		aclObj, expiryTime, err := aclObjFromSnapshotForTokenSecretID(aclSnapshot, e.aclCache, tokenSecretID)
   258  		if err != nil || aclObj == nil {
   259  			e.logger.Debug("failed resolving ACL for secretID, closing subscriptions", "error", err)
   260  			e.subscriptions.closeSubscriptionsForTokens([]string{tokenSecretID})
   261  			continue
   262  		}
   263  
   264  		if expiryTime != nil && expiryTime.Before(time.Now().UTC()) {
   265  			e.logger.Info("ACL token is expired, closing subscriptions")
   266  			e.subscriptions.closeSubscriptionsForTokens([]string{tokenSecretID})
   267  			continue
   268  		}
   269  
   270  		e.subscriptions.closeSubscriptionFunc(tokenSecretID, func(sub *Subscription) bool {
   271  			return !aclAllowsSubscription(aclObj, sub.req)
   272  		})
   273  	}
   274  }
   275  
   276  func aclObjFromSnapshotForTokenSecretID(
   277  	aclSnapshot ACLTokenProvider, aclCache *structs.ACLCache[*acl.ACL], tokenSecretID string) (
   278  	*acl.ACL, *time.Time, error) {
   279  
   280  	aclToken, err := aclSnapshot.ACLTokenBySecretID(nil, tokenSecretID)
   281  	if err != nil {
   282  		return nil, nil, err
   283  	}
   284  
   285  	if aclToken == nil {
   286  		return nil, nil, structs.ErrTokenNotFound
   287  	}
   288  	if aclToken.IsExpired(time.Now().UTC()) {
   289  		return nil, nil, structs.ErrTokenExpired
   290  	}
   291  
   292  	// Check if this is a management token
   293  	if aclToken.Type == structs.ACLManagementToken {
   294  		return acl.ManagementACL, aclToken.ExpirationTime, nil
   295  	}
   296  
   297  	aclPolicies := make([]*structs.ACLPolicy, 0, len(aclToken.Policies)+len(aclToken.Roles))
   298  
   299  	for _, policyName := range aclToken.Policies {
   300  		policy, err := aclSnapshot.ACLPolicyByName(nil, policyName)
   301  		if err != nil {
   302  			return nil, nil, errors.New("error finding acl policy")
   303  		}
   304  		if policy == nil {
   305  			// Ignore policies that don't exist, since they don't grant any
   306  			// more privilege.
   307  			continue
   308  		}
   309  		aclPolicies = append(aclPolicies, policy)
   310  	}
   311  
   312  	// Iterate all the token role links, so we can unpack these and identify
   313  	// the ACL policies.
   314  	for _, roleLink := range aclToken.Roles {
   315  
   316  		role, err := aclSnapshot.GetACLRoleByID(nil, roleLink.ID)
   317  		if err != nil {
   318  			return nil, nil, err
   319  		}
   320  		if role == nil {
   321  			continue
   322  		}
   323  
   324  		for _, policyLink := range role.Policies {
   325  			policy, err := aclSnapshot.ACLPolicyByName(nil, policyLink.Name)
   326  			if err != nil {
   327  				return nil, nil, errors.New("error finding acl policy")
   328  			}
   329  			if policy == nil {
   330  				// Ignore policies that don't exist, since they don't grant any
   331  				// more privilege.
   332  				continue
   333  			}
   334  			aclPolicies = append(aclPolicies, policy)
   335  		}
   336  	}
   337  
   338  	aclObj, err := structs.CompileACLObject(aclCache, aclPolicies)
   339  	if err != nil {
   340  		return nil, nil, err
   341  	}
   342  	return aclObj, aclToken.ExpirationTime, nil
   343  }
   344  
// ACLTokenProvider is the set of ACL lookups the broker needs in order to
// resolve a token into an ACL object. Within this file every lookup is made
// with a nil WatchSet, and a nil result with a nil error is treated as
// "not found".
type ACLTokenProvider interface {
	// ACLTokenBySecretID returns the ACL token for the given secret ID.
	ACLTokenBySecretID(ws memdb.WatchSet, secretID string) (*structs.ACLToken, error)
	// ACLPolicyByName returns the ACL policy with the given name.
	ACLPolicyByName(ws memdb.WatchSet, policyName string) (*structs.ACLPolicy, error)
	// GetACLRoleByID returns the ACL role with the given ID.
	GetACLRoleByID(ws memdb.WatchSet, roleID string) (*structs.ACLRole, error)
}
   350  
// ACLDelegate hands out the ACLTokenProvider the broker uses for ACL
// lookups. The broker calls TokenProvider each time it needs to resolve
// tokens rather than holding on to a single provider.
type ACLDelegate interface {
	// TokenProvider returns the provider to run ACL lookups against.
	TokenProvider() ACLTokenProvider
}
   354  
   355  func aclAllowsSubscription(aclObj *acl.ACL, subReq *SubscribeRequest) bool {
   356  	for topic := range subReq.Topics {
   357  		switch topic {
   358  		case structs.TopicDeployment,
   359  			structs.TopicEvaluation,
   360  			structs.TopicAllocation,
   361  			structs.TopicJob,
   362  			structs.TopicService:
   363  			if ok := aclObj.AllowNsOp(subReq.Namespace, acl.NamespaceCapabilityReadJob); !ok {
   364  				return false
   365  			}
   366  		case structs.TopicNode:
   367  			if ok := aclObj.AllowNodeRead(); !ok {
   368  				return false
   369  			}
   370  		case structs.TopicNodePool:
   371  			// Require management token for node pools since we can't filter
   372  			// out node pools the token doesn't have access to.
   373  			if ok := aclObj.IsManagement(); !ok {
   374  				return false
   375  			}
   376  		default:
   377  			if ok := aclObj.IsManagement(); !ok {
   378  				return false
   379  			}
   380  		}
   381  	}
   382  
   383  	return true
   384  }
   385  
   386  func (s *Subscription) forceClose() {
   387  	if atomic.CompareAndSwapUint32(&s.state, subscriptionStateOpen, subscriptionStateClosed) {
   388  		close(s.forceClosed)
   389  	}
   390  }
   391  
// subscriptions tracks the broker's active Subscriptions, grouped by the
// token they were created with.
type subscriptions struct {
	// mu guards byToken. If both subscriptions.mu and EventBroker.mu need
	// to be held, EventBroker mutex MUST always be acquired first.
	mu sync.RWMutex

	// byToken is a mapping of active Subscriptions indexed by a token and
	// a pointer to the request.
	// When the token is modified all subscriptions under that token are
	// re-evaluated, and closed if they are no longer permitted.
	// A subscription may be unsubscribed by using the pointer to the request.
	byToken map[string]map[*SubscribeRequest]*Subscription
}
   404  
   405  func (s *subscriptions) add(req *SubscribeRequest, sub *Subscription) {
   406  	s.mu.Lock()
   407  	defer s.mu.Unlock()
   408  
   409  	subsByToken, ok := s.byToken[req.Token]
   410  	if !ok {
   411  		subsByToken = make(map[*SubscribeRequest]*Subscription)
   412  		s.byToken[req.Token] = subsByToken
   413  	}
   414  	subsByToken[req] = sub
   415  }
   416  
   417  func (s *subscriptions) closeSubscriptionsForTokens(tokenSecretIDs []string) {
   418  	s.mu.RLock()
   419  	defer s.mu.RUnlock()
   420  
   421  	for _, secretID := range tokenSecretIDs {
   422  		if subs, ok := s.byToken[secretID]; ok {
   423  			for _, sub := range subs {
   424  				sub.forceClose()
   425  			}
   426  		}
   427  	}
   428  }
   429  
   430  func (s *subscriptions) closeSubscriptionFunc(tokenSecretID string, fn func(*Subscription) bool) {
   431  	s.mu.RLock()
   432  	defer s.mu.RUnlock()
   433  
   434  	for _, sub := range s.byToken[tokenSecretID] {
   435  		if fn(sub) {
   436  			sub.forceClose()
   437  		}
   438  	}
   439  }
   440  
   441  // unsubscribeFn returns a function that the subscription will call to remove
   442  // itself from the subsByToken.
   443  // This function is returned as a closure so that the caller doesn't need to keep
   444  // track of the SubscriptionRequest, and can not accidentally call unsubscribeFn with the
   445  // wrong pointer.
   446  func (s *subscriptions) unsubscribeFn(req *SubscribeRequest) func() {
   447  	return func() {
   448  		s.mu.Lock()
   449  		defer s.mu.Unlock()
   450  
   451  		subsByToken, ok := s.byToken[req.Token]
   452  		if !ok {
   453  			return
   454  		}
   455  
   456  		sub := subsByToken[req]
   457  		if sub == nil {
   458  			return
   459  		}
   460  
   461  		// close the subscription
   462  		sub.forceClose()
   463  
   464  		delete(subsByToken, req)
   465  		if len(subsByToken) == 0 {
   466  			delete(s.byToken, req.Token)
   467  		}
   468  	}
   469  }
   470  
   471  func (s *subscriptions) closeAll() {
   472  	s.mu.Lock()
   473  	defer s.mu.Unlock()
   474  
   475  	for _, byRequest := range s.byToken {
   476  		for _, sub := range byRequest {
   477  			sub.forceClose()
   478  		}
   479  	}
   480  }