github.com/kubeflow/training-operator@v1.7.0/pkg/controller.v1/expectation/expectation.go (about)

     1  /*
     2  Copyright 2023 The Kubeflow Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package expectation
    18  
    19  import (
    20  	"fmt"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	log "github.com/sirupsen/logrus"
    25  
    26  	"k8s.io/client-go/tools/cache"
    27  	"k8s.io/utils/clock"
    28  )
    29  
const (
	// ExpectationsTimeout is the age beyond which recorded expectations are
	// treated as expired (see isExpired), so that a sync is allowed even if
	// the expected events were never observed.
	//
	// If a watch drops a delete event for a pod, it'll take this long
	// before a dormant controller waiting for those packets is woken up anyway. It is
	// specifically targeted at the case where some problem prevents an update
	// of expectations, without it the controller could stay asleep forever. This should
	// be set based on the expected latency of watch events.
	//
	// Currently a controller can service (create *and* observe the watch events for said
	// creation) about 10 pods a second, so it takes about 1 min to service
	// 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s
	// latency/pod at the scale of 3000 pods over 100 nodes.
	ExpectationsTimeout = 5 * time.Minute
)
    43  
    44  // Expectations are a way for controllers to tell the controller manager what they expect. eg:
    45  //	ControllerExpectations: {
    46  //		controller1: expects  2 adds in 2 minutes
    47  //		controller2: expects  2 dels in 2 minutes
    48  //		controller3: expects -1 adds in 2 minutes => controller3's expectations have already been met
    49  //	}
    50  //
    51  // Implementation:
//	ControlleeExpectations = pair of atomic counters to track controllee's creation/deletion
//	ControllerExpectations = cache.Store + a ControlleeExpectations per controller
    54  //
    55  // * Once set expectations can only be lowered
    56  // * A controller isn't synced till its expectations are either fulfilled, or expire
    57  // * Controllers that don't set expectations will get woken up for every matching controllee
    58  
    59  // ExpKeyFunc to parse out the key from a ControlleeExpectation
    60  var ExpKeyFunc = func(obj interface{}) (string, error) {
    61  	if e, ok := obj.(*ControlleeExpectations); ok {
    62  		return e.key, nil
    63  	}
    64  	return "", fmt.Errorf("could not find key for obj %#v", obj)
    65  }
    66  
// ControllerExpectationsInterface is an interface that allows users to set and wait on expectations.
// Only abstracted out for testing.
// Warning: if using KeyFunc it is not safe to use a single ControllerExpectationsInterface with different
// types of controllers, because the keys might conflict across types.
type ControllerExpectationsInterface interface {
	// GetExpectations returns the expectations recorded for controllerKey,
	// whether any were found, and any error from the lookup.
	GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error)
	// SatisfiedExpectations reports whether the controller identified by
	// controllerKey should be synced.
	SatisfiedExpectations(controllerKey string) bool
	// DeleteExpectations removes the expectations recorded for controllerKey.
	DeleteExpectations(controllerKey string)
	// SetExpectations records that add creations and del deletions are expected
	// for controllerKey.
	SetExpectations(controllerKey string, add, del int) error
	// ExpectCreations records that adds creations are expected for controllerKey.
	ExpectCreations(controllerKey string, adds int) error
	// ExpectDeletions records that dels deletions are expected for controllerKey.
	ExpectDeletions(controllerKey string, dels int) error
	// CreationObserved notes that one expected creation for controllerKey was seen.
	CreationObserved(controllerKey string)
	// DeletionObserved notes that one expected deletion for controllerKey was seen.
	DeletionObserved(controllerKey string)
	// RaiseExpectations increases the expected creations/deletions for
	// controllerKey by add and del.
	RaiseExpectations(controllerKey string, add, del int)
	// LowerExpectations decreases the expected creations/deletions for
	// controllerKey by add and del.
	LowerExpectations(controllerKey string, add, del int)
}
    83  
// ControllerExpectations is a cache mapping controllers to what they expect to see before being woken up for a sync.
// The embedded store holds one *ControlleeExpectations per controller key
// (entries are written by SetExpectations and read back by GetExpectations).
type ControllerExpectations struct {
	cache.Store
}
    88  
    89  // GetExpectations returns the ControlleeExpectations of the given controller.
    90  func (r *ControllerExpectations) GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) {
    91  	exp, exists, err := r.GetByKey(controllerKey)
    92  	if err == nil && exists {
    93  		return exp.(*ControlleeExpectations), true, nil
    94  	}
    95  	return nil, false, err
    96  }
    97  
    98  // DeleteExpectations deletes the expectations of the given controller from the TTLStore.
    99  func (r *ControllerExpectations) DeleteExpectations(controllerKey string) {
   100  	if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
   101  		if err := r.Delete(exp); err != nil {
   102  			log.Debugf("Error deleting expectations for controller %v: %v", controllerKey, err)
   103  		}
   104  	}
   105  }
   106  
   107  // SatisfiedExpectations returns true if the required adds/dels for the given controller have been observed.
   108  // Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller
   109  // manager.
   110  func (r *ControllerExpectations) SatisfiedExpectations(controllerKey string) bool {
   111  	if exp, exists, err := r.GetExpectations(controllerKey); exists {
   112  		if exp.Fulfilled() {
   113  			log.Debugf("Controller expectations fulfilled %#v", exp)
   114  			return true
   115  		} else if exp.isExpired() {
   116  			log.Debugf("Controller expectations expired %#v", exp)
   117  			return true
   118  		} else {
   119  			log.Debugf("Controller still waiting on expectations %#v", exp)
   120  			return false
   121  		}
   122  	} else if err != nil {
   123  		log.Debugf("Error encountered while checking expectations %#v, forcing sync", err)
   124  	} else {
   125  		// When a new controller is created, it doesn't have expectations.
   126  		// When it doesn't see expected watch events for > TTL, the expectations expire.
   127  		//	- In this case it wakes up, creates/deletes controllees, and sets expectations again.
   128  		// When it has satisfied expectations and no controllees need to be created/destroyed > TTL, the expectations expire.
   129  		//	- In this case it continues without setting expectations till it needs to create/delete controllees.
   130  		log.Debugf("Controller %v either never recorded expectations, or the ttl expired.", controllerKey)
   131  	}
   132  	// Trigger a sync if we either encountered and error (which shouldn't happen since we're
   133  	// getting from local store) or this controller hasn't established expectations.
   134  	return true
   135  }
   136  
   137  // TODO: Extend ExpirationCache to support explicit expiration.
   138  // TODO: Make this possible to disable in tests.
   139  // TODO: Support injection of clock.
   140  func (exp *ControlleeExpectations) isExpired() bool {
   141  	return clock.RealClock{}.Since(exp.timestamp) > ExpectationsTimeout
   142  }
   143  
   144  // SetExpectations registers new expectations for the given controller. Forgets existing expectations.
   145  func (r *ControllerExpectations) SetExpectations(controllerKey string, add, del int) error {
   146  	exp := &ControlleeExpectations{add: int64(add), del: int64(del), key: controllerKey, timestamp: clock.RealClock{}.Now()}
   147  	log.Debugf("Setting expectations %#v", exp)
   148  	return r.Add(exp)
   149  }
   150  
   151  func (r *ControllerExpectations) ExpectCreations(controllerKey string, adds int) error {
   152  	return r.SetExpectations(controllerKey, adds, 0)
   153  }
   154  
   155  func (r *ControllerExpectations) ExpectDeletions(controllerKey string, dels int) error {
   156  	return r.SetExpectations(controllerKey, 0, dels)
   157  }
   158  
   159  // Decrements the expectation counts of the given controller.
   160  func (r *ControllerExpectations) LowerExpectations(controllerKey string, add, del int) {
   161  	if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
   162  		exp.Add(int64(-add), int64(-del))
   163  		// The expectations might've been modified since the update on the previous line.
   164  		log.Debugf("Lowered expectations %#v", exp)
   165  	}
   166  }
   167  
   168  // Increments the expectation counts of the given controller.
   169  func (r *ControllerExpectations) RaiseExpectations(controllerKey string, add, del int) {
   170  	if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
   171  		exp.Add(int64(add), int64(del))
   172  		// The expectations might've been modified since the update on the previous line.
   173  		log.Debugf("Raised expectations %#v", exp)
   174  	}
   175  }
   176  
   177  // CreationObserved atomically decrements the `add` expectation count of the given controller.
   178  func (r *ControllerExpectations) CreationObserved(controllerKey string) {
   179  	r.LowerExpectations(controllerKey, 1, 0)
   180  }
   181  
   182  // DeletionObserved atomically decrements the `del` expectation count of the given controller.
   183  func (r *ControllerExpectations) DeletionObserved(controllerKey string) {
   184  	r.LowerExpectations(controllerKey, 0, 1)
   185  }
   186  
// Expectations are either fulfilled, or expire naturally.
type Expectations interface {
	// Fulfilled reports whether the expectations have been met.
	Fulfilled() bool
}
   191  
// ControlleeExpectations track controllee creates/deletes.
//
// add and del count the creations and deletions still expected and are
// accessed through sync/atomic. key is the value ExpKeyFunc returns for this
// object. timestamp is set when the expectations are created by
// SetExpectations and is compared against ExpectationsTimeout by isExpired.
type ControlleeExpectations struct {
	// Important: Since these two int64 fields are using sync/atomic, they have to be at the top of the struct due to a bug on 32-bit platforms
	// See: https://golang.org/pkg/sync/atomic/ for more information
	add       int64
	del       int64
	key       string
	timestamp time.Time
}
   201  
   202  // Add increments the add and del counters.
   203  func (e *ControlleeExpectations) Add(add, del int64) {
   204  	atomic.AddInt64(&e.add, add)
   205  	atomic.AddInt64(&e.del, del)
   206  }
   207  
   208  // Fulfilled returns true if this expectation has been fulfilled.
   209  func (e *ControlleeExpectations) Fulfilled() bool {
   210  	// TODO: think about why this line being atomic doesn't matter
   211  	return atomic.LoadInt64(&e.add) <= 0 && atomic.LoadInt64(&e.del) <= 0
   212  }
   213  
   214  // GetExpectations returns the add and del expectations of the controllee.
   215  func (e *ControlleeExpectations) GetExpectations() (int64, int64) {
   216  	return atomic.LoadInt64(&e.add), atomic.LoadInt64(&e.del)
   217  }
   218  
   219  // NewControllerExpectations returns a store for ControllerExpectations.
   220  func NewControllerExpectations() *ControllerExpectations {
   221  	return &ControllerExpectations{cache.NewStore(ExpKeyFunc)}
   222  }