github.com/cilium/cilium@v1.16.2/pkg/k8s/synced/resources.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package synced
     5  
     6  import (
     7  	"fmt"
     8  
     9  	"golang.org/x/sync/errgroup"
    10  	"k8s.io/client-go/tools/cache"
    11  
    12  	"github.com/cilium/cilium/pkg/inctimer"
    13  	"github.com/cilium/cilium/pkg/lock"
    14  	"github.com/cilium/cilium/pkg/time"
    15  )
    16  
// Resources maps resource names to channels that are closed upon initial
// sync with k8s.
type Resources struct {
	// CacheStatus is consulted by BlockWaitGroupToSyncResources, which
	// refuses new registrations once it reports Synchronized.
	CacheStatus CacheStatus

	// RWMutex guards the maps below.
	lock.RWMutex
	// resources maps a resource name to a channel. The channel is closed
	// once the wait for that resource's cache sync has finished, whether it
	// completed or was canceled; stopWait tells the two cases apart.
	resources map[string]<-chan struct{}
	// stopWait contains the result of cache.WaitForCacheSync per resource
	// name: true when the cache synced, false when the wait was canceled
	// through the stop channel.
	stopWait map[string]bool

	// timeSinceLastEvent contains the time each resource last received an event.
	// It is nil until the first BlockWaitGroupToSyncResources call and is set
	// back to nil when WaitForCacheSyncWithTimeout returns.
	timeSinceLastEvent map[string]time.Time
}
    33  
    34  func (r *Resources) getTimeOfLastEvent(resource string) (when time.Time, never bool) {
    35  	r.RLock()
    36  	defer r.RUnlock()
    37  	t, ok := r.timeSinceLastEvent[resource]
    38  	if !ok {
    39  		return time.Time{}, true
    40  	}
    41  	return t, false
    42  }
    43  
    44  func (r *Resources) SetEventTimestamp(resource string) {
    45  	now := time.Now()
    46  	r.Lock()
    47  	defer r.Unlock()
    48  	if r.timeSinceLastEvent != nil {
    49  		r.timeSinceLastEvent[resource] = now
    50  	}
    51  }
    52  
    53  func (r *Resources) CancelWaitGroupToSyncResources(resourceName string) {
    54  	r.Lock()
    55  	delete(r.resources, resourceName)
    56  	r.Unlock()
    57  }
    58  
    59  // BlockWaitGroupToSyncResources ensures that anything which waits on waitGroup
    60  // waits until all objects of the specified resource stored in Kubernetes are
    61  // received by the informer and processed by controller.
    62  // Fatally exits if syncing these initial objects fails.
    63  // If the given stop channel is closed, it does not fatal.
    64  // Once the k8s caches are synced against k8s, k8sCacheSynced is also closed.
    65  func (r *Resources) BlockWaitGroupToSyncResources(
    66  	stop <-chan struct{},
    67  	swg *lock.StoppableWaitGroup,
    68  	hasSyncedFunc cache.InformerSynced,
    69  	resourceName string,
    70  ) {
    71  	// Log an error caches have already synchronized, as the caller is making this call too late
    72  	// and the resource in question was missed in the initial cache sync.
    73  	if r.CacheStatus.Synchronized() {
    74  		log.WithField("kubernetesResource", resourceName).Errorf("BlockWaitGroupToSyncResources called after Caches have already synced")
    75  		return
    76  	}
    77  	ch := make(chan struct{})
    78  	r.Lock()
    79  	if r.resources == nil {
    80  		r.resources = make(map[string]<-chan struct{})
    81  		r.stopWait = make(map[string]bool)
    82  		r.timeSinceLastEvent = make(map[string]time.Time)
    83  	}
    84  	r.resources[resourceName] = ch
    85  	r.Unlock()
    86  
    87  	go func() {
    88  		scopedLog := log.WithField("kubernetesResource", resourceName)
    89  		scopedLog.Debug("waiting for cache to synchronize")
    90  		if ok := cache.WaitForCacheSync(stop, hasSyncedFunc); !ok {
    91  			select {
    92  			case <-stop:
    93  				// do not fatal if the channel was stopped
    94  				scopedLog.Debug("canceled cache synchronization")
    95  				r.Lock()
    96  				// Since the wait for cache sync was canceled we
    97  				// need to mark that stopWait was canceled and it
    98  				// should not stop waiting for this resource to be
    99  				// synchronized.
   100  				r.stopWait[resourceName] = false
   101  				r.Unlock()
   102  			default:
   103  				// Fatally exit it resource fails to sync
   104  				scopedLog.Fatalf("failed to wait for cache to sync")
   105  			}
   106  		} else {
   107  			scopedLog.Debug("cache synced")
   108  			r.Lock()
   109  			// Since the wait for cache sync was not canceled we need to
   110  			// mark that stopWait not canceled and it should stop
   111  			// waiting for this resource to be synchronized.
   112  			r.stopWait[resourceName] = true
   113  			r.Unlock()
   114  		}
   115  		if swg != nil {
   116  			swg.Stop()
   117  			swg.Wait()
   118  		}
   119  		close(ch)
   120  	}()
   121  }
   122  
   123  // WaitForCacheSync waits for all K8s resources represented by
   124  // resourceNames to have their K8s caches synchronized.
   125  func (r *Resources) WaitForCacheSync(resourceNames ...string) {
   126  	for _, resourceName := range resourceNames {
   127  		r.RLock()
   128  		c, ok := r.resources[resourceName]
   129  		r.RUnlock()
   130  		if !ok {
   131  			continue
   132  		}
   133  		for {
   134  			scopedLog := log.WithField("kubernetesResource", resourceName)
   135  			<-c
   136  			r.RLock()
   137  			stopWait := r.stopWait[resourceName]
   138  			r.RUnlock()
   139  			if stopWait {
   140  				scopedLog.Debug("stopped waiting for caches to be synced")
   141  				break
   142  			}
   143  			scopedLog.Debug("original cache sync operation was aborted, waiting for caches to be synced with a new channel...")
   144  			time.Sleep(syncedPollPeriod)
   145  			r.RLock()
   146  			c, ok = r.resources[resourceName]
   147  			r.RUnlock()
   148  			if !ok {
   149  				break
   150  			}
   151  		}
   152  	}
   153  }
   154  
// syncedPollPeriod is the poll period for underlying client-go wait for cache
// sync. In this file it is the pause between channel re-checks in
// WaitForCacheSync and the buffer added to the initial timeout in
// WaitForCacheSyncWithTimeout.
const syncedPollPeriod = 100 * time.Millisecond
   157  
   158  // WaitForCacheSyncWithTimeout waits for K8s resources represented by resourceNames to be synced.
   159  // For every resource type, if an event happens after starting the wait, the timeout will be pushed out
   160  // to be the time of the last event plus the timeout duration.
   161  func (r *Resources) WaitForCacheSyncWithTimeout(timeout time.Duration, resourceNames ...string) error {
   162  	// Upon completion, release event map to reduce unnecessary memory usage.
   163  	// SetEventTimestamp calls to nil event time map are no-op.
   164  	// Running BlockWaitGroupToSyncResources will reinitialize the event map.
   165  	defer func() {
   166  		r.Lock()
   167  		r.timeSinceLastEvent = nil
   168  		r.Unlock()
   169  	}()
   170  
   171  	wg := &errgroup.Group{}
   172  	for _, resource := range resourceNames {
   173  		done := make(chan struct{})
   174  		go func(resource string) {
   175  			r.WaitForCacheSync(resource)
   176  			close(done)
   177  		}(resource)
   178  
   179  		waitFn := func(resource string) func() error {
   180  			return func() error {
   181  				currTimeout := timeout + syncedPollPeriod // add buffer of the poll period.
   182  
   183  				for {
   184  					// Wait until after timeout ends or sync is completed.
   185  					// If timeout is reached, check if an event occurred that would
   186  					// have pushed back the timeout and wait for that amount of time.
   187  					select {
   188  					case now := <-inctimer.After(currTimeout):
   189  						lastEvent, never := r.getTimeOfLastEvent(resource)
   190  						if never {
   191  							return fmt.Errorf("timed out after %s, never received event for resource %q", timeout, resource)
   192  						}
   193  						if now.After(lastEvent.Add(timeout)) {
   194  							return fmt.Errorf("timed out after %s since receiving last event for resource %q", timeout, resource)
   195  						}
   196  						// We reset the timer to wait the timeout period minus the
   197  						// time since the last event.
   198  						currTimeout = timeout - time.Since(lastEvent)
   199  						log.Debugf("resource %q received event %s ago, waiting for additional %s before timing out", resource, time.Since(lastEvent), currTimeout)
   200  					case <-done:
   201  						log.Debugf("resource %q cache has synced, stopping timeout watcher", resource)
   202  						return nil
   203  					}
   204  				}
   205  			}
   206  		}(resource)
   207  
   208  		wg.Go(waitFn)
   209  	}
   210  
   211  	return wg.Wait()
   212  }