github.com/cilium/cilium@v1.16.2/pkg/k8s/synced/resources.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package synced 5 6 import ( 7 "fmt" 8 9 "golang.org/x/sync/errgroup" 10 "k8s.io/client-go/tools/cache" 11 12 "github.com/cilium/cilium/pkg/inctimer" 13 "github.com/cilium/cilium/pkg/lock" 14 "github.com/cilium/cilium/pkg/time" 15 ) 16 17 // Resources maps resource names to channels that are closed upon initial 18 // sync with k8s. 19 type Resources struct { 20 CacheStatus CacheStatus 21 22 lock.RWMutex 23 // resourceChannels maps a resource name to a channel. Once the given 24 // resource name is synchronized with k8s, the channel for which that 25 // resource name maps to is closed. 26 resources map[string]<-chan struct{} 27 // stopWait contains the result of cache.WaitForCacheSync 28 stopWait map[string]bool 29 30 // timeSinceLastEvent contains the time each resource last received an event. 31 timeSinceLastEvent map[string]time.Time 32 } 33 34 func (r *Resources) getTimeOfLastEvent(resource string) (when time.Time, never bool) { 35 r.RLock() 36 defer r.RUnlock() 37 t, ok := r.timeSinceLastEvent[resource] 38 if !ok { 39 return time.Time{}, true 40 } 41 return t, false 42 } 43 44 func (r *Resources) SetEventTimestamp(resource string) { 45 now := time.Now() 46 r.Lock() 47 defer r.Unlock() 48 if r.timeSinceLastEvent != nil { 49 r.timeSinceLastEvent[resource] = now 50 } 51 } 52 53 func (r *Resources) CancelWaitGroupToSyncResources(resourceName string) { 54 r.Lock() 55 delete(r.resources, resourceName) 56 r.Unlock() 57 } 58 59 // BlockWaitGroupToSyncResources ensures that anything which waits on waitGroup 60 // waits until all objects of the specified resource stored in Kubernetes are 61 // received by the informer and processed by controller. 62 // Fatally exits if syncing these initial objects fails. 63 // If the given stop channel is closed, it does not fatal. 64 // Once the k8s caches are synced against k8s, k8sCacheSynced is also closed. 65 func (r *Resources) BlockWaitGroupToSyncResources( 66 stop <-chan struct{}, 67 swg *lock.StoppableWaitGroup, 68 hasSyncedFunc cache.InformerSynced, 69 resourceName string, 70 ) { 71 // Log an error caches have already synchronized, as the caller is making this call too late 72 // and the resource in question was missed in the initial cache sync. 73 if r.CacheStatus.Synchronized() { 74 log.WithField("kubernetesResource", resourceName).Errorf("BlockWaitGroupToSyncResources called after Caches have already synced") 75 return 76 } 77 ch := make(chan struct{}) 78 r.Lock() 79 if r.resources == nil { 80 r.resources = make(map[string]<-chan struct{}) 81 r.stopWait = make(map[string]bool) 82 r.timeSinceLastEvent = make(map[string]time.Time) 83 } 84 r.resources[resourceName] = ch 85 r.Unlock() 86 87 go func() { 88 scopedLog := log.WithField("kubernetesResource", resourceName) 89 scopedLog.Debug("waiting for cache to synchronize") 90 if ok := cache.WaitForCacheSync(stop, hasSyncedFunc); !ok { 91 select { 92 case <-stop: 93 // do not fatal if the channel was stopped 94 scopedLog.Debug("canceled cache synchronization") 95 r.Lock() 96 // Since the wait for cache sync was canceled we 97 // need to mark that stopWait was canceled and it 98 // should not stop waiting for this resource to be 99 // synchronized. 100 r.stopWait[resourceName] = false 101 r.Unlock() 102 default: 103 // Fatally exit it resource fails to sync 104 scopedLog.Fatalf("failed to wait for cache to sync") 105 } 106 } else { 107 scopedLog.Debug("cache synced") 108 r.Lock() 109 // Since the wait for cache sync was not canceled we need to 110 // mark that stopWait not canceled and it should stop 111 // waiting for this resource to be synchronized. 112 r.stopWait[resourceName] = true 113 r.Unlock() 114 } 115 if swg != nil { 116 swg.Stop() 117 swg.Wait() 118 } 119 close(ch) 120 }() 121 } 122 123 // WaitForCacheSync waits for all K8s resources represented by 124 // resourceNames to have their K8s caches synchronized. 125 func (r *Resources) WaitForCacheSync(resourceNames ...string) { 126 for _, resourceName := range resourceNames { 127 r.RLock() 128 c, ok := r.resources[resourceName] 129 r.RUnlock() 130 if !ok { 131 continue 132 } 133 for { 134 scopedLog := log.WithField("kubernetesResource", resourceName) 135 <-c 136 r.RLock() 137 stopWait := r.stopWait[resourceName] 138 r.RUnlock() 139 if stopWait { 140 scopedLog.Debug("stopped waiting for caches to be synced") 141 break 142 } 143 scopedLog.Debug("original cache sync operation was aborted, waiting for caches to be synced with a new channel...") 144 time.Sleep(syncedPollPeriod) 145 r.RLock() 146 c, ok = r.resources[resourceName] 147 r.RUnlock() 148 if !ok { 149 break 150 } 151 } 152 } 153 } 154 155 // poll period for underlying client-go wait for cache sync. 156 const syncedPollPeriod = 100 * time.Millisecond 157 158 // WaitForCacheSyncWithTimeout waits for K8s resources represented by resourceNames to be synced. 159 // For every resource type, if an event happens after starting the wait, the timeout will be pushed out 160 // to be the time of the last event plus the timeout duration. 161 func (r *Resources) WaitForCacheSyncWithTimeout(timeout time.Duration, resourceNames ...string) error { 162 // Upon completion, release event map to reduce unnecessary memory usage. 163 // SetEventTimestamp calls to nil event time map are no-op. 164 // Running BlockWaitGroupToSyncResources will reinitialize the event map. 165 defer func() { 166 r.Lock() 167 r.timeSinceLastEvent = nil 168 r.Unlock() 169 }() 170 171 wg := &errgroup.Group{} 172 for _, resource := range resourceNames { 173 done := make(chan struct{}) 174 go func(resource string) { 175 r.WaitForCacheSync(resource) 176 close(done) 177 }(resource) 178 179 waitFn := func(resource string) func() error { 180 return func() error { 181 currTimeout := timeout + syncedPollPeriod // add buffer of the poll period. 182 183 for { 184 // Wait until after timeout ends or sync is completed. 185 // If timeout is reached, check if an event occurred that would 186 // have pushed back the timeout and wait for that amount of time. 187 select { 188 case now := <-inctimer.After(currTimeout): 189 lastEvent, never := r.getTimeOfLastEvent(resource) 190 if never { 191 return fmt.Errorf("timed out after %s, never received event for resource %q", timeout, resource) 192 } 193 if now.After(lastEvent.Add(timeout)) { 194 return fmt.Errorf("timed out after %s since receiving last event for resource %q", timeout, resource) 195 } 196 // We reset the timer to wait the timeout period minus the 197 // time since the last event. 198 currTimeout = timeout - time.Since(lastEvent) 199 log.Debugf("resource %q received event %s ago, waiting for additional %s before timing out", resource, time.Since(lastEvent), currTimeout) 200 case <-done: 201 log.Debugf("resource %q cache has synced, stopping timeout watcher", resource) 202 return nil 203 } 204 } 205 } 206 }(resource) 207 208 wg.Go(waitFn) 209 } 210 211 return wg.Wait() 212 }