// k8s.io/client-go@v0.31.1/tools/cache/reflector.go

/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cache

import (
	"context"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"reflect"
	"strings"
	"sync"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/naming"
	utilnet "k8s.io/apimachinery/pkg/util/net"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/watch"
	clientfeatures "k8s.io/client-go/features"
	"k8s.io/client-go/tools/pager"
	"k8s.io/klog/v2"
	"k8s.io/utils/clock"
	"k8s.io/utils/pointer"
	"k8s.io/utils/ptr"
	"k8s.io/utils/trace"
)

// defaultExpectedTypeName is the type description used when no expected type
// was supplied to the reflector.
const defaultExpectedTypeName = "<unspecified>"

var (
	// We try to spread the load on apiserver by setting timeouts for
	// watch requests - it is random in [minWatchTimeout, 2*minWatchTimeout].
	defaultMinWatchTimeout = 5 * time.Minute
)

// Reflector watches a specified resource and causes all changes to be reflected in the given store.
type Reflector struct {
	// name identifies this reflector. By default it will be a file:line if possible.
	name string
	// The name of the type we expect to place in the store. The name
	// will be the stringification of expectedGVK if provided, and the
	// stringification of expectedType otherwise. It is for display
	// only, and should not be used for parsing or comparison.
	typeDescription string
	// An example object of the type we expect to place in the store.
	// Only the type needs to be right, except that when that is
	// `unstructured.Unstructured` the object's `"apiVersion"` and
	// `"kind"` must also be right.
	expectedType reflect.Type
	// The GVK of the object we expect to place in the store if unstructured.
	expectedGVK *schema.GroupVersionKind
	// The destination to sync up with the watch source
	store Store
	// listerWatcher is used to perform lists and watches.
	listerWatcher ListerWatcher
	// backoffManager manages backoff of ListWatch
	backoffManager wait.BackoffManager
	// resyncPeriod is the interval of the resync timer. A zero value means
	// the resync channel never fires.
	resyncPeriod time.Duration
	// minWatchTimeout defines the minimum timeout for watch requests.
	minWatchTimeout time.Duration
	// clock allows tests to manipulate time
	clock clock.Clock
	// paginatedResult defines whether pagination should be forced for list calls.
	// It is set based on the result of the initial list call.
	paginatedResult bool
	// lastSyncResourceVersion is the resource version token last
	// observed when doing a sync with the underlying store
	// it is thread safe, but not synchronized with the underlying store
	lastSyncResourceVersion string
	// isLastSyncResourceVersionUnavailable is true if the previous list or watch request with
	// lastSyncResourceVersion failed with an "expired" or "too large resource version" error.
	isLastSyncResourceVersionUnavailable bool
	// lastSyncResourceVersionMutex guards read/write access to lastSyncResourceVersion
	lastSyncResourceVersionMutex sync.RWMutex
	// Called whenever the ListAndWatch drops the connection with an error.
	watchErrorHandler WatchErrorHandler
	// WatchListPageSize is the requested chunk size of initial and resync watch lists.
	// If unset, for consistent reads (RV="") or reads that opt-into arbitrarily old data
	// (RV="0") it will default to pager.PageSize, for the rest (RV != "" && RV != "0")
	// it will turn off pagination to allow serving them from watch cache.
	// NOTE: It should be used carefully as paginated lists are always served directly from
	// etcd, which is significantly less efficient and may lead to serious performance and
	// scalability problems.
	WatchListPageSize int64
	// ShouldResync is invoked periodically and whenever it returns `true` the Store's Resync operation is invoked
	ShouldResync func() bool
	// MaxInternalErrorRetryDuration defines how long we should retry internal errors returned by watch.
	MaxInternalErrorRetryDuration time.Duration
	// UseWatchList if turned on instructs the reflector to open a stream to bring data from the API server.
	// Streaming has the primary advantage of using fewer server's resources to fetch data.
	//
	// The old behaviour establishes a LIST request which gets data in chunks.
	// Paginated list is less efficient and depending on the actual size of objects
	// might result in an increased memory consumption of the APIServer.
	//
	// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-api-machinery/3157-watch-list#design-details
	//
	// TODO(#115478): Consider making reflector.UseWatchList a private field. Since we implemented "api streaming" on the etcd storage layer it should work.
	UseWatchList *bool
}

// ResourceVersionUpdater is an interface that allows store implementation to
// track the current resource version of the reflector. This is especially
// important if storage bookmarks are enabled.
127 type ResourceVersionUpdater interface { 128 // UpdateResourceVersion is called each time current resource version of the reflector 129 // is updated. 130 UpdateResourceVersion(resourceVersion string) 131 } 132 133 // The WatchErrorHandler is called whenever ListAndWatch drops the 134 // connection with an error. After calling this handler, the informer 135 // will backoff and retry. 136 // 137 // The default implementation looks at the error type and tries to log 138 // the error message at an appropriate level. 139 // 140 // Implementations of this handler may display the error message in other 141 // ways. Implementations should return quickly - any expensive processing 142 // should be offloaded. 143 type WatchErrorHandler func(r *Reflector, err error) 144 145 // DefaultWatchErrorHandler is the default implementation of WatchErrorHandler 146 func DefaultWatchErrorHandler(r *Reflector, err error) { 147 switch { 148 case isExpiredError(err): 149 // Don't set LastSyncResourceVersionUnavailable - LIST call with ResourceVersion=RV already 150 // has a semantic that it returns data at least as fresh as provided RV. 151 // So first try to LIST with setting RV to resource version of last observed object. 
152 klog.V(4).Infof("%s: watch of %v closed with: %v", r.name, r.typeDescription, err) 153 case err == io.EOF: 154 // watch closed normally 155 case err == io.ErrUnexpectedEOF: 156 klog.V(1).Infof("%s: Watch for %v closed with unexpected EOF: %v", r.name, r.typeDescription, err) 157 default: 158 utilruntime.HandleError(fmt.Errorf("%s: Failed to watch %v: %v", r.name, r.typeDescription, err)) 159 } 160 } 161 162 // NewNamespaceKeyedIndexerAndReflector creates an Indexer and a Reflector 163 // The indexer is configured to key on namespace 164 func NewNamespaceKeyedIndexerAndReflector(lw ListerWatcher, expectedType interface{}, resyncPeriod time.Duration) (indexer Indexer, reflector *Reflector) { 165 indexer = NewIndexer(MetaNamespaceKeyFunc, Indexers{NamespaceIndex: MetaNamespaceIndexFunc}) 166 reflector = NewReflector(lw, expectedType, indexer, resyncPeriod) 167 return indexer, reflector 168 } 169 170 // NewReflector creates a new Reflector with its name defaulted to the closest source_file.go:line in the call stack 171 // that is outside this package. See NewReflectorWithOptions for further information. 172 func NewReflector(lw ListerWatcher, expectedType interface{}, store Store, resyncPeriod time.Duration) *Reflector { 173 return NewReflectorWithOptions(lw, expectedType, store, ReflectorOptions{ResyncPeriod: resyncPeriod}) 174 } 175 176 // NewNamedReflector creates a new Reflector with the specified name. See NewReflectorWithOptions for further 177 // information. 178 func NewNamedReflector(name string, lw ListerWatcher, expectedType interface{}, store Store, resyncPeriod time.Duration) *Reflector { 179 return NewReflectorWithOptions(lw, expectedType, store, ReflectorOptions{Name: name, ResyncPeriod: resyncPeriod}) 180 } 181 182 // ReflectorOptions configures a Reflector. 183 type ReflectorOptions struct { 184 // Name is the Reflector's name. 
If unset/unspecified, the name defaults to the closest source_file.go:line 185 // in the call stack that is outside this package. 186 Name string 187 188 // TypeDescription is the Reflector's type description. If unset/unspecified, the type description is defaulted 189 // using the following rules: if the expectedType passed to NewReflectorWithOptions was nil, the type description is 190 // "<unspecified>". If the expectedType is an instance of *unstructured.Unstructured and its apiVersion and kind fields 191 // are set, the type description is the string encoding of those. Otherwise, the type description is set to the 192 // go type of expectedType.. 193 TypeDescription string 194 195 // ResyncPeriod is the Reflector's resync period. If unset/unspecified, the resync period defaults to 0 196 // (do not resync). 197 ResyncPeriod time.Duration 198 199 // MinWatchTimeout, if non-zero, defines the minimum timeout for watch requests send to kube-apiserver. 200 // However, values lower than 5m will not be honored to avoid negative performance impact on controlplane. 201 MinWatchTimeout time.Duration 202 203 // Clock allows tests to control time. If unset defaults to clock.RealClock{} 204 Clock clock.Clock 205 } 206 207 // NewReflectorWithOptions creates a new Reflector object which will keep the 208 // given store up to date with the server's contents for the given 209 // resource. Reflector promises to only put things in the store that 210 // have the type of expectedType, unless expectedType is nil. If 211 // resyncPeriod is non-zero, then the reflector will periodically 212 // consult its ShouldResync function to determine whether to invoke 213 // the Store's Resync operation; `ShouldResync==nil` means always 214 // "yes". This enables you to use reflectors to periodically process 215 // everything as well as incrementally processing the things that 216 // change. 
217 func NewReflectorWithOptions(lw ListerWatcher, expectedType interface{}, store Store, options ReflectorOptions) *Reflector { 218 reflectorClock := options.Clock 219 if reflectorClock == nil { 220 reflectorClock = clock.RealClock{} 221 } 222 minWatchTimeout := defaultMinWatchTimeout 223 if options.MinWatchTimeout > defaultMinWatchTimeout { 224 minWatchTimeout = options.MinWatchTimeout 225 } 226 r := &Reflector{ 227 name: options.Name, 228 resyncPeriod: options.ResyncPeriod, 229 minWatchTimeout: minWatchTimeout, 230 typeDescription: options.TypeDescription, 231 listerWatcher: lw, 232 store: store, 233 // We used to make the call every 1sec (1 QPS), the goal here is to achieve ~98% traffic reduction when 234 // API server is not healthy. With these parameters, backoff will stop at [30,60) sec interval which is 235 // 0.22 QPS. If we don't backoff for 2min, assume API server is healthy and we reset the backoff. 236 backoffManager: wait.NewExponentialBackoffManager(800*time.Millisecond, 30*time.Second, 2*time.Minute, 2.0, 1.0, reflectorClock), 237 clock: reflectorClock, 238 watchErrorHandler: WatchErrorHandler(DefaultWatchErrorHandler), 239 expectedType: reflect.TypeOf(expectedType), 240 } 241 242 if r.name == "" { 243 r.name = naming.GetNameFromCallsite(internalPackages...) 244 } 245 246 if r.typeDescription == "" { 247 r.typeDescription = getTypeDescriptionFromObject(expectedType) 248 } 249 250 if r.expectedGVK == nil { 251 r.expectedGVK = getExpectedGVKFromObject(expectedType) 252 } 253 254 // don't overwrite UseWatchList if already set 255 // because the higher layers (e.g. 
storage/cacher) disabled it on purpose 256 if r.UseWatchList == nil { 257 r.UseWatchList = ptr.To(clientfeatures.FeatureGates().Enabled(clientfeatures.WatchListClient)) 258 } 259 260 return r 261 } 262 263 func getTypeDescriptionFromObject(expectedType interface{}) string { 264 if expectedType == nil { 265 return defaultExpectedTypeName 266 } 267 268 reflectDescription := reflect.TypeOf(expectedType).String() 269 270 obj, ok := expectedType.(*unstructured.Unstructured) 271 if !ok { 272 return reflectDescription 273 } 274 275 gvk := obj.GroupVersionKind() 276 if gvk.Empty() { 277 return reflectDescription 278 } 279 280 return gvk.String() 281 } 282 283 func getExpectedGVKFromObject(expectedType interface{}) *schema.GroupVersionKind { 284 obj, ok := expectedType.(*unstructured.Unstructured) 285 if !ok { 286 return nil 287 } 288 289 gvk := obj.GroupVersionKind() 290 if gvk.Empty() { 291 return nil 292 } 293 294 return &gvk 295 } 296 297 // internalPackages are packages that ignored when creating a default reflector name. These packages are in the common 298 // call chains to NewReflector, so they'd be low entropy names for reflectors 299 var internalPackages = []string{"client-go/tools/cache/"} 300 301 // Run repeatedly uses the reflector's ListAndWatch to fetch all the 302 // objects and subsequent deltas. 303 // Run will exit when stopCh is closed. 
304 func (r *Reflector) Run(stopCh <-chan struct{}) { 305 klog.V(3).Infof("Starting reflector %s (%s) from %s", r.typeDescription, r.resyncPeriod, r.name) 306 wait.BackoffUntil(func() { 307 if err := r.ListAndWatch(stopCh); err != nil { 308 r.watchErrorHandler(r, err) 309 } 310 }, r.backoffManager, true, stopCh) 311 klog.V(3).Infof("Stopping reflector %s (%s) from %s", r.typeDescription, r.resyncPeriod, r.name) 312 } 313 314 var ( 315 // nothing will ever be sent down this channel 316 neverExitWatch <-chan time.Time = make(chan time.Time) 317 318 // Used to indicate that watching stopped because of a signal from the stop 319 // channel passed in from a client of the reflector. 320 errorStopRequested = errors.New("stop requested") 321 ) 322 323 // resyncChan returns a channel which will receive something when a resync is 324 // required, and a cleanup function. 325 func (r *Reflector) resyncChan() (<-chan time.Time, func() bool) { 326 if r.resyncPeriod == 0 { 327 return neverExitWatch, func() bool { return false } 328 } 329 // The cleanup function is required: imagine the scenario where watches 330 // always fail so we end up listing frequently. Then, if we don't 331 // manually stop the timer, we could end up with many timers active 332 // concurrently. 333 t := r.clock.NewTimer(r.resyncPeriod) 334 return t.C(), t.Stop 335 } 336 337 // ListAndWatch first lists all items and get the resource version at the moment of call, 338 // and then use the resource version to watch. 339 // It returns error if ListAndWatch didn't even try to initialize watch. 
340 func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error { 341 klog.V(3).Infof("Listing and watching %v from %s", r.typeDescription, r.name) 342 var err error 343 var w watch.Interface 344 useWatchList := ptr.Deref(r.UseWatchList, false) 345 fallbackToList := !useWatchList 346 347 if useWatchList { 348 w, err = r.watchList(stopCh) 349 if w == nil && err == nil { 350 // stopCh was closed 351 return nil 352 } 353 if err != nil { 354 klog.Warningf("The watchlist request ended with an error, falling back to the standard LIST/WATCH semantics because making progress is better than deadlocking, err = %v", err) 355 fallbackToList = true 356 // ensure that we won't accidentally pass some garbage down the watch. 357 w = nil 358 } 359 } 360 361 if fallbackToList { 362 err = r.list(stopCh) 363 if err != nil { 364 return err 365 } 366 } 367 368 klog.V(2).Infof("Caches populated for %v from %s", r.typeDescription, r.name) 369 return r.watchWithResync(w, stopCh) 370 } 371 372 // startResync periodically calls r.store.Resync() method. 373 // Note that this method is blocking and should be 374 // called in a separate goroutine. 375 func (r *Reflector) startResync(stopCh <-chan struct{}, cancelCh <-chan struct{}, resyncerrc chan error) { 376 resyncCh, cleanup := r.resyncChan() 377 defer func() { 378 cleanup() // Call the last one written into cleanup 379 }() 380 for { 381 select { 382 case <-resyncCh: 383 case <-stopCh: 384 return 385 case <-cancelCh: 386 return 387 } 388 if r.ShouldResync == nil || r.ShouldResync() { 389 klog.V(4).Infof("%s: forcing resync", r.name) 390 if err := r.store.Resync(); err != nil { 391 resyncerrc <- err 392 return 393 } 394 } 395 cleanup() 396 resyncCh, cleanup = r.resyncChan() 397 } 398 } 399 400 // watchWithResync runs watch with startResync in the background. 
func (r *Reflector) watchWithResync(w watch.Interface, stopCh <-chan struct{}) error {
	// Buffered so that startResync can report a single error without blocking.
	resyncerrc := make(chan error, 1)
	// cancelCh is closed when this function returns, which ends startResync.
	cancelCh := make(chan struct{})
	defer close(cancelCh)
	go r.startResync(stopCh, cancelCh, resyncerrc)
	return r.watch(w, stopCh, resyncerrc)
}

// watch simply starts a watch request with the server.
//
// w may be an already established watch (it is consumed first) or nil, in
// which case a new watch is opened from the last synced resource version.
// resyncerrc is handed to handleWatch as its error channel.
//
// A non-nil error is returned only when opening a watch fails with a
// non-retriable error. The function returns nil when stopCh is closed and
// after handleWatch returned an error that is not retried here.
func (r *Reflector) watch(w watch.Interface, stopCh <-chan struct{}, resyncerrc chan error) error {
	var err error
	retry := NewRetryWithDeadline(r.MaxInternalErrorRetryDuration, time.Minute, apierrors.IsInternalError, r.clock)

	for {
		// give the stopCh a chance to stop the loop, even in case of continue statements further down on errors
		select {
		case <-stopCh:
			// we can only end up here when the stopCh
			// was closed after a successful watchlist or list request
			if w != nil {
				w.Stop()
			}
			return nil
		default:
		}

		// start the clock before sending the request, since some proxies won't flush headers until after the first watch event is sent
		start := r.clock.Now()

		if w == nil {
			// Randomized in [minWatchTimeout, 2*minWatchTimeout).
			timeoutSeconds := int64(r.minWatchTimeout.Seconds() * (rand.Float64() + 1.0))
			options := metav1.ListOptions{
				ResourceVersion: r.LastSyncResourceVersion(),
				// We want to avoid situations of hanging watchers. Stop any watchers that do not
				// receive any events within the timeout window.
				TimeoutSeconds: &timeoutSeconds,
				// To reduce load on kube-apiserver on watch restarts, you may enable watch bookmarks.
				// Reflector doesn't assume bookmarks are returned at all (if the server does not support
				// watch bookmarks, it will ignore this field).
				AllowWatchBookmarks: true,
			}

			w, err = r.listerWatcher.Watch(options)
			if err != nil {
				if canRetry := isWatchErrorRetriable(err); canRetry {
					klog.V(4).Infof("%s: watch of %v returned %v - backing off", r.name, r.typeDescription, err)
					// Wait out the backoff, but give up immediately on stop.
					select {
					case <-stopCh:
						return nil
					case <-r.backoffManager.Backoff().C():
						continue
					}
				}
				return err
			}
		}

		err = handleWatch(start, w, r.store, r.expectedType, r.expectedGVK, r.name, r.typeDescription, r.setLastSyncResourceVersion,
			r.clock, resyncerrc, stopCh)
		// Ensure that watch will not be reused across iterations.
		w.Stop()
		w = nil
		retry.After(err)
		if err != nil {
			// A requested stop is not logged; it simply ends the watch with nil.
			if !errors.Is(err, errorStopRequested) {
				switch {
				case isExpiredError(err):
					// Don't set LastSyncResourceVersionUnavailable - LIST call with ResourceVersion=RV already
					// has a semantic that it returns data at least as fresh as provided RV.
					// So first try to LIST with setting RV to resource version of last observed object.
					klog.V(4).Infof("%s: watch of %v closed with: %v", r.name, r.typeDescription, err)
				case apierrors.IsTooManyRequests(err):
					klog.V(2).Infof("%s: watch of %v returned 429 - backing off", r.name, r.typeDescription)
					select {
					case <-stopCh:
						return nil
					case <-r.backoffManager.Backoff().C():
						continue
					}
				case apierrors.IsInternalError(err) && retry.ShouldRetry():
					klog.V(2).Infof("%s: retrying watch of %v internal error: %v", r.name, r.typeDescription, err)
					continue
				default:
					klog.Warningf("%s: watch of %v ended with: %v", r.name, r.typeDescription, err)
				}
			}
			return nil
		}
	}
}

// list simply lists all items and records a resource version obtained from the server at the moment of the call.
// the resource version can be used for further progress notification (aka. watch).
494 func (r *Reflector) list(stopCh <-chan struct{}) error { 495 var resourceVersion string 496 options := metav1.ListOptions{ResourceVersion: r.relistResourceVersion()} 497 498 initTrace := trace.New("Reflector ListAndWatch", trace.Field{Key: "name", Value: r.name}) 499 defer initTrace.LogIfLong(10 * time.Second) 500 var list runtime.Object 501 var paginatedResult bool 502 var err error 503 listCh := make(chan struct{}, 1) 504 panicCh := make(chan interface{}, 1) 505 go func() { 506 defer func() { 507 if r := recover(); r != nil { 508 panicCh <- r 509 } 510 }() 511 // Attempt to gather list in chunks, if supported by listerWatcher, if not, the first 512 // list request will return the full response. 513 pager := pager.New(pager.SimplePageFunc(func(opts metav1.ListOptions) (runtime.Object, error) { 514 return r.listerWatcher.List(opts) 515 })) 516 switch { 517 case r.WatchListPageSize != 0: 518 pager.PageSize = r.WatchListPageSize 519 case r.paginatedResult: 520 // We got a paginated result initially. Assume this resource and server honor 521 // paging requests (i.e. watch cache is probably disabled) and leave the default 522 // pager size set. 523 case options.ResourceVersion != "" && options.ResourceVersion != "0": 524 // User didn't explicitly request pagination. 525 // 526 // With ResourceVersion != "", we have a possibility to list from watch cache, 527 // but we do that (for ResourceVersion != "0") only if Limit is unset. 528 // To avoid thundering herd on etcd (e.g. on master upgrades), we explicitly 529 // switch off pagination to force listing from watch cache (if enabled). 530 // With the existing semantic of RV (result is at least as fresh as provided RV), 531 // this is correct and doesn't lead to going back in time. 532 // 533 // We also don't turn off pagination for ResourceVersion="0", since watch cache 534 // is ignoring Limit in that case anyway, and if watch cache is not enabled 535 // we don't introduce regression. 
536 pager.PageSize = 0 537 } 538 539 list, paginatedResult, err = pager.ListWithAlloc(context.Background(), options) 540 if isExpiredError(err) || isTooLargeResourceVersionError(err) { 541 r.setIsLastSyncResourceVersionUnavailable(true) 542 // Retry immediately if the resource version used to list is unavailable. 543 // The pager already falls back to full list if paginated list calls fail due to an "Expired" error on 544 // continuation pages, but the pager might not be enabled, the full list might fail because the 545 // resource version it is listing at is expired or the cache may not yet be synced to the provided 546 // resource version. So we need to fallback to resourceVersion="" in all to recover and ensure 547 // the reflector makes forward progress. 548 list, paginatedResult, err = pager.ListWithAlloc(context.Background(), metav1.ListOptions{ResourceVersion: r.relistResourceVersion()}) 549 } 550 close(listCh) 551 }() 552 select { 553 case <-stopCh: 554 return nil 555 case r := <-panicCh: 556 panic(r) 557 case <-listCh: 558 } 559 initTrace.Step("Objects listed", trace.Field{Key: "error", Value: err}) 560 if err != nil { 561 klog.Warningf("%s: failed to list %v: %v", r.name, r.typeDescription, err) 562 return fmt.Errorf("failed to list %v: %w", r.typeDescription, err) 563 } 564 565 // We check if the list was paginated and if so set the paginatedResult based on that. 566 // However, we want to do that only for the initial list (which is the only case 567 // when we set ResourceVersion="0"). The reasoning behind it is that later, in some 568 // situations we may force listing directly from etcd (by setting ResourceVersion="") 569 // which will return paginated result, even if watch cache is enabled. However, in 570 // that case, we still want to prefer sending requests to watch cache if possible. 571 // 572 // Paginated result returned for request with ResourceVersion="0" mean that watch 573 // cache is disabled and there are a lot of objects of a given type. 
In such case, 574 // there is no need to prefer listing from watch cache. 575 if options.ResourceVersion == "0" && paginatedResult { 576 r.paginatedResult = true 577 } 578 579 r.setIsLastSyncResourceVersionUnavailable(false) // list was successful 580 listMetaInterface, err := meta.ListAccessor(list) 581 if err != nil { 582 return fmt.Errorf("unable to understand list result %#v: %v", list, err) 583 } 584 resourceVersion = listMetaInterface.GetResourceVersion() 585 initTrace.Step("Resource version extracted") 586 items, err := meta.ExtractListWithAlloc(list) 587 if err != nil { 588 return fmt.Errorf("unable to understand list result %#v (%v)", list, err) 589 } 590 initTrace.Step("Objects extracted") 591 if err := r.syncWith(items, resourceVersion); err != nil { 592 return fmt.Errorf("unable to sync list result: %v", err) 593 } 594 initTrace.Step("SyncWith done") 595 r.setLastSyncResourceVersion(resourceVersion) 596 initTrace.Step("Resource version updated") 597 return nil 598 } 599 600 // watchList establishes a stream to get a consistent snapshot of data 601 // from the server as described in https://github.com/kubernetes/enhancements/tree/master/keps/sig-api-machinery/3157-watch-list#proposal 602 // 603 // case 1: start at Most Recent (RV="", ResourceVersionMatch=ResourceVersionMatchNotOlderThan) 604 // Establishes a consistent stream with the server. 605 // That means the returned data is consistent, as if, served directly from etcd via a quorum read. 606 // It begins with synthetic "Added" events of all resources up to the most recent ResourceVersion. 607 // It ends with a synthetic "Bookmark" event containing the most recent ResourceVersion. 608 // After receiving a "Bookmark" event the reflector is considered to be synchronized. 609 // It replaces its internal store with the collected items and 610 // reuses the current watch requests for getting further events. 
611 // 612 // case 2: start at Exact (RV>"0", ResourceVersionMatch=ResourceVersionMatchNotOlderThan) 613 // Establishes a stream with the server at the provided resource version. 614 // To establish the initial state the server begins with synthetic "Added" events. 615 // It ends with a synthetic "Bookmark" event containing the provided or newer resource version. 616 // After receiving a "Bookmark" event the reflector is considered to be synchronized. 617 // It replaces its internal store with the collected items and 618 // reuses the current watch requests for getting further events. 619 func (r *Reflector) watchList(stopCh <-chan struct{}) (watch.Interface, error) { 620 var w watch.Interface 621 var err error 622 var temporaryStore Store 623 var resourceVersion string 624 // TODO(#115478): see if this function could be turned 625 // into a method and see if error handling 626 // could be unified with the r.watch method 627 isErrorRetriableWithSideEffectsFn := func(err error) bool { 628 if canRetry := isWatchErrorRetriable(err); canRetry { 629 klog.V(2).Infof("%s: watch-list of %v returned %v - backing off", r.name, r.typeDescription, err) 630 <-r.backoffManager.Backoff().C() 631 return true 632 } 633 if isExpiredError(err) || isTooLargeResourceVersionError(err) { 634 // we tried to re-establish a watch request but the provided RV 635 // has either expired or it is greater than the server knows about. 
636 // In that case we reset the RV and 637 // try to get a consistent snapshot from the watch cache (case 1) 638 r.setIsLastSyncResourceVersionUnavailable(true) 639 return true 640 } 641 return false 642 } 643 644 initTrace := trace.New("Reflector WatchList", trace.Field{Key: "name", Value: r.name}) 645 defer initTrace.LogIfLong(10 * time.Second) 646 for { 647 select { 648 case <-stopCh: 649 return nil, nil 650 default: 651 } 652 653 resourceVersion = "" 654 lastKnownRV := r.rewatchResourceVersion() 655 temporaryStore = NewStore(DeletionHandlingMetaNamespaceKeyFunc) 656 // TODO(#115478): large "list", slow clients, slow network, p&f 657 // might slow down streaming and eventually fail. 658 // maybe in such a case we should retry with an increased timeout? 659 timeoutSeconds := int64(r.minWatchTimeout.Seconds() * (rand.Float64() + 1.0)) 660 options := metav1.ListOptions{ 661 ResourceVersion: lastKnownRV, 662 AllowWatchBookmarks: true, 663 SendInitialEvents: pointer.Bool(true), 664 ResourceVersionMatch: metav1.ResourceVersionMatchNotOlderThan, 665 TimeoutSeconds: &timeoutSeconds, 666 } 667 start := r.clock.Now() 668 669 w, err = r.listerWatcher.Watch(options) 670 if err != nil { 671 if isErrorRetriableWithSideEffectsFn(err) { 672 continue 673 } 674 return nil, err 675 } 676 watchListBookmarkReceived, err := handleListWatch(start, w, temporaryStore, r.expectedType, r.expectedGVK, r.name, r.typeDescription, 677 func(rv string) { resourceVersion = rv }, 678 r.clock, make(chan error), stopCh) 679 if err != nil { 680 w.Stop() // stop and retry with clean state 681 if errors.Is(err, errorStopRequested) { 682 return nil, nil 683 } 684 if isErrorRetriableWithSideEffectsFn(err) { 685 continue 686 } 687 return nil, err 688 } 689 if watchListBookmarkReceived { 690 break 691 } 692 } 693 // We successfully got initial state from watch-list confirmed by the 694 // "k8s.io/initial-events-end" bookmark. 
695 initTrace.Step("Objects streamed", trace.Field{Key: "count", Value: len(temporaryStore.List())}) 696 r.setIsLastSyncResourceVersionUnavailable(false) 697 698 // we utilize the temporaryStore to ensure independence from the current store implementation. 699 // as of today, the store is implemented as a queue and will be drained by the higher-level 700 // component as soon as it finishes replacing the content. 701 checkWatchListDataConsistencyIfRequested(wait.ContextForChannel(stopCh), r.name, resourceVersion, wrapListFuncWithContext(r.listerWatcher.List), temporaryStore.List) 702 703 if err := r.store.Replace(temporaryStore.List(), resourceVersion); err != nil { 704 return nil, fmt.Errorf("unable to sync watch-list result: %w", err) 705 } 706 initTrace.Step("SyncWith done") 707 r.setLastSyncResourceVersion(resourceVersion) 708 709 return w, nil 710 } 711 712 // syncWith replaces the store's items with the given list. 713 func (r *Reflector) syncWith(items []runtime.Object, resourceVersion string) error { 714 found := make([]interface{}, 0, len(items)) 715 for _, item := range items { 716 found = append(found, item) 717 } 718 return r.store.Replace(found, resourceVersion) 719 } 720 721 // handleListWatch consumes events from w, updates the Store, and records the 722 // last seen ResourceVersion, to allow continuing from that ResourceVersion on 723 // retry. If successful, the watcher will be left open after receiving the 724 // initial set of objects, to allow watching for future events. 
725 func handleListWatch( 726 start time.Time, 727 w watch.Interface, 728 store Store, 729 expectedType reflect.Type, 730 expectedGVK *schema.GroupVersionKind, 731 name string, 732 expectedTypeName string, 733 setLastSyncResourceVersion func(string), 734 clock clock.Clock, 735 errCh chan error, 736 stopCh <-chan struct{}, 737 ) (bool, error) { 738 exitOnWatchListBookmarkReceived := true 739 return handleAnyWatch(start, w, store, expectedType, expectedGVK, name, expectedTypeName, 740 setLastSyncResourceVersion, exitOnWatchListBookmarkReceived, clock, errCh, stopCh) 741 } 742 743 // handleListWatch consumes events from w, updates the Store, and records the 744 // last seen ResourceVersion, to allow continuing from that ResourceVersion on 745 // retry. The watcher will always be stopped on exit. 746 func handleWatch( 747 start time.Time, 748 w watch.Interface, 749 store Store, 750 expectedType reflect.Type, 751 expectedGVK *schema.GroupVersionKind, 752 name string, 753 expectedTypeName string, 754 setLastSyncResourceVersion func(string), 755 clock clock.Clock, 756 errCh chan error, 757 stopCh <-chan struct{}, 758 ) error { 759 exitOnWatchListBookmarkReceived := false 760 _, err := handleAnyWatch(start, w, store, expectedType, expectedGVK, name, expectedTypeName, 761 setLastSyncResourceVersion, exitOnWatchListBookmarkReceived, clock, errCh, stopCh) 762 return err 763 } 764 765 // handleAnyWatch consumes events from w, updates the Store, and records the last 766 // seen ResourceVersion, to allow continuing from that ResourceVersion on retry. 767 // If exitOnWatchListBookmarkReceived is true, the watch events will be consumed 768 // until a bookmark event is received with the WatchList annotation present. 769 // Returns true (watchListBookmarkReceived) if the WatchList bookmark was 770 // received, even if exitOnWatchListBookmarkReceived is false. 
771 // The watcher will always be stopped, unless exitOnWatchListBookmarkReceived is 772 // true and watchListBookmarkReceived is true. This allows the same watch stream 773 // to be re-used by the caller to continue watching for new events. 774 func handleAnyWatch(start time.Time, 775 w watch.Interface, 776 store Store, 777 expectedType reflect.Type, 778 expectedGVK *schema.GroupVersionKind, 779 name string, 780 expectedTypeName string, 781 setLastSyncResourceVersion func(string), 782 exitOnWatchListBookmarkReceived bool, 783 clock clock.Clock, 784 errCh chan error, 785 stopCh <-chan struct{}, 786 ) (bool, error) { 787 watchListBookmarkReceived := false 788 eventCount := 0 789 initialEventsEndBookmarkWarningTicker := newInitialEventsEndBookmarkTicker(name, clock, start, exitOnWatchListBookmarkReceived) 790 defer initialEventsEndBookmarkWarningTicker.Stop() 791 792 loop: 793 for { 794 select { 795 case <-stopCh: 796 return watchListBookmarkReceived, errorStopRequested 797 case err := <-errCh: 798 return watchListBookmarkReceived, err 799 case event, ok := <-w.ResultChan(): 800 if !ok { 801 break loop 802 } 803 if event.Type == watch.Error { 804 return watchListBookmarkReceived, apierrors.FromObject(event.Object) 805 } 806 if expectedType != nil { 807 if e, a := expectedType, reflect.TypeOf(event.Object); e != a { 808 utilruntime.HandleError(fmt.Errorf("%s: expected type %v, but watch event object had type %v", name, e, a)) 809 continue 810 } 811 } 812 if expectedGVK != nil { 813 if e, a := *expectedGVK, event.Object.GetObjectKind().GroupVersionKind(); e != a { 814 utilruntime.HandleError(fmt.Errorf("%s: expected gvk %v, but watch event object had gvk %v", name, e, a)) 815 continue 816 } 817 } 818 meta, err := meta.Accessor(event.Object) 819 if err != nil { 820 utilruntime.HandleError(fmt.Errorf("%s: unable to understand watch event %#v", name, event)) 821 continue 822 } 823 resourceVersion := meta.GetResourceVersion() 824 switch event.Type { 825 case watch.Added: 
826 err := store.Add(event.Object) 827 if err != nil { 828 utilruntime.HandleError(fmt.Errorf("%s: unable to add watch event object (%#v) to store: %v", name, event.Object, err)) 829 } 830 case watch.Modified: 831 err := store.Update(event.Object) 832 if err != nil { 833 utilruntime.HandleError(fmt.Errorf("%s: unable to update watch event object (%#v) to store: %v", name, event.Object, err)) 834 } 835 case watch.Deleted: 836 // TODO: Will any consumers need access to the "last known 837 // state", which is passed in event.Object? If so, may need 838 // to change this. 839 err := store.Delete(event.Object) 840 if err != nil { 841 utilruntime.HandleError(fmt.Errorf("%s: unable to delete watch event object (%#v) from store: %v", name, event.Object, err)) 842 } 843 case watch.Bookmark: 844 // A `Bookmark` means watch has synced here, just update the resourceVersion 845 if meta.GetAnnotations()[metav1.InitialEventsAnnotationKey] == "true" { 846 watchListBookmarkReceived = true 847 } 848 default: 849 utilruntime.HandleError(fmt.Errorf("%s: unable to understand watch event %#v", name, event)) 850 } 851 setLastSyncResourceVersion(resourceVersion) 852 if rvu, ok := store.(ResourceVersionUpdater); ok { 853 rvu.UpdateResourceVersion(resourceVersion) 854 } 855 eventCount++ 856 if exitOnWatchListBookmarkReceived && watchListBookmarkReceived { 857 watchDuration := clock.Since(start) 858 klog.V(4).Infof("exiting %v Watch because received the bookmark that marks the end of initial events stream, total %v items received in %v", name, eventCount, watchDuration) 859 return watchListBookmarkReceived, nil 860 } 861 initialEventsEndBookmarkWarningTicker.observeLastEventTimeStamp(clock.Now()) 862 case <-initialEventsEndBookmarkWarningTicker.C(): 863 initialEventsEndBookmarkWarningTicker.warnIfExpired() 864 } 865 } 866 867 watchDuration := clock.Since(start) 868 if watchDuration < 1*time.Second && eventCount == 0 { 869 return watchListBookmarkReceived, fmt.Errorf("very short watch: %s: 
Unexpected watch close - watch lasted less than a second and no items received", name) 870 } 871 klog.V(4).Infof("%s: Watch close - %v total %v items received", name, expectedTypeName, eventCount) 872 return watchListBookmarkReceived, nil 873 } 874 875 // LastSyncResourceVersion is the resource version observed when last sync with the underlying store 876 // The value returned is not synchronized with access to the underlying store and is not thread-safe 877 func (r *Reflector) LastSyncResourceVersion() string { 878 r.lastSyncResourceVersionMutex.RLock() 879 defer r.lastSyncResourceVersionMutex.RUnlock() 880 return r.lastSyncResourceVersion 881 } 882 883 func (r *Reflector) setLastSyncResourceVersion(v string) { 884 r.lastSyncResourceVersionMutex.Lock() 885 defer r.lastSyncResourceVersionMutex.Unlock() 886 r.lastSyncResourceVersion = v 887 } 888 889 // relistResourceVersion determines the resource version the reflector should list or relist from. 890 // Returns either the lastSyncResourceVersion so that this reflector will relist with a resource 891 // versions no older than has already been observed in relist results or watch events, or, if the last relist resulted 892 // in an HTTP 410 (Gone) status code, returns "" so that the relist will use the latest resource version available in 893 // etcd via a quorum read. 894 func (r *Reflector) relistResourceVersion() string { 895 r.lastSyncResourceVersionMutex.RLock() 896 defer r.lastSyncResourceVersionMutex.RUnlock() 897 898 if r.isLastSyncResourceVersionUnavailable { 899 // Since this reflector makes paginated list requests, and all paginated list requests skip the watch cache 900 // if the lastSyncResourceVersion is unavailable, we set ResourceVersion="" and list again to re-establish reflector 901 // to the latest available ResourceVersion, using a consistent read from etcd. 
902 return "" 903 } 904 if r.lastSyncResourceVersion == "" { 905 // For performance reasons, initial list performed by reflector uses "0" as resource version to allow it to 906 // be served from the watch cache if it is enabled. 907 return "0" 908 } 909 return r.lastSyncResourceVersion 910 } 911 912 // rewatchResourceVersion determines the resource version the reflector should start streaming from. 913 func (r *Reflector) rewatchResourceVersion() string { 914 r.lastSyncResourceVersionMutex.RLock() 915 defer r.lastSyncResourceVersionMutex.RUnlock() 916 if r.isLastSyncResourceVersionUnavailable { 917 // initial stream should return data at the most recent resource version. 918 // the returned data must be consistent i.e. as if served from etcd via a quorum read 919 return "" 920 } 921 return r.lastSyncResourceVersion 922 } 923 924 // setIsLastSyncResourceVersionUnavailable sets if the last list or watch request with lastSyncResourceVersion returned 925 // "expired" or "too large resource version" error. 926 func (r *Reflector) setIsLastSyncResourceVersionUnavailable(isUnavailable bool) { 927 r.lastSyncResourceVersionMutex.Lock() 928 defer r.lastSyncResourceVersionMutex.Unlock() 929 r.isLastSyncResourceVersionUnavailable = isUnavailable 930 } 931 932 func isExpiredError(err error) bool { 933 // In Kubernetes 1.17 and earlier, the api server returns both apierrors.StatusReasonExpired and 934 // apierrors.StatusReasonGone for HTTP 410 (Gone) status code responses. In 1.18 the kube server is more consistent 935 // and always returns apierrors.StatusReasonExpired. For backward compatibility we can only remove the apierrors.IsGone 936 // check when we fully drop support for Kubernetes 1.17 servers from reflectors. 
937 return apierrors.IsResourceExpired(err) || apierrors.IsGone(err) 938 } 939 940 func isTooLargeResourceVersionError(err error) bool { 941 if apierrors.HasStatusCause(err, metav1.CauseTypeResourceVersionTooLarge) { 942 return true 943 } 944 // In Kubernetes 1.17.0-1.18.5, the api server doesn't set the error status cause to 945 // metav1.CauseTypeResourceVersionTooLarge to indicate that the requested minimum resource 946 // version is larger than the largest currently available resource version. To ensure backward 947 // compatibility with these server versions we also need to detect the error based on the content 948 // of the error message field. 949 if !apierrors.IsTimeout(err) { 950 return false 951 } 952 apierr, ok := err.(apierrors.APIStatus) 953 if !ok || apierr == nil || apierr.Status().Details == nil { 954 return false 955 } 956 for _, cause := range apierr.Status().Details.Causes { 957 // Matches the message returned by api server 1.17.0-1.18.5 for this error condition 958 if cause.Message == "Too large resource version" { 959 return true 960 } 961 } 962 963 // Matches the message returned by api server before 1.17.0 964 if strings.Contains(apierr.Status().Message, "Too large resource version") { 965 return true 966 } 967 968 return false 969 } 970 971 // isWatchErrorRetriable determines if it is safe to retry 972 // a watch error retrieved from the server. 973 func isWatchErrorRetriable(err error) bool { 974 // If this is "connection refused" error, it means that most likely apiserver is not responsive. 975 // It doesn't make sense to re-list all objects because most likely we will be able to restart 976 // watch where we ended. 977 // If that's the case begin exponentially backing off and resend watch request. 978 // Do the same for "429" errors. 
979 if utilnet.IsConnectionRefused(err) || apierrors.IsTooManyRequests(err) { 980 return true 981 } 982 return false 983 } 984 985 // wrapListFuncWithContext simply wraps ListFunction into another function that accepts a context and ignores it. 986 func wrapListFuncWithContext(listFn ListFunc) func(ctx context.Context, options metav1.ListOptions) (runtime.Object, error) { 987 return func(_ context.Context, options metav1.ListOptions) (runtime.Object, error) { 988 return listFn(options) 989 } 990 } 991 992 // initialEventsEndBookmarkTicker a ticker that produces a warning if the bookmark event 993 // which marks the end of the watch stream, has not been received within the defined tick interval. 994 // 995 // Note: 996 // The methods exposed by this type are not thread-safe. 997 type initialEventsEndBookmarkTicker struct { 998 clock.Ticker 999 clock clock.Clock 1000 name string 1001 1002 watchStart time.Time 1003 tickInterval time.Duration 1004 lastEventObserveTime time.Time 1005 } 1006 1007 // newInitialEventsEndBookmarkTicker returns a noop ticker if exitOnInitialEventsEndBookmarkRequested is false. 1008 // Otherwise, it returns a ticker that exposes a method producing a warning if the bookmark event, 1009 // which marks the end of the watch stream, has not been received within the defined tick interval. 1010 // 1011 // Note that the caller controls whether to call t.C() and t.Stop(). 1012 // 1013 // In practice, the reflector exits the watchHandler as soon as the bookmark event is received and calls the t.C() method. 
1014 func newInitialEventsEndBookmarkTicker(name string, c clock.Clock, watchStart time.Time, exitOnWatchListBookmarkReceived bool) *initialEventsEndBookmarkTicker { 1015 return newInitialEventsEndBookmarkTickerInternal(name, c, watchStart, 10*time.Second, exitOnWatchListBookmarkReceived) 1016 } 1017 1018 func newInitialEventsEndBookmarkTickerInternal(name string, c clock.Clock, watchStart time.Time, tickInterval time.Duration, exitOnWatchListBookmarkReceived bool) *initialEventsEndBookmarkTicker { 1019 clockWithTicker, ok := c.(clock.WithTicker) 1020 if !ok || !exitOnWatchListBookmarkReceived { 1021 if exitOnWatchListBookmarkReceived { 1022 klog.Warningf("clock does not support WithTicker interface but exitOnInitialEventsEndBookmark was requested") 1023 } 1024 return &initialEventsEndBookmarkTicker{ 1025 Ticker: &noopTicker{}, 1026 } 1027 } 1028 1029 return &initialEventsEndBookmarkTicker{ 1030 Ticker: clockWithTicker.NewTicker(tickInterval), 1031 clock: c, 1032 name: name, 1033 watchStart: watchStart, 1034 tickInterval: tickInterval, 1035 } 1036 } 1037 1038 func (t *initialEventsEndBookmarkTicker) observeLastEventTimeStamp(lastEventObserveTime time.Time) { 1039 t.lastEventObserveTime = lastEventObserveTime 1040 } 1041 1042 func (t *initialEventsEndBookmarkTicker) warnIfExpired() { 1043 if err := t.produceWarningIfExpired(); err != nil { 1044 klog.Warning(err) 1045 } 1046 } 1047 1048 // produceWarningIfExpired returns an error that represents a warning when 1049 // the time elapsed since the last received event exceeds the tickInterval. 1050 // 1051 // Note that this method should be called when t.C() yields a value. 
1052 func (t *initialEventsEndBookmarkTicker) produceWarningIfExpired() error { 1053 if _, ok := t.Ticker.(*noopTicker); ok { 1054 return nil /*noop ticker*/ 1055 } 1056 if t.lastEventObserveTime.IsZero() { 1057 return fmt.Errorf("%s: awaiting required bookmark event for initial events stream, no events received for %v", t.name, t.clock.Since(t.watchStart)) 1058 } 1059 elapsedTime := t.clock.Now().Sub(t.lastEventObserveTime) 1060 hasBookmarkTimerExpired := elapsedTime >= t.tickInterval 1061 1062 if !hasBookmarkTimerExpired { 1063 return nil 1064 } 1065 return fmt.Errorf("%s: hasn't received required bookmark event marking the end of initial events stream, received last event %v ago", t.name, elapsedTime) 1066 } 1067 1068 var _ clock.Ticker = &noopTicker{} 1069 1070 // TODO(#115478): move to k8s/utils repo 1071 type noopTicker struct{} 1072 1073 func (t *noopTicker) C() <-chan time.Time { return nil } 1074 1075 func (t *noopTicker) Stop() {}