github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/k8s/watch.go

package k8s

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/blang/semver"
	"github.com/pkg/errors"
	"golang.org/x/sync/singleflight"
	v1 "k8s.io/api/core/v1"
	apiErrors "k8s.io/apimachinery/pkg/api/errors"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/version"
	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/metadata/metadatainformer"
	"k8s.io/client-go/tools/cache"

	"github.com/tilt-dev/tilt/pkg/logger"
)

type InformerSet interface {
	// For all watchers, a namespace must be specified.
	WatchPods(ctx context.Context, ns Namespace) (<-chan ObjectUpdate, error)

	WatchServices(ctx context.Context, ns Namespace) (<-chan *v1.Service, error)

	WatchEvents(ctx context.Context, ns Namespace) (<-chan *v1.Event, error)

	// Fetch a pod from the informer cache.
	//
	// If no informer has started, start one now on the given ctx.
	//
	// The pod should be treated as immutable (since it's a pointer to a shared cache reference).
	PodFromInformerCache(ctx context.Context, nn types.NamespacedName) (*v1.Pod, error)
}

type informerSet struct {
	clientset kubernetes.Interface
	dynamic   dynamic.Interface

	// singleflight and mu protect access to the shared informers.
	mu           sync.Mutex
	singleflight *singleflight.Group
	informers    map[string]cache.SharedInformer
}

func newInformerSet(clientset kubernetes.Interface, dynamic dynamic.Interface) *informerSet {
	return &informerSet{
		clientset:    clientset,
		dynamic:      dynamic,
		singleflight: &singleflight.Group{},
		informers:    make(map[string]cache.SharedInformer),
	}
}

var PodGVR = v1.SchemeGroupVersion.WithResource("pods")
var ServiceGVR = v1.SchemeGroupVersion.WithResource("services")
var EventGVR = v1.SchemeGroupVersion.WithResource("events")

// Inspired by:
// https://groups.google.com/g/kubernetes-sig-api-machinery/c/PbSCXdLDno0/m/v9gH3HXVDAAJ
const resyncPeriod = 15 * time.Minute

// A wrapper object around SharedInformer objects, to make them
// a bit easier to use correctly.
type ObjectUpdate struct {
	obj      interface{}
	isDelete bool
}

// Returns a Pod if this is a pod Add or a pod Update.
func (r ObjectUpdate) AsPod() (*v1.Pod, bool) {
	if r.isDelete {
		return nil, false
	}
	pod, ok := r.obj.(*v1.Pod)
	return pod, ok
}

// Returns the object update as the NamespacedName of the pod.
func (r ObjectUpdate) AsNamespacedName() (types.NamespacedName, bool) {
	pod, ok := r.AsPod()
	if ok {
		return types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace}, true
	}

	ns, name, ok := r.AsDeletedKey()
	if ok {
		return types.NamespacedName{Name: name, Namespace: string(ns)}, true
	}

	return types.NamespacedName{}, false
}

// Returns (namespace, name, isDelete).
//
// The informer's OnDelete handler sometimes gives us a structured object, and
// sometimes returns a DeletedFinalStateUnknown object. To make this easier to
// handle correctly, we never allow access to the OnDelete object. Instead, we
// force the caller to use AsDeletedKey() to get the identifier of the object.
//
// For more info, see:
// https://godoc.org/k8s.io/client-go/tools/cache#ResourceEventHandler
func (r ObjectUpdate) AsDeletedKey() (Namespace, string, bool) {
	if !r.isDelete {
		return "", "", false
	}
	key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(r.obj)
	if err != nil {
		return "", "", false
	}
	ns, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return "", "", false
	}
	return Namespace(ns), name, true
}
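// Illustrative sketch, not part of the upstream file: how a caller might
// consume the WatchPods channel, using AsPod for adds/updates and
// AsDeletedKey for deletions. The function name and log messages are
// hypothetical.
func examplePodWatchLoop(ctx context.Context, set InformerSet, ns Namespace) error {
	ch, err := set.WatchPods(ctx, ns)
	if err != nil {
		return err
	}
	for {
		select {
		case <-ctx.Done():
			return nil
		case update := <-ch:
			if pod, ok := update.AsPod(); ok {
				logger.Get(ctx).Infof("pod added/updated: %s/%s", pod.Namespace, pod.Name)
				continue
			}
			if deletedNS, name, ok := update.AsDeletedKey(); ok {
				logger.Get(ctx).Infof("pod deleted: %s/%s", deletedNS, name)
			}
		}
	}
}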
func maybeUnpackStatusError(err error) error {
	statusErr, isStatusErr := err.(*apiErrors.StatusError)
	if !isStatusErr {
		return err
	}
	status := statusErr.ErrStatus
	return fmt.Errorf("%s, Reason: %s, Code: %d", status.Message, status.Reason, status.Code)
}

// Get the shared informer for this namespace + resource, making a new one and
// starting it if none exists yet.
func (s *informerSet) makeInformer(
	ctx context.Context,
	ns Namespace,
	gvr schema.GroupVersionResource) (cache.SharedInformer, error) {
	if ns == "" {
		return nil, fmt.Errorf("missing namespace from watch request")
	}

	key := fmt.Sprintf("%s/%s", ns, gvr)
	result, err, _ := s.singleflight.Do(key, func() (interface{}, error) {
		s.mu.Lock()
		cached, ok := s.informers[key]
		s.mu.Unlock()
		if ok {
			return cached, nil
		}

		newInformer, err := s.makeInformerHelper(ctx, ns, gvr)
		if err != nil {
			return nil, err
		}
		s.mu.Lock()
		s.informers[key] = newInformer
		s.mu.Unlock()
		return newInformer, nil
	})

	if err != nil {
		return nil, err
	}
	return result.(cache.SharedInformer), nil
}

// Make a new informer, and start it.
func (s *informerSet) makeInformerHelper(
	ctx context.Context,
	ns Namespace,
	gvr schema.GroupVersionResource) (cache.SharedInformer, error) {
	// HACK(dmiller): There's no way to get errors out of an informer. See https://github.com/kubernetes/client-go/issues/155
	// In the meantime, at least to get authorization and some other errors, let's try to set up a watcher and then just
	// throw it away.
	watcher, err := s.dynamic.Resource(gvr).Namespace(ns.String()).
		Watch(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, errors.Wrap(maybeUnpackStatusError(err), "makeInformer")
	}
	watcher.Stop()

	options := []informers.SharedInformerOption{
		informers.WithNamespace(ns.String()),
	}

	factory := informers.NewSharedInformerFactoryWithOptions(s.clientset, resyncPeriod, options...)
	resFactory, err := factory.ForResource(gvr)
	if err != nil {
		return nil, errors.Wrap(err, "makeInformer")
	}

	informer := resFactory.Informer()

	go runInformer(ctx, gvr.Resource, informer)

	return informer, nil
}

func (s *informerSet) WatchEvents(ctx context.Context, ns Namespace) (<-chan *v1.Event, error) {
	gvr := EventGVR
	informer, err := s.makeInformer(ctx, ns, gvr)
	if err != nil {
		return nil, errors.Wrap(err, "WatchEvents")
	}

	ch := make(chan *v1.Event)
	_, err = informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			mObj, ok := obj.(*v1.Event)
			if ok {
				ch <- mObj
			}
		},
		UpdateFunc: func(oldObj interface{}, newObj interface{}) {
			mObj, ok := newObj.(*v1.Event)
			if ok {
				oldObj, ok := oldObj.(*v1.Event)
				// The informer regularly gives us updates for events where cmp.Equal(oldObj, newObj) returns true.
				// We have not investigated why it does this, but these updates seem to always be spurious and
				// uninteresting.
				// We could check cmp.Equal here, but really, `Count` is probably the only reason we even care about
				// updates at all.
				if !ok || oldObj.Count < mObj.Count {
					ch <- mObj
				}
			}
		},
	})
	if err != nil {
		return nil, errors.Wrap(err, "WatchEvents")
	}

	return ch, nil
}

// Fetch a pod from the informer cache.
//
// If no informer has started, start one now on the given ctx.
//
// The pod should be treated as immutable (since it's a pointer to a shared cache reference).
func (s *informerSet) PodFromInformerCache(ctx context.Context, nn types.NamespacedName) (*v1.Pod, error) {
	gvr := PodGVR
	informer, err := s.makeInformer(ctx, Namespace(nn.Namespace), gvr)
	if err != nil {
		return nil, errors.Wrap(err, "PodFromInformer")
	}
	pod, exists, err := informer.GetStore().Get(&v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: nn.Name, Namespace: nn.Namespace},
	})
	if err != nil {
		return nil, err
	}
	if !exists {
		return nil, apierrors.NewNotFound(gvr.GroupResource(), nn.Name)
	}
	return pod.(*v1.Pod), nil
}

func (s *informerSet) WatchPods(ctx context.Context, ns Namespace) (<-chan ObjectUpdate, error) {
	gvr := PodGVR
	informer, err := s.makeInformer(ctx, ns, gvr)
	if err != nil {
		return nil, errors.Wrap(err, "WatchPods")
	}

	ch := make(chan ObjectUpdate)
	_, err = informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			mObj, ok := obj.(*v1.Pod)
			if ok {
				obj = FixContainerStatusImagesNoMutation(mObj)
			}
			ch <- ObjectUpdate{obj: obj}
		},
		DeleteFunc: func(obj interface{}) {
			mObj, ok := obj.(*v1.Pod)
			if ok {
				obj = FixContainerStatusImagesNoMutation(mObj)
			}
			ch <- ObjectUpdate{obj: obj, isDelete: true}
		},
		UpdateFunc: func(oldObj interface{}, newObj interface{}) {
			oldPod, ok := oldObj.(*v1.Pod)
			if !ok {
				return
			}

			newPod, ok := newObj.(*v1.Pod)
			if !ok || oldPod == newPod {
				return
			}

			newPod = FixContainerStatusImagesNoMutation(newPod)
			ch <- ObjectUpdate{obj: newPod}
		},
	})
	if err != nil {
		return nil, errors.Wrap(err, "WatchPods")
	}

	return ch, nil
}
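// Illustrative sketch, not part of the upstream file: how a caller might use
// PodFromInformerCache, distinguishing "not in the cache" from a real error.
// The function name is hypothetical.
func examplePodLookup(ctx context.Context, set InformerSet, nn types.NamespacedName) (*v1.Pod, bool, error) {
	pod, err := set.PodFromInformerCache(ctx, nn)
	if apierrors.IsNotFound(err) {
		// The informer cache hasn't seen this pod (yet); treat as a cache miss.
		return nil, false, nil
	}
	if err != nil {
		return nil, false, err
	}
	// The returned pod points into the shared informer cache, so treat it as
	// read-only and deep-copy it before mutating.
	return pod, true, nil
}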
func (s *informerSet) WatchServices(ctx context.Context, ns Namespace) (<-chan *v1.Service, error) {
	gvr := ServiceGVR
	informer, err := s.makeInformer(ctx, ns, gvr)
	if err != nil {
		return nil, errors.Wrap(err, "WatchServices")
	}

	ch := make(chan *v1.Service)
	_, err = informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			mObj, ok := obj.(*v1.Service)
			if ok {
				ch <- mObj
			}
		},
		UpdateFunc: func(oldObj interface{}, newObj interface{}) {
			newService, ok := newObj.(*v1.Service)
			if ok {
				ch <- newService
			}
		},
	})
	if err != nil {
		return nil, errors.Wrap(err, "WatchServices")
	}

	return ch, nil
}

func supportsPartialMetadata(v *version.Info) bool {
	k1dot15, err := semver.ParseTolerant("v1.15.0")
	if err != nil {
		return false
	}
	version, err := semver.ParseTolerant(v.GitVersion)
	if err != nil {
		// If we don't recognize the version number,
		// assume this server doesn't support partial metadata.
		return false
	}
	return version.GTE(k1dot15)
}

func (kCli *K8sClient) WatchMeta(ctx context.Context, gvk schema.GroupVersionKind, ns Namespace) (<-chan metav1.Object, error) {
	mapping, err := kCli.forceDiscovery(ctx, gvk)
	if err != nil {
		return nil, errors.Wrap(err, "WatchMeta")
	}
	gvr := mapping.Resource

	version, err := kCli.discovery.ServerVersion()
	if err != nil {
		return nil, errors.Wrap(err, "WatchMeta")
	}

	if supportsPartialMetadata(version) {
		return kCli.watchMeta15Plus(ctx, gvr, ns)
	}
	return kCli.watchMeta14Minus(ctx, gvr, ns)
}
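// Illustrative sketch, not part of the upstream file: how a caller might watch
// only object metadata for Deployments via WatchMeta, letting the client pick
// the partial-metadata informer when the server supports it. The function name
// and log message are hypothetical.
func exampleWatchDeploymentMeta(ctx context.Context, kCli *K8sClient, ns Namespace) error {
	gvk := schema.GroupVersionKind{Group: "apps", Version: "v1", Kind: "Deployment"}
	ch, err := kCli.WatchMeta(ctx, gvk, ns)
	if err != nil {
		return err
	}
	go func() {
		for meta := range ch {
			logger.Get(ctx).Infof("deployment changed: %s/%s", meta.GetNamespace(), meta.GetName())
		}
	}()
	return nil
}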
// Work around a bug in client-go:
// https://github.com/kubernetes/client-go/issues/882
func (kCli *K8sClient) watchMeta14Minus(ctx context.Context, gvr schema.GroupVersionResource, ns Namespace) (<-chan metav1.Object, error) {
	factory := informers.NewSharedInformerFactoryWithOptions(kCli.clientset, resyncPeriod, informers.WithNamespace(ns.String()))
	resFactory, err := factory.ForResource(gvr)
	if err != nil {
		return nil, errors.Wrap(err, "watchMeta")
	}
	informer := resFactory.Informer()
	ch := make(chan metav1.Object)
	_, err = informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			mObj, ok := obj.(runtime.Object)
			if !ok {
				return
			}

			entity := NewK8sEntity(mObj)
			ch <- entity.Meta()
		},
		UpdateFunc: func(oldObj interface{}, newObj interface{}) {
			mNewObj, ok := newObj.(runtime.Object)
			if !ok {
				return
			}

			entity := NewK8sEntity(mNewObj)
			ch <- entity.Meta()
		},
	})
	if err != nil {
		return nil, errors.Wrap(err, "WatchMeta")
	}

	go runInformer(ctx, fmt.Sprintf("%s-metadata", gvr.Resource), informer)

	return ch, nil
}

func (kCli *K8sClient) watchMeta15Plus(ctx context.Context, gvr schema.GroupVersionResource, ns Namespace) (<-chan metav1.Object, error) {
	factory := metadatainformer.NewFilteredSharedInformerFactory(kCli.metadata, resyncPeriod, ns.String(), func(*metav1.ListOptions) {})
	informer := factory.ForResource(gvr).Informer()

	ch := make(chan metav1.Object)
	_, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			mObj, ok := obj.(*metav1.PartialObjectMetadata)
			if ok {
				ch <- &mObj.ObjectMeta
			}
		},
		UpdateFunc: func(oldObj interface{}, newObj interface{}) {
			mNewObj, ok := newObj.(*metav1.PartialObjectMetadata)
			if ok {
				ch <- &mNewObj.ObjectMeta
			}
		},
	})
	if err != nil {
		return nil, errors.Wrap(err, "WatchMeta")
	}

	go runInformer(ctx, fmt.Sprintf("%s-metadata", gvr.Resource), informer)

	return ch, nil
}

func runInformer(ctx context.Context, name string, informer cache.SharedInformer) {
	originalDuration := 3 * time.Second
	originalBackoff := wait.Backoff{
		Steps:    1000,
		Duration: originalDuration,
		Factor:   3.0,
		Jitter:   0.5,
		Cap:      time.Hour,
	}
	backoff := originalBackoff
	lastErrorHandlerFinish := time.Time{}
	_ = informer.SetWatchErrorHandler(func(r *cache.Reflector, err error) {
		sleepTime := originalDuration
		if time.Since(lastErrorHandlerFinish) < time.Second {
			sleepTime = backoff.Step()
			logger.Get(ctx).Warnf("Pausing k8s %s watcher for %s: %v",
				name,
				sleepTime.Truncate(time.Second),
				err)
		} else {
			backoff = originalBackoff
		}

		select {
		case <-ctx.Done():
		case <-time.After(sleepTime):
		}
		lastErrorHandlerFinish = time.Now()
	})
	informer.Run(ctx.Done())
}
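// Illustrative sketch, not part of the upstream file: the watch error handler
// above starts at a 3s pause and roughly triples it on rapid, repeated
// failures (with jitter, capped at an hour), resetting once errors arrive more
// than a second apart. This hypothetical helper shows the growth of an
// equivalent backoff with jitter disabled: 3s, 9s, 27s, 81s, 243s.
func exampleBackoffSteps() []time.Duration {
	b := wait.Backoff{
		Steps:    5,
		Duration: 3 * time.Second,
		Factor:   3.0,
		Jitter:   0, // disable jitter so the steps are deterministic
		Cap:      time.Hour,
	}
	steps := make([]time.Duration, 0, 5)
	for i := 0; i < 5; i++ {
		steps = append(steps, b.Step())
	}
	return steps
}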