github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/k8s/watch.go

package k8s

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/blang/semver"
	"github.com/pkg/errors"
	"golang.org/x/sync/singleflight"
	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/version"
	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/metadata/metadatainformer"
	"k8s.io/client-go/tools/cache"

	"github.com/tilt-dev/tilt/pkg/logger"
)

type InformerSet interface {
	// For all watchers, a namespace must be specified.
	WatchPods(ctx context.Context, ns Namespace) (<-chan ObjectUpdate, error)

	WatchServices(ctx context.Context, ns Namespace) (<-chan *v1.Service, error)

	WatchEvents(ctx context.Context, ns Namespace) (<-chan *v1.Event, error)

	// Fetch a pod from the informer cache.
	//
	// If no informer has started, start one now on the given ctx.
	//
	// The pod should be treated as immutable (since it's a pointer to a shared cache reference).
	PodFromInformerCache(ctx context.Context, nn types.NamespacedName) (*v1.Pod, error)
}
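
// Illustrative sketch: how a caller might consume WatchPods and unpack the
// ObjectUpdate values it emits. The function name and the logging are
// hypothetical caller-side choices, not part of this package's API.
func examplePodWatchLoop(ctx context.Context, set InformerSet, ns Namespace) error {
	ch, err := set.WatchPods(ctx, ns)
	if err != nil {
		return err
	}
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case update := <-ch:
			if pod, ok := update.AsPod(); ok {
				// Adds and updates arrive as full pod objects.
				logger.Get(ctx).Infof("pod %s/%s changed, phase %s", pod.Namespace, pod.Name, pod.Status.Phase)
			} else if deletedNs, name, ok := update.AsDeletedKey(); ok {
				// Deletes only expose the identifier of the deleted object.
				logger.Get(ctx).Infof("pod %s/%s deleted", deletedNs, name)
			}
		}
	}
}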

type informerSet struct {
	clientset kubernetes.Interface
	dynamic   dynamic.Interface

	// singleflight and mu protect access to the shared informers map
	mu           sync.Mutex
	singleflight *singleflight.Group
	informers    map[string]cache.SharedInformer
}

func newInformerSet(clientset kubernetes.Interface, dynamic dynamic.Interface) *informerSet {
	return &informerSet{
		clientset:    clientset,
		dynamic:      dynamic,
		singleflight: &singleflight.Group{},
		informers:    make(map[string]cache.SharedInformer),
	}
}

var PodGVR = v1.SchemeGroupVersion.WithResource("pods")
var ServiceGVR = v1.SchemeGroupVersion.WithResource("services")
var EventGVR = v1.SchemeGroupVersion.WithResource("events")

// Inspired by:
// https://groups.google.com/g/kubernetes-sig-api-machinery/c/PbSCXdLDno0/m/v9gH3HXVDAAJ
const resyncPeriod = 15 * time.Minute

// A wrapper object around SharedInformer objects, to make them
// a bit easier to use correctly.
type ObjectUpdate struct {
	obj      interface{}
	isDelete bool
}

// Returns a Pod if this is a pod Add or a pod Update.
func (r ObjectUpdate) AsPod() (*v1.Pod, bool) {
	if r.isDelete {
		return nil, false
	}
	pod, ok := r.obj.(*v1.Pod)
	return pod, ok
}

// Returns the object update as the NamespacedName of the pod.
func (r ObjectUpdate) AsNamespacedName() (types.NamespacedName, bool) {
	pod, ok := r.AsPod()
	if ok {
		return types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace}, true
	}

	ns, name, ok := r.AsDeletedKey()
	if ok {
		return types.NamespacedName{Name: name, Namespace: string(ns)}, true
	}

	return types.NamespacedName{}, false
}

// Returns (namespace, name, isDelete).
//
// The informer's OnDelete handler sometimes gives us a structured object, and
// sometimes returns a DeletedFinalStateUnknown object. To make this easier to
// handle correctly, we never allow access to the OnDelete object. Instead, we
// force the caller to use AsDeletedKey() to get the identifier of the object.
//
// For more info, see:
// https://godoc.org/k8s.io/client-go/tools/cache#ResourceEventHandler
func (r ObjectUpdate) AsDeletedKey() (Namespace, string, bool) {
	if !r.isDelete {
		return "", "", false
	}
	key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(r.obj)
	if err != nil {
		return "", "", false
	}
	ns, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return "", "", false
	}
	return Namespace(ns), name, true
}
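
// Illustrative sketch: AsDeletedKey works both for a concrete object and for
// the cache.DeletedFinalStateUnknown tombstone an informer can hand to
// OnDelete when it missed the final delete event. The function name and the
// sample pod are hypothetical.
func exampleDeletedKey(ctx context.Context) {
	tombstone := cache.DeletedFinalStateUnknown{
		Key: "default/my-pod",
		Obj: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "my-pod", Namespace: "default"}},
	}
	update := ObjectUpdate{obj: tombstone, isDelete: true}
	if ns, name, ok := update.AsDeletedKey(); ok {
		logger.Get(ctx).Infof("deleted %s/%s", ns, name)
	}
}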

func maybeUnpackStatusError(err error) error {
	statusErr, isStatusErr := err.(*apierrors.StatusError)
	if !isStatusErr {
		return err
	}
	status := statusErr.ErrStatus
	return fmt.Errorf("%s, Reason: %s, Code: %d", status.Message, status.Reason, status.Code)
}

// Get the shared informer for this namespace/resource pair, creating and
// starting one if it doesn't exist yet. Concurrent callers are deduplicated
// with singleflight, so only one informer is ever started per key.
func (s *informerSet) makeInformer(
	ctx context.Context,
	ns Namespace,
	gvr schema.GroupVersionResource) (cache.SharedInformer, error) {
	if ns == "" {
		return nil, fmt.Errorf("missing namespace from watch request")
	}

	key := fmt.Sprintf("%s/%s", ns, gvr)
	result, err, _ := s.singleflight.Do(key, func() (interface{}, error) {
		s.mu.Lock()
		cached, ok := s.informers[key]
		s.mu.Unlock()
		if ok {
			return cached, nil
		}

		newInformer, err := s.makeInformerHelper(ctx, ns, gvr)
		if err != nil {
			return nil, err
		}
		s.mu.Lock()
		s.informers[key] = newInformer
		s.mu.Unlock()
		return newInformer, err
	})

	if err != nil {
		return nil, err
	}
	return result.(cache.SharedInformer), nil
}

// Make a new informer, and start it.
func (s *informerSet) makeInformerHelper(
	ctx context.Context,
	ns Namespace,
	gvr schema.GroupVersionResource) (cache.SharedInformer, error) {
	// HACK(dmiller): There's no way to get errors out of an informer. See https://github.com/kubernetes/client-go/issues/155
	// In the meantime, at least to get authorization and some other errors let's try to set up a watcher and then just
	// throw it away.
	watcher, err := s.dynamic.Resource(gvr).Namespace(ns.String()).
		Watch(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, errors.Wrap(maybeUnpackStatusError(err), "makeInformer")
	}
	watcher.Stop()

	options := []informers.SharedInformerOption{
		informers.WithNamespace(ns.String()),
	}

	factory := informers.NewSharedInformerFactoryWithOptions(s.clientset, resyncPeriod, options...)
	resFactory, err := factory.ForResource(gvr)
	if err != nil {
		return nil, errors.Wrap(err, "makeInformer")
	}

	informer := resFactory.Informer()

	go runInformer(ctx, gvr.Resource, informer)

	return informer, nil
}

func (s *informerSet) WatchEvents(ctx context.Context, ns Namespace) (<-chan *v1.Event, error) {
	gvr := EventGVR
	informer, err := s.makeInformer(ctx, ns, gvr)
	if err != nil {
		return nil, errors.Wrap(err, "WatchEvents")
	}

	ch := make(chan *v1.Event)
	_, err = informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			mObj, ok := obj.(*v1.Event)
			if ok {
				ch <- mObj
			}
		},
		UpdateFunc: func(oldObj interface{}, newObj interface{}) {
			mObj, ok := newObj.(*v1.Event)
			if ok {
				oldEvent, ok := oldObj.(*v1.Event)
				// the informer regularly gives us updates for events where cmp.Equal(oldObj, newObj) returns true.
				// we have not investigated why it does this, but these updates seem to always be spurious and
				// uninteresting.
				// we could check cmp.Equal here, but really, `Count` is probably the only reason we even care about
				// updates at all.
				if !ok || oldEvent.Count < mObj.Count {
					ch <- mObj
				}
			}
		},
	})
	if err != nil {
		return nil, errors.Wrap(err, "WatchEvents")
	}

	return ch, nil
}

// Fetch a pod from the informer cache.
//
// If no informer has started, start one now on the given ctx.
//
// The pod should be treated as immutable (since it's a pointer to a shared cache reference).
func (s *informerSet) PodFromInformerCache(ctx context.Context, nn types.NamespacedName) (*v1.Pod, error) {
	gvr := PodGVR
	informer, err := s.makeInformer(ctx, Namespace(nn.Namespace), gvr)
	if err != nil {
		return nil, errors.Wrap(err, "PodFromInformerCache")
	}
	pod, exists, err := informer.GetStore().Get(&v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: nn.Name, Namespace: nn.Namespace},
	})
	if err != nil {
		return nil, err
	}
	if !exists {
		return nil, apierrors.NewNotFound(gvr.GroupResource(), nn.Name)
	}
	return pod.(*v1.Pod), nil
}
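
// Illustrative sketch of a caller: PodFromInformerCache reports a missing pod
// with a standard NotFound error, so callers can distinguish "not in cache"
// from other failures via apierrors.IsNotFound. The function name, namespace,
// and pod name are hypothetical.
func examplePodLookup(ctx context.Context, set InformerSet) {
	pod, err := set.PodFromInformerCache(ctx, types.NamespacedName{Namespace: "default", Name: "my-pod"})
	if apierrors.IsNotFound(err) {
		logger.Get(ctx).Infof("pod not in the informer cache yet")
		return
	}
	if err != nil {
		logger.Get(ctx).Errorf("pod lookup failed: %v", err)
		return
	}
	// The returned pod points into the shared cache, so treat it as read-only.
	logger.Get(ctx).Infof("pod %s is %s", pod.Name, pod.Status.Phase)
}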

func (s *informerSet) WatchPods(ctx context.Context, ns Namespace) (<-chan ObjectUpdate, error) {
	gvr := PodGVR
	informer, err := s.makeInformer(ctx, ns, gvr)
	if err != nil {
		return nil, errors.Wrap(err, "WatchPods")
	}

	ch := make(chan ObjectUpdate)
	_, err = informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			mObj, ok := obj.(*v1.Pod)
			if ok {
				obj = FixContainerStatusImagesNoMutation(mObj)
			}
			ch <- ObjectUpdate{obj: obj}
		},
		DeleteFunc: func(obj interface{}) {
			mObj, ok := obj.(*v1.Pod)
			if ok {
				obj = FixContainerStatusImagesNoMutation(mObj)
			}
			ch <- ObjectUpdate{obj: obj, isDelete: true}
		},
		UpdateFunc: func(oldObj interface{}, newObj interface{}) {
			oldPod, ok := oldObj.(*v1.Pod)
			if !ok {
				return
			}

			newPod, ok := newObj.(*v1.Pod)
			if !ok || oldPod == newPod {
				return
			}

			newPod = FixContainerStatusImagesNoMutation(newPod)
			ch <- ObjectUpdate{obj: newPod}
		},
	})
	if err != nil {
		return nil, errors.Wrap(err, "WatchPods")
	}

	return ch, nil
}

func (s *informerSet) WatchServices(ctx context.Context, ns Namespace) (<-chan *v1.Service, error) {
	gvr := ServiceGVR
	informer, err := s.makeInformer(ctx, ns, gvr)
	if err != nil {
		return nil, errors.Wrap(err, "WatchServices")
	}

	ch := make(chan *v1.Service)
	_, err = informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			mObj, ok := obj.(*v1.Service)
			if ok {
				ch <- mObj
			}
		},
		UpdateFunc: func(oldObj interface{}, newObj interface{}) {
			newService, ok := newObj.(*v1.Service)
			if ok {
				ch <- newService
			}
		},
	})
	if err != nil {
		return nil, errors.Wrap(err, "WatchServices")
	}

	return ch, nil
}

// supportsPartialMetadata reports whether the server is new enough (v1.15+)
// to support metadata-only watches.
func supportsPartialMetadata(v *version.Info) bool {
	k1dot15, err := semver.ParseTolerant("v1.15.0")
	if err != nil {
		return false
	}
	version, err := semver.ParseTolerant(v.GitVersion)
	if err != nil {
		// If we don't recognize the version number,
		// assume this server doesn't support metadata.
		return false
	}
	return version.GTE(k1dot15)
}
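
// Illustrative sketch: how supportsPartialMetadata behaves on a couple of
// sample GitVersion strings. The function name and the versions are
// hypothetical; semver.ParseTolerant accepts the "v" prefix and the vendor
// suffixes that GitVersion strings typically carry.
func exampleSupportsPartialMetadata(ctx context.Context) {
	old := &version.Info{GitVersion: "v1.14.10-gke.17"}
	newer := &version.Info{GitVersion: "v1.21.3"}
	logger.Get(ctx).Infof("1.14: %v, 1.21: %v",
		supportsPartialMetadata(old),   // false
		supportsPartialMetadata(newer)) // true
}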

func (kCli *K8sClient) WatchMeta(ctx context.Context, gvk schema.GroupVersionKind, ns Namespace) (<-chan metav1.Object, error) {
	mapping, err := kCli.forceDiscovery(ctx, gvk)
	if err != nil {
		return nil, errors.Wrap(err, "WatchMeta")
	}
	gvr := mapping.Resource

	version, err := kCli.discovery.ServerVersion()
	if err != nil {
		return nil, errors.Wrap(err, "WatchMeta")
	}

	if supportsPartialMetadata(version) {
		return kCli.watchMeta15Plus(ctx, gvr, ns)
	}
	return kCli.watchMeta14Minus(ctx, gvr, ns)
}

// workaround a bug in client-go
// https://github.com/kubernetes/client-go/issues/882
func (kCli *K8sClient) watchMeta14Minus(ctx context.Context, gvr schema.GroupVersionResource, ns Namespace) (<-chan metav1.Object, error) {
	factory := informers.NewSharedInformerFactoryWithOptions(kCli.clientset, resyncPeriod, informers.WithNamespace(ns.String()))
	resFactory, err := factory.ForResource(gvr)
	if err != nil {
		return nil, errors.Wrap(err, "watchMeta")
	}
	informer := resFactory.Informer()
	ch := make(chan metav1.Object)
	_, err = informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			mObj, ok := obj.(runtime.Object)
			if !ok {
				return
			}

			entity := NewK8sEntity(mObj)
			ch <- entity.Meta()
		},
		UpdateFunc: func(oldObj interface{}, newObj interface{}) {
			mNewObj, ok := newObj.(runtime.Object)
			if !ok {
				return
			}

			entity := NewK8sEntity(mNewObj)
			ch <- entity.Meta()
		},
	})
	if err != nil {
		return nil, errors.Wrap(err, "WatchMeta")
	}

	go runInformer(ctx, fmt.Sprintf("%s-metadata", gvr.Resource), informer)

	return ch, nil
}

func (kCli *K8sClient) watchMeta15Plus(ctx context.Context, gvr schema.GroupVersionResource, ns Namespace) (<-chan metav1.Object, error) {
	factory := metadatainformer.NewFilteredSharedInformerFactory(kCli.metadata, resyncPeriod, ns.String(), func(*metav1.ListOptions) {})
	informer := factory.ForResource(gvr).Informer()

	ch := make(chan metav1.Object)
	_, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			mObj, ok := obj.(*metav1.PartialObjectMetadata)
			if ok {
				ch <- &mObj.ObjectMeta
			}
		},
		UpdateFunc: func(oldObj interface{}, newObj interface{}) {
			mNewObj, ok := newObj.(*metav1.PartialObjectMetadata)
			if ok {
				ch <- &mNewObj.ObjectMeta
			}
		},
	})
	if err != nil {
		return nil, errors.Wrap(err, "WatchMeta")
	}

	go runInformer(ctx, fmt.Sprintf("%s-metadata", gvr.Resource), informer)

	return ch, nil
}

// runInformer runs the informer until ctx is done.
//
// The watch error handler sleeps before the reflector retries: by the base
// duration normally, and with exponential backoff when errors arrive more
// than once per second.
func runInformer(ctx context.Context, name string, informer cache.SharedInformer) {
	originalDuration := 3 * time.Second
	originalBackoff := wait.Backoff{
		Steps:    1000,
		Duration: originalDuration,
		Factor:   3.0,
		Jitter:   0.5,
		Cap:      time.Hour,
	}
	backoff := originalBackoff
	lastErrorHandlerFinish := time.Time{}
	_ = informer.SetWatchErrorHandler(func(r *cache.Reflector, err error) {
		sleepTime := originalDuration
		if time.Since(lastErrorHandlerFinish) < time.Second {
			sleepTime = backoff.Step()
			logger.Get(ctx).Warnf("Pausing k8s %s watcher for %s: %v",
				name,
				sleepTime.Truncate(time.Second),
				err)
		} else {
			backoff = originalBackoff
		}

		select {
		case <-ctx.Done():
		case <-time.After(sleepTime):
		}
		lastErrorHandlerFinish = time.Now()
	})
	informer.Run(ctx.Done())
}