github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/controllers/core/kubernetesdiscovery/reconciler.go

package kubernetesdiscovery

import (
    "context"
    "fmt"
    "sync"
    "time"

    "github.com/pkg/errors"
    v1 "k8s.io/api/core/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/labels"
    "k8s.io/apimachinery/pkg/runtime"
    "k8s.io/apimachinery/pkg/types"
    errorutil "k8s.io/apimachinery/pkg/util/errors"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/builder"
    ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
    "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    "sigs.k8s.io/controller-runtime/pkg/handler"
    "sigs.k8s.io/controller-runtime/pkg/reconcile"

    "github.com/tilt-dev/tilt/internal/controllers/apicmp"
    "github.com/tilt-dev/tilt/internal/controllers/apis/cluster"
    "github.com/tilt-dev/tilt/internal/controllers/indexer"
    "github.com/tilt-dev/tilt/internal/k8s"
    "github.com/tilt-dev/tilt/internal/store"
    "github.com/tilt-dev/tilt/internal/store/k8sconv"
    "github.com/tilt-dev/tilt/internal/store/kubernetesdiscoverys"
    "github.com/tilt-dev/tilt/pkg/apis"
    "github.com/tilt-dev/tilt/pkg/apis/core/v1alpha1"
    "github.com/tilt-dev/tilt/pkg/logger"
    "github.com/tilt-dev/tilt/pkg/model"
)

var (
    apiGVStr   = v1alpha1.SchemeGroupVersion.String()
    apiKind    = "KubernetesDiscovery"
    apiType    = metav1.TypeMeta{Kind: apiKind, APIVersion: apiGVStr}
    clusterGVK = v1alpha1.SchemeGroupVersion.WithKind("Cluster")
)

type namespaceSet map[string]bool
type watcherSet map[watcherID]bool

// watcherID is to disambiguate between K8s object keys and tilt-apiserver KubernetesDiscovery object keys.
type watcherID types.NamespacedName

func (w watcherID) String() string {
    return types.NamespacedName(w).String()
}

type Reconciler struct {
    clients    *cluster.ClientManager
    st         store.Dispatcher
    indexer    *indexer.Indexer
    ctrlClient ctrlclient.Client
    requeuer   *indexer.Requeuer

    // restartDetector compares a previous version of status with the latest and emits log events
    // for any containers on the pod that restarted.
    restartDetector *ContainerRestartDetector

    // mu should be held throughout OnChange; helper methods used by it expect it to be held.
    // Any helper methods for the dispatch loop should claim the lock as needed.
    mu sync.Mutex

    // watchedNamespaces tracks the namespaces that are being observed for Pod events.
    //
    // For efficiency, a single watch is created for a given namespace and keys of watchers
    // are tracked; once there are no more watchers, cleanupAbandonedNamespaces will cancel
    // the watch.
    watchedNamespaces map[nsKey]nsWatch

    // watchers reflects the current state of the Reconciler namespace + UID watches.
    //
    // On reconcile, if the latest spec differs from what's tracked here, it will be acted upon.
    watchers map[watcherID]watcher

    // uidWatchers are the KubernetesDiscovery objects that have a watch ref for a particular K8s UID,
    // and so will receive events for changes to it (in addition to specs that match based on Pod labels).
    uidWatchers map[uidKey]watcherSet

    // knownDescendentPodUIDs maps the UID of Kubernetes resources to the UIDs of
    // all pods that they own (transitively).
    //
    // For example, a Deployment UID might contain a set of N pod UIDs.
    knownDescendentPodUIDs map[uidKey]k8s.UIDSet

    // knownPods is an index of all the known pods and associated Tilt-derived metadata, by UID.
    knownPods             map[uidKey]*v1.Pod
    knownPodOwnerCreation map[uidKey]metav1.Time

    // deletedPods is an index of pods that have been deleted from the cluster,
    // but are preserved for their termination status.
    //
    // Newer versions of Kubernetes have added 'ttl' fields that delete pods
    // after they terminate. We want Tilt to hang onto these pods, even
    // if they're deleted from the cluster.
    //
    // If a Pod is in deletedPods, it MUST exist in knownPods.
    deletedPods map[uidKey]bool
}

func (w *Reconciler) CreateBuilder(mgr ctrl.Manager) (*builder.Builder, error) {
    b := ctrl.NewControllerManagedBy(mgr).
        For(&v1alpha1.KubernetesDiscovery{}).
        Owns(&v1alpha1.PodLogStream{}).
        Owns(&v1alpha1.PortForward{}).
        Watches(&v1alpha1.Cluster{},
            handler.EnqueueRequestsFromMapFunc(w.indexer.Enqueue)).
        WatchesRawSource(w.requeuer)
    return b, nil
}

func NewReconciler(ctrlClient ctrlclient.Client, scheme *runtime.Scheme, clients cluster.ClientProvider, restartDetector *ContainerRestartDetector,
    st store.RStore) *Reconciler {
    return &Reconciler{
        ctrlClient:             ctrlClient,
        clients:                cluster.NewClientManager(clients),
        restartDetector:        restartDetector,
        requeuer:               indexer.NewRequeuer(),
        st:                     st,
        indexer:                indexer.NewIndexer(scheme, indexKubernetesDiscovery),
        watchedNamespaces:      make(map[nsKey]nsWatch),
        uidWatchers:            make(map[uidKey]watcherSet),
        watchers:               make(map[watcherID]watcher),
        knownDescendentPodUIDs: make(map[uidKey]k8s.UIDSet),
        knownPods:              make(map[uidKey]*v1.Pod),
        knownPodOwnerCreation:  make(map[uidKey]metav1.Time),
        deletedPods:            make(map[uidKey]bool),
    }
}

type watcher struct {
    // spec is the current version of the KubernetesDiscoverySpec being used for this watcher.
    //
    // It's used to simplify diffing logic and determine if action is needed.
    spec      v1alpha1.KubernetesDiscoverySpec
    startTime time.Time

    // extraSelectors are label selectors used to match pods that don't transitively match any known UID.
    extraSelectors []labels.Selector
    cluster        clusterKey
    errorReason    string
}

// nsWatch tracks the watchers for the given namespace and allows the watch to be canceled.
type nsWatch struct {
    watchers map[watcherID]bool
    cancel   context.CancelFunc
}

// Reconcile manages namespace watches for the modified KubernetesDiscovery object.
func (w *Reconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) {
    w.mu.Lock()
    defer w.mu.Unlock()

    key := watcherID(request.NamespacedName)
    existing, hasExisting := w.watchers[key]

    kd, err := w.getKubernetesDiscovery(ctx, key)
    w.indexer.OnReconcile(request.NamespacedName, kd)
    if err != nil {
        return ctrl.Result{}, err
    }

    if kd == nil || !kd.ObjectMeta.DeletionTimestamp.IsZero() {
        // spec was deleted - just clean up any watches and we're done
        if hasExisting {
            w.teardown(key)
            w.cleanupAbandonedNamespaces()
        }

        if err := w.manageOwnedObjects(ctx, request.NamespacedName, nil); err != nil {
            return ctrl.Result{}, err
        }

        w.st.Dispatch(kubernetesdiscoverys.NewKubernetesDiscoveryDeleteAction(request.NamespacedName.Name))
        return ctrl.Result{}, nil
    }

    ctx = store.MustObjectLogHandler(ctx, w.st, kd)

    // The apiserver is the source of truth, and will ensure the engine state is up to date.
    w.st.Dispatch(kubernetesdiscoverys.NewKubernetesDiscoveryUpsertAction(kd))

    cluster, err := w.getCluster(ctx, kd)
    if err != nil {
        return ctrl.Result{}, err
    }
    needsRefresh := w.clients.Refresh(kd, cluster)

    if !hasExisting || needsRefresh || !apicmp.DeepEqual(existing.spec, kd.Spec) {
        w.addOrReplace(ctx, key, kd, cluster)
    }

    kd, err = w.maybeUpdateObjectStatus(ctx, kd, key)
    if err != nil {
        return ctrl.Result{}, err
    }

    if err := w.manageOwnedObjects(ctx, request.NamespacedName, kd); err != nil {
        return ctrl.Result{}, err
    }

    return ctrl.Result{}, nil
}

func (w *Reconciler) getCluster(ctx context.Context, kd *v1alpha1.KubernetesDiscovery) (*v1alpha1.Cluster, error) {
    if kd.Spec.Cluster == "" {
        return nil, errors.New("cluster name is empty")
    }

    clusterNN := types.NamespacedName{Namespace: kd.Namespace, Name: kd.Spec.Cluster}
    var cluster v1alpha1.Cluster
    err := w.ctrlClient.Get(ctx, clusterNN, &cluster)
    if err != nil {
        return nil, err
    }
    return &cluster, nil
}

// getKubernetesDiscovery returns the KubernetesDiscovery object for the given key.
//
// If the API returns NotFound, nil will be returned for both the KubernetesDiscovery object AND the error to simplify
// error-handling for callers. All other errors will result in a wrapped error being passed along.
func (w *Reconciler) getKubernetesDiscovery(ctx context.Context, key watcherID) (*v1alpha1.KubernetesDiscovery, error) {
    nn := types.NamespacedName(key)
    var kd v1alpha1.KubernetesDiscovery
    if err := w.ctrlClient.Get(ctx, nn, &kd); err != nil {
        if apierrors.IsNotFound(err) {
            return nil, nil
        }
        return nil, fmt.Errorf("failed to get KubernetesDiscovery status for %q: %w", key, err)
    }
    return &kd, nil
}

func (w *Reconciler) addOrReplace(ctx context.Context, watcherKey watcherID, kd *v1alpha1.KubernetesDiscovery, cluster *v1alpha1.Cluster) {
    if _, ok := w.watchers[watcherKey]; ok {
        // if a watcher already exists, just tear it down and we'll set it up from scratch so that
        // we don't have to diff a bunch of different pieces
        w.teardown(watcherKey)
    }

    defer func() {
        // ensure that any namespaces of which this was the last watcher have their watch stopped
        w.cleanupAbandonedNamespaces()
    }()

    var extraSelectors []labels.Selector
    for _, s := range kd.Spec.ExtraSelectors {
        selector, err := metav1.LabelSelectorAsSelector(&s)
        if err != nil {
            w.watchers[watcherKey] = watcher{
                spec:        *kd.Spec.DeepCopy(),
                cluster:     newClusterKey(cluster),
                errorReason: fmt.Sprintf("invalid label selectors: %v", err),
            }
            return
        }
        extraSelectors = append(extraSelectors, selector)
    }

    newWatcher := watcher{
        spec:           *kd.Spec.DeepCopy(),
        extraSelectors: extraSelectors,
        cluster:        newClusterKey(cluster),
    }

    kCli, err := w.clients.GetK8sClient(kd, cluster)
    if err != nil {
        newWatcher.errorReason = "ClusterUnavailable"
    } else {
        currentNamespaces, currentUIDs := namespacesAndUIDsFromSpec(kd.Spec.Watches)
        for namespace := range currentNamespaces {
            nsKey := newNsKey(cluster, namespace)
            err := w.setupNamespaceWatch(ctx, nsKey, watcherKey, kCli)
            if err != nil {
                newWatcher.errorReason = err.Error()
                break
            }
        }

        if newWatcher.errorReason == "" {
            for watchUID := range currentUIDs {
                w.setupUIDWatch(ctx, newUIDKey(cluster, watchUID), watcherKey)
            }

            newWatcher.startTime = time.Now()
        }
    }

    w.watchers[watcherKey] = newWatcher
}

// teardown removes the watcher from all namespaces + UIDs it was watching.
//
// By design, teardown does NOT clean up any watches for namespaces that no longer have any active watchers.
// This is done by calling cleanupAbandonedNamespaces explicitly, which allows addOrReplace to have simpler logic
// by always calling teardown on a resource, then treating it as "new" and only cleaning up after it has (re-)added
// the watches, without needlessly removing + recreating the lower-level namespace watch.
func (w *Reconciler) teardown(watcherKey watcherID) {
    watcher := w.watchers[watcherKey]
    namespaces, uids := namespacesAndUIDsFromSpec(watcher.spec.Watches)
    for nsKey, nsWatch := range w.watchedNamespaces {
        if namespaces[nsKey.namespace] {
            delete(nsWatch.watchers, watcherKey)
        }
    }

    for uidKey, watchers := range w.uidWatchers {
        if uids[uidKey.uid] {
            delete(watchers, watcherKey)
        }
    }

    delete(w.watchers, watcherKey)
}

// cleanupAbandonedNamespaces removes the watch on any namespaces that no longer have any active watchers.
//
// mu must be held by caller.
//
// See watchedNamespaces for more details (for efficiency, we don't want duplicative namespace watches).
func (w *Reconciler) cleanupAbandonedNamespaces() {
    for nsKey, watcher := range w.watchedNamespaces {
        if len(watcher.watchers) == 0 {
            watcher.cancel()
            delete(w.watchedNamespaces, nsKey)
        }
    }
}

// setupNamespaceWatch creates a namespace watch if necessary and adds a key to the list of watchers for it.
//
// mu must be held by caller.
//
// It is idempotent:
//   - If no watch for the namespace exists, it is created and the given key is the sole watcher
//   - If a watch for the namespace exists but the given key is not in the watcher list, it is added
//   - If a watch for the namespace exists and the given key is already in the watcher list, it no-ops
//
// This ensures it can be safely called by reconcile on each invocation for any namespace that the watcher cares about.
// Additionally, for efficiency, duplicative watches on the same namespace will not be created; see watchedNamespaces
// for more details.
func (w *Reconciler) setupNamespaceWatch(ctx context.Context, nsKey nsKey, watcherKey watcherID, kCli k8s.Client) error {
    if watcher, ok := w.watchedNamespaces[nsKey]; ok {
        // already watching this namespace -- just add this watcher to the list for cleanup tracking
        watcher.watchers[watcherKey] = true
        return nil
    }

    ns := nsKey.namespace
    ch, err := kCli.WatchPods(ctx, k8s.Namespace(ns))
    if err != nil {
        return errors.Wrapf(err, "Error watching pods. Are you connected to kubernetes?\nTry running `kubectl get pods -n %q`", ns)
    }

    ctx, cancel := context.WithCancel(ctx)
    w.watchedNamespaces[nsKey] = nsWatch{
        watchers: map[watcherID]bool{watcherKey: true},
        cancel:   cancel,
    }

    go w.dispatchPodChangesLoop(ctx, nsKey, kCli.OwnerFetcher(), ch)
    return nil
}

// setupUIDWatch registers a watcher to receive updates for any Pods transitively owned by this UID (or that exactly
// match this UID).
//
// mu must be held by caller.
func (w *Reconciler) setupUIDWatch(_ context.Context, uidKey uidKey, watcherID watcherID) {
    if w.uidWatchers[uidKey][watcherID] {
        return
    }

    // add this key as a watcher for the UID
    uidWatchers, ok := w.uidWatchers[uidKey]
    if !ok {
        uidWatchers = make(watcherSet)
        w.uidWatchers[uidKey] = uidWatchers
    }
    uidWatchers[watcherID] = true
}

// maybeUpdateObjectStatus builds the latest status for the given KubernetesDiscovery spec
// key and persists it. Should only be called in the main reconciler thread.
//
// If the status has not changed since the last status update performed (by the
// Reconciler), it will be skipped.
//
// Returns the latest object on success.
func (w *Reconciler) maybeUpdateObjectStatus(ctx context.Context, kd *v1alpha1.KubernetesDiscovery, watcherID watcherID) (*v1alpha1.KubernetesDiscovery, error) {
    watcher := w.watchers[watcherID]
    status := w.buildStatus(ctx, watcher)
    if apicmp.DeepEqual(kd.Status, status) {
        // the status hasn't changed - avoid a spurious update
        return kd, nil
    }

    oldStatus := kd.Status
    oldError := w.statusError(kd.Status)

    update := kd.DeepCopy()
    update.Status = status
    err := w.ctrlClient.Status().Update(ctx, update)
    if err != nil {
        return nil, err
    }

    newError := w.statusError(update.Status)
    if newError != "" && oldError != newError {
        logger.Get(ctx).Errorf("kubernetesdiscovery %s: %s", update.Name, newError)
    }

    w.restartDetector.Detect(w.st, oldStatus, update)
    return update, nil
}

func (w *Reconciler) statusError(status v1alpha1.KubernetesDiscoveryStatus) string {
    if status.Waiting != nil {
        return status.Waiting.Reason
    }
    return ""
}

// buildStatus creates the current state for the given KubernetesDiscovery object key.
//
// mu must be held by caller.
func (w *Reconciler) buildStatus(ctx context.Context, watcher watcher) v1alpha1.KubernetesDiscoveryStatus {
    if watcher.errorReason != "" {
        return v1alpha1.KubernetesDiscoveryStatus{
            Waiting: &v1alpha1.KubernetesDiscoveryStateWaiting{
                Reason: watcher.errorReason,
            },
        }
    }

    seenPodUIDs := k8s.NewUIDSet()
    var pods []v1alpha1.Pod
    maybeTrackPod := func(pod *v1.Pod, ancestorUID types.UID) {
        if pod == nil || seenPodUIDs.Contains(pod.UID) {
            return
        }
        seenPodUIDs.Add(pod.UID)
        podObj := *k8sconv.Pod(ctx, pod, ancestorUID)
        if podObj.Owner != nil {
            podKey := uidKey{cluster: watcher.cluster, uid: pod.UID}
            podObj.Owner.CreationTimestamp = w.knownPodOwnerCreation[podKey]
        }
        pods = append(pods, podObj)
    }

    for i := range watcher.spec.Watches {
        watchUID := types.UID(watcher.spec.Watches[i].UID)
        if watchUID == "" || seenPodUIDs.Contains(watchUID) {
            continue
        }
        // UID could either refer directly to a Pod OR its ancestor (e.g. Deployment)
        watchedObjKey := uidKey{cluster: watcher.cluster, uid: watchUID}
        maybeTrackPod(w.knownPods[watchedObjKey], watchUID)
        for podUID := range w.knownDescendentPodUIDs[watchedObjKey] {
            podKey := uidKey{cluster: watcher.cluster, uid: podUID}
            maybeTrackPod(w.knownPods[podKey], watchUID)
        }
    }

    // TODO(milas): we should only match against Pods in namespaces referenced by the WatchRefs for this spec
    if len(watcher.spec.ExtraSelectors) != 0 {
        for podKey, pod := range w.knownPods {
            if podKey.cluster != watcher.cluster || seenPodUIDs.Contains(podKey.uid) {
                // ignore pods that are for other clusters or that we've already seen
                continue
            }
            podLabels := labels.Set(pod.Labels)
            for _, selector := range watcher.extraSelectors {
                if selector.Matches(podLabels) {
                    maybeTrackPod(pod, "")
                    break
                }
            }
        }
    }

    pods = w.maybeLetGoOfDeletedPods(pods, watcher.cluster)

    startTime := apis.NewMicroTime(watcher.startTime)
    return v1alpha1.KubernetesDiscoveryStatus{
        MonitorStartTime: startTime,
        Pods:             pods,
        Running: &v1alpha1.KubernetesDiscoveryStateRunning{
            StartTime: startTime,
        },
    }
}

// maybeLetGoOfDeletedPods checks whether pods that were deleted from the cluster
// should also be deleted from our local store.
func (w *Reconciler) maybeLetGoOfDeletedPods(pods []v1alpha1.Pod, clusterKey clusterKey) []v1alpha1.Pod {
    allDeleted := true
    someDeleted := false
    for _, pod := range pods {
        key := uidKey{cluster: clusterKey, uid: types.UID(pod.UID)}
        isDeleted := w.deletedPods[key]
        if isDeleted {
            someDeleted = true
        } else {
            allDeleted = false
        }
    }

    if allDeleted || !someDeleted {
        return pods
    }

    result := make([]v1alpha1.Pod, 0, len(pods))
    for _, pod := range pods {
        key := uidKey{cluster: clusterKey, uid: types.UID(pod.UID)}
        isDeleted := w.deletedPods[key]
        if isDeleted {
            delete(w.knownPods, key)
            delete(w.knownPodOwnerCreation, key)
            delete(w.deletedPods, key)
        } else {
            result = append(result, pod)
        }
    }
    return result
}

func (w *Reconciler) upsertPod(cluster clusterKey, pod *v1.Pod) {
    w.mu.Lock()
    defer w.mu.Unlock()
    podKey := uidKey{cluster: cluster, uid: pod.UID}
    w.knownPods[podKey] = pod
}

// triageResult is a KubernetesDiscovery key and the UID (if any) of the watch ref that matched the Pod event.
type triageResult struct {
    watcherID   watcherID
    ancestorUID types.UID
}

// triagePodTree checks to see if this Pod corresponds to any of the KubernetesDiscovery objects.
//
// Currently, we do this by comparing the Pod UID and its owner UIDs against watched UIDs from
// KubernetesDiscovery specs. More than one KubernetesDiscovery object can watch the same UID
// and each will receive an event. (Note that currently, ManifestSubscriber uniquely assigns
// UIDs to prevent more than one manifest from watching the same UID, but at an API level, it's
// possible.)
//
// Additionally, the Pod's labels will be evaluated against any extra selectors from the specs,
// and it will be reported for any specs that it matches. (If a watcher already matched explicitly via
// transitive UID ownership, it will not be evaluated for label match.)
//
// Even if the Pod doesn't match any KubernetesDiscovery spec, it's still kept in local state,
// so we can match it later if a KubernetesDiscovery spec is modified to match it; this is actually
// extremely common because new Pods are typically observed by the Reconciler _before_ the respective
// KubernetesDiscovery spec update propagates.
func (w *Reconciler) triagePodTree(nsKey nsKey, pod *v1.Pod, objTree k8s.ObjectRefTree) []triageResult {
    podUID := pod.UID
    if len(objTree.Owners) > 0 {
        podKey := uidKey{cluster: nsKey.cluster, uid: podUID}
        w.knownPodOwnerCreation[podKey] = objTree.Owners[0].CreationTimestamp
    }

    // Set up the descendent pod UID index
    for _, ownerUID := range objTree.UIDs() {
        if podUID == ownerUID {
            continue
        }

        ownerKey := uidKey{cluster: nsKey.cluster, uid: ownerUID}
        set, ok := w.knownDescendentPodUIDs[ownerKey]
        if !ok {
            set = k8s.NewUIDSet()
            w.knownDescendentPodUIDs[ownerKey] = set
        }
        set.Add(podUID)
    }

    seenWatchers := make(map[watcherID]bool)
    var results []triageResult

    // Find any watchers that have a ref to a UID in the object tree (i.e. the Pod itself or a transitive owner)
    for _, ownerUID := range objTree.UIDs() {
        ownerKey := uidKey{cluster: nsKey.cluster, uid: ownerUID}
        for watcherID := range w.uidWatchers[ownerKey] {
            if seenWatchers[watcherID] {
                // in practice, it's not really logical that a watcher would have more than one part of the
                // object tree watched, but since we already need to track seen watchers to skip duplicative
                // label matches, we might as well prevent it from becoming an issue
                // (also, if it does happen, the object tree should have consistent iteration order, so a Pod
                // will always match on a consistent ancestor UID, which avoids spurious updates)
                continue
            }
            seenWatchers[watcherID] = true
            results = append(results, triageResult{watcherID: watcherID, ancestorUID: ownerUID})
        }
    }

    // NOTE(nick): This code might be totally obsolete now that we triage
    // pods by owner UID. It's meant to handle CRDs, but most CRDs should
    // set owner reference appropriately.
    podLabels := labels.Set(pod.ObjectMeta.GetLabels())
    for key, watcher := range w.watchers {
        if seenWatchers[key] {
            continue
        }
        for _, selector := range watcher.extraSelectors {
            if selector.Matches(podLabels) {
                seenWatchers[key] = true
                // there is no ancestorUID since this was a label match
                results = append(results, triageResult{watcherID: key, ancestorUID: ""})
                break
            }
        }
    }

    return results
}

func (w *Reconciler) handlePodChange(ctx context.Context, nsKey nsKey, ownerFetcher k8s.OwnerFetcher, pod *v1.Pod) {
    objTree, err := ownerFetcher.OwnerTreeOf(ctx, k8s.NewK8sEntity(pod))
    if err != nil {
        // In locked-down clusters, the user may not have access to certain types of resources,
        // so it's normal for there to be errors. Ignore them.
        return
    }

    w.mu.Lock()
    defer w.mu.Unlock()

    triageResults := w.triagePodTree(nsKey, pod, objTree)
    for i := range triageResults {
        watcherID := triageResults[i].watcherID
        w.requeuer.Add(types.NamespacedName(watcherID))
    }
}

func (w *Reconciler) handlePodDelete(namespace k8s.Namespace, name string) {
    w.mu.Lock()
    defer w.mu.Unlock()

    var matchedPodKey uidKey
    var matchedPod *v1.Pod
    for podKey, pod := range w.knownPods {
        if pod.Namespace == namespace.String() && pod.Name == name {
            matchedPodKey = podKey
            matchedPod = pod
            break
        }
    }

    if matchedPodKey.uid == "" {
        // this pod wasn't known/tracked
        return
    }

    // If the pod was in a completed state when it was deleted, we may still need
    // its status. Hold onto it until we have more pods.
    phase := matchedPod.Status.Phase
    isCompleted := phase == v1.PodSucceeded || phase == v1.PodFailed
    if isCompleted {
        w.deletedPods[matchedPodKey] = true
    } else {
        delete(w.knownPods, matchedPodKey)
        delete(w.knownPodOwnerCreation, matchedPodKey)
        delete(w.deletedPods, matchedPodKey)
    }

    // because we don't know if any watchers matched on this Pod by label previously,
    // trigger an update on every watcher for the Pod's cluster, which will return
    // early if it didn't change
    for watcherID, watcher := range w.watchers {
        if watcher.cluster != matchedPodKey.cluster {
            continue
        }

        w.requeuer.Add(types.NamespacedName(watcherID))
    }
}

func (w *Reconciler) manageOwnedObjects(ctx context.Context, nn types.NamespacedName, kd *v1alpha1.KubernetesDiscovery) error {
    if err := w.manageOwnedPodLogStreams(ctx, nn, kd); err != nil {
        return err
    }

    if err := w.manageOwnedPortForwards(ctx, nn, kd); err != nil {
        return err
    }
    return nil
}

// Reconcile all the pod log streams owned by this KD. The KD may be nil if it's being deleted.
func (w *Reconciler) manageOwnedPodLogStreams(ctx context.Context, nn types.NamespacedName, kd *v1alpha1.KubernetesDiscovery) error {
    var managedPodLogStreams v1alpha1.PodLogStreamList
    err := indexer.ListOwnedBy(ctx, w.ctrlClient, &managedPodLogStreams, nn, apiType)
    if err != nil {
        return fmt.Errorf("failed to fetch managed PodLogStream objects for KubernetesDiscovery %s: %v",
            nn.Name, err)
    }
    plsByPod := make(map[types.NamespacedName]v1alpha1.PodLogStream)
    for _, pls := range managedPodLogStreams.Items {
        plsByPod[types.NamespacedName{
            Namespace: pls.Spec.Namespace,
            Name:      pls.Spec.Pod,
        }] = pls
    }

    var errs []error
    seenPods := make(map[types.NamespacedName]bool)
    if kd != nil {
        for _, pod := range kd.Status.Pods {
            podNN := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
            seenPods[podNN] = true
            if _, ok := plsByPod[podNN]; ok {
                // if the PLS gets modified after being created, just leave it as-is
                continue
            }

            if err := w.createPodLogStream(ctx, kd, pod); err != nil {
                errs = append(errs, fmt.Errorf("failed to create PodLogStream for Pod %s:%s for KubernetesDiscovery %s: %v",
                    pod.Namespace, pod.Name, nn.Name, err))
            }
        }
    }

    for podKey, pls := range plsByPod {
        if !seenPods[podKey] {
            if err := w.ctrlClient.Delete(ctx, &pls); ctrlclient.IgnoreNotFound(err) != nil {
                errs = append(errs, fmt.Errorf("failed to delete PodLogStream %s for KubernetesDiscovery %s: %v",
                    pls.Name, nn.Name, err))
            }
        }
    }

    return errorutil.NewAggregate(errs)
}

func (w *Reconciler) createPodLogStream(ctx context.Context, kd *v1alpha1.KubernetesDiscovery, pod v1alpha1.Pod) error {
    plsKey := types.NamespacedName{
        Namespace: kd.Namespace,
        Name:      fmt.Sprintf("%s-%s-%s", kd.Name, pod.Namespace, pod.Name),
    }

    manifest := kd.Annotations[v1alpha1.AnnotationManifest]
    spanID := string(k8sconv.SpanIDForPod(model.ManifestName(manifest), k8s.PodID(pod.Name)))

    plsTemplate := kd.Spec.PodLogStreamTemplateSpec

    // If there's no PodLogStream template, create a default one.
    if plsTemplate == nil {
        plsTemplate = &v1alpha1.PodLogStreamTemplateSpec{}
    }

    // create PLS
    pls := v1alpha1.PodLogStream{
        ObjectMeta: metav1.ObjectMeta{
            Name:      plsKey.Name,
            Namespace: kd.Namespace,
            Annotations: map[string]string{
                v1alpha1.AnnotationManifest: manifest,
                v1alpha1.AnnotationSpanID:   spanID,
            },
        },
        Spec: v1alpha1.PodLogStreamSpec{
            Pod:              pod.Name,
            Namespace:        pod.Namespace,
            SinceTime:        plsTemplate.SinceTime,
            IgnoreContainers: plsTemplate.IgnoreContainers,
            OnlyContainers:   plsTemplate.OnlyContainers,
        },
    }

    if err := controllerutil.SetControllerReference(kd, &pls, w.ctrlClient.Scheme()); err != nil {
        return err
    }

    if err := w.ctrlClient.Create(ctx, &pls); err != nil {
        if apierrors.IsAlreadyExists(err) {
            return nil
        }
        return err
    }

    return nil
}

func (w *Reconciler) dispatchPodChangesLoop(ctx context.Context, nsKey nsKey, ownerFetcher k8s.OwnerFetcher,
    ch <-chan k8s.ObjectUpdate) {
    for {
        select {
        case obj, ok := <-ch:
            if !ok {
                return
            }

            pod, ok := obj.AsPod()
            if ok {
                w.upsertPod(nsKey.cluster, pod)
                go w.handlePodChange(ctx, nsKey, ownerFetcher, pod)
                continue
            }

            namespace, name, ok := obj.AsDeletedKey()
            if ok {
                go w.handlePodDelete(namespace, name)
                continue
            }
        case <-ctx.Done():
            return
        }
    }
}

func namespacesAndUIDsFromSpec(watches []v1alpha1.KubernetesWatchRef) (namespaceSet, k8s.UIDSet) {
    seenNamespaces := make(namespaceSet)
    seenUIDs := k8s.NewUIDSet()

    for i := range watches {
        seenNamespaces[watches[i].Namespace] = true
        uid := types.UID(watches[i].UID)
        if uid != "" {
            // a watch ref might not have a UID:
            //   * resources haven't been deployed yet
            //   * relies on extra label selectors from spec
            seenUIDs.Add(uid)
        }
    }

    return seenNamespaces, seenUIDs
}

// indexKubernetesDiscovery returns keys for all the objects we need to watch based on the spec.
func indexKubernetesDiscovery(obj ctrlclient.Object) []indexer.Key {
    var result []indexer.Key

    kd := obj.(*v1alpha1.KubernetesDiscovery)
    if kd != nil && kd.Spec.Cluster != "" {
        result = append(result, indexer.Key{
            Name: types.NamespacedName{
                Namespace: kd.Namespace,
                Name:      kd.Spec.Cluster,
            },
            GVK: clusterGVK,
        })
    }

    return result
}
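
// Editor's note: for orientation, below is a minimal sketch (not part of the original file)
// of the kind of KubernetesDiscovery object this reconciler acts on: one namespace watch
// keyed by an owner UID, plus a fallback label selector. All field values here are
// illustrative assumptions rather than anything taken from Tilt's codebase.
//
//	kd := &v1alpha1.KubernetesDiscovery{
//		ObjectMeta: metav1.ObjectMeta{
//			Name:        "my-app",
//			Annotations: map[string]string{v1alpha1.AnnotationManifest: "my-app"},
//		},
//		Spec: v1alpha1.KubernetesDiscoverySpec{
//			Cluster: "default",
//			Watches: []v1alpha1.KubernetesWatchRef{
//				{Namespace: "default", UID: "<deployment-uid>"},
//			},
//			ExtraSelectors: []metav1.LabelSelector{
//				{MatchLabels: map[string]string{"app.kubernetes.io/name": "my-app"}},
//			},
//		},
//	}
//
// Reconciling such an object would create a single Pod watch on the "default" namespace,
// register the watcher for the given UID, and then manage PodLogStream/PortForward children
// for any Pods reported in the resulting status.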