istio.io/istio@v0.0.0-20240520182934-d79c90f27776/cni/pkg/nodeagent/informers.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package nodeagent 16 17 import ( 18 "context" 19 "fmt" 20 21 corev1 "k8s.io/api/core/v1" 22 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 23 klabels "k8s.io/apimachinery/pkg/labels" 24 25 "istio.io/istio/cni/pkg/util" 26 "istio.io/istio/pkg/config/constants" 27 "istio.io/istio/pkg/kube" 28 "istio.io/istio/pkg/kube/controllers" 29 "istio.io/istio/pkg/kube/kclient" 30 "istio.io/istio/pkg/monitoring" 31 ) 32 33 var ( 34 eventTypeTag = monitoring.CreateLabel("type") 35 EventTotals = monitoring.NewSum( 36 "nodeagent_reconcile_events_total", 37 "The total number of node agent reconcile events.", 38 ) 39 ) 40 41 type K8sHandlers interface { 42 GetPodIfAmbient(podName, podNamespace string) (*corev1.Pod, error) 43 GetAmbientPods() []*corev1.Pod 44 Start() 45 } 46 47 type InformerHandlers struct { 48 ctx context.Context 49 dataplane MeshDataplane 50 systemNamespace string 51 52 queue controllers.Queue 53 pods kclient.Client[*corev1.Pod] 54 namespaces kclient.Client[*corev1.Namespace] 55 } 56 57 func setupHandlers(ctx context.Context, kubeClient kube.Client, dataplane MeshDataplane, systemNamespace string) *InformerHandlers { 58 s := &InformerHandlers{ctx: ctx, dataplane: dataplane, systemNamespace: systemNamespace} 59 s.queue = controllers.NewQueue("ambient", 60 controllers.WithGenericReconciler(s.reconcile), 61 controllers.WithMaxAttempts(5), 62 ) 63 // We only need to handle pods on our node 64 s.pods = kclient.NewFiltered[*corev1.Pod](kubeClient, kclient.Filter{FieldSelector: "spec.nodeName=" + NodeName}) 65 s.pods.AddEventHandler(controllers.FromEventHandler(func(o controllers.Event) { 66 s.queue.Add(o) 67 })) 68 69 // Namespaces could be anything though, so we watch all of those 70 // 71 // NOTE that we are requeueing namespaces here explicitly to work around 72 // test flakes with the fake kube client in `pkg/kube/client.go` - 73 // because we are using `List()` in the handler, without this requeue, 74 // the fake client will sometimes drop pod events leading to test flakes. 75 // 76 // WaitForCacheSync *helps*, but does not entirely fix this problem 77 s.namespaces = kclient.New[*corev1.Namespace](kubeClient) 78 s.namespaces.AddEventHandler(controllers.FromEventHandler(func(o controllers.Event) { 79 s.queue.Add(o) 80 })) 81 82 return s 83 } 84 85 func (s *InformerHandlers) GetPodIfAmbient(podName, podNamespace string) (*corev1.Pod, error) { 86 ns := s.namespaces.Get(podNamespace, "") 87 if ns == nil { 88 return nil, fmt.Errorf("failed to find namespace %v", ns) 89 } 90 pod := s.pods.Get(podName, podNamespace) 91 if util.PodRedirectionEnabled(ns, pod) { 92 return pod, nil 93 } 94 return nil, nil 95 } 96 97 func (s *InformerHandlers) Start() { 98 kube.WaitForCacheSync("informer", s.ctx.Done(), s.pods.HasSynced, s.namespaces.HasSynced) 99 go s.queue.Run(s.ctx.Done()) 100 } 101 102 func (s *InformerHandlers) GetAmbientPods() []*corev1.Pod { 103 var pods []*corev1.Pod 104 for _, pod := range s.pods.List(metav1.NamespaceAll, klabels.Everything()) { 105 ns := s.namespaces.Get(pod.Namespace, "") 106 if ns == nil { 107 log.Warnf("failed to find namespace %s for pod %s", pod.Namespace, pod.Name) 108 } 109 110 if !util.IsZtunnelPod(s.systemNamespace, pod) && util.PodRedirectionEnabled(ns, pod) { 111 pods = append(pods, pod) 112 } 113 } 114 return pods 115 } 116 117 // EnqueueNamespace takes a Namespace and enqueues all Pod objects that make need an update 118 // TODO it is sort of pointless/confusing/implicit to populate Old and New with the same reference here 119 func (s *InformerHandlers) enqueueNamespace(o controllers.Object) { 120 namespace := o.GetName() 121 labels := o.GetLabels() 122 matchAmbient := labels[constants.DataplaneModeLabel] == constants.DataplaneModeAmbient 123 if matchAmbient { 124 log.Infof("Namespace %s is enabled in ambient mesh", namespace) 125 } else { 126 log.Infof("Namespace %s is disabled from ambient mesh", namespace) 127 } 128 for _, pod := range s.pods.List(namespace, klabels.Everything()) { 129 // ztunnel pods are never "added to/removed from the mesh", so do not fire 130 // spurious events for them to avoid triggering extra 131 // ztunnel node reconciliation checks. 132 if !util.IsZtunnelPod(s.systemNamespace, pod) { 133 log.Debugf("Enqueuing pod %s/%s", pod.Namespace, pod.Name) 134 s.queue.Add(controllers.Event{ 135 New: pod, 136 Old: pod, 137 Event: controllers.EventUpdate, 138 }) 139 } 140 } 141 } 142 143 func (s *InformerHandlers) reconcile(input any) error { 144 event := input.(controllers.Event) 145 switch event.Latest().(type) { 146 case *corev1.Namespace: 147 return s.reconcileNamespace(input) 148 case *corev1.Pod: 149 return s.reconcilePod(input) 150 default: 151 return fmt.Errorf("unexpected event type: %+v", input) 152 } 153 } 154 155 func (s *InformerHandlers) reconcileNamespace(input any) error { 156 event := input.(controllers.Event) 157 ns := event.Latest().(*corev1.Namespace) 158 159 switch event.Event { 160 case controllers.EventAdd: 161 log.Debugf("Namespace %s added", ns.Name) 162 s.enqueueNamespace(ns) 163 164 case controllers.EventUpdate: 165 newNs := event.New.(*corev1.Namespace) 166 oldNs := event.Old.(*corev1.Namespace) 167 168 if getModeLabel(oldNs.Labels) != getModeLabel(newNs.Labels) { 169 log.Debugf("Namespace %s updated", newNs.Name) 170 s.enqueueNamespace(newNs) 171 } 172 } 173 return nil 174 } 175 176 func getModeLabel(m map[string]string) string { 177 if m == nil { 178 return "" 179 } 180 return m[constants.DataplaneModeLabel] 181 } 182 183 func (s *InformerHandlers) reconcilePod(input any) error { 184 event := input.(controllers.Event) 185 pod := event.Latest().(*corev1.Pod) 186 187 defer EventTotals.With(eventTypeTag.Value(event.Event.String())).Increment() 188 189 switch event.Event { 190 case controllers.EventAdd: 191 // pod was added to our cache 192 // we get here in 2 cases: 193 // 1. new pod was created on our node 194 // 2. we were restarted and current existing pods are added to our cache 195 196 // We have no good way to distinguish between these two cases from here. But we don't need to! 197 // Existing pods will be handled by the dataplane using `GetAmbientPods`, 198 // and the initial enqueueNamespace, and new pods will be handled by the CNI. 199 200 case controllers.EventUpdate: 201 // For update, we just need to handle opt outs 202 newPod := event.New.(*corev1.Pod) 203 oldPod := event.Old.(*corev1.Pod) 204 ns := s.namespaces.Get(newPod.Namespace, "") 205 if ns == nil { 206 return fmt.Errorf("failed to find namespace %v", ns) 207 } 208 wasAnnotated := oldPod.Annotations != nil && oldPod.Annotations[constants.AmbientRedirection] == constants.AmbientRedirectionEnabled 209 isAnnotated := newPod.Annotations != nil && newPod.Annotations[constants.AmbientRedirection] == constants.AmbientRedirectionEnabled 210 shouldBeEnabled := util.PodRedirectionEnabled(ns, newPod) 211 212 // We should check the latest annotation vs desired status 213 changeNeeded := isAnnotated != shouldBeEnabled 214 215 log.Debugf("Pod %s events: wasAnnotated(%v), isAnnotated(%v), shouldBeEnabled(%v), changeNeeded(%v), oldPod(%+v), newPod(%+v)", 216 pod.Name, wasAnnotated, isAnnotated, shouldBeEnabled, changeNeeded, oldPod, newPod) 217 if !changeNeeded { 218 log.Debugf("Pod %s update event skipped, no change needed", pod.Name) 219 return nil 220 } 221 222 if !shouldBeEnabled { 223 log.Debugf("Pod %s no longer matches, removing from mesh", newPod.Name) 224 err := s.dataplane.RemovePodFromMesh(s.ctx, pod) 225 log.Debugf("RemovePodFromMesh(%s) returned %v", newPod.Name, err) 226 // we ignore errors here as we don't want this event to be retried by the queue. 227 } else { 228 // If oldpod != ready && newpod != ready, but the ambient annotation was added, 229 // then assume this event was generated by the CNI plugin labeling the pod on startup, 230 // and skip the event. 231 // 232 // This isn't perfect (someone could manually annotate an unready pod, 233 // then install Istio, then the pod goes ready, and we'd miss capture) - but that 234 // seems vanishingly unlikely 235 wasReady := kube.CheckPodReadyOrComplete(oldPod) 236 isReady := kube.CheckPodReadyOrComplete(newPod) 237 if wasReady != nil && isReady != nil && isAnnotated { 238 log.Infof("Pod %s update event skipped, added/labeled by CNI plugin", pod.Name) 239 return nil 240 } 241 242 log.Debugf("Pod %s now matches, adding to mesh", newPod.Name) 243 // netns == ""; at this point netns should have been added via the initial snapshot, 244 // or via the cni plugin. If it happens to get here before the cni plugin somehow, 245 // then we will just fail to add the pod to the mesh, and it will be retried later when cni plugin adds it. 246 247 // We need a pod IP - if the pod was added via the CNI plugin, that plugin told us the IPs 248 // for the pod. If this is a pod added via informer, the pod should have already gone thru 249 // the CNI plugin chain, and have a PodIP. 250 // 251 // If PodIPs exists, it is preferred, otherwise fallback to PodIP. 252 // 253 // If we get to this point and have a pod that really and truly has no IP in either of those, 254 // it's not routable at this point and something is wrong/we should discard this event. 255 podIPs := util.GetPodIPsIfPresent(pod) 256 if len(podIPs) == 0 { 257 log.Warnf("pod %s does not appear to have any assigned IPs, not capturing", pod.Name) 258 return nil 259 } 260 261 err := s.dataplane.AddPodToMesh(s.ctx, pod, podIPs, "") 262 log.Debugf("AddPodToMesh(%s) returned %v", newPod.Name, err) 263 } 264 case controllers.EventDelete: 265 // TODO: as every pod on our node will come through here, check if pod is annotated? 266 err := s.dataplane.DelPodFromMesh(s.ctx, pod) 267 log.Debugf("DelPodFromMesh(%s) returned %v", pod.Name, err) 268 } 269 return nil 270 }