istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/serviceregistry/kube/controller/pod.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package controller 16 17 import ( 18 "sync" 19 20 v1 "k8s.io/api/core/v1" 21 "k8s.io/apimachinery/pkg/types" 22 23 "istio.io/istio/pilot/pkg/model" 24 "istio.io/istio/pkg/config" 25 "istio.io/istio/pkg/config/constants" 26 "istio.io/istio/pkg/kube/kclient" 27 "istio.io/istio/pkg/maps" 28 "istio.io/istio/pkg/util/sets" 29 ) 30 31 // PodCache is an eventually consistent pod cache 32 type PodCache struct { 33 pods kclient.Client[*v1.Pod] 34 35 sync.RWMutex 36 // podsByIP maintains stable pod IP to name key mapping 37 // this allows us to retrieve the latest status by pod IP. 38 // This should only contain RUNNING or PENDING pods with an allocated IP. 39 podsByIP map[string]sets.Set[types.NamespacedName] 40 // IPByPods is a reverse map of podsByIP. This exists to allow us to prune stale entries in the 41 // pod cache if a pod changes IP. 42 IPByPods map[types.NamespacedName]string 43 44 // needResync is map of IP to endpoint namespace/name. This is used to requeue endpoint 45 // events when pod event comes. This typically happens when pod is not available 46 // in podCache when endpoint event comes. 47 needResync map[string]sets.Set[types.NamespacedName] 48 queueEndpointEvent func(types.NamespacedName) 49 50 c *Controller 51 } 52 53 func newPodCache(c *Controller, pods kclient.Client[*v1.Pod], queueEndpointEvent func(types.NamespacedName)) *PodCache { 54 out := &PodCache{ 55 pods: pods, 56 c: c, 57 podsByIP: make(map[string]sets.Set[types.NamespacedName]), 58 IPByPods: make(map[types.NamespacedName]string), 59 needResync: make(map[string]sets.Set[types.NamespacedName]), 60 queueEndpointEvent: queueEndpointEvent, 61 } 62 63 return out 64 } 65 66 // Copied from kubernetes/kubernetes/pkg/controller/util/endpoint/controller_utils.go 67 // 68 // shouldPodBeInEndpoints returns true if a specified pod should be in an 69 // Endpoints or EndpointSlice resource. Terminating pods are not included. 70 func shouldPodBeInEndpoints(pod *v1.Pod) bool { 71 // "Terminal" describes when a Pod is complete (in a succeeded or failed phase). 72 // This is distinct from the "Terminating" condition which represents when a Pod 73 // is being terminated (metadata.deletionTimestamp is non nil). 74 if isPodPhaseTerminal(pod.Status.Phase) { 75 return false 76 } 77 78 if len(pod.Status.PodIP) == 0 && len(pod.Status.PodIPs) == 0 { 79 return false 80 } 81 82 if pod.DeletionTimestamp != nil { 83 return false 84 } 85 86 return true 87 } 88 89 // isPodPhaseTerminal returns true if the pod's phase is terminal. 90 func isPodPhaseTerminal(phase v1.PodPhase) bool { 91 return phase == v1.PodFailed || phase == v1.PodSucceeded 92 } 93 94 func IsPodRunning(pod *v1.Pod) bool { 95 return pod.Status.Phase == v1.PodRunning 96 } 97 98 // IsPodReady is copied from kubernetes/pkg/api/v1/pod/utils.go 99 func IsPodReady(pod *v1.Pod) bool { 100 return IsPodReadyConditionTrue(pod.Status) 101 } 102 103 // IsPodReadyConditionTrue returns true if a pod is ready; false otherwise. 104 func IsPodReadyConditionTrue(status v1.PodStatus) bool { 105 condition := GetPodReadyCondition(status) 106 return condition != nil && condition.Status == v1.ConditionTrue 107 } 108 109 func GetPodReadyCondition(status v1.PodStatus) *v1.PodCondition { 110 _, condition := GetPodCondition(&status, v1.PodReady) 111 return condition 112 } 113 114 func GetPodCondition(status *v1.PodStatus, conditionType v1.PodConditionType) (int, *v1.PodCondition) { 115 if status == nil { 116 return -1, nil 117 } 118 return GetPodConditionFromList(status.Conditions, conditionType) 119 } 120 121 // GetPodConditionFromList extracts the provided condition from the given list of condition and 122 // returns the index of the condition and the condition. Returns -1 and nil if the condition is not present. 123 func GetPodConditionFromList(conditions []v1.PodCondition, conditionType v1.PodConditionType) (int, *v1.PodCondition) { 124 if conditions == nil { 125 return -1, nil 126 } 127 for i := range conditions { 128 if conditions[i].Type == conditionType { 129 return i, &conditions[i] 130 } 131 } 132 return -1, nil 133 } 134 135 func (pc *PodCache) labelFilter(old, cur *v1.Pod) bool { 136 // If labels/annotations updated, trigger proxy push 137 labelsChanged := !maps.Equal(old.Labels, cur.Labels) 138 // Annotations are only used in endpoints in one case, so just compare that one 139 relevantAnnotationsChanged := old.Annotations[constants.AmbientRedirection] != cur.Annotations[constants.AmbientRedirection] 140 changed := labelsChanged || relevantAnnotationsChanged 141 if cur.Status.PodIP != "" && changed { 142 pc.proxyUpdates(cur, true) 143 } 144 145 // always continue calling pc.onEvent 146 return false 147 } 148 149 // onEvent updates the IP-based index (pc.podsByIP). 150 func (pc *PodCache) onEvent(_, pod *v1.Pod, ev model.Event) error { 151 ip := pod.Status.PodIP 152 // PodIP will be empty when pod is just created, but before the IP is assigned 153 // via UpdateStatus. 154 if len(ip) == 0 { 155 return nil 156 } 157 158 key := config.NamespacedName(pod) 159 switch ev { 160 case model.EventAdd: 161 if shouldPodBeInEndpoints(pod) && IsPodReady(pod) { 162 pc.addPod(pod, ip, key) 163 } else { 164 return nil 165 } 166 case model.EventUpdate: 167 if !shouldPodBeInEndpoints(pod) || !IsPodReady(pod) { 168 // delete only if this pod was in the cache 169 if !pc.deleteIP(ip, key) { 170 return nil 171 } 172 ev = model.EventDelete 173 } else if shouldPodBeInEndpoints(pod) && IsPodReady(pod) { 174 pc.addPod(pod, ip, key) 175 } else { 176 return nil 177 } 178 case model.EventDelete: 179 // delete only if this pod was in the cache, 180 // in most case it has already been deleted in `UPDATE` with `DeletionTimestamp` set. 181 if !pc.deleteIP(ip, key) { 182 return nil 183 } 184 } 185 pc.notifyWorkloadHandlers(pod, ev) 186 return nil 187 } 188 189 // notifyWorkloadHandlers fire workloadInstance handlers for pod 190 func (pc *PodCache) notifyWorkloadHandlers(pod *v1.Pod, ev model.Event) { 191 // if no workload handler registered, skip building WorkloadInstance 192 if len(pc.c.handlers.GetWorkloadHandlers()) == 0 { 193 return 194 } 195 // fire instance handles for workload 196 ep := NewEndpointBuilder(pc.c, pod).buildIstioEndpoint(pod.Status.PodIP, 0, "", model.AlwaysDiscoverable, model.Healthy) 197 workloadInstance := &model.WorkloadInstance{ 198 Name: pod.Name, 199 Namespace: pod.Namespace, 200 Kind: model.PodKind, 201 Endpoint: ep, 202 PortMap: getPortMap(pod), 203 } 204 pc.c.handlers.NotifyWorkloadHandlers(workloadInstance, ev) 205 } 206 207 func getPortMap(pod *v1.Pod) map[string]uint32 { 208 pmap := map[string]uint32{} 209 for _, c := range pod.Spec.Containers { 210 for _, port := range c.Ports { 211 if port.Name == "" || port.Protocol != v1.ProtocolTCP { 212 continue 213 } 214 // First port wins, per Kubernetes (https://github.com/kubernetes/kubernetes/issues/54213) 215 if _, f := pmap[port.Name]; !f { 216 pmap[port.Name] = uint32(port.ContainerPort) 217 } 218 } 219 } 220 return pmap 221 } 222 223 // deleteIP returns true if the pod and ip are really deleted. 224 func (pc *PodCache) deleteIP(ip string, podKey types.NamespacedName) bool { 225 pc.Lock() 226 defer pc.Unlock() 227 if pc.podsByIP[ip].Contains(podKey) { 228 sets.DeleteCleanupLast(pc.podsByIP, ip, podKey) 229 delete(pc.IPByPods, podKey) 230 return true 231 } 232 return false 233 } 234 235 func (pc *PodCache) addPod(pod *v1.Pod, ip string, key types.NamespacedName) { 236 pc.Lock() 237 // if the pod has been cached, return 238 if pc.podsByIP[ip].Contains(key) { 239 pc.Unlock() 240 return 241 } 242 if current, f := pc.IPByPods[key]; f { 243 // The pod already exists, but with another IP Address. We need to clean up that 244 sets.DeleteCleanupLast(pc.podsByIP, current, key) 245 } 246 sets.InsertOrNew(pc.podsByIP, ip, key) 247 pc.IPByPods[key] = ip 248 249 if endpointsToUpdate, f := pc.needResync[ip]; f { 250 delete(pc.needResync, ip) 251 for epKey := range endpointsToUpdate { 252 pc.queueEndpointEvent(epKey) 253 } 254 endpointsPendingPodUpdate.Record(float64(len(pc.needResync))) 255 } 256 pc.Unlock() 257 258 const isPodUpdate = false 259 pc.proxyUpdates(pod, isPodUpdate) 260 } 261 262 // queueEndpointEventOnPodArrival registers this endpoint and queues endpoint event 263 // when the corresponding pod arrives. 264 func (pc *PodCache) queueEndpointEventOnPodArrival(key types.NamespacedName, ip string) { 265 pc.Lock() 266 defer pc.Unlock() 267 sets.InsertOrNew(pc.needResync, ip, key) 268 endpointsPendingPodUpdate.Record(float64(len(pc.needResync))) 269 } 270 271 // endpointDeleted cleans up endpoint from resync endpoint list. 272 func (pc *PodCache) endpointDeleted(key types.NamespacedName, ip string) { 273 pc.Lock() 274 defer pc.Unlock() 275 sets.DeleteCleanupLast(pc.needResync, ip, key) 276 endpointsPendingPodUpdate.Record(float64(len(pc.needResync))) 277 } 278 279 func (pc *PodCache) proxyUpdates(pod *v1.Pod, isPodUpdate bool) { 280 if pc.c != nil { 281 if pc.c.opts.XDSUpdater != nil { 282 ip := pod.Status.PodIP 283 pc.c.opts.XDSUpdater.ProxyUpdate(pc.c.Cluster(), ip) 284 } 285 if isPodUpdate { 286 // Recompute service(s) due to pod label change. 287 // If it is a new pod, no need to recompute, as it yet computed for the first time yet. 288 pc.c.recomputeServiceForPod(pod) 289 } 290 } 291 } 292 293 func (pc *PodCache) getPodKeys(addr string) []types.NamespacedName { 294 pc.RLock() 295 defer pc.RUnlock() 296 return pc.podsByIP[addr].UnsortedList() 297 } 298 299 // getPodByIp returns the pod or nil if pod not found or an error occurred 300 func (pc *PodCache) getPodsByIP(addr string) []*v1.Pod { 301 keys := pc.getPodKeys(addr) 302 if keys == nil { 303 return nil 304 } 305 res := make([]*v1.Pod, 0, len(keys)) 306 for _, key := range keys { 307 p := pc.getPodByKey(key) 308 // Subtle race condition. getPodKeys is our cache over pods, while getPodByKey hits the informer cache. 309 // if these are out of sync, p may be nil (pod was deleted). 310 if p != nil { 311 res = append(res, p) 312 } 313 } 314 return res 315 } 316 317 // getPodByKey returns the pod by key 318 func (pc *PodCache) getPodByKey(key types.NamespacedName) *v1.Pod { 319 return pc.pods.Get(key.Name, key.Namespace) 320 } 321 322 // getPodByKey returns the pod of the proxy 323 func (pc *PodCache) getPodByProxy(proxy *model.Proxy) *v1.Pod { 324 var pod *v1.Pod 325 key := podKeyByProxy(proxy) 326 if key.Name != "" { 327 pod = pc.getPodByKey(key) 328 if pod != nil { 329 return pod 330 } 331 } 332 333 // only need to fetch the corresponding pod through the first IP, although there are multiple IP scenarios, 334 // because multiple ips belong to the same pod 335 proxyIP := proxy.IPAddresses[0] 336 // just in case the proxy ID is bad formatted 337 pods := pc.getPodsByIP(proxyIP) 338 switch len(pods) { 339 case 0: 340 return nil 341 case 1: 342 return pods[0] 343 default: 344 // This should only happen with hostNetwork pods, which cannot be proxy clients... 345 log.Errorf("unexpected: found multiple pods for proxy %v (%v)", proxy.ID, proxyIP) 346 // Try to handle it gracefully 347 for _, p := range pods { 348 // At least filter out wrong namespaces... 349 if proxy.ConfigNamespace != p.Namespace { 350 continue 351 } 352 return p 353 } 354 return nil 355 } 356 }