istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/serviceregistry/kube/controller/pod.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package controller
    16  
    17  import (
    18  	"sync"
    19  
    20  	v1 "k8s.io/api/core/v1"
    21  	"k8s.io/apimachinery/pkg/types"
    22  
    23  	"istio.io/istio/pilot/pkg/model"
    24  	"istio.io/istio/pkg/config"
    25  	"istio.io/istio/pkg/config/constants"
    26  	"istio.io/istio/pkg/kube/kclient"
    27  	"istio.io/istio/pkg/maps"
    28  	"istio.io/istio/pkg/util/sets"
    29  )
    30  
// PodCache is an eventually consistent pod cache
type PodCache struct {
	// pods is the informer-backed client used to fetch full Pod objects by name/namespace.
	pods kclient.Client[*v1.Pod]

	// RWMutex guards podsByIP, IPByPods, and needResync below.
	sync.RWMutex
	// podsByIP maintains stable pod IP to name key mapping
	// this allows us to retrieve the latest status by pod IP.
	// This should only contain RUNNING or PENDING pods with an allocated IP.
	podsByIP map[string]sets.Set[types.NamespacedName]
	// IPByPods is a reverse map of podsByIP. This exists to allow us to prune stale entries in the
	// pod cache if a pod changes IP.
	IPByPods map[types.NamespacedName]string

	// needResync is map of IP to endpoint namespace/name. This is used to requeue endpoint
	// events when pod event comes. This typically happens when pod is not available
	// in podCache when endpoint event comes.
	needResync map[string]sets.Set[types.NamespacedName]
	// queueEndpointEvent re-enqueues an endpoint event once its pod arrives (see addPod).
	queueEndpointEvent func(types.NamespacedName)

	// c is the owning controller; used for workload handlers and XDS pushes.
	c *Controller
}
    52  
    53  func newPodCache(c *Controller, pods kclient.Client[*v1.Pod], queueEndpointEvent func(types.NamespacedName)) *PodCache {
    54  	out := &PodCache{
    55  		pods:               pods,
    56  		c:                  c,
    57  		podsByIP:           make(map[string]sets.Set[types.NamespacedName]),
    58  		IPByPods:           make(map[types.NamespacedName]string),
    59  		needResync:         make(map[string]sets.Set[types.NamespacedName]),
    60  		queueEndpointEvent: queueEndpointEvent,
    61  	}
    62  
    63  	return out
    64  }
    65  
    66  // Copied from kubernetes/kubernetes/pkg/controller/util/endpoint/controller_utils.go
    67  //
    68  // shouldPodBeInEndpoints returns true if a specified pod should be in an
    69  // Endpoints or EndpointSlice resource. Terminating pods are not included.
    70  func shouldPodBeInEndpoints(pod *v1.Pod) bool {
    71  	// "Terminal" describes when a Pod is complete (in a succeeded or failed phase).
    72  	// This is distinct from the "Terminating" condition which represents when a Pod
    73  	// is being terminated (metadata.deletionTimestamp is non nil).
    74  	if isPodPhaseTerminal(pod.Status.Phase) {
    75  		return false
    76  	}
    77  
    78  	if len(pod.Status.PodIP) == 0 && len(pod.Status.PodIPs) == 0 {
    79  		return false
    80  	}
    81  
    82  	if pod.DeletionTimestamp != nil {
    83  		return false
    84  	}
    85  
    86  	return true
    87  }
    88  
    89  // isPodPhaseTerminal returns true if the pod's phase is terminal.
    90  func isPodPhaseTerminal(phase v1.PodPhase) bool {
    91  	return phase == v1.PodFailed || phase == v1.PodSucceeded
    92  }
    93  
    94  func IsPodRunning(pod *v1.Pod) bool {
    95  	return pod.Status.Phase == v1.PodRunning
    96  }
    97  
    98  // IsPodReady is copied from kubernetes/pkg/api/v1/pod/utils.go
    99  func IsPodReady(pod *v1.Pod) bool {
   100  	return IsPodReadyConditionTrue(pod.Status)
   101  }
   102  
   103  // IsPodReadyConditionTrue returns true if a pod is ready; false otherwise.
   104  func IsPodReadyConditionTrue(status v1.PodStatus) bool {
   105  	condition := GetPodReadyCondition(status)
   106  	return condition != nil && condition.Status == v1.ConditionTrue
   107  }
   108  
   109  func GetPodReadyCondition(status v1.PodStatus) *v1.PodCondition {
   110  	_, condition := GetPodCondition(&status, v1.PodReady)
   111  	return condition
   112  }
   113  
   114  func GetPodCondition(status *v1.PodStatus, conditionType v1.PodConditionType) (int, *v1.PodCondition) {
   115  	if status == nil {
   116  		return -1, nil
   117  	}
   118  	return GetPodConditionFromList(status.Conditions, conditionType)
   119  }
   120  
   121  // GetPodConditionFromList extracts the provided condition from the given list of condition and
   122  // returns the index of the condition and the condition. Returns -1 and nil if the condition is not present.
   123  func GetPodConditionFromList(conditions []v1.PodCondition, conditionType v1.PodConditionType) (int, *v1.PodCondition) {
   124  	if conditions == nil {
   125  		return -1, nil
   126  	}
   127  	for i := range conditions {
   128  		if conditions[i].Type == conditionType {
   129  			return i, &conditions[i]
   130  		}
   131  	}
   132  	return -1, nil
   133  }
   134  
   135  func (pc *PodCache) labelFilter(old, cur *v1.Pod) bool {
   136  	// If labels/annotations updated, trigger proxy push
   137  	labelsChanged := !maps.Equal(old.Labels, cur.Labels)
   138  	// Annotations are only used in endpoints in one case, so just compare that one
   139  	relevantAnnotationsChanged := old.Annotations[constants.AmbientRedirection] != cur.Annotations[constants.AmbientRedirection]
   140  	changed := labelsChanged || relevantAnnotationsChanged
   141  	if cur.Status.PodIP != "" && changed {
   142  		pc.proxyUpdates(cur, true)
   143  	}
   144  
   145  	// always continue calling pc.onEvent
   146  	return false
   147  }
   148  
   149  // onEvent updates the IP-based index (pc.podsByIP).
   150  func (pc *PodCache) onEvent(_, pod *v1.Pod, ev model.Event) error {
   151  	ip := pod.Status.PodIP
   152  	// PodIP will be empty when pod is just created, but before the IP is assigned
   153  	// via UpdateStatus.
   154  	if len(ip) == 0 {
   155  		return nil
   156  	}
   157  
   158  	key := config.NamespacedName(pod)
   159  	switch ev {
   160  	case model.EventAdd:
   161  		if shouldPodBeInEndpoints(pod) && IsPodReady(pod) {
   162  			pc.addPod(pod, ip, key)
   163  		} else {
   164  			return nil
   165  		}
   166  	case model.EventUpdate:
   167  		if !shouldPodBeInEndpoints(pod) || !IsPodReady(pod) {
   168  			// delete only if this pod was in the cache
   169  			if !pc.deleteIP(ip, key) {
   170  				return nil
   171  			}
   172  			ev = model.EventDelete
   173  		} else if shouldPodBeInEndpoints(pod) && IsPodReady(pod) {
   174  			pc.addPod(pod, ip, key)
   175  		} else {
   176  			return nil
   177  		}
   178  	case model.EventDelete:
   179  		// delete only if this pod was in the cache,
   180  		// in most case it has already been deleted in `UPDATE` with `DeletionTimestamp` set.
   181  		if !pc.deleteIP(ip, key) {
   182  			return nil
   183  		}
   184  	}
   185  	pc.notifyWorkloadHandlers(pod, ev)
   186  	return nil
   187  }
   188  
   189  // notifyWorkloadHandlers fire workloadInstance handlers for pod
   190  func (pc *PodCache) notifyWorkloadHandlers(pod *v1.Pod, ev model.Event) {
   191  	// if no workload handler registered, skip building WorkloadInstance
   192  	if len(pc.c.handlers.GetWorkloadHandlers()) == 0 {
   193  		return
   194  	}
   195  	// fire instance handles for workload
   196  	ep := NewEndpointBuilder(pc.c, pod).buildIstioEndpoint(pod.Status.PodIP, 0, "", model.AlwaysDiscoverable, model.Healthy)
   197  	workloadInstance := &model.WorkloadInstance{
   198  		Name:      pod.Name,
   199  		Namespace: pod.Namespace,
   200  		Kind:      model.PodKind,
   201  		Endpoint:  ep,
   202  		PortMap:   getPortMap(pod),
   203  	}
   204  	pc.c.handlers.NotifyWorkloadHandlers(workloadInstance, ev)
   205  }
   206  
   207  func getPortMap(pod *v1.Pod) map[string]uint32 {
   208  	pmap := map[string]uint32{}
   209  	for _, c := range pod.Spec.Containers {
   210  		for _, port := range c.Ports {
   211  			if port.Name == "" || port.Protocol != v1.ProtocolTCP {
   212  				continue
   213  			}
   214  			// First port wins, per Kubernetes (https://github.com/kubernetes/kubernetes/issues/54213)
   215  			if _, f := pmap[port.Name]; !f {
   216  				pmap[port.Name] = uint32(port.ContainerPort)
   217  			}
   218  		}
   219  	}
   220  	return pmap
   221  }
   222  
   223  // deleteIP returns true if the pod and ip are really deleted.
   224  func (pc *PodCache) deleteIP(ip string, podKey types.NamespacedName) bool {
   225  	pc.Lock()
   226  	defer pc.Unlock()
   227  	if pc.podsByIP[ip].Contains(podKey) {
   228  		sets.DeleteCleanupLast(pc.podsByIP, ip, podKey)
   229  		delete(pc.IPByPods, podKey)
   230  		return true
   231  	}
   232  	return false
   233  }
   234  
   235  func (pc *PodCache) addPod(pod *v1.Pod, ip string, key types.NamespacedName) {
   236  	pc.Lock()
   237  	// if the pod has been cached, return
   238  	if pc.podsByIP[ip].Contains(key) {
   239  		pc.Unlock()
   240  		return
   241  	}
   242  	if current, f := pc.IPByPods[key]; f {
   243  		// The pod already exists, but with another IP Address. We need to clean up that
   244  		sets.DeleteCleanupLast(pc.podsByIP, current, key)
   245  	}
   246  	sets.InsertOrNew(pc.podsByIP, ip, key)
   247  	pc.IPByPods[key] = ip
   248  
   249  	if endpointsToUpdate, f := pc.needResync[ip]; f {
   250  		delete(pc.needResync, ip)
   251  		for epKey := range endpointsToUpdate {
   252  			pc.queueEndpointEvent(epKey)
   253  		}
   254  		endpointsPendingPodUpdate.Record(float64(len(pc.needResync)))
   255  	}
   256  	pc.Unlock()
   257  
   258  	const isPodUpdate = false
   259  	pc.proxyUpdates(pod, isPodUpdate)
   260  }
   261  
   262  // queueEndpointEventOnPodArrival registers this endpoint and queues endpoint event
   263  // when the corresponding pod arrives.
   264  func (pc *PodCache) queueEndpointEventOnPodArrival(key types.NamespacedName, ip string) {
   265  	pc.Lock()
   266  	defer pc.Unlock()
   267  	sets.InsertOrNew(pc.needResync, ip, key)
   268  	endpointsPendingPodUpdate.Record(float64(len(pc.needResync)))
   269  }
   270  
   271  // endpointDeleted cleans up endpoint from resync endpoint list.
   272  func (pc *PodCache) endpointDeleted(key types.NamespacedName, ip string) {
   273  	pc.Lock()
   274  	defer pc.Unlock()
   275  	sets.DeleteCleanupLast(pc.needResync, ip, key)
   276  	endpointsPendingPodUpdate.Record(float64(len(pc.needResync)))
   277  }
   278  
   279  func (pc *PodCache) proxyUpdates(pod *v1.Pod, isPodUpdate bool) {
   280  	if pc.c != nil {
   281  		if pc.c.opts.XDSUpdater != nil {
   282  			ip := pod.Status.PodIP
   283  			pc.c.opts.XDSUpdater.ProxyUpdate(pc.c.Cluster(), ip)
   284  		}
   285  		if isPodUpdate {
   286  			// Recompute service(s) due to pod label change.
   287  			// If it is a new pod, no need to recompute, as it yet computed for the first time yet.
   288  			pc.c.recomputeServiceForPod(pod)
   289  		}
   290  	}
   291  }
   292  
   293  func (pc *PodCache) getPodKeys(addr string) []types.NamespacedName {
   294  	pc.RLock()
   295  	defer pc.RUnlock()
   296  	return pc.podsByIP[addr].UnsortedList()
   297  }
   298  
   299  // getPodByIp returns the pod or nil if pod not found or an error occurred
   300  func (pc *PodCache) getPodsByIP(addr string) []*v1.Pod {
   301  	keys := pc.getPodKeys(addr)
   302  	if keys == nil {
   303  		return nil
   304  	}
   305  	res := make([]*v1.Pod, 0, len(keys))
   306  	for _, key := range keys {
   307  		p := pc.getPodByKey(key)
   308  		// Subtle race condition. getPodKeys is our cache over pods, while getPodByKey hits the informer cache.
   309  		// if these are out of sync, p may be nil (pod was deleted).
   310  		if p != nil {
   311  			res = append(res, p)
   312  		}
   313  	}
   314  	return res
   315  }
   316  
   317  // getPodByKey returns the pod by key
   318  func (pc *PodCache) getPodByKey(key types.NamespacedName) *v1.Pod {
   319  	return pc.pods.Get(key.Name, key.Namespace)
   320  }
   321  
   322  // getPodByKey returns the pod of the proxy
   323  func (pc *PodCache) getPodByProxy(proxy *model.Proxy) *v1.Pod {
   324  	var pod *v1.Pod
   325  	key := podKeyByProxy(proxy)
   326  	if key.Name != "" {
   327  		pod = pc.getPodByKey(key)
   328  		if pod != nil {
   329  			return pod
   330  		}
   331  	}
   332  
   333  	// only need to fetch the corresponding pod through the first IP, although there are multiple IP scenarios,
   334  	// because multiple ips belong to the same pod
   335  	proxyIP := proxy.IPAddresses[0]
   336  	// just in case the proxy ID is bad formatted
   337  	pods := pc.getPodsByIP(proxyIP)
   338  	switch len(pods) {
   339  	case 0:
   340  		return nil
   341  	case 1:
   342  		return pods[0]
   343  	default:
   344  		// This should only happen with hostNetwork pods, which cannot be proxy clients...
   345  		log.Errorf("unexpected: found multiple pods for proxy %v (%v)", proxy.ID, proxyIP)
   346  		// Try to handle it gracefully
   347  		for _, p := range pods {
   348  			// At least filter out wrong namespaces...
   349  			if proxy.ConfigNamespace != p.Namespace {
   350  				continue
   351  			}
   352  			return p
   353  		}
   354  		return nil
   355  	}
   356  }