istio.io/istio@v0.0.0-20240520182934-d79c90f27776/cni/pkg/nodeagent/net.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package nodeagent
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"net/netip"
    22  
    23  	"golang.org/x/sys/unix"
    24  	corev1 "k8s.io/api/core/v1"
    25  	"k8s.io/apimachinery/pkg/types"
    26  
    27  	"istio.io/istio/cni/pkg/ipset"
    28  	"istio.io/istio/cni/pkg/iptables"
    29  	"istio.io/istio/cni/pkg/util"
    30  	istiolog "istio.io/istio/pkg/log"
    31  	"istio.io/istio/pkg/slices"
    32  	"istio.io/istio/pkg/util/sets"
    33  	dep "istio.io/istio/tools/istio-iptables/pkg/dependencies"
    34  )
    35  
    36  var log = istiolog.RegisterScope("ambient", "ambient controller")
    37  
    38  // Adapts CNI to ztunnel server. decoupled from k8s for easier integration testing.
    39  type NetServer struct {
    40  	ztunnelServer        ZtunnelServer
    41  	currentPodSnapshot   *podNetnsCache
    42  	iptablesConfigurator *iptables.IptablesConfigurator
    43  	podNs                PodNetnsFinder
    44  	// allow overriding for tests
    45  	netnsRunner        func(fdable NetnsFd, toRun func() error) error
    46  	hostsideProbeIPSet ipset.IPSet
    47  }
    48  
    49  var _ MeshDataplane = &NetServer{}
    50  
    51  func newNetServer(ztunnelServer ZtunnelServer, podNsMap *podNetnsCache,
    52  	iptablesConfigurator *iptables.IptablesConfigurator, podNs PodNetnsFinder,
    53  	probeSet ipset.IPSet,
    54  ) *NetServer {
    55  	return &NetServer{
    56  		ztunnelServer:        ztunnelServer,
    57  		currentPodSnapshot:   podNsMap,
    58  		podNs:                podNs,
    59  		iptablesConfigurator: iptablesConfigurator,
    60  		netnsRunner:          NetnsDo,
    61  		hostsideProbeIPSet:   probeSet,
    62  	}
    63  }
    64  
    65  func (s *NetServer) Start(ctx context.Context) {
    66  	log.Debug("starting ztunnel server")
    67  	go s.ztunnelServer.Run(ctx)
    68  }
    69  
    70  func (s *NetServer) Stop() {
    71  	log.Debug("removing host iptables rules")
    72  	s.iptablesConfigurator.DeleteHostRules()
    73  
    74  	log.Debug("destroying host ipset")
    75  	s.hostsideProbeIPSet.Flush()
    76  	if err := s.hostsideProbeIPSet.DestroySet(); err != nil {
    77  		log.Warnf("could not destroy host ipset on shutdown")
    78  	}
    79  	log.Debug("stopping ztunnel server")
    80  	s.ztunnelServer.Close()
    81  }
    82  
    83  func (s *NetServer) rescanPod(pod *corev1.Pod) error {
    84  	// this can happen if the pod was dynamically added to the mesh after it was created.
    85  	// in that case, try finding the netns using procfs.
    86  	filter := map[types.UID]*corev1.Pod{
    87  		pod.UID: pod,
    88  	}
    89  	return s.scanProcForPodsAndCache(filter)
    90  }
    91  
    92  func (s *NetServer) getOrOpenNetns(pod *corev1.Pod, netNs string) (Netns, error) {
    93  	if netNs == "" {
    94  		return s.getNetns(pod)
    95  	}
    96  	return s.openNetns(pod, netNs)
    97  }
    98  
    99  func (s *NetServer) openNetns(pod *corev1.Pod, netNs string) (Netns, error) {
   100  	return s.currentPodSnapshot.UpsertPodCache(pod, netNs)
   101  }
   102  
   103  func (s *NetServer) getNetns(pod *corev1.Pod) (Netns, error) {
   104  	openNetns := s.currentPodSnapshot.Get(string(pod.UID))
   105  	if openNetns != nil {
   106  		return openNetns, nil
   107  	}
   108  	log.Debug("pod netns was not found, trying to find it using procfs")
   109  	// this can happen if the pod was dynamically added to the mesh after it was created.
   110  	// in that case, try finding the netns using procfs.
   111  	if err := s.rescanPod(pod); err != nil {
   112  		log.Errorf("error scanning proc: error was %s", err)
   113  		return nil, err
   114  	}
   115  	// try again. we can still get here if the pod is in the process of being created.
   116  	// in this case the CNI will be invoked soon and provide us with the netns.
   117  	openNetns = s.currentPodSnapshot.Get(string(pod.UID))
   118  	if openNetns == nil {
   119  		return nil, fmt.Errorf("can't find netns for pod, this is ok if this is a newly created pod (%w)", ErrPodNotFound)
   120  	}
   121  
   122  	return openNetns, nil
   123  }
   124  
   125  // AddPodToMesh adds a pod to mesh by
   126  // 1. Getting the netns
   127  // 2. Adding the pod's IPs to the hostnetns ipsets for node probe checks
   128  // 3. Creating iptables rules inside the pod's netns
   129  // 4. Notifying ztunnel via GRPC to create a proxy for the pod
   130  //
   131  // You may ask why we pass the pod IPs separately from the pod manifest itself (which contains the pod IPs as a field)
   132  // - this is because during add specifically, if CNI plugins have not finished executing,
   133  // K8S may get a pod Add event without any IPs in the object, and the pod will later be updated with IPs.
   134  //
   135  // We always need the IPs, but this is fine because this AddPodToMesh can be called from the CNI plugin as well,
   136  // which always has the firsthand info of the IPs, even before K8S does - so we pass them separately here because
   137  // we actually may have them before K8S in the Pod object.
   138  func (s *NetServer) AddPodToMesh(ctx context.Context, pod *corev1.Pod, podIPs []netip.Addr, netNs string) error {
   139  	log.Infof("in pod mode - adding pod %s/%s to ztunnel ", pod.Namespace, pod.Name)
   140  	// make sure the cache is aware of the pod, even if we don't have the netns yet.
   141  	s.currentPodSnapshot.Ensure(string(pod.UID))
   142  	openNetns, err := s.getOrOpenNetns(pod, netNs)
   143  	if err != nil {
   144  		return err
   145  	}
   146  
   147  	// Handle node healthcheck probe rewrites
   148  	err = addPodToHostNSIpset(pod, podIPs, &s.hostsideProbeIPSet)
   149  	if err != nil {
   150  		log.Errorf("failed to add pod to ipset: %s/%s %v", pod.Namespace, pod.Name, err)
   151  		return err
   152  	}
   153  
   154  	log.Debug("calling CreateInpodRules")
   155  	if err := s.netnsRunner(openNetns, func() error {
   156  		return s.iptablesConfigurator.CreateInpodRules(&HostProbeSNATIP)
   157  	}); err != nil {
   158  		log.Errorf("failed to update POD inpod: %s/%s %v", pod.Namespace, pod.Name, err)
   159  		return err
   160  	}
   161  
   162  	log.Debug("notifying subscribed node proxies")
   163  	if err := s.sendPodToZtunnelAndWaitForAck(ctx, pod, openNetns); err != nil {
   164  		// we must return PartialAdd error here. the pod was injected with iptables rules,
   165  		// so it should be annotated, so if it is removed from the mesh, the rules will be removed.
   166  		// alternatively, we may not return an error at all, but we want this to fail on tests.
   167  		return NewErrPartialAdd(err)
   168  	}
   169  	return nil
   170  }
   171  
   172  func (s *NetServer) sendPodToZtunnelAndWaitForAck(ctx context.Context, pod *corev1.Pod, netns Netns) error {
   173  	return s.ztunnelServer.PodAdded(ctx, pod, netns)
   174  }
   175  
   176  // ConstructInitialSnapshot takes a "snapshot" of current ambient pods and
   177  //
   178  // 1. Constructs a ztunnel state message to initialize ztunnel
   179  // 2. Syncs the host ipset
   180  func (s *NetServer) ConstructInitialSnapshot(ambientPods []*corev1.Pod) error {
   181  	var consErr []error
   182  
   183  	if err := s.syncHostIPSets(ambientPods); err != nil {
   184  		log.Warnf("failed to sync host IPset: %v", err)
   185  		consErr = append(consErr, err)
   186  	}
   187  
   188  	podsByUID := slices.GroupUnique(ambientPods, (*corev1.Pod).GetUID)
   189  	if err := s.buildZtunnelSnapshot(podsByUID); err != nil {
   190  		log.Warnf("failed to construct initial ztunnel snapshot: %v", err)
   191  		consErr = append(consErr, err)
   192  	}
   193  
   194  	return errors.Join(consErr...)
   195  }
   196  
   197  func (s *NetServer) buildZtunnelSnapshot(ambientPodUIDs map[types.UID]*corev1.Pod) error {
   198  	// first add all the pods as empty:
   199  	for uid := range ambientPodUIDs {
   200  		s.currentPodSnapshot.Ensure(string(uid))
   201  	}
   202  
   203  	// populate full pod snapshot from cgroups
   204  	return s.scanProcForPodsAndCache(ambientPodUIDs)
   205  }
   206  
   207  func (s *NetServer) scanProcForPodsAndCache(pods map[types.UID]*corev1.Pod) error {
   208  	// TODO: maybe remove existing uids in s.currentPodSnapshot from the filter set.
   209  	res, err := s.podNs.FindNetnsForPods(pods)
   210  	if err != nil {
   211  		return err
   212  	}
   213  
   214  	for uid, wl := range res {
   215  		s.currentPodSnapshot.UpsertPodCacheWithNetns(uid, wl)
   216  	}
   217  	return nil
   218  }
   219  
   220  func realDependencies() *dep.RealDependencies {
   221  	return &dep.RealDependencies{
   222  		CNIMode:          false, // we are in cni, but as we do the netns ourselves, we should keep this as false.
   223  		NetworkNamespace: "",
   224  	}
   225  }
   226  
   227  // Remove pod from mesh: pod is not deleted, we just want to remove it from the mesh.
   228  func (s *NetServer) RemovePodFromMesh(ctx context.Context, pod *corev1.Pod) error {
   229  	log := log.WithLabels("ns", pod.Namespace, "name", pod.Name)
   230  	log.Debugf("Pod is now opt out... cleaning up.")
   231  
   232  	openNetns := s.currentPodSnapshot.Take(string(pod.UID))
   233  	if openNetns == nil {
   234  		log.Warn("failed to find pod netns during removal")
   235  		return fmt.Errorf("failed to find pod netns during removal")
   236  	}
   237  	// pod is removed from the mesh, but is still running. remove iptables rules
   238  	log.Debugf("calling DeleteInpodRules.")
   239  	if err := s.netnsRunner(openNetns, func() error { return s.iptablesConfigurator.DeleteInpodRules() }); err != nil {
   240  		log.Errorf("failed to delete inpod rules %v", err)
   241  		return fmt.Errorf("failed to delete inpod rules %w", err)
   242  	}
   243  
   244  	if err := removePodFromHostNSIpset(pod, &s.hostsideProbeIPSet); err != nil {
   245  		log.Errorf("failed to remove pod %s from host ipset, error was: %v", pod.Name, err)
   246  		return err
   247  	}
   248  
   249  	log.Debug("in pod mode - removing pod from ztunnel")
   250  	if err := s.ztunnelServer.PodDeleted(ctx, string(pod.UID)); err != nil {
   251  		log.Errorf("failed to delete pod from ztunnel: %v", err)
   252  	}
   253  	return nil
   254  }
   255  
   256  // Delete pod from mesh: pod is deleted. iptables rules will die with it, we just need to update ztunnel
   257  func (s *NetServer) DelPodFromMesh(ctx context.Context, pod *corev1.Pod) error {
   258  	log := log.WithLabels("ns", pod.Namespace, "name", pod.Name)
   259  	log.Debug("Pod is now stopped... cleaning up.")
   260  
   261  	if err := removePodFromHostNSIpset(pod, &s.hostsideProbeIPSet); err != nil {
   262  		log.Errorf("failed to remove pod %s from host ipset, error was: %v", pod.Name, err)
   263  		return err
   264  	}
   265  
   266  	log.Info("in pod mode - deleting pod from ztunnel")
   267  
   268  	// pod is deleted, clean-up its open netns
   269  	openNetns := s.currentPodSnapshot.Take(string(pod.UID))
   270  	if openNetns == nil {
   271  		log.Warn("failed to find pod netns")
   272  	}
   273  
   274  	if err := s.ztunnelServer.PodDeleted(ctx, string(pod.UID)); err != nil {
   275  		return err
   276  	}
   277  	return nil
   278  }
   279  
   280  func (s *NetServer) syncHostIPSets(ambientPods []*corev1.Pod) error {
   281  	var addedIPSnapshot []netip.Addr
   282  
   283  	for _, pod := range ambientPods {
   284  		podIPs := util.GetPodIPsIfPresent(pod)
   285  		if len(podIPs) == 0 {
   286  			log.Warnf("pod %s does not appear to have any assigned IPs, not syncing with ipset", pod.Name)
   287  		} else {
   288  			err := addPodToHostNSIpset(pod, podIPs, &s.hostsideProbeIPSet)
   289  			if err != nil {
   290  				return err
   291  			}
   292  			addedIPSnapshot = append(addedIPSnapshot, podIPs...)
   293  		}
   294  
   295  	}
   296  	return pruneHostIPset(sets.New(addedIPSnapshot...), &s.hostsideProbeIPSet)
   297  }
   298  
   299  // addPodToHostNSIpset:
   300  // 1. get pod manifest
   301  // 2. Get all pod ips (might be several, v6/v4)
   302  // 3. update ipsets accordingly
   303  func addPodToHostNSIpset(pod *corev1.Pod, podIPs []netip.Addr, hostsideProbeSet *ipset.IPSet) error {
   304  	// Add the pod UID as an ipset entry comment, so we can (more) easily find and delete
   305  	// all relevant entries for a pod later.
   306  	podUID := string(pod.ObjectMeta.UID)
   307  	ipProto := uint8(unix.IPPROTO_TCP)
   308  
   309  	var ipsetAddrErrs []error
   310  
   311  	// For each pod IP
   312  	for _, pip := range podIPs {
   313  		// Add to host ipset
   314  		log.Debugf("adding pod %s probe to ipset %s with ip %s", pod.Name, hostsideProbeSet.Prefix, pip)
   315  		// Add IP/port combo to set. Note that we set Replace to false here - we _did_ previously
   316  		// set it to true, but in theory that could mask weird scenarios where K8S triggers events out of order ->
   317  		// an add(sameIPreused) then delete(originalIP).
   318  		// Which will result in the new pod starting to fail healthchecks.
   319  		//
   320  		// Since we purge on restart of CNI, and remove pod IPs from the set on every pod removal/deletion,
   321  		// we _shouldn't_ get any overwrite/overlap, unless something is wrong and we are asked to add
   322  		// a pod by an IP we already have in the set (which will give an error, which we want).
   323  		if err := hostsideProbeSet.AddIP(pip, ipProto, podUID, false); err != nil {
   324  			ipsetAddrErrs = append(ipsetAddrErrs, err)
   325  			log.Errorf("failed adding pod %s to ipset %s with ip %s, error was %s",
   326  				pod.Name, hostsideProbeSet.Prefix, pip, err)
   327  		}
   328  	}
   329  
   330  	return errors.Join(ipsetAddrErrs...)
   331  }
   332  
   333  func removePodFromHostNSIpset(pod *corev1.Pod, hostsideProbeSet *ipset.IPSet) error {
   334  	podIPs := util.GetPodIPsIfPresent(pod)
   335  	for _, pip := range podIPs {
   336  		if err := hostsideProbeSet.ClearEntriesWithIP(pip); err != nil {
   337  			return err
   338  		}
   339  		log.Debugf("removed pod name %s with UID %s from host ipset %s by ip %s", pod.Name, pod.UID, hostsideProbeSet.Prefix, pip)
   340  	}
   341  
   342  	return nil
   343  }
   344  
   345  func pruneHostIPset(expected sets.Set[netip.Addr], hostsideProbeSet *ipset.IPSet) error {
   346  	actualIPSetContents, err := hostsideProbeSet.ListEntriesByIP()
   347  	if err != nil {
   348  		log.Warnf("unable to list IPSet: %v", err)
   349  		return err
   350  	}
   351  	actual := sets.New[netip.Addr](actualIPSetContents...)
   352  	stales := actual.DifferenceInPlace(expected)
   353  
   354  	for staleIP := range stales {
   355  		if err := hostsideProbeSet.ClearEntriesWithIP(staleIP); err != nil {
   356  			return err
   357  		}
   358  		log.Debugf("removed stale ip %s from host ipset %s", staleIP, hostsideProbeSet.Prefix)
   359  	}
   360  	return nil
   361  }