istio.io/istio@v0.0.0-20240520182934-d79c90f27776/cni/pkg/repair/netns.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package repair
    16  
    17  import (
    18  	"fmt"
    19  	"math"
    20  	"net"
    21  	"strconv"
    22  
    23  	netns "github.com/containernetworking/plugins/pkg/ns"
    24  	"github.com/prometheus/procfs"
    25  	"github.com/vishvananda/netlink"
    26  	"golang.org/x/sys/unix"
    27  	corev1 "k8s.io/api/core/v1"
    28  
    29  	"istio.io/istio/pkg/log"
    30  )
    31  
    32  func getPidNamespace(pid int) string {
    33  	return "/host/proc/" + strconv.Itoa(pid) + "/ns/net"
    34  }
    35  
    36  func runInHost[T any](f func() (T, error)) (T, error) {
    37  	var res T
    38  	ns, err := netns.GetNS(getPidNamespace(1))
    39  	if err != nil {
    40  		return res, fmt.Errorf("failed to get host network: %v", err)
    41  	}
    42  	err = ns.Do(func(ns netns.NetNS) error {
    43  		var err error
    44  		res, err = f()
    45  		return err
    46  	})
    47  	if err != nil {
    48  		return res, fmt.Errorf("in host network: %v", err)
    49  	}
    50  	return res, nil
    51  }
    52  
    53  func findNetworkIDByIP(ip string) (int, error) {
    54  	link, err := getLinkWithDestinationOf(ip)
    55  	if err != nil {
    56  		return 0, fmt.Errorf("find link for %v: %v", ip, err)
    57  	}
    58  	return link.Attrs().NetNsID, nil
    59  }
    60  
    61  func getLinkWithDestinationOf(ip string) (netlink.Link, error) {
    62  	routes, err := netlink.RouteListFiltered(
    63  		netlink.FAMILY_V4,
    64  		&netlink.Route{Dst: &net.IPNet{IP: net.ParseIP(ip), Mask: net.CIDRMask(32, 32)}},
    65  		netlink.RT_FILTER_DST)
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  
    70  	if len(routes) == 0 {
    71  		return nil, fmt.Errorf("no routes found for %s", ip)
    72  	}
    73  
    74  	linkIndex := routes[0].LinkIndex
    75  	return netlink.LinkByIndex(linkIndex)
    76  }
    77  
    78  // getPodNetNs finds the network namespace for a given pod. There is not a great way to do this. Network namespaces live
    79  // under the procfs, /proc/<pid>/ns/net. In majority of cases, this is not used directly, but is rather bind mounted to
    80  // /var/run/netns/<name>. However, this pattern is not ubiquitous. Some platforms bind mount to other places. As we run
    81  // in a pod, we cannot just access any arbitrary file they happen to bind mount in, as we don't know ahead of time where
    82  // it might be.
    83  //
    84  // Instead, we rely directly on the procfs.
    85  // This rules out two possible methods:
    86  // * use crictl to inspect the pod; this returns the bind-mounted network namespace file.
    87  // * /var/lib/cni/results shows the outputs of CNI plugins; this containers the bind-mounted network namespace file.
    88  //
    89  // Instead, we traverse the procfs. Comments on this method are inline.
    90  func getPodNetNs(pod *corev1.Pod) (string, error) {
    91  	// First, find the network namespace id by looking the interface with the given Pod IP.
    92  	// This could break on some platforms if they do not have an interface-per-pod.
    93  	wantID, err := findNetworkIDByIP(pod.Status.PodIP)
    94  	if err != nil {
    95  		return "", fmt.Errorf("network id: %v", err)
    96  	}
    97  	fs, err := procfs.NewFS("/host/proc")
    98  	if err != nil {
    99  		return "", fmt.Errorf("read procfs: %v", err)
   100  	}
   101  	procs, err := fs.AllProcs()
   102  	if err != nil {
   103  		return "", fmt.Errorf("read procs: %v", err)
   104  	}
   105  	oldest := uint64(math.MaxUint64)
   106  	best := ""
   107  	// We will iterate over all processes. Our goal is to find a process with the same network ID as we found above.
   108  	// There should be 1 or 2 processes that match: the pause container should always be there, and the istio-validation *might*.
   109  	// We want the pause container, as the istio-validation one may exit before we are done.
   110  	// We do this by detecting the longest running process. We could look at `cmdline`, but is likely more reliable to weird platforms.
   111  	for _, p := range procs {
   112  		ns := getPidNamespace(p.PID)
   113  		fd, err := unix.Open(ns, unix.O_RDONLY, 0)
   114  		if err != nil {
   115  			// Not uncommon, many processes are transient and we have a TOCTOU here.
   116  			// No problem, must not be the one we are after.
   117  			log.Debugf("failed to open pid %v: %v", p.PID, err)
   118  			continue
   119  		}
   120  		id, err := netlink.GetNetNsIdByFd(fd)
   121  		_ = unix.Close(fd)
   122  		if err != nil {
   123  			log.Debugf("failed to get netns for pid %v: %v", p.PID, err)
   124  			continue
   125  		}
   126  
   127  		if id != wantID {
   128  			// Not the network we want, skip
   129  			continue
   130  		}
   131  		s, err := p.Stat()
   132  		if err != nil {
   133  			// Unexpected... we will use it, but only if we find nothing without errors
   134  			log.Warnf("failed to read proc %v stats: %v", p.PID, err)
   135  			if best == "" {
   136  				best = ns
   137  			}
   138  			continue
   139  		}
   140  		// Get the oldest one.
   141  		if s.Starttime < oldest {
   142  			oldest = s.Starttime
   143  			best = ns
   144  		}
   145  	}
   146  	if best == "" {
   147  		return "", fmt.Errorf("failed to find network namespace")
   148  	}
   149  	return best, nil
   150  }