istio.io/istio@v0.0.0-20240520182934-d79c90f27776/cni/pkg/nodeagent/net.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package nodeagent 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "net/netip" 22 23 "golang.org/x/sys/unix" 24 corev1 "k8s.io/api/core/v1" 25 "k8s.io/apimachinery/pkg/types" 26 27 "istio.io/istio/cni/pkg/ipset" 28 "istio.io/istio/cni/pkg/iptables" 29 "istio.io/istio/cni/pkg/util" 30 istiolog "istio.io/istio/pkg/log" 31 "istio.io/istio/pkg/slices" 32 "istio.io/istio/pkg/util/sets" 33 dep "istio.io/istio/tools/istio-iptables/pkg/dependencies" 34 ) 35 36 var log = istiolog.RegisterScope("ambient", "ambient controller") 37 38 // Adapts CNI to ztunnel server. decoupled from k8s for easier integration testing. 39 type NetServer struct { 40 ztunnelServer ZtunnelServer 41 currentPodSnapshot *podNetnsCache 42 iptablesConfigurator *iptables.IptablesConfigurator 43 podNs PodNetnsFinder 44 // allow overriding for tests 45 netnsRunner func(fdable NetnsFd, toRun func() error) error 46 hostsideProbeIPSet ipset.IPSet 47 } 48 49 var _ MeshDataplane = &NetServer{} 50 51 func newNetServer(ztunnelServer ZtunnelServer, podNsMap *podNetnsCache, 52 iptablesConfigurator *iptables.IptablesConfigurator, podNs PodNetnsFinder, 53 probeSet ipset.IPSet, 54 ) *NetServer { 55 return &NetServer{ 56 ztunnelServer: ztunnelServer, 57 currentPodSnapshot: podNsMap, 58 podNs: podNs, 59 iptablesConfigurator: iptablesConfigurator, 60 netnsRunner: NetnsDo, 61 hostsideProbeIPSet: probeSet, 62 } 63 } 64 65 func (s *NetServer) Start(ctx context.Context) { 66 log.Debug("starting ztunnel server") 67 go s.ztunnelServer.Run(ctx) 68 } 69 70 func (s *NetServer) Stop() { 71 log.Debug("removing host iptables rules") 72 s.iptablesConfigurator.DeleteHostRules() 73 74 log.Debug("destroying host ipset") 75 s.hostsideProbeIPSet.Flush() 76 if err := s.hostsideProbeIPSet.DestroySet(); err != nil { 77 log.Warnf("could not destroy host ipset on shutdown") 78 } 79 log.Debug("stopping ztunnel server") 80 s.ztunnelServer.Close() 81 } 82 83 func (s *NetServer) rescanPod(pod *corev1.Pod) error { 84 // this can happen if the pod was dynamically added to the mesh after it was created. 85 // in that case, try finding the netns using procfs. 86 filter := map[types.UID]*corev1.Pod{ 87 pod.UID: pod, 88 } 89 return s.scanProcForPodsAndCache(filter) 90 } 91 92 func (s *NetServer) getOrOpenNetns(pod *corev1.Pod, netNs string) (Netns, error) { 93 if netNs == "" { 94 return s.getNetns(pod) 95 } 96 return s.openNetns(pod, netNs) 97 } 98 99 func (s *NetServer) openNetns(pod *corev1.Pod, netNs string) (Netns, error) { 100 return s.currentPodSnapshot.UpsertPodCache(pod, netNs) 101 } 102 103 func (s *NetServer) getNetns(pod *corev1.Pod) (Netns, error) { 104 openNetns := s.currentPodSnapshot.Get(string(pod.UID)) 105 if openNetns != nil { 106 return openNetns, nil 107 } 108 log.Debug("pod netns was not found, trying to find it using procfs") 109 // this can happen if the pod was dynamically added to the mesh after it was created. 110 // in that case, try finding the netns using procfs. 111 if err := s.rescanPod(pod); err != nil { 112 log.Errorf("error scanning proc: error was %s", err) 113 return nil, err 114 } 115 // try again. we can still get here if the pod is in the process of being created. 116 // in this case the CNI will be invoked soon and provide us with the netns. 117 openNetns = s.currentPodSnapshot.Get(string(pod.UID)) 118 if openNetns == nil { 119 return nil, fmt.Errorf("can't find netns for pod, this is ok if this is a newly created pod (%w)", ErrPodNotFound) 120 } 121 122 return openNetns, nil 123 } 124 125 // AddPodToMesh adds a pod to mesh by 126 // 1. Getting the netns 127 // 2. Adding the pod's IPs to the hostnetns ipsets for node probe checks 128 // 3. Creating iptables rules inside the pod's netns 129 // 4. Notifying ztunnel via GRPC to create a proxy for the pod 130 // 131 // You may ask why we pass the pod IPs separately from the pod manifest itself (which contains the pod IPs as a field) 132 // - this is because during add specifically, if CNI plugins have not finished executing, 133 // K8S may get a pod Add event without any IPs in the object, and the pod will later be updated with IPs. 134 // 135 // We always need the IPs, but this is fine because this AddPodToMesh can be called from the CNI plugin as well, 136 // which always has the firsthand info of the IPs, even before K8S does - so we pass them separately here because 137 // we actually may have them before K8S in the Pod object. 138 func (s *NetServer) AddPodToMesh(ctx context.Context, pod *corev1.Pod, podIPs []netip.Addr, netNs string) error { 139 log.Infof("in pod mode - adding pod %s/%s to ztunnel ", pod.Namespace, pod.Name) 140 // make sure the cache is aware of the pod, even if we don't have the netns yet. 141 s.currentPodSnapshot.Ensure(string(pod.UID)) 142 openNetns, err := s.getOrOpenNetns(pod, netNs) 143 if err != nil { 144 return err 145 } 146 147 // Handle node healthcheck probe rewrites 148 err = addPodToHostNSIpset(pod, podIPs, &s.hostsideProbeIPSet) 149 if err != nil { 150 log.Errorf("failed to add pod to ipset: %s/%s %v", pod.Namespace, pod.Name, err) 151 return err 152 } 153 154 log.Debug("calling CreateInpodRules") 155 if err := s.netnsRunner(openNetns, func() error { 156 return s.iptablesConfigurator.CreateInpodRules(&HostProbeSNATIP) 157 }); err != nil { 158 log.Errorf("failed to update POD inpod: %s/%s %v", pod.Namespace, pod.Name, err) 159 return err 160 } 161 162 log.Debug("notifying subscribed node proxies") 163 if err := s.sendPodToZtunnelAndWaitForAck(ctx, pod, openNetns); err != nil { 164 // we must return PartialAdd error here. the pod was injected with iptables rules, 165 // so it should be annotated, so if it is removed from the mesh, the rules will be removed. 166 // alternatively, we may not return an error at all, but we want this to fail on tests. 167 return NewErrPartialAdd(err) 168 } 169 return nil 170 } 171 172 func (s *NetServer) sendPodToZtunnelAndWaitForAck(ctx context.Context, pod *corev1.Pod, netns Netns) error { 173 return s.ztunnelServer.PodAdded(ctx, pod, netns) 174 } 175 176 // ConstructInitialSnapshot takes a "snapshot" of current ambient pods and 177 // 178 // 1. Constructs a ztunnel state message to initialize ztunnel 179 // 2. Syncs the host ipset 180 func (s *NetServer) ConstructInitialSnapshot(ambientPods []*corev1.Pod) error { 181 var consErr []error 182 183 if err := s.syncHostIPSets(ambientPods); err != nil { 184 log.Warnf("failed to sync host IPset: %v", err) 185 consErr = append(consErr, err) 186 } 187 188 podsByUID := slices.GroupUnique(ambientPods, (*corev1.Pod).GetUID) 189 if err := s.buildZtunnelSnapshot(podsByUID); err != nil { 190 log.Warnf("failed to construct initial ztunnel snapshot: %v", err) 191 consErr = append(consErr, err) 192 } 193 194 return errors.Join(consErr...) 195 } 196 197 func (s *NetServer) buildZtunnelSnapshot(ambientPodUIDs map[types.UID]*corev1.Pod) error { 198 // first add all the pods as empty: 199 for uid := range ambientPodUIDs { 200 s.currentPodSnapshot.Ensure(string(uid)) 201 } 202 203 // populate full pod snapshot from cgroups 204 return s.scanProcForPodsAndCache(ambientPodUIDs) 205 } 206 207 func (s *NetServer) scanProcForPodsAndCache(pods map[types.UID]*corev1.Pod) error { 208 // TODO: maybe remove existing uids in s.currentPodSnapshot from the filter set. 209 res, err := s.podNs.FindNetnsForPods(pods) 210 if err != nil { 211 return err 212 } 213 214 for uid, wl := range res { 215 s.currentPodSnapshot.UpsertPodCacheWithNetns(uid, wl) 216 } 217 return nil 218 } 219 220 func realDependencies() *dep.RealDependencies { 221 return &dep.RealDependencies{ 222 CNIMode: false, // we are in cni, but as we do the netns ourselves, we should keep this as false. 223 NetworkNamespace: "", 224 } 225 } 226 227 // Remove pod from mesh: pod is not deleted, we just want to remove it from the mesh. 228 func (s *NetServer) RemovePodFromMesh(ctx context.Context, pod *corev1.Pod) error { 229 log := log.WithLabels("ns", pod.Namespace, "name", pod.Name) 230 log.Debugf("Pod is now opt out... cleaning up.") 231 232 openNetns := s.currentPodSnapshot.Take(string(pod.UID)) 233 if openNetns == nil { 234 log.Warn("failed to find pod netns during removal") 235 return fmt.Errorf("failed to find pod netns during removal") 236 } 237 // pod is removed from the mesh, but is still running. remove iptables rules 238 log.Debugf("calling DeleteInpodRules.") 239 if err := s.netnsRunner(openNetns, func() error { return s.iptablesConfigurator.DeleteInpodRules() }); err != nil { 240 log.Errorf("failed to delete inpod rules %v", err) 241 return fmt.Errorf("failed to delete inpod rules %w", err) 242 } 243 244 if err := removePodFromHostNSIpset(pod, &s.hostsideProbeIPSet); err != nil { 245 log.Errorf("failed to remove pod %s from host ipset, error was: %v", pod.Name, err) 246 return err 247 } 248 249 log.Debug("in pod mode - removing pod from ztunnel") 250 if err := s.ztunnelServer.PodDeleted(ctx, string(pod.UID)); err != nil { 251 log.Errorf("failed to delete pod from ztunnel: %v", err) 252 } 253 return nil 254 } 255 256 // Delete pod from mesh: pod is deleted. iptables rules will die with it, we just need to update ztunnel 257 func (s *NetServer) DelPodFromMesh(ctx context.Context, pod *corev1.Pod) error { 258 log := log.WithLabels("ns", pod.Namespace, "name", pod.Name) 259 log.Debug("Pod is now stopped... cleaning up.") 260 261 if err := removePodFromHostNSIpset(pod, &s.hostsideProbeIPSet); err != nil { 262 log.Errorf("failed to remove pod %s from host ipset, error was: %v", pod.Name, err) 263 return err 264 } 265 266 log.Info("in pod mode - deleting pod from ztunnel") 267 268 // pod is deleted, clean-up its open netns 269 openNetns := s.currentPodSnapshot.Take(string(pod.UID)) 270 if openNetns == nil { 271 log.Warn("failed to find pod netns") 272 } 273 274 if err := s.ztunnelServer.PodDeleted(ctx, string(pod.UID)); err != nil { 275 return err 276 } 277 return nil 278 } 279 280 func (s *NetServer) syncHostIPSets(ambientPods []*corev1.Pod) error { 281 var addedIPSnapshot []netip.Addr 282 283 for _, pod := range ambientPods { 284 podIPs := util.GetPodIPsIfPresent(pod) 285 if len(podIPs) == 0 { 286 log.Warnf("pod %s does not appear to have any assigned IPs, not syncing with ipset", pod.Name) 287 } else { 288 err := addPodToHostNSIpset(pod, podIPs, &s.hostsideProbeIPSet) 289 if err != nil { 290 return err 291 } 292 addedIPSnapshot = append(addedIPSnapshot, podIPs...) 293 } 294 295 } 296 return pruneHostIPset(sets.New(addedIPSnapshot...), &s.hostsideProbeIPSet) 297 } 298 299 // addPodToHostNSIpset: 300 // 1. get pod manifest 301 // 2. Get all pod ips (might be several, v6/v4) 302 // 3. update ipsets accordingly 303 func addPodToHostNSIpset(pod *corev1.Pod, podIPs []netip.Addr, hostsideProbeSet *ipset.IPSet) error { 304 // Add the pod UID as an ipset entry comment, so we can (more) easily find and delete 305 // all relevant entries for a pod later. 306 podUID := string(pod.ObjectMeta.UID) 307 ipProto := uint8(unix.IPPROTO_TCP) 308 309 var ipsetAddrErrs []error 310 311 // For each pod IP 312 for _, pip := range podIPs { 313 // Add to host ipset 314 log.Debugf("adding pod %s probe to ipset %s with ip %s", pod.Name, hostsideProbeSet.Prefix, pip) 315 // Add IP/port combo to set. Note that we set Replace to false here - we _did_ previously 316 // set it to true, but in theory that could mask weird scenarios where K8S triggers events out of order -> 317 // an add(sameIPreused) then delete(originalIP). 318 // Which will result in the new pod starting to fail healthchecks. 319 // 320 // Since we purge on restart of CNI, and remove pod IPs from the set on every pod removal/deletion, 321 // we _shouldn't_ get any overwrite/overlap, unless something is wrong and we are asked to add 322 // a pod by an IP we already have in the set (which will give an error, which we want). 323 if err := hostsideProbeSet.AddIP(pip, ipProto, podUID, false); err != nil { 324 ipsetAddrErrs = append(ipsetAddrErrs, err) 325 log.Errorf("failed adding pod %s to ipset %s with ip %s, error was %s", 326 pod.Name, hostsideProbeSet.Prefix, pip, err) 327 } 328 } 329 330 return errors.Join(ipsetAddrErrs...) 331 } 332 333 func removePodFromHostNSIpset(pod *corev1.Pod, hostsideProbeSet *ipset.IPSet) error { 334 podIPs := util.GetPodIPsIfPresent(pod) 335 for _, pip := range podIPs { 336 if err := hostsideProbeSet.ClearEntriesWithIP(pip); err != nil { 337 return err 338 } 339 log.Debugf("removed pod name %s with UID %s from host ipset %s by ip %s", pod.Name, pod.UID, hostsideProbeSet.Prefix, pip) 340 } 341 342 return nil 343 } 344 345 func pruneHostIPset(expected sets.Set[netip.Addr], hostsideProbeSet *ipset.IPSet) error { 346 actualIPSetContents, err := hostsideProbeSet.ListEntriesByIP() 347 if err != nil { 348 log.Warnf("unable to list IPSet: %v", err) 349 return err 350 } 351 actual := sets.New[netip.Addr](actualIPSetContents...) 352 stales := actual.DifferenceInPlace(expected) 353 354 for staleIP := range stales { 355 if err := hostsideProbeSet.ClearEntriesWithIP(staleIP); err != nil { 356 return err 357 } 358 log.Debugf("removed stale ip %s from host ipset %s", staleIP, hostsideProbeSet.Prefix) 359 } 360 return nil 361 }