k8s.io/kubernetes@v1.29.3/test/e2e/network/netpol/kubemanager.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package netpol 18 19 import ( 20 "context" 21 "fmt" 22 v1 "k8s.io/api/core/v1" 23 networkingv1 "k8s.io/api/networking/v1" 24 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 25 "k8s.io/apimachinery/pkg/util/wait" 26 clientset "k8s.io/client-go/kubernetes" 27 "k8s.io/kubernetes/test/e2e/framework" 28 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 29 netutils "k8s.io/utils/net" 30 "net" 31 "strconv" 32 "strings" 33 "time" 34 ) 35 36 // defaultPollIntervalSeconds [seconds] is the default value for which the Prober will wait before attempting next attempt. 37 const defaultPollIntervalSeconds = 1 38 39 // defaultPollTimeoutSeconds [seconds] is the default timeout when polling on probes. 40 const defaultPollTimeoutSeconds = 10 41 42 // TestPod represents an actual running pod. For each Pod defined by the model, 43 // there will be a corresponding TestPod. TestPod includes some runtime info 44 // (namespace name, service IP) which is not available in the model. 45 type TestPod struct { 46 Namespace string 47 Name string 48 ContainerName string 49 ServiceIP string 50 } 51 52 func (pod TestPod) PodString() PodString { 53 return NewPodString(pod.Namespace, pod.Name) 54 } 55 56 // kubeManager provides a convenience interface to kube functionality that we leverage for polling NetworkPolicy connections. 57 // Its responsibilities are: 58 // - creating resources (pods, deployments, namespaces, services, network policies) 59 // - modifying and cleaning up resources 60 type kubeManager struct { 61 framework *framework.Framework 62 clientSet clientset.Interface 63 namespaceNames []string 64 allPods []TestPod 65 allPodStrings []PodString 66 dnsDomain string 67 } 68 69 // newKubeManager is a utility function that wraps creation of the kubeManager instance. 70 func newKubeManager(framework *framework.Framework, dnsDomain string) *kubeManager { 71 return &kubeManager{ 72 framework: framework, 73 clientSet: framework.ClientSet, 74 dnsDomain: dnsDomain, 75 } 76 } 77 78 // initializeCluster initialized the cluster, creating namespaces pods and services as needed. 79 func (k *kubeManager) initializeClusterFromModel(ctx context.Context, model *Model) error { 80 var createdPods []*v1.Pod 81 for _, ns := range model.Namespaces { 82 // no labels needed, we just need the default kubernetes.io/metadata.name label 83 namespace, err := k.framework.CreateNamespace(ctx, ns.BaseName, nil) 84 if err != nil { 85 return err 86 } 87 namespaceName := namespace.Name 88 k.namespaceNames = append(k.namespaceNames, namespaceName) 89 90 for _, pod := range ns.Pods { 91 framework.Logf("creating pod %s/%s with matching service", namespaceName, pod.Name) 92 93 // note that we defer the logic of pod (i.e. node selector) specifics to the model 94 // which is aware of linux vs windows pods 95 kubePod, err := k.createPod(ctx, pod.KubePod(namespaceName)) 96 if err != nil { 97 return err 98 } 99 100 createdPods = append(createdPods, kubePod) 101 svc, err := k.createService(ctx, pod.Service(namespaceName)) 102 if err != nil { 103 return err 104 } 105 if netutils.ParseIPSloppy(svc.Spec.ClusterIP) == nil { 106 return fmt.Errorf("empty IP address found for service %s/%s", svc.Namespace, svc.Name) 107 } 108 109 k.allPods = append(k.allPods, TestPod{ 110 Namespace: kubePod.Namespace, 111 Name: kubePod.Name, 112 ContainerName: pod.Containers[0].Name(), 113 ServiceIP: svc.Spec.ClusterIP, 114 }) 115 k.allPodStrings = append(k.allPodStrings, NewPodString(kubePod.Namespace, kubePod.Name)) 116 } 117 } 118 119 for _, createdPod := range createdPods { 120 err := e2epod.WaitForPodRunningInNamespace(ctx, k.clientSet, createdPod) 121 if err != nil { 122 return fmt.Errorf("unable to wait for pod %s/%s: %w", createdPod.Namespace, createdPod.Name, err) 123 } 124 } 125 126 return nil 127 } 128 129 func (k *kubeManager) AllPods() []TestPod { 130 return k.allPods 131 } 132 133 func (k *kubeManager) AllPodStrings() []PodString { 134 return k.allPodStrings 135 } 136 137 func (k *kubeManager) DNSDomain() string { 138 return k.dnsDomain 139 } 140 141 func (k *kubeManager) NamespaceNames() []string { 142 return k.namespaceNames 143 } 144 145 // getPod gets a pod by namespace and name. 146 func (k *kubeManager) getPod(ctx context.Context, ns string, name string) (*v1.Pod, error) { 147 kubePod, err := k.clientSet.CoreV1().Pods(ns).Get(ctx, name, metav1.GetOptions{}) 148 if err != nil { 149 return nil, fmt.Errorf("unable to get pod %s/%s: %w", ns, name, err) 150 } 151 return kubePod, nil 152 } 153 154 // probeConnectivity execs into a pod and checks its connectivity to another pod. 155 // Implements the Prober interface. 156 func (k *kubeManager) probeConnectivity(args *probeConnectivityArgs) (bool, string, error) { 157 port := strconv.Itoa(args.toPort) 158 if args.addrTo == "" { 159 return false, "no IP provided", fmt.Errorf("empty addrTo field") 160 } 161 framework.Logf("Starting probe from pod %v to %v", args.podFrom, args.addrTo) 162 var cmd []string 163 timeout := fmt.Sprintf("--timeout=%vs", args.timeoutSeconds) 164 165 switch args.protocol { 166 case v1.ProtocolSCTP: 167 cmd = []string{"/agnhost", "connect", net.JoinHostPort(args.addrTo, port), timeout, "--protocol=sctp"} 168 case v1.ProtocolTCP: 169 cmd = []string{"/agnhost", "connect", net.JoinHostPort(args.addrTo, port), timeout, "--protocol=tcp"} 170 case v1.ProtocolUDP: 171 cmd = []string{"/agnhost", "connect", net.JoinHostPort(args.addrTo, port), timeout, "--protocol=udp"} 172 if framework.NodeOSDistroIs("windows") { 173 framework.Logf("probing UDP for windows may result in cluster instability for certain windows nodes with low CPU/Memory, depending on CRI version") 174 } 175 default: 176 framework.Failf("protocol %s not supported", args.protocol) 177 } 178 179 commandDebugString := fmt.Sprintf("kubectl exec %s -c %s -n %s -- %s", args.podFrom, args.containerFrom, args.nsFrom, strings.Join(cmd, " ")) 180 181 attempt := 0 182 183 // NOTE: The return value of this function[probeConnectivity] should be true if the probe is successful and false otherwise. 184 185 // probeError will be the return value of this function[probeConnectivity] call. 186 var probeError error 187 var stderr string 188 189 // Instead of re-running the job on connectivity failure, the following conditionFunc when passed to PollImmediate, reruns 190 // the job when the observed value don't match the expected value, so we don't rely on return value of PollImmediate, we 191 // simply discard it and use probeError, defined outside scope of conditionFunc, for returning the result of probeConnectivity. 192 conditionFunc := func() (bool, error) { 193 _, stderr, probeError = k.executeRemoteCommand(args.nsFrom, args.podFrom, args.containerFrom, cmd) 194 // retry should only occur if expected and observed value don't match. 195 if args.expectConnectivity { 196 if probeError != nil { 197 // since we expect connectivity here, we fail the condition for PollImmediate to reattempt the probe. 198 // this happens in the cases where network is congested, we don't have any policy rejecting traffic 199 // from "podFrom" to "podTo" and probes from "podFrom" to "podTo" are failing. 200 framework.Logf("probe #%d :: connectivity expected :: %s/%s -> %s :: stderr - %s", 201 attempt+1, args.nsFrom, args.podFrom, args.addrTo, stderr, 202 ) 203 attempt++ 204 return false, nil 205 } else { 206 // we got the expected results, exit immediately. 207 return true, nil 208 } 209 } else { 210 if probeError != nil { 211 // we got the expected results, exit immediately. 212 return true, nil 213 } else { 214 // since we don't expect connectivity here, we fail the condition for PollImmediate to reattempt the probe. 215 // this happens in the cases where we have policy rejecting traffic from "podFrom" to "podTo", but CNI takes 216 // time to implement the policy and probe from "podFrom" to "podTo" was successful in that window. 217 framework.Logf(" probe #%d :: connectivity not expected :: %s/%s -> %s", 218 attempt+1, args.nsFrom, args.podFrom, args.addrTo, 219 ) 220 attempt++ 221 return false, nil 222 } 223 } 224 } 225 226 // ignore the result of PollImmediate, we are only concerned with probeError. 227 _ = wait.PollImmediate( 228 time.Duration(args.pollIntervalSeconds)*time.Second, 229 time.Duration(args.pollTimeoutSeconds)*time.Second, 230 conditionFunc, 231 ) 232 233 if probeError != nil { 234 return false, commandDebugString, nil 235 } 236 return true, commandDebugString, nil 237 } 238 239 // executeRemoteCommand executes a remote shell command on the given pod. 240 func (k *kubeManager) executeRemoteCommand(namespace string, pod string, containerName string, command []string) (string, string, error) { 241 return e2epod.ExecWithOptions(k.framework, e2epod.ExecOptions{ 242 Command: command, 243 Namespace: namespace, 244 PodName: pod, 245 ContainerName: containerName, 246 Stdin: nil, 247 CaptureStdout: true, 248 CaptureStderr: true, 249 PreserveWhitespace: false, 250 }) 251 } 252 253 // createService is a convenience function for service setup. 254 func (k *kubeManager) createService(ctx context.Context, service *v1.Service) (*v1.Service, error) { 255 ns := service.Namespace 256 name := service.Name 257 258 createdService, err := k.clientSet.CoreV1().Services(ns).Create(ctx, service, metav1.CreateOptions{}) 259 if err != nil { 260 return nil, fmt.Errorf("unable to create service %s/%s: %w", ns, name, err) 261 } 262 return createdService, nil 263 } 264 265 // createPod is a convenience function for pod setup. 266 func (k *kubeManager) createPod(ctx context.Context, pod *v1.Pod) (*v1.Pod, error) { 267 ns := pod.Namespace 268 framework.Logf("creating pod %s/%s", ns, pod.Name) 269 270 createdPod, err := k.clientSet.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{}) 271 if err != nil { 272 return nil, fmt.Errorf("unable to create pod %s/%s: %w", ns, pod.Name, err) 273 } 274 return createdPod, nil 275 } 276 277 // cleanNetworkPolicies is a convenience function for deleting network policies before startup of any new test. 278 func (k *kubeManager) cleanNetworkPolicies(ctx context.Context) error { 279 for _, ns := range k.namespaceNames { 280 framework.Logf("deleting policies in %s ..........", ns) 281 l, err := k.clientSet.NetworkingV1().NetworkPolicies(ns).List(ctx, metav1.ListOptions{}) 282 if err != nil { 283 return fmt.Errorf("unable to list network policies in ns %s: %w", ns, err) 284 } 285 for _, np := range l.Items { 286 framework.Logf("deleting network policy %s/%s", ns, np.Name) 287 err = k.clientSet.NetworkingV1().NetworkPolicies(ns).Delete(ctx, np.Name, metav1.DeleteOptions{}) 288 if err != nil { 289 return fmt.Errorf("unable to delete network policy %s/%s: %w", ns, np.Name, err) 290 } 291 } 292 } 293 return nil 294 } 295 296 // createNetworkPolicy is a convenience function for creating network policies. 297 func (k *kubeManager) createNetworkPolicy(ctx context.Context, ns string, netpol *networkingv1.NetworkPolicy) (*networkingv1.NetworkPolicy, error) { 298 framework.Logf("creating network policy %s/%s", ns, netpol.Name) 299 netpol.ObjectMeta.Namespace = ns 300 np, err := k.clientSet.NetworkingV1().NetworkPolicies(ns).Create(ctx, netpol, metav1.CreateOptions{}) 301 if err != nil { 302 return nil, fmt.Errorf("unable to create network policy %s/%s: %w", ns, netpol.Name, err) 303 } 304 return np, nil 305 } 306 307 // updateNetworkPolicy is a convenience function for updating network policies. 308 func (k *kubeManager) updateNetworkPolicy(ctx context.Context, ns string, netpol *networkingv1.NetworkPolicy) (*networkingv1.NetworkPolicy, error) { 309 framework.Logf("updating network policy %s/%s", ns, netpol.Name) 310 netpol.ObjectMeta.Namespace = ns 311 np, err := k.clientSet.NetworkingV1().NetworkPolicies(ns).Update(ctx, netpol, metav1.UpdateOptions{}) 312 if err != nil { 313 return np, fmt.Errorf("unable to update network policy %s/%s: %w", ns, netpol.Name, err) 314 } 315 return np, nil 316 } 317 318 // getNamespace gets a namespace object from kubernetes. 319 func (k *kubeManager) getNamespace(ctx context.Context, ns string) (*v1.Namespace, error) { 320 selectedNameSpace, err := k.clientSet.CoreV1().Namespaces().Get(ctx, ns, metav1.GetOptions{}) 321 if err != nil { 322 return nil, fmt.Errorf("unable to get namespace %s: %w", ns, err) 323 } 324 return selectedNameSpace, nil 325 } 326 327 // getProbeTimeoutSeconds returns a timeout for how long the probe should work before failing a check, and takes windows heuristics into account, where requests can take longer sometimes. 328 func getProbeTimeoutSeconds() int { 329 timeoutSeconds := 1 330 if framework.NodeOSDistroIs("windows") { 331 timeoutSeconds = 3 332 } 333 return timeoutSeconds 334 } 335 336 // getWorkers returns the number of workers suggested to run when testing. 337 func getWorkers() int { 338 return 3 339 } 340 341 // getPollInterval returns the value for which the Prober will wait before attempting next attempt. 342 func getPollIntervalSeconds() int { 343 return defaultPollIntervalSeconds 344 } 345 346 // getPollTimeout returns the timeout for polling on probes, and takes windows heuristics into account, where requests can take longer sometimes. 347 func getPollTimeoutSeconds() int { 348 if framework.NodeOSDistroIs("windows") { 349 return defaultPollTimeoutSeconds * 2 350 } 351 return defaultPollTimeoutSeconds 352 }