k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/soak/serve_hostnames/serve_hostnames.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 /* 18 This soak tests places a specified number of pods on each node and then 19 repeatedly sends queries to a service running on these pods via 20 a service 21 */ 22 23 package main 24 25 import ( 26 "context" 27 "flag" 28 "fmt" 29 "os" 30 "path/filepath" 31 "time" 32 33 v1 "k8s.io/api/core/v1" 34 apierrors "k8s.io/apimachinery/pkg/api/errors" 35 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 36 "k8s.io/apimachinery/pkg/runtime" 37 "k8s.io/apimachinery/pkg/util/intstr" 38 clientset "k8s.io/client-go/kubernetes" 39 restclient "k8s.io/client-go/rest" 40 "k8s.io/client-go/tools/clientcmd" 41 "k8s.io/kubernetes/pkg/api/legacyscheme" 42 "k8s.io/kubernetes/test/e2e/framework/service" 43 imageutils "k8s.io/kubernetes/test/utils/image" 44 45 "k8s.io/klog/v2" 46 ) 47 48 var ( 49 queriesAverage = flag.Int("queries", 100, "Number of hostname queries to make in each iteration per pod on average") 50 podsPerNode = flag.Int("pods_per_node", 1, "Number of serve_hostname pods per node") 51 upTo = flag.Int("up_to", 1, "Number of iterations or -1 for no limit") 52 maxPar = flag.Int("max_par", 500, "Maximum number of queries in flight") 53 gke = flag.String("gke_context", "", "Target GKE cluster with context gke_{project}_{zone}_{cluster-name}") 54 ) 55 56 const ( 57 deleteTimeout = 2 * time.Minute 58 endpointTimeout = 5 * time.Minute 59 nodeListTimeout = 2 * time.Minute 60 podCreateTimeout = 2 * time.Minute 61 podStartTimeout = 30 * time.Minute 62 serviceCreateTimeout = 2 * time.Minute 63 namespaceDeleteTimeout = 5 * time.Minute 64 ) 65 66 func main() { 67 flag.Parse() 68 69 klog.Infof("Starting serve_hostnames soak test with queries=%d and podsPerNode=%d upTo=%d", 70 *queriesAverage, *podsPerNode, *upTo) 71 72 var spec string 73 if *gke != "" { 74 spec = filepath.Join(os.Getenv("HOME"), ".config", "gcloud", "kubernetes", "kubeconfig") 75 } else { 76 spec = filepath.Join(os.Getenv("HOME"), ".kube", "config") 77 } 78 settings, err := clientcmd.LoadFromFile(spec) 79 if err != nil { 80 klog.Fatalf("Error loading configuration: %v", err.Error()) 81 } 82 if *gke != "" { 83 settings.CurrentContext = *gke 84 } 85 config, err := clientcmd.NewDefaultClientConfig(*settings, &clientcmd.ConfigOverrides{}).ClientConfig() 86 if err != nil { 87 klog.Fatalf("Failed to construct config: %v", err) 88 } 89 90 client, err := clientset.NewForConfig(config) 91 if err != nil { 92 klog.Fatalf("Failed to make client: %v", err) 93 } 94 95 var nodes *v1.NodeList 96 for start := time.Now(); time.Since(start) < nodeListTimeout; time.Sleep(2 * time.Second) { 97 nodes, err = client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) 98 if err == nil { 99 break 100 } 101 klog.Warningf("Failed to list nodes: %v", err) 102 } 103 if err != nil { 104 klog.Fatalf("Giving up trying to list nodes: %v", err) 105 } 106 107 if len(nodes.Items) == 0 { 108 klog.Fatalf("Failed to find any nodes.") 109 } 110 111 klog.Infof("Found %d nodes on this cluster:", len(nodes.Items)) 112 for i, node := range nodes.Items { 113 klog.Infof("%d: %s", i, node.Name) 114 } 115 116 queries := *queriesAverage * len(nodes.Items) * *podsPerNode 117 118 // Create the namespace 119 got, err := client.CoreV1().Namespaces().Create(context.TODO(), &v1.Namespace{ObjectMeta: metav1.ObjectMeta{GenerateName: "serve-hostnames-"}}, metav1.CreateOptions{}) 120 if err != nil { 121 klog.Fatalf("Failed to create namespace: %v", err) 122 } 123 ns := got.Name 124 defer func(ns string) { 125 if err := client.CoreV1().Namespaces().Delete(context.TODO(), ns, metav1.DeleteOptions{}); err != nil { 126 klog.Warningf("Failed to delete namespace %s: %v", ns, err) 127 } else { 128 // wait until the namespace disappears 129 for i := 0; i < int(namespaceDeleteTimeout/time.Second); i++ { 130 if _, err := client.CoreV1().Namespaces().Get(context.TODO(), ns, metav1.GetOptions{}); err != nil { 131 if apierrors.IsNotFound(err) { 132 return 133 } 134 } 135 time.Sleep(time.Second) 136 } 137 } 138 }(ns) 139 klog.Infof("Created namespace %s", ns) 140 141 // Create a service for these pods. 142 klog.Infof("Creating service %s/serve-hostnames", ns) 143 // Make several attempts to create a service. 144 var svc *v1.Service 145 for start := time.Now(); time.Since(start) < serviceCreateTimeout; time.Sleep(2 * time.Second) { 146 t := time.Now() 147 svc, err = client.CoreV1().Services(ns).Create(context.TODO(), &v1.Service{ 148 ObjectMeta: metav1.ObjectMeta{ 149 Name: "serve-hostnames", 150 Labels: map[string]string{ 151 "name": "serve-hostname", 152 }, 153 }, 154 Spec: v1.ServiceSpec{ 155 Ports: []v1.ServicePort{{ 156 Protocol: "TCP", 157 Port: 9376, 158 TargetPort: intstr.FromInt32(9376), 159 }}, 160 Selector: map[string]string{ 161 "name": "serve-hostname", 162 }, 163 }, 164 }, metav1.CreateOptions{}) 165 klog.V(4).Infof("Service create %s/server-hostnames took %v", ns, time.Since(t)) 166 if err == nil { 167 break 168 } 169 klog.Warningf("After %v failed to create service %s/serve-hostnames: %v", time.Since(start), ns, err) 170 } 171 if err != nil { 172 klog.Warningf("Unable to create service %s/%s: %v", ns, svc.Name, err) 173 return 174 } 175 // Clean up service 176 defer func() { 177 klog.Infof("Cleaning up service %s/serve-hostnames", ns) 178 // Make several attempts to delete the service. 179 for start := time.Now(); time.Since(start) < deleteTimeout; time.Sleep(1 * time.Second) { 180 if err := client.CoreV1().Services(ns).Delete(context.TODO(), svc.Name, metav1.DeleteOptions{}); err == nil { 181 return 182 } 183 klog.Warningf("After %v unable to delete service %s/%s: %v", time.Since(start), ns, svc.Name, err) 184 } 185 }() 186 187 // Put serve-hostname pods on each node. 188 podNames := []string{} 189 for i, node := range nodes.Items { 190 for j := 0; j < *podsPerNode; j++ { 191 podName := fmt.Sprintf("serve-hostname-%d-%d", i, j) 192 podNames = append(podNames, podName) 193 // Make several attempts 194 for start := time.Now(); time.Since(start) < podCreateTimeout; time.Sleep(2 * time.Second) { 195 klog.Infof("Creating pod %s/%s on node %s", ns, podName, node.Name) 196 t := time.Now() 197 _, err = client.CoreV1().Pods(ns).Create(context.TODO(), &v1.Pod{ 198 ObjectMeta: metav1.ObjectMeta{ 199 Name: podName, 200 Labels: map[string]string{ 201 "name": "serve-hostname", 202 }, 203 }, 204 Spec: v1.PodSpec{ 205 Containers: []v1.Container{ 206 { 207 Name: "serve-hostname", 208 Image: imageutils.GetE2EImage(imageutils.Agnhost), 209 Ports: []v1.ContainerPort{{ContainerPort: 9376}}, 210 }, 211 }, 212 NodeName: node.Name, 213 }, 214 }, metav1.CreateOptions{}) 215 klog.V(4).Infof("Pod create %s/%s request took %v", ns, podName, time.Since(t)) 216 if err == nil { 217 break 218 } 219 klog.Warningf("After %s failed to create pod %s/%s: %v", time.Since(start), ns, podName, err) 220 } 221 if err != nil { 222 klog.Warningf("Failed to create pod %s/%s: %v", ns, podName, err) 223 return 224 } 225 } 226 } 227 // Clean up the pods 228 defer func() { 229 klog.Info("Cleaning up pods") 230 // Make several attempts to delete the pods. 231 for _, podName := range podNames { 232 for start := time.Now(); time.Since(start) < deleteTimeout; time.Sleep(1 * time.Second) { 233 if err = client.CoreV1().Pods(ns).Delete(context.TODO(), podName, metav1.DeleteOptions{}); err == nil { 234 break 235 } 236 klog.Warningf("After %v failed to delete pod %s/%s: %v", time.Since(start), ns, podName, err) 237 } 238 } 239 }() 240 241 klog.Info("Waiting for the serve-hostname pods to be ready") 242 for _, podName := range podNames { 243 var pod *v1.Pod 244 for start := time.Now(); time.Since(start) < podStartTimeout; time.Sleep(5 * time.Second) { 245 pod, err = client.CoreV1().Pods(ns).Get(context.TODO(), podName, metav1.GetOptions{}) 246 if err != nil { 247 klog.Warningf("Get pod %s/%s failed, ignoring for %v: %v", ns, podName, err, podStartTimeout) 248 continue 249 } 250 if pod.Status.Phase == v1.PodRunning { 251 break 252 } 253 } 254 if pod.Status.Phase != v1.PodRunning { 255 klog.Warningf("Gave up waiting on pod %s/%s to be running (saw %v)", ns, podName, pod.Status.Phase) 256 } else { 257 klog.Infof("%s/%s is running", ns, podName) 258 } 259 } 260 261 rclient, err := restclient.RESTClientFor(config) 262 if err != nil { 263 klog.Warningf("Failed to build restclient: %v", err) 264 return 265 } 266 proxyRequest, errProxy := service.GetServicesProxyRequest(client, rclient.Get()) 267 if errProxy != nil { 268 klog.Warningf("Get services proxy request failed: %v", errProxy) 269 return 270 } 271 272 // Wait for the endpoints to propagate. 273 for start := time.Now(); time.Since(start) < endpointTimeout; time.Sleep(10 * time.Second) { 274 hostname, err := proxyRequest. 275 Namespace(ns). 276 Name("serve-hostnames"). 277 DoRaw(context.TODO()) 278 if err != nil { 279 klog.Infof("After %v while making a proxy call got error %v", time.Since(start), err) 280 continue 281 } 282 var r metav1.Status 283 if err := runtime.DecodeInto(legacyscheme.Codecs.UniversalDecoder(), hostname, &r); err != nil { 284 break 285 } 286 if r.Status == metav1.StatusFailure { 287 klog.Infof("After %v got status %v", time.Since(start), string(hostname)) 288 continue 289 } 290 break 291 } 292 293 // Repeatedly make requests. 294 for iteration := 0; iteration != *upTo; iteration++ { 295 responseChan := make(chan string, queries) 296 // Use a channel of size *maxPar to throttle the number 297 // of in-flight requests to avoid overloading the service. 298 inFlight := make(chan struct{}, *maxPar) 299 start := time.Now() 300 for q := 0; q < queries; q++ { 301 go func(i int, query int) { 302 inFlight <- struct{}{} 303 t := time.Now() 304 hostname, err := proxyRequest. 305 Namespace(ns). 306 Name("serve-hostnames"). 307 DoRaw(context.TODO()) 308 klog.V(4).Infof("Proxy call in namespace %s took %v", ns, time.Since(t)) 309 if err != nil { 310 klog.Warningf("Call failed during iteration %d query %d : %v", i, query, err) 311 // If the query failed return a string which starts with a character 312 // that can't be part of a hostname. 313 responseChan <- fmt.Sprintf("!failed in iteration %d to issue query %d: %v", i, query, err) 314 } else { 315 responseChan <- string(hostname) 316 } 317 <-inFlight 318 }(iteration, q) 319 } 320 responses := make(map[string]int, *podsPerNode*len(nodes.Items)) 321 missing := 0 322 for q := 0; q < queries; q++ { 323 r := <-responseChan 324 klog.V(4).Infof("Got response from %s", r) 325 responses[r]++ 326 // If the returned hostname starts with '!' then it indicates 327 // an error response. 328 if len(r) > 0 && r[0] == '!' { 329 klog.V(3).Infof("Got response %s", r) 330 missing++ 331 } 332 } 333 if missing > 0 { 334 klog.Warningf("Missing %d responses out of %d", missing, queries) 335 } 336 // Report any nodes that did not respond. 337 for n, node := range nodes.Items { 338 for i := 0; i < *podsPerNode; i++ { 339 name := fmt.Sprintf("serve-hostname-%d-%d", n, i) 340 if _, ok := responses[name]; !ok { 341 klog.Warningf("No response from pod %s on node %s at iteration %d", name, node.Name, iteration) 342 } 343 } 344 } 345 klog.Infof("Iteration %d took %v for %d queries (%.2f QPS) with %d missing", 346 iteration, time.Since(start), queries-missing, float64(queries-missing)/time.Since(start).Seconds(), missing) 347 } 348 }