sigs.k8s.io/cluster-api-provider-azure@v1.14.3/test/e2e/helpers.go (about) 1 //go:build e2e 2 // +build e2e 3 4 /* 5 Copyright 2020 The Kubernetes Authors. 6 7 Licensed under the Apache License, Version 2.0 (the "License"); 8 you may not use this file except in compliance with the License. 9 You may obtain a copy of the License at 10 11 http://www.apache.org/licenses/LICENSE-2.0 12 13 Unless required by applicable law or agreed to in writing, software 14 distributed under the License is distributed on an "AS IS" BASIS, 15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 See the License for the specific language governing permissions and 17 limitations under the License. 18 */ 19 20 package e2e 21 22 import ( 23 "bytes" 24 "context" 25 "encoding/json" 26 "fmt" 27 "io" 28 "net" 29 "net/http" 30 "os" 31 "path/filepath" 32 "regexp" 33 "strconv" 34 "strings" 35 "text/tabwriter" 36 "time" 37 38 "github.com/Azure/azure-sdk-for-go/sdk/azidentity" 39 "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5" 40 "github.com/blang/semver" 41 . "github.com/onsi/ginkgo/v2" 42 . "github.com/onsi/gomega" 43 "github.com/pkg/errors" 44 "github.com/pkg/sftp" 45 "golang.org/x/crypto/ssh" 46 appsv1 "k8s.io/api/apps/v1" 47 batchv1 "k8s.io/api/batch/v1" 48 corev1 "k8s.io/api/core/v1" 49 apierrors "k8s.io/apimachinery/pkg/api/errors" 50 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 51 "k8s.io/client-go/kubernetes" 52 typedappsv1 "k8s.io/client-go/kubernetes/typed/apps/v1" 53 typedbatchv1 "k8s.io/client-go/kubernetes/typed/batch/v1" 54 typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" 55 infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" 56 "sigs.k8s.io/cluster-api-provider-azure/azure" 57 infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" 58 capi_e2e "sigs.k8s.io/cluster-api/test/e2e" 59 "sigs.k8s.io/cluster-api/test/framework" 60 "sigs.k8s.io/cluster-api/test/framework/clusterctl" 61 "sigs.k8s.io/cluster-api/test/framework/kubernetesversions" 62 "sigs.k8s.io/controller-runtime/pkg/client" 63 ) 64 65 const ( 66 sshPort = "22" 67 deleteOperationTimeout = 20 * time.Minute 68 retryableOperationTimeout = 30 * time.Second 69 retryableDeleteOperationTimeout = 3 * time.Minute 70 retryableOperationSleepBetweenRetries = 3 * time.Second 71 helmInstallTimeout = 3 * time.Minute 72 sshConnectionTimeout = 30 * time.Second 73 ) 74 75 // deploymentsClientAdapter adapts a Deployment to work with WaitForDeploymentsAvailable. 76 type deploymentsClientAdapter struct { 77 client typedappsv1.DeploymentInterface 78 } 79 80 // Get fetches the deployment named by the key and updates the provided object. 81 func (c deploymentsClientAdapter) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { 82 deployment, err := c.client.Get(ctx, key.Name, metav1.GetOptions{}) 83 if deployObj, ok := obj.(*appsv1.Deployment); ok { 84 deployment.DeepCopyInto(deployObj) 85 } 86 return err 87 } 88 89 // WaitForDeploymentsAvailableInput is the input for WaitForDeploymentsAvailable. 90 type WaitForDeploymentsAvailableInput struct { 91 Getter framework.Getter 92 Deployment *appsv1.Deployment 93 Clientset *kubernetes.Clientset 94 } 95 96 // WaitForDeploymentsAvailable waits until the Deployment has status.Available = True, that signals that 97 // all the desired replicas are in place. 98 // This can be used to check if Cluster API controllers installed in the management cluster are working. 99 func WaitForDeploymentsAvailable(ctx context.Context, input WaitForDeploymentsAvailableInput, intervals ...interface{}) { 100 start := time.Now() 101 namespace, name := input.Deployment.GetNamespace(), input.Deployment.GetName() 102 Byf("waiting for deployment %s/%s to be available", namespace, name) 103 Log("starting to wait for deployment to become available") 104 Eventually(func() bool { 105 key := client.ObjectKey{Namespace: namespace, Name: name} 106 if err := input.Getter.Get(ctx, key, input.Deployment); err == nil { 107 for _, c := range input.Deployment.Status.Conditions { 108 if c.Type == appsv1.DeploymentAvailable && c.Status == corev1.ConditionTrue { 109 return true 110 } 111 } 112 } 113 return false 114 }, intervals...).Should(BeTrue(), func() string { return DescribeFailedDeployment(ctx, input) }) 115 Logf("Deployment %s/%s is now available, took %v", namespace, name, time.Since(start)) 116 } 117 118 // GetWaitForDeploymentsAvailableInput is a convenience func to compose a WaitForDeploymentsAvailableInput 119 func GetWaitForDeploymentsAvailableInput(ctx context.Context, clusterProxy framework.ClusterProxy, name, namespace string, specName string) WaitForDeploymentsAvailableInput { 120 Expect(clusterProxy).NotTo(BeNil()) 121 cl := clusterProxy.GetClient() 122 var d = &appsv1.Deployment{} 123 Eventually(func() error { 124 return cl.Get(ctx, client.ObjectKey{Name: name, Namespace: namespace}, d) 125 }, e2eConfig.GetIntervals(specName, "wait-deployment")...).Should(Succeed()) 126 clientset := clusterProxy.GetClientSet() 127 return WaitForDeploymentsAvailableInput{ 128 Deployment: d, 129 Clientset: clientset, 130 Getter: cl, 131 } 132 } 133 134 // DescribeFailedDeployment returns detailed output to help debug a deployment failure in e2e. 135 func DescribeFailedDeployment(ctx context.Context, input WaitForDeploymentsAvailableInput) string { 136 namespace, name := input.Deployment.GetNamespace(), input.Deployment.GetName() 137 b := strings.Builder{} 138 b.WriteString(fmt.Sprintf("Deployment %s/%s failed", 139 namespace, name)) 140 b.WriteString(fmt.Sprintf("\nDeployment:\n%s\n", prettyPrint(input.Deployment))) 141 b.WriteString(describeEvents(ctx, input.Clientset, namespace, name)) 142 return b.String() 143 } 144 145 // jobsClientAdapter adapts a Job to work with WaitForJobAvailable. 146 type jobsClientAdapter struct { 147 client typedbatchv1.JobInterface 148 } 149 150 // Get fetches the job named by the key and updates the provided object. 151 func (c jobsClientAdapter) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { 152 job, err := c.client.Get(ctx, key.Name, metav1.GetOptions{}) 153 if jobObj, ok := obj.(*batchv1.Job); ok { 154 job.DeepCopyInto(jobObj) 155 } 156 return err 157 } 158 159 // WaitForJobCompleteInput is the input for WaitForJobComplete. 160 type WaitForJobCompleteInput struct { 161 Getter framework.Getter 162 Job *batchv1.Job 163 Clientset *kubernetes.Clientset 164 } 165 166 // WaitForJobComplete waits until the Job completes with at least one success. 167 func WaitForJobComplete(ctx context.Context, input WaitForJobCompleteInput, intervals ...interface{}) { 168 start := time.Now() 169 namespace, name := input.Job.GetNamespace(), input.Job.GetName() 170 Byf("waiting for job %s/%s to be complete", namespace, name) 171 Logf("waiting for job %s/%s to be complete", namespace, name) 172 Eventually(func() bool { 173 key := client.ObjectKey{Namespace: namespace, Name: name} 174 if err := input.Getter.Get(ctx, key, input.Job); err == nil { 175 for _, c := range input.Job.Status.Conditions { 176 if c.Type == batchv1.JobComplete && c.Status == corev1.ConditionTrue { 177 return input.Job.Status.Succeeded > 0 178 } 179 } 180 } 181 return false 182 }, intervals...).Should(BeTrue(), func() string { return DescribeFailedJob(ctx, input) }) 183 Logf("job %s/%s is complete, took %v", namespace, name, time.Since(start)) 184 } 185 186 // DescribeFailedJob returns a string with information to help debug a failed job. 187 func DescribeFailedJob(ctx context.Context, input WaitForJobCompleteInput) string { 188 namespace, name := input.Job.GetNamespace(), input.Job.GetName() 189 b := strings.Builder{} 190 b.WriteString(fmt.Sprintf("Job %s/%s failed", 191 namespace, name)) 192 b.WriteString(fmt.Sprintf("\nJob:\n%s\n", prettyPrint(input.Job))) 193 b.WriteString(describeEvents(ctx, input.Clientset, namespace, name)) 194 b.WriteString(getJobPodLogs(ctx, input)) 195 return b.String() 196 } 197 198 func getJobPodLogs(ctx context.Context, input WaitForJobCompleteInput) string { 199 podsClient := input.Clientset.CoreV1().Pods(input.Job.GetNamespace()) 200 pods, err := podsClient.List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("job-name=%s", input.Job.GetName())}) 201 if err != nil { 202 return err.Error() 203 } 204 logs := make(map[string]string, len(pods.Items)) 205 for _, pod := range pods.Items { 206 logs[pod.Name] = getPodLogs(ctx, input.Clientset, pod) 207 } 208 b := strings.Builder{} 209 var lastLog string 210 for podName, log := range logs { 211 b.WriteString(fmt.Sprintf("\nLogs for pod %s:\n", podName)) 212 if logsAreSimilar(lastLog, log) { 213 b.WriteString("(Omitted because of similarity to previous pod's logs.)") 214 } else { 215 b.WriteString(log) 216 } 217 lastLog = log 218 } 219 return b.String() 220 } 221 222 // logsAreSimilar compares two multi-line strings and returns true if at least 90% of the lines match. 223 func logsAreSimilar(a, b string) bool { 224 if a == "" { 225 return false 226 } 227 a1 := strings.Split(a, "\n") 228 b1 := strings.Split(b, "\n") 229 for i := len(a1) - 1; i >= 0; i-- { 230 for _, v := range b1 { 231 if a1[i] == v { 232 a1 = append(a1[:i], a1[i+1:]...) 233 break 234 } 235 } 236 } 237 return float32(len(a1))/float32(len(b1)) < 0.1 238 } 239 240 // servicesClientAdapter adapts a Service to work with WaitForServicesAvailable. 241 type servicesClientAdapter struct { 242 client typedcorev1.ServiceInterface 243 } 244 245 // Get fetches the service named by the key and updates the provided object. 246 func (c servicesClientAdapter) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { 247 service, err := c.client.Get(ctx, key.Name, metav1.GetOptions{}) 248 if serviceObj, ok := obj.(*corev1.Service); ok { 249 service.DeepCopyInto(serviceObj) 250 } 251 return err 252 } 253 254 // WaitForDaemonsetInput is the input for WaitForDaemonset. 255 type WaitForDaemonsetInput struct { 256 Getter framework.Getter 257 DaemonSet *appsv1.DaemonSet 258 Clientset *kubernetes.Clientset 259 } 260 261 // WaitForDaemonset retries during E2E until a daemonset's pods are all Running. 262 func WaitForDaemonset(ctx context.Context, input WaitForDaemonsetInput, intervals ...interface{}) { 263 start := time.Now() 264 namespace, name := input.DaemonSet.GetNamespace(), input.DaemonSet.GetName() 265 Eventually(func() bool { 266 key := client.ObjectKey{Namespace: namespace, Name: name} 267 if err := input.Getter.Get(ctx, key, input.DaemonSet); err == nil { 268 if input.DaemonSet.Status.DesiredNumberScheduled > 0 { 269 Byf("waiting for %d daemonset %s/%s pods to be Running", input.DaemonSet.Status.DesiredNumberScheduled, namespace, name) 270 if input.DaemonSet.Status.DesiredNumberScheduled == input.DaemonSet.Status.NumberReady { 271 Logf("%d daemonset %s/%s pods are running, took %v", input.DaemonSet.Status.NumberReady, namespace, name, time.Since(start)) 272 return true 273 } 274 } else { 275 Byf("daemonset %s/%s has no schedulable nodes, will skip", namespace, name) 276 return true 277 } 278 } 279 return false 280 }, intervals...).Should(BeTrue(), func() string { return DescribeFailedDaemonset(ctx, input) }) 281 } 282 283 // WaitForDaemonsets retries during E2E until all daemonsets pods are all Running. 284 func WaitForDaemonsets(ctx context.Context, clusterProxy framework.ClusterProxy, specName string, intervals ...interface{}) { 285 Expect(clusterProxy).NotTo(BeNil()) 286 cl := clusterProxy.GetClient() 287 var dsList = &appsv1.DaemonSetList{} 288 Eventually(func() error { 289 return cl.List(ctx, dsList) 290 }, intervals...).Should(Succeed()) 291 for i := range dsList.Items { 292 waitForDaemonsetInput := WaitForDaemonsetInput{ 293 DaemonSet: &dsList.Items[i], 294 Clientset: clusterProxy.GetClientSet(), 295 Getter: cl, 296 } 297 WaitForDaemonset(ctx, waitForDaemonsetInput, intervals...) 298 } 299 } 300 301 // DescribeFailedDaemonset returns detailed output to help debug a daemonset failure in e2e. 302 func DescribeFailedDaemonset(ctx context.Context, input WaitForDaemonsetInput) string { 303 namespace, name := input.DaemonSet.GetNamespace(), input.DaemonSet.GetName() 304 b := strings.Builder{} 305 b.WriteString(fmt.Sprintf("Service %s/%s failed", 306 namespace, name)) 307 b.WriteString(fmt.Sprintf("\nService:\n%s\n", prettyPrint(input.DaemonSet))) 308 b.WriteString(describeEvents(ctx, input.Clientset, namespace, name)) 309 return b.String() 310 } 311 312 // WaitForServiceAvailableInput is the input for WaitForServiceAvailable. 313 type WaitForServiceAvailableInput struct { 314 Getter framework.Getter 315 Service *corev1.Service 316 Clientset *kubernetes.Clientset 317 } 318 319 // WaitForServiceAvailable waits until the Service has an IP address available on each Ingress. 320 func WaitForServiceAvailable(ctx context.Context, input WaitForServiceAvailableInput, intervals ...interface{}) { 321 start := time.Now() 322 namespace, name := input.Service.GetNamespace(), input.Service.GetName() 323 Byf("waiting for service %s/%s to be available", namespace, name) 324 Logf("waiting for service %s/%s to be available", namespace, name) 325 Eventually(func() bool { 326 key := client.ObjectKey{Namespace: namespace, Name: name} 327 if err := input.Getter.Get(ctx, key, input.Service); err == nil { 328 ingress := input.Service.Status.LoadBalancer.Ingress 329 if len(ingress) > 0 { 330 for _, i := range ingress { 331 if net.ParseIP(i.IP) == nil { 332 return false 333 } 334 } 335 return true 336 } 337 } 338 return false 339 }, intervals...).Should(BeTrue(), func() string { return DescribeFailedService(ctx, input) }) 340 Logf("service %s/%s is available, took %v", namespace, name, time.Since(start)) 341 } 342 343 // DescribeFailedService returns a string with information to help debug a failed service. 344 func DescribeFailedService(ctx context.Context, input WaitForServiceAvailableInput) string { 345 namespace, name := input.Service.GetNamespace(), input.Service.GetName() 346 b := strings.Builder{} 347 b.WriteString(fmt.Sprintf("Service %s/%s failed", 348 namespace, name)) 349 b.WriteString(fmt.Sprintf("\nService:\n%s\n", prettyPrint(input.Service))) 350 b.WriteString(describeEvents(ctx, input.Clientset, namespace, name)) 351 return b.String() 352 } 353 354 // describeEvents returns a string summarizing recent events involving the named object(s). 355 func describeEvents(ctx context.Context, clientset *kubernetes.Clientset, namespace, name string) string { 356 b := strings.Builder{} 357 if clientset == nil { 358 b.WriteString("clientset is nil, so skipping output of relevant events") 359 } else { 360 opts := metav1.ListOptions{ 361 FieldSelector: fmt.Sprintf("involvedObject.name=%s", name), 362 Limit: 20, 363 } 364 evts, err := clientset.CoreV1().Events(namespace).List(ctx, opts) 365 if err != nil { 366 b.WriteString(err.Error()) 367 } else { 368 w := tabwriter.NewWriter(&b, 0, 4, 2, ' ', tabwriter.FilterHTML) 369 fmt.Fprintln(w, "LAST SEEN\tTYPE\tREASON\tOBJECT\tMESSAGE") 370 for _, e := range evts.Items { 371 fmt.Fprintf(w, "%s\t%s\t%s\t%s/%s\t%s\n", e.LastTimestamp, e.Type, e.Reason, 372 strings.ToLower(e.InvolvedObject.Kind), e.InvolvedObject.Name, e.Message) 373 } 374 w.Flush() 375 } 376 } 377 return b.String() 378 } 379 380 // prettyPrint returns a formatted JSON version of the object given. 381 func prettyPrint(v interface{}) string { 382 b, err := json.MarshalIndent(v, "", " ") 383 if err != nil { 384 return err.Error() 385 } 386 return string(b) 387 } 388 389 // getAvailabilityZonesForRegion uses zone information in availableZonesPerLocation.json 390 // and returns the number of availability zones per region that would support the VM type used for e2e tests. 391 // will return an error if the region isn't recognized 392 // availableZonesPerLocation.json was generated by 393 // az vm list-skus -r "virtualMachines" -z | jq 'map({(.locationInfo[0].location + "_" + .name): .locationInfo[0].zones}) | add' > availableZonesPerLocation.json 394 func getAvailabilityZonesForRegion(location string, size string) ([]string, error) { 395 wd, err := os.Getwd() 396 if err != nil { 397 return nil, err 398 } 399 file, err := os.ReadFile(filepath.Join(wd, "data", "availableZonesPerLocation.json")) 400 if err != nil { 401 return nil, err 402 } 403 var data map[string][]string 404 405 if err := json.Unmarshal(file, &data); err != nil { 406 return nil, err 407 } 408 key := fmt.Sprintf("%s_%s", location, size) 409 410 return data[key], nil 411 } 412 413 // logCheckpoint prints a message indicating the start or end of the current test spec, 414 // including which Ginkgo node it's running on. 415 // 416 // Example output: 417 // 418 // INFO: "With 1 worker node" started at Tue, 22 Sep 2020 13:19:08 PDT on Ginkgo node 2 of 3 419 // INFO: "With 1 worker node" ran for 18m34s on Ginkgo node 2 of 3 420 func logCheckpoint(specTimes map[string]time.Time) { 421 text := CurrentSpecReport().LeafNodeText 422 start, started := specTimes[text] 423 suiteConfig, reporterConfig := GinkgoConfiguration() 424 if !started { 425 start = time.Now() 426 specTimes[text] = start 427 fmt.Fprintf(GinkgoWriter, "INFO: \"%s\" started at %s on Ginkgo node %d of %d and junit test report to file %s\n", text, 428 start.Format(time.RFC1123), GinkgoParallelProcess(), suiteConfig.ParallelTotal, reporterConfig.JUnitReport) 429 } else { 430 elapsed := time.Since(start) 431 fmt.Fprintf(GinkgoWriter, "INFO: \"%s\" ran for %s on Ginkgo node %d of %d and reported junit test to file %s\n", text, 432 elapsed.Round(time.Second), GinkgoParallelProcess(), suiteConfig.ParallelTotal, reporterConfig.JUnitReport) 433 } 434 } 435 436 // getClusterName gets the cluster name for the test cluster 437 // and sets the environment variables that depend on it. 438 func getClusterName(prefix, specName string) string { 439 clusterName := os.Getenv("CLUSTER_NAME") 440 if clusterName == "" { 441 clusterName = fmt.Sprintf("%s-%s", prefix, specName) 442 } 443 fmt.Fprintf(GinkgoWriter, "INFO: Cluster name is %s\n", clusterName) 444 445 Expect(os.Setenv(AzureResourceGroup, clusterName)).To(Succeed()) 446 Expect(os.Setenv(AzureVNetName, fmt.Sprintf("%s-vnet", clusterName))).To(Succeed()) 447 return clusterName 448 } 449 450 func isAzureMachineWindows(am *infrav1.AzureMachine) bool { 451 return am.Spec.OSDisk.OSType == azure.WindowsOS 452 } 453 454 func isAzureMachinePoolWindows(amp *infrav1exp.AzureMachinePool) bool { 455 return amp.Spec.Template.OSDisk.OSType == azure.WindowsOS 456 } 457 458 // getProxiedSSHClient creates a SSH client object that connects to a target node 459 // proxied through a control plane node. 460 func getProxiedSSHClient(controlPlaneEndpoint, hostname, port string, ioTimeout time.Duration) (*ssh.Client, error) { 461 config, err := newSSHConfig() 462 if err != nil { 463 return nil, err 464 } 465 466 // Init a client connection to a control plane node via the public load balancer 467 c, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%s", controlPlaneEndpoint, port), config.Timeout) 468 if err != nil { 469 return nil, errors.Wrapf(err, "dialing public load balancer at %s", controlPlaneEndpoint) 470 } 471 err = c.SetDeadline(time.Now().Add(ioTimeout)) 472 if err != nil { 473 return nil, errors.Wrapf(err, "setting timeout for connection to public load balancer at %s", controlPlaneEndpoint) 474 } 475 conn, chans, reqs, err := ssh.NewClientConn(c, fmt.Sprintf("%s:%s", controlPlaneEndpoint, port), config) 476 if err != nil { 477 return nil, errors.Wrapf(err, "connecting to public load balancer at %s", controlPlaneEndpoint) 478 } 479 lbClient := ssh.NewClient(conn, chans, reqs) 480 481 // Init a connection from the control plane to the target node 482 c, err = lbClient.Dial("tcp", fmt.Sprintf("%s:%s", hostname, port)) 483 if err != nil { 484 return nil, errors.Wrapf(err, "dialing from control plane to target node at %s", hostname) 485 } 486 487 // Establish an authenticated SSH conn over the client -> control plane -> target transport 488 conn, chans, reqs, err = ssh.NewClientConn(c, hostname, config) 489 if err != nil { 490 return nil, errors.Wrap(err, "getting a new SSH client connection") 491 } 492 client := ssh.NewClient(conn, chans, reqs) 493 return client, nil 494 } 495 496 // execOnHost runs the specified command directly on a node's host, using a SSH connection 497 // proxied through a control plane host and copies the output to a file. 498 func execOnHost(controlPlaneEndpoint, hostname, port string, ioTimeout time.Duration, f io.StringWriter, command string, 499 args ...string) error { 500 client, err := getProxiedSSHClient(controlPlaneEndpoint, hostname, port, ioTimeout) 501 if err != nil { 502 return err 503 } 504 505 session, err := client.NewSession() 506 if err != nil { 507 return errors.Wrap(err, "opening SSH session") 508 } 509 defer session.Close() 510 511 // Run the command and write the captured stdout to the file 512 var stdoutBuf bytes.Buffer 513 session.Stdout = &stdoutBuf 514 if len(args) > 0 { 515 command += " " + strings.Join(args, " ") 516 } 517 if err = session.Run(command); err != nil { 518 return errors.Wrapf(err, "running command \"%s\"", command) 519 } 520 if _, err = f.WriteString(stdoutBuf.String()); err != nil { 521 return errors.Wrap(err, "writing output to file") 522 } 523 524 return nil 525 } 526 527 // sftpCopyFile copies a file from a node to the specified destination, using a SSH connection 528 // proxied through a control plane node. 529 func sftpCopyFile(controlPlaneEndpoint, hostname, port string, ioTimeout time.Duration, sourcePath, destPath string) error { 530 Logf("Attempting to copy file %s on node %s to %s", sourcePath, hostname, destPath) 531 532 client, err := getProxiedSSHClient(controlPlaneEndpoint, hostname, port, ioTimeout) 533 if err != nil { 534 return err 535 } 536 537 sftp, err := sftp.NewClient(client) 538 if err != nil { 539 return errors.Wrapf(err, "getting a new sftp client connection") 540 } 541 defer sftp.Close() 542 543 // copy file 544 sourceFile, err := sftp.Open(sourcePath) 545 if err != nil { 546 return errors.Wrapf(err, "opening file %s on node %s", sourcePath, hostname) 547 } 548 defer sourceFile.Close() 549 550 destFile, err := os.Create(destPath) 551 if err != nil { 552 return errors.Wrapf(err, "creating file %s on locally", sourcePath) 553 } 554 defer destFile.Close() 555 556 _, err = sourceFile.WriteTo(destFile) 557 if err != nil { 558 return errors.Wrapf(err, "writing to %s", destPath) 559 } 560 561 return nil 562 } 563 564 // fileOnHost creates the specified path, including parent directories if needed. 565 func fileOnHost(path string) (*os.File, error) { 566 if err := os.MkdirAll(filepath.Dir(path), os.ModePerm); err != nil { 567 return nil, err 568 } 569 return os.Create(path) 570 } 571 572 // newSSHConfig returns an SSH config for a workload cluster in the current e2e test run. 573 func newSSHConfig() (*ssh.ClientConfig, error) { 574 // find private key file used for e2e workload cluster 575 keyfile := os.Getenv("AZURE_SSH_PUBLIC_KEY_FILE") 576 if len(keyfile) > 4 && strings.HasSuffix(keyfile, "pub") { 577 keyfile = keyfile[:(len(keyfile) - 4)] 578 } 579 if keyfile == "" { 580 keyfile = ".sshkey" 581 } 582 if _, err := os.Stat(keyfile); os.IsNotExist(err) { 583 if !filepath.IsAbs(keyfile) { 584 // current working directory may be test/e2e, so look in the project root 585 keyfile = filepath.Join("..", "..", keyfile) 586 } 587 } 588 589 pubkey, err := publicKeyFile(keyfile) 590 if err != nil { 591 return nil, err 592 } 593 sshConfig := ssh.ClientConfig{ 594 HostKeyCallback: ssh.InsecureIgnoreHostKey(), //nolint:gosec // Non-production code 595 User: azure.DefaultUserName, 596 Auth: []ssh.AuthMethod{pubkey}, 597 Timeout: sshConnectionTimeout, 598 } 599 return &sshConfig, nil 600 } 601 602 // publicKeyFile parses and returns the public key from the specified private key file. 603 func publicKeyFile(file string) (ssh.AuthMethod, error) { 604 buffer, err := os.ReadFile(file) 605 if err != nil { 606 return nil, err 607 } 608 signer, err := ssh.ParsePrivateKey(buffer) 609 if err != nil { 610 return nil, err 611 } 612 return ssh.PublicKeys(signer), nil 613 } 614 615 // validateStableReleaseString validates the string format that declares "get be the latest stable release for this <Major>.<Minor>" 616 // it should be called wherever we process a stable version string expression like "stable-1.22" 617 func validateStableReleaseString(stableVersion string) (isStable bool, matches []string) { 618 stableReleaseFormat := regexp.MustCompile(`^stable-(0|[1-9]\d*)\.(0|[1-9]\d*)$`) 619 matches = stableReleaseFormat.FindStringSubmatch(stableVersion) 620 return len(matches) > 0, matches 621 } 622 623 // resolveCIVersion resolves kubernetes version labels (e.g. latest, latest-1.xx) to the corresponding CI version numbers. 624 // Go implementation of https://github.com/kubernetes-sigs/cluster-api/blob/d1dc87d5df3ab12a15ae5b63e50541a191b7fec4/scripts/ci-e2e-lib.sh#L75-L95. 625 func resolveCIVersion(label string) (string, error) { 626 if ciVersion, ok := os.LookupEnv("CI_VERSION"); ok { 627 return ciVersion, nil 628 } 629 if strings.HasPrefix(label, "latest") { 630 if kubernetesVersion, err := latestCIVersion(label); err == nil { 631 return kubernetesVersion, nil 632 } 633 } 634 635 // default to https://dl.k8s.io/ci/latest.txt if the label can't be resolved 636 return kubernetesversions.LatestCIRelease() 637 } 638 639 // latestCIVersion returns the latest CI version of a given label in the form of latest-1.xx. 640 func latestCIVersion(label string) (string, error) { 641 ciVersionURL := fmt.Sprintf("https://dl.k8s.io/ci/%s.txt", label) 642 req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, ciVersionURL, http.NoBody) 643 if err != nil { 644 return "", err 645 } 646 resp, err := http.DefaultClient.Do(req) 647 if err != nil { 648 return "", err 649 } 650 defer resp.Body.Close() 651 b, err := io.ReadAll(resp.Body) 652 if err != nil { 653 return "", err 654 } 655 656 return strings.TrimSpace(string(b)), nil 657 } 658 659 // resolveKubetestRepoListPath will set the correct repo list for Windows: 660 // - if WIN_REPO_URL is set use the custom file downloaded via makefile 661 // - if CI version is "latest" do not set repo list since they are not needed K8s v1.24+ 662 // - if CI version is "latest-1.xx" will compare values and use correct repoList 663 // - if standard version will compare values and use correct repoList 664 // - if unable to determine version falls back to using latest 665 func resolveKubetestRepoListPath(version string, path string) (string, error) { 666 if _, ok := os.LookupEnv("WIN_REPO_URL"); ok { 667 return filepath.Join(path, "custom-repo-list.yaml"), nil 668 } 669 670 if version == "latest" { 671 return "", nil 672 } 673 674 version = strings.TrimPrefix(version, "latest-") 675 currentVersion, err := semver.ParseTolerant(version) 676 if err != nil { 677 return "", err 678 } 679 680 v124, err := semver.Make("1.24.0-alpha.0.0") 681 if err != nil { 682 return "", err 683 } 684 685 if currentVersion.GT(v124) { 686 return "", nil 687 } 688 689 // - prior to K8s v1.21 repo-list-k8sprow.yaml should be used 690 // since all test images need to come from k8sprow.azurecr.io 691 // - starting with K8s v1.24 repo lists repo list is not needed 692 // - use repo-list.yaml for everything in between which has only 693 // some images in k8sprow.azurecr.io 694 695 return filepath.Join(path, "repo-list.yaml"), nil 696 } 697 698 // resolveKubernetesVersions looks at Kubernetes versions set as variables in the e2e config and sets them to a valid k8s version 699 // that has an existing capi offer image available. For example, if the version is "stable-1.22", the function will set it to the latest 1.22 version that has a published reference image. 700 func resolveKubernetesVersions(config *clusterctl.E2EConfig) { 701 ubuntuVersions := getVersionsInOffer(context.TODO(), os.Getenv(AzureLocation), capiImagePublisher, capiOfferName) 702 windowsVersions := getVersionsInOffer(context.TODO(), os.Getenv(AzureLocation), capiImagePublisher, capiWindowsOfferName) 703 flatcarK8sVersions := getFlatcarK8sVersions(context.TODO(), os.Getenv(AzureLocation), flatcarCAPICommunityGallery) 704 705 // find the intersection of ubuntu and windows versions available, since we need an image for both. 706 var versions semver.Versions 707 for k, v := range ubuntuVersions { 708 if _, ok := windowsVersions[k]; ok { 709 versions = append(versions, v) 710 } 711 } 712 713 if config.HasVariable(capi_e2e.KubernetesVersion) { 714 resolveKubernetesVersion(config, versions, capi_e2e.KubernetesVersion) 715 } 716 if config.HasVariable(capi_e2e.KubernetesVersionUpgradeFrom) { 717 resolveKubernetesVersion(config, versions, capi_e2e.KubernetesVersionUpgradeFrom) 718 } 719 if config.HasVariable(capi_e2e.KubernetesVersionUpgradeTo) { 720 resolveKubernetesVersion(config, versions, capi_e2e.KubernetesVersionUpgradeTo) 721 } 722 if config.HasVariable(FlatcarKubernetesVersion) && config.HasVariable(FlatcarVersion) { 723 resolveFlatcarKubernetesVersion(config, flatcarK8sVersions, FlatcarKubernetesVersion) 724 flatcarVersions := getFlatcarVersions(context.TODO(), os.Getenv(AzureLocation), flatcarCAPICommunityGallery, config.GetVariable(FlatcarKubernetesVersion)) 725 resolveFlatcarVersion(config, flatcarVersions, FlatcarVersion) 726 } 727 } 728 729 func resolveKubernetesVersion(config *clusterctl.E2EConfig, versions semver.Versions, varName string) { 730 resolveVariable(config, varName, getLatestVersionForMinor(config.GetVariable(varName), versions, "capi offer")) 731 } 732 733 func resolveVariable(config *clusterctl.E2EConfig, varName, v string) { 734 oldVersion := config.GetVariable(varName) 735 if _, ok := os.LookupEnv(varName); ok { 736 Expect(os.Setenv(varName, v)).To(Succeed()) 737 } 738 config.Variables[varName] = v 739 Logf("Resolved %s (set to %s) to %s", varName, oldVersion, v) 740 } 741 742 func resolveFlatcarKubernetesVersion(config *clusterctl.E2EConfig, versions semver.Versions, varName string) { 743 resolveVariable(config, varName, getLatestVersionForMinor(config.GetVariable(varName), versions, "Flatcar Community Gallery")) 744 } 745 746 func resolveFlatcarVersion(config *clusterctl.E2EConfig, versions semver.Versions, varName string) { 747 version := config.GetVariable(varName) 748 if version != "latest" { 749 Expect(versions).To(ContainElement(semver.MustParse(version)), fmt.Sprintf("Provided Flatcar version %q does not have a corresponding VM image in the Flatcar Community Gallery", version)) 750 } 751 752 if version == "latest" { 753 semver.Sort(versions) 754 version = versions[len(versions)-1].String() 755 } 756 757 resolveVariable(config, varName, version) 758 } 759 760 // newImagesClient returns a new VM images client using environmental settings for auth. 761 func newImagesClient() *armcompute.VirtualMachineImagesClient { 762 cred, err := azidentity.NewDefaultAzureCredential(nil) 763 Expect(err).NotTo(HaveOccurred()) 764 imagesClient, err := armcompute.NewVirtualMachineImagesClient(getSubscriptionID(Default), cred, nil) 765 Expect(err).NotTo(HaveOccurred()) 766 767 return imagesClient 768 } 769 770 func newCommunityGalleryImagesClient() *armcompute.CommunityGalleryImagesClient { 771 cred, err := azidentity.NewDefaultAzureCredential(nil) 772 Expect(err).NotTo(HaveOccurred()) 773 communityGalleryImagesClient, err := armcompute.NewCommunityGalleryImagesClient(getSubscriptionID(Default), cred, nil) 774 Expect(err).NotTo(HaveOccurred()) 775 776 return communityGalleryImagesClient 777 } 778 779 func newCommunityGalleryImageVersionsClient() *armcompute.CommunityGalleryImageVersionsClient { 780 cred, err := azidentity.NewDefaultAzureCredential(nil) 781 Expect(err).NotTo(HaveOccurred()) 782 communityGalleryImageVersionsClient, err := armcompute.NewCommunityGalleryImageVersionsClient(getSubscriptionID(Default), cred, nil) 783 Expect(err).NotTo(HaveOccurred()) 784 785 return communityGalleryImageVersionsClient 786 } 787 788 // getVersionsInOffer returns a map of Kubernetes versions as strings to semver.Versions. 789 func getVersionsInOffer(ctx context.Context, location, publisher, offer string) map[string]semver.Version { 790 Logf("Finding image skus and versions for offer %s/%s in %s", publisher, offer, location) 791 var versions map[string]semver.Version 792 capiSku := regexp.MustCompile(`^[\w-]+-gen[12]$`) 793 capiVersion := regexp.MustCompile(`^(\d)(\d{1,2})\.(\d{1,2})\.\d{8}$`) 794 oldCapiSku := regexp.MustCompile(`^k8s-(0|[1-9][0-9]*)dot(0|[1-9][0-9]*)dot(0|[1-9][0-9]*)-[a-z]*.*$`) 795 imagesClient := newImagesClient() 796 resp, err := imagesClient.ListSKUs(ctx, location, publisher, offer, nil) 797 Expect(err).NotTo(HaveOccurred()) 798 799 skus := resp.VirtualMachineImageResourceArray 800 801 versions = make(map[string]semver.Version, len(skus)) 802 for _, sku := range skus { 803 res, err := imagesClient.List(ctx, location, publisher, offer, *sku.Name, nil) 804 Expect(err).NotTo(HaveOccurred()) 805 // Don't use SKUs without existing images. See https://github.com/Azure/azure-cli/issues/20115. 806 if len(res.VirtualMachineImageResourceArray) > 0 { 807 // New SKUs don't contain the Kubernetes version and are named like "ubuntu-2004-gen1". 808 if match := capiSku.FindStringSubmatch(*sku.Name); len(match) > 0 { 809 for _, vmImage := range res.VirtualMachineImageResourceArray { 810 // Versions are named like "121.13.20220601", for Kubernetes v1.21.13 published on June 1, 2022. 811 match = capiVersion.FindStringSubmatch(*vmImage.Name) 812 stringVer := fmt.Sprintf("%s.%s.%s", match[1], match[2], match[3]) 813 versions[stringVer] = semver.MustParse(stringVer) 814 } 815 continue 816 } 817 // Old SKUs before 1.21.12, 1.22.9, or 1.23.6 are named like "k8s-1dot21dot2-ubuntu-2004". 818 if match := oldCapiSku.FindStringSubmatch(*sku.Name); len(match) > 0 { 819 stringVer := fmt.Sprintf("%s.%s.%s", match[1], match[2], match[3]) 820 versions[stringVer] = semver.MustParse(stringVer) 821 } 822 } 823 } 824 825 return versions 826 } 827 828 // getLatestVersionForMinor gets the latest available patch version in the provided list of sku versions that corresponds to the provided k8s version. 829 func getLatestVersionForMinor(version string, versions semver.Versions, imagesSource string) string { 830 isStable, match := validateStableReleaseString(version) 831 if isStable { 832 // if the version is in the format "stable-1.21", we find the latest 1.21.x version. 833 major, err := strconv.ParseUint(match[1], 10, 64) 834 Expect(err).NotTo(HaveOccurred()) 835 minor, err := strconv.ParseUint(match[2], 10, 64) 836 Expect(err).NotTo(HaveOccurred()) 837 semver.Sort(versions) 838 for i := len(versions) - 1; i >= 0; i-- { 839 if versions[i].Major == major && versions[i].Minor == minor { 840 version = "v" + versions[i].String() 841 break 842 } 843 } 844 } else if v, err := semver.ParseTolerant(version); err == nil { 845 if len(v.Pre) == 0 { 846 // if the version is in the format "v1.21.2", we make sure we have an existing image for it. 847 Expect(versions).To(ContainElement(v), fmt.Sprintf("Provided Kubernetes version %s does not have a corresponding VM image in the %q", version, imagesSource)) 848 } 849 } 850 // otherwise, we just return the version as-is. This allows for versions in other formats, such as "latest" or "latest-1.21". 851 return version 852 } 853 854 func getFlatcarVersions(ctx context.Context, location, galleryName, k8sVersion string) semver.Versions { 855 image := fmt.Sprintf("flatcar-stable-amd64-capi-%s", k8sVersion) 856 857 Logf("Finding Flatcar versions in community gallery %q in location %q for image %q", galleryName, location, image) 858 var versions semver.Versions 859 communityGalleryImageVersionsClient := newCommunityGalleryImageVersionsClient() 860 var imageVersions []*armcompute.CommunityGalleryImageVersion 861 pager := communityGalleryImageVersionsClient.NewListPager(location, galleryName, image, nil) 862 for pager.More() { 863 nextResult, err := pager.NextPage(ctx) 864 Expect(err).NotTo(HaveOccurred()) 865 imageVersions = append(imageVersions, nextResult.Value...) 866 } 867 868 for _, imageVersion := range imageVersions { 869 versions = append(versions, semver.MustParse(*imageVersion.Name)) 870 } 871 872 return versions 873 } 874 875 func getFlatcarK8sVersions(ctx context.Context, location, communityGalleryName string) semver.Versions { 876 Logf("Finding Flatcar images and versions in community gallery %q in location %q", communityGalleryName, location) 877 var versions semver.Versions 878 k8sVersion := regexp.MustCompile(`flatcar-stable-amd64-capi-v(\d+)\.(\d+).(\d+)`) 879 communityGalleryImagesClient := newCommunityGalleryImagesClient() 880 communityGalleryImageVersionsClient := newCommunityGalleryImageVersionsClient() 881 var images []*armcompute.CommunityGalleryImage 882 pager := communityGalleryImagesClient.NewListPager(location, communityGalleryName, nil) 883 for pager.More() { 884 nextResult, err := pager.NextPage(ctx) 885 Expect(err).NotTo(HaveOccurred()) 886 images = append(images, nextResult.Value...) 887 } 888 889 for _, image := range images { 890 var imageVersions []*armcompute.CommunityGalleryImageVersion 891 pager := communityGalleryImageVersionsClient.NewListPager(location, communityGalleryName, *image.Name, nil) 892 for pager.More() { 893 nextResult, err := pager.NextPage(ctx) 894 Expect(err).NotTo(HaveOccurred()) 895 imageVersions = append(imageVersions, nextResult.Value...) 896 } 897 898 if len(imageVersions) == 0 { 899 continue 900 } 901 902 match := k8sVersion.FindStringSubmatch(*image.Name) 903 stringVer := fmt.Sprintf("%s.%s.%s", match[1], match[2], match[3]) 904 versions = append(versions, semver.MustParse(stringVer)) 905 } 906 907 return versions 908 } 909 910 // getPodLogs returns the logs of a pod, or an error in string format. 911 func getPodLogs(ctx context.Context, clientset *kubernetes.Clientset, pod corev1.Pod) string { 912 req := clientset.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{}) 913 logs, err := req.Stream(ctx) 914 if err != nil { 915 return fmt.Sprintf("error streaming logs for pod %s: %v", pod.Name, err) 916 } 917 defer logs.Close() 918 919 b := new(bytes.Buffer) 920 if _, err = io.Copy(b, logs); err != nil { 921 return fmt.Sprintf("error copying logs for pod %s: %v", pod.Name, err) 922 } 923 return b.String() 924 } 925 926 func CopyConfigMap(ctx context.Context, input clusterctl.ApplyCustomClusterTemplateAndWaitInput, cl client.Client, cmName, fromNamespace, toNamespace string) { 927 cm := &corev1.ConfigMap{} 928 Eventually(func(g Gomega) { 929 g.Expect(cl.Get(ctx, client.ObjectKey{Name: cmName, Namespace: fromNamespace}, cm)).To(Succeed()) 930 cm.SetNamespace(toNamespace) 931 cm.SetResourceVersion("") 932 framework.EnsureNamespace(ctx, cl, toNamespace) 933 err := cl.Create(ctx, cm.DeepCopy()) 934 if !apierrors.IsAlreadyExists(err) { 935 g.Expect(err).To(Succeed()) 936 } 937 }, input.WaitForControlPlaneIntervals...).Should(Succeed()) 938 } 939 940 func getSubscriptionID(g Gomega) string { 941 subscriptionID := os.Getenv("AZURE_SUBSCRIPTION_ID") 942 g.Expect(subscriptionID).NotTo(BeEmpty()) 943 return subscriptionID 944 }