sigs.k8s.io/cluster-api-provider-azure@v1.14.3/test/e2e/azure_clusterproxy.go (about) 1 //go:build e2e 2 // +build e2e 3 4 /* 5 Copyright 2022 The Kubernetes Authors. 6 7 Licensed under the Apache License, Version 2.0 (the "License"); 8 you may not use this file except in compliance with the License. 9 You may obtain a copy of the License at 10 11 http://www.apache.org/licenses/LICENSE-2.0 12 13 Unless required by applicable law or agreed to in writing, software 14 distributed under the License is distributed on an "AS IS" BASIS, 15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 See the License for the specific language governing permissions and 17 limitations under the License. 18 */ 19 20 package e2e 21 22 import ( 23 "bufio" 24 "context" 25 "encoding/json" 26 "errors" 27 "fmt" 28 "io" 29 "os" 30 "path" 31 "path/filepath" 32 "strings" 33 "time" 34 35 "github.com/Azure/azure-sdk-for-go/sdk/azidentity" 36 "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/monitor/armmonitor" 37 asocontainerservicev1 "github.com/Azure/azure-service-operator/v2/api/containerservice/v1api20231001" 38 . "github.com/onsi/ginkgo/v2" 39 . "github.com/onsi/gomega" 40 corev1 "k8s.io/api/core/v1" 41 apierrors "k8s.io/apimachinery/pkg/api/errors" 42 "k8s.io/apimachinery/pkg/runtime" 43 "k8s.io/apimachinery/pkg/runtime/schema" 44 "k8s.io/kubectl/pkg/describe" 45 "k8s.io/utils/ptr" 46 infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" 47 infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" 48 expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" 49 "sigs.k8s.io/cluster-api/test/framework" 50 ) 51 52 type ( 53 AzureClusterProxy struct { 54 framework.ClusterProxy 55 } 56 // myEventData is used to be able to Marshal armmonitor.EventData into JSON 57 // see https://github.com/Azure/azure-sdk-for-go/issues/8224#issuecomment-614777550 58 myEventData armmonitor.EventData 59 ) 60 61 func NewAzureClusterProxy(name string, kubeconfigPath string, options ...framework.Option) *AzureClusterProxy { 62 proxy := framework.NewClusterProxy(name, kubeconfigPath, initScheme(), options...) 63 return &AzureClusterProxy{ 64 ClusterProxy: proxy, 65 } 66 } 67 68 func initScheme() *runtime.Scheme { 69 scheme := runtime.NewScheme() 70 framework.TryAddDefaultSchemes(scheme) 71 Expect(infrav1.AddToScheme(scheme)).To(Succeed()) 72 Expect(infrav1exp.AddToScheme(scheme)).To(Succeed()) 73 Expect(expv1.AddToScheme(scheme)).To(Succeed()) 74 Expect(asocontainerservicev1.AddToScheme(scheme)).To(Succeed()) 75 return scheme 76 } 77 78 func (acp *AzureClusterProxy) CollectWorkloadClusterLogs(ctx context.Context, namespace, name, outputPath string) { 79 Logf("Dumping workload cluster %s/%s logs", namespace, name) 80 acp.ClusterProxy.CollectWorkloadClusterLogs(ctx, namespace, name, outputPath) 81 82 aboveMachinesPath := strings.Replace(outputPath, "/machines", "", 1) 83 84 Logf("Dumping workload cluster %s/%s nodes", namespace, name) 85 start := time.Now() 86 acp.collectNodes(ctx, namespace, name, aboveMachinesPath) 87 Logf("Fetching nodes took %s", time.Since(start).String()) 88 89 Logf("Dumping workload cluster %s/%s pod logs", namespace, name) 90 start = time.Now() 91 acp.collectPodLogs(ctx, namespace, name, aboveMachinesPath) 92 Logf("Fetching pod logs took %s", time.Since(start).String()) 93 94 Logf("Dumping workload cluster %s/%s Azure activity log", namespace, name) 95 start = time.Now() 96 acp.collectActivityLogs(ctx, namespace, name, aboveMachinesPath) 97 Logf("Fetching activity logs took %s", time.Since(start).String()) 98 } 99 100 func (acp *AzureClusterProxy) collectPodLogs(ctx context.Context, namespace string, name string, aboveMachinesPath string) { 101 workload := acp.GetWorkloadCluster(ctx, namespace, name) 102 pods := &corev1.PodList{} 103 104 Expect(workload.GetClient().List(ctx, pods)).To(Succeed()) 105 106 var err error 107 var podDescribe string 108 109 podDescriber, ok := describe.DescriberFor(schema.GroupKind{Group: corev1.GroupName, Kind: "Pod"}, workload.GetRESTConfig()) 110 if !ok { 111 Logf("failed to get pod describer") 112 } 113 114 for _, pod := range pods.Items { 115 podNamespace := pod.GetNamespace() 116 117 // Describe the pod. 118 podDescribe, err = podDescriber.Describe(podNamespace, pod.GetName(), describe.DescriberSettings{ShowEvents: true}) 119 if err != nil { 120 Logf("failed to describe pod %s/%s: %v", podNamespace, pod.GetName(), err) 121 } 122 123 // collect the init container logs 124 for _, container := range pod.Spec.InitContainers { 125 // Watch each init container's logs in a goroutine, so we can stream them all concurrently. 126 go collectContainerLogs(ctx, pod, container, aboveMachinesPath, workload) 127 } 128 129 for _, container := range pod.Spec.Containers { 130 // Watch each container's logs in a goroutine, so we can stream them all concurrently. 131 go collectContainerLogs(ctx, pod, container, aboveMachinesPath, workload) 132 } 133 134 Logf("Describing Pod %s/%s", podNamespace, pod.Name) 135 describeFile := path.Join(aboveMachinesPath, podNamespace, pod.Name, "pod-describe.txt") 136 writeLogFile(describeFile, podDescribe) 137 } 138 } 139 140 func collectContainerLogs(ctx context.Context, pod corev1.Pod, container corev1.Container, aboveMachinesPath string, workload framework.ClusterProxy) { 141 defer GinkgoRecover() 142 143 podNamespace := pod.GetNamespace() 144 145 Logf("Creating log watcher for controller %s/%s, container %s", podNamespace, pod.Name, container.Name) 146 logFile := path.Join(aboveMachinesPath, podNamespace, pod.Name, container.Name+".log") 147 if err := os.MkdirAll(filepath.Dir(logFile), 0o755); err != nil { 148 // Failing to mkdir should not cause the test to fail 149 Logf("Error mkdir: %v", err) 150 return 151 } 152 153 f, err := os.OpenFile(logFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) 154 if err != nil { 155 // Failing to fetch logs should not cause the test to fail 156 Logf("Error opening file to write pod logs: %v", err) 157 return 158 } 159 defer f.Close() 160 161 opts := &corev1.PodLogOptions{ 162 Container: container.Name, 163 Follow: true, 164 } 165 166 podLogs, err := workload.GetClientSet().CoreV1().Pods(podNamespace).GetLogs(pod.Name, opts).Stream(ctx) 167 if err != nil { 168 // Failing to stream logs should not cause the test to fail 169 Logf("Error starting logs stream for pod %s/%s, container %s: %v", podNamespace, pod.Name, container.Name, err) 170 return 171 } 172 defer podLogs.Close() 173 174 out := bufio.NewWriter(f) 175 defer out.Flush() 176 _, err = out.ReadFrom(podLogs) 177 if errors.Is(err, io.ErrUnexpectedEOF) { 178 // Failing to stream logs should not cause the test to fail 179 Logf("Got error while streaming logs for pod %s/%s, container %s: %v", podNamespace, pod.Name, container.Name, err) 180 } 181 } 182 183 func (acp *AzureClusterProxy) collectNodes(ctx context.Context, namespace string, name string, aboveMachinesPath string) { 184 workload := acp.GetWorkloadCluster(ctx, namespace, name) 185 nodes := &corev1.NodeList{} 186 187 Expect(workload.GetClient().List(ctx, nodes)).To(Succeed()) 188 189 var err error 190 var nodeDescribe string 191 192 nodeDescriber, ok := describe.DescriberFor(schema.GroupKind{Group: corev1.GroupName, Kind: "Node"}, workload.GetRESTConfig()) 193 if !ok { 194 Logf("failed to get node describer") 195 } 196 197 for _, node := range nodes.Items { 198 // Describe the node. 199 Logf("Describing Node %s", node.GetName()) 200 nodeDescribe, err = nodeDescriber.Describe(node.GetNamespace(), node.GetName(), describe.DescriberSettings{ShowEvents: true}) 201 if err != nil { 202 Logf("failed to describe node %s: %v", node.GetName(), err) 203 } 204 205 describeFile := path.Join(aboveMachinesPath, nodesDir, node.GetName(), "node-describe.txt") 206 writeLogFile(describeFile, nodeDescribe) 207 } 208 } 209 210 func (acp *AzureClusterProxy) collectActivityLogs(ctx context.Context, namespace, name, aboveMachinesPath string) { 211 timeoutctx, cancel := context.WithTimeout(ctx, 30*time.Second) 212 defer cancel() 213 214 cred, err := azidentity.NewDefaultAzureCredential(nil) 215 Expect(err).NotTo(HaveOccurred()) 216 activityLogsClient, err := armmonitor.NewActivityLogsClient(getSubscriptionID(Default), cred, nil) 217 Expect(err).NotTo(HaveOccurred()) 218 219 var groupName string 220 clusterClient := acp.GetClient() 221 workloadCluster, err := getAzureCluster(timeoutctx, clusterClient, namespace, name) 222 if apierrors.IsNotFound(err) { 223 controlPlane, err := getAzureManagedControlPlane(timeoutctx, clusterClient, namespace, name) 224 if err != nil { 225 // Failing to fetch logs should not cause the test to fail 226 Logf("Error fetching activity logs for cluster %s in namespace %s. Not able to find the AzureManagedControlPlane on the management cluster: %v", name, namespace, err) 227 return 228 } 229 groupName = controlPlane.Spec.ResourceGroupName 230 } else { 231 if err != nil { 232 // Failing to fetch logs should not cause the test to fail 233 Logf("Error fetching activity logs for cluster %s in namespace %s. Not able to find the workload cluster on the management cluster: %v", name, namespace, err) 234 return 235 } 236 groupName = workloadCluster.Spec.ResourceGroup 237 } 238 239 start := time.Now().Add(-2 * time.Hour).UTC().Format(time.RFC3339) 240 end := time.Now().UTC().Format(time.RFC3339) 241 242 filter := fmt.Sprintf("eventTimestamp ge '%s' and eventTimestamp le '%s' and resourceGroupName eq '%s'", start, end, groupName) 243 pager := activityLogsClient.NewListPager(filter, nil) 244 245 logFile := path.Join(aboveMachinesPath, activitylog, groupName+".log") 246 Expect(os.MkdirAll(filepath.Dir(logFile), 0o755)).To(Succeed()) 247 248 f, err := os.OpenFile(logFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) 249 if err != nil { 250 // Failing to fetch logs should not cause the test to fail 251 Logf("Error opening file to write activity logs: %v", err) 252 return 253 } 254 defer f.Close() 255 out := bufio.NewWriter(f) 256 defer out.Flush() 257 258 for pager.More() { 259 page, err := pager.NextPage(ctx) 260 if err != nil { 261 // Failing to fetch logs should not cause the test to fail 262 Logf("Error getting pager for activity logs in resource group %s: %v", groupName, err) 263 return 264 } 265 for _, event := range page.Value { 266 if ptr.Deref(event.Category.Value, "") != "Policy" { 267 b, err := json.MarshalIndent(myEventData(*event), "", " ") 268 if err != nil { 269 Logf("Got error converting activity logs data to json: %v", err) 270 } 271 if _, err = out.WriteString(string(b) + "\n"); err != nil { 272 Logf("Got error while writing activity logs for resource group %s: %v", groupName, err) 273 } 274 } 275 } 276 } 277 } 278 279 func writeLogFile(logFilepath string, logData string) { 280 go func() { 281 defer GinkgoRecover() 282 283 if err := os.MkdirAll(filepath.Dir(logFilepath), 0o755); err != nil { 284 // Failing to mkdir should not cause the test to fail 285 Logf("Error mkdir: %v", err) 286 return 287 } 288 289 f, err := os.OpenFile(logFilepath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) 290 if err != nil { 291 // Failing to open the file should not cause the test to fail 292 Logf("Error opening file %s to write logs: %v", logFilepath, err) 293 return 294 } 295 defer f.Close() 296 297 out := bufio.NewWriter(f) 298 defer out.Flush() 299 _, err = out.WriteString(logData) 300 if err != nil { 301 // Failing to write a log file should not cause the test to fail 302 Logf("failed to write logFile %s: %v", logFilepath, err) 303 } 304 }() 305 }