sigs.k8s.io/cluster-api-provider-azure@v1.17.0/test/e2e/azure_clusterproxy.go (about) 1 //go:build e2e 2 // +build e2e 3 4 /* 5 Copyright 2022 The Kubernetes Authors. 6 7 Licensed under the Apache License, Version 2.0 (the "License"); 8 you may not use this file except in compliance with the License. 9 You may obtain a copy of the License at 10 11 http://www.apache.org/licenses/LICENSE-2.0 12 13 Unless required by applicable law or agreed to in writing, software 14 distributed under the License is distributed on an "AS IS" BASIS, 15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 See the License for the specific language governing permissions and 17 limitations under the License. 18 */ 19 20 package e2e 21 22 import ( 23 "bufio" 24 "context" 25 "encoding/json" 26 "errors" 27 "fmt" 28 "io" 29 "os" 30 "path" 31 "path/filepath" 32 "strings" 33 "time" 34 35 "github.com/Azure/azure-sdk-for-go/sdk/azidentity" 36 "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/monitor/armmonitor" 37 asocontainerservicev1 "github.com/Azure/azure-service-operator/v2/api/containerservice/v1api20231001" 38 asocontainerservicev1preview "github.com/Azure/azure-service-operator/v2/api/containerservice/v1api20231102preview" 39 asoresourcesv1 "github.com/Azure/azure-service-operator/v2/api/resources/v1api20200601" 40 . "github.com/onsi/ginkgo/v2" 41 . "github.com/onsi/gomega" 42 corev1 "k8s.io/api/core/v1" 43 apierrors "k8s.io/apimachinery/pkg/api/errors" 44 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 45 "k8s.io/apimachinery/pkg/runtime" 46 "k8s.io/apimachinery/pkg/runtime/schema" 47 "k8s.io/kubectl/pkg/describe" 48 "k8s.io/utils/ptr" 49 infrav1alpha "sigs.k8s.io/cluster-api-provider-azure/api/v1alpha1" 50 infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" 51 infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" 52 expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" 53 "sigs.k8s.io/cluster-api/test/framework" 54 "sigs.k8s.io/controller-runtime/pkg/client" 55 ) 56 57 type ( 58 AzureClusterProxy struct { 59 framework.ClusterProxy 60 } 61 // myEventData is used to be able to Marshal armmonitor.EventData into JSON 62 // see https://github.com/Azure/azure-sdk-for-go/issues/8224#issuecomment-614777550 63 myEventData armmonitor.EventData 64 ) 65 66 func NewAzureClusterProxy(name string, kubeconfigPath string, options ...framework.Option) *AzureClusterProxy { 67 proxy := framework.NewClusterProxy(name, kubeconfigPath, initScheme(), options...) 68 return &AzureClusterProxy{ 69 ClusterProxy: proxy, 70 } 71 } 72 73 func initScheme() *runtime.Scheme { 74 scheme := runtime.NewScheme() 75 framework.TryAddDefaultSchemes(scheme) 76 Expect(infrav1.AddToScheme(scheme)).To(Succeed()) 77 Expect(infrav1exp.AddToScheme(scheme)).To(Succeed()) 78 Expect(infrav1alpha.AddToScheme(scheme)).To(Succeed()) 79 Expect(expv1.AddToScheme(scheme)).To(Succeed()) 80 Expect(asoresourcesv1.AddToScheme(scheme)).To(Succeed()) 81 Expect(asocontainerservicev1.AddToScheme(scheme)).To(Succeed()) 82 Expect(asocontainerservicev1preview.AddToScheme(scheme)).To(Succeed()) 83 return scheme 84 } 85 86 func (acp *AzureClusterProxy) CollectWorkloadClusterLogs(ctx context.Context, namespace, name, outputPath string) { 87 Logf("Dumping workload cluster %s/%s logs", namespace, name) 88 acp.ClusterProxy.CollectWorkloadClusterLogs(ctx, namespace, name, outputPath) 89 90 aboveMachinesPath := strings.Replace(outputPath, "/machines", "", 1) 91 92 Logf("Dumping workload cluster %s/%s nodes", namespace, name) 93 start := time.Now() 94 acp.collectNodes(ctx, namespace, name, aboveMachinesPath) 95 Logf("Fetching nodes took %s", time.Since(start).String()) 96 97 Logf("Dumping workload cluster %s/%s pod logs", namespace, name) 98 start = time.Now() 99 acp.collectPodLogs(ctx, namespace, name, aboveMachinesPath) 100 Logf("Fetching pod logs took %s", time.Since(start).String()) 101 102 Logf("Dumping workload cluster %s/%s Azure activity log", namespace, name) 103 start = time.Now() 104 acp.collectActivityLogs(ctx, namespace, name, aboveMachinesPath) 105 Logf("Fetching activity logs took %s", time.Since(start).String()) 106 } 107 108 func (acp *AzureClusterProxy) collectPodLogs(ctx context.Context, namespace string, name string, aboveMachinesPath string) { 109 workload := acp.GetWorkloadCluster(ctx, namespace, name) 110 pods := &corev1.PodList{} 111 112 Expect(workload.GetClient().List(ctx, pods)).To(Succeed()) 113 114 var err error 115 var podDescribe string 116 117 podDescriber, ok := describe.DescriberFor(schema.GroupKind{Group: corev1.GroupName, Kind: "Pod"}, workload.GetRESTConfig()) 118 if !ok { 119 Logf("failed to get pod describer") 120 } 121 122 for _, pod := range pods.Items { 123 podNamespace := pod.GetNamespace() 124 125 // Describe the pod. 126 podDescribe, err = podDescriber.Describe(podNamespace, pod.GetName(), describe.DescriberSettings{ShowEvents: true}) 127 if err != nil { 128 Logf("failed to describe pod %s/%s: %v", podNamespace, pod.GetName(), err) 129 } 130 131 // collect the init container logs 132 for _, container := range pod.Spec.InitContainers { 133 // Watch each init container's logs in a goroutine, so we can stream them all concurrently. 134 go collectContainerLogs(ctx, pod, container, aboveMachinesPath, workload) 135 } 136 137 for _, container := range pod.Spec.Containers { 138 // Watch each container's logs in a goroutine, so we can stream them all concurrently. 139 go collectContainerLogs(ctx, pod, container, aboveMachinesPath, workload) 140 } 141 142 Logf("Describing Pod %s/%s", podNamespace, pod.Name) 143 describeFile := path.Join(aboveMachinesPath, podNamespace, pod.Name, "pod-describe.txt") 144 writeLogFile(describeFile, podDescribe) 145 } 146 } 147 148 func collectContainerLogs(ctx context.Context, pod corev1.Pod, container corev1.Container, aboveMachinesPath string, workload framework.ClusterProxy) { 149 defer GinkgoRecover() 150 151 podNamespace := pod.GetNamespace() 152 153 Logf("Creating log watcher for controller %s/%s, container %s", podNamespace, pod.Name, container.Name) 154 logFile := path.Join(aboveMachinesPath, podNamespace, pod.Name, container.Name+".log") 155 if err := os.MkdirAll(filepath.Dir(logFile), 0o755); err != nil { 156 // Failing to mkdir should not cause the test to fail 157 Logf("Error mkdir: %v", err) 158 return 159 } 160 161 f, err := os.OpenFile(logFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) 162 if err != nil { 163 // Failing to fetch logs should not cause the test to fail 164 Logf("Error opening file to write pod logs: %v", err) 165 return 166 } 167 defer f.Close() 168 169 opts := &corev1.PodLogOptions{ 170 Container: container.Name, 171 Follow: true, 172 } 173 174 podLogs, err := workload.GetClientSet().CoreV1().Pods(podNamespace).GetLogs(pod.Name, opts).Stream(ctx) 175 if err != nil { 176 // Failing to stream logs should not cause the test to fail 177 Logf("Error starting logs stream for pod %s/%s, container %s: %v", podNamespace, pod.Name, container.Name, err) 178 return 179 } 180 defer podLogs.Close() 181 182 out := bufio.NewWriter(f) 183 defer out.Flush() 184 _, err = out.ReadFrom(podLogs) 185 if errors.Is(err, io.ErrUnexpectedEOF) { 186 // Failing to stream logs should not cause the test to fail 187 Logf("Got error while streaming logs for pod %s/%s, container %s: %v", podNamespace, pod.Name, container.Name, err) 188 } 189 } 190 191 func (acp *AzureClusterProxy) collectNodes(ctx context.Context, namespace string, name string, aboveMachinesPath string) { 192 workload := acp.GetWorkloadCluster(ctx, namespace, name) 193 nodes := &corev1.NodeList{} 194 195 Expect(workload.GetClient().List(ctx, nodes)).To(Succeed()) 196 197 var err error 198 var nodeDescribe string 199 200 nodeDescriber, ok := describe.DescriberFor(schema.GroupKind{Group: corev1.GroupName, Kind: "Node"}, workload.GetRESTConfig()) 201 if !ok { 202 Logf("failed to get node describer") 203 } 204 205 for _, node := range nodes.Items { 206 // Describe the node. 207 Logf("Describing Node %s", node.GetName()) 208 nodeDescribe, err = nodeDescriber.Describe(node.GetNamespace(), node.GetName(), describe.DescriberSettings{ShowEvents: true}) 209 if err != nil { 210 Logf("failed to describe node %s: %v", node.GetName(), err) 211 } 212 213 describeFile := path.Join(aboveMachinesPath, nodesDir, node.GetName(), "node-describe.txt") 214 writeLogFile(describeFile, nodeDescribe) 215 } 216 } 217 218 func (acp *AzureClusterProxy) collectActivityLogs(ctx context.Context, namespace, name, aboveMachinesPath string) { 219 timeoutctx, cancel := context.WithTimeout(ctx, 30*time.Second) 220 defer cancel() 221 222 cred, err := azidentity.NewDefaultAzureCredential(nil) 223 Expect(err).NotTo(HaveOccurred()) 224 activityLogsClient, err := armmonitor.NewActivityLogsClient(getSubscriptionID(Default), cred, nil) 225 Expect(err).NotTo(HaveOccurred()) 226 227 var groupName string 228 clusterClient := acp.GetClient() 229 workloadCluster, err := getAzureCluster(timeoutctx, clusterClient, namespace, name) 230 if apierrors.IsNotFound(err) { 231 controlPlane, err := getAzureManagedControlPlane(timeoutctx, clusterClient, namespace, name) 232 if apierrors.IsNotFound(err) { 233 asoCluster, err := getAzureASOManagedCluster(timeoutctx, clusterClient, namespace, name) 234 if err != nil { 235 // Failing to fetch logs should not cause the test to fail 236 Logf("Error fetching activity logs for cluster %s in namespace %s. Not able to find the AzureASOManagedCluster on the management cluster: %v", name, namespace, err) 237 return 238 } 239 for _, resource := range asoCluster.Spec.Resources { 240 u := &unstructured.Unstructured{} 241 Expect(u.UnmarshalJSON(resource.Raw)).To(Succeed()) 242 if u.GroupVersionKind().Kind != "ResourceGroup" { 243 continue 244 } 245 // AzureName might not be specified in the CAPZ resource. GET the rg to make sure we have it. 246 rg := &asoresourcesv1.ResourceGroup{} 247 Expect(clusterClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: u.GetName()}, rg)).To(Succeed()) 248 groupName = rg.AzureName() 249 break 250 } 251 } else { 252 if err != nil { 253 // Failing to fetch logs should not cause the test to fail 254 Logf("Error fetching activity logs for cluster %s in namespace %s. Not able to find the AzureManagedControlPlane on the management cluster: %v", name, namespace, err) 255 return 256 } 257 groupName = controlPlane.Spec.ResourceGroupName 258 } 259 } else { 260 if err != nil { 261 // Failing to fetch logs should not cause the test to fail 262 Logf("Error fetching activity logs for cluster %s in namespace %s. Not able to find the workload cluster on the management cluster: %v", name, namespace, err) 263 return 264 } 265 groupName = workloadCluster.Spec.ResourceGroup 266 } 267 268 start := time.Now().Add(-2 * time.Hour).UTC().Format(time.RFC3339) 269 end := time.Now().UTC().Format(time.RFC3339) 270 271 filter := fmt.Sprintf("eventTimestamp ge '%s' and eventTimestamp le '%s' and resourceGroupName eq '%s'", start, end, groupName) 272 pager := activityLogsClient.NewListPager(filter, nil) 273 274 logFile := path.Join(aboveMachinesPath, activitylog, groupName+".log") 275 Expect(os.MkdirAll(filepath.Dir(logFile), 0o755)).To(Succeed()) 276 277 f, err := os.OpenFile(logFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) 278 if err != nil { 279 // Failing to fetch logs should not cause the test to fail 280 Logf("Error opening file to write activity logs: %v", err) 281 return 282 } 283 defer f.Close() 284 out := bufio.NewWriter(f) 285 defer out.Flush() 286 287 for pager.More() { 288 page, err := pager.NextPage(ctx) 289 if err != nil { 290 // Failing to fetch logs should not cause the test to fail 291 Logf("Error getting pager for activity logs in resource group %s: %v", groupName, err) 292 return 293 } 294 for _, event := range page.Value { 295 if ptr.Deref(event.Category.Value, "") != "Policy" { 296 b, err := json.MarshalIndent(myEventData(*event), "", " ") 297 if err != nil { 298 Logf("Got error converting activity logs data to json: %v", err) 299 } 300 if _, err = out.WriteString(string(b) + "\n"); err != nil { 301 Logf("Got error while writing activity logs for resource group %s: %v", groupName, err) 302 } 303 } 304 } 305 } 306 } 307 308 func writeLogFile(logFilepath string, logData string) { 309 go func() { 310 defer GinkgoRecover() 311 312 if err := os.MkdirAll(filepath.Dir(logFilepath), 0o755); err != nil { 313 // Failing to mkdir should not cause the test to fail 314 Logf("Error mkdir: %v", err) 315 return 316 } 317 318 f, err := os.OpenFile(logFilepath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) 319 if err != nil { 320 // Failing to open the file should not cause the test to fail 321 Logf("Error opening file %s to write logs: %v", logFilepath, err) 322 return 323 } 324 defer f.Close() 325 326 out := bufio.NewWriter(f) 327 defer out.Flush() 328 _, err = out.WriteString(logData) 329 if err != nil { 330 // Failing to write a log file should not cause the test to fail 331 Logf("failed to write logFile %s: %v", logFilepath, err) 332 } 333 }() 334 }