github.com/telepresenceio/telepresence/v2@v2.20.0-pro.6.0.20240517030216-236ea954e789/pkg/client/cli/cmd/gather_logs.go (about) 1 package cmd 2 3 import ( 4 "archive/zip" 5 "bufio" 6 "context" 7 "errors" 8 "fmt" 9 "io" 10 "os" 11 "path/filepath" 12 "regexp" 13 "strings" 14 15 "github.com/spf13/cobra" 16 "google.golang.org/grpc" 17 18 "github.com/telepresenceio/telepresence/rpc/v2/connector" 19 "github.com/telepresenceio/telepresence/v2/pkg/client" 20 "github.com/telepresenceio/telepresence/v2/pkg/client/cli/ann" 21 "github.com/telepresenceio/telepresence/v2/pkg/client/cli/connect" 22 "github.com/telepresenceio/telepresence/v2/pkg/client/cli/daemon" 23 "github.com/telepresenceio/telepresence/v2/pkg/client/scout" 24 "github.com/telepresenceio/telepresence/v2/pkg/errcat" 25 "github.com/telepresenceio/telepresence/v2/pkg/filelocation" 26 ) 27 28 type gatherLogsCommand struct { 29 outputFile string 30 daemons string 31 trafficAgents string 32 trafficManager bool 33 anon bool 34 podYaml bool 35 } 36 37 func gatherLogs() *cobra.Command { 38 gl := &gatherLogsCommand{} 39 cmd := &cobra.Command{ 40 Use: "gather-logs", 41 Args: cobra.NoArgs, 42 Short: "Gather logs from traffic-manager, traffic-agent, user and root daemons, and export them into a zip file.", 43 Long: `Gather logs from traffic-manager, traffic-agent, user and root daemons, 44 and export them into a zip file. Useful if you are opening a Github issue or asking 45 someone to help you debug Telepresence.`, 46 Example: `Here are a few examples of how you can use this command: 47 # Get all logs and export to a given file 48 telepresence gather-logs -o /tmp/telepresence_logs.zip 49 50 # Get all logs and pod yaml manifests for components in the kubernetes cluster 51 telepresence gather-logs -o /tmp/telepresence_logs.zip --get-pod-yaml 52 53 # Get all logs for the daemons only 54 telepresence gather-logs --traffic-agents=None --traffic-manager=False 55 56 # Get all logs for pods that have "echo-easy" in the name, useful if you have multiple replicas 57 telepresence gather-logs --traffic-manager=False --traffic-agents=echo-easy 58 59 # Get all logs for a specific pod 60 telepresence gather-logs --traffic-manager=False --traffic-agents=echo-easy-6848967857-tw4jw 61 62 # Get logs from everything except the daemons 63 telepresence gather-logs --daemons=None 64 `, 65 66 RunE: gl.gatherLogs, 67 Annotations: map[string]string{ 68 ann.Session: ann.Optional, 69 }, 70 } 71 flags := cmd.Flags() 72 flags.StringVarP(&gl.outputFile, "output-file", "o", "", "The file you want to output the logs to.") 73 flags.StringVar(&gl.daemons, "daemons", "all", "The daemons you want logs from: all, root, user, kubeauth, None") 74 flags.BoolVar(&gl.trafficManager, "traffic-manager", true, "If you want to collect logs from the traffic-manager") 75 flags.StringVar(&gl.trafficAgents, "traffic-agents", "all", "Traffic-agents to collect logs from: all, name substring, None") 76 flags.BoolVarP(&gl.anon, "anonymize", "a", false, "To anonymize pod names + namespaces from the logs") 77 flags.BoolVarP(&gl.podYaml, "get-pod-yaml", "y", false, "Get the yaml of any pods you are getting logs for") 78 return cmd 79 } 80 81 // anonymizer contains the mappings between things we want to anonymize 82 // and their new, anonymized name. Using a map instead of simply redacting 83 // makes it easier for us to maintain certain relationships in the logs (e.g. 84 // namespaces things are in) which may be helpful in troubleshooting. 85 type anonymizer struct { 86 namespaces map[string]string 87 podNames map[string]string 88 } 89 90 // gatherLogs gets the logs from the daemons (daemon + connector) and creates a zip. 91 func (gl *gatherLogsCommand) gatherLogs(cmd *cobra.Command, _ []string) error { 92 if err := connect.InitCommand(cmd); err != nil { 93 return err 94 } 95 ctx := cmd.Context() 96 ctx = scout.NewReporter(ctx, "cli") 97 scout.Start(ctx) 98 defer scout.Close(ctx) 99 100 // If the user did not provide an outputFile, we'll use their current working directory 101 if gl.outputFile == "" { 102 pwd, err := os.Getwd() 103 if err != nil { 104 return errcat.User.New(err) 105 } 106 gl.outputFile = filepath.Join(pwd, "telepresence_logs.zip") 107 } else if !strings.HasSuffix(gl.outputFile, ".zip") { 108 return errcat.User.New("output file must end in .zip") 109 } 110 111 // Create a temporary directory where we will store the logs before we zip 112 // them for export 113 exportDir, err := os.MkdirTemp("", "logexp-") 114 if err != nil { 115 return errcat.User.New(err) 116 } 117 defer func() { 118 if err := os.RemoveAll(exportDir); err != nil { 119 fmt.Fprintf(cmd.ErrOrStderr(), "Failed to remove temp directory %s: %s", exportDir, err) 120 } 121 }() 122 123 // First we add the daemonLogs to the export directory 124 var daemonLogs []string 125 switch gl.daemons { 126 case "all": 127 daemonLogs = append(daemonLogs, "cli", "connector", "daemon", "kubeauth") 128 case "root": 129 daemonLogs = append(daemonLogs, "daemon") 130 case "user": 131 daemonLogs = append(daemonLogs, "connector") 132 case "kubeauth": 133 daemonLogs = append(daemonLogs, "kubeauth") 134 case "None": 135 default: 136 return errcat.User.New("Options for --daemons are: all, root, user, or None") 137 } 138 // Add metadata about the request, so we can track usage + see which 139 // types of logs people are requesting more frequently. 140 // This also gives us an idea about how much usage this command is 141 // getting. 142 scout.SetMetadatum(ctx, "daemon_logs", daemonLogs) 143 scout.SetMetadatum(ctx, "traffic_manager_logs", gl.trafficManager) 144 scout.SetMetadatum(ctx, "traffic_agent_logs", gl.trafficAgents) 145 scout.SetMetadatum(ctx, "get_pod_yaml", gl.podYaml) 146 scout.SetMetadatum(ctx, "anonymized_logs", gl.anon) 147 scout.Report(ctx, "used_gather_logs") 148 149 var az *anonymizer 150 if gl.anon { 151 az = &anonymizer{ 152 namespaces: make(map[string]string), 153 podNames: make(map[string]string), 154 } 155 } 156 157 // Since getting the logs from k8s requires the connector, let's only do this 158 // work if we know the user wants to get logs from k8s. 159 // We gather those logs before we gather the connector.log so that problems that 160 // may occur during that process will be included in the connector.log 161 if gl.trafficManager || gl.trafficAgents != "None" { 162 if err := gl.gatherClusterLogs(ctx, exportDir, az); err != nil { 163 // We let the user know we were unable to get logs from the kubernetes components, 164 // and why, but this shouldn't block the command returning successful with the logs 165 // it was able to get. 166 fmt.Fprintf(cmd.ErrOrStderr(), "error getting logs from kubernetes components: %s\n", err) 167 } 168 } 169 170 // Get all logs from the logDir that match the daemons the user cares about. 171 logDir := filelocation.AppUserLogDir(ctx) 172 logFiles, err := os.ReadDir(logDir) 173 if err != nil { 174 return errcat.User.New(err) 175 } 176 for _, entry := range logFiles { 177 if entry.IsDir() { 178 continue 179 } 180 for _, logType := range daemonLogs { 181 if strings.Contains(entry.Name(), logType) { 182 srcFile := filepath.Join(logDir, entry.Name()) 183 184 // The cli.log is often empty, so this check is relevant. 185 empty, err := isEmpty(srcFile) 186 if err != nil { 187 fmt.Fprintf(cmd.ErrOrStderr(), "failed stat on %s: %s\n", entry.Name(), err) 188 continue 189 } 190 if empty { 191 continue 192 } 193 dstFile := filepath.Join(exportDir, entry.Name()) 194 if err := copyFiles(dstFile, srcFile); err != nil { 195 // We don't want to fail / exit abruptly if we can't copy certain 196 // files, but we do want the user to know we were unsuccessful 197 fmt.Fprintf(cmd.ErrOrStderr(), "failed exporting %s: %s\n", entry.Name(), err) 198 continue 199 } 200 } 201 } 202 } 203 204 // Zip up all the files we've created in the zip directory and return that to the user 205 dirEntries, err := os.ReadDir(exportDir) 206 files := make([]string, len(dirEntries)) 207 if err != nil { 208 return errcat.User.New(err) 209 } 210 for i, entry := range dirEntries { 211 if entry.IsDir() { 212 files = files[:len(files)-1] 213 continue 214 } 215 216 fullFileName := filepath.Join(exportDir, entry.Name()) 217 // anonymize the log if necessary 218 if az != nil { 219 if err := az.anonymizeLog(fullFileName); err != nil { 220 fmt.Fprintf(cmd.ErrOrStderr(), "error anonymizing %s: %s\n", fullFileName, err) 221 } 222 } 223 files[i] = fullFileName 224 } 225 226 if err := zipFiles(files, gl.outputFile); err != nil { 227 return errcat.User.New(err) 228 } 229 230 fmt.Fprintf(cmd.OutOrStdout(), "Logs have been exported to %s\n", gl.outputFile) 231 return nil 232 } 233 234 func (gl *gatherLogsCommand) gatherClusterLogs(ctx context.Context, exportDir string, az *anonymizer) error { 235 // To get logs from the components in the kubernetes cluster, we ask the 236 // traffic-manager. 237 rq := &connector.LogsRequest{ 238 TrafficManager: gl.trafficManager, 239 Agents: gl.trafficAgents, 240 GetPodYaml: gl.podYaml, 241 ExportDir: exportDir, 242 } 243 userD := daemon.GetUserClient(ctx) 244 if userD != nil { 245 var opts []grpc.CallOption 246 cfg := client.GetConfig(ctx) 247 if mz := cfg.Grpc().MaxReceiveSize(); mz > 0 { 248 opts = append(opts, grpc.MaxCallRecvMsgSize(int(mz))) 249 } 250 lr, err := userD.GatherLogs(ctx, rq, opts...) 251 if err != nil { 252 return err 253 } 254 if az != nil { 255 if err := az.anonymizeFileNames(lr, exportDir); err != nil { 256 return err 257 } 258 } 259 } 260 return nil 261 } 262 263 func isEmpty(file string) (bool, error) { 264 s, err := os.Stat(file) 265 if err != nil { 266 return false, err 267 } 268 return s.Size() == 0, err 269 } 270 271 // copyFiles copies files from one location into another. 272 func copyFiles(dstFile, srcFile string) error { 273 srcWriter, err := os.Open(srcFile) 274 if err != nil { 275 return err 276 } 277 defer srcWriter.Close() 278 279 dstWriter, err := os.Create(dstFile) 280 if err != nil { 281 return err 282 } 283 defer dstWriter.Close() 284 285 if _, err := io.Copy(dstWriter, srcWriter); err != nil { 286 return err 287 } 288 return nil 289 } 290 291 // zipFiles creates a zip file with the contents of all the files passed in. 292 // If some files do not exist, it will include that in the error message, 293 // but it will still create a zip file with as many files as it can. 294 func zipFiles(files []string, zipFileName string) error { 295 zipFile, err := os.Create(zipFileName) 296 if err != nil { 297 return err 298 } 299 defer zipFile.Close() 300 301 zipWriter := zip.NewWriter(zipFile) 302 defer zipWriter.Close() 303 304 addFileToZip := func(file string) error { 305 fd, err := os.Open(file) 306 if err != nil { 307 return err 308 } 309 defer fd.Close() 310 311 // Get the header information from the original file 312 fileInfo, err := os.Stat(file) 313 if err != nil { 314 return err 315 } 316 fileHeader, err := zip.FileInfoHeader(fileInfo) 317 if err != nil { 318 return err 319 } 320 fileHeader.Method = zip.Deflate 321 if err != nil { 322 return err 323 } 324 325 // Get the basename of the file since that's all we want 326 // to include in the zip 327 baseName := filepath.Base(file) 328 329 fileHeader.Name = baseName 330 zfd, err := zipWriter.CreateHeader(fileHeader) 331 if err != nil { 332 return err 333 } 334 if _, err := io.Copy(zfd, fd); err != nil { 335 return err 336 } 337 return nil 338 } 339 340 // Make a note of the files we fail to add to the zip so users know if the 341 // zip is incomplete 342 errMsg := "" 343 for _, file := range files { 344 // If the file doesn't have a name, then we obviously can't add it to 345 // the zip. We have handling elsewhere to prevent files like this from 346 // getting here but are extra cautious. 347 if file == "" { 348 continue 349 } 350 if err := addFileToZip(file); err != nil { 351 errMsg += fmt.Sprintf("failed adding %s to zip file: %s ", file, err) 352 } 353 } 354 if errMsg != "" { 355 return errors.New(errMsg) 356 } 357 return nil 358 } 359 360 // anonymizeFileNames will anonymize the file names of all pods in the connector.LogResponse. 361 func (a *anonymizer) anonymizeFileNames(lr *connector.LogsResponse, exportDir string) error { 362 for n, v := range lr.PodInfo { 363 qn := filepath.Join(exportDir, n) 364 if v != "ok" { 365 // Write the error to retrieve the log as the log content. It's better than nothing 366 _ = os.WriteFile(qn, []byte(v), 0o666) 367 } 368 anonQn := filepath.Join(exportDir, a.getPodName(n)) 369 if err := os.Rename(qn, anonQn); err != nil { 370 return fmt.Errorf("failed to anonymize by renaming file name %s to %s", qn, anonQn) 371 } 372 } 373 return nil 374 } 375 376 // getPodName returns an anonymized version of the podName. The anonymized value is cached so that 377 // the same anonymized name will be returned on subsequent calls using the same podName. 378 func (a *anonymizer) getPodName(podName string) string { 379 // If this pod name has already been mapped, return that 380 if anonName, ok := a.podNames[podName]; ok { 381 return anonName 382 } 383 384 // the podName hasn't been anonymized yet so we split it up 385 // so we can anonymize the namespace 386 nameComponents := strings.SplitN(podName, ".", 2) 387 if len(nameComponents) != 2 { 388 // Note: the ordinal here is based on the total number of 389 // pods, not the number of anonPods that are found. This 390 // shouldn't be a problem because the main goal of this 391 // is to make them distinct, but should we ever want the 392 // ordinals to be strictly for anonPods, we'll need to 393 // make a change here. 394 unknownPodName := fmt.Sprintf("anonPod-%d.anonNamespace", 395 len(a.podNames)+1) 396 a.podNames[podName] = unknownPodName 397 return unknownPodName 398 } 399 var anonPodName, anonNamespace string 400 name, namespace := nameComponents[0], nameComponents[1] 401 if val, ok := a.namespaces[namespace]; ok { 402 anonNamespace = val 403 } else { 404 anonNamespace = fmt.Sprintf("namespace-%d", len(a.namespaces)+1) 405 a.namespaces[namespace] = anonNamespace 406 } 407 408 // we want to special case the traffic-manager so we can easily distinguish 409 // between that and the traffic-agents 410 if strings.Contains(name, "traffic-manager") { 411 anonPodName = fmt.Sprintf("traffic-manager.%s", anonNamespace) 412 } else { 413 anonPodName = fmt.Sprintf("pod-%d.%s", len(a.podNames)+1, anonNamespace) 414 } 415 // Store the anonPodName in the map 416 a.podNames[podName] = anonPodName 417 return anonPodName 418 } 419 420 // anonymizeLog is a helper function that replaces the namespace + podName 421 // used in the log with its anonymized version, provided by the anonymizer. 422 // It overwrites the file with the anonymized version. 423 func (a *anonymizer) anonymizeLog(logFile string) error { 424 // Read the contents we are going to overwrite from the file 425 content, err := os.ReadFile(logFile) 426 if err != nil { 427 return err 428 } 429 // Open the file with write so we can overwrite it 430 stringContent := string(content) 431 f, err := os.OpenFile(logFile, os.O_RDWR, 0) 432 if err != nil { 433 return err 434 } 435 defer f.Close() 436 437 // First we replace the actual namespace with the anonymized 438 // version. 439 for namespace, anonNamespace := range a.namespaces { 440 stringContent = strings.ReplaceAll(stringContent, namespace, anonNamespace) 441 } 442 // Now we do pod name which is a little bit more complicated 443 for fullPodName, fullAnonPodName := range a.podNames { 444 // strip the namespace off of the anonymized name 445 anonPodParts := strings.Split(fullAnonPodName, ".") 446 anonPodName := anonPodParts[0] 447 448 // Strip the namespace off of the podName 449 podParts := strings.Split(fullPodName, ".") 450 451 for _, name := range getSignificantPodNames(podParts[0]) { 452 stringContent = strings.ReplaceAll(stringContent, name, anonPodName) 453 } 454 } 455 456 // Overwrite the file with the anonymized log 457 err = f.Truncate(0) 458 if err != nil { 459 return err 460 } 461 _, err = f.Seek(0, 0) 462 if err != nil { 463 return err 464 } 465 fdWriter := bufio.NewWriter(f) 466 _, err = fdWriter.WriteString(stringContent) 467 if err != nil { 468 return err 469 } 470 fdWriter.Flush() 471 472 return nil 473 } 474 475 // getSignificantPodNames is a helper function that takes in a 476 // pod's name and returns the significant subnames that we want 477 // to anonymize. It currently works for pods owned by StatefulSets, 478 // ReplicaSets, and Deployments. 479 func getSignificantPodNames(podName string) []string { 480 // if the pods ends in an ordinal we can be pretty sure it's 481 // coming from a StatefulSet. 482 statefulSetRegex := regexp.MustCompile("(.*)-([0-9]+)$") 483 // ReplicasSets, and therefore Deployments because they create 484 // ReplicaSets, have a hash followed by a 5 character identity 485 // string attached to the end. 486 replicaSetRegex := regexp.MustCompile("(.*)-([0-9a-f]+)-([0-9a-z]{5})$") 487 sigNames := []string{} 488 switch { 489 case statefulSetRegex.MatchString(podName): 490 match := statefulSetRegex.FindStringSubmatch(podName) 491 appName := match[1] 492 // Add the pod name with and without the ordinal 493 sigNames = append(sigNames, podName, appName) 494 case replicaSetRegex.MatchString(podName): 495 match := replicaSetRegex.FindStringSubmatch(podName) 496 appName := match[1] 497 rsName := fmt.Sprintf("%s-%s", appName, match[2]) 498 // add the app name with and without generated ReplicaSet hash 499 sigNames = append(sigNames, podName, rsName, appName) 500 default: 501 // For default we don't do anything and will leave sigNames 502 // as an empty slice 503 } 504 return sigNames 505 }