github.com/telepresenceio/telepresence/v2@v2.20.0-pro.6.0.20240517030216-236ea954e789/pkg/client/cli/cmd/gather_logs.go (about)

     1  package cmd
     2  
     3  import (
     4  	"archive/zip"
     5  	"bufio"
     6  	"context"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"path/filepath"
    12  	"regexp"
    13  	"strings"
    14  
    15  	"github.com/spf13/cobra"
    16  	"google.golang.org/grpc"
    17  
    18  	"github.com/telepresenceio/telepresence/rpc/v2/connector"
    19  	"github.com/telepresenceio/telepresence/v2/pkg/client"
    20  	"github.com/telepresenceio/telepresence/v2/pkg/client/cli/ann"
    21  	"github.com/telepresenceio/telepresence/v2/pkg/client/cli/connect"
    22  	"github.com/telepresenceio/telepresence/v2/pkg/client/cli/daemon"
    23  	"github.com/telepresenceio/telepresence/v2/pkg/client/scout"
    24  	"github.com/telepresenceio/telepresence/v2/pkg/errcat"
    25  	"github.com/telepresenceio/telepresence/v2/pkg/filelocation"
    26  )
    27  
// gatherLogsCommand holds the values of the command line flags for the
// gather-logs command. The fields are bound to flags in gatherLogs().
type gatherLogsCommand struct {
	outputFile     string // --output-file/-o: path of the zip file to create
	daemons        string // --daemons: all, root, user, kubeauth, or None
	trafficAgents  string // --traffic-agents: all, a name substring, or None
	trafficManager bool   // --traffic-manager: collect logs from the traffic-manager
	anon           bool   // --anonymize/-a: anonymize pod names + namespaces in the logs
	podYaml        bool   // --get-pod-yaml/-y: also get the yaml of the pods logs are gathered for
}
    36  
    37  func gatherLogs() *cobra.Command {
    38  	gl := &gatherLogsCommand{}
    39  	cmd := &cobra.Command{
    40  		Use:   "gather-logs",
    41  		Args:  cobra.NoArgs,
    42  		Short: "Gather logs from traffic-manager, traffic-agent, user and root daemons, and export them into a zip file.",
    43  		Long: `Gather logs from traffic-manager, traffic-agent, user and root daemons,
    44  and export them into a zip file. Useful if you are opening a Github issue or asking
    45  someone to help you debug Telepresence.`,
    46  		Example: `Here are a few examples of how you can use this command:
    47  # Get all logs and export to a given file
    48  telepresence gather-logs -o /tmp/telepresence_logs.zip
    49  
    50  # Get all logs and pod yaml manifests for components in the kubernetes cluster
    51  telepresence gather-logs -o /tmp/telepresence_logs.zip --get-pod-yaml
    52  
    53  # Get all logs for the daemons only
    54  telepresence gather-logs --traffic-agents=None --traffic-manager=False
    55  
    56  # Get all logs for pods that have "echo-easy" in the name, useful if you have multiple replicas
    57  telepresence gather-logs --traffic-manager=False --traffic-agents=echo-easy
    58  
    59  # Get all logs for a specific pod
    60  telepresence gather-logs --traffic-manager=False --traffic-agents=echo-easy-6848967857-tw4jw
    61  
    62  # Get logs from everything except the daemons
    63  telepresence gather-logs --daemons=None
    64  `,
    65  
    66  		RunE: gl.gatherLogs,
    67  		Annotations: map[string]string{
    68  			ann.Session: ann.Optional,
    69  		},
    70  	}
    71  	flags := cmd.Flags()
    72  	flags.StringVarP(&gl.outputFile, "output-file", "o", "", "The file you want to output the logs to.")
    73  	flags.StringVar(&gl.daemons, "daemons", "all", "The daemons you want logs from: all, root, user, kubeauth, None")
    74  	flags.BoolVar(&gl.trafficManager, "traffic-manager", true, "If you want to collect logs from the traffic-manager")
    75  	flags.StringVar(&gl.trafficAgents, "traffic-agents", "all", "Traffic-agents to collect logs from: all, name substring, None")
    76  	flags.BoolVarP(&gl.anon, "anonymize", "a", false, "To anonymize pod names + namespaces from the logs")
    77  	flags.BoolVarP(&gl.podYaml, "get-pod-yaml", "y", false, "Get the yaml of any pods you are getting logs for")
    78  	return cmd
    79  }
    80  
// anonymizer contains the mappings between things we want to anonymize
// and their new, anonymized name.  Using a map instead of simply redacting
// makes it easier for us to maintain certain relationships in the logs (e.g.
// namespaces things are in) which may be helpful in troubleshooting.
//
// Both maps are filled lazily by getPodName and read by anonymizeLog.
type anonymizer struct {
	namespaces map[string]string // real namespace -> anonymized namespace (e.g. "namespace-1")
	podNames   map[string]string // real pod name as passed to getPodName -> anonymized "<pod>.<namespace>" name
}
    89  
    90  // gatherLogs gets the logs from the daemons (daemon + connector) and creates a zip.
    91  func (gl *gatherLogsCommand) gatherLogs(cmd *cobra.Command, _ []string) error {
    92  	if err := connect.InitCommand(cmd); err != nil {
    93  		return err
    94  	}
    95  	ctx := cmd.Context()
    96  	ctx = scout.NewReporter(ctx, "cli")
    97  	scout.Start(ctx)
    98  	defer scout.Close(ctx)
    99  
   100  	// If the user did not provide an outputFile, we'll use their current working directory
   101  	if gl.outputFile == "" {
   102  		pwd, err := os.Getwd()
   103  		if err != nil {
   104  			return errcat.User.New(err)
   105  		}
   106  		gl.outputFile = filepath.Join(pwd, "telepresence_logs.zip")
   107  	} else if !strings.HasSuffix(gl.outputFile, ".zip") {
   108  		return errcat.User.New("output file must end in .zip")
   109  	}
   110  
   111  	// Create a temporary directory where we will store the logs before we zip
   112  	// them for export
   113  	exportDir, err := os.MkdirTemp("", "logexp-")
   114  	if err != nil {
   115  		return errcat.User.New(err)
   116  	}
   117  	defer func() {
   118  		if err := os.RemoveAll(exportDir); err != nil {
   119  			fmt.Fprintf(cmd.ErrOrStderr(), "Failed to remove temp directory %s: %s", exportDir, err)
   120  		}
   121  	}()
   122  
   123  	// First we add the daemonLogs to the export directory
   124  	var daemonLogs []string
   125  	switch gl.daemons {
   126  	case "all":
   127  		daemonLogs = append(daemonLogs, "cli", "connector", "daemon", "kubeauth")
   128  	case "root":
   129  		daemonLogs = append(daemonLogs, "daemon")
   130  	case "user":
   131  		daemonLogs = append(daemonLogs, "connector")
   132  	case "kubeauth":
   133  		daemonLogs = append(daemonLogs, "kubeauth")
   134  	case "None":
   135  	default:
   136  		return errcat.User.New("Options for --daemons are: all, root, user, or None")
   137  	}
   138  	// Add metadata about the request, so we can track usage + see which
   139  	// types of logs people are requesting more frequently.
   140  	// This also gives us an idea about how much usage this command is
   141  	// getting.
   142  	scout.SetMetadatum(ctx, "daemon_logs", daemonLogs)
   143  	scout.SetMetadatum(ctx, "traffic_manager_logs", gl.trafficManager)
   144  	scout.SetMetadatum(ctx, "traffic_agent_logs", gl.trafficAgents)
   145  	scout.SetMetadatum(ctx, "get_pod_yaml", gl.podYaml)
   146  	scout.SetMetadatum(ctx, "anonymized_logs", gl.anon)
   147  	scout.Report(ctx, "used_gather_logs")
   148  
   149  	var az *anonymizer
   150  	if gl.anon {
   151  		az = &anonymizer{
   152  			namespaces: make(map[string]string),
   153  			podNames:   make(map[string]string),
   154  		}
   155  	}
   156  
   157  	// Since getting the logs from k8s requires the connector, let's only do this
   158  	// work if we know the user wants to get logs from k8s.
   159  	// We gather those logs before we gather the connector.log so that problems that
   160  	// may occur during that process will be included in the connector.log
   161  	if gl.trafficManager || gl.trafficAgents != "None" {
   162  		if err := gl.gatherClusterLogs(ctx, exportDir, az); err != nil {
   163  			// We let the user know we were unable to get logs from the kubernetes components,
   164  			// and why, but this shouldn't block the command returning successful with the logs
   165  			// it was able to get.
   166  			fmt.Fprintf(cmd.ErrOrStderr(), "error getting logs from kubernetes components: %s\n", err)
   167  		}
   168  	}
   169  
   170  	// Get all logs from the logDir that match the daemons the user cares about.
   171  	logDir := filelocation.AppUserLogDir(ctx)
   172  	logFiles, err := os.ReadDir(logDir)
   173  	if err != nil {
   174  		return errcat.User.New(err)
   175  	}
   176  	for _, entry := range logFiles {
   177  		if entry.IsDir() {
   178  			continue
   179  		}
   180  		for _, logType := range daemonLogs {
   181  			if strings.Contains(entry.Name(), logType) {
   182  				srcFile := filepath.Join(logDir, entry.Name())
   183  
   184  				// The cli.log is often empty, so this check is relevant.
   185  				empty, err := isEmpty(srcFile)
   186  				if err != nil {
   187  					fmt.Fprintf(cmd.ErrOrStderr(), "failed stat on %s: %s\n", entry.Name(), err)
   188  					continue
   189  				}
   190  				if empty {
   191  					continue
   192  				}
   193  				dstFile := filepath.Join(exportDir, entry.Name())
   194  				if err := copyFiles(dstFile, srcFile); err != nil {
   195  					// We don't want to fail / exit abruptly if we can't copy certain
   196  					// files, but we do want the user to know we were unsuccessful
   197  					fmt.Fprintf(cmd.ErrOrStderr(), "failed exporting %s: %s\n", entry.Name(), err)
   198  					continue
   199  				}
   200  			}
   201  		}
   202  	}
   203  
   204  	// Zip up all the files we've created in the zip directory and return that to the user
   205  	dirEntries, err := os.ReadDir(exportDir)
   206  	files := make([]string, len(dirEntries))
   207  	if err != nil {
   208  		return errcat.User.New(err)
   209  	}
   210  	for i, entry := range dirEntries {
   211  		if entry.IsDir() {
   212  			files = files[:len(files)-1]
   213  			continue
   214  		}
   215  
   216  		fullFileName := filepath.Join(exportDir, entry.Name())
   217  		// anonymize the log if necessary
   218  		if az != nil {
   219  			if err := az.anonymizeLog(fullFileName); err != nil {
   220  				fmt.Fprintf(cmd.ErrOrStderr(), "error anonymizing %s: %s\n", fullFileName, err)
   221  			}
   222  		}
   223  		files[i] = fullFileName
   224  	}
   225  
   226  	if err := zipFiles(files, gl.outputFile); err != nil {
   227  		return errcat.User.New(err)
   228  	}
   229  
   230  	fmt.Fprintf(cmd.OutOrStdout(), "Logs have been exported to %s\n", gl.outputFile)
   231  	return nil
   232  }
   233  
   234  func (gl *gatherLogsCommand) gatherClusterLogs(ctx context.Context, exportDir string, az *anonymizer) error {
   235  	// To get logs from the components in the kubernetes cluster, we ask the
   236  	// traffic-manager.
   237  	rq := &connector.LogsRequest{
   238  		TrafficManager: gl.trafficManager,
   239  		Agents:         gl.trafficAgents,
   240  		GetPodYaml:     gl.podYaml,
   241  		ExportDir:      exportDir,
   242  	}
   243  	userD := daemon.GetUserClient(ctx)
   244  	if userD != nil {
   245  		var opts []grpc.CallOption
   246  		cfg := client.GetConfig(ctx)
   247  		if mz := cfg.Grpc().MaxReceiveSize(); mz > 0 {
   248  			opts = append(opts, grpc.MaxCallRecvMsgSize(int(mz)))
   249  		}
   250  		lr, err := userD.GatherLogs(ctx, rq, opts...)
   251  		if err != nil {
   252  			return err
   253  		}
   254  		if az != nil {
   255  			if err := az.anonymizeFileNames(lr, exportDir); err != nil {
   256  				return err
   257  			}
   258  		}
   259  	}
   260  	return nil
   261  }
   262  
   263  func isEmpty(file string) (bool, error) {
   264  	s, err := os.Stat(file)
   265  	if err != nil {
   266  		return false, err
   267  	}
   268  	return s.Size() == 0, err
   269  }
   270  
   271  // copyFiles copies files from one location into another.
   272  func copyFiles(dstFile, srcFile string) error {
   273  	srcWriter, err := os.Open(srcFile)
   274  	if err != nil {
   275  		return err
   276  	}
   277  	defer srcWriter.Close()
   278  
   279  	dstWriter, err := os.Create(dstFile)
   280  	if err != nil {
   281  		return err
   282  	}
   283  	defer dstWriter.Close()
   284  
   285  	if _, err := io.Copy(dstWriter, srcWriter); err != nil {
   286  		return err
   287  	}
   288  	return nil
   289  }
   290  
   291  // zipFiles creates a zip file with the contents of all the files passed in.
   292  // If some files do not exist, it will include that in the error message,
   293  // but it will still create a zip file with as many files as it can.
   294  func zipFiles(files []string, zipFileName string) error {
   295  	zipFile, err := os.Create(zipFileName)
   296  	if err != nil {
   297  		return err
   298  	}
   299  	defer zipFile.Close()
   300  
   301  	zipWriter := zip.NewWriter(zipFile)
   302  	defer zipWriter.Close()
   303  
   304  	addFileToZip := func(file string) error {
   305  		fd, err := os.Open(file)
   306  		if err != nil {
   307  			return err
   308  		}
   309  		defer fd.Close()
   310  
   311  		// Get the header information from the original file
   312  		fileInfo, err := os.Stat(file)
   313  		if err != nil {
   314  			return err
   315  		}
   316  		fileHeader, err := zip.FileInfoHeader(fileInfo)
   317  		if err != nil {
   318  			return err
   319  		}
   320  		fileHeader.Method = zip.Deflate
   321  		if err != nil {
   322  			return err
   323  		}
   324  
   325  		// Get the basename of the file since that's all we want
   326  		// to include in the zip
   327  		baseName := filepath.Base(file)
   328  
   329  		fileHeader.Name = baseName
   330  		zfd, err := zipWriter.CreateHeader(fileHeader)
   331  		if err != nil {
   332  			return err
   333  		}
   334  		if _, err := io.Copy(zfd, fd); err != nil {
   335  			return err
   336  		}
   337  		return nil
   338  	}
   339  
   340  	// Make a note of the files we fail to add to the zip so users know if the
   341  	// zip is incomplete
   342  	errMsg := ""
   343  	for _, file := range files {
   344  		// If the file doesn't have a name, then we obviously can't add it to
   345  		// the zip. We have handling elsewhere to prevent files like this from
   346  		// getting here but are extra cautious.
   347  		if file == "" {
   348  			continue
   349  		}
   350  		if err := addFileToZip(file); err != nil {
   351  			errMsg += fmt.Sprintf("failed adding %s to zip file: %s ", file, err)
   352  		}
   353  	}
   354  	if errMsg != "" {
   355  		return errors.New(errMsg)
   356  	}
   357  	return nil
   358  }
   359  
   360  // anonymizeFileNames will anonymize the file names of all pods in the connector.LogResponse.
   361  func (a *anonymizer) anonymizeFileNames(lr *connector.LogsResponse, exportDir string) error {
   362  	for n, v := range lr.PodInfo {
   363  		qn := filepath.Join(exportDir, n)
   364  		if v != "ok" {
   365  			// Write the error to retrieve the log as the log content. It's better than nothing
   366  			_ = os.WriteFile(qn, []byte(v), 0o666)
   367  		}
   368  		anonQn := filepath.Join(exportDir, a.getPodName(n))
   369  		if err := os.Rename(qn, anonQn); err != nil {
   370  			return fmt.Errorf("failed to anonymize by renaming file name %s to %s", qn, anonQn)
   371  		}
   372  	}
   373  	return nil
   374  }
   375  
   376  // getPodName returns an anonymized version of the podName. The anonymized value is cached so that
   377  // the same anonymized name will be returned on subsequent calls using the same podName.
   378  func (a *anonymizer) getPodName(podName string) string {
   379  	// If this pod name has already been mapped, return that
   380  	if anonName, ok := a.podNames[podName]; ok {
   381  		return anonName
   382  	}
   383  
   384  	// the podName hasn't been anonymized yet so we split it up
   385  	// so we can anonymize the namespace
   386  	nameComponents := strings.SplitN(podName, ".", 2)
   387  	if len(nameComponents) != 2 {
   388  		// Note: the ordinal here is based on the total number of
   389  		// pods, not the number of anonPods that are found. This
   390  		// shouldn't be a problem because the main goal of this
   391  		// is to make them distinct, but should we ever want the
   392  		// ordinals to be strictly for anonPods, we'll need to
   393  		// make a change here.
   394  		unknownPodName := fmt.Sprintf("anonPod-%d.anonNamespace",
   395  			len(a.podNames)+1)
   396  		a.podNames[podName] = unknownPodName
   397  		return unknownPodName
   398  	}
   399  	var anonPodName, anonNamespace string
   400  	name, namespace := nameComponents[0], nameComponents[1]
   401  	if val, ok := a.namespaces[namespace]; ok {
   402  		anonNamespace = val
   403  	} else {
   404  		anonNamespace = fmt.Sprintf("namespace-%d", len(a.namespaces)+1)
   405  		a.namespaces[namespace] = anonNamespace
   406  	}
   407  
   408  	// we want to special case the traffic-manager so we can easily distinguish
   409  	// between that and the traffic-agents
   410  	if strings.Contains(name, "traffic-manager") {
   411  		anonPodName = fmt.Sprintf("traffic-manager.%s", anonNamespace)
   412  	} else {
   413  		anonPodName = fmt.Sprintf("pod-%d.%s", len(a.podNames)+1, anonNamespace)
   414  	}
   415  	// Store the anonPodName in the map
   416  	a.podNames[podName] = anonPodName
   417  	return anonPodName
   418  }
   419  
   420  // anonymizeLog is a helper function that replaces the namespace + podName
   421  // used in the log with its anonymized version, provided by the anonymizer.
   422  // It overwrites the file with the anonymized version.
   423  func (a *anonymizer) anonymizeLog(logFile string) error {
   424  	// Read the contents we are going to overwrite from the file
   425  	content, err := os.ReadFile(logFile)
   426  	if err != nil {
   427  		return err
   428  	}
   429  	// Open the file with write so we can overwrite it
   430  	stringContent := string(content)
   431  	f, err := os.OpenFile(logFile, os.O_RDWR, 0)
   432  	if err != nil {
   433  		return err
   434  	}
   435  	defer f.Close()
   436  
   437  	// First we replace the actual namespace with the anonymized
   438  	// version.
   439  	for namespace, anonNamespace := range a.namespaces {
   440  		stringContent = strings.ReplaceAll(stringContent, namespace, anonNamespace)
   441  	}
   442  	// Now we do pod name which is a little bit more complicated
   443  	for fullPodName, fullAnonPodName := range a.podNames {
   444  		// strip the namespace off of the anonymized name
   445  		anonPodParts := strings.Split(fullAnonPodName, ".")
   446  		anonPodName := anonPodParts[0]
   447  
   448  		// Strip the namespace off of the podName
   449  		podParts := strings.Split(fullPodName, ".")
   450  
   451  		for _, name := range getSignificantPodNames(podParts[0]) {
   452  			stringContent = strings.ReplaceAll(stringContent, name, anonPodName)
   453  		}
   454  	}
   455  
   456  	// Overwrite the file with the anonymized log
   457  	err = f.Truncate(0)
   458  	if err != nil {
   459  		return err
   460  	}
   461  	_, err = f.Seek(0, 0)
   462  	if err != nil {
   463  		return err
   464  	}
   465  	fdWriter := bufio.NewWriter(f)
   466  	_, err = fdWriter.WriteString(stringContent)
   467  	if err != nil {
   468  		return err
   469  	}
   470  	fdWriter.Flush()
   471  
   472  	return nil
   473  }
   474  
   475  // getSignificantPodNames is a helper function that takes in a
   476  // pod's name and returns the significant subnames that we want
   477  // to anonymize.  It currently works for pods owned by StatefulSets,
   478  // ReplicaSets, and Deployments.
   479  func getSignificantPodNames(podName string) []string {
   480  	// if the pods ends in an ordinal we can be pretty sure it's
   481  	// coming from a StatefulSet.
   482  	statefulSetRegex := regexp.MustCompile("(.*)-([0-9]+)$")
   483  	// ReplicasSets, and therefore Deployments because they create
   484  	// ReplicaSets, have a hash followed by a 5 character identity
   485  	// string attached to the end.
   486  	replicaSetRegex := regexp.MustCompile("(.*)-([0-9a-f]+)-([0-9a-z]{5})$")
   487  	sigNames := []string{}
   488  	switch {
   489  	case statefulSetRegex.MatchString(podName):
   490  		match := statefulSetRegex.FindStringSubmatch(podName)
   491  		appName := match[1]
   492  		// Add the pod name with and without the ordinal
   493  		sigNames = append(sigNames, podName, appName)
   494  	case replicaSetRegex.MatchString(podName):
   495  		match := replicaSetRegex.FindStringSubmatch(podName)
   496  		appName := match[1]
   497  		rsName := fmt.Sprintf("%s-%s", appName, match[2])
   498  		// add the app name with and without generated ReplicaSet hash
   499  		sigNames = append(sigNames, podName, rsName, appName)
   500  	default:
   501  		// For default we don't do anything and will leave sigNames
   502  		// as an empty slice
   503  	}
   504  	return sigNames
   505  }