github.com/kubearmor/cilium@v1.6.12/bugtool/cmd/root.go (about)

     1  // Copyright 2017-2020 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cmd
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"io"
    21  	"io/ioutil"
    22  	"net/http"
    23  	"os"
    24  	"os/exec"
    25  	"path/filepath"
    26  	"strings"
    27  	"sync"
    28  	"time"
    29  
    30  	"github.com/cilium/cilium/pkg/components"
    31  	"github.com/cilium/cilium/pkg/defaults"
    32  
    33  	"github.com/spf13/cobra"
    34  )
    35  
    36  // BugtoolRootCmd is the top level command for the bugtool.
    37  var BugtoolRootCmd = &cobra.Command{
    38  	Use:   "cilium-bugtool [OPTIONS]",
    39  	Short: "Collects agent & system information useful for bug reporting",
    40  	Example: `	# Collect information and create archive file
    41  	$ cilium-bugtool
    42  	[...]
    43  
    44  	# Collect and retrieve archive if Cilium is running in a Kubernetes pod
    45  	$ kubectl get pods --namespace kube-system
    46  	NAME                          READY     STATUS    RESTARTS   AGE
    47  	cilium-kg8lv                  1/1       Running   0          13m
    48  	[...]
    49  	$ kubectl -n kube-system exec cilium-kg8lv cilium-bugtool
    50  	$ kubectl cp kube-system/cilium-kg8lv:/tmp/cilium-bugtool-243785589.tar /tmp/cilium-bugtool-243785589.tar`,
    51  	Run: func(cmd *cobra.Command, args []string) {
    52  		runTool()
    53  	},
    54  }
    55  
    56  const (
    57  	disclaimer = `DISCLAIMER
    58  This tool has copied information about your environment.
    59  If you are going to register a issue on GitHub, please
    60  only provide files from the archive you have reviewed
    61  for sensitive information.
    62  `
    63  )
    64  
    65  var (
    66  	archive        bool
    67  	archiveType    string
    68  	k8s            bool
    69  	dumpPath       string
    70  	host           string
    71  	k8sNamespace   string
    72  	k8sLabel       string
    73  	execTimeout    time.Duration
    74  	configPath     string
    75  	dryRunMode     bool
    76  	enableMarkdown bool
    77  	archivePrefix  string
    78  	getPProf       bool
    79  	pprofPort      int
    80  	traceSeconds   int
    81  )
    82  
    83  func init() {
    84  	BugtoolRootCmd.Flags().BoolVar(&archive, "archive", true, "Create archive when false skips deletion of the output directory")
    85  	BugtoolRootCmd.Flags().BoolVar(&getPProf, "get-pprof", false, "When set, only gets the pprof traces from the cilium-agent binary")
    86  	BugtoolRootCmd.Flags().IntVar(&pprofPort, "pprof-port", 6060, "Port on which pprof server is exposed")
    87  	BugtoolRootCmd.Flags().IntVar(&traceSeconds, "pprof-trace-seconds", 180, "Amount of seconds used for pprof CPU traces")
    88  	BugtoolRootCmd.Flags().StringVarP(&archiveType, "archiveType", "o", "tar", "Archive type: tar | gz")
    89  	BugtoolRootCmd.Flags().BoolVar(&k8s, "k8s-mode", false, "Require Kubernetes pods to be found or fail")
    90  	BugtoolRootCmd.Flags().BoolVar(&dryRunMode, "dry-run", false, "Create configuration file of all commands that would have been executed")
    91  	BugtoolRootCmd.Flags().StringVarP(&dumpPath, "tmp", "t", "/tmp", "Path to store extracted files")
    92  	BugtoolRootCmd.Flags().StringVarP(&host, "host", "H", "", "URI to server-side API")
    93  	BugtoolRootCmd.Flags().StringVarP(&k8sNamespace, "k8s-namespace", "", "kube-system", "Kubernetes namespace for Cilium pod")
    94  	BugtoolRootCmd.Flags().StringVarP(&k8sLabel, "k8s-label", "", "k8s-app=cilium", "Kubernetes label for Cilium pod")
    95  	BugtoolRootCmd.Flags().DurationVarP(&execTimeout, "exec-timeout", "", 30*time.Second, "The default timeout for any cmd execution in seconds")
    96  	BugtoolRootCmd.Flags().StringVarP(&configPath, "config", "", "./.cilium-bugtool.config", "Configuration to decide what should be run")
    97  	BugtoolRootCmd.Flags().BoolVar(&enableMarkdown, "enable-markdown", false, "Dump output of commands in markdown format")
    98  	BugtoolRootCmd.Flags().StringVarP(&archivePrefix, "archive-prefix", "", "", "String to prefix to name of archive if created (e.g., with cilium pod-name)")
    99  }
   100  
   101  func getVerifyCiliumPods() (k8sPods []string) {
   102  	if k8s {
   103  		var err error
   104  		// By default try to pick either Kubernetes or non-k8s (host mode). If
   105  		// we find Cilium pod(s) then it's k8s-mode otherwise host mode.
   106  		// Passing extra flags can override the default.
   107  		k8sPods, err = getCiliumPods(k8sNamespace, k8sLabel)
   108  		// When the k8s flag is set, perform extra checks that we actually do have pods or fail.
   109  		if err != nil {
   110  			fmt.Fprintf(os.Stderr, "Error: %s\nFailed to find pods, is kube-apiserver running?\n", err)
   111  			os.Exit(1)
   112  		}
   113  		if len(k8sPods) < 1 {
   114  			fmt.Fprint(os.Stderr, "Found no pods, is kube-apiserver running?\n")
   115  			os.Exit(1)
   116  		}
   117  	}
   118  	if os.Getuid() != 0 && !k8s && len(k8sPods) == 0 {
   119  		// When the k8s flag is not set and the user is not root,
   120  		// debuginfo and BPF related commands can fail.
   121  		fmt.Printf("Warning, some of the BPF commands might fail when run as not root\n")
   122  	}
   123  
   124  	return k8sPods
   125  }
   126  
   127  func removeIfEmpty(dir string) {
   128  	d, err := os.Open(dir)
   129  	if err != nil {
   130  		fmt.Fprintf(os.Stderr, "Failed to open directory %s\n", err)
   131  		return
   132  	}
   133  	defer d.Close()
   134  
   135  	files, err := d.Readdir(-1)
   136  	if err != nil {
   137  		fmt.Fprintf(os.Stderr, "Failed to read directory %s\n", err)
   138  		return
   139  	} else if len(files) == 0 {
   140  		if err := os.Remove(dir); err != nil {
   141  			fmt.Fprintf(os.Stderr, "Failed to delete directory %s\n", err)
   142  			return
   143  		}
   144  	}
   145  
   146  	fmt.Printf("Deleted empty directory %s\n", dir)
   147  }
   148  
   149  func isValidArchiveType(archiveType string) bool {
   150  	switch archiveType {
   151  	case
   152  		"tar",
   153  		"gz":
   154  		return true
   155  	}
   156  	return false
   157  }
   158  
   159  func runTool() {
   160  	// Validate archive type
   161  	if !isValidArchiveType(archiveType) {
   162  		fmt.Fprintf(os.Stderr, "Error: unsupported output type: %s, must be one of tar|gz\n", archiveType)
   163  		os.Exit(1)
   164  	}
   165  
   166  	// Prevent collision with other directories
   167  	nowStr := time.Now().Format("20060102-150405.999-0700-MST")
   168  	var prefix string
   169  	if archivePrefix != "" {
   170  		prefix = fmt.Sprintf("%s-cilium-bugtool-%s-", archivePrefix, nowStr)
   171  	} else {
   172  		prefix = fmt.Sprintf("cilium-bugtool-%s-", nowStr)
   173  	}
   174  	dbgDir, err := ioutil.TempDir(dumpPath, prefix)
   175  	if err != nil {
   176  		fmt.Fprintf(os.Stderr, "Failed to create debug directory %s\n", err)
   177  		os.Exit(1)
   178  	}
   179  	defer cleanup(dbgDir)
   180  	cmdDir := createDir(dbgDir, "cmd")
   181  	confDir := createDir(dbgDir, "conf")
   182  
   183  	k8sPods := getVerifyCiliumPods()
   184  
   185  	var commands []string
   186  	if dryRunMode {
   187  		dryRun(configPath, k8sPods, confDir, cmdDir)
   188  		fmt.Printf("Configuration file at %s\n", configPath)
   189  		return
   190  	}
   191  
   192  	if getPProf {
   193  		err := pprofTraces(cmdDir)
   194  		if err != nil {
   195  			fmt.Fprintf(os.Stderr, "Failed to create debug directory %s\n", err)
   196  			os.Exit(1)
   197  		}
   198  	} else {
   199  		// Check if there is a user supplied configuration
   200  		if config, _ := loadConfigFile(configPath); config != nil {
   201  			// All of of the commands run are from the configuration file
   202  			commands = config.Commands
   203  		}
   204  		if len(commands) == 0 {
   205  			// Found no configuration file or empty so fall back to default commands.
   206  			commands = defaultCommands(confDir, cmdDir, k8sPods)
   207  		}
   208  		defer printDisclaimer()
   209  
   210  		runAll(commands, cmdDir, k8sPods)
   211  	}
   212  
   213  	removeIfEmpty(cmdDir)
   214  	removeIfEmpty(confDir)
   215  
   216  	if archive {
   217  		switch archiveType {
   218  		case "gz":
   219  			gzipPath, err := createGzip(dbgDir)
   220  			if err != nil {
   221  				fmt.Fprintf(os.Stderr, "Failed to create gzip %s\n", err)
   222  				os.Exit(1)
   223  			}
   224  			fmt.Printf("\nGZIP at %s\n", gzipPath)
   225  		case "tar":
   226  			archivePath, err := createArchive(dbgDir)
   227  			if err != nil {
   228  				fmt.Fprintf(os.Stderr, "Failed to create archive %s\n", err)
   229  				os.Exit(1)
   230  			}
   231  			fmt.Printf("\nARCHIVE at %s\n", archivePath)
   232  		}
   233  	} else {
   234  		fmt.Printf("\nDIRECTORY at %s\n", dbgDir)
   235  	}
   236  }
   237  
   238  // dryRun creates the configuration file to show the user what would have been run.
   239  // The same file can be used to modify what will be run by the bugtool.
   240  func dryRun(configPath string, k8sPods []string, confDir, cmdDir string) {
   241  	_, err := setupDefaultConfig(configPath, k8sPods, confDir, cmdDir)
   242  	if err != nil {
   243  		fmt.Printf("Error: %s", err)
   244  		os.Exit(1)
   245  	}
   246  }
   247  
   248  func printDisclaimer() {
   249  	fmt.Print(disclaimer)
   250  }
   251  
   252  func cleanup(dbgDir string) {
   253  	if archive {
   254  		var files []string
   255  
   256  		switch archiveType {
   257  		case "gz":
   258  			files = append(files, dbgDir)
   259  			files = append(files, fmt.Sprintf("%s.tar", dbgDir))
   260  		case "tar":
   261  			files = append(files, dbgDir)
   262  		}
   263  
   264  		for _, file := range files {
   265  			if err := os.RemoveAll(file); err != nil {
   266  				fmt.Fprintf(os.Stderr, "Failed to cleanup temporary files %s\n", err)
   267  			}
   268  		}
   269  	}
   270  }
   271  
   272  func createDir(dbgDir string, newDir string) string {
   273  	confDir := filepath.Join(dbgDir, newDir)
   274  	if err := os.Mkdir(confDir, defaults.RuntimePathRights); err != nil {
   275  		fmt.Fprintf(os.Stderr, "Failed to create %s info directory %s\n", newDir, err)
   276  		return dbgDir
   277  	}
   278  	return confDir
   279  }
   280  
   281  func podPrefix(pod, cmd string) string {
   282  	return fmt.Sprintf("kubectl exec %s -n %s -- %s", pod, k8sNamespace, cmd)
   283  }
   284  
   285  func runAll(commands []string, cmdDir string, k8sPods []string) {
   286  	var numRoutinesAtOnce int
   287  	// Perform sanity check to prevent division by zero
   288  	if l := len(commands); l > 1 {
   289  		numRoutinesAtOnce = l / 2
   290  	} else if l == 1 {
   291  		numRoutinesAtOnce = l
   292  	} else {
   293  		// No commands
   294  		return
   295  	}
   296  	semaphore := make(chan bool, numRoutinesAtOnce)
   297  	for i := 0; i < numRoutinesAtOnce; i++ {
   298  		// This will not block because the channel is buffered and we
   299  		// can write to it numRoutinesAtOnce before the write blocks
   300  		semaphore <- true
   301  	}
   302  
   303  	wg := sync.WaitGroup{}
   304  	for _, cmd := range commands {
   305  		if strings.Contains(cmd, "tables") {
   306  			// iptables commands hold locks so we can't have multiple runs. They
   307  			// have to be run one at a time to avoid 'Another app is currently
   308  			// holding the xtables lock...'
   309  			writeCmdToFile(cmdDir, cmd, k8sPods, enableMarkdown)
   310  			continue
   311  		}
   312  		// Tell the wait group it needs to track another goroutine
   313  		wg.Add(1)
   314  
   315  		// Start a subroutine to run our command
   316  		go func(cmd string) {
   317  			// Once we exit this goroutine completely, signal the
   318  			// original that we are done
   319  			defer wg.Done()
   320  
   321  			// This will wait until an entry in this channel is
   322  			// available to read. We started with numRoutinesAtOnce
   323  			// in there (from above)
   324  			<-semaphore
   325  			// When we are done we return the thing we took from
   326  			// the semaphore, so another goroutine can get it
   327  			defer func() { semaphore <- true }()
   328  			writeCmdToFile(cmdDir, cmd, k8sPods, enableMarkdown)
   329  		}(cmd)
   330  	}
   331  	// Wait for all the spawned goroutines to finish up.
   332  	wg.Wait()
   333  }
   334  
   335  func execCommand(prompt string) (string, error) {
   336  	ctx, cancel := context.WithTimeout(context.Background(), execTimeout)
   337  	defer cancel()
   338  	output, err := exec.CommandContext(ctx, "bash", "-c", prompt).CombinedOutput()
   339  	if ctx.Err() == context.DeadlineExceeded {
   340  		return "", fmt.Errorf("exec timeout")
   341  	}
   342  	return string(output), err
   343  }
   344  
   345  // writeCmdToFile will execute command and write markdown output to a file
   346  func writeCmdToFile(cmdDir, prompt string, k8sPods []string, enableMarkdown bool) {
   347  	// Clean up the filename
   348  	name := strings.Replace(prompt, "/", " ", -1)
   349  	name = strings.Replace(name, " ", "-", -1)
   350  	f, err := os.Create(filepath.Join(cmdDir, name+".md"))
   351  	if err != nil {
   352  		fmt.Fprintf(os.Stderr, "Could not create file %s\n", err)
   353  		return
   354  	}
   355  	defer f.Close()
   356  
   357  	cmd, args := split(prompt)
   358  
   359  	if len(k8sPods) == 0 {
   360  		// The command does not exist, abort.
   361  		if _, err := exec.LookPath(cmd); err != nil {
   362  			os.Remove(f.Name())
   363  			return
   364  		}
   365  	} else if len(args) > 5 {
   366  		// Boundary check is necessary to skip other non exec kubectl
   367  		// commands.
   368  		ctx, cancel := context.WithTimeout(context.Background(), execTimeout)
   369  		defer cancel()
   370  		if _, err := exec.CommandContext(ctx, "kubectl", "exec",
   371  			args[1], "-n", args[3], "--", "which",
   372  			args[5]).CombinedOutput(); err != nil || ctx.Err() == context.DeadlineExceeded {
   373  			os.Remove(f.Name())
   374  			return
   375  		}
   376  	}
   377  	// Write prompt as header and the output as body, and / or error but delete empty output.
   378  	output, err := execCommand(prompt)
   379  	if err != nil {
   380  		fmt.Fprintf(f, fmt.Sprintf("> Error while running '%s':  %s\n\n", prompt, err))
   381  	}
   382  	// We deliberately continue in case there was a error but the output
   383  	// produced might have useful information
   384  	if strings.Contains(output, "```") || !enableMarkdown {
   385  		// Already contains Markdown, print as is.
   386  		fmt.Fprint(f, output)
   387  	} else if enableMarkdown && len(output) > 0 {
   388  		fmt.Fprint(f, fmt.Sprintf("# %s\n\n```\n%s\n```\n", prompt, output))
   389  	} else {
   390  		// Empty file
   391  		os.Remove(f.Name())
   392  	}
   393  }
   394  
   395  // split takes a command prompt and returns the command and arguments separately
   396  func split(prompt string) (string, []string) {
   397  	// Split the command and arguments
   398  	split := strings.Split(prompt, " ")
   399  	argc := len(split)
   400  	var args []string
   401  	cmd := split[0]
   402  
   403  	if argc > 1 {
   404  		args = split[1:]
   405  	}
   406  
   407  	return cmd, args
   408  }
   409  
   410  func getCiliumPods(namespace, label string) ([]string, error) {
   411  	output, err := execCommand(fmt.Sprintf("kubectl -n %s get pods -l %s", namespace, label))
   412  	if err != nil {
   413  		return nil, err
   414  	}
   415  	var ciliumPods []string
   416  
   417  	lines := strings.Split(output, "\n")
   418  
   419  	for _, l := range lines {
   420  		if !strings.HasPrefix(l, "cilium") {
   421  			continue
   422  		}
   423  		// NAME           READY     STATUS    RESTARTS   AGE
   424  		// cilium-cfmww   0/1       Running   0          3m
   425  		// ^
   426  		pod := strings.Split(l, " ")[0]
   427  		ciliumPods = append(ciliumPods, pod)
   428  	}
   429  
   430  	return ciliumPods, nil
   431  }
   432  
   433  func pprofTraces(rootDir string) error {
   434  	var wg sync.WaitGroup
   435  	var profileErr error
   436  	pprofHost := fmt.Sprintf("localhost:%d", pprofPort)
   437  	wg.Add(1)
   438  	go func() {
   439  		url := fmt.Sprintf("http://%s/debug/pprof/profile?seconds=%d", pprofHost, traceSeconds)
   440  		dir := filepath.Join(rootDir, "pprof-cpu")
   441  		profileErr = downloadToFile(url, dir)
   442  		wg.Done()
   443  	}()
   444  
   445  	url := fmt.Sprintf("http://%s/debug/pprof/trace?seconds=%d", pprofHost, traceSeconds)
   446  	dir := filepath.Join(rootDir, "pprof-trace")
   447  	err := downloadToFile(url, dir)
   448  	if err != nil {
   449  		return err
   450  	}
   451  
   452  	url = fmt.Sprintf("http://%s/debug/pprof/heap?debug=1", pprofHost)
   453  	dir = filepath.Join(rootDir, "pprof-heap")
   454  	err = downloadToFile(url, dir)
   455  	if err != nil {
   456  		return err
   457  	}
   458  
   459  	cmd := fmt.Sprintf("gops stack $(pidof %s)", components.CiliumAgentName)
   460  	writeCmdToFile(rootDir, cmd, nil, enableMarkdown)
   461  
   462  	cmd = fmt.Sprintf("gops stats $(pidof %s)", components.CiliumAgentName)
   463  	writeCmdToFile(rootDir, cmd, nil, enableMarkdown)
   464  
   465  	cmd = fmt.Sprintf("gops memstats $(pidof %s)", components.CiliumAgentName)
   466  	writeCmdToFile(rootDir, cmd, nil, enableMarkdown)
   467  
   468  	wg.Wait()
   469  	if profileErr != nil {
   470  		return profileErr
   471  	}
   472  	return nil
   473  }
   474  
   475  func downloadToFile(url, file string) error {
   476  	out, err := os.Create(file)
   477  	if err != nil {
   478  		return err
   479  	}
   480  	defer out.Close()
   481  
   482  	resp, err := http.Get(url)
   483  	if err != nil {
   484  		return err
   485  	}
   486  	defer resp.Body.Close()
   487  	if resp.StatusCode != http.StatusOK {
   488  		return fmt.Errorf("bad status: %s", resp.Status)
   489  	}
   490  	_, err = io.Copy(out, resp.Body)
   491  	return err
   492  }