gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/runsc/cmd/debug.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cmd
    16  
    17  import (
    18  	"context"
    19  	"os"
    20  	"os/signal"
    21  	"strconv"
    22  	"strings"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/google/subcommands"
    27  	"golang.org/x/sys/unix"
    28  	"gvisor.dev/gvisor/pkg/log"
    29  	"gvisor.dev/gvisor/pkg/sentry/control"
    30  	"gvisor.dev/gvisor/runsc/cmd/util"
    31  	"gvisor.dev/gvisor/runsc/config"
    32  	"gvisor.dev/gvisor/runsc/container"
    33  	"gvisor.dev/gvisor/runsc/flag"
    34  )
    35  
    36  // Debug implements subcommands.Command for the "debug" command.
    37  type Debug struct {
    38  	pid          int
    39  	stacks       bool
    40  	signal       int
    41  	profileBlock string
    42  	profileCPU   string
    43  	profileHeap  string
    44  	profileMutex string
    45  	trace        string
    46  	strace       string
    47  	logLevel     string
    48  	logPackets   string
    49  	delay        time.Duration
    50  	duration     time.Duration
    51  	ps           bool
    52  	mount        string
    53  }
    54  
    55  // Name implements subcommands.Command.
    56  func (*Debug) Name() string {
    57  	return "debug"
    58  }
    59  
    60  // Synopsis implements subcommands.Command.
    61  func (*Debug) Synopsis() string {
    62  	return "shows a variety of debug information"
    63  }
    64  
    65  // Usage implements subcommands.Command.
    66  func (*Debug) Usage() string {
    67  	return `debug [flags] <container id>`
    68  }
    69  
    70  // SetFlags implements subcommands.Command.
    71  func (d *Debug) SetFlags(f *flag.FlagSet) {
    72  	f.IntVar(&d.pid, "pid", 0, "sandbox process ID. Container ID is not necessary if this is set")
    73  	f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log")
    74  	f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.")
    75  	f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.")
    76  	f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.")
    77  	f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.")
    78  	f.DurationVar(&d.delay, "delay", time.Hour, "amount of time to delay for collecting heap and goroutine profiles.")
    79  	f.DurationVar(&d.duration, "duration", time.Hour, "amount of time to wait for CPU and trace profiles.")
    80  	f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
    81  	f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
    82  	f.StringVar(&d.strace, "strace", "", `A comma separated list of syscalls to trace. "all" enables all traces, "off" disables all.`)
    83  	f.StringVar(&d.logLevel, "log-level", "", "The log level to set: warning (0), info (1), or debug (2).")
    84  	f.StringVar(&d.logPackets, "log-packets", "", "A boolean value to enable or disable packet logging: true or false.")
    85  	f.BoolVar(&d.ps, "ps", false, "lists processes")
    86  	f.StringVar(&d.mount, "mount", "", "Mount a filesystem (-mount fstype:source:destination).")
    87  }
    88  
    89  // Execute implements subcommands.Command.Execute.
    90  func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcommands.ExitStatus {
    91  	var c *container.Container
    92  	conf := args[0].(*config.Config)
    93  
    94  	if conf.ProfileBlock != "" || conf.ProfileCPU != "" || conf.ProfileHeap != "" || conf.ProfileMutex != "" {
    95  		return util.Errorf("global -profile-{block,cpu,heap,mutex} flags have no effect on runsc debug. Pass runsc debug -profile-{block,cpu,heap,mutex} instead")
    96  	}
    97  	if conf.TraceFile != "" {
    98  		return util.Errorf("global -trace flag has no effect on runsc debug. Pass runsc debug -trace instead")
    99  	}
   100  
   101  	if d.pid == 0 {
   102  		// No pid, container ID must have been provided.
   103  		if f.NArg() != 1 {
   104  			f.Usage()
   105  			return subcommands.ExitUsageError
   106  		}
   107  		id := f.Arg(0)
   108  
   109  		var err error
   110  		c, err = container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{SkipCheck: true})
   111  		if err != nil {
   112  			return util.Errorf("loading container %q: %v", f.Arg(0), err)
   113  		}
   114  	} else {
   115  		if f.NArg() != 0 {
   116  			f.Usage()
   117  			return subcommands.ExitUsageError
   118  		}
   119  		// Go over all sandboxes and find the one that matches PID.
   120  		ids, err := container.ListSandboxes(conf.RootDir)
   121  		if err != nil {
   122  			return util.Errorf("listing containers: %v", err)
   123  		}
   124  		for _, id := range ids {
   125  			candidate, err := container.Load(conf.RootDir, id, container.LoadOpts{Exact: true, SkipCheck: true})
   126  			if err != nil {
   127  				log.Warningf("Skipping container %q: %v", id, err)
   128  				continue
   129  			}
   130  			if candidate.SandboxPid() == d.pid {
   131  				c = candidate
   132  				break
   133  			}
   134  		}
   135  		if c == nil {
   136  			return util.Errorf("container with PID %d not found", d.pid)
   137  		}
   138  	}
   139  
   140  	if !c.IsSandboxRunning() {
   141  		return util.Errorf("container sandbox is not running")
   142  	}
   143  	util.Infof("Found sandbox %q, PID: %d", c.Sandbox.ID, c.Sandbox.Getpid())
   144  
   145  	// Perform synchronous actions.
   146  	if d.signal > 0 {
   147  		pid := c.Sandbox.Getpid()
   148  		util.Infof("Sending signal %d to process: %d", d.signal, pid)
   149  		if err := unix.Kill(pid, unix.Signal(d.signal)); err != nil {
   150  			return util.Errorf("failed to send signal %d to processs %d", d.signal, pid)
   151  		}
   152  	}
   153  	if d.stacks {
   154  		util.Infof("Retrieving sandbox stacks")
   155  		stacks, err := c.Sandbox.Stacks()
   156  		if err != nil {
   157  			return util.Errorf("retrieving stacks: %v", err)
   158  		}
   159  		util.Infof("     *** Stack dump ***\n%s", stacks)
   160  	}
   161  	if d.strace != "" || len(d.logLevel) != 0 || len(d.logPackets) != 0 {
   162  		args := control.LoggingArgs{}
   163  		switch strings.ToLower(d.strace) {
   164  		case "":
   165  			// strace not set, nothing to do here.
   166  
   167  		case "off":
   168  			util.Infof("Disabling strace")
   169  			args.SetStrace = true
   170  
   171  		case "all":
   172  			util.Infof("Enabling all straces")
   173  			args.SetStrace = true
   174  			args.EnableStrace = true
   175  
   176  		default:
   177  			util.Infof("Enabling strace for syscalls: %s", d.strace)
   178  			args.SetStrace = true
   179  			args.EnableStrace = true
   180  			args.StraceAllowlist = strings.Split(d.strace, ",")
   181  		}
   182  
   183  		if len(d.logLevel) != 0 {
   184  			args.SetLevel = true
   185  			switch strings.ToLower(d.logLevel) {
   186  			case "warning", "0":
   187  				args.Level = log.Warning
   188  			case "info", "1":
   189  				args.Level = log.Info
   190  			case "debug", "2":
   191  				args.Level = log.Debug
   192  			default:
   193  				return util.Errorf("invalid log level %q", d.logLevel)
   194  			}
   195  			util.Infof("Setting log level %v", args.Level)
   196  		}
   197  
   198  		if len(d.logPackets) != 0 {
   199  			args.SetLogPackets = true
   200  			lp, err := strconv.ParseBool(d.logPackets)
   201  			if err != nil {
   202  				return util.Errorf("invalid value for log_packets %q", d.logPackets)
   203  			}
   204  			args.LogPackets = lp
   205  			if args.LogPackets {
   206  				util.Infof("Enabling packet logging")
   207  			} else {
   208  				util.Infof("Disabling packet logging")
   209  			}
   210  		}
   211  
   212  		if err := c.Sandbox.ChangeLogging(args); err != nil {
   213  			return util.Errorf(err.Error())
   214  		}
   215  		util.Infof("Logging options changed")
   216  	}
   217  	if d.ps {
   218  		util.Infof("Retrieving process list")
   219  		pList, err := c.Processes()
   220  		if err != nil {
   221  			util.Fatalf("getting processes for container: %v", err)
   222  		}
   223  		o, err := control.ProcessListToJSON(pList)
   224  		if err != nil {
   225  			util.Fatalf("generating JSON: %v", err)
   226  		}
   227  		util.Infof("%s", o)
   228  	}
   229  	if d.mount != "" {
   230  		opts := strings.Split(d.mount, ":")
   231  		if len(opts) != 3 {
   232  			util.Fatalf("Mount failed: invalid option: %v", d.mount)
   233  		}
   234  		fstype := opts[0]
   235  		src := opts[1]
   236  		dest := opts[2]
   237  		if err := c.Sandbox.Mount(c.ID, fstype, src, dest); err != nil {
   238  			util.Fatalf(err.Error())
   239  		}
   240  	}
   241  
   242  	// Open profiling files.
   243  	var (
   244  		blockFile *os.File
   245  		cpuFile   *os.File
   246  		heapFile  *os.File
   247  		mutexFile *os.File
   248  		traceFile *os.File
   249  	)
   250  	if d.profileBlock != "" {
   251  		f, err := os.OpenFile(d.profileBlock, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   252  		if err != nil {
   253  			return util.Errorf("error opening blocking profile output: %v", err)
   254  		}
   255  		defer f.Close()
   256  		blockFile = f
   257  	}
   258  	if d.profileCPU != "" {
   259  		f, err := os.OpenFile(d.profileCPU, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   260  		if err != nil {
   261  			return util.Errorf("error opening cpu profile output: %v", err)
   262  		}
   263  		defer f.Close()
   264  		cpuFile = f
   265  	}
   266  	if d.profileHeap != "" {
   267  		f, err := os.OpenFile(d.profileHeap, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   268  		if err != nil {
   269  			return util.Errorf("error opening heap profile output: %v", err)
   270  		}
   271  		defer f.Close()
   272  		heapFile = f
   273  	}
   274  	if d.profileMutex != "" {
   275  		f, err := os.OpenFile(d.profileMutex, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   276  		if err != nil {
   277  			return util.Errorf("error opening mutex profile output: %v", err)
   278  		}
   279  		defer f.Close()
   280  		mutexFile = f
   281  	}
   282  	if d.trace != "" {
   283  		f, err := os.OpenFile(d.trace, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   284  		if err != nil {
   285  			return util.Errorf("error opening trace profile output: %v", err)
   286  		}
   287  		traceFile = f
   288  	}
   289  
   290  	// Collect profiles.
   291  	var (
   292  		wg       sync.WaitGroup
   293  		blockErr error
   294  		cpuErr   error
   295  		heapErr  error
   296  		mutexErr error
   297  		traceErr error
   298  	)
   299  	if blockFile != nil {
   300  		wg.Add(1)
   301  		go func() {
   302  			defer wg.Done()
   303  			blockErr = c.Sandbox.BlockProfile(blockFile, d.duration)
   304  		}()
   305  	}
   306  	if cpuFile != nil {
   307  		wg.Add(1)
   308  		go func() {
   309  			defer wg.Done()
   310  			cpuErr = c.Sandbox.CPUProfile(cpuFile, d.duration)
   311  		}()
   312  	}
   313  	if heapFile != nil {
   314  		wg.Add(1)
   315  		go func() {
   316  			defer wg.Done()
   317  			heapErr = c.Sandbox.HeapProfile(heapFile, d.delay)
   318  		}()
   319  	}
   320  	if mutexFile != nil {
   321  		wg.Add(1)
   322  		go func() {
   323  			defer wg.Done()
   324  			mutexErr = c.Sandbox.MutexProfile(mutexFile, d.duration)
   325  		}()
   326  	}
   327  	if traceFile != nil {
   328  		wg.Add(1)
   329  		go func() {
   330  			defer wg.Done()
   331  			traceErr = c.Sandbox.Trace(traceFile, d.duration)
   332  		}()
   333  	}
   334  
   335  	// Before sleeping, allow us to catch signals and try to exit
   336  	// gracefully before just exiting. If we can't wait for wg, then
   337  	// we will not be able to read the errors below safely.
   338  	readyChan := make(chan struct{})
   339  	go func() {
   340  		defer close(readyChan)
   341  		wg.Wait()
   342  	}()
   343  	signals := make(chan os.Signal, 1)
   344  	signal.Notify(signals, unix.SIGTERM, unix.SIGINT)
   345  	select {
   346  	case <-readyChan:
   347  		break // Safe to proceed.
   348  	case <-signals:
   349  		util.Infof("caught signal, waiting at most one more second.")
   350  		select {
   351  		case <-signals:
   352  			util.Infof("caught second signal, exiting immediately.")
   353  			os.Exit(1) // Not finished.
   354  		case <-time.After(time.Second):
   355  			util.Infof("timeout, exiting.")
   356  			os.Exit(1) // Not finished.
   357  		case <-readyChan:
   358  			break // Safe to proceed.
   359  		}
   360  	}
   361  
   362  	// Collect all errors.
   363  	errorCount := 0
   364  	if blockErr != nil {
   365  		errorCount++
   366  		util.Infof("error collecting block profile: %v", blockErr)
   367  		os.Remove(blockFile.Name())
   368  	}
   369  	if cpuErr != nil {
   370  		errorCount++
   371  		util.Infof("error collecting cpu profile: %v", cpuErr)
   372  		os.Remove(cpuFile.Name())
   373  	}
   374  	if heapErr != nil {
   375  		errorCount++
   376  		util.Infof("error collecting heap profile: %v", heapErr)
   377  		os.Remove(heapFile.Name())
   378  	}
   379  	if mutexErr != nil {
   380  		errorCount++
   381  		util.Infof("error collecting mutex profile: %v", mutexErr)
   382  		os.Remove(mutexFile.Name())
   383  	}
   384  	if traceErr != nil {
   385  		errorCount++
   386  		util.Infof("error collecting trace profile: %v", traceErr)
   387  		os.Remove(traceFile.Name())
   388  	}
   389  
   390  	if errorCount > 0 {
   391  		return subcommands.ExitFailure
   392  	}
   393  
   394  	return subcommands.ExitSuccess
   395  }