github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/runsc/cmd/debug.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cmd
    16  
    17  import (
    18  	"context"
    19  	"os"
    20  	"os/signal"
    21  	"strconv"
    22  	"strings"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/MerlinKodo/gvisor/pkg/log"
    27  	"github.com/MerlinKodo/gvisor/pkg/sentry/control"
    28  	"github.com/MerlinKodo/gvisor/runsc/cmd/util"
    29  	"github.com/MerlinKodo/gvisor/runsc/config"
    30  	"github.com/MerlinKodo/gvisor/runsc/container"
    31  	"github.com/MerlinKodo/gvisor/runsc/flag"
    32  	"github.com/google/subcommands"
    33  	"golang.org/x/sys/unix"
    34  )
    35  
// Debug implements subcommands.Command for the "debug" command.
//
// Each field holds the value of one command-line flag registered in SetFlags.
type Debug struct {
	pid          int           // -pid: sandbox process ID; when non-zero no container ID argument is accepted.
	stacks       bool          // -stacks: dump all sandbox stacks to the log.
	signal       int           // -signal: signal number to send to the sandbox; only sent when > 0 (flag default is -1).
	profileBlock string        // -profile-block: output file for the block profile; empty disables it.
	profileCPU   string        // -profile-cpu: output file for the CPU profile; empty disables it.
	profileHeap  string        // -profile-heap: output file for the heap profile; empty disables it.
	profileMutex string        // -profile-mutex: output file for the mutex profile; empty disables it.
	trace        string        // -trace: output file for the execution trace; empty disables it.
	strace       string        // -strace: comma separated syscalls to trace; "all" enables all, "off" disables all.
	logLevel     string        // -log-level: warning (0), info (1), or debug (2); empty leaves the level unchanged.
	logPackets   string        // -log-packets: boolean string (parsed with strconv.ParseBool); empty leaves it unchanged.
	delay        time.Duration // -delay: passed to the heap profile collection.
	duration     time.Duration // -duration: passed to the block, CPU, mutex and trace collections.
	ps           bool          // -ps: list the container's processes.
}
    53  
    54  // Name implements subcommands.Command.
    55  func (*Debug) Name() string {
    56  	return "debug"
    57  }
    58  
    59  // Synopsis implements subcommands.Command.
    60  func (*Debug) Synopsis() string {
    61  	return "shows a variety of debug information"
    62  }
    63  
    64  // Usage implements subcommands.Command.
    65  func (*Debug) Usage() string {
    66  	return `debug [flags] <container id>`
    67  }
    68  
    69  // SetFlags implements subcommands.Command.
    70  func (d *Debug) SetFlags(f *flag.FlagSet) {
    71  	f.IntVar(&d.pid, "pid", 0, "sandbox process ID. Container ID is not necessary if this is set")
    72  	f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log")
    73  	f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.")
    74  	f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.")
    75  	f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.")
    76  	f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.")
    77  	f.DurationVar(&d.delay, "delay", time.Hour, "amount of time to delay for collecting heap and goroutine profiles.")
    78  	f.DurationVar(&d.duration, "duration", time.Hour, "amount of time to wait for CPU and trace profiles.")
    79  	f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
    80  	f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
    81  	f.StringVar(&d.strace, "strace", "", `A comma separated list of syscalls to trace. "all" enables all traces, "off" disables all.`)
    82  	f.StringVar(&d.logLevel, "log-level", "", "The log level to set: warning (0), info (1), or debug (2).")
    83  	f.StringVar(&d.logPackets, "log-packets", "", "A boolean value to enable or disable packet logging: true or false.")
    84  	f.BoolVar(&d.ps, "ps", false, "lists processes")
    85  }
    86  
    87  // Execute implements subcommands.Command.Execute.
    88  func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcommands.ExitStatus {
    89  	var c *container.Container
    90  	conf := args[0].(*config.Config)
    91  
    92  	if conf.ProfileBlock != "" || conf.ProfileCPU != "" || conf.ProfileHeap != "" || conf.ProfileMutex != "" {
    93  		return util.Errorf("global -profile-{block,cpu,heap,mutex} flags have no effect on runsc debug. Pass runsc debug -profile-{block,cpu,heap,mutex} instead")
    94  	}
    95  	if conf.TraceFile != "" {
    96  		return util.Errorf("global -trace flag has no effect on runsc debug. Pass runsc debug -trace instead")
    97  	}
    98  
    99  	if d.pid == 0 {
   100  		// No pid, container ID must have been provided.
   101  		if f.NArg() != 1 {
   102  			f.Usage()
   103  			return subcommands.ExitUsageError
   104  		}
   105  		id := f.Arg(0)
   106  
   107  		var err error
   108  		c, err = container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{SkipCheck: true})
   109  		if err != nil {
   110  			return util.Errorf("loading container %q: %v", f.Arg(0), err)
   111  		}
   112  	} else {
   113  		if f.NArg() != 0 {
   114  			f.Usage()
   115  			return subcommands.ExitUsageError
   116  		}
   117  		// Go over all sandboxes and find the one that matches PID.
   118  		ids, err := container.ListSandboxes(conf.RootDir)
   119  		if err != nil {
   120  			return util.Errorf("listing containers: %v", err)
   121  		}
   122  		for _, id := range ids {
   123  			candidate, err := container.Load(conf.RootDir, id, container.LoadOpts{Exact: true, SkipCheck: true})
   124  			if err != nil {
   125  				log.Warningf("Skipping container %q: %v", id, err)
   126  				continue
   127  			}
   128  			if candidate.SandboxPid() == d.pid {
   129  				c = candidate
   130  				break
   131  			}
   132  		}
   133  		if c == nil {
   134  			return util.Errorf("container with PID %d not found", d.pid)
   135  		}
   136  	}
   137  
   138  	if !c.IsSandboxRunning() {
   139  		return util.Errorf("container sandbox is not running")
   140  	}
   141  	util.Infof("Found sandbox %q, PID: %d", c.Sandbox.ID, c.Sandbox.Getpid())
   142  
   143  	// Perform synchronous actions.
   144  	if d.signal > 0 {
   145  		pid := c.Sandbox.Getpid()
   146  		util.Infof("Sending signal %d to process: %d", d.signal, pid)
   147  		if err := unix.Kill(pid, unix.Signal(d.signal)); err != nil {
   148  			return util.Errorf("failed to send signal %d to processs %d", d.signal, pid)
   149  		}
   150  	}
   151  	if d.stacks {
   152  		util.Infof("Retrieving sandbox stacks")
   153  		stacks, err := c.Sandbox.Stacks()
   154  		if err != nil {
   155  			return util.Errorf("retrieving stacks: %v", err)
   156  		}
   157  		util.Infof("     *** Stack dump ***\n%s", stacks)
   158  	}
   159  	if d.strace != "" || len(d.logLevel) != 0 || len(d.logPackets) != 0 {
   160  		args := control.LoggingArgs{}
   161  		switch strings.ToLower(d.strace) {
   162  		case "":
   163  			// strace not set, nothing to do here.
   164  
   165  		case "off":
   166  			util.Infof("Disabling strace")
   167  			args.SetStrace = true
   168  
   169  		case "all":
   170  			util.Infof("Enabling all straces")
   171  			args.SetStrace = true
   172  			args.EnableStrace = true
   173  
   174  		default:
   175  			util.Infof("Enabling strace for syscalls: %s", d.strace)
   176  			args.SetStrace = true
   177  			args.EnableStrace = true
   178  			args.StraceAllowlist = strings.Split(d.strace, ",")
   179  		}
   180  
   181  		if len(d.logLevel) != 0 {
   182  			args.SetLevel = true
   183  			switch strings.ToLower(d.logLevel) {
   184  			case "warning", "0":
   185  				args.Level = log.Warning
   186  			case "info", "1":
   187  				args.Level = log.Info
   188  			case "debug", "2":
   189  				args.Level = log.Debug
   190  			default:
   191  				return util.Errorf("invalid log level %q", d.logLevel)
   192  			}
   193  			util.Infof("Setting log level %v", args.Level)
   194  		}
   195  
   196  		if len(d.logPackets) != 0 {
   197  			args.SetLogPackets = true
   198  			lp, err := strconv.ParseBool(d.logPackets)
   199  			if err != nil {
   200  				return util.Errorf("invalid value for log_packets %q", d.logPackets)
   201  			}
   202  			args.LogPackets = lp
   203  			if args.LogPackets {
   204  				util.Infof("Enabling packet logging")
   205  			} else {
   206  				util.Infof("Disabling packet logging")
   207  			}
   208  		}
   209  
   210  		if err := c.Sandbox.ChangeLogging(args); err != nil {
   211  			return util.Errorf(err.Error())
   212  		}
   213  		util.Infof("Logging options changed")
   214  	}
   215  	if d.ps {
   216  		util.Infof("Retrieving process list")
   217  		pList, err := c.Processes()
   218  		if err != nil {
   219  			util.Fatalf("getting processes for container: %v", err)
   220  		}
   221  		o, err := control.ProcessListToJSON(pList)
   222  		if err != nil {
   223  			util.Fatalf("generating JSON: %v", err)
   224  		}
   225  		util.Infof("%s", o)
   226  	}
   227  
   228  	// Open profiling files.
   229  	var (
   230  		blockFile *os.File
   231  		cpuFile   *os.File
   232  		heapFile  *os.File
   233  		mutexFile *os.File
   234  		traceFile *os.File
   235  	)
   236  	if d.profileBlock != "" {
   237  		f, err := os.OpenFile(d.profileBlock, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   238  		if err != nil {
   239  			return util.Errorf("error opening blocking profile output: %v", err)
   240  		}
   241  		defer f.Close()
   242  		blockFile = f
   243  	}
   244  	if d.profileCPU != "" {
   245  		f, err := os.OpenFile(d.profileCPU, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   246  		if err != nil {
   247  			return util.Errorf("error opening cpu profile output: %v", err)
   248  		}
   249  		defer f.Close()
   250  		cpuFile = f
   251  	}
   252  	if d.profileHeap != "" {
   253  		f, err := os.OpenFile(d.profileHeap, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   254  		if err != nil {
   255  			return util.Errorf("error opening heap profile output: %v", err)
   256  		}
   257  		defer f.Close()
   258  		heapFile = f
   259  	}
   260  	if d.profileMutex != "" {
   261  		f, err := os.OpenFile(d.profileMutex, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   262  		if err != nil {
   263  			return util.Errorf("error opening mutex profile output: %v", err)
   264  		}
   265  		defer f.Close()
   266  		mutexFile = f
   267  	}
   268  	if d.trace != "" {
   269  		f, err := os.OpenFile(d.trace, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   270  		if err != nil {
   271  			return util.Errorf("error opening trace profile output: %v", err)
   272  		}
   273  		traceFile = f
   274  	}
   275  
   276  	// Collect profiles.
   277  	var (
   278  		wg       sync.WaitGroup
   279  		blockErr error
   280  		cpuErr   error
   281  		heapErr  error
   282  		mutexErr error
   283  		traceErr error
   284  	)
   285  	if blockFile != nil {
   286  		wg.Add(1)
   287  		go func() {
   288  			defer wg.Done()
   289  			blockErr = c.Sandbox.BlockProfile(blockFile, d.duration)
   290  		}()
   291  	}
   292  	if cpuFile != nil {
   293  		wg.Add(1)
   294  		go func() {
   295  			defer wg.Done()
   296  			cpuErr = c.Sandbox.CPUProfile(cpuFile, d.duration)
   297  		}()
   298  	}
   299  	if heapFile != nil {
   300  		wg.Add(1)
   301  		go func() {
   302  			defer wg.Done()
   303  			heapErr = c.Sandbox.HeapProfile(heapFile, d.delay)
   304  		}()
   305  	}
   306  	if mutexFile != nil {
   307  		wg.Add(1)
   308  		go func() {
   309  			defer wg.Done()
   310  			mutexErr = c.Sandbox.MutexProfile(mutexFile, d.duration)
   311  		}()
   312  	}
   313  	if traceFile != nil {
   314  		wg.Add(1)
   315  		go func() {
   316  			defer wg.Done()
   317  			traceErr = c.Sandbox.Trace(traceFile, d.duration)
   318  		}()
   319  	}
   320  
   321  	// Before sleeping, allow us to catch signals and try to exit
   322  	// gracefully before just exiting. If we can't wait for wg, then
   323  	// we will not be able to read the errors below safely.
   324  	readyChan := make(chan struct{})
   325  	go func() {
   326  		defer close(readyChan)
   327  		wg.Wait()
   328  	}()
   329  	signals := make(chan os.Signal, 1)
   330  	signal.Notify(signals, unix.SIGTERM, unix.SIGINT)
   331  	select {
   332  	case <-readyChan:
   333  		break // Safe to proceed.
   334  	case <-signals:
   335  		util.Infof("caught signal, waiting at most one more second.")
   336  		select {
   337  		case <-signals:
   338  			util.Infof("caught second signal, exiting immediately.")
   339  			os.Exit(1) // Not finished.
   340  		case <-time.After(time.Second):
   341  			util.Infof("timeout, exiting.")
   342  			os.Exit(1) // Not finished.
   343  		case <-readyChan:
   344  			break // Safe to proceed.
   345  		}
   346  	}
   347  
   348  	// Collect all errors.
   349  	errorCount := 0
   350  	if blockErr != nil {
   351  		errorCount++
   352  		util.Infof("error collecting block profile: %v", blockErr)
   353  		os.Remove(blockFile.Name())
   354  	}
   355  	if cpuErr != nil {
   356  		errorCount++
   357  		util.Infof("error collecting cpu profile: %v", cpuErr)
   358  		os.Remove(cpuFile.Name())
   359  	}
   360  	if heapErr != nil {
   361  		errorCount++
   362  		util.Infof("error collecting heap profile: %v", heapErr)
   363  		os.Remove(heapFile.Name())
   364  	}
   365  	if mutexErr != nil {
   366  		errorCount++
   367  		util.Infof("error collecting mutex profile: %v", mutexErr)
   368  		os.Remove(mutexFile.Name())
   369  	}
   370  	if traceErr != nil {
   371  		errorCount++
   372  		util.Infof("error collecting trace profile: %v", traceErr)
   373  		os.Remove(traceFile.Name())
   374  	}
   375  
   376  	if errorCount > 0 {
   377  		return subcommands.ExitFailure
   378  	}
   379  
   380  	return subcommands.ExitSuccess
   381  }