github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/runsc/cmd/debug.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cmd
    16  
    17  import (
    18  	"context"
    19  	"os"
    20  	"os/signal"
    21  	"strconv"
    22  	"strings"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/google/subcommands"
    27  	"golang.org/x/sys/unix"
    28  	"github.com/SagerNet/gvisor/pkg/log"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/control"
    30  	"github.com/SagerNet/gvisor/runsc/config"
    31  	"github.com/SagerNet/gvisor/runsc/container"
    32  	"github.com/SagerNet/gvisor/runsc/flag"
    33  )
    34  
    35  // Debug implements subcommands.Command for the "debug" command.
    36  type Debug struct {
    37  	pid          int
    38  	stacks       bool
    39  	signal       int
    40  	profileHeap  string
    41  	profileCPU   string
    42  	profileBlock string
    43  	profileMutex string
    44  	trace        string
    45  	strace       string
    46  	logLevel     string
    47  	logPackets   string
    48  	delay        time.Duration
    49  	duration     time.Duration
    50  	ps           bool
    51  }
    52  
    53  // Name implements subcommands.Command.
    54  func (*Debug) Name() string {
    55  	return "debug"
    56  }
    57  
    58  // Synopsis implements subcommands.Command.
    59  func (*Debug) Synopsis() string {
    60  	return "shows a variety of debug information"
    61  }
    62  
    63  // Usage implements subcommands.Command.
    64  func (*Debug) Usage() string {
    65  	return `debug [flags] <container id>`
    66  }
    67  
    68  // SetFlags implements subcommands.Command.
    69  func (d *Debug) SetFlags(f *flag.FlagSet) {
    70  	f.IntVar(&d.pid, "pid", 0, "sandbox process ID. Container ID is not necessary if this is set")
    71  	f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log")
    72  	f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.")
    73  	f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.")
    74  	f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.")
    75  	f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.")
    76  	f.DurationVar(&d.delay, "delay", time.Hour, "amount of time to delay for collecting heap and goroutine profiles.")
    77  	f.DurationVar(&d.duration, "duration", time.Hour, "amount of time to wait for CPU and trace profiles.")
    78  	f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
    79  	f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
    80  	f.StringVar(&d.strace, "strace", "", `A comma separated list of syscalls to trace. "all" enables all traces, "off" disables all.`)
    81  	f.StringVar(&d.logLevel, "log-level", "", "The log level to set: warning (0), info (1), or debug (2).")
    82  	f.StringVar(&d.logPackets, "log-packets", "", "A boolean value to enable or disable packet logging: true or false.")
    83  	f.BoolVar(&d.ps, "ps", false, "lists processes")
    84  }
    85  
    86  // Execute implements subcommands.Command.Execute.
    87  func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
    88  	var c *container.Container
    89  	conf := args[0].(*config.Config)
    90  
    91  	if d.pid == 0 {
    92  		// No pid, container ID must have been provided.
    93  		if f.NArg() != 1 {
    94  			f.Usage()
    95  			return subcommands.ExitUsageError
    96  		}
    97  		id := f.Arg(0)
    98  
    99  		var err error
   100  		c, err = container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{})
   101  		if err != nil {
   102  			return Errorf("loading container %q: %v", f.Arg(0), err)
   103  		}
   104  	} else {
   105  		if f.NArg() != 0 {
   106  			f.Usage()
   107  			return subcommands.ExitUsageError
   108  		}
   109  		// Go over all sandboxes and find the one that matches PID.
   110  		ids, err := container.List(conf.RootDir)
   111  		if err != nil {
   112  			return Errorf("listing containers: %v", err)
   113  		}
   114  		for _, id := range ids {
   115  			candidate, err := container.Load(conf.RootDir, id, container.LoadOpts{Exact: true, SkipCheck: true})
   116  			if err != nil {
   117  				log.Warningf("Skipping container %q: %v", id, err)
   118  				continue
   119  			}
   120  			if candidate.SandboxPid() == d.pid {
   121  				c = candidate
   122  				break
   123  			}
   124  		}
   125  		if c == nil {
   126  			return Errorf("container with PID %d not found", d.pid)
   127  		}
   128  	}
   129  
   130  	if !c.IsSandboxRunning() {
   131  		return Errorf("container sandbox is not running")
   132  	}
   133  	log.Infof("Found sandbox %q, PID: %d", c.Sandbox.ID, c.Sandbox.Pid)
   134  
   135  	// Perform synchronous actions.
   136  	if d.signal > 0 {
   137  		log.Infof("Sending signal %d to process: %d", d.signal, c.Sandbox.Pid)
   138  		if err := unix.Kill(c.Sandbox.Pid, unix.Signal(d.signal)); err != nil {
   139  			return Errorf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid)
   140  		}
   141  	}
   142  	if d.stacks {
   143  		log.Infof("Retrieving sandbox stacks")
   144  		stacks, err := c.Sandbox.Stacks()
   145  		if err != nil {
   146  			return Errorf("retrieving stacks: %v", err)
   147  		}
   148  		log.Infof("     *** Stack dump ***\n%s", stacks)
   149  	}
   150  	if d.strace != "" || len(d.logLevel) != 0 || len(d.logPackets) != 0 {
   151  		args := control.LoggingArgs{}
   152  		switch strings.ToLower(d.strace) {
   153  		case "":
   154  			// strace not set, nothing to do here.
   155  
   156  		case "off":
   157  			log.Infof("Disabling strace")
   158  			args.SetStrace = true
   159  
   160  		case "all":
   161  			log.Infof("Enabling all straces")
   162  			args.SetStrace = true
   163  			args.EnableStrace = true
   164  
   165  		default:
   166  			log.Infof("Enabling strace for syscalls: %s", d.strace)
   167  			args.SetStrace = true
   168  			args.EnableStrace = true
   169  			args.StraceWhitelist = strings.Split(d.strace, ",")
   170  		}
   171  
   172  		if len(d.logLevel) != 0 {
   173  			args.SetLevel = true
   174  			switch strings.ToLower(d.logLevel) {
   175  			case "warning", "0":
   176  				args.Level = log.Warning
   177  			case "info", "1":
   178  				args.Level = log.Info
   179  			case "debug", "2":
   180  				args.Level = log.Debug
   181  			default:
   182  				return Errorf("invalid log level %q", d.logLevel)
   183  			}
   184  			log.Infof("Setting log level %v", args.Level)
   185  		}
   186  
   187  		if len(d.logPackets) != 0 {
   188  			args.SetLogPackets = true
   189  			lp, err := strconv.ParseBool(d.logPackets)
   190  			if err != nil {
   191  				return Errorf("invalid value for log_packets %q", d.logPackets)
   192  			}
   193  			args.LogPackets = lp
   194  			if args.LogPackets {
   195  				log.Infof("Enabling packet logging")
   196  			} else {
   197  				log.Infof("Disabling packet logging")
   198  			}
   199  		}
   200  
   201  		if err := c.Sandbox.ChangeLogging(args); err != nil {
   202  			return Errorf(err.Error())
   203  		}
   204  		log.Infof("Logging options changed")
   205  	}
   206  	if d.ps {
   207  		pList, err := c.Processes()
   208  		if err != nil {
   209  			Fatalf("getting processes for container: %v", err)
   210  		}
   211  		o, err := control.ProcessListToJSON(pList)
   212  		if err != nil {
   213  			Fatalf("generating JSON: %v", err)
   214  		}
   215  		log.Infof(o)
   216  	}
   217  
   218  	// Open profiling files.
   219  	var (
   220  		heapFile  *os.File
   221  		cpuFile   *os.File
   222  		traceFile *os.File
   223  		blockFile *os.File
   224  		mutexFile *os.File
   225  	)
   226  	if d.profileHeap != "" {
   227  		f, err := os.OpenFile(d.profileHeap, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   228  		if err != nil {
   229  			return Errorf("error opening heap profile output: %v", err)
   230  		}
   231  		defer f.Close()
   232  		heapFile = f
   233  	}
   234  	if d.profileCPU != "" {
   235  		f, err := os.OpenFile(d.profileCPU, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   236  		if err != nil {
   237  			return Errorf("error opening cpu profile output: %v", err)
   238  		}
   239  		defer f.Close()
   240  		cpuFile = f
   241  	}
   242  	if d.trace != "" {
   243  		f, err := os.OpenFile(d.trace, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   244  		if err != nil {
   245  			return Errorf("error opening trace profile output: %v", err)
   246  		}
   247  		traceFile = f
   248  	}
   249  	if d.profileBlock != "" {
   250  		f, err := os.OpenFile(d.profileBlock, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   251  		if err != nil {
   252  			return Errorf("error opening blocking profile output: %v", err)
   253  		}
   254  		defer f.Close()
   255  		blockFile = f
   256  	}
   257  	if d.profileMutex != "" {
   258  		f, err := os.OpenFile(d.profileMutex, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
   259  		if err != nil {
   260  			return Errorf("error opening mutex profile output: %v", err)
   261  		}
   262  		defer f.Close()
   263  		mutexFile = f
   264  	}
   265  
   266  	// Collect profiles.
   267  	var (
   268  		wg       sync.WaitGroup
   269  		heapErr  error
   270  		cpuErr   error
   271  		traceErr error
   272  		blockErr error
   273  		mutexErr error
   274  	)
   275  	if heapFile != nil {
   276  		wg.Add(1)
   277  		go func() {
   278  			defer wg.Done()
   279  			heapErr = c.Sandbox.HeapProfile(heapFile, d.delay)
   280  		}()
   281  	}
   282  	if cpuFile != nil {
   283  		wg.Add(1)
   284  		go func() {
   285  			defer wg.Done()
   286  			cpuErr = c.Sandbox.CPUProfile(cpuFile, d.duration)
   287  		}()
   288  	}
   289  	if traceFile != nil {
   290  		wg.Add(1)
   291  		go func() {
   292  			defer wg.Done()
   293  			traceErr = c.Sandbox.Trace(traceFile, d.duration)
   294  		}()
   295  	}
   296  	if blockFile != nil {
   297  		wg.Add(1)
   298  		go func() {
   299  			defer wg.Done()
   300  			blockErr = c.Sandbox.BlockProfile(blockFile, d.duration)
   301  		}()
   302  	}
   303  	if mutexFile != nil {
   304  		wg.Add(1)
   305  		go func() {
   306  			defer wg.Done()
   307  			mutexErr = c.Sandbox.MutexProfile(mutexFile, d.duration)
   308  		}()
   309  	}
   310  
   311  	// Before sleeping, allow us to catch signals and try to exit
   312  	// gracefully before just exiting. If we can't wait for wg, then
   313  	// we will not be able to read the errors below safely.
   314  	readyChan := make(chan struct{})
   315  	go func() {
   316  		defer close(readyChan)
   317  		wg.Wait()
   318  	}()
   319  	signals := make(chan os.Signal, 1)
   320  	signal.Notify(signals, unix.SIGTERM, unix.SIGINT)
   321  	select {
   322  	case <-readyChan:
   323  		break // Safe to proceed.
   324  	case <-signals:
   325  		log.Infof("caught signal, waiting at most one more second.")
   326  		select {
   327  		case <-signals:
   328  			log.Infof("caught second signal, exiting immediately.")
   329  			os.Exit(1) // Not finished.
   330  		case <-time.After(time.Second):
   331  			log.Infof("timeout, exiting.")
   332  			os.Exit(1) // Not finished.
   333  		case <-readyChan:
   334  			break // Safe to proceed.
   335  		}
   336  	}
   337  
   338  	// Collect all errors.
   339  	errorCount := 0
   340  	if heapErr != nil {
   341  		errorCount++
   342  		log.Infof("error collecting heap profile: %v", heapErr)
   343  		os.Remove(heapFile.Name())
   344  	}
   345  	if cpuErr != nil {
   346  		errorCount++
   347  		log.Infof("error collecting cpu profile: %v", cpuErr)
   348  		os.Remove(cpuFile.Name())
   349  	}
   350  	if traceErr != nil {
   351  		errorCount++
   352  		log.Infof("error collecting trace profile: %v", traceErr)
   353  		os.Remove(traceFile.Name())
   354  	}
   355  	if blockErr != nil {
   356  		errorCount++
   357  		log.Infof("error collecting block profile: %v", blockErr)
   358  		os.Remove(blockFile.Name())
   359  	}
   360  	if mutexErr != nil {
   361  		errorCount++
   362  		log.Infof("error collecting mutex profile: %v", mutexErr)
   363  		os.Remove(mutexFile.Name())
   364  	}
   365  
   366  	if errorCount > 0 {
   367  		return subcommands.ExitFailure
   368  	}
   369  
   370  	return subcommands.ExitSuccess
   371  }