github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/runsc/container/container.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package container creates and manipulates containers.
package container

import (
	"context"
	"errors"
	"fmt"
	"io/ioutil"
	"os"
	"os/exec"
	"regexp"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/cenkalti/backoff"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"golang.org/x/sys/unix"
	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/cleanup"
	"github.com/SagerNet/gvisor/pkg/log"
	"github.com/SagerNet/gvisor/pkg/sentry/control"
	"github.com/SagerNet/gvisor/pkg/sentry/sighandling"
	"github.com/SagerNet/gvisor/runsc/boot"
	"github.com/SagerNet/gvisor/runsc/cgroup"
	"github.com/SagerNet/gvisor/runsc/config"
	"github.com/SagerNet/gvisor/runsc/console"
	"github.com/SagerNet/gvisor/runsc/sandbox"
	"github.com/SagerNet/gvisor/runsc/specutils"
)

// validateID validates the container id.
func validateID(id string) error {
	// See libcontainer/factory_linux.go.
	idRegex := regexp.MustCompile(`^[\w+-\.]+$`)
	if !idRegex.MatchString(id) {
		return fmt.Errorf("invalid container id: %v", id)
	}
	return nil
}

// Container represents a containerized application. When running, the
// container is associated with a single Sandbox.
//
// Container metadata can be saved and loaded to disk. Within a root directory,
// we maintain subdirectories for each container named with the container id.
// The container metadata is stored as a json within the container directory
// in a file named "meta.json". This metadata format is defined by us and is
// not part of the OCI spec.
//
// Containers must write their metadata files after any change to their internal
// states. The entire container directory is deleted when the container is
// destroyed.
//
// When the container is stopped, all processes that belong to the container
// must be stopped before Destroy() returns. containerd makes roughly the
// following calls to stop a container:
//   - First it attempts to kill the container process with
//     'runsc kill SIGTERM'. After some time, it escalates to SIGKILL. In a
//     separate thread, it's waiting on the container. As soon as the wait
//     returns, it moves on to the next step:
//   - It calls 'runsc kill --all SIGKILL' to stop every process that belongs to
//     the container. 'kill --all SIGKILL' waits for all processes before
//     returning.
//   - Containerd waits for stdin, stdout and stderr to drain and be closed.
//   - It calls 'runsc delete'. The runc implementation kills --all SIGKILL once
//     again just to be sure, waits, and then proceeds with the remaining teardown.
//
// Container is thread-unsafe.
type Container struct {
	// ID is the container ID.
	ID string `json:"id"`

	// Spec is the OCI runtime spec that configures this container.
	Spec *specs.Spec `json:"spec"`

	// BundleDir is the directory containing the container bundle.
	BundleDir string `json:"bundleDir"`

	// CreatedAt is the time the container was created.
	CreatedAt time.Time `json:"createdAt"`

	// Owner is the container owner.
	Owner string `json:"owner"`

	// ConsoleSocket is the path to a unix domain socket that will receive
	// the console FD.
	ConsoleSocket string `json:"consoleSocket"`

	// Status is the current container Status.
	Status Status `json:"status"`

	// GoferPid is the PID of the gofer running alongside the sandbox. May
	// be 0 if the gofer has been killed.
	GoferPid int `json:"goferPid"`

	// Sandbox is the sandbox this container is running in. It's set when the
	// container is created and reset when the sandbox is destroyed.
	Sandbox *sandbox.Sandbox `json:"sandbox"`

	// Saver handles load from/save to the state file safely from multiple
	// processes.
	Saver StateFile `json:"saver"`

	//
	// Fields below this line are not saved in the state file and will not
	// be preserved across commands.
	//

	// goferIsChild is set if a gofer process is a child of the current process.
	//
	// This field isn't saved to json, because only the creator of a gofer
	// process will have it as a child process.
	goferIsChild bool
}

// Args is used to configure a new container.
type Args struct {
	// ID is the container unique identifier.
	ID string

	// Spec is the OCI spec that describes the container.
	Spec *specs.Spec

	// BundleDir is the directory containing the container bundle.
	BundleDir string

	// ConsoleSocket is the path to a unix domain socket that will receive
	// the console FD. It may be empty.
	ConsoleSocket string

	// PIDFile is the filename where the container's root process PID will be
	// written to. It may be empty.
	PIDFile string

	// UserLog is the filename to send user-visible logs to. It may be empty.
	//
	// It only applies for the init container.
	UserLog string

	// Attached indicates that the sandbox lifecycle is attached to the caller.
	// If the caller exits, the sandbox should exit too.
	//
	// It only applies for the init container.
	Attached bool
}

// New creates the container in a new Sandbox process, unless the metadata
// indicates that an existing Sandbox should be used. The caller must call
// Destroy() on the container.
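//
// A minimal caller sketch (illustrative only):
//
//	c, err := New(conf, args)
//	if err != nil {
//		return err
//	}
//	defer func() { _ = c.Destroy() }()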
func New(conf *config.Config, args Args) (*Container, error) {
	log.Debugf("Create container, cid: %s, rootDir: %q", args.ID, conf.RootDir)
	if err := validateID(args.ID); err != nil {
		return nil, err
	}

	if err := os.MkdirAll(conf.RootDir, 0711); err != nil {
		return nil, fmt.Errorf("creating container root directory %q: %v", conf.RootDir, err)
	}

	sandboxID := args.ID
	if !isRoot(args.Spec) {
		var ok bool
		sandboxID, ok = specutils.SandboxID(args.Spec)
		if !ok {
			return nil, fmt.Errorf("no sandbox ID found when creating container")
		}
	}

	c := &Container{
		ID:            args.ID,
		Spec:          args.Spec,
		ConsoleSocket: args.ConsoleSocket,
		BundleDir:     args.BundleDir,
		Status:        Creating,
		CreatedAt:     time.Now(),
		Owner:         os.Getenv("USER"),
		Saver: StateFile{
			RootDir: conf.RootDir,
			ID: FullID{
				SandboxID:   sandboxID,
				ContainerID: args.ID,
			},
		},
	}
	// The Cleanup object cleans up partially created containers when an error
	// occurs. Any errors occurring during cleanup itself are ignored.
	cu := cleanup.Make(func() { _ = c.Destroy() })
	defer cu.Clean()

	// Lock the container metadata file to prevent concurrent creations of
	// containers with the same id.
	if err := c.Saver.lockForNew(); err != nil {
		return nil, err
	}
	defer c.Saver.unlock()

	// If the metadata annotations indicate that this container should be started
	// in an existing sandbox, we must do so. These are the possible metadata
	// annotation states:
	//   1. No annotations: it means that there is a single container and this
	//      container is obviously the root. Both container and sandbox share the
	//      ID.
	//   2. Container type == sandbox: it means this is the root container
	//      starting the sandbox. Both container and sandbox share the same ID.
	//   3. Container type == container: it means this is a subcontainer of an
	//      already started sandbox. In this case, the container ID is different
	//      than the sandbox ID.
	if isRoot(args.Spec) {
		log.Debugf("Creating new sandbox for container, cid: %s", args.ID)

		if args.Spec.Linux == nil {
			args.Spec.Linux = &specs.Linux{}
		}
		// Don't force the use of cgroups in tests because they lack permission to do so.
		if args.Spec.Linux.CgroupsPath == "" && !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
			args.Spec.Linux.CgroupsPath = "/" + args.ID
		}
		// Create and join the cgroup before processes are created to ensure they
		// are part of the cgroup from the start (and all their child processes).
		cg, err := cgroup.NewFromSpec(args.Spec)
		if err != nil {
			return nil, err
		}
		if cg != nil {
			// TODO(github.com/SagerNet/issue/3481): Remove when cgroups v2 is supported.
			if !conf.Rootless && cgroup.IsOnlyV2() {
				return nil, fmt.Errorf("cgroups V2 is not yet supported. Enable cgroups V1 and retry")
			}
			// If there is cgroup config, install it before creating the sandbox process.
			if err := cg.Install(args.Spec.Linux.Resources); err != nil {
				switch {
				case errors.Is(err, unix.EACCES) && conf.Rootless:
					log.Warningf("Skipping cgroup configuration in rootless mode: %v", err)
					cg = nil
				default:
					return nil, fmt.Errorf("configuring cgroup: %v", err)
				}
			}
		}
		if err := runInCgroup(cg, func() error {
			ioFiles, specFile, err := c.createGoferProcess(args.Spec, conf, args.BundleDir, args.Attached)
			if err != nil {
				return err
			}

			// Start a new sandbox for this container. Any errors after this point
			// must destroy the container.
			sandArgs := &sandbox.Args{
				ID:            sandboxID,
				Spec:          args.Spec,
				BundleDir:     args.BundleDir,
				ConsoleSocket: args.ConsoleSocket,
				UserLog:       args.UserLog,
				IOFiles:       ioFiles,
				MountsFile:    specFile,
				Cgroup:        cg,
				Attached:      args.Attached,
			}
			sand, err := sandbox.New(conf, sandArgs)
			if err != nil {
				return err
			}
			c.Sandbox = sand
			return nil

		}); err != nil {
			return nil, err
		}
	} else {
		log.Debugf("Creating new container, cid: %s, sandbox: %s", c.ID, sandboxID)

		// Find the sandbox associated with this ID.
		fullID := FullID{
			SandboxID:   sandboxID,
			ContainerID: sandboxID,
		}
		sb, err := Load(conf.RootDir, fullID, LoadOpts{Exact: true})
		if err != nil {
			return nil, err
		}
		c.Sandbox = sb.Sandbox

		// If the console control socket file is provided, then create a new
		// pty master/slave pair and send the TTY to the sandbox process.
		var tty *os.File
		if c.ConsoleSocket != "" {
			// Create a new TTY pair and send the master on the provided socket.
			var err error
			tty, err = console.NewWithSocket(c.ConsoleSocket)
			if err != nil {
				return nil, fmt.Errorf("setting up console with socket %q: %w", c.ConsoleSocket, err)
			}
			// The tty file is transferred to the sandbox, so it can be closed here.
			defer tty.Close()
		}

		if err := c.Sandbox.CreateContainer(c.ID, tty); err != nil {
			return nil, err
		}
	}
	c.changeStatus(Created)

	// Save the metadata file.
	if err := c.saveLocked(); err != nil {
		return nil, err
	}

	// Write the PID file. Containerd considers the create complete after
	// this file is created, so it must be the last thing we do.
	if args.PIDFile != "" {
		if err := ioutil.WriteFile(args.PIDFile, []byte(strconv.Itoa(c.SandboxPid())), 0644); err != nil {
			return nil, fmt.Errorf("error writing PID file: %v", err)
		}
	}

	cu.Release()
	return c, nil
}

// Start starts running the containerized process inside the sandbox.
func (c *Container) Start(conf *config.Config) error {
	log.Debugf("Start container, cid: %s", c.ID)

	if err := c.Saver.lock(); err != nil {
		return err
	}
	unlock := cleanup.Make(func() { c.Saver.unlock() })
	defer unlock.Clean()

	if err := c.requireStatus("start", Created); err != nil {
		return err
	}

	// "If any prestart hook fails, the runtime MUST generate an error,
	// stop and destroy the container" -OCI spec.
	if c.Spec.Hooks != nil {
		if err := executeHooks(c.Spec.Hooks.Prestart, c.State()); err != nil {
			return err
		}
	}

	if isRoot(c.Spec) {
		if err := c.Sandbox.StartRoot(c.Spec, conf); err != nil {
			return err
		}
	} else {
		// Join the cgroup to start the gofer process, to ensure it's part of the
		// cgroup from the start (and all its child processes).
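		// Note that for subcontainers the gofer is created only here, at Start
		// time; for the root container it was already created in New, together
		// with the sandbox process.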
		if err := runInCgroup(c.Sandbox.Cgroup, func() error {
			// Create the gofer process.
			goferFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir, false)
			if err != nil {
				return err
			}
			defer func() {
				_ = mountsFile.Close()
				for _, f := range goferFiles {
					_ = f.Close()
				}
			}()

			cleanMounts, err := specutils.ReadMounts(mountsFile)
			if err != nil {
				return fmt.Errorf("reading mounts file: %v", err)
			}
			c.Spec.Mounts = cleanMounts

			// Set up stdios if the container is not using a terminal. Otherwise the
			// TTY was already set up in create.
			var stdios []*os.File
			if !c.Spec.Process.Terminal {
				stdios = []*os.File{os.Stdin, os.Stdout, os.Stderr}
			}

			return c.Sandbox.StartContainer(c.Spec, conf, c.ID, stdios, goferFiles)
		}); err != nil {
			return err
		}
	}

	// "If any poststart hook fails, the runtime MUST log a warning, but
	// the remaining hooks and lifecycle continue as if the hook had
	// succeeded" -OCI spec.
	if c.Spec.Hooks != nil {
		executeHooksBestEffort(c.Spec.Hooks.Poststart, c.State())
	}

	c.changeStatus(Running)
	if err := c.saveLocked(); err != nil {
		return err
	}

	// Release the lock before adjusting the OOM score because the lock is acquired there.
	unlock.Clean()

	// Adjust the oom_score_adj for the sandbox. This must be done after saveLocked().
	if err := adjustSandboxOOMScoreAdj(c.Sandbox, c.Spec, c.Saver.RootDir, false); err != nil {
		return err
	}

	// Set the container's oom_score_adj on the gofer since it is dedicated to
	// the container, in case the gofer uses up too much memory.
	return c.adjustGoferOOMScoreAdj()
}

// Restore takes a container and replaces its kernel and file system
// to restore the container from its state file.
func (c *Container) Restore(spec *specs.Spec, conf *config.Config, restoreFile string) error {
	log.Debugf("Restore container, cid: %s", c.ID)
	if err := c.Saver.lock(); err != nil {
		return err
	}
	defer c.Saver.unlock()

	if err := c.requireStatus("restore", Created); err != nil {
		return err
	}

	// "If any prestart hook fails, the runtime MUST generate an error,
	// stop and destroy the container" -OCI spec.
	if c.Spec.Hooks != nil {
		if err := executeHooks(c.Spec.Hooks.Prestart, c.State()); err != nil {
			return err
		}
	}

	if err := c.Sandbox.Restore(c.ID, spec, conf, restoreFile); err != nil {
		return err
	}
	c.changeStatus(Running)
	return c.saveLocked()
}

// Run is a helper that calls Create + Start + Wait.
func Run(conf *config.Config, args Args) (unix.WaitStatus, error) {
	log.Debugf("Run container, cid: %s, rootDir: %q", args.ID, conf.RootDir)
	c, err := New(conf, args)
	if err != nil {
		return 0, fmt.Errorf("creating container: %v", err)
	}
	// Clean up the partially created container if an error occurs.
	// Any errors returned by Destroy() itself are ignored.
	cu := cleanup.Make(func() {
		c.Destroy()
	})
	defer cu.Clean()

	if conf.RestoreFile != "" {
		log.Debugf("Restore: %v", conf.RestoreFile)
		if err := c.Restore(args.Spec, conf, conf.RestoreFile); err != nil {
			return 0, fmt.Errorf("starting container: %v", err)
		}
	} else {
		if err := c.Start(conf); err != nil {
			return 0, fmt.Errorf("starting container: %v", err)
		}
	}
	if args.Attached {
		return c.Wait()
	}
	cu.Release()
	return 0, nil
}

// Execute runs the specified command in the container. It returns the PID of
// the newly created process.
func (c *Container) Execute(args *control.ExecArgs) (int32, error) {
	log.Debugf("Execute in container, cid: %s, args: %+v", c.ID, args)
	if err := c.requireStatus("execute in", Created, Running); err != nil {
		return 0, err
	}
	args.ContainerID = c.ID
	return c.Sandbox.Execute(args)
}

// Event returns events for the container.
func (c *Container) Event() (*boot.EventOut, error) {
	log.Debugf("Getting events for container, cid: %s", c.ID)
	if err := c.requireStatus("get events for", Created, Running, Paused); err != nil {
		return nil, err
	}
	event, err := c.Sandbox.Event(c.ID)
	if err != nil {
		return nil, err
	}

	// Some stats can utilize host cgroups for accuracy.
	c.populateStats(event)

	return event, nil
}

// SandboxPid returns the Pid of the sandbox the container is running in, or -1 if the
// container is not running.
func (c *Container) SandboxPid() int {
	if err := c.requireStatus("get PID", Created, Running, Paused); err != nil {
		return -1
	}
	return c.Sandbox.Pid
}

// Wait waits for the container to exit, and returns its WaitStatus.
// Waiting on a stopped container is needed to retrieve its exit status; in
// that case Wait returns immediately.
func (c *Container) Wait() (unix.WaitStatus, error) {
	log.Debugf("Wait on container, cid: %s", c.ID)
	ws, err := c.Sandbox.Wait(c.ID)
	if err == nil {
		// Wait succeeded, container is not running anymore.
		c.changeStatus(Stopped)
	}
	return ws, err
}

// WaitRootPID waits for process 'pid' in the sandbox's PID namespace and
// returns its WaitStatus.
func (c *Container) WaitRootPID(pid int32) (unix.WaitStatus, error) {
	log.Debugf("Wait on process %d in sandbox, cid: %s", pid, c.Sandbox.ID)
	if !c.IsSandboxRunning() {
		return 0, fmt.Errorf("sandbox is not running")
	}
	return c.Sandbox.WaitPID(c.Sandbox.ID, pid)
}

// WaitPID waits for process 'pid' in the container's PID namespace and returns
// its WaitStatus.
func (c *Container) WaitPID(pid int32) (unix.WaitStatus, error) {
	log.Debugf("Wait on process %d in container, cid: %s", pid, c.ID)
	if !c.IsSandboxRunning() {
		return 0, fmt.Errorf("sandbox is not running")
	}
	return c.Sandbox.WaitPID(c.ID, pid)
}

// SignalContainer sends the signal to the container. If all is true and signal
// is SIGKILL, then waits for all processes to exit before returning.
// SignalContainer returns an error if the container is already stopped.
// TODO(b/113680494): Distinguish different error types.
func (c *Container) SignalContainer(sig unix.Signal, all bool) error {
	log.Debugf("Signal container, cid: %s, signal: %v (%d)", c.ID, sig, sig)
	// Signaling a container in Stopped state is allowed.
	// When all=false, an error will be returned anyway; when all=true, this
	// allows sending signals to other processes inside the container even
	// after the init process exits. This is especially useful for
	// container cleanup.
	if err := c.requireStatus("signal", Running, Stopped); err != nil {
		return err
	}
	if !c.IsSandboxRunning() {
		return fmt.Errorf("sandbox is not running")
	}
	return c.Sandbox.SignalContainer(c.ID, sig, all)
}

// SignalProcess sends sig to a specific process in the container.
func (c *Container) SignalProcess(sig unix.Signal, pid int32) error {
	log.Debugf("Signal process %d in container, cid: %s, signal: %v (%d)", pid, c.ID, sig, sig)
	if err := c.requireStatus("signal a process inside", Running); err != nil {
		return err
	}
	if !c.IsSandboxRunning() {
		return fmt.Errorf("sandbox is not running")
	}
	return c.Sandbox.SignalProcess(c.ID, int32(pid), sig, false)
}

// ForwardSignals forwards all signals received by the current process to the
// container process inside the sandbox. It returns a function that will stop
// forwarding signals.
func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() {
	log.Debugf("Forwarding all signals to container, cid: %s, PID: %d, fgProcess: %t", c.ID, pid, fgProcess)
	stop := sighandling.StartSignalForwarding(func(sig linux.Signal) {
		log.Debugf("Forwarding signal %d to container, cid: %s, PID: %d, fgProcess: %t", sig, c.ID, pid, fgProcess)
		if err := c.Sandbox.SignalProcess(c.ID, pid, unix.Signal(sig), fgProcess); err != nil {
			log.Warningf("error forwarding signal %d to container %q: %v", sig, c.ID, err)
		}
	})
	return func() {
		log.Debugf("Done forwarding signals to container, cid: %s, PID: %d, fgProcess: %t", c.ID, pid, fgProcess)
		stop()
	}
}

// Checkpoint sends the checkpoint call to the container.
// The statefile will be written to f, the file at the specified image-path.
func (c *Container) Checkpoint(f *os.File) error {
	log.Debugf("Checkpoint container, cid: %s", c.ID)
	if err := c.requireStatus("checkpoint", Created, Running, Paused); err != nil {
		return err
	}
	return c.Sandbox.Checkpoint(c.ID, f)
}

// Pause suspends the container and its kernel.
// The call only succeeds if the container's status is created or running.
func (c *Container) Pause() error {
	log.Debugf("Pausing container, cid: %s", c.ID)
	if err := c.Saver.lock(); err != nil {
		return err
	}
	defer c.Saver.unlock()

	if c.Status != Created && c.Status != Running {
		return fmt.Errorf("cannot pause container %q in state %v", c.ID, c.Status)
	}

	if err := c.Sandbox.Pause(c.ID); err != nil {
		return fmt.Errorf("pausing container %q: %v", c.ID, err)
	}
	c.changeStatus(Paused)
	return c.saveLocked()
}

// Resume unpauses the container and its kernel.
// The call only succeeds if the container's status is paused.
func (c *Container) Resume() error {
	log.Debugf("Resuming container, cid: %s", c.ID)
	if err := c.Saver.lock(); err != nil {
		return err
	}
	defer c.Saver.unlock()

	if c.Status != Paused {
		return fmt.Errorf("cannot resume container %q in state %v", c.ID, c.Status)
	}
	if err := c.Sandbox.Resume(c.ID); err != nil {
		return fmt.Errorf("resuming container: %v", err)
	}
	c.changeStatus(Running)
	return c.saveLocked()
}

// State returns the metadata of the container.
func (c *Container) State() specs.State {
	return specs.State{
		Version: specs.Version,
		ID:      c.ID,
		Status:  c.Status.String(),
		Pid:     c.SandboxPid(),
		Bundle:  c.BundleDir,
	}
}

// Processes retrieves the list of processes and associated metadata inside a
// container.
func (c *Container) Processes() ([]*control.Process, error) {
	if err := c.requireStatus("get processes of", Running, Paused); err != nil {
		return nil, err
	}
	return c.Sandbox.Processes(c.ID)
}

// Destroy stops all processes and frees all resources associated with the
// container.
func (c *Container) Destroy() error {
	log.Debugf("Destroy container, cid: %s", c.ID)

	if err := c.Saver.lock(); err != nil {
		return err
	}
	defer func() {
		c.Saver.unlock()
		c.Saver.close()
	}()

	// Stored for later use as stop() sets c.Sandbox to nil.
	sb := c.Sandbox

	// We must perform the following cleanup steps:
	//   * stop the container and gofer processes,
	//   * remove the container filesystem on the host, and
	//   * delete the container metadata directory.
	//
	// It's possible for one or more of these steps to fail, but we should
	// do our best to perform all of the cleanups. Hence, we keep a slice
	// of errors and return their concatenation.
	var errs []string
	if err := c.stop(); err != nil {
		err = fmt.Errorf("stopping container: %v", err)
		log.Warningf("%v", err)
		errs = append(errs, err.Error())
	}

	if err := c.Saver.destroy(); err != nil {
		err = fmt.Errorf("deleting container state files: %v", err)
		log.Warningf("%v", err)
		errs = append(errs, err.Error())
	}

	c.changeStatus(Stopped)

	// Adjust oom_score_adj for the sandbox. This must be done after the container
	// is stopped and the directory at c.Root is removed.
	//
	// Use 'sb' to tell whether it has been executed before because Destroy must
	// be idempotent.
	if sb != nil {
		if err := adjustSandboxOOMScoreAdj(sb, c.Spec, c.Saver.RootDir, true); err != nil {
			errs = append(errs, err.Error())
		}
	}

	// "If any poststop hook fails, the runtime MUST log a warning, but the
	// remaining hooks and lifecycle continue as if the hook had
	// succeeded" - OCI spec.
	//
	// Based on the OCI, "The post-stop hooks MUST be called after the container
	// is deleted but before the delete operation returns".
	// Run it here to:
	//   1) Conform to the OCI.
	//   2) Make sure it only runs once, because once the root has been deleted,
	//      the container can't be loaded again.
	if c.Spec.Hooks != nil {
		executeHooksBestEffort(c.Spec.Hooks.Poststop, c.State())
	}

	if len(errs) == 0 {
		return nil
	}
	return fmt.Errorf(strings.Join(errs, "\n"))
}

// saveLocked saves the container metadata to a file.
//
// Precondition: container must be locked with container.lock().
func (c *Container) saveLocked() error {
	log.Debugf("Save container, cid: %s", c.ID)
	if err := c.Saver.saveLocked(c); err != nil {
		return fmt.Errorf("saving container metadata: %v", err)
	}
	return nil
}

// stop stops the container (for regular containers) or the sandbox (for
// root containers), and waits for the container or sandbox and the gofer
// to stop. If any of them doesn't stop before the timeout, an error is returned.
func (c *Container) stop() error {
	var cgroup *cgroup.Cgroup

	if c.Sandbox != nil {
		log.Debugf("Destroying container, cid: %s", c.ID)
		if err := c.Sandbox.DestroyContainer(c.ID); err != nil {
			return fmt.Errorf("destroying container %q: %v", c.ID, err)
		}
		// Only uninstall cgroup for sandbox stop.
		if c.Sandbox.IsRootContainer(c.ID) {
			cgroup = c.Sandbox.Cgroup
		}
		// Only set sandbox to nil after it has been told to destroy the container.
		c.Sandbox = nil
	}

	// Try killing the gofer if it does not exit with the container.
	if c.GoferPid != 0 {
		log.Debugf("Killing gofer for container, cid: %s, PID: %d", c.ID, c.GoferPid)
		if err := unix.Kill(c.GoferPid, unix.SIGKILL); err != nil {
			// The gofer may already be stopped, log the error.
			log.Warningf("Error sending signal %d to gofer %d: %v", unix.SIGKILL, c.GoferPid, err)
		}
	}

	if err := c.waitForStopped(); err != nil {
		return err
	}

	// The gofer is running in the cgroup, so Cgroup.Uninstall has to be called
	// after it stops.
	if cgroup != nil {
		if err := cgroup.Uninstall(); err != nil {
			return err
		}
	}
	return nil
}

func (c *Container) waitForStopped() error {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
	op := func() error {
		if c.IsSandboxRunning() {
			if err := c.SignalContainer(unix.Signal(0), false); err == nil {
				return fmt.Errorf("container is still running")
			}
		}
		if c.GoferPid == 0 {
			return nil
		}
		if c.goferIsChild {
			// The gofer process is a child of the current process,
			// so we can wait on it and collect its zombie.
			wpid, err := unix.Wait4(int(c.GoferPid), nil, unix.WNOHANG, nil)
			if err != nil {
				return fmt.Errorf("error waiting on the gofer process: %v", err)
			}
			if wpid == 0 {
				return fmt.Errorf("gofer is still running")
			}

		} else if err := unix.Kill(c.GoferPid, 0); err == nil {
			return fmt.Errorf("gofer is still running")
		}
		c.GoferPid = 0
		return nil
	}
	return backoff.Retry(op, b)
}

func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bundleDir string, attached bool) ([]*os.File, *os.File, error) {
	// Start with the general config flags.
	args := conf.ToFlags()

	var goferEnds []*os.File

	// nextFD is the next available file descriptor for the gofer process.
	// It starts at 3 because 0-2 are used by stdin/stdout/stderr.
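	// With both log files configured, the resulting command line looks roughly
	// like:
	//   runsc --log-fd=3 --debug-log-fd=4 gofer --bundle <dir> --spec-fd=5 --mounts-fd=6 --io-fds=7 ...
	// (FD numbers shift when a log file is not configured; the flags are
	// appended below in the same order as the files are added to goferEnds.)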
	nextFD := 3

	if conf.LogFilename != "" {
		logFile, err := os.OpenFile(conf.LogFilename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
		if err != nil {
			return nil, nil, fmt.Errorf("opening log file %q: %v", conf.LogFilename, err)
		}
		defer logFile.Close()
		goferEnds = append(goferEnds, logFile)
		args = append(args, "--log-fd="+strconv.Itoa(nextFD))
		nextFD++
	}

	if conf.DebugLog != "" {
		test := ""
		if len(conf.TestOnlyTestNameEnv) != 0 {
			// Fetch the test name if one is provided and the test only flag was set.
			if t, ok := specutils.EnvVar(spec.Process.Env, conf.TestOnlyTestNameEnv); ok {
				test = t
			}
		}
		debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "gofer", test)
		if err != nil {
			return nil, nil, fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err)
		}
		defer debugLogFile.Close()
		goferEnds = append(goferEnds, debugLogFile)
		args = append(args, "--debug-log-fd="+strconv.Itoa(nextFD))
		nextFD++
	}

	args = append(args, "gofer", "--bundle", bundleDir)

	// Open the spec file to donate to the sandbox.
	specFile, err := specutils.OpenSpec(bundleDir)
	if err != nil {
		return nil, nil, fmt.Errorf("opening spec file: %v", err)
	}
	defer specFile.Close()
	goferEnds = append(goferEnds, specFile)
	args = append(args, "--spec-fd="+strconv.Itoa(nextFD))
	nextFD++

	// Create a pipe that allows the gofer to send the mount list to the sandbox
	// after all paths have been resolved.
	mountsSand, mountsGofer, err := os.Pipe()
	if err != nil {
		return nil, nil, err
	}
	defer mountsGofer.Close()
	goferEnds = append(goferEnds, mountsGofer)
	args = append(args, fmt.Sprintf("--mounts-fd=%d", nextFD))
	nextFD++

	// Add the root mount and then add any other additional mounts.
	mountCount := 1
	for _, m := range spec.Mounts {
		if specutils.Is9PMount(m, conf.VFS2) {
			mountCount++
		}
	}

	sandEnds := make([]*os.File, 0, mountCount)
	for i := 0; i < mountCount; i++ {
		fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
		if err != nil {
			return nil, nil, err
		}
		sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox IO FD"))

		goferEnd := os.NewFile(uintptr(fds[1]), "gofer IO FD")
		defer goferEnd.Close()
		goferEnds = append(goferEnds, goferEnd)

		args = append(args, fmt.Sprintf("--io-fds=%d", nextFD))
		nextFD++
	}

	binPath := specutils.ExePath
	cmd := exec.Command(binPath, args...)
	cmd.ExtraFiles = goferEnds
	cmd.Args[0] = "runsc-gofer"

	if attached {
		// The gofer is attached to the lifetime of this process, so it
		// should synchronously die when this process dies.
		cmd.SysProcAttr = &unix.SysProcAttr{
			Pdeathsig: unix.SIGKILL,
		}
	}

	// Enter new namespaces to isolate from the rest of the system. Don't unshare
	// cgroup because the gofer is added to a cgroup in the caller's namespace.
	nss := []specs.LinuxNamespace{
		{Type: specs.IPCNamespace},
		{Type: specs.MountNamespace},
		{Type: specs.NetworkNamespace},
		{Type: specs.PIDNamespace},
		{Type: specs.UTSNamespace},
	}

	// Set up any uid/gid mappings, and create or join the configured user
	// namespace so the gofer's view of the filesystem aligns with the
	// users in the sandbox.
	userNS := specutils.FilterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, spec)
	nss = append(nss, userNS...)
	specutils.SetUIDGIDMappings(cmd, spec)
	if len(userNS) != 0 {
		// We need to set UID and GID to have capabilities in a new user namespace.
		cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
	}

	// Start the gofer in the given namespace.
	log.Debugf("Starting gofer: %s %v", binPath, args)
	if err := specutils.StartInNS(cmd, nss); err != nil {
		return nil, nil, fmt.Errorf("gofer: %v", err)
	}
	log.Infof("Gofer started, PID: %d", cmd.Process.Pid)
	c.GoferPid = cmd.Process.Pid
	c.goferIsChild = true
	return sandEnds, mountsSand, nil
}

// changeStatus transitions from one status to another ensuring that the
// transition is valid.
func (c *Container) changeStatus(s Status) {
	switch s {
	case Creating:
		// Initial state, never transitions to it.
		panic(fmt.Sprintf("invalid state transition: %v => %v", c.Status, s))

	case Created:
		if c.Status != Creating {
			panic(fmt.Sprintf("invalid state transition: %v => %v", c.Status, s))
		}
		if c.Sandbox == nil {
			panic("sandbox cannot be nil")
		}

	case Paused:
		if c.Status != Running {
			panic(fmt.Sprintf("invalid state transition: %v => %v", c.Status, s))
		}
		if c.Sandbox == nil {
			panic("sandbox cannot be nil")
		}

	case Running:
		if c.Status != Created && c.Status != Paused {
			panic(fmt.Sprintf("invalid state transition: %v => %v", c.Status, s))
		}
		if c.Sandbox == nil {
			panic("sandbox cannot be nil")
		}

	case Stopped:
		if c.Status != Creating && c.Status != Created && c.Status != Running && c.Status != Stopped {
			panic(fmt.Sprintf("invalid state transition: %v => %v", c.Status, s))
		}

	default:
		panic(fmt.Sprintf("invalid new state: %v", s))
	}
	c.Status = s
}

// IsSandboxRunning returns true if the sandbox exists and is running.
func (c *Container) IsSandboxRunning() bool {
	return c.Sandbox != nil && c.Sandbox.IsRunning()
}

func (c *Container) requireStatus(action string, statuses ...Status) error {
	for _, s := range statuses {
		if c.Status == s {
			return nil
		}
	}
	return fmt.Errorf("cannot %s container %q in state %s", action, c.ID, c.Status)
}

func isRoot(spec *specs.Spec) bool {
	return specutils.SpecContainerType(spec) != specutils.ContainerTypeContainer
}

// runInCgroup executes fn inside the specified cgroup. If cg is nil, execute
// it in the current context.
func runInCgroup(cg *cgroup.Cgroup, fn func() error) error {
	if cg == nil {
		return fn()
	}
	restore, err := cg.Join()
	defer restore()
	if err != nil {
		return err
	}
	return fn()
}

// adjustGoferOOMScoreAdj sets the oom_score_adj for the container's gofer.
func (c *Container) adjustGoferOOMScoreAdj() error {
	if c.GoferPid == 0 || c.Spec.Process.OOMScoreAdj == nil {
		return nil
	}
	return setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj)
}

// adjustSandboxOOMScoreAdj sets the oom_score_adj for the sandbox.
// oom_score_adj is set to the lowest oom_score_adj among the containers
// running in the sandbox.
//
// TODO(github.com/SagerNet/issue/238): This call could race with other containers being
// created at the same time and end up setting the wrong oom_score_adj to the
// sandbox. Use rpc client to synchronize.
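//
// For example, with one subcontainer requesting oom_score_adj 500, another
// requesting -200, and a CRI pause container (which is skipped), the sandbox
// process gets oom_score_adj -200.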
func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, spec *specs.Spec, rootDir string, destroy bool) error {
	// Adjustment can be skipped if the root container is exiting, because it
	// brings down the entire sandbox.
	if isRoot(spec) && destroy {
		return nil
	}

	containers, err := loadSandbox(rootDir, s.ID)
	if err != nil {
		return fmt.Errorf("loading sandbox containers: %v", err)
	}

	// Do nothing if the sandbox has been terminated.
	if len(containers) == 0 {
		return nil
	}

	// Get the lowest score for all containers.
	var lowScore int
	scoreFound := false
	for _, container := range containers {
		// Special multi-container support for CRI. Ignore the root container when
		// calculating oom_score_adj for the sandbox because it is the
		// infrastructure (pause) container and always has a very low oom_score_adj.
		//
		// We will use OOMScoreAdj in the single-container case where the
		// containerd container-type annotation is not present.
		if specutils.SpecContainerType(container.Spec) == specutils.ContainerTypeSandbox {
			continue
		}

		if container.Spec.Process.OOMScoreAdj != nil && (!scoreFound || *container.Spec.Process.OOMScoreAdj < lowScore) {
			scoreFound = true
			lowScore = *container.Spec.Process.OOMScoreAdj
		}
	}

	// If the container is destroyed and remaining containers have no
	// oomScoreAdj specified then we must revert to the original oom_score_adj
	// saved with the root container.
	if !scoreFound && destroy {
		lowScore = containers[0].Sandbox.OriginalOOMScoreAdj
		scoreFound = true
	}

	// Only set oom_score_adj if one of the containers has oom_score_adj set. If
	// not, oom_score_adj is inherited from the parent process.
	//
	// See: https://github.com/opencontainers/runtime-spec/blob/master/config.md#linux-process
	if !scoreFound {
		return nil
	}

	// Set the lowest of all containers' oom_score_adj to the sandbox.
	return setOOMScoreAdj(s.Pid, lowScore)
}

// setOOMScoreAdj sets oom_score_adj to the given value for the given PID.
// /proc must be available and mounted read-write. scoreAdj should be between
// -1000 and 1000. It's a noop if the process has already exited.
func setOOMScoreAdj(pid int, scoreAdj int) error {
	f, err := os.OpenFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid), os.O_WRONLY, 0644)
	if err != nil {
		// Ignore NotExist errors because it can race with process exit.
		if os.IsNotExist(err) {
			log.Warningf("Process (%d) not found setting oom_score_adj", pid)
			return nil
		}
		return err
	}
	defer f.Close()
	if _, err := f.WriteString(strconv.Itoa(scoreAdj)); err != nil {
		if errors.Is(err, unix.ESRCH) {
			log.Warningf("Process (%d) exited while setting oom_score_adj", pid)
			return nil
		}
		return fmt.Errorf("setting oom_score_adj to %q: %v", scoreAdj, err)
	}
	return nil
}

// populateStats populates event with stats estimates based on cgroups and the
// sentry's accounting.
// TODO(github.com/SagerNet/issue/172): This is an estimation; we should do more
// detailed accounting.
func (c *Container) populateStats(event *boot.EventOut) {
	// The events command, when run for all running containers, should
	// account for the full cgroup CPU usage.
	// We split cgroup usage proportionally according to the sentry-internal
	// usage measurements, only counting Running containers.
	log.Debugf("event.ContainerUsage: %v", event.ContainerUsage)
	var containerUsage uint64
	var allContainersUsage uint64
	for ID, usage := range event.ContainerUsage {
		allContainersUsage += usage
		if ID == c.ID {
			containerUsage = usage
		}
	}

	cgroup, err := c.Sandbox.NewCGroup()
	if err != nil {
		// No cgroup, so rely purely on the sentry's accounting.
		log.Warningf("events: no cgroups")
		event.Event.Data.CPU.Usage.Total = containerUsage
		return
	}

	// Get the host cgroup CPU usage.
	cgroupsUsage, err := cgroup.CPUUsage()
	if err != nil {
		// No cgroup usage, so rely purely on the sentry's accounting.
		log.Warningf("events: failed when getting cgroup CPU usage for container: %v", err)
		event.Event.Data.CPU.Usage.Total = containerUsage
		return
	}

	// If the sentry reports no CPU usage, fall back on cgroups and split usage
	// equally across containers.
	if allContainersUsage == 0 {
		log.Warningf("events: no sentry CPU usage reported")
		allContainersUsage = cgroupsUsage
		containerUsage = cgroupsUsage / uint64(len(event.ContainerUsage))
	}

	// Scaling can easily overflow a uint64 (e.g. a containerUsage and
	// cgroupsUsage of 16 seconds each will overflow), so use floats.
	total := float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage))
	log.Debugf("Usage, container: %d, cgroups: %d, all: %d, total: %.0f", containerUsage, cgroupsUsage, allContainersUsage, total)
	event.Event.Data.CPU.Usage.Total = uint64(total)
	return
}
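
// The function below is an illustrative sketch, not part of the runsc API: it
// mirrors, in terms of this package, the containerd-style stop sequence
// described in the Container doc comment above (SIGTERM the init process,
// escalate to SIGKILL for every process, collect the exit status, then delete
// the container). Timeouts and error handling are intentionally simplified.
func exampleStopSequence(c *Container) error {
	// Ask the container's init process to terminate gracefully.
	if err := c.SignalContainer(unix.SIGTERM, false /* all */); err != nil {
		log.Warningf("SIGTERM failed, escalating to SIGKILL: %v", err)
	}
	// Escalate: SIGKILL every process that belongs to the container. With
	// all=true, this waits for the processes to exit before returning.
	if err := c.SignalContainer(unix.SIGKILL, true /* all */); err != nil {
		log.Warningf("SIGKILL --all failed: %v", err)
	}
	// Retrieve the container's exit status.
	if _, err := c.Wait(); err != nil {
		log.Warningf("waiting on container: %v", err)
	}
	// Tear down remaining resources and delete the saved state.
	return c.Destroy()
}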