gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/runsc/boot/events.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package boot
    16  
    17  import (
    18  	"fmt"
    19  	"strconv"
    20  
    21  	"gvisor.dev/gvisor/pkg/log"
    22  	"gvisor.dev/gvisor/pkg/sentry/control"
    23  	"gvisor.dev/gvisor/pkg/sentry/usage"
    24  )
    25  
    26  // NetworkInterface is the network statistics of the particular network interface
    27  type NetworkInterface struct {
    28  	// Name is the name of the network interface.
    29  	Name      string
    30  	RxBytes   uint64
    31  	RxPackets uint64
    32  	RxErrors  uint64
    33  	RxDropped uint64
    34  	TxBytes   uint64
    35  	TxPackets uint64
    36  	TxErrors  uint64
    37  	TxDropped uint64
    38  }
    39  
// EventOut is the return type of the Event command.
//
// containerManager.Event overwrites the whole value at the start of every
// call, so previous contents are discarded.
type EventOut struct {
	// Event is the "stats" event produced for the requested container.
	Event Event `json:"event"`

	// ContainerUsage maps each container ID to its total CPU usage.
	//
	// containerManager.Event only fills this in as a fallback, when the
	// container's CPU usage could not be read from cgroups; otherwise it is
	// left nil and the usage is reported in Event.Data.CPU.Usage.Total.
	ContainerUsage map[string]uint64 `json:"containerUsage"`
}
    47  
// Event struct for encoding the event data to JSON. Corresponds to runc's
// main.event struct.
//
// As filled in by containerManager.Event, Type is always "stats", ID is the
// ID of the container the stats were requested for, and Data carries the
// collected statistics. All three fields are always present in the JSON
// encoding.
type Event struct {
	Type string `json:"type"`
	ID   string `json:"id"`
	Data Stats  `json:"data"`
}
    55  
// Stats is the runc specific stats structure for stability when encoding and
// decoding stats.
//
// None of the fields use omitempty, so all four keys are always emitted. In
// particular a nil NetworkInterfaces slice is encoded as JSON null rather
// than being dropped.
type Stats struct {
	CPU               CPU                 `json:"cpu"`
	Memory            Memory              `json:"memory"`
	Pids              Pids                `json:"pids"`
	NetworkInterfaces []*NetworkInterface `json:"network_interfaces"`
}
    64  
// Pids contains stats on processes.
//
// Current is set by containerManager.Event from the loader's pidsCount for
// the container. Limit is not set by any code visible in this file. Both
// fields are omitted from the JSON encoding when zero.
type Pids struct {
	Current uint64 `json:"current,omitempty"`
	Limit   uint64 `json:"limit,omitempty"`
}
    70  
// MemoryEntry contains stats on a kind of memory.
//
// Limit and Failcnt are always encoded, while Usage and Max are omitted from
// the JSON encoding when zero. The only MemoryEntry field written in this
// file is Usage (of Memory.Usage), which containerManager.Event fills in.
//
// NOTE(review): the field meanings presumably follow runc's memory entry
// (limit, current usage, maximum usage, failure count) -- confirm against
// runc before relying on them.
type MemoryEntry struct {
	Limit   uint64 `json:"limit"`
	Usage   uint64 `json:"usage,omitempty"`
	Max     uint64 `json:"max,omitempty"`
	Failcnt uint64 `json:"failcnt"`
}
    78  
// Memory contains stats on memory.
//
// Cache is omitted from the JSON encoding when zero and Raw when empty. The
// omitempty option on the MemoryEntry-typed fields has no effect, because
// encoding/json never treats a struct value as empty: usage, swap, kernel and
// kernelTCP are always emitted. The tags are presumably kept as they are to
// mirror runc's definition.
//
// containerManager.Event only populates Usage.Usage; the remaining fields are
// left at their zero values by the code in this file.
type Memory struct {
	Cache     uint64            `json:"cache,omitempty"`
	Usage     MemoryEntry       `json:"usage,omitempty"`
	Swap      MemoryEntry       `json:"swap,omitempty"`
	Kernel    MemoryEntry       `json:"kernel,omitempty"`
	KernelTCP MemoryEntry       `json:"kernelTCP,omitempty"`
	Raw       map[string]uint64 `json:"raw,omitempty"`
}
    88  
// CPU contains stats on the CPU.
//
// Usage is always present in the JSON encoding; because every CPUUsage field
// is omitempty, an all-zero Usage is encoded as an empty object.
type CPU struct {
	Usage CPUUsage `json:"usage"`
}
    93  
// CPUUsage contains stats on CPU usage.
//
// Every field is omitted from the JSON encoding when zero or empty. Total is
// the only field set in this file: containerManager.Event stores the value
// read from the cpuacct.usage cgroup control file there. When that read
// fails, Total is left at zero and EventOut.ContainerUsage is filled in
// instead.
type CPUUsage struct {
	Kernel uint64   `json:"kernel,omitempty"`
	User   uint64   `json:"user,omitempty"`
	Total  uint64   `json:"total,omitempty"`
	PerCPU []uint64 `json:"percpu,omitempty"`
}
   101  
   102  func (cm *containerManager) getUsageFromCgroups(file control.CgroupControlFile) (uint64, error) {
   103  	var out control.CgroupsResults
   104  	args := control.CgroupsReadArgs{
   105  		Args: []control.CgroupsReadArg{
   106  			{
   107  				File: file,
   108  			},
   109  		},
   110  	}
   111  	cgroups := control.Cgroups{Kernel: cm.l.k}
   112  	if err := cgroups.ReadControlFiles(&args, &out); err != nil {
   113  		return 0, err
   114  	}
   115  	if len(out.Results) != 1 {
   116  		return 0, fmt.Errorf("expected 1 result, got %d, raw: %+v", len(out.Results), out)
   117  	}
   118  	val, err := out.Results[0].Unpack()
   119  	if err != nil {
   120  		return 0, err
   121  	}
   122  	usage, err := strconv.ParseUint(val, 10, 64)
   123  	if err != nil {
   124  		return 0, err
   125  	}
   126  	return usage, nil
   127  }
   128  
   129  // Event gets the events from the container.
   130  func (cm *containerManager) Event(cid *string, out *EventOut) error {
   131  	*out = EventOut{
   132  		Event: Event{
   133  			ID:   *cid,
   134  			Type: "stats",
   135  		},
   136  	}
   137  
   138  	// PIDs and check that container exists before going further.
   139  	pids, err := cm.l.pidsCount(*cid)
   140  	if err != nil {
   141  		return err
   142  	}
   143  	out.Event.Data.Pids.Current = uint64(pids)
   144  
   145  	networkStats, err := cm.l.networkStats()
   146  	if err != nil {
   147  		return err
   148  	}
   149  	out.Event.Data.NetworkInterfaces = networkStats
   150  
   151  	numContainers := cm.l.containerCount()
   152  	if numContainers == 0 {
   153  		return fmt.Errorf("no container was found")
   154  	}
   155  
   156  	// Memory usage.
   157  	memFile := control.CgroupControlFile{"memory", "/" + *cid, "memory.usage_in_bytes"}
   158  	memUsage, err := cm.getUsageFromCgroups(memFile)
   159  	if err != nil {
   160  		// Cgroups is not installed or there was an error to get usage
   161  		// from the cgroups. Fall back to the old method of getting the
   162  		// usage from the sentry.
   163  		log.Warningf("could not get container memory usage from cgroups, error:  %v", err)
   164  
   165  		mem := cm.l.k.MemoryFile()
   166  		_ = mem.UpdateUsage(nil) // best effort to update.
   167  		_, totalUsage := usage.MemoryAccounting.Copy()
   168  		if numContainers == 1 {
   169  			memUsage = totalUsage
   170  		} else {
   171  			// In the multi-container case, reports 0 for the root (pause)
   172  			// container, since it's small and idle. Then equally split the
   173  			// usage to the other containers. At least the sum of all
   174  			// containers will correctly account for the memory used by the
   175  			// sandbox.
   176  			if *cid == cm.l.sandboxID {
   177  				memUsage = 0
   178  			} else {
   179  				memUsage = totalUsage / uint64(numContainers-1)
   180  			}
   181  		}
   182  	}
   183  	out.Event.Data.Memory.Usage.Usage = memUsage
   184  
   185  	// CPU usage by container.
   186  	cpuacctFile := control.CgroupControlFile{"cpuacct", "/" + *cid, "cpuacct.usage"}
   187  	if cpuUsage, err := cm.getUsageFromCgroups(cpuacctFile); err != nil {
   188  		// Cgroups is not installed or there was an error to get usage
   189  		// from the cgroups. Fall back to the old method of getting the
   190  		// usage from the sentry and host cgroups.
   191  		log.Warningf("could not get container cpu usage from cgroups, error:  %v", err)
   192  
   193  		out.ContainerUsage = control.ContainerUsage(cm.l.k)
   194  	} else {
   195  		out.Event.Data.CPU.Usage.Total = cpuUsage
   196  	}
   197  	return nil
   198  }