github.com/kata-containers/runtime@v0.0.0-20210505125100-04f29832a923/virtcontainers/pkg/cgroups/manager.go (about)

     1  // Copyright (c) 2020 Intel Corporation
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  //
     5  
     6  package cgroups
     7  
     8  import (
     9  	"bufio"
    10  	"fmt"
    11  	"io/ioutil"
    12  	"os"
    13  	"path/filepath"
    14  	"strconv"
    15  	"strings"
    16  	"sync"
    17  
    18  	"github.com/kata-containers/runtime/virtcontainers/pkg/rootless"
    19  	libcontcgroups "github.com/opencontainers/runc/libcontainer/cgroups"
    20  	libcontcgroupsfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
    21  	libcontcgroupssystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
    22  	"github.com/opencontainers/runc/libcontainer/configs"
    23  	"github.com/opencontainers/runc/libcontainer/specconv"
    24  	"github.com/opencontainers/runtime-spec/specs-go"
    25  	"github.com/sirupsen/logrus"
    26  )
    27  
// Config carries the inputs New uses to build a Manager: an optional
// pre-built libcontainer cgroup configuration, any already-known cgroup
// paths, the OCI resource constraints and the OCI cgroup path.
type Config struct {
	// Cgroups specifies specific cgroup settings for the various subsystems that the container is
	// placed into to limit the resources the container has available
	// If nil, New() will create one.
	Cgroups *configs.Cgroup

	// CgroupPaths contains paths to all the cgroups setup for a container. Key is cgroup subsystem name
	// with the value as the path.
	CgroupPaths map[string]string

	// Resources represents the runtime resource constraints
	Resources specs.LinuxResources

	// CgroupPath is the OCI spec cgroup path
	CgroupPath string
}
    44  
// Manager wraps a libcontainer cgroups manager. The embedded mutex is taken
// by the Manager methods around their calls into mgr.
type Manager struct {
	sync.Mutex
	// mgr is the underlying libcontainer cgroups manager selected by New.
	mgr libcontcgroups.Manager
}
    49  
const (
	// cgroupProcs is the name of the file, inside a cgroup directory, that
	// contains the pids; readPids reads it and writePids writes to it.
	cgroupProcs = "cgroup.procs"
)
    54  
var (
	// cgroupsLogger is the package-level logger. SetLogger replaces it while
	// carrying over the fields already attached to it.
	cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups")
)
    58  
    59  // SetLogger sets up a logger for this pkg
    60  func SetLogger(logger *logrus.Entry) {
    61  	fields := cgroupsLogger.Data
    62  
    63  	cgroupsLogger = logger.WithFields(fields)
    64  }
    65  
    66  // returns the list of devices that a hypervisor may need
    67  func hypervisorDevices() []specs.LinuxDeviceCgroup {
    68  	devices := []specs.LinuxDeviceCgroup{}
    69  
    70  	// Processes running in a device-cgroup are constrained, they have acccess
    71  	// only to the devices listed in the devices.list file.
    72  	// In order to run Virtual Machines and create virtqueues, hypervisors
    73  	// need access to certain character devices in the host, like kvm and vhost-net.
    74  	hypervisorDevices := []string{
    75  		"/dev/kvm",       // To run virtual machines
    76  		"/dev/vhost-net", // To create virtqueues
    77  		"/dev/vfio/vfio", // To access VFIO devices
    78  	}
    79  
    80  	for _, device := range hypervisorDevices {
    81  		ldevice, err := DeviceToLinuxDevice(device)
    82  		if err != nil {
    83  			cgroupsLogger.WithError(err).Warnf("Could not get device information")
    84  			continue
    85  		}
    86  		devices = append(devices, ldevice)
    87  	}
    88  
    89  	return devices
    90  }
    91  
    92  // New creates a new CgroupManager
    93  func New(config *Config) (*Manager, error) {
    94  	var err error
    95  
    96  	devices := config.Resources.Devices
    97  	devices = append(devices, hypervisorDevices()...)
    98  	// Do not modify original devices
    99  	config.Resources.Devices = devices
   100  
   101  	newSpec := specs.Spec{
   102  		Linux: &specs.Linux{
   103  			Resources: &config.Resources,
   104  		},
   105  	}
   106  
   107  	rootless := rootless.IsRootless()
   108  
   109  	cgroups := config.Cgroups
   110  	cgroupPaths := config.CgroupPaths
   111  
   112  	// determine if we are utilizing systemd managed cgroups based on the path provided
   113  	useSystemdCgroup := IsSystemdCgroup(config.CgroupPath)
   114  
   115  	// Create a new cgroup if the current one is nil
   116  	// this cgroups must be saved later
   117  	if cgroups == nil {
   118  		if config.CgroupPath == "" && !rootless {
   119  			cgroupsLogger.Warn("cgroups have not been created and cgroup path is empty")
   120  		}
   121  
   122  		newSpec.Linux.CgroupsPath, err = ValidCgroupPath(config.CgroupPath, useSystemdCgroup)
   123  		if err != nil {
   124  			return nil, fmt.Errorf("Invalid cgroup path: %v", err)
   125  		}
   126  
   127  		if cgroups, err = specconv.CreateCgroupConfig(&specconv.CreateOpts{
   128  			// cgroup name is taken from spec
   129  			CgroupName:       "",
   130  			UseSystemdCgroup: useSystemdCgroup,
   131  			Spec:             &newSpec,
   132  			RootlessCgroups:  rootless,
   133  		}); err != nil {
   134  			return nil, fmt.Errorf("Could not create cgroup config: %v", err)
   135  		}
   136  	}
   137  
   138  	// Set cgroupPaths to nil when the map is empty, it can and will be
   139  	// populated by `Manager.Apply()` when the runtime or any other process
   140  	// is moved to the cgroup.
   141  	if len(cgroupPaths) == 0 {
   142  		cgroupPaths = nil
   143  	}
   144  
   145  	if useSystemdCgroup {
   146  		systemdCgroupFunc, err := libcontcgroupssystemd.NewSystemdCgroupsManager()
   147  		if err != nil {
   148  			return nil, fmt.Errorf("Could not create systemd cgroup manager: %v", err)
   149  		}
   150  		libcontcgroupssystemd.UseSystemd()
   151  		return &Manager{
   152  			mgr: systemdCgroupFunc(cgroups, cgroupPaths),
   153  		}, nil
   154  	}
   155  
   156  	return &Manager{
   157  		mgr: &libcontcgroupsfs.Manager{
   158  			Cgroups:  cgroups,
   159  			Rootless: rootless,
   160  			Paths:    cgroupPaths,
   161  		},
   162  	}, nil
   163  }
   164  
   165  // read all the pids in cgroupPath
   166  func readPids(cgroupPath string) ([]int, error) {
   167  	pids := []int{}
   168  	f, err := os.Open(filepath.Join(cgroupPath, cgroupProcs))
   169  	if err != nil {
   170  		return nil, err
   171  	}
   172  	defer f.Close()
   173  	buf := bufio.NewScanner(f)
   174  
   175  	for buf.Scan() {
   176  		if t := buf.Text(); t != "" {
   177  			pid, err := strconv.Atoi(t)
   178  			if err != nil {
   179  				return nil, err
   180  			}
   181  			pids = append(pids, pid)
   182  		}
   183  	}
   184  	return pids, nil
   185  }
   186  
   187  // write the pids into cgroup.procs
   188  func writePids(pids []int, cgroupPath string) error {
   189  	cgroupProcsPath := filepath.Join(cgroupPath, cgroupProcs)
   190  	for _, pid := range pids {
   191  		if err := ioutil.WriteFile(cgroupProcsPath,
   192  			[]byte(strconv.Itoa(pid)),
   193  			os.FileMode(0),
   194  		); err != nil {
   195  			return err
   196  		}
   197  	}
   198  	return nil
   199  }
   200  
   201  func (m *Manager) logger() *logrus.Entry {
   202  	return cgroupsLogger.WithField("source", "cgroup-manager")
   203  }
   204  
   205  // move all the processes in the current cgroup to the parent
   206  func (m *Manager) moveToParent() error {
   207  	m.Lock()
   208  	defer m.Unlock()
   209  	for _, cgroupPath := range m.mgr.GetPaths() {
   210  
   211  		pids, err := readPids(cgroupPath)
   212  		// possible that the cgroupPath doesn't exist. If so, skip:
   213  		if os.IsNotExist(err) {
   214  			// The cgroup is not present on the filesystem: no pids to move. The systemd cgroup
   215  			// manager lists all of the subsystems, including those that are not actually being managed.
   216  			continue
   217  		}
   218  		if err != nil {
   219  			return err
   220  		}
   221  
   222  		if len(pids) == 0 {
   223  			// no pids in this cgroup
   224  			continue
   225  		}
   226  
   227  		cgroupParentPath := filepath.Dir(filepath.Clean(cgroupPath))
   228  		if err = writePids(pids, cgroupParentPath); err != nil {
   229  			if !strings.Contains(err.Error(), "no such process") {
   230  				return err
   231  			}
   232  		}
   233  	}
   234  	return nil
   235  }
   236  
   237  // Add pid to cgroups
   238  func (m *Manager) Add(pid int) error {
   239  	if rootless.IsRootless() {
   240  		m.logger().Debug("Unable to setup add pids to cgroup: running rootless")
   241  		return nil
   242  	}
   243  
   244  	m.Lock()
   245  	defer m.Unlock()
   246  	return m.mgr.Apply(pid)
   247  }
   248  
   249  // Apply constraints
   250  func (m *Manager) Apply() error {
   251  	if rootless.IsRootless() {
   252  		m.logger().Debug("Unable to apply constraints: running rootless")
   253  		return nil
   254  	}
   255  
   256  	cgroups, err := m.GetCgroups()
   257  	if err != nil {
   258  		return err
   259  	}
   260  
   261  	m.Lock()
   262  	defer m.Unlock()
   263  	return m.mgr.Set(&configs.Config{
   264  		Cgroups: cgroups,
   265  	})
   266  }
   267  
   268  func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
   269  	m.Lock()
   270  	defer m.Unlock()
   271  	return m.mgr.GetCgroups()
   272  }
   273  
   274  func (m *Manager) GetPaths() map[string]string {
   275  	m.Lock()
   276  	defer m.Unlock()
   277  	return m.mgr.GetPaths()
   278  }
   279  
   280  func (m *Manager) Destroy() error {
   281  	// cgroup can't be destroyed if it contains running processes
   282  	if err := m.moveToParent(); err != nil {
   283  		// If the process migration to the parent cgroup fails, then
   284  		// we expect the Destroy to fail as well. Let's log an error here
   285  		// and attempt to execute the Destroy still to help cleanup the hosts' FS.
   286  		m.logger().WithError(err).Error("Could not move processes into parent cgroup")
   287  	}
   288  
   289  	m.Lock()
   290  	defer m.Unlock()
   291  	return m.mgr.Destroy()
   292  }
   293  
   294  // AddDevice adds a device to the device cgroup
   295  func (m *Manager) AddDevice(device string) error {
   296  	cgroups, err := m.GetCgroups()
   297  	if err != nil {
   298  		return err
   299  	}
   300  
   301  	ld, err := DeviceToCgroupDevice(device)
   302  	if err != nil {
   303  		return err
   304  	}
   305  
   306  	m.Lock()
   307  	cgroups.Devices = append(cgroups.Devices, ld)
   308  	m.Unlock()
   309  
   310  	return m.Apply()
   311  }
   312  
   313  // RemoceDevice removed a device from the device cgroup
   314  func (m *Manager) RemoveDevice(device string) error {
   315  	cgroups, err := m.GetCgroups()
   316  	if err != nil {
   317  		return err
   318  	}
   319  
   320  	m.Lock()
   321  	for i, d := range cgroups.Devices {
   322  		if d.Path == device {
   323  			cgroups.Devices = append(cgroups.Devices[:i], cgroups.Devices[i+1:]...)
   324  			m.Unlock()
   325  			return m.Apply()
   326  		}
   327  	}
   328  	m.Unlock()
   329  	return fmt.Errorf("device %v not found in the cgroup", device)
   330  }
   331  
   332  func (m *Manager) SetCPUSet(cpuset, memset string) error {
   333  	cgroups, err := m.GetCgroups()
   334  	if err != nil {
   335  		return err
   336  	}
   337  
   338  	m.Lock()
   339  	cgroups.CpusetCpus = cpuset
   340  	cgroups.CpusetMems = memset
   341  	m.Unlock()
   342  
   343  	return m.Apply()
   344  }