github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/cgroups/systemd/common.go (about)

     1  package systemd
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"math"
     8  	"os"
     9  	"strconv"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
    15  	dbus "github.com/godbus/dbus/v5"
    16  	"github.com/sirupsen/logrus"
    17  
    18  	"github.com/opencontainers/runc/libcontainer/cgroups"
    19  	"github.com/opencontainers/runc/libcontainer/configs"
    20  )
    21  
    22  const (
    23  	// Default kernel value for cpu quota period is 100000 us (100 ms), same for v1 and v2.
    24  	// v1: https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html and
    25  	// v2: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
    26  	defCPUQuotaPeriod = uint64(100000)
    27  )
    28  
    29  var (
    30  	versionOnce sync.Once
    31  	version     int
    32  
    33  	isRunningSystemdOnce sync.Once
    34  	isRunningSystemd     bool
    35  
    36  	// GenerateDeviceProps is a function to generate systemd device
    37  	// properties, used by Set methods. Unless
    38  	// [github.com/opencontainers/runc/libcontainer/cgroups/devices]
    39  	// package is imported, it is set to nil, so cgroup managers can't
    40  	// configure devices.
    41  	GenerateDeviceProps func(r *configs.Resources, sdVer int) ([]systemdDbus.Property, error)
    42  )
    43  
    44  // NOTE: This function comes from package github.com/coreos/go-systemd/util
    45  // It was borrowed here to avoid a dependency on cgo.
    46  //
    47  // IsRunningSystemd checks whether the host was booted with systemd as its init
    48  // system. This functions similarly to systemd's `sd_booted(3)`: internally, it
    49  // checks whether /run/systemd/system/ exists and is a directory.
    50  // http://www.freedesktop.org/software/systemd/man/sd_booted.html
    51  func IsRunningSystemd() bool {
    52  	isRunningSystemdOnce.Do(func() {
    53  		fi, err := os.Lstat("/run/systemd/system")
    54  		isRunningSystemd = err == nil && fi.IsDir()
    55  	})
    56  	return isRunningSystemd
    57  }
    58  
    59  // systemd represents slice hierarchy using `-`, so we need to follow suit when
    60  // generating the path of slice. Essentially, test-a-b.slice becomes
    61  // /test.slice/test-a.slice/test-a-b.slice.
    62  func ExpandSlice(slice string) (string, error) {
    63  	suffix := ".slice"
    64  	// Name has to end with ".slice", but can't be just ".slice".
    65  	if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
    66  		return "", fmt.Errorf("invalid slice name: %s", slice)
    67  	}
    68  
    69  	// Path-separators are not allowed.
    70  	if strings.Contains(slice, "/") {
    71  		return "", fmt.Errorf("invalid slice name: %s", slice)
    72  	}
    73  
    74  	var path, prefix string
    75  	sliceName := strings.TrimSuffix(slice, suffix)
    76  	// if input was -.slice, we should just return root now
    77  	if sliceName == "-" {
    78  		return "/", nil
    79  	}
    80  	for _, component := range strings.Split(sliceName, "-") {
    81  		// test--a.slice isn't permitted, nor is -test.slice.
    82  		if component == "" {
    83  			return "", fmt.Errorf("invalid slice name: %s", slice)
    84  		}
    85  
    86  		// Append the component to the path and to the prefix.
    87  		path += "/" + prefix + component + suffix
    88  		prefix += component + "-"
    89  	}
    90  	return path, nil
    91  }
    92  
    93  func newProp(name string, units interface{}) systemdDbus.Property {
    94  	return systemdDbus.Property{
    95  		Name:  name,
    96  		Value: dbus.MakeVariant(units),
    97  	}
    98  }
    99  
   100  func getUnitName(c *configs.Cgroup) string {
   101  	// by default, we create a scope unless the user explicitly asks for a slice.
   102  	if !strings.HasSuffix(c.Name, ".slice") {
   103  		return c.ScopePrefix + "-" + c.Name + ".scope"
   104  	}
   105  	return c.Name
   106  }
   107  
   108  // This code should be in sync with getUnitName.
   109  func getUnitType(unitName string) string {
   110  	if strings.HasSuffix(unitName, ".slice") {
   111  		return "Slice"
   112  	}
   113  	return "Scope"
   114  }
   115  
   116  // isDbusError returns true if the error is a specific dbus error.
   117  func isDbusError(err error, name string) bool {
   118  	if err != nil {
   119  		var derr dbus.Error
   120  		if errors.As(err, &derr) {
   121  			return strings.Contains(derr.Name, name)
   122  		}
   123  	}
   124  	return false
   125  }
   126  
   127  // isUnitExists returns true if the error is that a systemd unit already exists.
   128  func isUnitExists(err error) bool {
   129  	return isDbusError(err, "org.freedesktop.systemd1.UnitExists")
   130  }
   131  
   132  func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property, ignoreExist bool) error {
   133  	statusChan := make(chan string, 1)
   134  	retry := true
   135  
   136  retry:
   137  	err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
   138  		_, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan)
   139  		return err
   140  	})
   141  	if err != nil {
   142  		if !isUnitExists(err) {
   143  			return err
   144  		}
   145  		if ignoreExist {
   146  			// TODO: remove this hack.
   147  			// This is kubelet making sure a slice exists (see
   148  			// https://github.com/opencontainers/runc/pull/1124).
   149  			return nil
   150  		}
   151  		if retry {
   152  			// In case a unit with the same name exists, this may
   153  			// be a leftover failed unit. Reset it, so systemd can
   154  			// remove it, and retry once.
   155  			err = resetFailedUnit(cm, unitName)
   156  			if err != nil {
   157  				logrus.Warnf("unable to reset failed unit: %v", err)
   158  			}
   159  			retry = false
   160  			goto retry
   161  		}
   162  		return err
   163  	}
   164  
   165  	timeout := time.NewTimer(30 * time.Second)
   166  	defer timeout.Stop()
   167  
   168  	select {
   169  	case s := <-statusChan:
   170  		close(statusChan)
   171  		// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
   172  		if s != "done" {
   173  			_ = resetFailedUnit(cm, unitName)
   174  			return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
   175  		}
   176  	case <-timeout.C:
   177  		_ = resetFailedUnit(cm, unitName)
   178  		return errors.New("Timeout waiting for systemd to create " + unitName)
   179  	}
   180  
   181  	return nil
   182  }
   183  
   184  func stopUnit(cm *dbusConnManager, unitName string) error {
   185  	statusChan := make(chan string, 1)
   186  	err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
   187  		_, err := c.StopUnitContext(context.TODO(), unitName, "replace", statusChan)
   188  		return err
   189  	})
   190  	if err == nil {
   191  		timeout := time.NewTimer(30 * time.Second)
   192  		defer timeout.Stop()
   193  
   194  		select {
   195  		case s := <-statusChan:
   196  			close(statusChan)
   197  			// Please refer to https://godoc.org/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
   198  			if s != "done" {
   199  				logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
   200  			}
   201  		case <-timeout.C:
   202  			return errors.New("Timed out while waiting for systemd to remove " + unitName)
   203  		}
   204  	}
   205  
   206  	// In case of a failed unit, let systemd remove it.
   207  	_ = resetFailedUnit(cm, unitName)
   208  
   209  	return nil
   210  }
   211  
   212  func resetFailedUnit(cm *dbusConnManager, name string) error {
   213  	return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
   214  		return c.ResetFailedUnitContext(context.TODO(), name)
   215  	})
   216  }
   217  
   218  func getUnitTypeProperty(cm *dbusConnManager, unitName string, unitType string, propertyName string) (*systemdDbus.Property, error) {
   219  	var prop *systemdDbus.Property
   220  	err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) {
   221  		prop, Err = c.GetUnitTypePropertyContext(context.TODO(), unitName, unitType, propertyName)
   222  		return Err
   223  	})
   224  	return prop, err
   225  }
   226  
   227  func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error {
   228  	return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
   229  		return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...)
   230  	})
   231  }
   232  
   233  func getManagerProperty(cm *dbusConnManager, name string) (string, error) {
   234  	str := ""
   235  	err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
   236  		var err error
   237  		str, err = c.GetManagerProperty(name)
   238  		return err
   239  	})
   240  	if err != nil {
   241  		return "", err
   242  	}
   243  	return strconv.Unquote(str)
   244  }
   245  
   246  func systemdVersion(cm *dbusConnManager) int {
   247  	versionOnce.Do(func() {
   248  		version = -1
   249  		verStr, err := getManagerProperty(cm, "Version")
   250  		if err == nil {
   251  			version, err = systemdVersionAtoi(verStr)
   252  		}
   253  
   254  		if err != nil {
   255  			logrus.WithError(err).Error("unable to get systemd version")
   256  		}
   257  	})
   258  
   259  	return version
   260  }
   261  
   262  // systemdVersionAtoi extracts a numeric systemd version from the argument.
   263  // The argument should be of the form: "v245.4-1.fc32", "245", "v245-1.fc32",
   264  // "245-1.fc32" (with or without quotes). The result for all of the above
   265  // should be 245.
   266  func systemdVersionAtoi(str string) (int, error) {
   267  	// Unconditionally remove the leading prefix ("v).
   268  	str = strings.TrimLeft(str, `"v`)
   269  	// Match on the first integer we can grab.
   270  	for i := 0; i < len(str); i++ {
   271  		if str[i] < '0' || str[i] > '9' {
   272  			// First non-digit: cut the tail.
   273  			str = str[:i]
   274  			break
   275  		}
   276  	}
   277  	ver, err := strconv.Atoi(str)
   278  	if err != nil {
   279  		return -1, fmt.Errorf("can't parse version: %w", err)
   280  	}
   281  	return ver, nil
   282  }
   283  
   284  func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) {
   285  	if period != 0 {
   286  		// systemd only supports CPUQuotaPeriodUSec since v242
   287  		sdVer := systemdVersion(cm)
   288  		if sdVer >= 242 {
   289  			*properties = append(*properties,
   290  				newProp("CPUQuotaPeriodUSec", period))
   291  		} else {
   292  			logrus.Debugf("systemd v%d is too old to support CPUQuotaPeriodSec "+
   293  				" (setting will still be applied to cgroupfs)", sdVer)
   294  		}
   295  	}
   296  	if quota != 0 || period != 0 {
   297  		// corresponds to USEC_INFINITY in systemd
   298  		cpuQuotaPerSecUSec := uint64(math.MaxUint64)
   299  		if quota > 0 {
   300  			if period == 0 {
   301  				// assume the default
   302  				period = defCPUQuotaPeriod
   303  			}
   304  			// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
   305  			// (integer percentage of CPU) internally.  This means that if a fractional percent of
   306  			// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
   307  			// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
   308  			cpuQuotaPerSecUSec = uint64(quota*1000000) / period
   309  			if cpuQuotaPerSecUSec%10000 != 0 {
   310  				cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
   311  			}
   312  		}
   313  		*properties = append(*properties,
   314  			newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
   315  	}
   316  }
   317  
   318  func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems string) error {
   319  	if cpus == "" && mems == "" {
   320  		return nil
   321  	}
   322  
   323  	// systemd only supports AllowedCPUs/AllowedMemoryNodes since v244
   324  	sdVer := systemdVersion(cm)
   325  	if sdVer < 244 {
   326  		logrus.Debugf("systemd v%d is too old to support AllowedCPUs/AllowedMemoryNodes"+
   327  			" (settings will still be applied to cgroupfs)", sdVer)
   328  		return nil
   329  	}
   330  
   331  	if cpus != "" {
   332  		bits, err := RangeToBits(cpus)
   333  		if err != nil {
   334  			return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w",
   335  				cpus, err)
   336  		}
   337  		*props = append(*props,
   338  			newProp("AllowedCPUs", bits))
   339  	}
   340  	if mems != "" {
   341  		bits, err := RangeToBits(mems)
   342  		if err != nil {
   343  			return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w",
   344  				mems, err)
   345  		}
   346  		*props = append(*props,
   347  			newProp("AllowedMemoryNodes", bits))
   348  	}
   349  	return nil
   350  }
   351  
   352  // generateDeviceProperties takes the configured device rules and generates a
   353  // corresponding set of systemd properties to configure the devices correctly.
   354  func generateDeviceProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
   355  	if GenerateDeviceProps == nil {
   356  		if len(r.Devices) > 0 {
   357  			return nil, cgroups.ErrDevicesUnsupported
   358  		}
   359  		return nil, nil
   360  	}
   361  
   362  	return GenerateDeviceProps(r, systemdVersion(cm))
   363  }