github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/cgroups/systemd/common.go (about) 1 package systemd 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "math" 8 "os" 9 "strconv" 10 "strings" 11 "sync" 12 "time" 13 14 systemdDbus "github.com/coreos/go-systemd/v22/dbus" 15 dbus "github.com/godbus/dbus/v5" 16 "github.com/sirupsen/logrus" 17 18 "github.com/opencontainers/runc/libcontainer/cgroups" 19 "github.com/opencontainers/runc/libcontainer/configs" 20 ) 21 22 const ( 23 // Default kernel value for cpu quota period is 100000 us (100 ms), same for v1 and v2. 24 // v1: https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html and 25 // v2: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html 26 defCPUQuotaPeriod = uint64(100000) 27 ) 28 29 var ( 30 versionOnce sync.Once 31 version int 32 33 isRunningSystemdOnce sync.Once 34 isRunningSystemd bool 35 36 // GenerateDeviceProps is a function to generate systemd device 37 // properties, used by Set methods. Unless 38 // [github.com/opencontainers/runc/libcontainer/cgroups/devices] 39 // package is imported, it is set to nil, so cgroup managers can't 40 // configure devices. 41 GenerateDeviceProps func(r *configs.Resources, sdVer int) ([]systemdDbus.Property, error) 42 ) 43 44 // NOTE: This function comes from package github.com/coreos/go-systemd/util 45 // It was borrowed here to avoid a dependency on cgo. 46 // 47 // IsRunningSystemd checks whether the host was booted with systemd as its init 48 // system. This functions similarly to systemd's `sd_booted(3)`: internally, it 49 // checks whether /run/systemd/system/ exists and is a directory. 50 // http://www.freedesktop.org/software/systemd/man/sd_booted.html 51 func IsRunningSystemd() bool { 52 isRunningSystemdOnce.Do(func() { 53 fi, err := os.Lstat("/run/systemd/system") 54 isRunningSystemd = err == nil && fi.IsDir() 55 }) 56 return isRunningSystemd 57 } 58 59 // systemd represents slice hierarchy using `-`, so we need to follow suit when 60 // generating the path of slice. Essentially, test-a-b.slice becomes 61 // /test.slice/test-a.slice/test-a-b.slice. 62 func ExpandSlice(slice string) (string, error) { 63 suffix := ".slice" 64 // Name has to end with ".slice", but can't be just ".slice". 65 if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) { 66 return "", fmt.Errorf("invalid slice name: %s", slice) 67 } 68 69 // Path-separators are not allowed. 70 if strings.Contains(slice, "/") { 71 return "", fmt.Errorf("invalid slice name: %s", slice) 72 } 73 74 var path, prefix string 75 sliceName := strings.TrimSuffix(slice, suffix) 76 // if input was -.slice, we should just return root now 77 if sliceName == "-" { 78 return "/", nil 79 } 80 for _, component := range strings.Split(sliceName, "-") { 81 // test--a.slice isn't permitted, nor is -test.slice. 82 if component == "" { 83 return "", fmt.Errorf("invalid slice name: %s", slice) 84 } 85 86 // Append the component to the path and to the prefix. 87 path += "/" + prefix + component + suffix 88 prefix += component + "-" 89 } 90 return path, nil 91 } 92 93 func newProp(name string, units interface{}) systemdDbus.Property { 94 return systemdDbus.Property{ 95 Name: name, 96 Value: dbus.MakeVariant(units), 97 } 98 } 99 100 func getUnitName(c *configs.Cgroup) string { 101 // by default, we create a scope unless the user explicitly asks for a slice. 102 if !strings.HasSuffix(c.Name, ".slice") { 103 return c.ScopePrefix + "-" + c.Name + ".scope" 104 } 105 return c.Name 106 } 107 108 // This code should be in sync with getUnitName. 109 func getUnitType(unitName string) string { 110 if strings.HasSuffix(unitName, ".slice") { 111 return "Slice" 112 } 113 return "Scope" 114 } 115 116 // isDbusError returns true if the error is a specific dbus error. 117 func isDbusError(err error, name string) bool { 118 if err != nil { 119 var derr dbus.Error 120 if errors.As(err, &derr) { 121 return strings.Contains(derr.Name, name) 122 } 123 } 124 return false 125 } 126 127 // isUnitExists returns true if the error is that a systemd unit already exists. 128 func isUnitExists(err error) bool { 129 return isDbusError(err, "org.freedesktop.systemd1.UnitExists") 130 } 131 132 func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property, ignoreExist bool) error { 133 statusChan := make(chan string, 1) 134 retry := true 135 136 retry: 137 err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { 138 _, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan) 139 return err 140 }) 141 if err != nil { 142 if !isUnitExists(err) { 143 return err 144 } 145 if ignoreExist { 146 // TODO: remove this hack. 147 // This is kubelet making sure a slice exists (see 148 // https://github.com/opencontainers/runc/pull/1124). 149 return nil 150 } 151 if retry { 152 // In case a unit with the same name exists, this may 153 // be a leftover failed unit. Reset it, so systemd can 154 // remove it, and retry once. 155 err = resetFailedUnit(cm, unitName) 156 if err != nil { 157 logrus.Warnf("unable to reset failed unit: %v", err) 158 } 159 retry = false 160 goto retry 161 } 162 return err 163 } 164 165 timeout := time.NewTimer(30 * time.Second) 166 defer timeout.Stop() 167 168 select { 169 case s := <-statusChan: 170 close(statusChan) 171 // Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit 172 if s != "done" { 173 _ = resetFailedUnit(cm, unitName) 174 return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s) 175 } 176 case <-timeout.C: 177 _ = resetFailedUnit(cm, unitName) 178 return errors.New("Timeout waiting for systemd to create " + unitName) 179 } 180 181 return nil 182 } 183 184 func stopUnit(cm *dbusConnManager, unitName string) error { 185 statusChan := make(chan string, 1) 186 err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { 187 _, err := c.StopUnitContext(context.TODO(), unitName, "replace", statusChan) 188 return err 189 }) 190 if err == nil { 191 timeout := time.NewTimer(30 * time.Second) 192 defer timeout.Stop() 193 194 select { 195 case s := <-statusChan: 196 close(statusChan) 197 // Please refer to https://godoc.org/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit 198 if s != "done" { 199 logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s) 200 } 201 case <-timeout.C: 202 return errors.New("Timed out while waiting for systemd to remove " + unitName) 203 } 204 } 205 206 // In case of a failed unit, let systemd remove it. 207 _ = resetFailedUnit(cm, unitName) 208 209 return nil 210 } 211 212 func resetFailedUnit(cm *dbusConnManager, name string) error { 213 return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { 214 return c.ResetFailedUnitContext(context.TODO(), name) 215 }) 216 } 217 218 func getUnitTypeProperty(cm *dbusConnManager, unitName string, unitType string, propertyName string) (*systemdDbus.Property, error) { 219 var prop *systemdDbus.Property 220 err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) { 221 prop, Err = c.GetUnitTypePropertyContext(context.TODO(), unitName, unitType, propertyName) 222 return Err 223 }) 224 return prop, err 225 } 226 227 func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error { 228 return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { 229 return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...) 230 }) 231 } 232 233 func getManagerProperty(cm *dbusConnManager, name string) (string, error) { 234 str := "" 235 err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { 236 var err error 237 str, err = c.GetManagerProperty(name) 238 return err 239 }) 240 if err != nil { 241 return "", err 242 } 243 return strconv.Unquote(str) 244 } 245 246 func systemdVersion(cm *dbusConnManager) int { 247 versionOnce.Do(func() { 248 version = -1 249 verStr, err := getManagerProperty(cm, "Version") 250 if err == nil { 251 version, err = systemdVersionAtoi(verStr) 252 } 253 254 if err != nil { 255 logrus.WithError(err).Error("unable to get systemd version") 256 } 257 }) 258 259 return version 260 } 261 262 // systemdVersionAtoi extracts a numeric systemd version from the argument. 263 // The argument should be of the form: "v245.4-1.fc32", "245", "v245-1.fc32", 264 // "245-1.fc32" (with or without quotes). The result for all of the above 265 // should be 245. 266 func systemdVersionAtoi(str string) (int, error) { 267 // Unconditionally remove the leading prefix ("v). 268 str = strings.TrimLeft(str, `"v`) 269 // Match on the first integer we can grab. 270 for i := 0; i < len(str); i++ { 271 if str[i] < '0' || str[i] > '9' { 272 // First non-digit: cut the tail. 273 str = str[:i] 274 break 275 } 276 } 277 ver, err := strconv.Atoi(str) 278 if err != nil { 279 return -1, fmt.Errorf("can't parse version: %w", err) 280 } 281 return ver, nil 282 } 283 284 func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) { 285 if period != 0 { 286 // systemd only supports CPUQuotaPeriodUSec since v242 287 sdVer := systemdVersion(cm) 288 if sdVer >= 242 { 289 *properties = append(*properties, 290 newProp("CPUQuotaPeriodUSec", period)) 291 } else { 292 logrus.Debugf("systemd v%d is too old to support CPUQuotaPeriodSec "+ 293 " (setting will still be applied to cgroupfs)", sdVer) 294 } 295 } 296 if quota != 0 || period != 0 { 297 // corresponds to USEC_INFINITY in systemd 298 cpuQuotaPerSecUSec := uint64(math.MaxUint64) 299 if quota > 0 { 300 if period == 0 { 301 // assume the default 302 period = defCPUQuotaPeriod 303 } 304 // systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota 305 // (integer percentage of CPU) internally. This means that if a fractional percent of 306 // CPU is indicated by Resources.CpuQuota, we need to round up to the nearest 307 // 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect. 308 cpuQuotaPerSecUSec = uint64(quota*1000000) / period 309 if cpuQuotaPerSecUSec%10000 != 0 { 310 cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000 311 } 312 } 313 *properties = append(*properties, 314 newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec)) 315 } 316 } 317 318 func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems string) error { 319 if cpus == "" && mems == "" { 320 return nil 321 } 322 323 // systemd only supports AllowedCPUs/AllowedMemoryNodes since v244 324 sdVer := systemdVersion(cm) 325 if sdVer < 244 { 326 logrus.Debugf("systemd v%d is too old to support AllowedCPUs/AllowedMemoryNodes"+ 327 " (settings will still be applied to cgroupfs)", sdVer) 328 return nil 329 } 330 331 if cpus != "" { 332 bits, err := RangeToBits(cpus) 333 if err != nil { 334 return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w", 335 cpus, err) 336 } 337 *props = append(*props, 338 newProp("AllowedCPUs", bits)) 339 } 340 if mems != "" { 341 bits, err := RangeToBits(mems) 342 if err != nil { 343 return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w", 344 mems, err) 345 } 346 *props = append(*props, 347 newProp("AllowedMemoryNodes", bits)) 348 } 349 return nil 350 } 351 352 // generateDeviceProperties takes the configured device rules and generates a 353 // corresponding set of systemd properties to configure the devices correctly. 354 func generateDeviceProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) { 355 if GenerateDeviceProps == nil { 356 if len(r.Devices) > 0 { 357 return nil, cgroups.ErrDevicesUnsupported 358 } 359 return nil, nil 360 } 361 362 return GenerateDeviceProps(r, systemdVersion(cm)) 363 }