github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/lib/cgutil/cpuset_manager_v1.go (about)

     1  //go:build linux
     2  
     3  package cgutil
     4  
     5  import (
     6  	"context"
     7  	"fmt"
     8  	"io/ioutil"
     9  	"os"
    10  	"path/filepath"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/hashicorp/go-hclog"
    16  	"github.com/hashicorp/nomad/helper"
    17  	"github.com/hashicorp/nomad/lib/cpuset"
    18  	"github.com/hashicorp/nomad/nomad/structs"
    19  	"github.com/opencontainers/runc/libcontainer/cgroups"
    20  	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
    21  	"github.com/opencontainers/runc/libcontainer/configs"
    22  	"golang.org/x/sys/unix"
    23  )
    24  
const (
	// DefaultCgroupV1Parent is the cgroup parent NewCpusetManagerV1 falls
	// back to when it is given an empty parent.
	DefaultCgroupV1Parent    = "/nomad"
	// SharedCpusetCgroupName is the name of the child cgroup, directly under
	// the parent, that tasks without reserved cores are assigned to.
	SharedCpusetCgroupName   = "shared"
	// ReservedCpusetCgroupName is the name of the child cgroup, directly
	// under the parent, beneath which per-task cgroups for tasks with
	// reserved cores are created.
	ReservedCpusetCgroupName = "reserved"
)
    30  
    31  // NewCpusetManagerV1 creates a  CpusetManager compatible with cgroups.v1
    32  func NewCpusetManagerV1(cgroupParent string, _ []uint16, logger hclog.Logger) CpusetManager {
    33  	if cgroupParent == "" {
    34  		cgroupParent = DefaultCgroupV1Parent
    35  	}
    36  
    37  	cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", cgroupParent)
    38  	if err != nil {
    39  		logger.Warn("failed to get cgroup path; disable cpuset management", "error", err)
    40  		return new(NoopCpusetManager)
    41  	}
    42  
    43  	// ensures that shared cpuset exists and that the cpuset values are copied from the parent if created
    44  	if err = cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil {
    45  		logger.Warn("failed to ensure cgroup parent exists; disable cpuset management", "error", err)
    46  		return new(NoopCpusetManager)
    47  	}
    48  
    49  	parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath)
    50  	if err != nil {
    51  		logger.Warn("failed to detect parent cpuset settings; disable cpuset management", "error", err)
    52  		return new(NoopCpusetManager)
    53  	}
    54  
    55  	parentCpuset, err := cpuset.Parse(parentCpus)
    56  	if err != nil {
    57  		logger.Warn("failed to parse parent cpuset.cpus setting; disable cpuset management", "error", err)
    58  		return new(NoopCpusetManager)
    59  	}
    60  
    61  	// ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup
    62  	if err = os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err != nil {
    63  		logger.Warn("failed to ensure reserved cpuset.cpus interface exists; disable cpuset management", "error", err)
    64  		return new(NoopCpusetManager)
    65  	}
    66  
    67  	if err = cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil {
    68  		logger.Warn("failed to ensure reserved cpuset.mems interface exists; disable cpuset management", "error", err)
    69  		return new(NoopCpusetManager)
    70  	}
    71  
    72  	return &cpusetManagerV1{
    73  		parentCpuset:     parentCpuset,
    74  		cgroupParent:     cgroupParent,
    75  		cgroupParentPath: cgroupParentPath,
    76  		cgroupInfo:       map[string]allocTaskCgroupInfo{},
    77  		logger:           logger,
    78  	}
    79  }
    80  
var (
	// cpusetReconcileInterval is the delay reconcileLoop arms after each
	// reconcile pass; the next pass runs when it elapses unless a signal
	// re-arms the timer first.
	cpusetReconcileInterval = 30 * time.Second
)
    84  
// cpusetManagerV1 tracks the cpuset cgroup assignment of allocation tasks
// under a cgroups.v1 parent and periodically reconciles the shared, reserved
// and per-task cpuset cgroups on disk with that bookkeeping.
type cpusetManagerV1 struct {
	// cgroupParent relative to the cgroup root. ex. '/nomad'
	cgroupParent string
	// cgroupParentPath is the absolute path to the cgroup parent.
	cgroupParentPath string

	// parentCpuset is the parsed cpuset.cpus value of the cgroup parent, read
	// once at construction; reconciliation derives the shared set from it.
	parentCpuset cpuset.CPUSet

	// all exported functions are synchronized
	mu sync.Mutex

	// cgroupInfo maps an allocation ID to the cgroup info of its tasks.
	cgroupInfo map[string]allocTaskCgroupInfo

	// doneCh and signalCh are created by Init. reconcileLoop returns when a
	// receive on doneCh succeeds; a receive on signalCh schedules a reconcile
	// pass shortly afterwards.
	doneCh   chan struct{}
	signalCh chan struct{}
	logger   hclog.Logger
}
   102  
   103  func (c *cpusetManagerV1) AddAlloc(alloc *structs.Allocation) {
   104  	if alloc == nil || alloc.AllocatedResources == nil {
   105  		return
   106  	}
   107  	allocInfo := allocTaskCgroupInfo{}
   108  	for task, resources := range alloc.AllocatedResources.Tasks {
   109  		taskCpuset := cpuset.New(resources.Cpu.ReservedCores...)
   110  		cgroupPath := filepath.Join(c.cgroupParentPath, SharedCpusetCgroupName)
   111  		relativeCgroupPath := filepath.Join(c.cgroupParent, SharedCpusetCgroupName)
   112  		if taskCpuset.Size() > 0 {
   113  			cgroupPath, relativeCgroupPath = c.getCgroupPathsForTask(alloc.ID, task)
   114  		}
   115  		allocInfo[task] = &TaskCgroupInfo{
   116  			CgroupPath:         cgroupPath,
   117  			RelativeCgroupPath: relativeCgroupPath,
   118  			Cpuset:             taskCpuset,
   119  		}
   120  	}
   121  	c.mu.Lock()
   122  	c.cgroupInfo[alloc.ID] = allocInfo
   123  	c.mu.Unlock()
   124  	go c.signalReconcile()
   125  }
   126  
   127  func (c *cpusetManagerV1) RemoveAlloc(allocID string) {
   128  	c.mu.Lock()
   129  	delete(c.cgroupInfo, allocID)
   130  	c.mu.Unlock()
   131  	go c.signalReconcile()
   132  }
   133  
   134  func (c *cpusetManagerV1) CgroupPathFor(allocID, task string) CgroupPathGetter {
   135  	return func(ctx context.Context) (string, error) {
   136  		c.mu.Lock()
   137  		allocInfo, ok := c.cgroupInfo[allocID]
   138  		if !ok {
   139  			c.mu.Unlock()
   140  			return "", fmt.Errorf("alloc not found for id %q", allocID)
   141  		}
   142  
   143  		taskInfo, ok := allocInfo[task]
   144  		c.mu.Unlock()
   145  		if !ok {
   146  			return "", fmt.Errorf("task %q not found", task)
   147  		}
   148  
   149  		timer, stop := helper.NewSafeTimer(0)
   150  		defer stop()
   151  
   152  		for {
   153  
   154  			if taskInfo.Error != nil {
   155  				break
   156  			}
   157  
   158  			if _, err := os.Stat(taskInfo.CgroupPath); os.IsNotExist(err) {
   159  				select {
   160  				case <-ctx.Done():
   161  					return taskInfo.CgroupPath, ctx.Err()
   162  				case <-timer.C:
   163  					timer.Reset(100 * time.Millisecond)
   164  					continue
   165  				}
   166  			}
   167  			break
   168  		}
   169  
   170  		return taskInfo.CgroupPath, taskInfo.Error
   171  	}
   172  
   173  }
   174  
// allocTaskCgroupInfo holds the cgroup info of every task of one allocation.
// task name -> task cgroup info
type allocTaskCgroupInfo map[string]*TaskCgroupInfo
   177  
   178  // Init checks that the cgroup parent and expected child cgroups have been created
   179  // If the cgroup parent is set to /nomad then this will ensure that the /nomad/shared
   180  // cgroup is initialized.
   181  func (c *cpusetManagerV1) Init() {
   182  	c.doneCh = make(chan struct{})
   183  	c.signalCh = make(chan struct{})
   184  	c.logger.Info("initialized cpuset cgroup manager", "parent", c.cgroupParent, "cpuset", c.parentCpuset.String())
   185  	go c.reconcileLoop()
   186  }
   187  
   188  func (c *cpusetManagerV1) reconcileLoop() {
   189  	timer := time.NewTimer(0)
   190  	if !timer.Stop() {
   191  		<-timer.C
   192  	}
   193  	defer timer.Stop()
   194  
   195  	for {
   196  		select {
   197  		case <-c.doneCh:
   198  			c.logger.Debug("shutting down reconcile loop")
   199  			return
   200  		case <-c.signalCh:
   201  			timer.Reset(500 * time.Millisecond)
   202  		case <-timer.C:
   203  			c.reconcileCpusets()
   204  			timer.Reset(cpusetReconcileInterval)
   205  		}
   206  	}
   207  }
   208  
   209  func (c *cpusetManagerV1) reconcileCpusets() {
   210  	c.mu.Lock()
   211  	defer c.mu.Unlock()
   212  	sharedCpuset := cpuset.New(c.parentCpuset.ToSlice()...)
   213  	reservedCpuset := cpuset.New()
   214  	taskCpusets := map[string]*TaskCgroupInfo{}
   215  	for _, alloc := range c.cgroupInfo {
   216  		for _, task := range alloc {
   217  			if task.Cpuset.Size() == 0 {
   218  				continue
   219  			}
   220  			sharedCpuset = sharedCpuset.Difference(task.Cpuset)
   221  			reservedCpuset = reservedCpuset.Union(task.Cpuset)
   222  			taskCpusets[task.CgroupPath] = task
   223  		}
   224  	}
   225  
   226  	// look for reserved cpusets which we don't know about and remove
   227  	files, err := ioutil.ReadDir(c.reservedCpusetPath())
   228  	if err != nil {
   229  		c.logger.Error("failed to list files in reserved cgroup path during reconciliation", "path", c.reservedCpusetPath(), "error", err)
   230  	}
   231  	for _, f := range files {
   232  		if !f.IsDir() {
   233  			continue
   234  		}
   235  		path := filepath.Join(c.reservedCpusetPath(), f.Name())
   236  		if _, ok := taskCpusets[path]; ok {
   237  			continue
   238  		}
   239  		c.logger.Debug("removing reserved cpuset cgroup", "path", path)
   240  		err := cgroups.RemovePaths(map[string]string{"cpuset": path})
   241  		if err != nil {
   242  			c.logger.Error("removal of existing cpuset cgroup failed", "path", path, "error", err)
   243  		}
   244  	}
   245  
   246  	if err := c.setCgroupCpusetCPUs(c.sharedCpusetPath(), sharedCpuset.String()); err != nil {
   247  		c.logger.Error("could not write shared cpuset.cpus", "path", c.sharedCpusetPath(), "cpuset.cpus", sharedCpuset.String(), "error", err)
   248  	}
   249  	if err := c.setCgroupCpusetCPUs(c.reservedCpusetPath(), reservedCpuset.String()); err != nil {
   250  		c.logger.Error("could not write reserved cpuset.cpus", "path", c.reservedCpusetPath(), "cpuset.cpus", reservedCpuset.String(), "error", err)
   251  	}
   252  	for _, info := range taskCpusets {
   253  		if err := os.Mkdir(info.CgroupPath, 0755); err != nil && !os.IsExist(err) {
   254  			c.logger.Error("failed to create new cgroup path for task", "path", info.CgroupPath, "error", err)
   255  			info.Error = err
   256  			continue
   257  		}
   258  
   259  		// copy cpuset.mems from parent
   260  		_, parentMems, err := getCpusetSubsystemSettingsV1(filepath.Dir(info.CgroupPath))
   261  		if err != nil {
   262  			c.logger.Error("failed to read parent cgroup settings for task", "path", info.CgroupPath, "error", err)
   263  			info.Error = err
   264  			continue
   265  		}
   266  		if err := cgroups.WriteFile(info.CgroupPath, "cpuset.mems", parentMems); err != nil {
   267  			c.logger.Error("failed to write cgroup cpuset.mems setting for task", "path", info.CgroupPath, "mems", parentMems, "error", err)
   268  			info.Error = err
   269  			continue
   270  		}
   271  		if err := c.setCgroupCpusetCPUs(info.CgroupPath, info.Cpuset.String()); err != nil {
   272  			c.logger.Error("failed to write cgroup cpuset.cpus settings for task", "path", info.CgroupPath, "cpus", info.Cpuset.String(), "error", err)
   273  			info.Error = err
   274  			continue
   275  		}
   276  	}
   277  }
   278  
   279  // setCgroupCpusetCPUs will compare an existing cpuset.cpus value with an expected value, overwriting the existing if different
   280  // must hold a lock on cpusetManagerV1.mu before calling
   281  func (_ *cpusetManagerV1) setCgroupCpusetCPUs(path, cpus string) error {
   282  	currentCpusRaw, err := cgroups.ReadFile(path, "cpuset.cpus")
   283  	if err != nil {
   284  		return err
   285  	}
   286  
   287  	if cpus != strings.TrimSpace(currentCpusRaw) {
   288  		if err := cgroups.WriteFile(path, "cpuset.cpus", cpus); err != nil {
   289  			return err
   290  		}
   291  	}
   292  	return nil
   293  }
   294  
   295  func (c *cpusetManagerV1) signalReconcile() {
   296  	select {
   297  	case c.signalCh <- struct{}{}:
   298  	case <-c.doneCh:
   299  	}
   300  }
   301  
   302  func (c *cpusetManagerV1) getCgroupPathsForTask(allocID, task string) (absolute, relative string) {
   303  	return filepath.Join(c.reservedCpusetPath(), fmt.Sprintf("%s-%s", allocID, task)),
   304  		filepath.Join(c.cgroupParent, ReservedCpusetCgroupName, fmt.Sprintf("%s-%s", allocID, task))
   305  }
   306  
   307  func (c *cpusetManagerV1) sharedCpusetPath() string {
   308  	return filepath.Join(c.cgroupParentPath, SharedCpusetCgroupName)
   309  }
   310  
   311  func (c *cpusetManagerV1) reservedCpusetPath() string {
   312  	return filepath.Join(c.cgroupParentPath, ReservedCpusetCgroupName)
   313  }
   314  
   315  func getCPUsFromCgroupV1(group string) ([]uint16, error) {
   316  	cgroupPath, err := GetCgroupPathHelperV1("cpuset", group)
   317  	if err != nil {
   318  		return nil, err
   319  	}
   320  
   321  	cgroup := &configs.Cgroup{
   322  		Path:      group,
   323  		Resources: new(configs.Resources),
   324  	}
   325  
   326  	paths := map[string]string{
   327  		"cpuset": cgroupPath,
   328  	}
   329  
   330  	man, err := fs.NewManager(cgroup, paths)
   331  	if err != nil {
   332  		return nil, err
   333  	}
   334  
   335  	stats, err := man.GetStats()
   336  	if err != nil {
   337  		return nil, err
   338  	}
   339  
   340  	return stats.CPUSetStats.CPUs, nil
   341  }
   342  
   343  // cpusetEnsureParentV1 makes sure that the parent directories of current
   344  // are created and populated with the proper cpus and mems files copied
   345  // from their respective parent. It does that recursively, starting from
   346  // the top of the cpuset hierarchy (i.e. cpuset cgroup mount point).
   347  func cpusetEnsureParentV1(current string) error {
   348  	var st unix.Statfs_t
   349  
   350  	parent := filepath.Dir(current)
   351  	err := unix.Statfs(parent, &st)
   352  	if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC {
   353  		return nil
   354  	}
   355  	// Treat non-existing directory as cgroupfs as it will be created,
   356  	// and the root cpuset directory obviously exists.
   357  	if err != nil && err != unix.ENOENT {
   358  		return &os.PathError{Op: "statfs", Path: parent, Err: err}
   359  	}
   360  
   361  	if err := cpusetEnsureParentV1(parent); err != nil {
   362  		return err
   363  	}
   364  	if err := os.Mkdir(current, 0755); err != nil && !os.IsExist(err) {
   365  		return err
   366  	}
   367  	return cpusetCopyIfNeededV1(current, parent)
   368  }
   369  
   370  // cpusetCopyIfNeededV1 copies the cpuset.cpus and cpuset.mems from the parent
   371  // directory to the current directory if the file's contents are 0
   372  func cpusetCopyIfNeededV1(current, parent string) error {
   373  	currentCpus, currentMems, err := getCpusetSubsystemSettingsV1(current)
   374  	if err != nil {
   375  		return err
   376  	}
   377  	parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(parent)
   378  	if err != nil {
   379  		return err
   380  	}
   381  
   382  	if isEmptyCpusetV1(currentCpus) {
   383  		if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
   384  			return err
   385  		}
   386  	}
   387  	if isEmptyCpusetV1(currentMems) {
   388  		if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
   389  			return err
   390  		}
   391  	}
   392  	return nil
   393  }
   394  
   395  func getCpusetSubsystemSettingsV1(parent string) (cpus, mems string, err error) {
   396  	if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil {
   397  		return
   398  	}
   399  	if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil {
   400  		return
   401  	}
   402  	return cpus, mems, nil
   403  }
   404  
   405  func isEmptyCpusetV1(str string) bool {
   406  	return str == "" || str == "\n"
   407  }
   408  
   409  func GetCgroupPathHelperV1(subsystem, cgroup string) (string, error) {
   410  	mnt, root, err := cgroups.FindCgroupMountpointAndRoot("", subsystem)
   411  	if err != nil {
   412  		return "", err
   413  	}
   414  
   415  	// This is needed for nested containers, because in /proc/self/cgroup we
   416  	// see paths from host, which don't exist in container.
   417  	relCgroup, err := filepath.Rel(root, cgroup)
   418  	if err != nil {
   419  		return "", err
   420  	}
   421  
   422  	result := filepath.Join(mnt, relCgroup)
   423  	return result, nil
   424  }