github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/util/cgroups/cgroups.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package cgroups
    12  
    13  import (
    14  	"bufio"
    15  	"bytes"
    16  	"fmt"
    17  	"io/ioutil"
    18  	"math"
    19  	"os"
    20  	"path/filepath"
    21  	"strconv"
    22  
    23  	"github.com/cockroachdb/errors"
    24  )
    25  
    26  const (
    27  	cgroupV1MemLimitFilename = "memory.stat"
    28  	cgroupV2MemLimitFilename = "memory.max"
    29  )
    30  
    31  // GetMemoryLimit attempts to retrieve the cgroup memory limit for the current
    32  // process
    33  func GetMemoryLimit() (limit int64, warnings string, err error) {
    34  	return getCgroupMem("/")
    35  }
    36  
    37  // `root` is set to "/" in production code and exists only for testing.
    38  // cgroup memory limit detection path implemented here as
    39  // /proc/self/cgroup file -> /proc/self/mountinfo mounts -> cgroup version -> version specific limit check
    40  func getCgroupMem(root string) (limit int64, warnings string, err error) {
    41  	path, err := detectMemCntrlPath(filepath.Join(root, "/proc/self/cgroup"))
    42  	if err != nil {
    43  		return 0, "", err
    44  	}
    45  
    46  	// no memory controller detected
    47  	if path == "" {
    48  		return 0, "no cgroup memory controller detected", nil
    49  	}
    50  
    51  	mount, ver, err := getCgroupDetails(filepath.Join(root, "/proc/self/mountinfo"), path)
    52  	if err != nil {
    53  		return 0, "", err
    54  	}
    55  
    56  	switch ver {
    57  	case 1:
    58  		limit, warnings, err = detectLimitInV1(filepath.Join(root, mount))
    59  	case 2:
    60  		limit, warnings, err = detectLimitInV2(filepath.Join(root, mount, path))
    61  	default:
    62  		limit, err = 0, fmt.Errorf("detected unknown cgroup version index: %d", ver)
    63  	}
    64  
    65  	return limit, warnings, err
    66  }
    67  
    68  // Finds memory limit for cgroup V1 via looking in [contoller mount path]/memory.stat
    69  func detectLimitInV1(cRoot string) (limit int64, warnings string, err error) {
    70  	statFilePath := filepath.Join(cRoot, cgroupV1MemLimitFilename)
    71  	stat, err := os.Open(statFilePath)
    72  	if err != nil {
    73  		return 0, "", errors.Wrapf(err, "can't read available memory from cgroup v1 at %s", statFilePath)
    74  	}
    75  	defer func() {
    76  		_ = stat.Close()
    77  	}()
    78  
    79  	scanner := bufio.NewScanner(stat)
    80  	for scanner.Scan() {
    81  		fields := bytes.Fields(scanner.Bytes())
    82  		if len(fields) != 2 || string(fields[0]) != "hierarchical_memory_limit" {
    83  			continue
    84  		}
    85  
    86  		trimmed := string(bytes.TrimSpace(fields[1]))
    87  		limit, err = strconv.ParseInt(trimmed, 10, 64)
    88  		if err != nil {
    89  			return 0, "", errors.Wrapf(err, "can't read available memory from cgroup v1 at %s", statFilePath)
    90  		}
    91  
    92  		return limit, "", nil
    93  	}
    94  
    95  	return 0, "", fmt.Errorf("failed to find expected memory limit for cgroup v1 in %s", statFilePath)
    96  }
    97  
    98  // Finds memory limit for cgroup V2 via looking into [controller mount path]/[leaf path]/memory.max
    99  // TODO(vladdy): this implementation was based on podman+criu environment. It may cover not
   100  // all the cases when v2 becomes more widely used in container world.
   101  func detectLimitInV2(cRoot string) (limit int64, warnings string, err error) {
   102  	limitFilePath := filepath.Join(cRoot, cgroupV2MemLimitFilename)
   103  
   104  	var buf []byte
   105  	if buf, err = ioutil.ReadFile(limitFilePath); err != nil {
   106  		return 0, "", errors.Wrapf(err, "can't read available memory from cgroup v2 at %s", limitFilePath)
   107  	}
   108  
   109  	trimmed := string(bytes.TrimSpace(buf))
   110  	if trimmed == "max" {
   111  		return math.MaxInt64, "", nil
   112  	}
   113  
   114  	limit, err = strconv.ParseInt(trimmed, 10, 64)
   115  	if err != nil {
   116  		return 0, "", errors.Wrapf(err, "can't parse available memory from cgroup v2 in %s", limitFilePath)
   117  	}
   118  	return limit, "", nil
   119  }
   120  
   121  // The controller is defined via either type `memory` for cgroup v1 or via empty type for cgroup v2,
   122  // where the type is the second field in /proc/[pid]/cgroup file
   123  func detectMemCntrlPath(cgroupFilePath string) (string, error) {
   124  	cgroup, err := os.Open(cgroupFilePath)
   125  	if err != nil {
   126  		return "", errors.Wrapf(err, "failed to read memory cgroup from cgroups file: %s", cgroupFilePath)
   127  	}
   128  	defer func() { _ = cgroup.Close() }()
   129  
   130  	scanner := bufio.NewScanner(cgroup)
   131  	var unifiedPathIfFound string
   132  	for scanner.Scan() {
   133  		fields := bytes.Split(scanner.Bytes(), []byte{':'})
   134  		if len(fields) != 3 {
   135  			// The lines should always have three fields, there's something fishy here.
   136  			continue
   137  		}
   138  
   139  		f0, f1 := string(fields[0]), string(fields[1])
   140  		// First case if v2, second - v1. We give v2 the priority here.
   141  		// There is also a `hybrid` mode when both  versions are enabled,
   142  		// but no known container solutions support it afaik
   143  		if f0 == "0" && f1 == "" {
   144  			unifiedPathIfFound = string(fields[2])
   145  		} else if f1 == "memory" {
   146  			return string(fields[2]), nil
   147  		}
   148  	}
   149  
   150  	return unifiedPathIfFound, nil
   151  }
   152  
   153  // Reads /proc/[pid]/mountinfo for cgoup or cgroup2 mount which defines the used version.
   154  // See http://man7.org/linux/man-pages/man5/proc.5.html for `mountinfo` format.
   155  func getCgroupDetails(mountinfoPath string, cRoot string) (string, int, error) {
   156  	info, err := os.Open(mountinfoPath)
   157  	if err != nil {
   158  		return "", 0, errors.Wrapf(err, "failed to read mounts info from file: %s", mountinfoPath)
   159  	}
   160  	defer func() {
   161  		_ = info.Close()
   162  	}()
   163  
   164  	scanner := bufio.NewScanner(info)
   165  	for scanner.Scan() {
   166  		fields := bytes.Fields(scanner.Bytes())
   167  		if len(fields) < 10 {
   168  			continue
   169  		}
   170  
   171  		ver, ok := detectCgroupVersion(fields)
   172  		if ok && (ver == 1 && string(fields[3]) == cRoot) || ver == 2 {
   173  			return string(fields[4]), ver, nil
   174  		}
   175  	}
   176  
   177  	return "", 0, fmt.Errorf("failed to detect cgroup root mount and version")
   178  }
   179  
   180  // Return version of cgroup mount for memory controller if found
   181  func detectCgroupVersion(fields [][]byte) (_ int, found bool) {
   182  	if len(fields) < 10 {
   183  		return 0, false
   184  	}
   185  
   186  	// Due to strange format there can be optional fields in the middle of the set, starting
   187  	// from the field #7. The end of the fields is marked with "-" field
   188  	var pos = 6
   189  	for pos < len(fields) {
   190  		if bytes.Equal(fields[pos], []byte{'-'}) {
   191  			break
   192  		}
   193  
   194  		pos++
   195  	}
   196  
   197  	// No optional fields separator found or there is less than 3 fields after it which is wrong
   198  	if (len(fields) - pos - 1) < 3 {
   199  		return 0, false
   200  	}
   201  
   202  	pos++
   203  
   204  	// Check for memory controller specifically in cgroup v1 (it is listed in super options field),
   205  	// as the limit can't be found if it is not enforced
   206  	if bytes.Equal(fields[pos], []byte("cgroup")) && bytes.Contains(fields[pos+2], []byte("memory")) {
   207  		return 1, true
   208  	} else if bytes.Equal(fields[pos], []byte("cgroup2")) {
   209  		return 2, true
   210  	}
   211  
   212  	return 0, false
   213  }