github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/syscalls/linux/sys_rlimit.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package linux
    16  
    17  import (
    18  	"github.com/metacubex/gvisor/pkg/abi/linux"
    19  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    20  	"github.com/metacubex/gvisor/pkg/hostarch"
    21  	"github.com/metacubex/gvisor/pkg/marshal"
    22  	"github.com/metacubex/gvisor/pkg/sentry/arch"
    23  	"github.com/metacubex/gvisor/pkg/sentry/kernel"
    24  	"github.com/metacubex/gvisor/pkg/sentry/limits"
    25  )
    26  
// rlimit describes an implementation of 'struct rlimit', which may vary from
// system-to-system.
//
// Besides the two conversion methods declared here, an rlimit carries the
// marshal.Marshallable method set, which is what the syscall handlers in this
// file use (via CopyIn and CopyOut) to move the value to and from task memory.
type rlimit interface {
	marshal.Marshallable

	// toLimit converts an rlimit to a limits.Limit.
	toLimit() *limits.Limit

	// fromLimit converts a limits.Limit to an rlimit, overwriting the
	// receiver's current contents.
	fromLimit(lim limits.Limit)
}
    38  
    39  // newRlimit returns the appropriate rlimit type for 'struct rlimit' on this system.
    40  func newRlimit(t *kernel.Task) (rlimit, error) {
    41  	switch t.Arch().Width() {
    42  	case 8:
    43  		// On 64-bit system, struct rlimit and struct rlimit64 are identical.
    44  		return &rlimit64{}, nil
    45  	default:
    46  		return nil, linuxerr.ENOSYS
    47  	}
    48  }
    49  
// rlimit64 is the rlimit implementation returned by newRlimit for 64-bit
// tasks, and the type Prlimit64 uses to copy limits in and out. Both fields
// hold values in Linux representation; toLimit and fromLimit translate them
// with limits.FromLinux and limits.ToLinux.
//
// +marshal
type rlimit64 struct {
	// Cur is the current value of the limit.
	Cur uint64
	// Max is the maximum value of the limit.
	Max uint64
}
    55  
    56  func (r *rlimit64) toLimit() *limits.Limit {
    57  	return &limits.Limit{
    58  		Cur: limits.FromLinux(r.Cur),
    59  		Max: limits.FromLinux(r.Max),
    60  	}
    61  }
    62  
    63  func (r *rlimit64) fromLimit(lim limits.Limit) {
    64  	*r = rlimit64{
    65  		Cur: limits.ToLinux(lim.Cur),
    66  		Max: limits.ToLinux(lim.Max),
    67  	}
    68  }
    69  
    70  func (r *rlimit64) copyIn(t *kernel.Task, addr hostarch.Addr) error {
    71  	_, err := r.CopyIn(t, addr)
    72  	return err
    73  }
    74  
    75  func (r *rlimit64) copyOut(t *kernel.Task, addr hostarch.Addr) error {
    76  	_, err := r.CopyOut(t, addr)
    77  	return err
    78  }
    79  
    80  func makeRlimit64(lim limits.Limit) *rlimit64 {
    81  	return &rlimit64{Cur: lim.Cur, Max: lim.Max}
    82  }
    83  
// setableLimits is the set of resource limits that may be changed through
// prlimit64. An attempt to set any resource that is not a key of this map is
// rejected with EPERM.
var setableLimits = map[limits.LimitType]struct{}{
	limits.NumberOfFiles: {},
	limits.AS:            {},
	limits.CPU:           {},
	limits.Data:          {},
	limits.FileSize:      {},
	limits.MemoryLocked:  {},
	limits.Stack:         {},
	// RSS can be set, but it's not enforced because Linux doesn't enforce it
	// either: "This limit has effect only in Linux 2.4.x, x < 30"
	limits.Rss: {},
	// These are not enforced, but we include them here to avoid returning
	// EPERM, since some apps expect them to succeed.
	limits.Core:         {},
	limits.ProcessCount: {},
}
   101  
   102  func prlimit64(t *kernel.Task, resource limits.LimitType, newLim *limits.Limit) (limits.Limit, error) {
   103  	if newLim == nil {
   104  		return t.ThreadGroup().Limits().Get(resource), nil
   105  	}
   106  
   107  	if _, ok := setableLimits[resource]; !ok {
   108  		return limits.Limit{}, linuxerr.EPERM
   109  	}
   110  
   111  	switch resource {
   112  	case limits.NumberOfFiles:
   113  		if newLim.Max > uint64(t.Kernel().MaxFDLimit.Load()) {
   114  			return limits.Limit{}, linuxerr.EPERM
   115  		}
   116  	}
   117  
   118  	// "A privileged process (under Linux: one with the CAP_SYS_RESOURCE
   119  	// capability in the initial user namespace) may make arbitrary changes
   120  	// to either limit value."
   121  	privileged := t.HasCapabilityIn(linux.CAP_SYS_RESOURCE, t.Kernel().RootUserNamespace())
   122  
   123  	oldLim, err := t.ThreadGroup().Limits().Set(resource, *newLim, privileged)
   124  	if err != nil {
   125  		return limits.Limit{}, err
   126  	}
   127  
   128  	if resource == limits.CPU {
   129  		t.NotifyRlimitCPUUpdated()
   130  	}
   131  	return oldLim, nil
   132  }
   133  
   134  // Getrlimit implements linux syscall getrlimit(2).
   135  func Getrlimit(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   136  	resource, ok := limits.FromLinuxResource[int(args[0].Int())]
   137  	if !ok {
   138  		// Return err; unknown limit.
   139  		return 0, nil, linuxerr.EINVAL
   140  	}
   141  	addr := args[1].Pointer()
   142  	rlim, err := newRlimit(t)
   143  	if err != nil {
   144  		return 0, nil, err
   145  	}
   146  	lim, err := prlimit64(t, resource, nil)
   147  	if err != nil {
   148  		return 0, nil, err
   149  	}
   150  	rlim.fromLimit(lim)
   151  	_, err = rlim.CopyOut(t, addr)
   152  	return 0, nil, err
   153  }
   154  
   155  // Setrlimit implements linux syscall setrlimit(2).
   156  func Setrlimit(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   157  	resource, ok := limits.FromLinuxResource[int(args[0].Int())]
   158  	if !ok {
   159  		// Return err; unknown limit.
   160  		return 0, nil, linuxerr.EINVAL
   161  	}
   162  	addr := args[1].Pointer()
   163  	rlim, err := newRlimit(t)
   164  	if err != nil {
   165  		return 0, nil, err
   166  	}
   167  	if _, err := rlim.CopyIn(t, addr); err != nil {
   168  		return 0, nil, linuxerr.EFAULT
   169  	}
   170  	_, err = prlimit64(t, resource, rlim.toLimit())
   171  	return 0, nil, err
   172  }
   173  
   174  // Prlimit64 implements linux syscall prlimit64(2).
   175  func Prlimit64(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   176  	tid := kernel.ThreadID(args[0].Int())
   177  	resource, ok := limits.FromLinuxResource[int(args[1].Int())]
   178  	if !ok {
   179  		// Return err; unknown limit.
   180  		return 0, nil, linuxerr.EINVAL
   181  	}
   182  	newRlimAddr := args[2].Pointer()
   183  	oldRlimAddr := args[3].Pointer()
   184  
   185  	var newLim *limits.Limit
   186  	if newRlimAddr != 0 {
   187  		var nrl rlimit64
   188  		if err := nrl.copyIn(t, newRlimAddr); err != nil {
   189  			return 0, nil, linuxerr.EFAULT
   190  		}
   191  		newLim = nrl.toLimit()
   192  	}
   193  
   194  	if tid < 0 {
   195  		return 0, nil, linuxerr.EINVAL
   196  	}
   197  	ot := t
   198  	if tid > 0 {
   199  		if ot = t.PIDNamespace().TaskWithID(tid); ot == nil {
   200  			return 0, nil, linuxerr.ESRCH
   201  		}
   202  	}
   203  
   204  	// "To set or get the resources of a process other than itself, the caller
   205  	// must have the CAP_SYS_RESOURCE capability, or the real, effective, and
   206  	// saved set user IDs of the target process must match the real user ID of
   207  	// the caller and the real, effective, and saved set group IDs of the
   208  	// target process must match the real group ID of the caller."
   209  	if ot != t && !t.HasCapabilityIn(linux.CAP_SYS_RESOURCE, t.PIDNamespace().UserNamespace()) {
   210  		cred, tcred := t.Credentials(), ot.Credentials()
   211  		if cred.RealKUID != tcred.RealKUID ||
   212  			cred.RealKUID != tcred.EffectiveKUID ||
   213  			cred.RealKUID != tcred.SavedKUID ||
   214  			cred.RealKGID != tcred.RealKGID ||
   215  			cred.RealKGID != tcred.EffectiveKGID ||
   216  			cred.RealKGID != tcred.SavedKGID {
   217  			return 0, nil, linuxerr.EPERM
   218  		}
   219  	}
   220  
   221  	oldLim, err := prlimit64(ot, resource, newLim)
   222  	if err != nil {
   223  		return 0, nil, err
   224  	}
   225  
   226  	if oldRlimAddr != 0 {
   227  		if err := makeRlimit64(oldLim).copyOut(t, oldRlimAddr); err != nil {
   228  			return 0, nil, linuxerr.EFAULT
   229  		}
   230  	}
   231  
   232  	return 0, nil, nil
   233  }