github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/kernel/auth/capability_set.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package auth
    16  
    17  import (
    18  	"encoding/binary"
    19  	"fmt"
    20  
    21  	"github.com/metacubex/gvisor/pkg/abi/linux"
    22  	"github.com/metacubex/gvisor/pkg/bits"
    23  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    24  )
    25  
// A CapabilitySet is a set of capabilities implemented as a bitset. The zero
// value of CapabilitySet is a set containing no capabilities.
//
// Membership of a single capability is represented by the mask that
// bits.MaskOf64 yields for the capability's integer value (see
// CapabilitySetOf), so sets can be combined and intersected with the ordinary
// bitwise operators.
type CapabilitySet uint64
    29  
// VfsCapData is equivalent to Linux's cpu_vfs_cap_data, defined
// in Linux's include/linux/capability.h.
//
// It is the decoded form of a security.capability extended attribute as
// produced by VfsCapDataOf:
//
//   - MagicEtc is the first 32-bit word of the attribute. The revision is
//     extracted from it with linux.VFS_CAP_REVISION_MASK and the "effective"
//     flag with linux.VFS_CAP_FLAGS_EFFECTIVE.
//   - RootID is the root user ID of the namespace. VfsCapDataOf only fills it
//     in for version 3 data; it is left at zero for version 2 data.
//   - Permitted and Inheritable are the file's permitted and inheritable
//     capability sets, assembled from a low and a high 32-bit word each.
type VfsCapData struct {
	MagicEtc    uint32
	RootID      uint32
	Permitted   CapabilitySet
	Inheritable CapabilitySet
}
    38  
// AllCapabilities is a CapabilitySet containing all valid capabilities.
//
// It is built by taking the single-capability set for the position just past
// linux.CAP_LAST_CAP and subtracting one, which turns that one bit into a run
// of all the bits below it.
var AllCapabilities = CapabilitySetOf(linux.CAP_LAST_CAP+1) - 1
    41  
    42  // CapabilitySetOf returns a CapabilitySet containing only the given
    43  // capability.
    44  func CapabilitySetOf(cp linux.Capability) CapabilitySet {
    45  	return CapabilitySet(bits.MaskOf64(int(cp)))
    46  }
    47  
    48  // CapabilitySetOfMany returns a CapabilitySet containing the given capabilities.
    49  func CapabilitySetOfMany(cps []linux.Capability) CapabilitySet {
    50  	var cs uint64
    51  	for _, cp := range cps {
    52  		cs |= bits.MaskOf64(int(cp))
    53  	}
    54  	return CapabilitySet(cs)
    55  }
    56  
    57  // VfsCapDataOf returns a VfsCapData containing the file capabilities for the given slice of bytes.
    58  // For each field of the cap data, which are in the structure of either vfs_cap_data or vfs_ns_cap_data,
    59  // the bytes are ordered in little endian.
    60  func VfsCapDataOf(data []byte) (VfsCapData, error) {
    61  	var capData VfsCapData
    62  	size := len(data)
    63  	if size < linux.XATTR_CAPS_SZ_1 {
    64  		return capData, fmt.Errorf("the size of security.capability is too small, actual size: %v", size)
    65  	}
    66  	capData.MagicEtc = binary.LittleEndian.Uint32(data[:4])
    67  	capData.Permitted = CapabilitySet(binary.LittleEndian.Uint32(data[4:8]))
    68  	capData.Inheritable = CapabilitySet(binary.LittleEndian.Uint32(data[8:12]))
    69  	// The version of the file capabilities takes first 4 bytes of the given
    70  	// slice.
    71  	version := capData.MagicEtc & linux.VFS_CAP_REVISION_MASK
    72  	switch {
    73  	case version == linux.VFS_CAP_REVISION_3 && size >= linux.XATTR_CAPS_SZ_3:
    74  		// Like version 2 file capabilities, version 3 capability
    75  		// masks are 64 bits in size.  In addition, version 3 has
    76  		// the root user ID of namespace, which is encoded in the
    77  		// security.capability extended attribute.
    78  		capData.RootID = binary.LittleEndian.Uint32(data[20:24])
    79  		fallthrough
    80  	case version == linux.VFS_CAP_REVISION_2 && size >= linux.XATTR_CAPS_SZ_2:
    81  		capData.Permitted += CapabilitySet(binary.LittleEndian.Uint32(data[12:16])) << 32
    82  		capData.Inheritable += CapabilitySet(binary.LittleEndian.Uint32(data[16:20])) << 32
    83  	default:
    84  		return VfsCapData{}, fmt.Errorf("VFS_CAP_REVISION_%v with cap data size %v is not supported", version, size)
    85  	}
    86  	return capData, nil
    87  }
    88  
    89  // CapsFromVfsCaps returns a copy of the given creds with new capability sets
    90  // by applying the file capability that is specified by capData.
    91  func CapsFromVfsCaps(capData VfsCapData, creds *Credentials) (*Credentials, error) {
    92  	// If the real or effective user ID of the process is root,
    93  	// the file inheritable and permitted sets are ignored from
    94  	// `Capabilities and execution of programs by root` at capabilities(7).
    95  	if root := creds.UserNamespace.MapToKUID(RootUID); creds.EffectiveKUID == root || creds.RealKUID == root {
    96  		return creds, nil
    97  	}
    98  	// The credentials object is immutable.
    99  	newCreds := creds.Fork()
   100  	effective := (capData.MagicEtc & linux.VFS_CAP_FLAGS_EFFECTIVE) > 0
   101  	newCreds.PermittedCaps = (capData.Permitted & creds.BoundingCaps) |
   102  		(capData.Inheritable & creds.InheritableCaps)
   103  	// P'(effective) = effective ? P'(permitted) : P'(ambient).
   104  	// The ambient capabilities has not supported yet in gVisor,
   105  	// set effective capabilities to 0 when effective bit is false.
   106  	newCreds.EffectiveCaps = 0
   107  	if effective {
   108  		newCreds.EffectiveCaps = newCreds.PermittedCaps
   109  	}
   110  	// Insufficient to execute correctly.
   111  	if (capData.Permitted & ^newCreds.PermittedCaps) != 0 {
   112  		return nil, linuxerr.EPERM
   113  	}
   114  	return newCreds, nil
   115  }
   116  
// TaskCapabilities represents all the capability sets for a task. Each of these
// sets is explained in greater detail in capabilities(7).
type TaskCapabilities struct {
	// PermittedCaps is a limiting superset for the effective capabilities
	// that the thread may assume.
	PermittedCaps CapabilitySet
	// InheritableCaps is a set of capabilities preserved across an
	// execve(2).
	InheritableCaps CapabilitySet
	// EffectiveCaps is the set of capabilities used by the kernel to
	// perform permission checks for the thread.
	EffectiveCaps CapabilitySet
	// BoundingCaps is a limiting superset for the capabilities that a
	// thread can add to its inheritable set using capset(2).
	BoundingCaps CapabilitySet
	// AmbientCaps is a set of capabilities that are preserved across an
	// execve(2) of a program that is not privileged.
	AmbientCaps CapabilitySet
}