github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/kernel/auth/user_namespace.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package auth
    16  
    17  import (
    18  	"math"
    19  
    20  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    21  )
    22  
    23  // A UserNamespace represents a user namespace. See user_namespaces(7) for
    24  // details.
    25  //
    26  // +stateify savable
    27  type UserNamespace struct {
    28  	// parent is this namespace's parent. If this is the root namespace, parent
    29  	// is nil. The parent pointer is immutable.
    30  	parent *UserNamespace
    31  
    32  	// owner is the effective UID of the namespace's creator in the root
    33  	// namespace. owner is immutable.
    34  	owner KUID
    35  
    36  	// Keys is the set of keys in this namespace.
    37  	Keys KeySet
    38  
    39  	// mu protects the following fields.
    40  	//
    41  	// If mu will be locked in multiple UserNamespaces, it must be locked in
    42  	// descendant namespaces before ancestors.
    43  	mu userNamespaceMutex `state:"nosave"`
    44  
    45  	// Mappings of user/group IDs between this namespace and its parent.
    46  	//
    47  	// All ID maps, once set, cannot be changed. This means that successful
    48  	// UID/GID translations cannot be racy.
    49  	uidMapFromParent idMapSet
    50  	uidMapToParent   idMapSet
    51  	gidMapFromParent idMapSet
    52  	gidMapToParent   idMapSet
    53  
    54  	// TODO(b/27454212): Support disabling setgroups(2).
    55  }
    56  
    57  // NewRootUserNamespace returns a UserNamespace that is appropriate for a
    58  // system's root user namespace. Note that namespaces returned by separate calls
    59  // to this function are *distinct* namespaces. Once a root namespace is created
    60  // by this function, the returned value must be reused to refer to the same
    61  // namespace.
    62  func NewRootUserNamespace() *UserNamespace {
    63  	var ns UserNamespace
    64  	// """
    65  	// The initial user namespace has no parent namespace, but, for
    66  	// consistency, the kernel provides dummy user and group ID mapping files
    67  	// for this namespace. Looking at the uid_map file (gid_map is the same)
    68  	// from a shell in the initial namespace shows:
    69  	//
    70  	// $ cat /proc/$$/uid_map
    71  	// 0          0 4294967295
    72  	// """ - user_namespaces(7)
    73  	for _, m := range []*idMapSet{
    74  		&ns.uidMapFromParent,
    75  		&ns.uidMapToParent,
    76  		&ns.gidMapFromParent,
    77  		&ns.gidMapToParent,
    78  	} {
    79  		// Insertion into an empty map shouldn't fail.
    80  		m.InsertRange(idMapRange{0, math.MaxUint32}, 0)
    81  	}
    82  	return &ns
    83  }
    84  
    85  // Root returns the root of the user namespace tree containing ns.
    86  func (ns *UserNamespace) Root() *UserNamespace {
    87  	for ns.parent != nil {
    88  		ns = ns.parent
    89  	}
    90  	return ns
    91  }
    92  
    93  // "The kernel imposes (since version 3.11) a limit of 32 nested levels of user
    94  // namespaces." - user_namespaces(7)
    95  const maxUserNamespaceDepth = 32
    96  
    97  func (ns *UserNamespace) depth() int {
    98  	var i int
    99  	for ns != nil {
   100  		i++
   101  		ns = ns.parent
   102  	}
   103  	return i
   104  }
   105  
   106  // NewChildUserNamespace returns a new user namespace created by a caller with
   107  // credentials c.
   108  func (c *Credentials) NewChildUserNamespace() (*UserNamespace, error) {
   109  	if c.UserNamespace.depth() >= maxUserNamespaceDepth {
   110  		// "... Calls to unshare(2) or clone(2) that would cause this limit to
   111  		// be exceeded fail with the error EUSERS." - user_namespaces(7)
   112  		return nil, linuxerr.EUSERS
   113  	}
   114  	// "EPERM: CLONE_NEWUSER was specified in flags, but either the effective
   115  	// user ID or the effective group ID of the caller does not have a mapping
   116  	// in the parent namespace (see user_namespaces(7))." - clone(2)
   117  	// "CLONE_NEWUSER requires that the user ID and group ID of the calling
   118  	// process are mapped to user IDs and group IDs in the user namespace of
   119  	// the calling process at the time of the call." - unshare(2)
   120  	if !c.EffectiveKUID.In(c.UserNamespace).Ok() {
   121  		return nil, linuxerr.EPERM
   122  	}
   123  	if !c.EffectiveKGID.In(c.UserNamespace).Ok() {
   124  		return nil, linuxerr.EPERM
   125  	}
   126  	return &UserNamespace{
   127  		parent: c.UserNamespace,
   128  		owner:  c.EffectiveKUID,
   129  		// "When a user namespace is created, it starts without a mapping of
   130  		// user IDs (group IDs) to the parent user namespace." -
   131  		// user_namespaces(7)
   132  	}, nil
   133  }