github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/kernel/task_identity.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernel
    16  
    17  import (
    18  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    19  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    20  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth"
    21  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/mm"
    22  )
    23  
    24  // Credentials returns t's credentials.
    25  //
    26  // This value must be considered immutable.
    27  func (t *Task) Credentials() *auth.Credentials {
    28  	return t.creds.Load()
    29  }
    30  
    31  // UserNamespace returns the user namespace associated with the task.
    32  func (t *Task) UserNamespace() *auth.UserNamespace {
    33  	return t.Credentials().UserNamespace
    34  }
    35  
    36  // HasCapabilityIn checks if the task has capability cp in user namespace ns.
    37  func (t *Task) HasCapabilityIn(cp linux.Capability, ns *auth.UserNamespace) bool {
    38  	return t.Credentials().HasCapabilityIn(cp, ns)
    39  }
    40  
    41  // HasCapability checks if the task has capability cp in its user namespace.
    42  func (t *Task) HasCapability(cp linux.Capability) bool {
    43  	return t.Credentials().HasCapability(cp)
    44  }
    45  
    46  // SetUID implements the semantics of setuid(2).
    47  func (t *Task) SetUID(uid auth.UID) error {
    48  	// setuid considers -1 to be invalid.
    49  	if !uid.Ok() {
    50  		return linuxerr.EINVAL
    51  	}
    52  
    53  	t.mu.Lock()
    54  	defer t.mu.Unlock()
    55  
    56  	creds := t.Credentials()
    57  	kuid := creds.UserNamespace.MapToKUID(uid)
    58  	if !kuid.Ok() {
    59  		return linuxerr.EINVAL
    60  	}
    61  	// "setuid() sets the effective user ID of the calling process. If the
    62  	// effective UID of the caller is root (more precisely: if the caller has
    63  	// the CAP_SETUID capability), the real UID and saved set-user-ID are also
    64  	// set." - setuid(2)
    65  	if creds.HasCapability(linux.CAP_SETUID) {
    66  		t.setKUIDsUncheckedLocked(kuid, kuid, kuid)
    67  		return nil
    68  	}
    69  	// "EPERM: The user is not privileged (Linux: does not have the CAP_SETUID
    70  	// capability) and uid does not match the real UID or saved set-user-ID of
    71  	// the calling process."
    72  	if kuid != creds.RealKUID && kuid != creds.SavedKUID {
    73  		return linuxerr.EPERM
    74  	}
    75  	t.setKUIDsUncheckedLocked(creds.RealKUID, kuid, creds.SavedKUID)
    76  	return nil
    77  }
    78  
    79  // SetREUID implements the semantics of setreuid(2).
    80  func (t *Task) SetREUID(r, e auth.UID) error {
    81  	t.mu.Lock()
    82  	defer t.mu.Unlock()
    83  	// "Supplying a value of -1 for either the real or effective user ID forces
    84  	// the system to leave that ID unchanged." - setreuid(2)
    85  	creds := t.Credentials()
    86  	newR := creds.RealKUID
    87  	if r.Ok() {
    88  		newR = creds.UserNamespace.MapToKUID(r)
    89  		if !newR.Ok() {
    90  			return linuxerr.EINVAL
    91  		}
    92  	}
    93  	newE := creds.EffectiveKUID
    94  	if e.Ok() {
    95  		newE = creds.UserNamespace.MapToKUID(e)
    96  		if !newE.Ok() {
    97  			return linuxerr.EINVAL
    98  		}
    99  	}
   100  	if !creds.HasCapability(linux.CAP_SETUID) {
   101  		// "Unprivileged processes may only set the effective user ID to the
   102  		// real user ID, the effective user ID, or the saved set-user-ID."
   103  		if newE != creds.RealKUID && newE != creds.EffectiveKUID && newE != creds.SavedKUID {
   104  			return linuxerr.EPERM
   105  		}
   106  		// "Unprivileged users may only set the real user ID to the real user
   107  		// ID or the effective user ID."
   108  		if newR != creds.RealKUID && newR != creds.EffectiveKUID {
   109  			return linuxerr.EPERM
   110  		}
   111  	}
   112  	// "If the real user ID is set (i.e., ruid is not -1) or the effective user
   113  	// ID is set to a value not equal to the previous real user ID, the saved
   114  	// set-user-ID will be set to the new effective user ID."
   115  	newS := creds.SavedKUID
   116  	if r.Ok() || (e.Ok() && newE != creds.EffectiveKUID) {
   117  		newS = newE
   118  	}
   119  	t.setKUIDsUncheckedLocked(newR, newE, newS)
   120  	return nil
   121  }
   122  
   123  // SetRESUID implements the semantics of the setresuid(2) syscall.
   124  func (t *Task) SetRESUID(r, e, s auth.UID) error {
   125  	t.mu.Lock()
   126  	defer t.mu.Unlock()
   127  	// "Unprivileged user processes may change the real UID, effective UID, and
   128  	// saved set-user-ID, each to one of: the current real UID, the current
   129  	// effective UID or the current saved set-user-ID. Privileged processes (on
   130  	// Linux, those having the CAP_SETUID capability) may set the real UID,
   131  	// effective UID, and saved set-user-ID to arbitrary values. If one of the
   132  	// arguments equals -1, the corresponding value is not changed." -
   133  	// setresuid(2)
   134  	var err error
   135  	creds := t.Credentials()
   136  	newR := creds.RealKUID
   137  	if r.Ok() {
   138  		newR, err = creds.UseUID(r)
   139  		if err != nil {
   140  			return err
   141  		}
   142  	}
   143  	newE := creds.EffectiveKUID
   144  	if e.Ok() {
   145  		newE, err = creds.UseUID(e)
   146  		if err != nil {
   147  			return err
   148  		}
   149  	}
   150  	newS := creds.SavedKUID
   151  	if s.Ok() {
   152  		newS, err = creds.UseUID(s)
   153  		if err != nil {
   154  			return err
   155  		}
   156  	}
   157  	t.setKUIDsUncheckedLocked(newR, newE, newS)
   158  	return nil
   159  }
   160  
   161  // Preconditions: t.mu must be locked.
   162  func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
   163  	creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds.
   164  	root := creds.UserNamespace.MapToKUID(auth.RootUID)
   165  	oldR, oldE, oldS := creds.RealKUID, creds.EffectiveKUID, creds.SavedKUID
   166  	creds.RealKUID, creds.EffectiveKUID, creds.SavedKUID = newR, newE, newS
   167  
   168  	// "1. If one or more of the real, effective or saved set user IDs was
   169  	// previously 0, and as a result of the UID changes all of these IDs have a
   170  	// nonzero value, then all capabilities are cleared from the permitted and
   171  	// effective capability sets." - capabilities(7)
   172  	if (oldR == root || oldE == root || oldS == root) && (newR != root && newE != root && newS != root) {
   173  		// prctl(2): "PR_SET_KEEPCAP: Set the state of the calling thread's
   174  		// "keep capabilities" flag, which determines whether the thread's permitted
   175  		// capability set is cleared when a change is made to the
   176  		// thread's user IDs such that the thread's real UID, effective
   177  		// UID, and saved set-user-ID all become nonzero when at least
   178  		// one of them previously had the value 0.  By default, the
   179  		// permitted capability set is cleared when such a change is
   180  		// made; setting the "keep capabilities" flag prevents it from
   181  		// being cleared." (A thread's effective capability set is always
   182  		// cleared when such a credential change is made,
   183  		// regardless of the setting of the "keep capabilities" flag.)
   184  		if !creds.KeepCaps {
   185  			creds.PermittedCaps = 0
   186  			creds.EffectiveCaps = 0
   187  		}
   188  	}
   189  	// """
   190  	// 2. If the effective user ID is changed from 0 to nonzero, then all
   191  	// capabilities are cleared from the effective set.
   192  	//
   193  	// 3. If the effective user ID is changed from nonzero to 0, then the
   194  	// permitted set is copied to the effective set.
   195  	// """
   196  	if oldE == root && newE != root {
   197  		creds.EffectiveCaps = 0
   198  	} else if oldE != root && newE == root {
   199  		creds.EffectiveCaps = creds.PermittedCaps
   200  	}
   201  	// "4. If the filesystem user ID is changed from 0 to nonzero (see
   202  	// setfsuid(2)), then the following capabilities are cleared from the
   203  	// effective set: ..."
   204  	// (filesystem UIDs aren't implemented, nor are any of the capabilities in
   205  	// question)
   206  
   207  	if oldE != newE {
   208  		// "[dumpability] is reset to the current value contained in
   209  		// the file /proc/sys/fs/suid_dumpable (which by default has
   210  		// the value 0), in the following circumstances: The process's
   211  		// effective user or group ID is changed." - prctl(2)
   212  		//
   213  		// (suid_dumpable isn't implemented, so we just use the
   214  		// default.
   215  		t.MemoryManager().SetDumpability(mm.NotDumpable)
   216  
   217  		// Not documented, but compare Linux's kernel/cred.c:commit_creds().
   218  		t.parentDeathSignal = 0
   219  	}
   220  	t.creds.Store(creds)
   221  }
   222  
   223  // SetGID implements the semantics of setgid(2).
   224  func (t *Task) SetGID(gid auth.GID) error {
   225  	if !gid.Ok() {
   226  		return linuxerr.EINVAL
   227  	}
   228  
   229  	t.mu.Lock()
   230  	defer t.mu.Unlock()
   231  
   232  	creds := t.Credentials()
   233  	kgid := creds.UserNamespace.MapToKGID(gid)
   234  	if !kgid.Ok() {
   235  		return linuxerr.EINVAL
   236  	}
   237  	if creds.HasCapability(linux.CAP_SETGID) {
   238  		t.setKGIDsUncheckedLocked(kgid, kgid, kgid)
   239  		return nil
   240  	}
   241  	if kgid != creds.RealKGID && kgid != creds.SavedKGID {
   242  		return linuxerr.EPERM
   243  	}
   244  	t.setKGIDsUncheckedLocked(creds.RealKGID, kgid, creds.SavedKGID)
   245  	return nil
   246  }
   247  
   248  // SetREGID implements the semantics of setregid(2).
   249  func (t *Task) SetREGID(r, e auth.GID) error {
   250  	t.mu.Lock()
   251  	defer t.mu.Unlock()
   252  
   253  	creds := t.Credentials()
   254  	newR := creds.RealKGID
   255  	if r.Ok() {
   256  		newR = creds.UserNamespace.MapToKGID(r)
   257  		if !newR.Ok() {
   258  			return linuxerr.EINVAL
   259  		}
   260  	}
   261  	newE := creds.EffectiveKGID
   262  	if e.Ok() {
   263  		newE = creds.UserNamespace.MapToKGID(e)
   264  		if !newE.Ok() {
   265  			return linuxerr.EINVAL
   266  		}
   267  	}
   268  	if !creds.HasCapability(linux.CAP_SETGID) {
   269  		if newE != creds.RealKGID && newE != creds.EffectiveKGID && newE != creds.SavedKGID {
   270  			return linuxerr.EPERM
   271  		}
   272  		if newR != creds.RealKGID && newR != creds.EffectiveKGID {
   273  			return linuxerr.EPERM
   274  		}
   275  	}
   276  	newS := creds.SavedKGID
   277  	if r.Ok() || (e.Ok() && newE != creds.EffectiveKGID) {
   278  		newS = newE
   279  	}
   280  	t.setKGIDsUncheckedLocked(newR, newE, newS)
   281  	return nil
   282  }
   283  
   284  // SetRESGID implements the semantics of the setresgid(2) syscall.
   285  func (t *Task) SetRESGID(r, e, s auth.GID) error {
   286  	var err error
   287  
   288  	t.mu.Lock()
   289  	defer t.mu.Unlock()
   290  
   291  	creds := t.Credentials()
   292  	newR := creds.RealKGID
   293  	if r.Ok() {
   294  		newR, err = creds.UseGID(r)
   295  		if err != nil {
   296  			return err
   297  		}
   298  	}
   299  	newE := creds.EffectiveKGID
   300  	if e.Ok() {
   301  		newE, err = creds.UseGID(e)
   302  		if err != nil {
   303  			return err
   304  		}
   305  	}
   306  	newS := creds.SavedKGID
   307  	if s.Ok() {
   308  		newS, err = creds.UseGID(s)
   309  		if err != nil {
   310  			return err
   311  		}
   312  	}
   313  	t.setKGIDsUncheckedLocked(newR, newE, newS)
   314  	return nil
   315  }
   316  
   317  func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) {
   318  	creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds.
   319  	oldE := creds.EffectiveKGID
   320  	creds.RealKGID, creds.EffectiveKGID, creds.SavedKGID = newR, newE, newS
   321  
   322  	if oldE != newE {
   323  		// "[dumpability] is reset to the current value contained in
   324  		// the file /proc/sys/fs/suid_dumpable (which by default has
   325  		// the value 0), in the following circumstances: The process's
   326  		// effective user or group ID is changed." - prctl(2)
   327  		//
   328  		// (suid_dumpable isn't implemented, so we just use the
   329  		// default.
   330  		t.MemoryManager().SetDumpability(mm.NotDumpable)
   331  
   332  		// Not documented, but compare Linux's
   333  		// kernel/cred.c:commit_creds().
   334  		t.parentDeathSignal = 0
   335  	}
   336  	t.creds.Store(creds)
   337  }
   338  
   339  // SetExtraGIDs attempts to change t's supplemental groups. All IDs are
   340  // interpreted as being in t's user namespace.
   341  func (t *Task) SetExtraGIDs(gids []auth.GID) error {
   342  	t.mu.Lock()
   343  	defer t.mu.Unlock()
   344  	creds := t.Credentials()
   345  	if !creds.HasCapability(linux.CAP_SETGID) {
   346  		return linuxerr.EPERM
   347  	}
   348  	kgids := make([]auth.KGID, len(gids))
   349  	for i, gid := range gids {
   350  		kgid := creds.UserNamespace.MapToKGID(gid)
   351  		if !kgid.Ok() {
   352  			return linuxerr.EINVAL
   353  		}
   354  		kgids[i] = kgid
   355  	}
   356  	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
   357  	creds.ExtraKGIDs = kgids
   358  	t.creds.Store(creds)
   359  	return nil
   360  }
   361  
   362  // SetCapabilitySets attempts to change t's permitted, inheritable, and
   363  // effective capability sets.
   364  func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.CapabilitySet) error {
   365  	t.mu.Lock()
   366  	defer t.mu.Unlock()
   367  	// "Permitted: This is a limiting superset for the effective capabilities
   368  	// that the thread may assume." - capabilities(7)
   369  	if effective & ^permitted != 0 {
   370  		return linuxerr.EPERM
   371  	}
   372  	creds := t.Credentials()
   373  	// "It is also a limiting superset for the capabilities that may be added
   374  	// to the inheritable set by a thread that does not have the CAP_SETPCAP
   375  	// capability in its effective set."
   376  	if !creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(creds.InheritableCaps|creds.PermittedCaps) != 0) {
   377  		return linuxerr.EPERM
   378  	}
   379  	// "If a thread drops a capability from its permitted set, it can never
   380  	// reacquire that capability (unless it execve(2)s ..."
   381  	if permitted & ^creds.PermittedCaps != 0 {
   382  		return linuxerr.EPERM
   383  	}
   384  	// "... if a capability is not in the bounding set, then a thread can't add
   385  	// this capability to its inheritable set, even if it was in its permitted
   386  	// capabilities ..."
   387  	if inheritable & ^(creds.InheritableCaps|creds.BoundingCaps) != 0 {
   388  		return linuxerr.EPERM
   389  	}
   390  	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
   391  	creds.PermittedCaps = permitted
   392  	creds.InheritableCaps = inheritable
   393  	creds.EffectiveCaps = effective
   394  	t.creds.Store(creds)
   395  	return nil
   396  }
   397  
   398  // DropBoundingCapability attempts to drop capability cp from t's capability
   399  // bounding set.
   400  func (t *Task) DropBoundingCapability(cp linux.Capability) error {
   401  	t.mu.Lock()
   402  	defer t.mu.Unlock()
   403  	creds := t.Credentials()
   404  	if !creds.HasCapability(linux.CAP_SETPCAP) {
   405  		return linuxerr.EPERM
   406  	}
   407  	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
   408  	creds.BoundingCaps &^= auth.CapabilitySetOf(cp)
   409  	t.creds.Store(creds)
   410  	return nil
   411  }
   412  
   413  // SetUserNamespace attempts to move c into ns.
   414  func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error {
   415  	t.mu.Lock()
   416  	defer t.mu.Unlock()
   417  
   418  	creds := t.Credentials()
   419  	// "A process reassociating itself with a user namespace must have the
   420  	// CAP_SYS_ADMIN capability in the target user namespace." - setns(2)
   421  	//
   422  	// If t just created ns, then t.creds is guaranteed to have CAP_SYS_ADMIN
   423  	// in ns (by rule 3 in auth.Credentials.HasCapability).
   424  	if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) {
   425  		return linuxerr.EPERM
   426  	}
   427  
   428  	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
   429  	creds.UserNamespace = ns
   430  	// "The child process created by clone(2) with the CLONE_NEWUSER flag
   431  	// starts out with a complete set of capabilities in the new user
   432  	// namespace. Likewise, a process that creates a new user namespace using
   433  	// unshare(2) or joins an existing user namespace using setns(2) gains a
   434  	// full set of capabilities in that namespace."
   435  	creds.PermittedCaps = auth.AllCapabilities
   436  	creds.InheritableCaps = 0
   437  	creds.EffectiveCaps = auth.AllCapabilities
   438  	creds.BoundingCaps = auth.AllCapabilities
   439  	// "A call to clone(2), unshare(2), or setns(2) using the CLONE_NEWUSER
   440  	// flag sets the "securebits" flags (see capabilities(7)) to their default
   441  	// values (all flags disabled) in the child (for clone(2)) or caller (for
   442  	// unshare(2), or setns(2)." - user_namespaces(7)
   443  	creds.KeepCaps = false
   444  	t.creds.Store(creds)
   445  
   446  	return nil
   447  }
   448  
   449  // SetKeepCaps will set the keep capabilities flag PR_SET_KEEPCAPS.
   450  func (t *Task) SetKeepCaps(k bool) {
   451  	t.mu.Lock()
   452  	defer t.mu.Unlock()
   453  	creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds.
   454  	creds.KeepCaps = k
   455  	t.creds.Store(creds)
   456  }
   457  
   458  // updateCredsForExecLocked updates t.creds to reflect an execve().
   459  //
   460  // NOTE(b/30815691): We currently do not implement privileged executables
   461  // (set-user/group-ID bits and file capabilities). This allows us to make a lot
   462  // of simplifying assumptions:
   463  //
   464  //   - We assume the no_new_privs bit (set by prctl(SET_NO_NEW_PRIVS)), which
   465  //     disables the features we don't support anyway, is always set. This
   466  //     drastically simplifies this function.
   467  //
   468  //   - We don't set AT_SECURE = 1, because no_new_privs always being set means
   469  //     that the conditions that require AT_SECURE = 1 never arise. (Compare Linux's
   470  //     security/commoncap.c:cap_bprm_set_creds() and cap_bprm_secureexec().)
   471  //
   472  //   - We don't check for CAP_SYS_ADMIN in prctl(PR_SET_SECCOMP), since
   473  //     seccomp-bpf is also allowed if the task has no_new_privs set.
   474  //
   475  //   - Task.ptraceAttach does not serialize with execve as it does in Linux,
   476  //     since no_new_privs being set has the same effect as the presence of an
   477  //     unprivileged tracer.
   478  //
   479  // Preconditions: t.mu must be locked.
   480  func (t *Task) updateCredsForExecLocked() {
   481  	// """
   482  	// During an execve(2), the kernel calculates the new capabilities of
   483  	// the process using the following algorithm:
   484  	//
   485  	//     P'(permitted) = (P(inheritable) & F(inheritable)) |
   486  	//                     (F(permitted) & cap_bset)
   487  	//
   488  	//     P'(effective) = F(effective) ? P'(permitted) : 0
   489  	//
   490  	//     P'(inheritable) = P(inheritable)    [i.e., unchanged]
   491  	//
   492  	// where:
   493  	//
   494  	//     P         denotes the value of a thread capability set before the
   495  	//               execve(2)
   496  	//
   497  	//     P'        denotes the value of a thread capability set after the
   498  	//               execve(2)
   499  	//
   500  	//     F         denotes a file capability set
   501  	//
   502  	//     cap_bset  is the value of the capability bounding set
   503  	//
   504  	// ...
   505  	//
   506  	// In order to provide an all-powerful root using capability sets, during
   507  	// an execve(2):
   508  	//
   509  	// 1. If a set-user-ID-root program is being executed, or the real user ID
   510  	// of the process is 0 (root) then the file inheritable and permitted sets
   511  	// are defined to be all ones (i.e. all capabilities enabled).
   512  	//
   513  	// 2. If a set-user-ID-root program is being executed, then the file
   514  	// effective bit is defined to be one (enabled).
   515  	//
   516  	// The upshot of the above rules, combined with the capabilities
   517  	// transformations described above, is that when a process execve(2)s a
   518  	// set-user-ID-root program, or when a process with an effective UID of 0
   519  	// execve(2)s a program, it gains all capabilities in its permitted and
   520  	// effective capability sets, except those masked out by the capability
   521  	// bounding set.
   522  	// """ - capabilities(7)
   523  	// (ambient capability sets omitted)
   524  	//
   525  	// As the last paragraph implies, the case of "a set-user-ID root program
   526  	// is being executed" also includes the case where (namespace) root is
   527  	// executing a non-set-user-ID program; the actual check is just based on
   528  	// the effective user ID.
   529  	var newPermitted auth.CapabilitySet // since F(inheritable) == F(permitted) == 0
   530  	fileEffective := false
   531  	creds := t.Credentials()
   532  	root := creds.UserNamespace.MapToKUID(auth.RootUID)
   533  	if creds.EffectiveKUID == root || creds.RealKUID == root {
   534  		newPermitted = creds.InheritableCaps | creds.BoundingCaps
   535  		if creds.EffectiveKUID == root {
   536  			fileEffective = true
   537  		}
   538  	}
   539  
   540  	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
   541  
   542  	// Now we enter poorly-documented, somewhat confusing territory. (The
   543  	// accompanying comment in Linux's security/commoncap.c:cap_bprm_set_creds
   544  	// is not very helpful.) My reading of it is:
   545  	//
   546  	// If at least one of the following is true:
   547  	//
   548  	// A1. The execing task is ptraced, and the tracer did not have
   549  	// CAP_SYS_PTRACE in the execing task's user namespace at the time of
   550  	// PTRACE_ATTACH.
   551  	//
   552  	// A2. The execing task shares its FS context with at least one task in
   553  	// another thread group.
   554  	//
   555  	// A3. The execing task has no_new_privs set.
   556  	//
   557  	// AND at least one of the following is true:
   558  	//
   559  	// B1. The new effective user ID (which may come from set-user-ID, or be the
   560  	// execing task's existing effective user ID) is not equal to the task's
   561  	// real UID.
   562  	//
   563  	// B2. The new effective group ID (which may come from set-group-ID, or be
   564  	// the execing task's existing effective group ID) is not equal to the
   565  	// task's real GID.
   566  	//
   567  	// B3. The new permitted capability set contains capabilities not in the
   568  	// task's permitted capability set.
   569  	//
   570  	// Then:
   571  	//
   572  	// C1. Limit the new permitted capability set to the task's permitted
   573  	// capability set.
   574  	//
   575  	// C2. If either the task does not have CAP_SETUID in its user namespace, or
   576  	// the task has no_new_privs set, force the new effective UID and GID to
   577  	// the task's real UID and GID.
   578  	//
   579  	// But since no_new_privs is always set (A3 is always true), this becomes
   580  	// much simpler. If B1 and B2 are false, C2 is a no-op. If B3 is false, C1
   581  	// is a no-op. So we can just do C1 and C2 unconditionally.
   582  	if creds.EffectiveKUID != creds.RealKUID || creds.EffectiveKGID != creds.RealKGID {
   583  		creds.EffectiveKUID = creds.RealKUID
   584  		creds.EffectiveKGID = creds.RealKGID
   585  		t.parentDeathSignal = 0
   586  	}
   587  	// (Saved set-user-ID is always set to the new effective user ID, and saved
   588  	// set-group-ID is always set to the new effective group ID, regardless of
   589  	// the above.)
   590  	creds.SavedKUID = creds.RealKUID
   591  	creds.SavedKGID = creds.RealKGID
   592  	creds.PermittedCaps &= newPermitted
   593  	if fileEffective {
   594  		creds.EffectiveCaps = creds.PermittedCaps
   595  	} else {
   596  		creds.EffectiveCaps = 0
   597  	}
   598  
   599  	// prctl(2): The "keep capabilities" value will be reset to 0 on subsequent
   600  	// calls to execve(2).
   601  	creds.KeepCaps = false
   602  
   603  	// "The bounding set is inherited at fork(2) from the thread's parent, and
   604  	// is preserved across an execve(2)". So we're done.
   605  	t.creds.Store(creds)
   606  }