github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/kernel/task_identity.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package kernel 16 17 import ( 18 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 19 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 20 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth" 21 "github.com/MerlinKodo/gvisor/pkg/sentry/mm" 22 ) 23 24 // Credentials returns t's credentials. 25 // 26 // This value must be considered immutable. 27 func (t *Task) Credentials() *auth.Credentials { 28 return t.creds.Load() 29 } 30 31 // UserNamespace returns the user namespace associated with the task. 32 func (t *Task) UserNamespace() *auth.UserNamespace { 33 return t.Credentials().UserNamespace 34 } 35 36 // HasCapabilityIn checks if the task has capability cp in user namespace ns. 37 func (t *Task) HasCapabilityIn(cp linux.Capability, ns *auth.UserNamespace) bool { 38 return t.Credentials().HasCapabilityIn(cp, ns) 39 } 40 41 // HasCapability checks if the task has capability cp in its user namespace. 42 func (t *Task) HasCapability(cp linux.Capability) bool { 43 return t.Credentials().HasCapability(cp) 44 } 45 46 // SetUID implements the semantics of setuid(2). 47 func (t *Task) SetUID(uid auth.UID) error { 48 // setuid considers -1 to be invalid. 49 if !uid.Ok() { 50 return linuxerr.EINVAL 51 } 52 53 t.mu.Lock() 54 defer t.mu.Unlock() 55 56 creds := t.Credentials() 57 kuid := creds.UserNamespace.MapToKUID(uid) 58 if !kuid.Ok() { 59 return linuxerr.EINVAL 60 } 61 // "setuid() sets the effective user ID of the calling process. If the 62 // effective UID of the caller is root (more precisely: if the caller has 63 // the CAP_SETUID capability), the real UID and saved set-user-ID are also 64 // set." - setuid(2) 65 if creds.HasCapability(linux.CAP_SETUID) { 66 t.setKUIDsUncheckedLocked(kuid, kuid, kuid) 67 return nil 68 } 69 // "EPERM: The user is not privileged (Linux: does not have the CAP_SETUID 70 // capability) and uid does not match the real UID or saved set-user-ID of 71 // the calling process." 72 if kuid != creds.RealKUID && kuid != creds.SavedKUID { 73 return linuxerr.EPERM 74 } 75 t.setKUIDsUncheckedLocked(creds.RealKUID, kuid, creds.SavedKUID) 76 return nil 77 } 78 79 // SetREUID implements the semantics of setreuid(2). 80 func (t *Task) SetREUID(r, e auth.UID) error { 81 t.mu.Lock() 82 defer t.mu.Unlock() 83 // "Supplying a value of -1 for either the real or effective user ID forces 84 // the system to leave that ID unchanged." - setreuid(2) 85 creds := t.Credentials() 86 newR := creds.RealKUID 87 if r.Ok() { 88 newR = creds.UserNamespace.MapToKUID(r) 89 if !newR.Ok() { 90 return linuxerr.EINVAL 91 } 92 } 93 newE := creds.EffectiveKUID 94 if e.Ok() { 95 newE = creds.UserNamespace.MapToKUID(e) 96 if !newE.Ok() { 97 return linuxerr.EINVAL 98 } 99 } 100 if !creds.HasCapability(linux.CAP_SETUID) { 101 // "Unprivileged processes may only set the effective user ID to the 102 // real user ID, the effective user ID, or the saved set-user-ID." 103 if newE != creds.RealKUID && newE != creds.EffectiveKUID && newE != creds.SavedKUID { 104 return linuxerr.EPERM 105 } 106 // "Unprivileged users may only set the real user ID to the real user 107 // ID or the effective user ID." 108 if newR != creds.RealKUID && newR != creds.EffectiveKUID { 109 return linuxerr.EPERM 110 } 111 } 112 // "If the real user ID is set (i.e., ruid is not -1) or the effective user 113 // ID is set to a value not equal to the previous real user ID, the saved 114 // set-user-ID will be set to the new effective user ID." 115 newS := creds.SavedKUID 116 if r.Ok() || (e.Ok() && newE != creds.EffectiveKUID) { 117 newS = newE 118 } 119 t.setKUIDsUncheckedLocked(newR, newE, newS) 120 return nil 121 } 122 123 // SetRESUID implements the semantics of the setresuid(2) syscall. 124 func (t *Task) SetRESUID(r, e, s auth.UID) error { 125 t.mu.Lock() 126 defer t.mu.Unlock() 127 // "Unprivileged user processes may change the real UID, effective UID, and 128 // saved set-user-ID, each to one of: the current real UID, the current 129 // effective UID or the current saved set-user-ID. Privileged processes (on 130 // Linux, those having the CAP_SETUID capability) may set the real UID, 131 // effective UID, and saved set-user-ID to arbitrary values. If one of the 132 // arguments equals -1, the corresponding value is not changed." - 133 // setresuid(2) 134 var err error 135 creds := t.Credentials() 136 newR := creds.RealKUID 137 if r.Ok() { 138 newR, err = creds.UseUID(r) 139 if err != nil { 140 return err 141 } 142 } 143 newE := creds.EffectiveKUID 144 if e.Ok() { 145 newE, err = creds.UseUID(e) 146 if err != nil { 147 return err 148 } 149 } 150 newS := creds.SavedKUID 151 if s.Ok() { 152 newS, err = creds.UseUID(s) 153 if err != nil { 154 return err 155 } 156 } 157 t.setKUIDsUncheckedLocked(newR, newE, newS) 158 return nil 159 } 160 161 // Preconditions: t.mu must be locked. 162 func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) { 163 creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds. 164 root := creds.UserNamespace.MapToKUID(auth.RootUID) 165 oldR, oldE, oldS := creds.RealKUID, creds.EffectiveKUID, creds.SavedKUID 166 creds.RealKUID, creds.EffectiveKUID, creds.SavedKUID = newR, newE, newS 167 168 // "1. If one or more of the real, effective or saved set user IDs was 169 // previously 0, and as a result of the UID changes all of these IDs have a 170 // nonzero value, then all capabilities are cleared from the permitted and 171 // effective capability sets." - capabilities(7) 172 if (oldR == root || oldE == root || oldS == root) && (newR != root && newE != root && newS != root) { 173 // prctl(2): "PR_SET_KEEPCAP: Set the state of the calling thread's 174 // "keep capabilities" flag, which determines whether the thread's permitted 175 // capability set is cleared when a change is made to the 176 // thread's user IDs such that the thread's real UID, effective 177 // UID, and saved set-user-ID all become nonzero when at least 178 // one of them previously had the value 0. By default, the 179 // permitted capability set is cleared when such a change is 180 // made; setting the "keep capabilities" flag prevents it from 181 // being cleared." (A thread's effective capability set is always 182 // cleared when such a credential change is made, 183 // regardless of the setting of the "keep capabilities" flag.) 184 if !creds.KeepCaps { 185 creds.PermittedCaps = 0 186 creds.EffectiveCaps = 0 187 } 188 } 189 // """ 190 // 2. If the effective user ID is changed from 0 to nonzero, then all 191 // capabilities are cleared from the effective set. 192 // 193 // 3. If the effective user ID is changed from nonzero to 0, then the 194 // permitted set is copied to the effective set. 195 // """ 196 if oldE == root && newE != root { 197 creds.EffectiveCaps = 0 198 } else if oldE != root && newE == root { 199 creds.EffectiveCaps = creds.PermittedCaps 200 } 201 // "4. If the filesystem user ID is changed from 0 to nonzero (see 202 // setfsuid(2)), then the following capabilities are cleared from the 203 // effective set: ..." 204 // (filesystem UIDs aren't implemented, nor are any of the capabilities in 205 // question) 206 207 if oldE != newE { 208 // "[dumpability] is reset to the current value contained in 209 // the file /proc/sys/fs/suid_dumpable (which by default has 210 // the value 0), in the following circumstances: The process's 211 // effective user or group ID is changed." - prctl(2) 212 // 213 // (suid_dumpable isn't implemented, so we just use the 214 // default. 215 t.MemoryManager().SetDumpability(mm.NotDumpable) 216 217 // Not documented, but compare Linux's kernel/cred.c:commit_creds(). 218 t.parentDeathSignal = 0 219 } 220 t.creds.Store(creds) 221 } 222 223 // SetGID implements the semantics of setgid(2). 224 func (t *Task) SetGID(gid auth.GID) error { 225 if !gid.Ok() { 226 return linuxerr.EINVAL 227 } 228 229 t.mu.Lock() 230 defer t.mu.Unlock() 231 232 creds := t.Credentials() 233 kgid := creds.UserNamespace.MapToKGID(gid) 234 if !kgid.Ok() { 235 return linuxerr.EINVAL 236 } 237 if creds.HasCapability(linux.CAP_SETGID) { 238 t.setKGIDsUncheckedLocked(kgid, kgid, kgid) 239 return nil 240 } 241 if kgid != creds.RealKGID && kgid != creds.SavedKGID { 242 return linuxerr.EPERM 243 } 244 t.setKGIDsUncheckedLocked(creds.RealKGID, kgid, creds.SavedKGID) 245 return nil 246 } 247 248 // SetREGID implements the semantics of setregid(2). 249 func (t *Task) SetREGID(r, e auth.GID) error { 250 t.mu.Lock() 251 defer t.mu.Unlock() 252 253 creds := t.Credentials() 254 newR := creds.RealKGID 255 if r.Ok() { 256 newR = creds.UserNamespace.MapToKGID(r) 257 if !newR.Ok() { 258 return linuxerr.EINVAL 259 } 260 } 261 newE := creds.EffectiveKGID 262 if e.Ok() { 263 newE = creds.UserNamespace.MapToKGID(e) 264 if !newE.Ok() { 265 return linuxerr.EINVAL 266 } 267 } 268 if !creds.HasCapability(linux.CAP_SETGID) { 269 if newE != creds.RealKGID && newE != creds.EffectiveKGID && newE != creds.SavedKGID { 270 return linuxerr.EPERM 271 } 272 if newR != creds.RealKGID && newR != creds.EffectiveKGID { 273 return linuxerr.EPERM 274 } 275 } 276 newS := creds.SavedKGID 277 if r.Ok() || (e.Ok() && newE != creds.EffectiveKGID) { 278 newS = newE 279 } 280 t.setKGIDsUncheckedLocked(newR, newE, newS) 281 return nil 282 } 283 284 // SetRESGID implements the semantics of the setresgid(2) syscall. 285 func (t *Task) SetRESGID(r, e, s auth.GID) error { 286 var err error 287 288 t.mu.Lock() 289 defer t.mu.Unlock() 290 291 creds := t.Credentials() 292 newR := creds.RealKGID 293 if r.Ok() { 294 newR, err = creds.UseGID(r) 295 if err != nil { 296 return err 297 } 298 } 299 newE := creds.EffectiveKGID 300 if e.Ok() { 301 newE, err = creds.UseGID(e) 302 if err != nil { 303 return err 304 } 305 } 306 newS := creds.SavedKGID 307 if s.Ok() { 308 newS, err = creds.UseGID(s) 309 if err != nil { 310 return err 311 } 312 } 313 t.setKGIDsUncheckedLocked(newR, newE, newS) 314 return nil 315 } 316 317 func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) { 318 creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds. 319 oldE := creds.EffectiveKGID 320 creds.RealKGID, creds.EffectiveKGID, creds.SavedKGID = newR, newE, newS 321 322 if oldE != newE { 323 // "[dumpability] is reset to the current value contained in 324 // the file /proc/sys/fs/suid_dumpable (which by default has 325 // the value 0), in the following circumstances: The process's 326 // effective user or group ID is changed." - prctl(2) 327 // 328 // (suid_dumpable isn't implemented, so we just use the 329 // default. 330 t.MemoryManager().SetDumpability(mm.NotDumpable) 331 332 // Not documented, but compare Linux's 333 // kernel/cred.c:commit_creds(). 334 t.parentDeathSignal = 0 335 } 336 t.creds.Store(creds) 337 } 338 339 // SetExtraGIDs attempts to change t's supplemental groups. All IDs are 340 // interpreted as being in t's user namespace. 341 func (t *Task) SetExtraGIDs(gids []auth.GID) error { 342 t.mu.Lock() 343 defer t.mu.Unlock() 344 creds := t.Credentials() 345 if !creds.HasCapability(linux.CAP_SETGID) { 346 return linuxerr.EPERM 347 } 348 kgids := make([]auth.KGID, len(gids)) 349 for i, gid := range gids { 350 kgid := creds.UserNamespace.MapToKGID(gid) 351 if !kgid.Ok() { 352 return linuxerr.EINVAL 353 } 354 kgids[i] = kgid 355 } 356 creds = creds.Fork() // The credentials object is immutable. See doc for creds. 357 creds.ExtraKGIDs = kgids 358 t.creds.Store(creds) 359 return nil 360 } 361 362 // SetCapabilitySets attempts to change t's permitted, inheritable, and 363 // effective capability sets. 364 func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.CapabilitySet) error { 365 t.mu.Lock() 366 defer t.mu.Unlock() 367 // "Permitted: This is a limiting superset for the effective capabilities 368 // that the thread may assume." - capabilities(7) 369 if effective & ^permitted != 0 { 370 return linuxerr.EPERM 371 } 372 creds := t.Credentials() 373 // "It is also a limiting superset for the capabilities that may be added 374 // to the inheritable set by a thread that does not have the CAP_SETPCAP 375 // capability in its effective set." 376 if !creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(creds.InheritableCaps|creds.PermittedCaps) != 0) { 377 return linuxerr.EPERM 378 } 379 // "If a thread drops a capability from its permitted set, it can never 380 // reacquire that capability (unless it execve(2)s ..." 381 if permitted & ^creds.PermittedCaps != 0 { 382 return linuxerr.EPERM 383 } 384 // "... if a capability is not in the bounding set, then a thread can't add 385 // this capability to its inheritable set, even if it was in its permitted 386 // capabilities ..." 387 if inheritable & ^(creds.InheritableCaps|creds.BoundingCaps) != 0 { 388 return linuxerr.EPERM 389 } 390 creds = creds.Fork() // The credentials object is immutable. See doc for creds. 391 creds.PermittedCaps = permitted 392 creds.InheritableCaps = inheritable 393 creds.EffectiveCaps = effective 394 t.creds.Store(creds) 395 return nil 396 } 397 398 // DropBoundingCapability attempts to drop capability cp from t's capability 399 // bounding set. 400 func (t *Task) DropBoundingCapability(cp linux.Capability) error { 401 t.mu.Lock() 402 defer t.mu.Unlock() 403 creds := t.Credentials() 404 if !creds.HasCapability(linux.CAP_SETPCAP) { 405 return linuxerr.EPERM 406 } 407 creds = creds.Fork() // The credentials object is immutable. See doc for creds. 408 creds.BoundingCaps &^= auth.CapabilitySetOf(cp) 409 t.creds.Store(creds) 410 return nil 411 } 412 413 // SetUserNamespace attempts to move c into ns. 414 func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error { 415 t.mu.Lock() 416 defer t.mu.Unlock() 417 418 creds := t.Credentials() 419 // "A process reassociating itself with a user namespace must have the 420 // CAP_SYS_ADMIN capability in the target user namespace." - setns(2) 421 // 422 // If t just created ns, then t.creds is guaranteed to have CAP_SYS_ADMIN 423 // in ns (by rule 3 in auth.Credentials.HasCapability). 424 if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) { 425 return linuxerr.EPERM 426 } 427 428 creds = creds.Fork() // The credentials object is immutable. See doc for creds. 429 creds.UserNamespace = ns 430 // "The child process created by clone(2) with the CLONE_NEWUSER flag 431 // starts out with a complete set of capabilities in the new user 432 // namespace. Likewise, a process that creates a new user namespace using 433 // unshare(2) or joins an existing user namespace using setns(2) gains a 434 // full set of capabilities in that namespace." 435 creds.PermittedCaps = auth.AllCapabilities 436 creds.InheritableCaps = 0 437 creds.EffectiveCaps = auth.AllCapabilities 438 creds.BoundingCaps = auth.AllCapabilities 439 // "A call to clone(2), unshare(2), or setns(2) using the CLONE_NEWUSER 440 // flag sets the "securebits" flags (see capabilities(7)) to their default 441 // values (all flags disabled) in the child (for clone(2)) or caller (for 442 // unshare(2), or setns(2)." - user_namespaces(7) 443 creds.KeepCaps = false 444 t.creds.Store(creds) 445 446 return nil 447 } 448 449 // SetKeepCaps will set the keep capabilities flag PR_SET_KEEPCAPS. 450 func (t *Task) SetKeepCaps(k bool) { 451 t.mu.Lock() 452 defer t.mu.Unlock() 453 creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds. 454 creds.KeepCaps = k 455 t.creds.Store(creds) 456 } 457 458 // updateCredsForExecLocked updates t.creds to reflect an execve(). 459 // 460 // NOTE(b/30815691): We currently do not implement privileged executables 461 // (set-user/group-ID bits and file capabilities). This allows us to make a lot 462 // of simplifying assumptions: 463 // 464 // - We assume the no_new_privs bit (set by prctl(SET_NO_NEW_PRIVS)), which 465 // disables the features we don't support anyway, is always set. This 466 // drastically simplifies this function. 467 // 468 // - We don't set AT_SECURE = 1, because no_new_privs always being set means 469 // that the conditions that require AT_SECURE = 1 never arise. (Compare Linux's 470 // security/commoncap.c:cap_bprm_set_creds() and cap_bprm_secureexec().) 471 // 472 // - We don't check for CAP_SYS_ADMIN in prctl(PR_SET_SECCOMP), since 473 // seccomp-bpf is also allowed if the task has no_new_privs set. 474 // 475 // - Task.ptraceAttach does not serialize with execve as it does in Linux, 476 // since no_new_privs being set has the same effect as the presence of an 477 // unprivileged tracer. 478 // 479 // Preconditions: t.mu must be locked. 480 func (t *Task) updateCredsForExecLocked() { 481 // """ 482 // During an execve(2), the kernel calculates the new capabilities of 483 // the process using the following algorithm: 484 // 485 // P'(permitted) = (P(inheritable) & F(inheritable)) | 486 // (F(permitted) & cap_bset) 487 // 488 // P'(effective) = F(effective) ? P'(permitted) : 0 489 // 490 // P'(inheritable) = P(inheritable) [i.e., unchanged] 491 // 492 // where: 493 // 494 // P denotes the value of a thread capability set before the 495 // execve(2) 496 // 497 // P' denotes the value of a thread capability set after the 498 // execve(2) 499 // 500 // F denotes a file capability set 501 // 502 // cap_bset is the value of the capability bounding set 503 // 504 // ... 505 // 506 // In order to provide an all-powerful root using capability sets, during 507 // an execve(2): 508 // 509 // 1. If a set-user-ID-root program is being executed, or the real user ID 510 // of the process is 0 (root) then the file inheritable and permitted sets 511 // are defined to be all ones (i.e. all capabilities enabled). 512 // 513 // 2. If a set-user-ID-root program is being executed, then the file 514 // effective bit is defined to be one (enabled). 515 // 516 // The upshot of the above rules, combined with the capabilities 517 // transformations described above, is that when a process execve(2)s a 518 // set-user-ID-root program, or when a process with an effective UID of 0 519 // execve(2)s a program, it gains all capabilities in its permitted and 520 // effective capability sets, except those masked out by the capability 521 // bounding set. 522 // """ - capabilities(7) 523 // (ambient capability sets omitted) 524 // 525 // As the last paragraph implies, the case of "a set-user-ID root program 526 // is being executed" also includes the case where (namespace) root is 527 // executing a non-set-user-ID program; the actual check is just based on 528 // the effective user ID. 529 var newPermitted auth.CapabilitySet // since F(inheritable) == F(permitted) == 0 530 fileEffective := false 531 creds := t.Credentials() 532 root := creds.UserNamespace.MapToKUID(auth.RootUID) 533 if creds.EffectiveKUID == root || creds.RealKUID == root { 534 newPermitted = creds.InheritableCaps | creds.BoundingCaps 535 if creds.EffectiveKUID == root { 536 fileEffective = true 537 } 538 } 539 540 creds = creds.Fork() // The credentials object is immutable. See doc for creds. 541 542 // Now we enter poorly-documented, somewhat confusing territory. (The 543 // accompanying comment in Linux's security/commoncap.c:cap_bprm_set_creds 544 // is not very helpful.) My reading of it is: 545 // 546 // If at least one of the following is true: 547 // 548 // A1. The execing task is ptraced, and the tracer did not have 549 // CAP_SYS_PTRACE in the execing task's user namespace at the time of 550 // PTRACE_ATTACH. 551 // 552 // A2. The execing task shares its FS context with at least one task in 553 // another thread group. 554 // 555 // A3. The execing task has no_new_privs set. 556 // 557 // AND at least one of the following is true: 558 // 559 // B1. The new effective user ID (which may come from set-user-ID, or be the 560 // execing task's existing effective user ID) is not equal to the task's 561 // real UID. 562 // 563 // B2. The new effective group ID (which may come from set-group-ID, or be 564 // the execing task's existing effective group ID) is not equal to the 565 // task's real GID. 566 // 567 // B3. The new permitted capability set contains capabilities not in the 568 // task's permitted capability set. 569 // 570 // Then: 571 // 572 // C1. Limit the new permitted capability set to the task's permitted 573 // capability set. 574 // 575 // C2. If either the task does not have CAP_SETUID in its user namespace, or 576 // the task has no_new_privs set, force the new effective UID and GID to 577 // the task's real UID and GID. 578 // 579 // But since no_new_privs is always set (A3 is always true), this becomes 580 // much simpler. If B1 and B2 are false, C2 is a no-op. If B3 is false, C1 581 // is a no-op. So we can just do C1 and C2 unconditionally. 582 if creds.EffectiveKUID != creds.RealKUID || creds.EffectiveKGID != creds.RealKGID { 583 creds.EffectiveKUID = creds.RealKUID 584 creds.EffectiveKGID = creds.RealKGID 585 t.parentDeathSignal = 0 586 } 587 // (Saved set-user-ID is always set to the new effective user ID, and saved 588 // set-group-ID is always set to the new effective group ID, regardless of 589 // the above.) 590 creds.SavedKUID = creds.RealKUID 591 creds.SavedKGID = creds.RealKGID 592 creds.PermittedCaps &= newPermitted 593 if fileEffective { 594 creds.EffectiveCaps = creds.PermittedCaps 595 } else { 596 creds.EffectiveCaps = 0 597 } 598 599 // prctl(2): The "keep capabilities" value will be reset to 0 on subsequent 600 // calls to execve(2). 601 creds.KeepCaps = false 602 603 // "The bounding set is inherited at fork(2) from the thread's parent, and 604 // is preserved across an execve(2)". So we're done. 605 t.creds.Store(creds) 606 }