github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/kernel/semaphore/semaphore.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package semaphore implements System V semaphores. 16 package semaphore 17 18 import ( 19 "fmt" 20 21 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 22 "github.com/MerlinKodo/gvisor/pkg/context" 23 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 24 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth" 25 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/ipc" 26 ktime "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/time" 27 "github.com/MerlinKodo/gvisor/pkg/sentry/vfs" 28 "github.com/MerlinKodo/gvisor/pkg/sync" 29 ) 30 31 const ( 32 // Maximum semaphore value. 33 valueMax = linux.SEMVMX 34 35 // Maximum number of semaphore sets. 36 setsMax = linux.SEMMNI 37 38 // Maximum number of semaphores in a semaphore set. 39 semsMax = linux.SEMMSL 40 41 // Maximum number of semaphores in all semaphore sets. 42 semsTotalMax = linux.SEMMNS 43 ) 44 45 // Registry maintains a set of semaphores that can be found by key or ID. 46 // 47 // +stateify savable 48 type Registry struct { 49 // mu protects all fields below. 50 mu sync.Mutex `state:"nosave"` 51 52 // reg defines basic fields and operations needed for all SysV registries. 53 reg *ipc.Registry 54 55 // indexes maintains a mapping between a set's index in virtual array and 56 // its identifier. 57 indexes map[int32]ipc.ID 58 } 59 60 // Set represents a set of semaphores that can be operated atomically. 61 // 62 // +stateify savable 63 type Set struct { 64 // registry owning this sem set. Immutable. 65 registry *Registry 66 67 // mu protects all fields below. 68 mu sync.Mutex `state:"nosave"` 69 70 obj *ipc.Object 71 72 opTime ktime.Time 73 changeTime ktime.Time 74 75 // sems holds all semaphores in the set. The slice itself is immutable after 76 // it's been set, however each 'sem' object in the slice requires 'mu' lock. 77 sems []sem 78 79 // dead is set to true when the set is removed and can't be reached anymore. 80 // All waiters must wake up and fail when set is dead. 81 dead bool 82 } 83 84 // sem represents a single semaphore from a set. 85 // 86 // +stateify savable 87 type sem struct { 88 value int16 89 waiters waiterList `state:"zerovalue"` 90 pid int32 91 } 92 93 // waiter represents a caller that is waiting for the semaphore value to 94 // become positive or zero. 95 // 96 // +stateify savable 97 type waiter struct { 98 waiterEntry 99 100 // value represents how much resource the waiter needs to wake up. 101 // The value is either 0 or negative. 102 value int16 103 ch chan struct{} 104 } 105 106 // NewRegistry creates a new semaphore set registry. 107 func NewRegistry(userNS *auth.UserNamespace) *Registry { 108 return &Registry{ 109 reg: ipc.NewRegistry(userNS), 110 indexes: make(map[int32]ipc.ID), 111 } 112 } 113 114 // FindOrCreate searches for a semaphore set that matches 'key'. If not found, 115 // it may create a new one if requested. If private is true, key is ignored and 116 // a new set is always created. If create is false, it fails if a set cannot 117 // be found. If exclusive is true, it fails if a set with the same key already 118 // exists. 119 func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, mode linux.FileMode, private, create, exclusive bool) (*Set, error) { 120 if nsems < 0 || nsems > semsMax { 121 return nil, linuxerr.EINVAL 122 } 123 124 r.mu.Lock() 125 defer r.mu.Unlock() 126 127 if !private { 128 set, err := r.reg.Find(ctx, key, mode, create, exclusive) 129 if err != nil { 130 return nil, err 131 } 132 133 // Validate semaphore-specific parameters. 134 if set != nil { 135 set := set.(*Set) 136 if nsems > int32(set.Size()) { 137 return nil, linuxerr.EINVAL 138 } 139 return set, nil 140 } 141 } 142 143 // Zero is only valid if an existing set is found. 144 if nsems == 0 { 145 return nil, linuxerr.EINVAL 146 } 147 148 // Apply system limits. 149 // 150 // Map reg.objects and map indexes in a registry are of the same size, 151 // check map reg.objects only here for the system limit. 152 if r.reg.ObjectCount() >= setsMax { 153 return nil, linuxerr.ENOSPC 154 } 155 if r.totalSems() > int(semsTotalMax-nsems) { 156 return nil, linuxerr.ENOSPC 157 } 158 159 // Finally create a new set. 160 return r.newSetLocked(ctx, key, auth.CredentialsFromContext(ctx), mode, nsems) 161 } 162 163 // IPCInfo returns information about system-wide semaphore limits and parameters. 164 func (r *Registry) IPCInfo() *linux.SemInfo { 165 return &linux.SemInfo{ 166 SemMap: linux.SEMMAP, 167 SemMni: linux.SEMMNI, 168 SemMns: linux.SEMMNS, 169 SemMnu: linux.SEMMNU, 170 SemMsl: linux.SEMMSL, 171 SemOpm: linux.SEMOPM, 172 SemUme: linux.SEMUME, 173 SemUsz: linux.SEMUSZ, 174 SemVmx: linux.SEMVMX, 175 SemAem: linux.SEMAEM, 176 } 177 } 178 179 // SemInfo returns a seminfo structure containing the same information as 180 // for IPC_INFO, except that SemUsz field returns the number of existing 181 // semaphore sets, and SemAem field returns the number of existing semaphores. 182 func (r *Registry) SemInfo() *linux.SemInfo { 183 r.mu.Lock() 184 defer r.mu.Unlock() 185 186 info := r.IPCInfo() 187 info.SemUsz = uint32(r.reg.ObjectCount()) 188 info.SemAem = uint32(r.totalSems()) 189 190 return info 191 } 192 193 // HighestIndex returns the index of the highest used entry in 194 // the kernel's array. 195 func (r *Registry) HighestIndex() int32 { 196 r.mu.Lock() 197 defer r.mu.Unlock() 198 199 // By default, highest used index is 0 even though 200 // there is no semaphore set. 201 var highestIndex int32 202 for index := range r.indexes { 203 if index > highestIndex { 204 highestIndex = index 205 } 206 } 207 return highestIndex 208 } 209 210 // Remove removes set with give 'id' from the registry and marks the set as 211 // dead. All waiters will be awakened and fail. 212 func (r *Registry) Remove(id ipc.ID, creds *auth.Credentials) error { 213 r.mu.Lock() 214 defer r.mu.Unlock() 215 216 index, found := r.findIndexByID(id) 217 if !found { 218 return linuxerr.EINVAL 219 } 220 delete(r.indexes, index) 221 222 r.reg.Remove(id, creds) 223 224 return nil 225 } 226 227 // newSetLocked creates a new Set using given fields. An error is returned if there 228 // are no more available identifiers. 229 // 230 // Precondition: r.mu must be held. 231 func (r *Registry) newSetLocked(ctx context.Context, key ipc.Key, creator *auth.Credentials, mode linux.FileMode, nsems int32) (*Set, error) { 232 set := &Set{ 233 registry: r, 234 obj: ipc.NewObject(r.reg.UserNS, ipc.Key(key), creator, creator, mode), 235 changeTime: ktime.NowFromContext(ctx), 236 sems: make([]sem, nsems), 237 } 238 239 err := r.reg.Register(set) 240 if err != nil { 241 return nil, err 242 } 243 244 index, found := r.findFirstAvailableIndex() 245 if !found { 246 // See linux, ipc/sem.c:newary(). 247 return nil, linuxerr.ENOSPC 248 } 249 r.indexes[index] = set.obj.ID 250 251 return set, nil 252 } 253 254 // FindByID looks up a set given an ID. 255 func (r *Registry) FindByID(id ipc.ID) *Set { 256 r.mu.Lock() 257 defer r.mu.Unlock() 258 mech := r.reg.FindByID(id) 259 if mech == nil { 260 return nil 261 } 262 return mech.(*Set) 263 } 264 265 // FindByIndex looks up a set given an index. 266 func (r *Registry) FindByIndex(index int32) *Set { 267 r.mu.Lock() 268 defer r.mu.Unlock() 269 270 id, present := r.indexes[index] 271 if !present { 272 return nil 273 } 274 return r.reg.FindByID(id).(*Set) 275 } 276 277 func (r *Registry) findIndexByID(id ipc.ID) (int32, bool) { 278 for k, v := range r.indexes { 279 if v == id { 280 return k, true 281 } 282 } 283 return 0, false 284 } 285 286 func (r *Registry) findFirstAvailableIndex() (int32, bool) { 287 for index := int32(0); index < setsMax; index++ { 288 if _, present := r.indexes[index]; !present { 289 return index, true 290 } 291 } 292 return 0, false 293 } 294 295 func (r *Registry) totalSems() int { 296 totalSems := 0 297 r.reg.ForAllObjects( 298 func(o ipc.Mechanism) { 299 totalSems += o.(*Set).Size() 300 }, 301 ) 302 return totalSems 303 } 304 305 // ID returns semaphore's ID. 306 func (s *Set) ID() ipc.ID { 307 return s.obj.ID 308 } 309 310 // Object implements ipc.Mechanism.Object. 311 func (s *Set) Object() *ipc.Object { 312 return s.obj 313 } 314 315 // Lock implements ipc.Mechanism.Lock. 316 func (s *Set) Lock() { 317 s.mu.Lock() 318 } 319 320 // Unlock implements ipc.mechanism.Unlock. 321 // 322 // +checklocksignore 323 func (s *Set) Unlock() { 324 s.mu.Unlock() 325 } 326 327 func (s *Set) findSem(num int32) *sem { 328 if num < 0 || int(num) >= s.Size() { 329 return nil 330 } 331 return &s.sems[num] 332 } 333 334 // Size returns the number of semaphores in the set. Size is immutable. 335 func (s *Set) Size() int { 336 return len(s.sems) 337 } 338 339 // Set modifies attributes for a semaphore set. See semctl(IPC_SET). 340 func (s *Set) Set(ctx context.Context, ds *linux.SemidDS) error { 341 s.mu.Lock() 342 defer s.mu.Unlock() 343 344 if err := s.obj.Set(ctx, &ds.SemPerm); err != nil { 345 return err 346 } 347 348 s.changeTime = ktime.NowFromContext(ctx) 349 return nil 350 } 351 352 // GetStat extracts semid_ds information from the set. 353 func (s *Set) GetStat(creds *auth.Credentials) (*linux.SemidDS, error) { 354 // "The calling process must have read permission on the semaphore set." 355 return s.semStat(creds, vfs.MayRead) 356 } 357 358 // GetStatAny extracts semid_ds information from the set without requiring read access. 359 func (s *Set) GetStatAny(creds *auth.Credentials) (*linux.SemidDS, error) { 360 return s.semStat(creds, 0) 361 } 362 363 func (s *Set) semStat(creds *auth.Credentials, ats vfs.AccessTypes) (*linux.SemidDS, error) { 364 s.mu.Lock() 365 defer s.mu.Unlock() 366 367 if !s.obj.CheckPermissions(creds, ats) { 368 return nil, linuxerr.EACCES 369 } 370 371 return &linux.SemidDS{ 372 SemPerm: linux.IPCPerm{ 373 Key: uint32(s.obj.Key), 374 UID: uint32(creds.UserNamespace.MapFromKUID(s.obj.OwnerUID)), 375 GID: uint32(creds.UserNamespace.MapFromKGID(s.obj.OwnerGID)), 376 CUID: uint32(creds.UserNamespace.MapFromKUID(s.obj.CreatorUID)), 377 CGID: uint32(creds.UserNamespace.MapFromKGID(s.obj.CreatorGID)), 378 Mode: uint16(s.obj.Mode), 379 Seq: 0, // IPC sequence not supported. 380 }, 381 SemOTime: s.opTime.TimeT(), 382 SemCTime: s.changeTime.TimeT(), 383 SemNSems: uint64(s.Size()), 384 }, nil 385 } 386 387 // SetVal overrides a semaphore value, waking up waiters as needed. 388 func (s *Set) SetVal(ctx context.Context, num int32, val int16, creds *auth.Credentials, pid int32) error { 389 if val < 0 || val > valueMax { 390 return linuxerr.ERANGE 391 } 392 393 s.mu.Lock() 394 defer s.mu.Unlock() 395 396 // "The calling process must have alter permission on the semaphore set." 397 if !s.obj.CheckPermissions(creds, vfs.MayWrite) { 398 return linuxerr.EACCES 399 } 400 401 sem := s.findSem(num) 402 if sem == nil { 403 return linuxerr.ERANGE 404 } 405 406 // TODO(gvisor.dev/issue/137): Clear undo entries in all processes. 407 sem.value = val 408 sem.pid = pid 409 s.changeTime = ktime.NowFromContext(ctx) 410 sem.wakeWaiters() 411 return nil 412 } 413 414 // SetValAll overrides all semaphores values, waking up waiters as needed. It also 415 // sets semaphore's PID which was fixed in Linux 4.6. 416 // 417 // 'len(vals)' must be equal to 's.Size()'. 418 func (s *Set) SetValAll(ctx context.Context, vals []uint16, creds *auth.Credentials, pid int32) error { 419 if len(vals) != s.Size() { 420 panic(fmt.Sprintf("vals length (%d) different that Set.Size() (%d)", len(vals), s.Size())) 421 } 422 423 for _, val := range vals { 424 if val > valueMax { 425 return linuxerr.ERANGE 426 } 427 } 428 429 s.mu.Lock() 430 defer s.mu.Unlock() 431 432 // "The calling process must have alter permission on the semaphore set." 433 if !s.obj.CheckPermissions(creds, vfs.MayWrite) { 434 return linuxerr.EACCES 435 } 436 437 for i, val := range vals { 438 sem := &s.sems[i] 439 440 // TODO(gvisor.dev/issue/137): Clear undo entries in all processes. 441 sem.value = int16(val) 442 sem.pid = pid 443 sem.wakeWaiters() 444 } 445 s.changeTime = ktime.NowFromContext(ctx) 446 return nil 447 } 448 449 // GetVal returns a semaphore value. 450 func (s *Set) GetVal(num int32, creds *auth.Credentials) (int16, error) { 451 s.mu.Lock() 452 defer s.mu.Unlock() 453 454 // "The calling process must have read permission on the semaphore set." 455 if !s.obj.CheckPermissions(creds, vfs.MayRead) { 456 return 0, linuxerr.EACCES 457 } 458 459 sem := s.findSem(num) 460 if sem == nil { 461 return 0, linuxerr.ERANGE 462 } 463 return sem.value, nil 464 } 465 466 // GetValAll returns value for all semaphores. 467 func (s *Set) GetValAll(creds *auth.Credentials) ([]uint16, error) { 468 s.mu.Lock() 469 defer s.mu.Unlock() 470 471 // "The calling process must have read permission on the semaphore set." 472 if !s.obj.CheckPermissions(creds, vfs.MayRead) { 473 return nil, linuxerr.EACCES 474 } 475 476 vals := make([]uint16, s.Size()) 477 for i, sem := range s.sems { 478 vals[i] = uint16(sem.value) 479 } 480 return vals, nil 481 } 482 483 // GetPID returns the PID set when performing operations in the semaphore. 484 func (s *Set) GetPID(num int32, creds *auth.Credentials) (int32, error) { 485 s.mu.Lock() 486 defer s.mu.Unlock() 487 488 // "The calling process must have read permission on the semaphore set." 489 if !s.obj.CheckPermissions(creds, vfs.MayRead) { 490 return 0, linuxerr.EACCES 491 } 492 493 sem := s.findSem(num) 494 if sem == nil { 495 return 0, linuxerr.ERANGE 496 } 497 return sem.pid, nil 498 } 499 500 func (s *Set) countWaiters(num int32, creds *auth.Credentials, pred func(w *waiter) bool) (uint16, error) { 501 s.mu.Lock() 502 defer s.mu.Unlock() 503 504 // The calling process must have read permission on the semaphore set. 505 if !s.obj.CheckPermissions(creds, vfs.MayRead) { 506 return 0, linuxerr.EACCES 507 } 508 509 sem := s.findSem(num) 510 if sem == nil { 511 return 0, linuxerr.ERANGE 512 } 513 var cnt uint16 514 for w := sem.waiters.Front(); w != nil; w = w.Next() { 515 if pred(w) { 516 cnt++ 517 } 518 } 519 return cnt, nil 520 } 521 522 // CountZeroWaiters returns number of waiters waiting for the sem's value to increase. 523 func (s *Set) CountZeroWaiters(num int32, creds *auth.Credentials) (uint16, error) { 524 return s.countWaiters(num, creds, func(w *waiter) bool { 525 return w.value == 0 526 }) 527 } 528 529 // CountNegativeWaiters returns number of waiters waiting for the sem to go to zero. 530 func (s *Set) CountNegativeWaiters(num int32, creds *auth.Credentials) (uint16, error) { 531 return s.countWaiters(num, creds, func(w *waiter) bool { 532 return w.value < 0 533 }) 534 } 535 536 // ExecuteOps attempts to execute a list of operations to the set. It only 537 // succeeds when all operations can be applied. No changes are made if it fails. 538 // 539 // On failure, it may return an error (retries are hopeless) or it may return 540 // a channel that can be waited on before attempting again. 541 func (s *Set) ExecuteOps(ctx context.Context, ops []linux.Sembuf, creds *auth.Credentials, pid int32) (chan struct{}, int32, error) { 542 s.mu.Lock() 543 defer s.mu.Unlock() 544 545 // Did it race with a removal operation? 546 if s.dead { 547 return nil, 0, linuxerr.EIDRM 548 } 549 550 // Validate the operations. 551 readOnly := true 552 for _, op := range ops { 553 if s.findSem(int32(op.SemNum)) == nil { 554 return nil, 0, linuxerr.EFBIG 555 } 556 if op.SemOp != 0 { 557 readOnly = false 558 } 559 } 560 561 ats := vfs.MayRead 562 if !readOnly { 563 ats = vfs.MayWrite 564 } 565 if !s.obj.CheckPermissions(creds, ats) { 566 return nil, 0, linuxerr.EACCES 567 } 568 569 ch, num, err := s.executeOps(ctx, ops, pid) 570 if err != nil { 571 return nil, 0, err 572 } 573 return ch, num, nil 574 } 575 576 func (s *Set) executeOps(ctx context.Context, ops []linux.Sembuf, pid int32) (chan struct{}, int32, error) { 577 // Changes to semaphores go to this slice temporarily until they all succeed. 578 tmpVals := make([]int16, len(s.sems)) 579 for i := range s.sems { 580 tmpVals[i] = s.sems[i].value 581 } 582 583 for _, op := range ops { 584 sem := &s.sems[op.SemNum] 585 if op.SemOp == 0 { 586 // Handle 'wait for zero' operation. 587 if tmpVals[op.SemNum] != 0 { 588 // Semaphore isn't 0, must wait. 589 if op.SemFlg&linux.IPC_NOWAIT != 0 { 590 return nil, 0, linuxerr.ErrWouldBlock 591 } 592 593 w := newWaiter(op.SemOp) 594 sem.waiters.PushBack(w) 595 return w.ch, int32(op.SemNum), nil 596 } 597 } else { 598 if op.SemOp < 0 { 599 // Handle 'wait' operation. 600 if -op.SemOp > valueMax { 601 return nil, 0, linuxerr.ERANGE 602 } 603 if -op.SemOp > tmpVals[op.SemNum] { 604 // Not enough resources, must wait. 605 if op.SemFlg&linux.IPC_NOWAIT != 0 { 606 return nil, 0, linuxerr.ErrWouldBlock 607 } 608 609 w := newWaiter(op.SemOp) 610 sem.waiters.PushBack(w) 611 return w.ch, int32(op.SemNum), nil 612 } 613 } else { 614 // op.SemOp > 0: Handle 'signal' operation. 615 if tmpVals[op.SemNum] > valueMax-op.SemOp { 616 return nil, 0, linuxerr.ERANGE 617 } 618 } 619 620 tmpVals[op.SemNum] += op.SemOp 621 } 622 } 623 624 // All operations succeeded, apply them. 625 // TODO(gvisor.dev/issue/137): handle undo operations. 626 for i, v := range tmpVals { 627 s.sems[i].value = v 628 s.sems[i].wakeWaiters() 629 s.sems[i].pid = pid 630 } 631 s.opTime = ktime.NowFromContext(ctx) 632 return nil, 0, nil 633 } 634 635 // AbortWait notifies that a waiter is giving up and will not wait on the 636 // channel anymore. 637 func (s *Set) AbortWait(num int32, ch chan struct{}) { 638 s.mu.Lock() 639 defer s.mu.Unlock() 640 641 sem := &s.sems[num] 642 for w := sem.waiters.Front(); w != nil; w = w.Next() { 643 if w.ch == ch { 644 sem.waiters.Remove(w) 645 return 646 } 647 } 648 // Waiter may not be found in case it raced with wakeWaiters(). 649 } 650 651 // Destroy implements ipc.Mechanism.Destroy. 652 // 653 // Preconditions: Caller must hold 's.mu'. 654 func (s *Set) Destroy() { 655 // Notify all waiters. They will fail on the next attempt to execute 656 // operations and return error. 657 s.dead = true 658 for _, s := range s.sems { 659 for w := s.waiters.Front(); w != nil; w = w.Next() { 660 w.ch <- struct{}{} 661 } 662 s.waiters.Reset() 663 } 664 } 665 666 func abs(val int16) int16 { 667 if val < 0 { 668 return -val 669 } 670 return val 671 } 672 673 // wakeWaiters goes over all waiters and checks which of them can be notified. 674 func (s *sem) wakeWaiters() { 675 // Note that this will release all waiters waiting for 0 too. 676 for w := s.waiters.Front(); w != nil; { 677 if s.value < abs(w.value) { 678 // Still blocked, skip it. 679 w = w.Next() 680 continue 681 } 682 w.ch <- struct{}{} 683 old := w 684 w = w.Next() 685 s.waiters.Remove(old) 686 } 687 } 688 689 func newWaiter(val int16) *waiter { 690 return &waiter{ 691 value: val, 692 ch: make(chan struct{}, 1), 693 } 694 }