// Source: github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/kernel/msgqueue/msgqueue.go

// Copyright 2021 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package msgqueue implements System V message queues.
package msgqueue

import (
	"github.com/metacubex/gvisor/pkg/abi/linux"
	"github.com/metacubex/gvisor/pkg/context"
	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
	"github.com/metacubex/gvisor/pkg/sentry/kernel/ipc"
	ktime "github.com/metacubex/gvisor/pkg/sentry/kernel/time"
	"github.com/metacubex/gvisor/pkg/sentry/vfs"
	"github.com/metacubex/gvisor/pkg/sync"
	"github.com/metacubex/gvisor/pkg/waiter"
)

const (
	// maxQueues is the system-wide limit on the number of queues. FindOrCreate
	// refuses to create a queue (ENOSPC) once the registry holds this many.
	maxQueues = linux.MSGMNI

	// maxQueueBytes is the maximum size of a queue in bytes. It is the initial
	// value of Queue.maxBytes and the threshold above which Queue.Set requires
	// CAP_SYS_RESOURCE.
	maxQueueBytes = linux.MSGMNB

	// maxMessageBytes is the maximum size of a message in bytes. Queue.Receive
	// rejects larger receive sizes with EINVAL.
	maxMessageBytes = linux.MSGMAX
)

// Registry contains a set of message queues that can be referenced using keys
// or IDs.
//
// +stateify savable
type Registry struct {
	// mu protects all the fields below.
	mu sync.Mutex `state:"nosave"`

	// reg defines basic fields and operations needed for all SysV registries.
	reg *ipc.Registry
}

// NewRegistry returns a new Registry ready to be used.
54 func NewRegistry(userNS *auth.UserNamespace) *Registry { 55 return &Registry{ 56 reg: ipc.NewRegistry(userNS), 57 } 58 } 59 60 // Queue represents a SysV message queue, described by sysvipc(7). 61 // 62 // +stateify savable 63 type Queue struct { 64 // registry is the registry owning this queue. Immutable. 65 registry *Registry 66 67 // mu protects all the fields below. 68 mu sync.Mutex `state:"nosave"` 69 70 // dead is set to true when a queue is removed from the registry and should 71 // not be used. Operations on the queue should check dead, and return 72 // EIDRM if set to true. 73 dead bool 74 75 // obj defines basic fields that should be included in all SysV IPC objects. 76 obj *ipc.Object 77 78 // senders holds a queue of blocked message senders. Senders are notified 79 // when enough space is available in the queue to insert their message. 80 senders waiter.Queue 81 82 // receivers holds a queue of blocked receivers. Receivers are notified 83 // when a new message is inserted into the queue and can be received. 84 receivers waiter.Queue 85 86 // messages is a list of sent messages. 87 messages msgList 88 89 // sendTime is the last time a msgsnd was performed. 90 sendTime ktime.Time 91 92 // receiveTime is the last time a msgrcv was performed. 93 receiveTime ktime.Time 94 95 // changeTime is the last time the queue was modified using msgctl. 96 changeTime ktime.Time 97 98 // byteCount is the current number of message bytes in the queue. 99 byteCount uint64 100 101 // messageCount is the current number of messages in the queue. 102 messageCount uint64 103 104 // maxBytes is the maximum allowed number of bytes in the queue, and is also 105 // used as a limit for the number of total possible messages. 106 maxBytes uint64 107 108 // sendPID is the PID of the process that performed the last msgsnd. 109 sendPID int32 110 111 // receivePID is the PID of the process that performed the last msgrcv. 
112 receivePID int32 113 } 114 115 // Message represents a message exchanged through a Queue via msgsnd(2) and 116 // msgrcv(2). 117 // 118 // +stateify savable 119 type Message struct { 120 msgEntry 121 122 // Type is an integer representing the type of the sent message. 123 Type int64 124 125 // Text is an untyped block of memory. 126 Text []byte 127 128 // Size is the size of Text. 129 Size uint64 130 } 131 132 func (m *Message) makeCopy() *Message { 133 new := &Message{ 134 Type: m.Type, 135 Size: m.Size, 136 } 137 new.Text = make([]byte, len(m.Text)) 138 copy(new.Text, m.Text) 139 return new 140 } 141 142 // Blocker is used for blocking Queue.Send, and Queue.Receive calls that serves 143 // as an abstracted version of kernel.Task. kernel.Task is not directly used to 144 // prevent circular dependencies. 145 type Blocker interface { 146 Block(C <-chan struct{}) error 147 } 148 149 // FindOrCreate creates a new message queue or returns an existing one. See 150 // msgget(2). 151 func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, mode linux.FileMode, private, create, exclusive bool) (*Queue, error) { 152 r.mu.Lock() 153 defer r.mu.Unlock() 154 155 if !private { 156 queue, err := r.reg.Find(ctx, key, mode, create, exclusive) 157 if err != nil { 158 return nil, err 159 } 160 161 if queue != nil { 162 return queue.(*Queue), nil 163 } 164 } 165 166 // Check system-wide limits. 167 if r.reg.ObjectCount() >= maxQueues { 168 return nil, linuxerr.ENOSPC 169 } 170 171 return r.newQueueLocked(ctx, key, auth.CredentialsFromContext(ctx), mode) 172 } 173 174 // newQueueLocked creates a new queue using the given fields. An error is 175 // returned if there're no more available identifiers. 176 // 177 // Precondition: r.mu must be held. 
178 func (r *Registry) newQueueLocked(ctx context.Context, key ipc.Key, creds *auth.Credentials, mode linux.FileMode) (*Queue, error) { 179 q := &Queue{ 180 registry: r, 181 obj: ipc.NewObject(r.reg.UserNS, key, creds, creds, mode), 182 sendTime: ktime.ZeroTime, 183 receiveTime: ktime.ZeroTime, 184 changeTime: ktime.NowFromContext(ctx), 185 maxBytes: maxQueueBytes, 186 } 187 188 err := r.reg.Register(q) 189 if err != nil { 190 return nil, err 191 } 192 return q, nil 193 } 194 195 // Remove removes the queue with specified ID. All waiters (readers and 196 // writers) and writers will be awakened and fail. Remove will return an error 197 // if the ID is invalid, or the the user doesn't have privileges. 198 func (r *Registry) Remove(id ipc.ID, creds *auth.Credentials) error { 199 r.mu.Lock() 200 defer r.mu.Unlock() 201 202 r.reg.Remove(id, creds) 203 return nil 204 } 205 206 // FindByID returns the queue with the specified ID and an error if the ID 207 // doesn't exist. 208 func (r *Registry) FindByID(id ipc.ID) (*Queue, error) { 209 r.mu.Lock() 210 defer r.mu.Unlock() 211 212 mech := r.reg.FindByID(id) 213 if mech == nil { 214 return nil, linuxerr.EINVAL 215 } 216 return mech.(*Queue), nil 217 } 218 219 // IPCInfo reports global parameters for message queues. See msgctl(IPC_INFO). 220 func (r *Registry) IPCInfo(ctx context.Context) *linux.MsgInfo { 221 return &linux.MsgInfo{ 222 MsgPool: linux.MSGPOOL, 223 MsgMap: linux.MSGMAP, 224 MsgMax: linux.MSGMAX, 225 MsgMnb: linux.MSGMNB, 226 MsgMni: linux.MSGMNI, 227 MsgSsz: linux.MSGSSZ, 228 MsgTql: linux.MSGTQL, 229 MsgSeg: linux.MSGSEG, 230 } 231 } 232 233 // MsgInfo reports global parameters for message queues. See msgctl(MSG_INFO). 
234 func (r *Registry) MsgInfo(ctx context.Context) *linux.MsgInfo { 235 r.mu.Lock() 236 defer r.mu.Unlock() 237 238 var messages, bytes uint64 239 r.reg.ForAllObjects( 240 func(o ipc.Mechanism) { 241 q := o.(*Queue) 242 q.mu.Lock() 243 messages += q.messageCount 244 bytes += q.byteCount 245 q.mu.Unlock() 246 }, 247 ) 248 249 return &linux.MsgInfo{ 250 MsgPool: int32(r.reg.ObjectCount()), 251 MsgMap: int32(messages), 252 MsgTql: int32(bytes), 253 MsgMax: linux.MSGMAX, 254 MsgMnb: linux.MSGMNB, 255 MsgMni: linux.MSGMNI, 256 MsgSsz: linux.MSGSSZ, 257 MsgSeg: linux.MSGSEG, 258 } 259 } 260 261 // Send appends a message to the message queue, and returns an error if sending 262 // fails. See msgsnd(2). 263 func (q *Queue) Send(ctx context.Context, m Message, b Blocker, wait bool, pid int32) error { 264 // Try to perform a non-blocking send using queue.append. If EWOULDBLOCK 265 // is returned, start the blocking procedure. Otherwise, return normally. 266 creds := auth.CredentialsFromContext(ctx) 267 268 // Fast path: first attempt a non-blocking push. 269 if err := q.push(ctx, m, creds, pid); err != linuxerr.EWOULDBLOCK { 270 return err 271 } 272 273 if !wait { 274 return linuxerr.EAGAIN 275 } 276 277 // Slow path: at this point, the queue was found to be full, and we were 278 // asked to block. 279 280 e, ch := waiter.NewChannelEntry(waiter.EventOut) 281 q.senders.EventRegister(&e) 282 defer q.senders.EventUnregister(&e) 283 284 // Note: we need to check again before blocking the first time since space 285 // may have become available. 286 for { 287 if err := q.push(ctx, m, creds, pid); err != linuxerr.EWOULDBLOCK { 288 return err 289 } 290 if err := b.Block(ch); err != nil { 291 return err 292 } 293 } 294 } 295 296 // push appends a message to the queue's message list and notifies waiting 297 // receivers that a message has been inserted. 
It returns an error if adding 298 // the message would cause the queue to exceed its maximum capacity, which can 299 // be used as a signal to block the task. Other errors should be returned as is. 300 func (q *Queue) push(ctx context.Context, m Message, creds *auth.Credentials, pid int32) error { 301 if m.Type <= 0 { 302 return linuxerr.EINVAL 303 } 304 305 q.mu.Lock() 306 defer q.mu.Unlock() 307 308 if !q.obj.CheckPermissions(creds, vfs.MayWrite) { 309 // The calling process does not have write permission on the message 310 // queue, and does not have the CAP_IPC_OWNER capability in the user 311 // namespace that governs its IPC namespace. 312 return linuxerr.EACCES 313 } 314 315 // Queue was removed while the process was waiting. 316 if q.dead { 317 return linuxerr.EIDRM 318 } 319 320 // Check if sufficient space is available (the queue isn't full.) From 321 // the man pages: 322 // 323 // "A message queue is considered to be full if either of the following 324 // conditions is true: 325 // 326 // • Adding a new message to the queue would cause the total number 327 // of bytes in the queue to exceed the queue's maximum size (the 328 // msg_qbytes field). 329 // 330 // • Adding another message to the queue would cause the total 331 // number of messages in the queue to exceed the queue's maximum 332 // size (the msg_qbytes field). This check is necessary to 333 // prevent an unlimited number of zero-length messages being 334 // placed on the queue. Although such messages contain no data, 335 // they nevertheless consume (locked) kernel memory." 336 // 337 // The msg_qbytes field in our implementation is q.maxBytes. 338 if m.Size+q.byteCount > q.maxBytes || q.messageCount+1 > q.maxBytes { 339 return linuxerr.EWOULDBLOCK 340 } 341 342 // Copy the message into the queue. 343 q.messages.PushBack(&m) 344 345 q.byteCount += m.Size 346 q.messageCount++ 347 q.sendPID = pid 348 q.sendTime = ktime.NowFromContext(ctx) 349 350 // Notify receivers about the new message. 
351 q.receivers.Notify(waiter.EventIn) 352 353 return nil 354 } 355 356 // Receive removes a message from the queue and returns it. See msgrcv(2). 357 func (q *Queue) Receive(ctx context.Context, b Blocker, mType int64, maxSize int64, wait, truncate, except bool, pid int32) (*Message, error) { 358 if maxSize < 0 || maxSize > maxMessageBytes { 359 return nil, linuxerr.EINVAL 360 } 361 max := uint64(maxSize) 362 creds := auth.CredentialsFromContext(ctx) 363 364 // Fast path: first attempt a non-blocking pop. 365 if msg, err := q.pop(ctx, creds, mType, max, truncate, except, pid); err != linuxerr.EWOULDBLOCK { 366 return msg, err 367 } 368 369 if !wait { 370 return nil, linuxerr.ENOMSG 371 } 372 373 // Slow path: at this point, the queue was found to be empty, and we were 374 // asked to block. 375 376 e, ch := waiter.NewChannelEntry(waiter.EventIn) 377 q.receivers.EventRegister(&e) 378 defer q.receivers.EventUnregister(&e) 379 380 // Note: we need to check again before blocking the first time since a 381 // message may have become available. 382 for { 383 if msg, err := q.pop(ctx, creds, mType, max, truncate, except, pid); err != linuxerr.EWOULDBLOCK { 384 return msg, err 385 } 386 if err := b.Block(ch); err != nil { 387 return nil, err 388 } 389 } 390 } 391 392 // pop pops the first message from the queue that matches the given type. It 393 // returns an error for all the cases specified in msgrcv(2). If the queue is 394 // empty or no message of the specified type is available, a EWOULDBLOCK error 395 // is returned, which can then be used as a signal to block the process or fail. 
396 func (q *Queue) pop(ctx context.Context, creds *auth.Credentials, mType int64, maxSize uint64, truncate, except bool, pid int32) (*Message, error) { 397 q.mu.Lock() 398 defer q.mu.Unlock() 399 400 if !q.obj.CheckPermissions(creds, vfs.MayRead) { 401 // The calling process does not have read permission on the message 402 // queue, and does not have the CAP_IPC_OWNER capability in the user 403 // namespace that governs its IPC namespace. 404 return nil, linuxerr.EACCES 405 } 406 407 // Queue was removed while the process was waiting. 408 if q.dead { 409 return nil, linuxerr.EIDRM 410 } 411 412 if q.messages.Empty() { 413 return nil, linuxerr.EWOULDBLOCK 414 } 415 416 // Get a message from the queue. 417 var msg *Message 418 switch { 419 case mType == 0: 420 msg = q.messages.Front() 421 case mType > 0: 422 msg = q.msgOfType(mType, except) 423 case mType < 0: 424 msg = q.msgOfTypeLessThan(-1 * mType) 425 } 426 427 // If no message exists, return a blocking signal. 428 if msg == nil { 429 return nil, linuxerr.EWOULDBLOCK 430 } 431 432 // Check message's size is acceptable. 433 if maxSize < msg.Size { 434 if !truncate { 435 return nil, linuxerr.E2BIG 436 } 437 msg.Size = maxSize 438 msg.Text = msg.Text[:maxSize+1] 439 } 440 441 q.messages.Remove(msg) 442 443 q.byteCount -= msg.Size 444 q.messageCount-- 445 q.receivePID = pid 446 q.receiveTime = ktime.NowFromContext(ctx) 447 448 // Notify senders about available space. 449 q.senders.Notify(waiter.EventOut) 450 451 return msg, nil 452 } 453 454 // Copy copies a message from the queue without deleting it. If no message 455 // exists, an error is returned. See msgrcv(MSG_COPY). 
456 func (q *Queue) Copy(mType int64) (*Message, error) { 457 q.mu.Lock() 458 defer q.mu.Unlock() 459 460 if mType < 0 || q.messages.Empty() { 461 return nil, linuxerr.ENOMSG 462 } 463 464 msg := q.msgAtIndex(mType) 465 if msg == nil { 466 return nil, linuxerr.ENOMSG 467 } 468 return msg.makeCopy(), nil 469 } 470 471 // msgOfType returns the first message with the specified type, nil if no 472 // message is found. If except is true, the first message of a type not equal 473 // to mType will be returned. 474 // 475 // Precondition: caller must hold q.mu. 476 func (q *Queue) msgOfType(mType int64, except bool) *Message { 477 if except { 478 for msg := q.messages.Front(); msg != nil; msg = msg.Next() { 479 if msg.Type != mType { 480 return msg 481 } 482 } 483 return nil 484 } 485 486 for msg := q.messages.Front(); msg != nil; msg = msg.Next() { 487 if msg.Type == mType { 488 return msg 489 } 490 } 491 return nil 492 } 493 494 // msgOfTypeLessThan return the the first message with the lowest type less 495 // than or equal to mType, nil if no such message exists. 496 // 497 // Precondition: caller must hold q.mu. 498 func (q *Queue) msgOfTypeLessThan(mType int64) (m *Message) { 499 min := mType 500 for msg := q.messages.Front(); msg != nil; msg = msg.Next() { 501 if msg.Type <= mType && msg.Type < min { 502 m = msg 503 min = msg.Type 504 } 505 } 506 return m 507 } 508 509 // msgAtIndex returns a pointer to a message at given index, nil if non exits. 510 // 511 // Precondition: caller must hold q.mu. 512 func (q *Queue) msgAtIndex(mType int64) *Message { 513 msg := q.messages.Front() 514 for ; mType != 0 && msg != nil; mType-- { 515 msg = msg.Next() 516 } 517 return msg 518 } 519 520 // Set modifies some values of the queue. See msgctl(IPC_SET). 
521 func (q *Queue) Set(ctx context.Context, ds *linux.MsqidDS) error { 522 q.mu.Lock() 523 defer q.mu.Unlock() 524 525 creds := auth.CredentialsFromContext(ctx) 526 if ds.MsgQbytes > maxQueueBytes && !creds.HasCapabilityIn(linux.CAP_SYS_RESOURCE, q.obj.UserNS) { 527 // "An attempt (IPC_SET) was made to increase msg_qbytes beyond the 528 // system parameter MSGMNB, but the caller is not privileged (Linux: 529 // does not have the CAP_SYS_RESOURCE capability)." 530 return linuxerr.EPERM 531 } 532 533 if err := q.obj.Set(ctx, &ds.MsgPerm); err != nil { 534 return err 535 } 536 537 q.maxBytes = ds.MsgQbytes 538 q.changeTime = ktime.NowFromContext(ctx) 539 return nil 540 } 541 542 // Stat returns a MsqidDS object filled with information about the queue. See 543 // msgctl(IPC_STAT) and msgctl(MSG_STAT). 544 func (q *Queue) Stat(ctx context.Context) (*linux.MsqidDS, error) { 545 return q.stat(ctx, vfs.MayRead) 546 } 547 548 // StatAny is similar to Queue.Stat, but doesn't require read permission. See 549 // msgctl(MSG_STAT_ANY). 550 func (q *Queue) StatAny(ctx context.Context) (*linux.MsqidDS, error) { 551 return q.stat(ctx, 0) 552 } 553 554 // stat returns a MsqidDS object filled with information about the queue. An 555 // error is returned if the user doesn't have the specified permissions. 556 func (q *Queue) stat(ctx context.Context, ats vfs.AccessTypes) (*linux.MsqidDS, error) { 557 q.mu.Lock() 558 defer q.mu.Unlock() 559 560 creds := auth.CredentialsFromContext(ctx) 561 if !q.obj.CheckPermissions(creds, ats) { 562 // "The caller must have read permission on the message queue." 
563 return nil, linuxerr.EACCES 564 } 565 566 return &linux.MsqidDS{ 567 MsgPerm: linux.IPCPerm{ 568 Key: uint32(q.obj.Key), 569 UID: uint32(creds.UserNamespace.MapFromKUID(q.obj.OwnerUID)), 570 GID: uint32(creds.UserNamespace.MapFromKGID(q.obj.OwnerGID)), 571 CUID: uint32(creds.UserNamespace.MapFromKUID(q.obj.CreatorUID)), 572 CGID: uint32(creds.UserNamespace.MapFromKGID(q.obj.CreatorGID)), 573 Mode: uint16(q.obj.Mode), 574 Seq: 0, // IPC sequences not supported. 575 }, 576 MsgStime: q.sendTime.TimeT(), 577 MsgRtime: q.receiveTime.TimeT(), 578 MsgCtime: q.changeTime.TimeT(), 579 MsgCbytes: q.byteCount, 580 MsgQnum: q.messageCount, 581 MsgQbytes: q.maxBytes, 582 MsgLspid: q.sendPID, 583 MsgLrpid: q.receivePID, 584 }, nil 585 } 586 587 // Lock implements ipc.Mechanism.Lock. 588 func (q *Queue) Lock() { 589 q.mu.Lock() 590 } 591 592 // Unlock implements ipc.mechanism.Unlock. 593 // 594 // +checklocksignore 595 func (q *Queue) Unlock() { 596 q.mu.Unlock() 597 } 598 599 // Object implements ipc.Mechanism.Object. 600 func (q *Queue) Object() *ipc.Object { 601 return q.obj 602 } 603 604 // Destroy implements ipc.Mechanism.Destroy. 605 func (q *Queue) Destroy() { 606 q.dead = true 607 608 // Notify waiters. Senders and receivers will try to run, and return an 609 // error (EIDRM). Waiters should remove themselves from the queue after 610 // waking up. 611 q.senders.Notify(waiter.EventOut) 612 q.receivers.Notify(waiter.EventIn) 613 } 614 615 // ID returns queue's ID. 616 func (q *Queue) ID() ipc.ID { 617 return q.obj.ID 618 }