github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/kernel/msgqueue/msgqueue.go (about)

     1  // Copyright 2021 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package msgqueue implements System V message queues.
    16  package msgqueue
    17  
    18  import (
    19  	"github.com/metacubex/gvisor/pkg/abi/linux"
    20  	"github.com/metacubex/gvisor/pkg/context"
    21  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    22  	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
    23  	"github.com/metacubex/gvisor/pkg/sentry/kernel/ipc"
    24  	ktime "github.com/metacubex/gvisor/pkg/sentry/kernel/time"
    25  	"github.com/metacubex/gvisor/pkg/sentry/vfs"
    26  	"github.com/metacubex/gvisor/pkg/sync"
    27  	"github.com/metacubex/gvisor/pkg/waiter"
    28  )
    29  
    30  const (
    31  	// maxQueues is the system-wide limit on the number of message queues.
    32  	maxQueues = linux.MSGMNI
    33  
    34  	// maxQueueBytes is the default maximum size of a queue in bytes. It is
        	// the initial value of Queue.maxBytes for newly created queues.
    35  	maxQueueBytes = linux.MSGMNB
    36  
    37  	// maxMessageBytes is the maximum size of a single message in bytes.
    38  	maxMessageBytes = linux.MSGMAX
    39  )
    40  
    41  // Registry contains a set of message queues that can be referenced using keys
    42  // or IDs. It wraps an ipc.Registry and serializes access to it with mu.
    43  //
    44  // +stateify savable
    45  type Registry struct {
    46  	// mu protects all the fields below.
    47  	mu sync.Mutex `state:"nosave"`
    48  
    49  	// reg defines basic fields and operations needed for all SysV registries.
        	// Every object registered in it by this package is a *Queue.
    50  	reg *ipc.Registry
    51  }
    52  
    53  // NewRegistry returns a new Registry ready to be used.
    54  func NewRegistry(userNS *auth.UserNamespace) *Registry {
    55  	return &Registry{
    56  		reg: ipc.NewRegistry(userNS),
    57  	}
    58  }
    59  
    60  // Queue represents a SysV message queue, described by sysvipc(7).
    61  //
    62  // +stateify savable
    63  type Queue struct {
    64  	// registry is the registry owning this queue. Immutable.
    65  	registry *Registry
    66  
    67  	// mu protects all the fields below.
    68  	mu sync.Mutex `state:"nosave"`
    69  
    70  	// dead is set to true when a queue is removed from the registry and should
    71  	// not be used. Operations on the queue should check dead, and return
    72  	// EIDRM if set to true.
    73  	dead bool
    74  
    75  	// obj defines basic fields that should be included in all SysV IPC objects.
    76  	obj *ipc.Object
    77  
    78  	// senders holds a queue of blocked message senders. Senders are notified
    79  	// (waiter.EventOut) when a message is removed from the queue or the queue
        	// is destroyed, and then retry inserting their message.
    80  	senders waiter.Queue
    81  
    82  	// receivers holds a queue of blocked receivers. Receivers are notified
    83  	// (waiter.EventIn) when a new message is inserted into the queue or the
        	// queue is destroyed, and then retry receiving.
    84  	receivers waiter.Queue
    85  
    86  	// messages is the list of sent messages that have not been received yet.
        	// New messages are appended to the back of the list.
    87  	messages msgList
    88  
    89  	// sendTime is the last time a msgsnd was performed.
    90  	sendTime ktime.Time
    91  
    92  	// receiveTime is the last time a msgrcv was performed.
    93  	receiveTime ktime.Time
    94  
    95  	// changeTime is the last time the queue was modified using msgctl.
    96  	changeTime ktime.Time
    97  
    98  	// byteCount is the current number of message bytes in the queue.
    99  	byteCount uint64
   100  
   101  	// messageCount is the current number of messages in the queue.
   102  	messageCount uint64
   103  
   104  	// maxBytes is the maximum allowed number of bytes in the queue, and is also
   105  	// used as a limit for the number of total possible messages. It is the
        	// msg_qbytes value reported by Stat and changed by Set.
   106  	maxBytes uint64
   107  
   108  	// sendPID is the PID of the process that performed the last msgsnd.
   109  	sendPID int32
   110  
   111  	// receivePID is the PID of the process that performed the last msgrcv.
   112  	receivePID int32
   113  }
   114  
   115  // Message represents a message exchanged through a Queue via msgsnd(2) and
   116  // msgrcv(2).
   117  //
   118  // +stateify savable
   119  type Message struct {
        	// msgEntry is embedded so that a Message can be linked into a msgList.
   120  	msgEntry
   121  
   122  	// Type is an integer representing the type of the sent message. Queue.push
        	// rejects messages whose Type is not strictly positive.
   123  	Type int64
   124  
   125  	// Text is an untyped block of memory holding the message payload.
   126  	Text []byte
   127  
   128  	// Size is the size of Text in bytes. It is the amount charged against the
        	// queue's byte count while the message is queued.
   129  	Size uint64
   130  }
   131  
   132  func (m *Message) makeCopy() *Message {
   133  	new := &Message{
   134  		Type: m.Type,
   135  		Size: m.Size,
   136  	}
   137  	new.Text = make([]byte, len(m.Text))
   138  	copy(new.Text, m.Text)
   139  	return new
   140  }
   141  
   142  // Blocker is used for blocking Queue.Send, and Queue.Receive calls that serves
   143  // as an abstracted version of kernel.Task. kernel.Task is not directly used to
   144  // prevent circular dependencies.
   145  type Blocker interface {
        	// Block is called by Send and Receive with the notification channel of
        	// their waiter entry when the operation cannot proceed yet. A nil return
        	// makes the caller retry the operation; a non-nil error aborts it and is
        	// returned to the caller as is.
   146  	Block(C <-chan struct{}) error
   147  }
   148  
   149  // FindOrCreate creates a new message queue or returns an existing one. See
   150  // msgget(2).
   151  func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, mode linux.FileMode, private, create, exclusive bool) (*Queue, error) {
   152  	r.mu.Lock()
   153  	defer r.mu.Unlock()
   154  
   155  	if !private {
   156  		queue, err := r.reg.Find(ctx, key, mode, create, exclusive)
   157  		if err != nil {
   158  			return nil, err
   159  		}
   160  
   161  		if queue != nil {
   162  			return queue.(*Queue), nil
   163  		}
   164  	}
   165  
   166  	// Check system-wide limits.
   167  	if r.reg.ObjectCount() >= maxQueues {
   168  		return nil, linuxerr.ENOSPC
   169  	}
   170  
   171  	return r.newQueueLocked(ctx, key, auth.CredentialsFromContext(ctx), mode)
   172  }
   173  
   174  // newQueueLocked creates a new queue using the given fields. An error is
   175  // returned if there're no more available identifiers.
   176  //
   177  // Precondition: r.mu must be held.
   178  func (r *Registry) newQueueLocked(ctx context.Context, key ipc.Key, creds *auth.Credentials, mode linux.FileMode) (*Queue, error) {
   179  	q := &Queue{
   180  		registry:    r,
   181  		obj:         ipc.NewObject(r.reg.UserNS, key, creds, creds, mode),
   182  		sendTime:    ktime.ZeroTime,
   183  		receiveTime: ktime.ZeroTime,
   184  		changeTime:  ktime.NowFromContext(ctx),
   185  		maxBytes:    maxQueueBytes,
   186  	}
   187  
   188  	err := r.reg.Register(q)
   189  	if err != nil {
   190  		return nil, err
   191  	}
   192  	return q, nil
   193  }
   194  
   195  // Remove removes the queue with specified ID. All waiters (readers and
   196  // writers) and writers will be awakened and fail. Remove will return an error
   197  // if the ID is invalid, or the the user doesn't have privileges.
   198  func (r *Registry) Remove(id ipc.ID, creds *auth.Credentials) error {
   199  	r.mu.Lock()
   200  	defer r.mu.Unlock()
   201  
   202  	r.reg.Remove(id, creds)
   203  	return nil
   204  }
   205  
   206  // FindByID returns the queue with the specified ID and an error if the ID
   207  // doesn't exist.
   208  func (r *Registry) FindByID(id ipc.ID) (*Queue, error) {
   209  	r.mu.Lock()
   210  	defer r.mu.Unlock()
   211  
   212  	mech := r.reg.FindByID(id)
   213  	if mech == nil {
   214  		return nil, linuxerr.EINVAL
   215  	}
   216  	return mech.(*Queue), nil
   217  }
   218  
   219  // IPCInfo reports global parameters for message queues. See msgctl(IPC_INFO).
   220  func (r *Registry) IPCInfo(ctx context.Context) *linux.MsgInfo {
   221  	return &linux.MsgInfo{
   222  		MsgPool: linux.MSGPOOL,
   223  		MsgMap:  linux.MSGMAP,
   224  		MsgMax:  linux.MSGMAX,
   225  		MsgMnb:  linux.MSGMNB,
   226  		MsgMni:  linux.MSGMNI,
   227  		MsgSsz:  linux.MSGSSZ,
   228  		MsgTql:  linux.MSGTQL,
   229  		MsgSeg:  linux.MSGSEG,
   230  	}
   231  }
   232  
   233  // MsgInfo reports global parameters for message queues. See msgctl(MSG_INFO).
   234  func (r *Registry) MsgInfo(ctx context.Context) *linux.MsgInfo {
   235  	r.mu.Lock()
   236  	defer r.mu.Unlock()
   237  
   238  	var messages, bytes uint64
   239  	r.reg.ForAllObjects(
   240  		func(o ipc.Mechanism) {
   241  			q := o.(*Queue)
   242  			q.mu.Lock()
   243  			messages += q.messageCount
   244  			bytes += q.byteCount
   245  			q.mu.Unlock()
   246  		},
   247  	)
   248  
   249  	return &linux.MsgInfo{
   250  		MsgPool: int32(r.reg.ObjectCount()),
   251  		MsgMap:  int32(messages),
   252  		MsgTql:  int32(bytes),
   253  		MsgMax:  linux.MSGMAX,
   254  		MsgMnb:  linux.MSGMNB,
   255  		MsgMni:  linux.MSGMNI,
   256  		MsgSsz:  linux.MSGSSZ,
   257  		MsgSeg:  linux.MSGSEG,
   258  	}
   259  }
   260  
   261  // Send appends a message to the message queue, and returns an error if sending
   262  // fails. See msgsnd(2).
   263  func (q *Queue) Send(ctx context.Context, m Message, b Blocker, wait bool, pid int32) error {
   264  	// Try to perform a non-blocking send using queue.append. If EWOULDBLOCK
   265  	// is returned, start the blocking procedure. Otherwise, return normally.
   266  	creds := auth.CredentialsFromContext(ctx)
   267  
   268  	// Fast path: first attempt a non-blocking push.
   269  	if err := q.push(ctx, m, creds, pid); err != linuxerr.EWOULDBLOCK {
   270  		return err
   271  	}
   272  
   273  	if !wait {
   274  		return linuxerr.EAGAIN
   275  	}
   276  
   277  	// Slow path: at this point, the queue was found to be full, and we were
   278  	// asked to block.
   279  
   280  	e, ch := waiter.NewChannelEntry(waiter.EventOut)
   281  	q.senders.EventRegister(&e)
   282  	defer q.senders.EventUnregister(&e)
   283  
   284  	// Note: we need to check again before blocking the first time since space
   285  	// may have become available.
   286  	for {
   287  		if err := q.push(ctx, m, creds, pid); err != linuxerr.EWOULDBLOCK {
   288  			return err
   289  		}
   290  		if err := b.Block(ch); err != nil {
   291  			return err
   292  		}
   293  	}
   294  }
   295  
   296  // push appends a message to the queue's message list and notifies waiting
   297  // receivers that a message has been inserted. It returns an error if adding
   298  // the message would cause the queue to exceed its maximum capacity, which can
   299  // be used as a signal to block the task. Other errors should be returned as is.
   300  func (q *Queue) push(ctx context.Context, m Message, creds *auth.Credentials, pid int32) error {
   301  	if m.Type <= 0 {
   302  		return linuxerr.EINVAL
   303  	}
   304  
   305  	q.mu.Lock()
   306  	defer q.mu.Unlock()
   307  
   308  	if !q.obj.CheckPermissions(creds, vfs.MayWrite) {
   309  		// The calling process does not have write permission on the message
   310  		// queue, and does not have the CAP_IPC_OWNER capability in the user
   311  		// namespace that governs its IPC namespace.
   312  		return linuxerr.EACCES
   313  	}
   314  
   315  	// Queue was removed while the process was waiting.
   316  	if q.dead {
   317  		return linuxerr.EIDRM
   318  	}
   319  
   320  	// Check if sufficient space is available (the queue isn't full.) From
   321  	// the man pages:
   322  	//
   323  	// "A message queue is considered to be full if either of the following
   324  	// conditions is true:
   325  	//
   326  	//  • Adding a new message to the queue would cause the total number
   327  	//    of bytes in the queue to exceed the queue's maximum size (the
   328  	//    msg_qbytes field).
   329  	//
   330  	//  • Adding another message to the queue would cause the total
   331  	//    number of messages in the queue to exceed the queue's maximum
   332  	//    size (the msg_qbytes field).  This check is necessary to
   333  	//    prevent an unlimited number of zero-length messages being
   334  	//    placed on the queue.  Although such messages contain no data,
   335  	//    they nevertheless consume (locked) kernel memory."
   336  	//
   337  	// The msg_qbytes field in our implementation is q.maxBytes.
   338  	if m.Size+q.byteCount > q.maxBytes || q.messageCount+1 > q.maxBytes {
   339  		return linuxerr.EWOULDBLOCK
   340  	}
   341  
   342  	// Copy the message into the queue.
   343  	q.messages.PushBack(&m)
   344  
   345  	q.byteCount += m.Size
   346  	q.messageCount++
   347  	q.sendPID = pid
   348  	q.sendTime = ktime.NowFromContext(ctx)
   349  
   350  	// Notify receivers about the new message.
   351  	q.receivers.Notify(waiter.EventIn)
   352  
   353  	return nil
   354  }
   355  
   356  // Receive removes a message from the queue and returns it. See msgrcv(2).
   357  func (q *Queue) Receive(ctx context.Context, b Blocker, mType int64, maxSize int64, wait, truncate, except bool, pid int32) (*Message, error) {
   358  	if maxSize < 0 || maxSize > maxMessageBytes {
   359  		return nil, linuxerr.EINVAL
   360  	}
   361  	max := uint64(maxSize)
   362  	creds := auth.CredentialsFromContext(ctx)
   363  
   364  	// Fast path: first attempt a non-blocking pop.
   365  	if msg, err := q.pop(ctx, creds, mType, max, truncate, except, pid); err != linuxerr.EWOULDBLOCK {
   366  		return msg, err
   367  	}
   368  
   369  	if !wait {
   370  		return nil, linuxerr.ENOMSG
   371  	}
   372  
   373  	// Slow path: at this point, the queue was found to be empty, and we were
   374  	// asked to block.
   375  
   376  	e, ch := waiter.NewChannelEntry(waiter.EventIn)
   377  	q.receivers.EventRegister(&e)
   378  	defer q.receivers.EventUnregister(&e)
   379  
   380  	// Note: we need to check again before blocking the first time since a
   381  	// message may have become available.
   382  	for {
   383  		if msg, err := q.pop(ctx, creds, mType, max, truncate, except, pid); err != linuxerr.EWOULDBLOCK {
   384  			return msg, err
   385  		}
   386  		if err := b.Block(ch); err != nil {
   387  			return nil, err
   388  		}
   389  	}
   390  }
   391  
   392  // pop pops the first message from the queue that matches the given type. It
   393  // returns an error for all the cases specified in msgrcv(2). If the queue is
   394  // empty or no message of the specified type is available, a EWOULDBLOCK error
   395  // is returned, which can then be used as a signal to block the process or fail.
   396  func (q *Queue) pop(ctx context.Context, creds *auth.Credentials, mType int64, maxSize uint64, truncate, except bool, pid int32) (*Message, error) {
   397  	q.mu.Lock()
   398  	defer q.mu.Unlock()
   399  
   400  	if !q.obj.CheckPermissions(creds, vfs.MayRead) {
   401  		// The calling process does not have read permission on the message
   402  		// queue, and does not have the CAP_IPC_OWNER capability in the user
   403  		// namespace that governs its IPC namespace.
   404  		return nil, linuxerr.EACCES
   405  	}
   406  
   407  	// Queue was removed while the process was waiting.
   408  	if q.dead {
   409  		return nil, linuxerr.EIDRM
   410  	}
   411  
   412  	if q.messages.Empty() {
   413  		return nil, linuxerr.EWOULDBLOCK
   414  	}
   415  
   416  	// Get a message from the queue.
   417  	var msg *Message
   418  	switch {
   419  	case mType == 0:
   420  		msg = q.messages.Front()
   421  	case mType > 0:
   422  		msg = q.msgOfType(mType, except)
   423  	case mType < 0:
   424  		msg = q.msgOfTypeLessThan(-1 * mType)
   425  	}
   426  
   427  	// If no message exists, return a blocking signal.
   428  	if msg == nil {
   429  		return nil, linuxerr.EWOULDBLOCK
   430  	}
   431  
   432  	// Check message's size is acceptable.
   433  	if maxSize < msg.Size {
   434  		if !truncate {
   435  			return nil, linuxerr.E2BIG
   436  		}
   437  		msg.Size = maxSize
   438  		msg.Text = msg.Text[:maxSize+1]
   439  	}
   440  
   441  	q.messages.Remove(msg)
   442  
   443  	q.byteCount -= msg.Size
   444  	q.messageCount--
   445  	q.receivePID = pid
   446  	q.receiveTime = ktime.NowFromContext(ctx)
   447  
   448  	// Notify senders about available space.
   449  	q.senders.Notify(waiter.EventOut)
   450  
   451  	return msg, nil
   452  }
   453  
   454  // Copy copies a message from the queue without deleting it. If no message
   455  // exists, an error is returned. See msgrcv(MSG_COPY).
   456  func (q *Queue) Copy(mType int64) (*Message, error) {
   457  	q.mu.Lock()
   458  	defer q.mu.Unlock()
   459  
   460  	if mType < 0 || q.messages.Empty() {
   461  		return nil, linuxerr.ENOMSG
   462  	}
   463  
   464  	msg := q.msgAtIndex(mType)
   465  	if msg == nil {
   466  		return nil, linuxerr.ENOMSG
   467  	}
   468  	return msg.makeCopy(), nil
   469  }
   470  
   471  // msgOfType returns the first message with the specified type, nil if no
   472  // message is found. If except is true, the first message of a type not equal
   473  // to mType will be returned.
   474  //
   475  // Precondition: caller must hold q.mu.
   476  func (q *Queue) msgOfType(mType int64, except bool) *Message {
   477  	if except {
   478  		for msg := q.messages.Front(); msg != nil; msg = msg.Next() {
   479  			if msg.Type != mType {
   480  				return msg
   481  			}
   482  		}
   483  		return nil
   484  	}
   485  
   486  	for msg := q.messages.Front(); msg != nil; msg = msg.Next() {
   487  		if msg.Type == mType {
   488  			return msg
   489  		}
   490  	}
   491  	return nil
   492  }
   493  
   494  // msgOfTypeLessThan return the the first message with the lowest type less
   495  // than or equal to mType, nil if no such message exists.
   496  //
   497  // Precondition: caller must hold q.mu.
   498  func (q *Queue) msgOfTypeLessThan(mType int64) (m *Message) {
   499  	min := mType
   500  	for msg := q.messages.Front(); msg != nil; msg = msg.Next() {
   501  		if msg.Type <= mType && msg.Type < min {
   502  			m = msg
   503  			min = msg.Type
   504  		}
   505  	}
   506  	return m
   507  }
   508  
   509  // msgAtIndex returns a pointer to a message at given index, nil if non exits.
   510  //
   511  // Precondition: caller must hold q.mu.
   512  func (q *Queue) msgAtIndex(mType int64) *Message {
   513  	msg := q.messages.Front()
   514  	for ; mType != 0 && msg != nil; mType-- {
   515  		msg = msg.Next()
   516  	}
   517  	return msg
   518  }
   519  
   520  // Set modifies some values of the queue. See msgctl(IPC_SET).
   521  func (q *Queue) Set(ctx context.Context, ds *linux.MsqidDS) error {
   522  	q.mu.Lock()
   523  	defer q.mu.Unlock()
   524  
   525  	creds := auth.CredentialsFromContext(ctx)
   526  	if ds.MsgQbytes > maxQueueBytes && !creds.HasCapabilityIn(linux.CAP_SYS_RESOURCE, q.obj.UserNS) {
   527  		// "An attempt (IPC_SET) was made to increase msg_qbytes beyond the
   528  		// system parameter MSGMNB, but the caller is not privileged (Linux:
   529  		// does not have the CAP_SYS_RESOURCE capability)."
   530  		return linuxerr.EPERM
   531  	}
   532  
   533  	if err := q.obj.Set(ctx, &ds.MsgPerm); err != nil {
   534  		return err
   535  	}
   536  
   537  	q.maxBytes = ds.MsgQbytes
   538  	q.changeTime = ktime.NowFromContext(ctx)
   539  	return nil
   540  }
   541  
   542  // Stat returns a MsqidDS object filled with information about the queue. The
   543  // caller must have read permission on the queue. See msgctl(IPC_STAT) and
        // msgctl(MSG_STAT).
   544  func (q *Queue) Stat(ctx context.Context) (*linux.MsqidDS, error) {
   545  	return q.stat(ctx, vfs.MayRead)
   546  }
   547  
   548  // StatAny is similar to Queue.Stat, but doesn't require read permission: it
   549  // requests no access types from the permission check. See msgctl(MSG_STAT_ANY).
   550  func (q *Queue) StatAny(ctx context.Context) (*linux.MsqidDS, error) {
   551  	return q.stat(ctx, 0)
   552  }
   553  
   554  // stat returns a MsqidDS object filled with information about the queue. An
   555  // error is returned if the user doesn't have the specified permissions.
   556  func (q *Queue) stat(ctx context.Context, ats vfs.AccessTypes) (*linux.MsqidDS, error) {
   557  	q.mu.Lock()
   558  	defer q.mu.Unlock()
   559  
   560  	creds := auth.CredentialsFromContext(ctx)
   561  	if !q.obj.CheckPermissions(creds, ats) {
   562  		// "The caller must have read permission on the message queue."
   563  		return nil, linuxerr.EACCES
   564  	}
   565  
   566  	return &linux.MsqidDS{
   567  		MsgPerm: linux.IPCPerm{
   568  			Key:  uint32(q.obj.Key),
   569  			UID:  uint32(creds.UserNamespace.MapFromKUID(q.obj.OwnerUID)),
   570  			GID:  uint32(creds.UserNamespace.MapFromKGID(q.obj.OwnerGID)),
   571  			CUID: uint32(creds.UserNamespace.MapFromKUID(q.obj.CreatorUID)),
   572  			CGID: uint32(creds.UserNamespace.MapFromKGID(q.obj.CreatorGID)),
   573  			Mode: uint16(q.obj.Mode),
   574  			Seq:  0, // IPC sequences not supported.
   575  		},
   576  		MsgStime:  q.sendTime.TimeT(),
   577  		MsgRtime:  q.receiveTime.TimeT(),
   578  		MsgCtime:  q.changeTime.TimeT(),
   579  		MsgCbytes: q.byteCount,
   580  		MsgQnum:   q.messageCount,
   581  		MsgQbytes: q.maxBytes,
   582  		MsgLspid:  q.sendPID,
   583  		MsgLrpid:  q.receivePID,
   584  	}, nil
   585  }
   586  
   587  // Lock implements ipc.Mechanism.Lock. It acquires q.mu.
   588  func (q *Queue) Lock() {
   589  	q.mu.Lock()
   590  }
   591  
   592  // Unlock implements ipc.Mechanism.Unlock. It releases q.mu.
   593  //
   594  // +checklocksignore
   595  func (q *Queue) Unlock() {
   596  	q.mu.Unlock()
   597  }
   598  
   599  // Object implements ipc.Mechanism.Object. It returns the queue's ipc.Object.
   600  func (q *Queue) Object() *ipc.Object {
   601  	return q.obj
   602  }
   603  
   604  // Destroy implements ipc.Mechanism.Destroy. It marks the queue as dead and
        // wakes up all blocked senders and receivers.
        //
        // NOTE(review): q.dead is documented as protected by q.mu, but Destroy does
        // not acquire it; presumably the caller holds the lock via Queue.Lock —
        // confirm against the ipc.Registry removal path.
   605  func (q *Queue) Destroy() {
   606  	q.dead = true
   607  
   608  	// Notify waiters. Senders and receivers will try to run, and return an
   609  	// error (EIDRM). Waiters should remove themselves from the queue after
   610  	// waking up.
   611  	q.senders.Notify(waiter.EventOut)
   612  	q.receivers.Notify(waiter.EventIn)
   613  }
   614  
   615  // ID returns queue's ID, as stored in the queue's ipc.Object.
   616  func (q *Queue) ID() ipc.ID {
   617  	return q.obj.ID
   618  }