github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/kernel/mq/mq.go (about) 1 // Copyright 2021 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package mq provides an implementation for POSIX message queues. 16 package mq 17 18 import ( 19 "bytes" 20 "fmt" 21 "strings" 22 23 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 24 "github.com/MerlinKodo/gvisor/pkg/context" 25 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 26 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth" 27 "github.com/MerlinKodo/gvisor/pkg/sentry/vfs" 28 "github.com/MerlinKodo/gvisor/pkg/sync" 29 "github.com/MerlinKodo/gvisor/pkg/waiter" 30 ) 31 32 // AccessType is the access type passed to mq_open. 33 type AccessType int 34 35 // Possible access types. 36 const ( 37 ReadOnly AccessType = iota 38 WriteOnly 39 ReadWrite 40 ) 41 42 // MaxName is the maximum size for a queue name. 43 const MaxName = 255 44 45 const ( 46 maxPriority = linux.MQ_PRIO_MAX - 1 // Highest possible message priority. 47 48 maxQueuesDefault = linux.DFLT_QUEUESMAX // Default max number of queues. 49 50 maxMsgDefault = linux.DFLT_MSG // Default max number of messages per queue. 51 maxMsgMin = linux.MIN_MSGMAX // Min value for max number of messages per queue. 52 maxMsgLimit = linux.DFLT_MSGMAX // Limit for max number of messages per queue. 53 maxMsgHardLimit = linux.HARD_MSGMAX // Hard limit for max number of messages per queue. 54 55 msgSizeDefault = linux.DFLT_MSGSIZE // Default max message size. 56 msgSizeMin = linux.MIN_MSGSIZEMAX // Min value for max message size. 57 msgSizeLimit = linux.DFLT_MSGSIZEMAX // Limit for max message size. 58 msgSizeHardLimit = linux.HARD_MSGSIZEMAX // Hard limit for max message size. 59 ) 60 61 // Registry is a POSIX message queue registry. 62 // 63 // Unlike SysV utilities, Registry is not map-based. It uses a provided 64 // RegistryImpl backed by a virtual filesystem to implement registry operations. 65 // 66 // +stateify savable 67 type Registry struct { 68 // userNS is the user namespace containing this registry. Immutable. 69 userNS *auth.UserNamespace 70 71 // mu protects all fields below. 72 mu sync.Mutex `state:"nosave"` 73 74 // impl is an implementation of several message queue utilities needed by 75 // the registry. impl should be provided by mqfs. 76 impl RegistryImpl 77 } 78 79 // RegistryImpl defines utilities needed by a Registry to provide actual 80 // registry implementation. It works mainly as an abstraction layer used by 81 // Registry to avoid dealing directly with the filesystem. RegistryImpl should 82 // be implemented by mqfs and provided to Registry at initialization. 83 type RegistryImpl interface { 84 // Get searchs for a queue with the given name, if it exists, the queue is 85 // used to create a new FD, return it and return true. If the queue doesn't 86 // exist, return false and no error. An error is returned if creation fails. 87 Get(ctx context.Context, name string, access AccessType, block bool, flags uint32) (*vfs.FileDescription, bool, error) 88 89 // New creates a new inode and file description using the given queue, 90 // inserts the inode into the filesystem tree using the given name, and 91 // returns the file description. An error is returned if creation fails, or 92 // if the name already exists. 93 New(ctx context.Context, name string, q *Queue, access AccessType, block bool, perm linux.FileMode, flags uint32) (*vfs.FileDescription, error) 94 95 // Unlink removes the queue with given name from the registry, and returns 96 // an error if the name doesn't exist. 97 Unlink(ctx context.Context, name string) error 98 99 // Destroy destroys the registry. 100 Destroy(context.Context) 101 } 102 103 // NewRegistry returns a new, initialized message queue registry. NewRegistry 104 // should be called when a new message queue filesystem is created, once per 105 // IPCNamespace. 106 func NewRegistry(userNS *auth.UserNamespace, impl RegistryImpl) *Registry { 107 return &Registry{ 108 userNS: userNS, 109 impl: impl, 110 } 111 } 112 113 // OpenOpts holds the options passed to FindOrCreate. 114 type OpenOpts struct { 115 Name string 116 Access AccessType 117 Create bool 118 Exclusive bool 119 Block bool 120 } 121 122 // FindOrCreate creates a new POSIX message queue or opens an existing queue. 123 // See mq_open(2). 124 func (r *Registry) FindOrCreate(ctx context.Context, opts OpenOpts, mode linux.FileMode, attr *linux.MqAttr) (*vfs.FileDescription, error) { 125 // mq_overview(7) mentions that: "Each message queue is identified by a name 126 // of the form '/somename'", but the mq_open(3) man pages mention: 127 // "The mq_open() library function is implemented on top of a system call 128 // of the same name. The library function performs the check that the 129 // name starts with a slash (/), giving the EINVAL error if it does not. 130 // The kernel system call expects name to contain no preceding slash, so 131 // the C library function passes name without the preceding slash (i.e., 132 // name+1) to the system call." 133 // So we don't need to check it. 134 135 if len(opts.Name) == 0 { 136 return nil, linuxerr.ENOENT 137 } 138 if len(opts.Name) > MaxName { 139 return nil, linuxerr.ENAMETOOLONG 140 } 141 if strings.ContainsRune(opts.Name, '/') { 142 return nil, linuxerr.EACCES 143 } 144 if opts.Name == "." || opts.Name == ".." { 145 return nil, linuxerr.EINVAL 146 } 147 148 // Construct status flags. 149 var flags uint32 150 if opts.Block { 151 flags = linux.O_NONBLOCK 152 } 153 switch opts.Access { 154 case ReadOnly: 155 flags = flags | linux.O_RDONLY 156 case WriteOnly: 157 flags = flags | linux.O_WRONLY 158 case ReadWrite: 159 flags = flags | linux.O_RDWR 160 } 161 162 r.mu.Lock() 163 defer r.mu.Unlock() 164 fd, ok, err := r.impl.Get(ctx, opts.Name, opts.Access, opts.Block, flags) 165 if err != nil { 166 return nil, err 167 } 168 169 if ok { 170 if opts.Create && opts.Exclusive { 171 // "Both O_CREAT and O_EXCL were specified in oflag, but a queue 172 // with this name already exists." 173 return nil, linuxerr.EEXIST 174 } 175 return fd, nil 176 } 177 178 if !opts.Create { 179 // "The O_CREAT flag was not specified in oflag, and no queue with this name 180 // exists." 181 return nil, linuxerr.ENOENT 182 } 183 184 q, err := r.newQueueLocked(auth.CredentialsFromContext(ctx), mode, attr) 185 if err != nil { 186 return nil, err 187 } 188 return r.impl.New(ctx, opts.Name, q, opts.Access, opts.Block, mode.Permissions(), flags) 189 } 190 191 // newQueueLocked creates a new queue using the given attributes. If attr is nil 192 // return a queue with default values, otherwise use attr to create a new queue, 193 // and return an error if attributes are invalid. 194 func (r *Registry) newQueueLocked(creds *auth.Credentials, mode linux.FileMode, attr *linux.MqAttr) (*Queue, error) { 195 if attr == nil { 196 return &Queue{ 197 ownerUID: creds.EffectiveKUID, 198 ownerGID: creds.EffectiveKGID, 199 mode: mode, 200 maxMessageCount: int64(maxMsgDefault), 201 maxMessageSize: uint64(msgSizeDefault), 202 }, nil 203 } 204 205 // "O_CREAT was specified in oflag, and attr was not NULL, but 206 // attr->mq_maxmsg or attr->mq_msqsize was invalid. Both of these fields 207 // these fields must be greater than zero. In a process that is 208 // unprivileged (does not have the CAP_SYS_RESOURCE capability), 209 // attr->mq_maxmsg must be less than or equal to the msg_max limit, and 210 // attr->mq_msgsize must be less than or equal to the msgsize_max limit. 211 // In addition, even in a privileged process, attr->mq_maxmsg cannot 212 // exceed the HARD_MAX limit." - man mq_open(3). 213 if attr.MqMaxmsg <= 0 || attr.MqMsgsize <= 0 { 214 return nil, linuxerr.EINVAL 215 } 216 217 if attr.MqMaxmsg > maxMsgHardLimit || (!creds.HasCapabilityIn(linux.CAP_SYS_RESOURCE, r.userNS) && (attr.MqMaxmsg > maxMsgLimit || attr.MqMsgsize > msgSizeLimit)) { 218 return nil, linuxerr.EINVAL 219 } 220 221 return &Queue{ 222 ownerUID: creds.EffectiveKUID, 223 ownerGID: creds.EffectiveKGID, 224 mode: mode, 225 maxMessageCount: attr.MqMaxmsg, 226 maxMessageSize: uint64(attr.MqMsgsize), 227 }, nil 228 } 229 230 // Remove removes the queue with the given name from the registry. See 231 // mq_unlink(2). 232 func (r *Registry) Remove(ctx context.Context, name string) error { 233 if len(name) > MaxName { 234 return linuxerr.ENAMETOOLONG 235 } 236 237 r.mu.Lock() 238 defer r.mu.Unlock() 239 return r.impl.Unlink(ctx, name) 240 } 241 242 // Destroy destroys the registry and releases all held references. 243 func (r *Registry) Destroy(ctx context.Context) { 244 r.mu.Lock() 245 defer r.mu.Unlock() 246 r.impl.Destroy(ctx) 247 } 248 249 // Impl returns RegistryImpl inside r. 250 func (r *Registry) Impl() RegistryImpl { 251 return r.impl 252 } 253 254 // Queue represents a POSIX message queue. 255 // 256 // +stateify savable 257 type Queue struct { 258 // ownerUID is the registry's owner's UID. Immutable. 259 ownerUID auth.KUID 260 261 // ownerGID is the registry's owner's GID. Immutable. 262 ownerGID auth.KGID 263 264 // mode is the registry's access permissions. Immutable. 265 mode linux.FileMode 266 267 // mu protects all the fields below. 268 mu sync.Mutex `state:"nosave"` 269 270 // queue is the queue of waiters. 271 queue waiter.Queue 272 273 // messages is a list of messages currently in the queue. 274 messages msgList 275 276 // subscriber represents a task registered to receive async notification 277 // from this queue. 278 subscriber *Subscriber 279 280 // messageCount is the number of messages currently in the queue. 281 messageCount int64 282 283 // maxMessageCount is the maximum number of messages that the queue can 284 // hold. 285 maxMessageCount int64 286 287 // maxMessageSize is the maximum size of a message held by the queue. 288 maxMessageSize uint64 289 290 // byteCount is the number of bytes of data in all messages in the queue. 291 byteCount uint64 292 } 293 294 // View is a view into a message queue. Views should only be used in file 295 // descriptions, but not inodes, because we use inodes to retreive the actual 296 // queue, and only FDs are responsible for providing user functionality. 297 type View interface { 298 // TODO: Add Send and Receive when mq_timedsend(2) and mq_timedreceive(2) 299 // are implemented. 300 301 // Flush checks if the calling process has attached a notification request 302 // to this queue, if yes, then the request is removed, and another process 303 // can attach a request. 304 Flush(ctx context.Context) 305 306 waiter.Waitable 307 } 308 309 // ReaderWriter provides a send and receive view into a queue. 310 type ReaderWriter struct { 311 *Queue 312 313 block bool 314 } 315 316 // Reader provides a send-only view into a queue. 317 type Reader struct { 318 *Queue 319 320 block bool 321 } 322 323 // Writer provides a receive-only view into a queue. 324 type Writer struct { 325 *Queue 326 327 block bool 328 } 329 330 // NewView creates a new view into a queue and returns it. 331 func NewView(q *Queue, access AccessType, block bool) (View, error) { 332 switch access { 333 case ReadWrite: 334 return ReaderWriter{Queue: q, block: block}, nil 335 case WriteOnly: 336 return Writer{Queue: q, block: block}, nil 337 case ReadOnly: 338 return Reader{Queue: q, block: block}, nil 339 default: 340 // This case can't happen, due to O_RDONLY flag being 0 and O_WRONLY 341 // being 1, so one of them must be true. 342 return nil, linuxerr.EINVAL 343 } 344 } 345 346 // Message holds a message exchanged through a Queue via mq_timedsend(2) and 347 // mq_timedreceive(2), and additional info relating to the message. 348 // 349 // +stateify savable 350 type Message struct { 351 msgEntry 352 353 // Text is the message's sent content. 354 Text string 355 356 // Size is the message's size in bytes. 357 Size uint64 358 359 // Priority is the message's priority. 360 Priority uint32 361 } 362 363 // Subscriber represents a task registered for async notification from a Queue. 364 // 365 // +stateify savable 366 type Subscriber struct { 367 // TODO: Add fields when mq_notify(2) is implemented. 368 369 // pid is the PID of the registered task. 370 pid int32 371 } 372 373 // Generate implements vfs.DynamicBytesSource.Generate. Queue is used as a 374 // DynamicBytesSource for mqfs's queueInode. 375 func (q *Queue) Generate(ctx context.Context, buf *bytes.Buffer) error { 376 q.mu.Lock() 377 defer q.mu.Unlock() 378 379 var ( 380 pid int32 381 method int 382 sigNumber int 383 ) 384 if q.subscriber != nil { 385 pid = q.subscriber.pid 386 // TODO: add method and sigNumber when mq_notify(2) is implemented. 387 } 388 389 buf.WriteString( 390 fmt.Sprintf("QSIZE:%-10d NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n", 391 q.byteCount, method, sigNumber, pid), 392 ) 393 return nil 394 } 395 396 // Flush implements View.Flush. 397 func (q *Queue) Flush(ctx context.Context) { 398 q.mu.Lock() 399 defer q.mu.Unlock() 400 401 pid, ok := auth.ThreadGroupIDFromContext(ctx) 402 if ok { 403 if q.subscriber != nil && pid == q.subscriber.pid { 404 q.subscriber = nil 405 } 406 } 407 } 408 409 // Readiness implements Waitable.Readiness. 410 func (q *Queue) Readiness(mask waiter.EventMask) waiter.EventMask { 411 q.mu.Lock() 412 defer q.mu.Unlock() 413 414 events := waiter.EventMask(0) 415 if q.messageCount > 0 { 416 events |= waiter.ReadableEvents 417 } 418 if q.messageCount < q.maxMessageCount { 419 events |= waiter.WritableEvents 420 } 421 return events & mask 422 } 423 424 // EventRegister implements Waitable.EventRegister. 425 func (q *Queue) EventRegister(e *waiter.Entry) error { 426 q.mu.Lock() 427 defer q.mu.Unlock() 428 q.queue.EventRegister(e) 429 return nil 430 } 431 432 // EventUnregister implements Waitable.EventUnregister. 433 func (q *Queue) EventUnregister(e *waiter.Entry) { 434 q.mu.Lock() 435 defer q.mu.Unlock() 436 q.queue.EventUnregister(e) 437 } 438 439 // HasPermissions returns true if the given credentials meet the access 440 // permissions required by the queue. 441 func (q *Queue) HasPermissions(creds *auth.Credentials, req vfs.AccessTypes) bool { 442 perms := uint16(q.mode.Permissions()) 443 if q.ownerUID == creds.EffectiveKUID { 444 perms >>= 6 445 } else if creds.InGroup(q.ownerGID) { 446 perms >>= 3 447 } 448 449 return uint16(req)&perms == uint16(req) 450 }