// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package pipe provides a pipe implementation.
package pipe

import (
	"fmt"
	"io"

	"golang.org/x/sys/unix"
	"gvisor.dev/gvisor/pkg/atomicbitops"
	"gvisor.dev/gvisor/pkg/errors/linuxerr"
	"gvisor.dev/gvisor/pkg/hostarch"
	"gvisor.dev/gvisor/pkg/safemem"
	"gvisor.dev/gvisor/pkg/waiter"
)

const (
	// MinimumPipeSize is a hard limit on the minimum size of a pipe, in
	// bytes: one page.
	// It corresponds to fs/pipe.c:pipe_min_size.
	MinimumPipeSize = hostarch.PageSize

	// MaximumPipeSize is a hard limit on the maximum size of a pipe, in
	// bytes (1 MiB).
	// It corresponds to fs/pipe.c:pipe_max_size.
	MaximumPipeSize = 1048576

	// DefaultPipeSize is the system-wide default size of a pipe in bytes:
	// 16 pages.
	// It corresponds to pipe_fs_i.h:PIPE_DEF_BUFFERS.
	DefaultPipeSize = 16 * hostarch.PageSize

	// atomicIOBytes is the maximum number of bytes that the pipe guarantees
	// to read or write atomically.
	// It corresponds to limits.h:PIPE_BUF.
	atomicIOBytes = 4096
)

// waitReaders is a wrapper around Pipe.
//
// This is used for ctx.Block operations that require the synchronization of
// readers and writers, along with the careful grabbing and releasing of locks.
53 type waitReaders Pipe 54 55 // Readiness implements waiter.Waitable.Readiness. 56 func (wq *waitReaders) Readiness(mask waiter.EventMask) waiter.EventMask { 57 return ((*Pipe)(wq)).rwReadiness() & mask 58 } 59 60 // EventRegister implements waiter.Waitable.EventRegister. 61 func (wq *waitReaders) EventRegister(e *waiter.Entry) error { 62 ((*Pipe)(wq)).queue.EventRegister(e) 63 64 // Notify synchronously. 65 if ((*Pipe)(wq)).HasReaders() { 66 e.NotifyEvent(waiter.EventInternal) 67 } 68 69 return nil 70 } 71 72 // EventUnregister implements waiter.Waitable.EventUnregister. 73 func (wq *waitReaders) EventUnregister(e *waiter.Entry) { 74 ((*Pipe)(wq)).queue.EventUnregister(e) 75 } 76 77 // waitWriters is a wrapper around Pipe. 78 // 79 // This is used for ctx.Block operations that require the synchronization of 80 // readers and writers, along with the careful grabbing and releasing of locks. 81 type waitWriters Pipe 82 83 // Readiness implements waiter.Waitable.Readiness. 84 func (wq *waitWriters) Readiness(mask waiter.EventMask) waiter.EventMask { 85 return ((*Pipe)(wq)).rwReadiness() & mask 86 } 87 88 // EventRegister implements waiter.Waitable.EventRegister. 89 func (wq *waitWriters) EventRegister(e *waiter.Entry) error { 90 ((*Pipe)(wq)).queue.EventRegister(e) 91 92 // Notify synchronously. 93 if ((*Pipe)(wq)).HasWriters() { 94 e.NotifyEvent(waiter.EventInternal) 95 } 96 97 return nil 98 } 99 100 // EventUnregister implements waiter.Waitable.EventUnregister. 101 func (wq *waitWriters) EventUnregister(e *waiter.Entry) { 102 ((*Pipe)(wq)).queue.EventUnregister(e) 103 } 104 105 // Pipe is an encapsulation of a platform-independent pipe. 106 // It manages a buffered byte queue shared between a reader/writer 107 // pair. 108 // 109 // +stateify savable 110 type Pipe struct { 111 // queue is the waiter queue. 112 queue waiter.Queue 113 114 // isNamed indicates whether this is a named pipe. 115 // 116 // This value is immutable. 
117 isNamed bool 118 119 // The number of active readers for this pipe. 120 readers atomicbitops.Int32 121 122 // The total number of readers for this pipe. 123 totalReaders atomicbitops.Int32 124 125 // The number of active writers for this pipe. 126 writers atomicbitops.Int32 127 128 // The total number of writers for this pipe. 129 totalWriters atomicbitops.Int32 130 131 // mu protects all pipe internal state below. 132 mu pipeMutex `state:"nosave"` 133 134 // buf holds the pipe's data. buf is a circular buffer; the first valid 135 // byte in buf is at offset off, and the pipe contains size valid bytes. 136 // bufBlocks contains two identical safemem.Blocks representing buf; this 137 // avoids needing to heap-allocate a new safemem.Block slice when buf is 138 // resized. bufBlockSeq is a safemem.BlockSeq representing bufBlocks. 139 // 140 // These fields are protected by mu. 141 buf []byte 142 bufBlocks [2]safemem.Block `state:"nosave"` 143 bufBlockSeq safemem.BlockSeq `state:"nosave"` 144 off int64 145 size int64 146 147 // max is the maximum size of the pipe in bytes. When this max has been 148 // reached, writers will get EWOULDBLOCK. 149 // 150 // This is protected by mu. 151 max int64 152 153 // hadWriter indicates if this pipe ever had a writer. Note that this 154 // does not necessarily indicate there is *currently* a writer, just 155 // that there has been a writer at some point since the pipe was 156 // created. 157 // 158 // This is protected by mu. 159 hadWriter bool 160 } 161 162 // NewPipe initializes and returns a pipe. 163 // 164 // N.B. The size will be bounded. 
165 func NewPipe(isNamed bool, sizeBytes int64) *Pipe { 166 var p Pipe 167 initPipe(&p, isNamed, sizeBytes) 168 return &p 169 } 170 171 func initPipe(pipe *Pipe, isNamed bool, sizeBytes int64) { 172 if sizeBytes < MinimumPipeSize { 173 sizeBytes = MinimumPipeSize 174 } 175 if sizeBytes > MaximumPipeSize { 176 sizeBytes = MaximumPipeSize 177 } 178 pipe.isNamed = isNamed 179 pipe.max = sizeBytes 180 } 181 182 // peekLocked passes the first count bytes in the pipe, starting at offset off, 183 // to f and returns its result. If fewer than count bytes are available, the 184 // safemem.BlockSeq passed to f will be less than count bytes in length. 185 // 186 // peekLocked does not mutate the pipe; if the read consumes bytes from the 187 // pipe, then the caller is responsible for calling p.consumeLocked() and 188 // p.queue.Notify(waiter.WritableEvents). (The latter must be called with p.mu 189 // unlocked.) 190 // 191 // Preconditions: 192 // - p.mu must be locked. 193 // - This pipe must have readers. 194 // - off <= p.size. 195 func (p *Pipe) peekLocked(off, count int64, f func(safemem.BlockSeq) (uint64, error)) (int64, error) { 196 // Don't block for a zero-length read even if the pipe is empty. 197 if count == 0 { 198 return 0, nil 199 } 200 201 // Limit the amount of data read to the amount of data in the pipe. 202 if rem := p.size - off; count > rem { 203 if rem == 0 { 204 if !p.HasWriters() { 205 return 0, io.EOF 206 } 207 return 0, linuxerr.ErrWouldBlock 208 } 209 count = rem 210 } 211 212 // Prepare the view of the data to be read. 213 pipeOff := p.off + off 214 if max := int64(len(p.buf)); pipeOff >= max { 215 pipeOff -= max 216 } 217 bs := p.bufBlockSeq.DropFirst64(uint64(pipeOff)).TakeFirst64(uint64(count)) 218 219 // Perform the read. 220 done, err := f(bs) 221 return int64(done), err 222 } 223 224 // consumeLocked consumes the first n bytes in the pipe, such that they will no 225 // longer be visible to future reads. 
//
// Preconditions:
//   - p.mu must be locked.
//   - The pipe must contain at least n bytes.
func (p *Pipe) consumeLocked(n int64) {
	// Advance the start of the valid data, wrapping around the end of the
	// circular buffer.
	p.off += n
	if max := int64(len(p.buf)); p.off >= max {
		p.off -= max
	}
	p.size -= n
}

// writeLocked passes a safemem.BlockSeq representing the first count bytes of
// unused space in the pipe to f and returns the result. If fewer than count
// bytes are free, the safemem.BlockSeq passed to f will be less than count
// bytes in length. If the pipe is full or otherwise cannot accommodate a write
// of any number of bytes up to count, writeLocked returns ErrWouldBlock
// without calling f.
//
// If the pipe has no readers, writeLocked returns unix.EPIPE without calling
// f. If the write had to be shortened to fit and f filled all of the offered
// space without error, the byte count is returned together with
// ErrWouldBlock.
//
// Unlike peekLocked, writeLocked assumes that f returns the number of bytes
// written to the pipe, and increases the number of bytes stored in the pipe
// accordingly. Callers are still responsible for calling
// p.queue.Notify(waiter.ReadableEvents) with p.mu unlocked.
//
// Preconditions:
//   - p.mu must be locked.
func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
	// Can't write to a pipe with no readers.
	if !p.HasReaders() {
		return 0, unix.EPIPE
	}

	// avail is the free space remaining before the pipe reaches p.max.
	avail := p.max - p.size
	if avail == 0 {
		return 0, linuxerr.ErrWouldBlock
	}
	// short records that the write was truncated to the available space.
	short := false
	if count > avail {
		// POSIX requires that a write smaller than atomicIOBytes
		// (PIPE_BUF) be atomic, but requires no atomicity for writes
		// larger than this.
		if count <= atomicIOBytes {
			return 0, linuxerr.ErrWouldBlock
		}
		count = avail
		short = true
	}

	// Ensure that the buffer is big enough.
	if newLen, oldCap := p.size+count, int64(len(p.buf)); newLen > oldCap {
		// Allocate a new buffer. Grow by doubling until the pending data
		// fits, but never beyond p.max; since count was limited to avail
		// above, newLen <= p.max and the clamped capacity still fits it.
		newCap := oldCap * 2
		if oldCap == 0 {
			newCap = 8 // arbitrary; sending individual integers across pipes is relatively common
		}
		for newLen > newCap {
			newCap *= 2
		}
		if newCap > p.max {
			newCap = p.max
		}
		newBuf := make([]byte, newCap)
		// Copy the old buffer's contents to the beginning of the new one.
		// p.bufBlockSeq still describes the old buffer twice in a row, so
		// skipping p.off bytes and taking p.size bytes yields the valid
		// data in order even when it wraps around the end of the old
		// buffer. This must happen before the fields below are replaced.
		safemem.CopySeq(
			safemem.BlockSeqOf(safemem.BlockFromSafeSlice(newBuf)),
			p.bufBlockSeq.DropFirst64(uint64(p.off)).TakeFirst64(uint64(p.size)))
		// Switch to the new buffer.
		p.buf = newBuf
		p.bufBlocks[0] = safemem.BlockFromSafeSlice(newBuf)
		p.bufBlocks[1] = p.bufBlocks[0]
		p.bufBlockSeq = safemem.BlockSeqFromSlice(p.bufBlocks[:])
		// The valid data now starts at the beginning of the new buffer.
		p.off = 0
	}

	// Prepare the view of the space to be written. The write position is
	// just past the valid data, wrapped around the end of the buffer.
	woff := p.off + p.size
	if woff >= int64(len(p.buf)) {
		woff -= int64(len(p.buf))
	}
	bs := p.bufBlockSeq.DropFirst64(uint64(woff)).TakeFirst64(uint64(count))

	// Perform the write.
	doneU64, err := f(bs)
	done := int64(doneU64)
	// Account for whatever f reports as written, even if it also failed.
	p.size += done
	if done < count || err != nil {
		return done, err
	}

	// If we shortened the write, adjust the returned error appropriately.
	if short {
		return done, linuxerr.ErrWouldBlock
	}

	return done, nil
}

// rOpen signals a new reader of the pipe.
func (p *Pipe) rOpen() {
	p.readers.Add(1)
	p.totalReaders.Add(1)

	// Notify for blocking openers.
	p.queue.Notify(waiter.EventInternal)
}

// wOpen signals a new writer of the pipe.
func (p *Pipe) wOpen() {
	// hadWriter is protected by mu, so the update is made with mu held.
	p.mu.Lock()
	p.hadWriter = true
	p.writers.Add(1)
	p.totalWriters.Add(1)
	p.mu.Unlock()

	// Notify for blocking openers.
	p.queue.Notify(waiter.EventInternal)
}

// rClose signals that a reader has closed their end of the pipe.
345 func (p *Pipe) rClose() { 346 if newReaders := p.readers.Add(-1); newReaders < 0 { 347 panic(fmt.Sprintf("Refcounting bug, pipe has negative readers: %v", newReaders)) 348 } 349 } 350 351 // wClose signals that a writer has closed their end of the pipe. 352 func (p *Pipe) wClose() { 353 if newWriters := p.writers.Add(-1); newWriters < 0 { 354 panic(fmt.Sprintf("Refcounting bug, pipe has negative writers: %v.", newWriters)) 355 } 356 } 357 358 // HasReaders returns whether the pipe has any active readers. 359 func (p *Pipe) HasReaders() bool { 360 return p.readers.Load() > 0 361 } 362 363 // HasWriters returns whether the pipe has any active writers. 364 func (p *Pipe) HasWriters() bool { 365 return p.writers.Load() > 0 366 } 367 368 // rReadinessLocked calculates the read readiness. 369 // 370 // Precondition: mu must be held. 371 func (p *Pipe) rReadinessLocked() waiter.EventMask { 372 ready := waiter.EventMask(0) 373 if p.HasReaders() && p.size != 0 { 374 ready |= waiter.ReadableEvents 375 } 376 if !p.HasWriters() && p.hadWriter { 377 // POLLHUP must be suppressed until the pipe has had at least one writer 378 // at some point. Otherwise a reader thread may poll and immediately get 379 // a POLLHUP before the writer ever opens the pipe, which the reader may 380 // interpret as the writer opening then closing the pipe. 381 ready |= waiter.EventHUp 382 } 383 return ready 384 } 385 386 // rReadiness returns a mask that states whether the read end of the pipe is 387 // ready for reading. 388 func (p *Pipe) rReadiness() waiter.EventMask { 389 p.mu.Lock() 390 defer p.mu.Unlock() 391 return p.rReadinessLocked() 392 } 393 394 // wReadinessLocked calculates the write readiness. 395 // 396 // Precondition: mu must be held. 
397 func (p *Pipe) wReadinessLocked() waiter.EventMask { 398 ready := waiter.EventMask(0) 399 if p.HasWriters() && p.size < p.max { 400 ready |= waiter.WritableEvents 401 } 402 if !p.HasReaders() { 403 ready |= waiter.EventErr 404 } 405 return ready 406 } 407 408 // wReadiness returns a mask that states whether the write end of the pipe 409 // is ready for writing. 410 func (p *Pipe) wReadiness() waiter.EventMask { 411 p.mu.Lock() 412 defer p.mu.Unlock() 413 return p.wReadinessLocked() 414 } 415 416 // rwReadiness returns a mask that states whether a read-write handle to the 417 // pipe is ready for IO. 418 func (p *Pipe) rwReadiness() waiter.EventMask { 419 p.mu.Lock() 420 defer p.mu.Unlock() 421 return p.rReadinessLocked() | p.wReadinessLocked() 422 } 423 424 // EventRegister implements waiter.Waitable.EventRegister. 425 func (p *Pipe) EventRegister(e *waiter.Entry) error { 426 p.queue.EventRegister(e) 427 return nil 428 } 429 430 // EventUnregister implements waiter.Waitable.EventUnregister. 431 func (p *Pipe) EventUnregister(e *waiter.Entry) { 432 p.queue.EventUnregister(e) 433 } 434 435 // queued returns the amount of queued data. 436 func (p *Pipe) queued() int64 { 437 p.mu.Lock() 438 defer p.mu.Unlock() 439 return p.queuedLocked() 440 } 441 442 func (p *Pipe) queuedLocked() int64 { 443 return p.size 444 } 445 446 // SetFifoSize implements fs.FifoSizer.SetFifoSize. 447 func (p *Pipe) SetFifoSize(size int64) (int64, error) { 448 if size < 0 { 449 return 0, linuxerr.EINVAL 450 } 451 if size < MinimumPipeSize { 452 size = MinimumPipeSize // Per spec. 453 } 454 if size > MaximumPipeSize { 455 return 0, linuxerr.EPERM 456 } 457 p.mu.Lock() 458 defer p.mu.Unlock() 459 if size < p.size { 460 return 0, linuxerr.EBUSY 461 } 462 p.max = size 463 return size, nil 464 }