github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/kernel/pipe/pipe.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package pipe provides a pipe implementation.
package pipe

import (
	"fmt"
	"io"

	"github.com/MerlinKodo/gvisor/pkg/atomicbitops"
	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
	"github.com/MerlinKodo/gvisor/pkg/hostarch"
	"github.com/MerlinKodo/gvisor/pkg/safemem"
	"github.com/MerlinKodo/gvisor/pkg/waiter"
	"golang.org/x/sys/unix"
)

const (
	// MinimumPipeSize is a hard limit on the minimum size of a pipe.
	// It corresponds to fs/pipe.c:pipe_min_size.
	MinimumPipeSize = hostarch.PageSize

	// MaximumPipeSize is a hard limit on the maximum size of a pipe.
	// It corresponds to fs/pipe.c:pipe_max_size.
	MaximumPipeSize = 1048576

	// DefaultPipeSize is the system-wide default size of a pipe in bytes.
	// It corresponds to pipe_fs_i.h:PIPE_DEF_BUFFERS.
	DefaultPipeSize = 16 * hostarch.PageSize

	// atomicIOBytes is the maximum number of bytes that the pipe will
	// guarantee to read or write atomically.
	// It corresponds to limits.h:PIPE_BUF.
	atomicIOBytes = 4096
)

// waitReaders is a wrapper around Pipe.
//
// This is used for ctx.Block operations that require the synchronization of
// readers and writers, along with the careful grabbing and releasing of locks.
type waitReaders Pipe

// Readiness implements waiter.Waitable.Readiness.
func (wq *waitReaders) Readiness(mask waiter.EventMask) waiter.EventMask {
	return ((*Pipe)(wq)).rwReadiness() & mask
}

// EventRegister implements waiter.Waitable.EventRegister.
func (wq *waitReaders) EventRegister(e *waiter.Entry) error {
	((*Pipe)(wq)).queue.EventRegister(e)

	// Notify synchronously.
	if ((*Pipe)(wq)).HasReaders() {
		e.NotifyEvent(waiter.EventInternal)
	}

	return nil
}

// EventUnregister implements waiter.Waitable.EventUnregister.
func (wq *waitReaders) EventUnregister(e *waiter.Entry) {
	((*Pipe)(wq)).queue.EventUnregister(e)
}

// waitWriters is a wrapper around Pipe.
//
// This is used for ctx.Block operations that require the synchronization of
// readers and writers, along with the careful grabbing and releasing of locks.
type waitWriters Pipe

// Readiness implements waiter.Waitable.Readiness.
func (wq *waitWriters) Readiness(mask waiter.EventMask) waiter.EventMask {
	return ((*Pipe)(wq)).rwReadiness() & mask
}

// EventRegister implements waiter.Waitable.EventRegister.
func (wq *waitWriters) EventRegister(e *waiter.Entry) error {
	((*Pipe)(wq)).queue.EventRegister(e)

	// Notify synchronously.
	if ((*Pipe)(wq)).HasWriters() {
		e.NotifyEvent(waiter.EventInternal)
	}

	return nil
}

// EventUnregister implements waiter.Waitable.EventUnregister.
func (wq *waitWriters) EventUnregister(e *waiter.Entry) {
	((*Pipe)(wq)).queue.EventUnregister(e)
}
// Pipe is an encapsulation of a platform-independent pipe.
// It manages a buffered byte queue shared between a reader/writer
// pair.
//
// +stateify savable
type Pipe struct {
	// queue is the waiter queue.
	queue waiter.Queue

	// isNamed indicates whether this is a named pipe.
	//
	// This value is immutable.
	isNamed bool

	// The number of active readers for this pipe.
	readers atomicbitops.Int32

	// The total number of readers for this pipe.
	totalReaders atomicbitops.Int32

	// The number of active writers for this pipe.
	writers atomicbitops.Int32

	// The total number of writers for this pipe.
	totalWriters atomicbitops.Int32

	// mu protects all pipe internal state below.
	mu pipeMutex `state:"nosave"`

	// buf holds the pipe's data. buf is a circular buffer; the first valid
	// byte in buf is at offset off, and the pipe contains size valid bytes.
	// bufBlocks contains two identical safemem.Blocks representing buf; this
	// avoids needing to heap-allocate a new safemem.Block slice when buf is
	// resized. bufBlockSeq is a safemem.BlockSeq representing bufBlocks.
	//
	// These fields are protected by mu.
	buf         []byte
	bufBlocks   [2]safemem.Block `state:"nosave"`
	bufBlockSeq safemem.BlockSeq `state:"nosave"`
	off         int64
	size        int64

	// max is the maximum size of the pipe in bytes. When this max has been
	// reached, writers will get EWOULDBLOCK.
	//
	// This is protected by mu.
	max int64

	// hadWriter indicates if this pipe ever had a writer. Note that this
	// does not necessarily indicate there is *currently* a writer, just
	// that there has been a writer at some point since the pipe was
	// created.
	//
	// This is protected by mu.
	hadWriter bool
}

// NewPipe initializes and returns a pipe.
//
// N.B. The size will be bounded.
func NewPipe(isNamed bool, sizeBytes int64) *Pipe {
	var p Pipe
	initPipe(&p, isNamed, sizeBytes)
	return &p
}

func initPipe(pipe *Pipe, isNamed bool, sizeBytes int64) {
	if sizeBytes < MinimumPipeSize {
		sizeBytes = MinimumPipeSize
	}
	if sizeBytes > MaximumPipeSize {
		sizeBytes = MaximumPipeSize
	}
	pipe.isNamed = isNamed
	pipe.max = sizeBytes
}
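// newPipeSizeClampSketch is a hypothetical example helper, not part of the
// pipe API: it is a minimal sketch showing how initPipe above clamps the
// requested size into [MinimumPipeSize, MaximumPipeSize].
func newPipeSizeClampSketch() {
	small := NewPipe(false /* isNamed */, 1)
	large := NewPipe(false /* isNamed */, 2*MaximumPipeSize)
	// Prints MinimumPipeSize and MaximumPipeSize respectively.
	fmt.Printf("small pipe max: %d, large pipe max: %d\n", small.max, large.max)
}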
// peekLocked passes the first count bytes in the pipe to f and returns its
// result. If fewer than count bytes are available, the safemem.BlockSeq passed
// to f will be less than count bytes in length.
//
// peekLocked does not mutate the pipe; if the read consumes bytes from the
// pipe, then the caller is responsible for calling p.consumeLocked() and
// p.queue.Notify(waiter.WritableEvents). (The latter must be called with p.mu
// unlocked.)
//
// Preconditions:
//   - p.mu must be locked.
//   - This pipe must have readers.
func (p *Pipe) peekLocked(count int64, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
	// Don't block for a zero-length read even if the pipe is empty.
	if count == 0 {
		return 0, nil
	}

	// Limit the amount of data read to the amount of data in the pipe.
	if count > p.size {
		if p.size == 0 {
			if !p.HasWriters() {
				return 0, io.EOF
			}
			return 0, linuxerr.ErrWouldBlock
		}
		count = p.size
	}

	// Prepare the view of the data to be read.
	bs := p.bufBlockSeq.DropFirst64(uint64(p.off)).TakeFirst64(uint64(count))

	// Perform the read.
	done, err := f(bs)
	return int64(done), err
}

// consumeLocked consumes the first n bytes in the pipe, such that they will no
// longer be visible to future reads.
//
// Preconditions:
//   - p.mu must be locked.
//   - The pipe must contain at least n bytes.
func (p *Pipe) consumeLocked(n int64) {
	p.off += n
	if max := int64(len(p.buf)); p.off >= max {
		p.off -= max
	}
	p.size -= n
}
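// readIntoSketch is a hypothetical example helper, not part of the pipe API:
// it is a minimal sketch of the read pattern described in peekLocked's comment
// above, assuming the caller holds a reader end. It peeks under p.mu, consumes
// what was copied, and then notifies waiting writers with p.mu unlocked. It
// copies up to len(dst) bytes into dst via safemem.CopySeq.
func (p *Pipe) readIntoSketch(dst []byte) (int64, error) {
	p.mu.Lock()
	n, err := p.peekLocked(int64(len(dst)), func(srcs safemem.BlockSeq) (uint64, error) {
		return safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), srcs)
	})
	if n > 0 {
		// The read consumed n bytes, so drop them from the pipe.
		p.consumeLocked(n)
	}
	p.mu.Unlock()
	if n > 0 {
		// Space was freed; notify writers with p.mu unlocked, as required by
		// peekLocked's contract.
		p.queue.Notify(waiter.WritableEvents)
	}
	return n, err
}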
// writeLocked passes a safemem.BlockSeq representing the first count bytes of
// unused space in the pipe to f and returns the result. If fewer than count
// bytes are free, the safemem.BlockSeq passed to f will be less than count
// bytes in length. If the pipe is full or otherwise cannot accommodate a write
// of any number of bytes up to count, writeLocked returns ErrWouldBlock
// without calling f.
//
// Unlike peekLocked, writeLocked assumes that f returns the number of bytes
// written to the pipe, and increases the number of bytes stored in the pipe
// accordingly. Callers are still responsible for calling
// p.queue.Notify(waiter.ReadableEvents) with p.mu unlocked.
//
// Preconditions:
//   - p.mu must be locked.
func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
	// Can't write to a pipe with no readers.
	if !p.HasReaders() {
		return 0, unix.EPIPE
	}

	avail := p.max - p.size
	if avail == 0 {
		return 0, linuxerr.ErrWouldBlock
	}
	short := false
	if count > avail {
		// POSIX requires that a write of atomicIOBytes (PIPE_BUF) bytes or
		// fewer be atomic, but requires no atomicity for writes larger than
		// this.
		if count <= atomicIOBytes {
			return 0, linuxerr.ErrWouldBlock
		}
		count = avail
		short = true
	}

	// Ensure that the buffer is big enough.
	if newLen, oldCap := p.size+count, int64(len(p.buf)); newLen > oldCap {
		// Allocate a new buffer.
		newCap := oldCap * 2
		if oldCap == 0 {
			newCap = 8 // arbitrary; sending individual integers across pipes is relatively common
		}
		for newLen > newCap {
			newCap *= 2
		}
		if newCap > p.max {
			newCap = p.max
		}
		newBuf := make([]byte, newCap)
		// Copy the old buffer's contents to the beginning of the new one.
		safemem.CopySeq(
			safemem.BlockSeqOf(safemem.BlockFromSafeSlice(newBuf)),
			p.bufBlockSeq.DropFirst64(uint64(p.off)).TakeFirst64(uint64(p.size)))
		// Switch to the new buffer.
		p.buf = newBuf
		p.bufBlocks[0] = safemem.BlockFromSafeSlice(newBuf)
		p.bufBlocks[1] = p.bufBlocks[0]
		p.bufBlockSeq = safemem.BlockSeqFromSlice(p.bufBlocks[:])
		p.off = 0
	}

	// Prepare the view of the space to be written.
	woff := p.off + p.size
	if woff >= int64(len(p.buf)) {
		woff -= int64(len(p.buf))
	}
	bs := p.bufBlockSeq.DropFirst64(uint64(woff)).TakeFirst64(uint64(count))

	// Perform the write.
	doneU64, err := f(bs)
	done := int64(doneU64)
	p.size += done
	if done < count || err != nil {
		return done, err
	}

	// If we shortened the write, adjust the returned error appropriately.
	if short {
		return done, linuxerr.ErrWouldBlock
	}

	return done, nil
}
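// writeFromSketch is a hypothetical example helper, not part of the pipe API:
// it is a minimal sketch of the corresponding write pattern. writeLocked above
// already updates p.size from the callback's return value, so the caller only
// copies bytes inside the callback and then notifies readers with p.mu
// unlocked, as writeLocked's comment requires.
func (p *Pipe) writeFromSketch(src []byte) (int64, error) {
	p.mu.Lock()
	n, err := p.writeLocked(int64(len(src)), func(dsts safemem.BlockSeq) (uint64, error) {
		return safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src)))
	})
	p.mu.Unlock()
	if n > 0 {
		// Data became available; notify readers with p.mu unlocked.
		p.queue.Notify(waiter.ReadableEvents)
	}
	return n, err
}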
// rOpen signals a new reader of the pipe.
func (p *Pipe) rOpen() {
	p.readers.Add(1)
	p.totalReaders.Add(1)

	// Notify for blocking openers.
	p.queue.Notify(waiter.EventInternal)
}

// wOpen signals a new writer of the pipe.
func (p *Pipe) wOpen() {
	p.mu.Lock()
	p.hadWriter = true
	p.writers.Add(1)
	p.totalWriters.Add(1)
	p.mu.Unlock()

	// Notify for blocking openers.
	p.queue.Notify(waiter.EventInternal)
}

// rClose signals that a reader has closed their end of the pipe.
func (p *Pipe) rClose() {
	if newReaders := p.readers.Add(-1); newReaders < 0 {
		panic(fmt.Sprintf("Refcounting bug, pipe has negative readers: %v", newReaders))
	}
}

// wClose signals that a writer has closed their end of the pipe.
func (p *Pipe) wClose() {
	if newWriters := p.writers.Add(-1); newWriters < 0 {
		panic(fmt.Sprintf("Refcounting bug, pipe has negative writers: %v", newWriters))
	}
}

// HasReaders returns whether the pipe has any active readers.
func (p *Pipe) HasReaders() bool {
	return p.readers.Load() > 0
}

// HasWriters returns whether the pipe has any active writers.
func (p *Pipe) HasWriters() bool {
	return p.writers.Load() > 0
}

// rReadinessLocked calculates the read readiness.
//
// Precondition: mu must be held.
func (p *Pipe) rReadinessLocked() waiter.EventMask {
	ready := waiter.EventMask(0)
	if p.HasReaders() && p.size != 0 {
		ready |= waiter.ReadableEvents
	}
	if !p.HasWriters() && p.hadWriter {
		// POLLHUP must be suppressed until the pipe has had at least one writer
		// at some point. Otherwise a reader thread may poll and immediately get
		// a POLLHUP before the writer ever opens the pipe, which the reader may
		// interpret as the writer opening then closing the pipe.
		ready |= waiter.EventHUp
	}
	return ready
}

// rReadiness returns a mask that states whether the read end of the pipe is
// ready for reading.
func (p *Pipe) rReadiness() waiter.EventMask {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.rReadinessLocked()
}

// wReadinessLocked calculates the write readiness.
//
// Precondition: mu must be held.
func (p *Pipe) wReadinessLocked() waiter.EventMask {
	ready := waiter.EventMask(0)
	if p.HasWriters() && p.size < p.max {
		ready |= waiter.WritableEvents
	}
	if !p.HasReaders() {
		ready |= waiter.EventErr
	}
	return ready
}

// wReadiness returns a mask that states whether the write end of the pipe
// is ready for writing.
func (p *Pipe) wReadiness() waiter.EventMask {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.wReadinessLocked()
}

// rwReadiness returns a mask that states whether a read-write handle to the
// pipe is ready for IO.
func (p *Pipe) rwReadiness() waiter.EventMask {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.rReadinessLocked() | p.wReadinessLocked()
}

// EventRegister implements waiter.Waitable.EventRegister.
func (p *Pipe) EventRegister(e *waiter.Entry) error {
	p.queue.EventRegister(e)
	return nil
}

// EventUnregister implements waiter.Waitable.EventUnregister.
func (p *Pipe) EventUnregister(e *waiter.Entry) {
	p.queue.EventUnregister(e)
}

// queued returns the amount of queued data.
func (p *Pipe) queued() int64 {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.queuedLocked()
}

// queuedLocked returns the amount of queued data.
//
// Precondition: p.mu must be locked.
func (p *Pipe) queuedLocked() int64 {
	return p.size
}

// SetFifoSize implements fs.FifoSizer.SetFifoSize.
func (p *Pipe) SetFifoSize(size int64) (int64, error) {
	if size < 0 {
		return 0, linuxerr.EINVAL
	}
	if size < MinimumPipeSize {
		size = MinimumPipeSize // Per spec.
	}
	if size > MaximumPipeSize {
		return 0, linuxerr.EPERM
	}
	p.mu.Lock()
	defer p.mu.Unlock()
	if size < p.size {
		return 0, linuxerr.EBUSY
	}
	p.max = size
	return size, nil
}
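// setFifoSizeBehaviorSketch is a hypothetical example helper, not part of the
// pipe API: it is a minimal sketch of SetFifoSize's behavior above, which is
// analogous to fcntl(F_SETPIPE_SZ), run against a fresh, empty named pipe.
func setFifoSizeBehaviorSketch() {
	p := NewPipe(true /* isNamed */, DefaultPipeSize)

	// A request below MinimumPipeSize is rounded up rather than rejected.
	if sz, err := p.SetFifoSize(1); err != nil || sz != MinimumPipeSize {
		panic("expected the requested size to be rounded up to MinimumPipeSize")
	}

	// A request above MaximumPipeSize fails with EPERM.
	if _, err := p.SetFifoSize(MaximumPipeSize + 1); err != linuxerr.EPERM {
		panic("expected EPERM for an oversized pipe")
	}

	// Shrinking below the number of bytes currently buffered would fail with
	// EBUSY; not exercised here because this pipe is empty.
}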