// github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/kernel/pipe/pipe.go (about)

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package pipe provides a pipe implementation.
package pipe

import (
	"fmt"
	"io"
	"sync/atomic"

	"golang.org/x/sys/unix"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/safemem"
	"github.com/SagerNet/gvisor/pkg/sentry/fs"
	"github.com/SagerNet/gvisor/pkg/sync"
	"github.com/SagerNet/gvisor/pkg/syserror"
	"github.com/SagerNet/gvisor/pkg/waiter"
)

const (
	// MinimumPipeSize is a hard limit on the minimum size of a pipe.
	// It corresponds to fs/pipe.c:pipe_min_size.
	MinimumPipeSize = hostarch.PageSize

	// MaximumPipeSize is a hard limit on the maximum size of a pipe.
	// It corresponds to fs/pipe.c:pipe_max_size.
	MaximumPipeSize = 1048576

	// DefaultPipeSize is the system-wide default size of a pipe in bytes.
	// It corresponds to pipe_fs_i.h:PIPE_DEF_BUFFERS.
	DefaultPipeSize = 16 * hostarch.PageSize

	// atomicIOBytes is the maximum size, in bytes, of a read or write that
	// the pipe guarantees to perform atomically.
	// It corresponds to limits.h:PIPE_BUF.
	atomicIOBytes = 4096
)

// Pipe is an encapsulation of a platform-independent pipe.
// It manages a buffered byte queue shared between a reader/writer
// pair.
//
// +stateify savable
type Pipe struct {
	waiter.Queue `state:"nosave"`

	// isNamed indicates whether this is a named pipe.
	//
	// This value is immutable.
	isNamed bool

	// readers is the number of active readers for this pipe.
	//
	// Access atomically.
	readers int32

	// writers is the number of active writers for this pipe.
	//
	// Access atomically.
	writers int32

	// mu protects all pipe internal state below.
	mu sync.Mutex `state:"nosave"`

	// buf holds the pipe's data. buf is a circular buffer; the first valid
	// byte in buf is at offset off, and the pipe contains size valid bytes.
	// bufBlocks contains two identical safemem.Blocks representing buf; this
	// avoids needing to heap-allocate a new safemem.Block slice when buf is
	// resized. bufBlockSeq is a safemem.BlockSeq representing bufBlocks.
	//
	// These fields are protected by mu.
	buf         []byte
	bufBlocks   [2]safemem.Block `state:"nosave"`
	bufBlockSeq safemem.BlockSeq `state:"nosave"`
	off         int64
	size        int64

	// max is the maximum size of the pipe in bytes. When this max has been
	// reached, writers will get EWOULDBLOCK.
	//
	// This is protected by mu.
	max int64

	// hadWriter indicates if this pipe ever had a writer. Note that this
	// does not necessarily indicate there is *currently* a writer, just
	// that there has been a writer at some point since the pipe was
	// created.
	//
	// This is protected by mu.
	hadWriter bool
}

// NewPipe initializes and returns a pipe.
//
// N.B. The size will be bounded: sizeBytes is clamped to the range
// [MinimumPipeSize, MaximumPipeSize].
110 func NewPipe(isNamed bool, sizeBytes int64) *Pipe { 111 var p Pipe 112 initPipe(&p, isNamed, sizeBytes) 113 return &p 114 } 115 116 func initPipe(pipe *Pipe, isNamed bool, sizeBytes int64) { 117 if sizeBytes < MinimumPipeSize { 118 sizeBytes = MinimumPipeSize 119 } 120 if sizeBytes > MaximumPipeSize { 121 sizeBytes = MaximumPipeSize 122 } 123 pipe.isNamed = isNamed 124 pipe.max = sizeBytes 125 } 126 127 // NewConnectedPipe initializes a pipe and returns a pair of objects 128 // representing the read and write ends of the pipe. 129 func NewConnectedPipe(ctx context.Context, sizeBytes int64) (*fs.File, *fs.File) { 130 p := NewPipe(false /* isNamed */, sizeBytes) 131 132 // Build an fs.Dirent for the pipe which will be shared by both 133 // returned files. 134 perms := fs.FilePermissions{ 135 User: fs.PermMask{Read: true, Write: true}, 136 } 137 iops := NewInodeOperations(ctx, perms, p) 138 ino := pipeDevice.NextIno() 139 sattr := fs.StableAttr{ 140 Type: fs.Pipe, 141 DeviceID: pipeDevice.DeviceID(), 142 InodeID: ino, 143 BlockSize: int64(atomicIOBytes), 144 } 145 ms := fs.NewPseudoMountSource(ctx) 146 d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, ms, sattr), fmt.Sprintf("pipe:[%d]", ino)) 147 // The p.Open calls below will each take a reference on the Dirent. We 148 // must drop the one we already have. 149 defer d.DecRef(ctx) 150 return p.Open(ctx, d, fs.FileFlags{Read: true}), p.Open(ctx, d, fs.FileFlags{Write: true}) 151 } 152 153 // Open opens the pipe and returns a new file. 154 // 155 // Precondition: at least one of flags.Read or flags.Write must be set. 
156 func (p *Pipe) Open(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) *fs.File { 157 flags.NonSeekable = true 158 switch { 159 case flags.Read && flags.Write: 160 p.rOpen() 161 p.wOpen() 162 return fs.NewFile(ctx, d, flags, &ReaderWriter{ 163 Pipe: p, 164 }) 165 case flags.Read: 166 p.rOpen() 167 return fs.NewFile(ctx, d, flags, &Reader{ 168 ReaderWriter: ReaderWriter{Pipe: p}, 169 }) 170 case flags.Write: 171 p.wOpen() 172 return fs.NewFile(ctx, d, flags, &Writer{ 173 ReaderWriter: ReaderWriter{Pipe: p}, 174 }) 175 default: 176 // Precondition violated. 177 panic("invalid pipe flags") 178 } 179 } 180 181 // peekLocked passes the first count bytes in the pipe to f and returns its 182 // result. If fewer than count bytes are available, the safemem.BlockSeq passed 183 // to f will be less than count bytes in length. 184 // 185 // peekLocked does not mutate the pipe; if the read consumes bytes from the 186 // pipe, then the caller is responsible for calling p.consumeLocked() and 187 // p.Notify(waiter.WritableEvents). (The latter must be called with p.mu unlocked.) 188 // 189 // Preconditions: 190 // * p.mu must be locked. 191 // * This pipe must have readers. 192 func (p *Pipe) peekLocked(count int64, f func(safemem.BlockSeq) (uint64, error)) (int64, error) { 193 // Don't block for a zero-length read even if the pipe is empty. 194 if count == 0 { 195 return 0, nil 196 } 197 198 // Limit the amount of data read to the amount of data in the pipe. 199 if count > p.size { 200 if p.size == 0 { 201 if !p.HasWriters() { 202 return 0, io.EOF 203 } 204 return 0, syserror.ErrWouldBlock 205 } 206 count = p.size 207 } 208 209 // Prepare the view of the data to be read. 210 bs := p.bufBlockSeq.DropFirst64(uint64(p.off)).TakeFirst64(uint64(count)) 211 212 // Perform the read. 213 done, err := f(bs) 214 return int64(done), err 215 } 216 217 // consumeLocked consumes the first n bytes in the pipe, such that they will no 218 // longer be visible to future reads. 
219 // 220 // Preconditions: 221 // * p.mu must be locked. 222 // * The pipe must contain at least n bytes. 223 func (p *Pipe) consumeLocked(n int64) { 224 p.off += n 225 if max := int64(len(p.buf)); p.off >= max { 226 p.off -= max 227 } 228 p.size -= n 229 } 230 231 // writeLocked passes a safemem.BlockSeq representing the first count bytes of 232 // unused space in the pipe to f and returns the result. If fewer than count 233 // bytes are free, the safemem.BlockSeq passed to f will be less than count 234 // bytes in length. If the pipe is full or otherwise cannot accomodate a write 235 // of any number of bytes up to count, writeLocked returns ErrWouldBlock 236 // without calling f. 237 // 238 // Unlike peekLocked, writeLocked assumes that f returns the number of bytes 239 // written to the pipe, and increases the number of bytes stored in the pipe 240 // accordingly. Callers are still responsible for calling 241 // p.Notify(waiter.ReadableEvents) with p.mu unlocked. 242 // 243 // Preconditions: 244 // * p.mu must be locked. 245 func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error)) (int64, error) { 246 // Can't write to a pipe with no readers. 247 if !p.HasReaders() { 248 return 0, unix.EPIPE 249 } 250 251 avail := p.max - p.size 252 if avail == 0 { 253 return 0, syserror.ErrWouldBlock 254 } 255 short := false 256 if count > avail { 257 // POSIX requires that a write smaller than atomicIOBytes 258 // (PIPE_BUF) be atomic, but requires no atomicity for writes 259 // larger than this. 260 if count <= atomicIOBytes { 261 return 0, syserror.ErrWouldBlock 262 } 263 count = avail 264 short = true 265 } 266 267 // Ensure that the buffer is big enough. 268 if newLen, oldCap := p.size+count, int64(len(p.buf)); newLen > oldCap { 269 // Allocate a new buffer. 
270 newCap := oldCap * 2 271 if oldCap == 0 { 272 newCap = 8 // arbitrary; sending individual integers across pipes is relatively common 273 } 274 for newLen > newCap { 275 newCap *= 2 276 } 277 if newCap > p.max { 278 newCap = p.max 279 } 280 newBuf := make([]byte, newCap) 281 // Copy the old buffer's contents to the beginning of the new one. 282 safemem.CopySeq( 283 safemem.BlockSeqOf(safemem.BlockFromSafeSlice(newBuf)), 284 p.bufBlockSeq.DropFirst64(uint64(p.off)).TakeFirst64(uint64(p.size))) 285 // Switch to the new buffer. 286 p.buf = newBuf 287 p.bufBlocks[0] = safemem.BlockFromSafeSlice(newBuf) 288 p.bufBlocks[1] = p.bufBlocks[0] 289 p.bufBlockSeq = safemem.BlockSeqFromSlice(p.bufBlocks[:]) 290 p.off = 0 291 } 292 293 // Prepare the view of the space to be written. 294 woff := p.off + p.size 295 if woff >= int64(len(p.buf)) { 296 woff -= int64(len(p.buf)) 297 } 298 bs := p.bufBlockSeq.DropFirst64(uint64(woff)).TakeFirst64(uint64(count)) 299 300 // Perform the write. 301 doneU64, err := f(bs) 302 done := int64(doneU64) 303 p.size += done 304 if done < count || err != nil { 305 return done, err 306 } 307 308 // If we shortened the write, adjust the returned error appropriately. 309 if short { 310 return done, syserror.ErrWouldBlock 311 } 312 313 return done, nil 314 } 315 316 // rOpen signals a new reader of the pipe. 317 func (p *Pipe) rOpen() { 318 atomic.AddInt32(&p.readers, 1) 319 } 320 321 // wOpen signals a new writer of the pipe. 322 func (p *Pipe) wOpen() { 323 p.mu.Lock() 324 defer p.mu.Unlock() 325 p.hadWriter = true 326 atomic.AddInt32(&p.writers, 1) 327 } 328 329 // rClose signals that a reader has closed their end of the pipe. 330 func (p *Pipe) rClose() { 331 newReaders := atomic.AddInt32(&p.readers, -1) 332 if newReaders < 0 { 333 panic(fmt.Sprintf("Refcounting bug, pipe has negative readers: %v", newReaders)) 334 } 335 } 336 337 // wClose signals that a writer has closed their end of the pipe. 
338 func (p *Pipe) wClose() { 339 newWriters := atomic.AddInt32(&p.writers, -1) 340 if newWriters < 0 { 341 panic(fmt.Sprintf("Refcounting bug, pipe has negative writers: %v.", newWriters)) 342 } 343 } 344 345 // HasReaders returns whether the pipe has any active readers. 346 func (p *Pipe) HasReaders() bool { 347 return atomic.LoadInt32(&p.readers) > 0 348 } 349 350 // HasWriters returns whether the pipe has any active writers. 351 func (p *Pipe) HasWriters() bool { 352 return atomic.LoadInt32(&p.writers) > 0 353 } 354 355 // rReadinessLocked calculates the read readiness. 356 // 357 // Precondition: mu must be held. 358 func (p *Pipe) rReadinessLocked() waiter.EventMask { 359 ready := waiter.EventMask(0) 360 if p.HasReaders() && p.size != 0 { 361 ready |= waiter.ReadableEvents 362 } 363 if !p.HasWriters() && p.hadWriter { 364 // POLLHUP must be suppressed until the pipe has had at least one writer 365 // at some point. Otherwise a reader thread may poll and immediately get 366 // a POLLHUP before the writer ever opens the pipe, which the reader may 367 // interpret as the writer opening then closing the pipe. 368 ready |= waiter.EventHUp 369 } 370 return ready 371 } 372 373 // rReadiness returns a mask that states whether the read end of the pipe is 374 // ready for reading. 375 func (p *Pipe) rReadiness() waiter.EventMask { 376 p.mu.Lock() 377 defer p.mu.Unlock() 378 return p.rReadinessLocked() 379 } 380 381 // wReadinessLocked calculates the write readiness. 382 // 383 // Precondition: mu must be held. 384 func (p *Pipe) wReadinessLocked() waiter.EventMask { 385 ready := waiter.EventMask(0) 386 if p.HasWriters() && p.size < p.max { 387 ready |= waiter.WritableEvents 388 } 389 if !p.HasReaders() { 390 ready |= waiter.EventErr 391 } 392 return ready 393 } 394 395 // wReadiness returns a mask that states whether the write end of the pipe 396 // is ready for writing. 
397 func (p *Pipe) wReadiness() waiter.EventMask { 398 p.mu.Lock() 399 defer p.mu.Unlock() 400 return p.wReadinessLocked() 401 } 402 403 // rwReadiness returns a mask that states whether a read-write handle to the 404 // pipe is ready for IO. 405 func (p *Pipe) rwReadiness() waiter.EventMask { 406 p.mu.Lock() 407 defer p.mu.Unlock() 408 return p.rReadinessLocked() | p.wReadinessLocked() 409 } 410 411 // queued returns the amount of queued data. 412 func (p *Pipe) queued() int64 { 413 p.mu.Lock() 414 defer p.mu.Unlock() 415 return p.queuedLocked() 416 } 417 418 func (p *Pipe) queuedLocked() int64 { 419 return p.size 420 } 421 422 // FifoSize implements fs.FifoSizer.FifoSize. 423 func (p *Pipe) FifoSize(context.Context, *fs.File) (int64, error) { 424 p.mu.Lock() 425 defer p.mu.Unlock() 426 return p.max, nil 427 } 428 429 // SetFifoSize implements fs.FifoSizer.SetFifoSize. 430 func (p *Pipe) SetFifoSize(size int64) (int64, error) { 431 if size < 0 { 432 return 0, linuxerr.EINVAL 433 } 434 if size < MinimumPipeSize { 435 size = MinimumPipeSize // Per spec. 436 } 437 if size > MaximumPipeSize { 438 return 0, linuxerr.EPERM 439 } 440 p.mu.Lock() 441 defer p.mu.Unlock() 442 if size < p.size { 443 return 0, linuxerr.EBUSY 444 } 445 p.max = size 446 return size, nil 447 }