github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/gofer/path.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gofer 16 17 import ( 18 "fmt" 19 20 "github.com/SagerNet/gvisor/pkg/context" 21 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 22 "github.com/SagerNet/gvisor/pkg/log" 23 "github.com/SagerNet/gvisor/pkg/p9" 24 "github.com/SagerNet/gvisor/pkg/sentry/device" 25 "github.com/SagerNet/gvisor/pkg/sentry/fs" 26 "github.com/SagerNet/gvisor/pkg/sentry/kernel/pipe" 27 "github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport" 28 "github.com/SagerNet/gvisor/pkg/syserror" 29 ) 30 31 // maxFilenameLen is the maximum length of a filename. This is dictated by 9P's 32 // encoding of strings, which uses 2 bytes for the length prefix. 33 const maxFilenameLen = (1 << 16) - 1 34 35 func changeType(mode p9.FileMode, newType p9.FileMode) p9.FileMode { 36 if newType&^p9.FileModeMask != 0 { 37 panic(fmt.Sprintf("newType contained more bits than just file mode: %x", newType)) 38 } 39 clear := mode &^ p9.FileModeMask 40 return clear | newType 41 } 42 43 // Lookup loads an Inode at name into a Dirent based on the session's cache 44 // policy. 45 func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) { 46 if len(name) > maxFilenameLen { 47 return nil, linuxerr.ENAMETOOLONG 48 } 49 50 s := i.session() 51 cp := s.cachePolicy 52 if cp.cacheReaddir() { 53 // Check to see if we have readdirCache that indicates the 54 // child does not exist. Avoid holding readdirMu longer than 55 // we need to. 56 i.readdirMu.Lock() 57 if i.readdirCache != nil && !i.readdirCache.Contains(name) { 58 // No such child. 59 i.readdirMu.Unlock() 60 if cp.cacheNegativeDirents() { 61 return fs.NewNegativeDirent(name), nil 62 } 63 return nil, syserror.ENOENT 64 } 65 i.readdirMu.Unlock() 66 } 67 68 // Get a p9.File for name. 69 qids, newFile, mask, p9attr, err := i.fileState.file.walkGetAttr(ctx, []string{name}) 70 if err != nil { 71 if linuxerr.Equals(linuxerr.ENOENT, err) { 72 if cp.cacheNegativeDirents() { 73 // Return a negative Dirent. It will stay cached until something 74 // is created over it. 75 return fs.NewNegativeDirent(name), nil 76 } 77 return nil, syserror.ENOENT 78 } 79 return nil, err 80 } 81 82 if s.overrides != nil { 83 // Check if file belongs to a internal named pipe. Note that it doesn't need 84 // to check for sockets because it's done in newInodeOperations below. 85 deviceKey := device.MultiDeviceKey{ 86 Device: p9attr.RDev, 87 SecondaryDevice: i.session().connID, 88 Inode: qids[0].Path, 89 } 90 s.overrides.lock() 91 if pipeInode := s.overrides.getPipe(deviceKey); pipeInode != nil { 92 s.overrides.unlock() 93 pipeInode.IncRef() 94 return fs.NewDirent(ctx, pipeInode, name), nil 95 } 96 s.overrides.unlock() 97 } 98 99 // Construct the Inode operations. 100 sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr) 101 102 // Construct a positive Dirent. 103 return fs.NewDirent(ctx, fs.NewInode(ctx, node, dir.MountSource, sattr), name), nil 104 } 105 106 // Creates a new Inode at name and returns its File based on the session's cache policy. 107 // 108 // Ownership is currently ignored. 109 func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perm fs.FilePermissions) (*fs.File, error) { 110 if len(name) > maxFilenameLen { 111 return nil, linuxerr.ENAMETOOLONG 112 } 113 114 // Create replaces the directory fid with the newly created/opened 115 // file, so clone this directory so it doesn't change out from under 116 // this node. 117 _, newFile, err := i.fileState.file.walk(ctx, nil) 118 if err != nil { 119 return nil, err 120 } 121 122 // Map the FileFlags to p9 OpenFlags. 123 var openFlags p9.OpenFlags 124 switch { 125 case flags.Read && flags.Write: 126 openFlags = p9.ReadWrite 127 case flags.Read: 128 openFlags = p9.ReadOnly 129 case flags.Write: 130 openFlags = p9.WriteOnly 131 default: 132 panic(fmt.Sprintf("Create called with unknown or unset open flags: %v", flags)) 133 } 134 135 // If the parent directory has setgid enabled, change the new file's owner. 136 owner := fs.FileOwnerFromContext(ctx) 137 parentUattr, err := dir.UnstableAttr(ctx) 138 if err != nil { 139 return nil, err 140 } 141 if parentUattr.Perms.SetGID { 142 owner.GID = parentUattr.Owner.GID 143 } 144 145 hostFile, err := newFile.create(ctx, name, openFlags, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)) 146 if err != nil { 147 // Could not create the file. 148 newFile.close(ctx) 149 return nil, err 150 } 151 152 i.touchModificationAndStatusChangeTime(ctx, dir) 153 154 // Get an unopened p9.File for the file we created so that it can be cloned 155 // and re-opened multiple times after creation, while also getting its 156 // attributes. Both are required for inodeOperations. 157 qids, unopened, mask, p9attr, err := i.fileState.file.walkGetAttr(ctx, []string{name}) 158 if err != nil { 159 newFile.close(ctx) 160 if hostFile != nil { 161 hostFile.Close() 162 } 163 return nil, err 164 } 165 if len(qids) != 1 { 166 log.Warningf("WalkGetAttr(%s) succeeded, but returned %d QIDs (%v), wanted 1", name, len(qids), qids) 167 newFile.close(ctx) 168 if hostFile != nil { 169 hostFile.Close() 170 } 171 unopened.close(ctx) 172 return nil, syserror.EIO 173 } 174 qid := qids[0] 175 176 // Construct the InodeOperations. 177 sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr) 178 179 // Construct the positive Dirent. 180 d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) 181 defer d.DecRef(ctx) 182 183 // Construct the new file, caching the handles if allowed. 184 h := handles{ 185 File: newFile, 186 Host: hostFile, 187 } 188 h.EnableLeakCheck("gofer.handles") 189 if iops.fileState.canShareHandles() { 190 iops.fileState.handlesMu.Lock() 191 iops.fileState.setSharedHandlesLocked(flags, &h) 192 iops.fileState.handlesMu.Unlock() 193 } 194 return NewFile(ctx, d, name, flags, iops, &h), nil 195 } 196 197 // CreateLink uses Create to create a symlink between oldname and newname. 198 func (i *inodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname string, newname string) error { 199 if len(newname) > maxFilenameLen { 200 return linuxerr.ENAMETOOLONG 201 } 202 203 owner := fs.FileOwnerFromContext(ctx) 204 if _, err := i.fileState.file.symlink(ctx, oldname, newname, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { 205 return err 206 } 207 i.touchModificationAndStatusChangeTime(ctx, dir) 208 return nil 209 } 210 211 // CreateHardLink implements InodeOperations.CreateHardLink. 212 func (i *inodeOperations) CreateHardLink(ctx context.Context, inode *fs.Inode, target *fs.Inode, newName string) error { 213 if len(newName) > maxFilenameLen { 214 return linuxerr.ENAMETOOLONG 215 } 216 217 targetOpts, ok := target.InodeOperations.(*inodeOperations) 218 if !ok { 219 return linuxerr.EXDEV 220 } 221 222 if err := i.fileState.file.link(ctx, &targetOpts.fileState.file, newName); err != nil { 223 return err 224 } 225 226 s := i.session() 227 if s.cachePolicy.cacheUAttrs(inode) { 228 // Increase link count. 229 targetOpts.cachingInodeOps.IncLinks(ctx) 230 } 231 232 i.touchModificationAndStatusChangeTime(ctx, inode) 233 return nil 234 } 235 236 // CreateDirectory uses Create to create a directory named s under inodeOperations. 237 func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { 238 if len(name) > maxFilenameLen { 239 return linuxerr.ENAMETOOLONG 240 } 241 242 // If the parent directory has setgid enabled, change the new directory's 243 // owner and enable setgid. 244 owner := fs.FileOwnerFromContext(ctx) 245 parentUattr, err := dir.UnstableAttr(ctx) 246 if err != nil { 247 return err 248 } 249 if parentUattr.Perms.SetGID { 250 owner.GID = parentUattr.Owner.GID 251 perm.SetGID = true 252 } 253 254 if _, err := i.fileState.file.mkdir(ctx, name, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { 255 return err 256 } 257 258 s := i.session() 259 if s.cachePolicy.cacheUAttrs(dir) { 260 // Increase link count. 261 // 262 // N.B. This will update the modification time. 263 i.cachingInodeOps.IncLinks(ctx) 264 } 265 if s.cachePolicy.cacheReaddir() { 266 // Invalidate readdir cache. 267 i.markDirectoryDirty() 268 } 269 return nil 270 } 271 272 // Bind implements InodeOperations.Bind. 273 func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, ep transport.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) { 274 if len(name) > maxFilenameLen { 275 return nil, linuxerr.ENAMETOOLONG 276 } 277 278 s := i.session() 279 if s.overrides == nil { 280 return nil, syserror.EOPNOTSUPP 281 } 282 283 // Stabilize the override map while creation is in progress. 284 s.overrides.lock() 285 defer s.overrides.unlock() 286 287 sattr, iops, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeSocket) 288 if err != nil { 289 return nil, err 290 } 291 292 // Construct the positive Dirent. 293 childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) 294 s.overrides.addBoundEndpoint(iops.fileState.key, childDir, ep) 295 return childDir, nil 296 } 297 298 // CreateFifo implements fs.InodeOperations.CreateFifo. 299 func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { 300 if len(name) > maxFilenameLen { 301 return linuxerr.ENAMETOOLONG 302 } 303 304 owner := fs.FileOwnerFromContext(ctx) 305 mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe 306 307 // N.B. FIFOs use major/minor numbers 0. 308 s := i.session() 309 if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { 310 if s.overrides == nil || !linuxerr.Equals(linuxerr.EPERM, err) { 311 return err 312 } 313 // If gofer doesn't support mknod, check if we can create an internal fifo. 314 return i.createInternalFifo(ctx, dir, name, owner, perm) 315 } 316 317 i.touchModificationAndStatusChangeTime(ctx, dir) 318 return nil 319 } 320 321 func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode, name string, owner fs.FileOwner, perm fs.FilePermissions) error { 322 s := i.session() 323 if s.overrides == nil { 324 return linuxerr.EPERM 325 } 326 327 // Stabilize the override map while creation is in progress. 328 s.overrides.lock() 329 defer s.overrides.unlock() 330 331 sattr, fileOps, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeNamedPipe) 332 if err != nil { 333 return err 334 } 335 336 // First create a pipe. 337 p := pipe.NewPipe(true /* isNamed */, pipe.DefaultPipeSize) 338 339 // Wrap the fileOps with our Fifo. 340 iops := &fifo{ 341 InodeOperations: pipe.NewInodeOperations(ctx, perm, p), 342 fileIops: fileOps, 343 } 344 inode := fs.NewInode(ctx, iops, dir.MountSource, sattr) 345 346 // Construct the positive Dirent. 347 childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) 348 s.overrides.addPipe(fileOps.fileState.key, childDir, inode) 349 return nil 350 } 351 352 // Caller must hold Session.endpoint lock. 353 func (i *inodeOperations) createEndpointFile(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions, fileType p9.FileMode) (fs.StableAttr, *inodeOperations, error) { 354 _, dirClone, err := i.fileState.file.walk(ctx, nil) 355 if err != nil { 356 return fs.StableAttr{}, nil, err 357 } 358 // We're not going to use dirClone after return. 359 defer dirClone.close(ctx) 360 361 // Create a regular file in the gofer and then mark it as a socket by 362 // adding this inode key in the 'overrides' map. 363 owner := fs.FileOwnerFromContext(ctx) 364 hostFile, err := dirClone.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)) 365 if err != nil { 366 return fs.StableAttr{}, nil, err 367 } 368 // We're not going to use this file. 369 hostFile.Close() 370 371 i.touchModificationAndStatusChangeTime(ctx, dir) 372 373 // Get the attributes of the file to create inode key. 374 qid, mask, attr, err := getattr(ctx, dirClone) 375 if err != nil { 376 return fs.StableAttr{}, nil, err 377 } 378 379 // Get an unopened p9.File for the file we created so that it can be 380 // cloned and re-opened multiple times after creation. 381 _, unopened, err := i.fileState.file.walk(ctx, []string{name}) 382 if err != nil { 383 return fs.StableAttr{}, nil, err 384 } 385 386 // Construct new inode with file type overridden. 387 attr.Mode = changeType(attr.Mode, fileType) 388 sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr) 389 return sattr, iops, nil 390 } 391 392 // Remove implements InodeOperations.Remove. 393 func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string) error { 394 if len(name) > maxFilenameLen { 395 return linuxerr.ENAMETOOLONG 396 } 397 398 s := i.session() 399 var key *device.MultiDeviceKey 400 if s.overrides != nil { 401 // Find out if file being deleted is a socket or pipe that needs to be 402 // removed from endpoint map. 403 if d, err := i.Lookup(ctx, dir, name); err == nil { 404 defer d.DecRef(ctx) 405 406 if fs.IsSocket(d.Inode.StableAttr) || fs.IsPipe(d.Inode.StableAttr) { 407 switch iops := d.Inode.InodeOperations.(type) { 408 case *inodeOperations: 409 key = &iops.fileState.key 410 case *fifo: 411 key = &iops.fileIops.fileState.key 412 } 413 414 // Stabilize the override map while deletion is in progress. 415 s.overrides.lock() 416 defer s.overrides.unlock() 417 } 418 } 419 } 420 421 if err := i.fileState.file.unlinkAt(ctx, name, 0); err != nil { 422 return err 423 } 424 if key != nil { 425 s.overrides.remove(ctx, *key) 426 } 427 i.touchModificationAndStatusChangeTime(ctx, dir) 428 429 return nil 430 } 431 432 // Remove implements InodeOperations.RemoveDirectory. 433 func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, name string) error { 434 if len(name) > maxFilenameLen { 435 return linuxerr.ENAMETOOLONG 436 } 437 438 // 0x200 = AT_REMOVEDIR. 439 if err := i.fileState.file.unlinkAt(ctx, name, 0x200); err != nil { 440 return err 441 } 442 443 s := i.session() 444 if s.cachePolicy.cacheUAttrs(dir) { 445 // Decrease link count and updates atime. 446 i.cachingInodeOps.DecLinks(ctx) 447 } 448 if s.cachePolicy.cacheReaddir() { 449 // Invalidate readdir cache. 450 i.markDirectoryDirty() 451 } 452 return nil 453 } 454 455 // Rename renames this node. 456 func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error { 457 if len(newName) > maxFilenameLen { 458 return linuxerr.ENAMETOOLONG 459 } 460 461 // Don't allow renames across different mounts. 462 if newParent.MountSource != oldParent.MountSource { 463 return linuxerr.EXDEV 464 } 465 466 // Unwrap the new parent to a *inodeOperations. 467 newParentInodeOperations := newParent.InodeOperations.(*inodeOperations) 468 469 // Unwrap the old parent to a *inodeOperations. 470 oldParentInodeOperations := oldParent.InodeOperations.(*inodeOperations) 471 472 // Do the rename. 473 if err := i.fileState.file.rename(ctx, newParentInodeOperations.fileState.file, newName); err != nil { 474 return err 475 } 476 477 // Is the renamed entity a directory? Fix link counts. 478 s := i.session() 479 if fs.IsDir(i.fileState.sattr) { 480 // Update cached state. 481 if s.cachePolicy.cacheUAttrs(oldParent) { 482 oldParentInodeOperations.cachingInodeOps.DecLinks(ctx) 483 } 484 if s.cachePolicy.cacheUAttrs(newParent) { 485 // Only IncLinks if there is a new addition to 486 // newParent. If this is replacement, then the total 487 // count remains the same. 488 if !replacement { 489 newParentInodeOperations.cachingInodeOps.IncLinks(ctx) 490 } 491 } 492 } 493 if s.cachePolicy.cacheReaddir() { 494 // Mark old directory dirty. 495 oldParentInodeOperations.markDirectoryDirty() 496 if oldParent != newParent { 497 // Mark new directory dirty. 498 newParentInodeOperations.markDirectoryDirty() 499 } 500 } 501 502 // Rename always updates ctime. 503 if s.cachePolicy.cacheUAttrs(inode) { 504 i.cachingInodeOps.TouchStatusChangeTime(ctx) 505 } 506 return nil 507 } 508 509 func (i *inodeOperations) touchModificationAndStatusChangeTime(ctx context.Context, inode *fs.Inode) { 510 s := i.session() 511 if s.cachePolicy.cacheUAttrs(inode) { 512 i.cachingInodeOps.TouchModificationAndStatusChangeTime(ctx) 513 } 514 if s.cachePolicy.cacheReaddir() { 515 // Invalidate readdir cache. 516 i.markDirectoryDirty() 517 } 518 } 519 520 // markDirectoryDirty marks any cached data dirty for this directory. This is necessary in order 521 // to ensure that this node does not retain stale state throughout its lifetime across multiple 522 // open directory handles. 523 // 524 // Currently this means invalidating any readdir caches. 525 func (i *inodeOperations) markDirectoryDirty() { 526 i.readdirMu.Lock() 527 defer i.readdirMu.Unlock() 528 i.readdirCache = nil 529 }