// Source: github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/overlay.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fs

import (
	"fmt"
	"strings"

	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/log"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
	"github.com/SagerNet/gvisor/pkg/sync"
)

// The virtual filesystem implements an overlay configuration. For a high-level
// description, see README.md.
//
// Note on whiteouts:
//
// This implementation does not use the "Docker-style" whiteouts (symlinks with
// ".wh." prefix). Instead upper filesystem directories support a set of extended
// attributes to encode whiteouts: "trusted.overlay.whiteout.<filename>". This
// gives flexibility to persist whiteouts independently of the filesystem layout
// while additionally preventing name conflicts with files prefixed with ".wh.".
//
// Known deficiencies:
//
// - The device number of two files under the same overlay mount point may be
// different. This can happen if a file is found in the lower filesystem (takes
// the lower filesystem device) and another file is created in the upper
// filesystem (takes the upper filesystem device).
// This may appear odd but
// should not break applications.
//
// - Registered events on files (i.e. for notification of read/write readiness)
// are not copied across copy up. This is fine in the common case of files that
// do not block. For files that do block, like pipes and sockets, copy up is not
// supported.
//
// - Hardlinks in a lower filesystem are broken by copy up. For this reason, no
// attempt is made to preserve link count across copy up.
//
// - The maximum length of an extended attribute name is the same as the maximum
// length of a file path in Linux (XATTR_NAME_MAX == NAME_MAX). This means that
// whiteout attributes, if set directly on the host, are limited additionally by
// the extra whiteout prefix length (file paths must be strictly shorter than
// NAME_MAX). This is not a problem for in-memory filesystems which don't enforce
// XATTR_NAME_MAX.

const (
	// XattrOverlayPrefix is the prefix for extended attributes that affect
	// the behavior of an overlay.
	XattrOverlayPrefix = "trusted.overlay."

	// XattrOverlayWhiteoutPrefix is the prefix for extended attributes
	// that indicate that a whiteout exists.
	XattrOverlayWhiteoutPrefix = XattrOverlayPrefix + "whiteout."
)

// XattrOverlayWhiteout returns an extended attribute that indicates a
// whiteout exists for name. It is supported by directories that wish to
// mask the existence of name.
//
// The result is XattrOverlayWhiteoutPrefix followed by name verbatim; name is
// not validated or escaped.
func XattrOverlayWhiteout(name string) string {
	return XattrOverlayWhiteoutPrefix + name
}

// isXattrOverlay returns whether the given extended attribute configures the
// overlay, i.e. whether name begins with XattrOverlayPrefix (including its
// trailing dot).
func isXattrOverlay(name string) bool {
	return strings.HasPrefix(name, XattrOverlayPrefix)
}

// NewOverlayRoot produces the root of an overlay.
//
// Preconditions:
// * upper and lower must be non-nil.
// * upper must not be an overlay.
91 // * lower should not expose character devices, pipes, or sockets, because 92 // copying up these types of files is not supported. 93 // * lower must not require that file objects be revalidated. 94 // * lower must not have dynamic file/directory content. 95 func NewOverlayRoot(ctx context.Context, upper *Inode, lower *Inode, flags MountSourceFlags) (*Inode, error) { 96 if !IsDir(upper.StableAttr) { 97 return nil, fmt.Errorf("upper Inode is a %v, not a directory", upper.StableAttr.Type) 98 } 99 if !IsDir(lower.StableAttr) { 100 return nil, fmt.Errorf("lower Inode is a %v, not a directory", lower.StableAttr.Type) 101 } 102 if upper.overlay != nil { 103 return nil, fmt.Errorf("cannot nest overlay in upper file of another overlay") 104 } 105 106 msrc := newOverlayMountSource(ctx, upper.MountSource, lower.MountSource, flags) 107 overlay, err := newOverlayEntry(ctx, upper, lower, true) 108 if err != nil { 109 msrc.DecRef(ctx) 110 return nil, err 111 } 112 113 return newOverlayInode(ctx, overlay, msrc), nil 114 } 115 116 // NewOverlayRootFile produces the root of an overlay that points to a file. 117 // 118 // Preconditions: 119 // * lower must be non-nil. 120 // * lower should not expose character devices, pipes, or sockets, because 121 // copying up these types of files is not supported. Neither it can be a dir. 122 // * lower must not require that file objects be revalidated. 123 // * lower must not have dynamic file/directory content. 
124 func NewOverlayRootFile(ctx context.Context, upperMS *MountSource, lower *Inode, flags MountSourceFlags) (*Inode, error) { 125 if !IsRegular(lower.StableAttr) { 126 return nil, fmt.Errorf("lower Inode is not a regular file") 127 } 128 msrc := newOverlayMountSource(ctx, upperMS, lower.MountSource, flags) 129 overlay, err := newOverlayEntry(ctx, nil, lower, true) 130 if err != nil { 131 msrc.DecRef(ctx) 132 return nil, err 133 } 134 return newOverlayInode(ctx, overlay, msrc), nil 135 } 136 137 // newOverlayInode creates a new Inode for an overlay. 138 func newOverlayInode(ctx context.Context, o *overlayEntry, msrc *MountSource) *Inode { 139 var inode *Inode 140 if o.upper != nil { 141 inode = NewInode(ctx, nil, msrc, o.upper.StableAttr) 142 } else { 143 inode = NewInode(ctx, nil, msrc, o.lower.StableAttr) 144 } 145 inode.overlay = o 146 return inode 147 } 148 149 // overlayEntry is the overlay metadata of an Inode. It implements Mappable. 150 // 151 // +stateify savable 152 type overlayEntry struct { 153 // lowerExists is true if an Inode exists for this file in the lower 154 // filesystem. If lowerExists is true, then the overlay must create 155 // a whiteout entry when renaming and removing this entry to mask the 156 // lower Inode. 157 // 158 // Note that this is distinct from actually holding onto a non-nil 159 // lower Inode (below). The overlay does not need to keep a lower Inode 160 // around unless it needs to operate on it, but it always needs to know 161 // whether the lower Inode exists to correctly execute a rename or 162 // remove operation. 163 lowerExists bool 164 165 // lower is an Inode from a lower filesystem. Modifications are 166 // never made on this Inode. 167 lower *Inode 168 169 // copyMu serializes copy-up for operations above 170 // mm.MemoryManager.mappingMu in the lock order. 
171 copyMu sync.RWMutex `state:"nosave"` 172 173 // mapsMu serializes copy-up for operations between 174 // mm.MemoryManager.mappingMu and mm.MemoryManager.activeMu in the lock 175 // order. 176 mapsMu sync.Mutex `state:"nosave"` 177 178 // mappings tracks memory mappings of this Mappable so they can be removed 179 // from the lower filesystem Mappable and added to the upper filesystem 180 // Mappable when copy up occurs. It is strictly unnecessary after copy-up. 181 // 182 // mappings is protected by mapsMu. 183 mappings memmap.MappingSet 184 185 // dataMu serializes copy-up for operations below mm.MemoryManager.activeMu 186 // in the lock order. 187 dataMu sync.RWMutex `state:"nosave"` 188 189 // upper is an Inode from an upper filesystem. It is non-nil if 190 // the file exists in the upper filesystem. It becomes non-nil 191 // when the Inode that owns this overlayEntry is modified. 192 // 193 // upper is protected by all of copyMu, mapsMu, and dataMu. Holding any of 194 // these locks is sufficient to read upper; holding all three for writing 195 // is required to mutate it. 196 upper *Inode 197 198 // dirCacheMu protects dirCache. 199 dirCacheMu sync.RWMutex `state:"nosave"` 200 201 // dirCache is cache of DentAttrs from upper and lower Inodes. 202 dirCache *SortedDentryMap 203 } 204 205 // newOverlayEntry returns a new overlayEntry. 206 func newOverlayEntry(ctx context.Context, upper *Inode, lower *Inode, lowerExists bool) (*overlayEntry, error) { 207 if upper == nil && lower == nil { 208 panic("invalid overlayEntry, needs at least one Inode") 209 } 210 if upper != nil && upper.overlay != nil { 211 panic("nested writable layers are not supported") 212 } 213 // Check for supported lower filesystem types. 214 if lower != nil { 215 switch lower.StableAttr.Type { 216 case RegularFile, Directory, Symlink, Socket: 217 default: 218 // We don't support copying up from character devices, 219 // named pipes, or anything weird (like proc files). 
220 log.Warningf("%s not supported in lower filesytem", lower.StableAttr.Type) 221 return nil, linuxerr.EINVAL 222 } 223 } 224 return &overlayEntry{ 225 lowerExists: lowerExists, 226 lower: lower, 227 upper: upper, 228 }, nil 229 } 230 231 func (o *overlayEntry) release(ctx context.Context) { 232 // We drop a reference on upper and lower file system Inodes 233 // rather than releasing them, because in-memory filesystems 234 // may hold an extra reference to these Inodes so that they 235 // stay in memory. 236 if o.upper != nil { 237 o.upper.DecRef(ctx) 238 } 239 if o.lower != nil { 240 o.lower.DecRef(ctx) 241 } 242 } 243 244 // overlayUpperMountSource gives the upper mount of an overlay mount. 245 // 246 // The caller may not use this MountSource past the lifetime of overlayMountSource and may 247 // not call DecRef on it. 248 func overlayUpperMountSource(overlayMountSource *MountSource) *MountSource { 249 return overlayMountSource.MountSourceOperations.(*overlayMountSourceOperations).upper 250 } 251 252 // Preconditions: At least one of o.copyMu, o.mapsMu, or o.dataMu must be locked. 253 func (o *overlayEntry) inodeLocked() *Inode { 254 if o.upper != nil { 255 return o.upper 256 } 257 return o.lower 258 } 259 260 // Preconditions: At least one of o.copyMu, o.mapsMu, or o.dataMu must be locked. 261 func (o *overlayEntry) isMappableLocked() bool { 262 return o.inodeLocked().Mappable() != nil 263 } 264 265 // markDirectoryDirty marks any cached data dirty for this directory. This is 266 // necessary in order to ensure that this node does not retain stale state 267 // throughout its lifetime across multiple open directory handles. 268 // 269 // Currently this means invalidating any readdir caches. 270 func (o *overlayEntry) markDirectoryDirty() { 271 o.dirCacheMu.Lock() 272 o.dirCache = nil 273 o.dirCacheMu.Unlock() 274 } 275 276 // AddMapping implements memmap.Mappable.AddMapping. 
277 func (o *overlayEntry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { 278 o.mapsMu.Lock() 279 defer o.mapsMu.Unlock() 280 if err := o.inodeLocked().Mappable().AddMapping(ctx, ms, ar, offset, writable); err != nil { 281 return err 282 } 283 o.mappings.AddMapping(ms, ar, offset, writable) 284 return nil 285 } 286 287 // RemoveMapping implements memmap.Mappable.RemoveMapping. 288 func (o *overlayEntry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { 289 o.mapsMu.Lock() 290 defer o.mapsMu.Unlock() 291 o.inodeLocked().Mappable().RemoveMapping(ctx, ms, ar, offset, writable) 292 o.mappings.RemoveMapping(ms, ar, offset, writable) 293 } 294 295 // CopyMapping implements memmap.Mappable.CopyMapping. 296 func (o *overlayEntry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { 297 o.mapsMu.Lock() 298 defer o.mapsMu.Unlock() 299 if err := o.inodeLocked().Mappable().CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil { 300 return err 301 } 302 o.mappings.AddMapping(ms, dstAR, offset, writable) 303 return nil 304 } 305 306 // Translate implements memmap.Mappable.Translate. 307 func (o *overlayEntry) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { 308 o.dataMu.RLock() 309 defer o.dataMu.RUnlock() 310 return o.inodeLocked().Mappable().Translate(ctx, required, optional, at) 311 } 312 313 // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. 314 func (o *overlayEntry) InvalidateUnsavable(ctx context.Context) error { 315 o.mapsMu.Lock() 316 defer o.mapsMu.Unlock() 317 return o.inodeLocked().Mappable().InvalidateUnsavable(ctx) 318 }