// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fs

import (
	"io"
	"sync/atomic"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/sentry/arch"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
	"github.com/SagerNet/gvisor/pkg/sentry/uniqueid"
	"github.com/SagerNet/gvisor/pkg/sync"
	"github.com/SagerNet/gvisor/pkg/syserror"
	"github.com/SagerNet/gvisor/pkg/usermem"
	"github.com/SagerNet/gvisor/pkg/waiter"
)

// Inotify represents an inotify instance created by inotify_init(2) or
// inotify_init1(2). Inotify implements the FileOperations interface.
//
// Lock ordering:
//   Inotify.mu -> Inode.Watches.mu -> Watch.mu -> Inotify.evMu
//
// +stateify savable
type Inotify struct {
	// id is a unique identifier for this inotify instance. We don't just
	// reuse the inotify fd because fds can be duped. These should not be
	// exposed to the user, since we may aggressively reuse an id on S/R.
	id uint64

	waiter.Queue `state:"nosave"`

	// evMu *only* protects the events list. We need a separate lock because
	// while queuing events, a watch needs to lock the event queue, and using mu
	// for that would violate lock ordering since at that point the calling
	// goroutine already holds Watch.target.Watches.mu.
	evMu sync.Mutex `state:"nosave"`

	// events is the list of pending events for this inotify instance.
	// Protected by evMu.
	events eventList

	// scratch is a scratch buffer used to serialize inotify events. We
	// allocate it once ahead of time (in NewInotify) and reuse it across
	// reads for performance. Protected by evMu.
	scratch []byte

	// mu protects the fields below.
	mu sync.Mutex `state:"nosave"`

	// nextWatch is the next watch descriptor number to use for this inotify
	// instance. Note that Linux starts numbering watch descriptors from 1.
	nextWatch int32

	// watches maps watch descriptors to watch objects.
	watches map[int32]*Watch
}

// NewInotify constructs a new Inotify instance.
func NewInotify(ctx context.Context) *Inotify {
	return &Inotify{
		id:        uniqueid.GlobalFromContext(ctx),
		scratch:   make([]byte, inotifyEventBaseSize),
		nextWatch: 1, // Linux starts numbering watch descriptors from 1.
		watches:   make(map[int32]*Watch),
	}
}

// Release implements FileOperations.Release. Release removes all watches and
// frees all resources for an inotify instance.
func (i *Inotify) Release(ctx context.Context) {
	// We need to hold i.mu to avoid a race with concurrent calls to
	// Inotify.targetDestroyed from Watches. There's no risk of Watches
	// accessing this Inotify after the destructor ends, because we remove all
	// references to it below.
	i.mu.Lock()
	defer i.mu.Unlock()
	for _, w := range i.watches {
		// Remove references to the watch from the watch target. We don't need
		// to worry about the references from the owner instance, since we're in
		// the owner's destructor.
		w.target.Watches.Remove(w.ID())
		// Don't leak any references to the target, held by pins in the watch.
		w.destroy(ctx)
	}
}

// Readiness implements waiter.Waitable.Readiness.
//
// Readiness indicates whether there are pending events for an inotify instance.
func (i *Inotify) Readiness(mask waiter.EventMask) waiter.EventMask {
	ready := waiter.EventMask(0)

	i.evMu.Lock()
	defer i.evMu.Unlock()

	// The instance is readable iff at least one event is queued.
	if !i.events.Empty() {
		ready |= waiter.ReadableEvents
	}

	return mask & ready
}

// Seek implements FileOperations.Seek.
//
// Inotify instances are not seekable, matching pipe-like semantics.
func (*Inotify) Seek(context.Context, *File, SeekWhence, int64) (int64, error) {
	return 0, linuxerr.ESPIPE
}

// Readdir implements FileOperations.Readdir.
func (*Inotify) Readdir(context.Context, *File, DentrySerializer) (int64, error) {
	return 0, syserror.ENOTDIR
}

// Write implements FileOperations.Write.
//
// Inotify fds are read-only.
func (*Inotify) Write(context.Context, *File, usermem.IOSequence, int64) (int64, error) {
	return 0, linuxerr.EBADF
}

// Read implements FileOperations.Read.
//
// Read drains as many queued events as fit in dst, serializing each through
// i.scratch. A destination smaller than a single base event is rejected with
// EINVAL up front; an empty queue tells the caller to block.
func (i *Inotify) Read(ctx context.Context, _ *File, dst usermem.IOSequence, _ int64) (int64, error) {
	if dst.NumBytes() < inotifyEventBaseSize {
		return 0, linuxerr.EINVAL
	}

	i.evMu.Lock()
	defer i.evMu.Unlock()

	if i.events.Empty() {
		// Nothing to read yet, tell caller to block.
		return 0, syserror.ErrWouldBlock
	}

	var writeLen int64
	// Iterate manually, grabbing the next pointer before removal so removing
	// the current event doesn't invalidate the iteration.
	for it := i.events.Front(); it != nil; {
		event := it
		it = it.Next()

		// Does the buffer have enough remaining space to hold the event we're
		// about to write out?
		if dst.NumBytes() < int64(event.sizeOf()) {
			if writeLen > 0 {
				// Buffer wasn't big enough for all pending events, but we did
				// write some events out.
				return writeLen, nil
			}
			return 0, linuxerr.EINVAL
		}

		// Linux always dequeues an available event as long as there's enough
		// buffer space to copy it out, even if the copy below fails. Emulate
		// this behaviour.
		i.events.Remove(event)

		// Buffer has enough space, copy event to the read buffer.
		n, err := event.CopyTo(ctx, i.scratch, dst)
		if err != nil {
			return 0, err
		}

		writeLen += n
		// Advance past the bytes we just wrote so the next event lands after
		// this one.
		dst = dst.DropFirst64(n)
	}
	return writeLen, nil
}

// WriteTo implements FileOperations.WriteTo.
func (*Inotify) WriteTo(context.Context, *File, io.Writer, int64, bool) (int64, error) {
	return 0, syserror.ENOSYS
}

// Fsync implements FileOperations.Fsync.
func (*Inotify) Fsync(context.Context, *File, int64, int64, SyncType) error {
	return linuxerr.EINVAL
}

// ReadFrom implements FileOperations.ReadFrom.
func (*Inotify) ReadFrom(context.Context, *File, io.Reader, int64) (int64, error) {
	return 0, syserror.ENOSYS
}

// Flush implements FileOperations.Flush.
//
// Flush is a no-op for inotify instances.
func (*Inotify) Flush(context.Context, *File) error {
	return nil
}

// ConfigureMMap implements FileOperations.ConfigureMMap.
//
// Inotify fds cannot be memory mapped.
func (*Inotify) ConfigureMMap(context.Context, *File, *memmap.MMapOpts) error {
	return linuxerr.ENODEV
}

// UnstableAttr implements FileOperations.UnstableAttr.
func (i *Inotify) UnstableAttr(ctx context.Context, file *File) (UnstableAttr, error) {
	return file.Dirent.Inode.UnstableAttr(ctx)
}

// Ioctl implements fs.FileOperations.Ioctl.
func (i *Inotify) Ioctl(ctx context.Context, _ *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
	switch args[1].Int() {
	case linux.FIONREAD:
		// FIONREAD reports the number of bytes of queued event data
		// available to read, copied out as a 32-bit value.
		i.evMu.Lock()
		defer i.evMu.Unlock()
		var n uint32
		for e := i.events.Front(); e != nil; e = e.Next() {
			n += uint32(e.sizeOf())
		}
		var buf [4]byte
		hostarch.ByteOrder.PutUint32(buf[:], n)
		_, err := io.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{})
		return 0, err

	default:
		return 0, syserror.ENOTTY
	}
}

// queueEvent adds ev to the pending event queue, coalescing it with the most
// recently queued event if the two are identical, and notifies any readers
// when new data becomes available.
func (i *Inotify) queueEvent(ev *Event) {
	i.evMu.Lock()

	// Check if we should coalesce the event we're about to queue with the last
	// one currently in the queue. Events are coalesced if they are identical.
	if last := i.events.Back(); last != nil {
		if ev.equals(last) {
			// "Coalesce" the two events by simply not queuing the new one. We
			// don't need to raise a waiter.EventIn notification because no new
			// data is available for reading.
			i.evMu.Unlock()
			return
		}
	}

	i.events.PushBack(ev)

	// Release mutex before notifying waiters because we don't control what they
	// can do.
	i.evMu.Unlock()

	i.Queue.Notify(waiter.ReadableEvents)
}

// newWatchLocked creates and adds a new watch to target.
//
// Precondition: i.mu must be held (callers rely on this to serialize watch
// descriptor allocation and i.watches mutation).
func (i *Inotify) newWatchLocked(target *Dirent, mask uint32) *Watch {
	wd := i.nextWatch
	i.nextWatch++

	watch := &Watch{
		owner:  i,
		wd:     wd,
		mask:   mask,
		target: target.Inode,
		pins:   make(map[*Dirent]bool),
	}

	i.watches[wd] = watch

	// Grab an extra reference to target to prevent it from being evicted from
	// memory. This ref is dropped during either watch removal, target
	// destruction, or inotify instance destruction. See callers of Watch.Unpin.
	watch.Pin(target)
	target.Inode.Watches.Add(watch)

	return watch
}

// targetDestroyed is called by w to notify i that w's target is gone. This
// automatically generates a watch removal event.
func (i *Inotify) targetDestroyed(w *Watch) {
	i.mu.Lock()
	_, found := i.watches[w.wd]
	delete(i.watches, w.wd)
	i.mu.Unlock()

	// Only queue IN_IGNORED if the watch was still registered; a concurrent
	// removal may already have generated the event.
	if found {
		i.queueEvent(newEvent(w.wd, "", linux.IN_IGNORED, 0))
	}
}

// AddWatch constructs a new inotify watch and adds it to the target dirent. It
// returns the watch descriptor returned by inotify_add_watch(2).
func (i *Inotify) AddWatch(target *Dirent, mask uint32) int32 {
	// Note: Locking this inotify instance protects the result returned by
	// Lookup() below. With the lock held, we know for sure the lookup result
	// won't become stale because it's impossible for *this* instance to
	// add/remove watches on target.
	i.mu.Lock()
	defer i.mu.Unlock()

	// Does the target already have a watch from this inotify instance?
	if existing := target.Inode.Watches.Lookup(i.id); existing != nil {
		// This may be a watch on a different dirent pointing to the
		// same inode. Obtain an extra reference if necessary.
		existing.Pin(target)

		newmask := mask
		if mergeMask := mask&linux.IN_MASK_ADD != 0; mergeMask {
			// "Add (OR) events to watch mask for this pathname if it already
			// exists (instead of replacing mask)." -- inotify(7)
			newmask |= atomic.LoadUint32(&existing.mask)
		}
		atomic.StoreUint32(&existing.mask, newmask)
		return existing.wd
	}

	// No existing watch, create a new watch.
	watch := i.newWatchLocked(target, mask)
	return watch.wd
}

// RmWatch implements watcher.Watchable.RmWatch.
//
// RmWatch looks up an inotify watch for the given 'wd' and configures the
// target dirent to stop sending events to this inotify instance.
326 func (i *Inotify) RmWatch(ctx context.Context, wd int32) error { 327 i.mu.Lock() 328 329 // Find the watch we were asked to removed. 330 watch, ok := i.watches[wd] 331 if !ok { 332 i.mu.Unlock() 333 return linuxerr.EINVAL 334 } 335 336 // Remove the watch from this instance. 337 delete(i.watches, wd) 338 339 // Remove the watch from the watch target. 340 watch.target.Watches.Remove(watch.ID()) 341 342 // The watch is now isolated and we can safely drop the instance lock. We 343 // need to do so because watch.destroy() acquires Watch.mu, which cannot be 344 // acquired with Inotify.mu held. 345 i.mu.Unlock() 346 347 // Generate the event for the removal. 348 i.queueEvent(newEvent(watch.wd, "", linux.IN_IGNORED, 0)) 349 350 // Remove all pins. 351 watch.destroy(ctx) 352 353 return nil 354 }