github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/kernel/eventfd/eventfd.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package eventfd provides an implementation of Linux's file-based event 16 // notification. 17 package eventfd 18 19 import ( 20 "math" 21 22 "golang.org/x/sys/unix" 23 "github.com/SagerNet/gvisor/pkg/abi/linux" 24 "github.com/SagerNet/gvisor/pkg/context" 25 "github.com/SagerNet/gvisor/pkg/fdnotifier" 26 "github.com/SagerNet/gvisor/pkg/hostarch" 27 "github.com/SagerNet/gvisor/pkg/sentry/fs" 28 "github.com/SagerNet/gvisor/pkg/sentry/fs/anon" 29 "github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil" 30 "github.com/SagerNet/gvisor/pkg/sync" 31 "github.com/SagerNet/gvisor/pkg/syserror" 32 "github.com/SagerNet/gvisor/pkg/usermem" 33 "github.com/SagerNet/gvisor/pkg/waiter" 34 ) 35 36 // EventOperations represents an event with the semantics of Linux's file-based event 37 // notification (eventfd). Eventfds are usually internal to the Sentry but in certain 38 // situations they may be converted into a host-backed eventfd. 39 // 40 // +stateify savable 41 type EventOperations struct { 42 fsutil.FileNoopRelease `state:"nosave"` 43 fsutil.FilePipeSeek `state:"nosave"` 44 fsutil.FileNotDirReaddir `state:"nosave"` 45 fsutil.FileNoFsync `state:"nosave"` 46 fsutil.FileNoIoctl `state:"nosave"` 47 fsutil.FileNoMMap `state:"nosave"` 48 fsutil.FileNoSplice `state:"nosave"` 49 fsutil.FileNoopFlush `state:"nosave"` 50 fsutil.FileUseInodeUnstableAttr `state:"nosave"` 51 52 // Mutex that protects accesses to the fields of this event. 53 mu sync.Mutex `state:"nosave"` 54 55 // Queue is used to notify interested parties when the event object 56 // becomes readable or writable. 57 wq waiter.Queue `state:"zerovalue"` 58 59 // val is the current value of the event counter. 60 val uint64 61 62 // semMode specifies whether the event is in "semaphore" mode. 63 semMode bool 64 65 // hostfd indicates whether this eventfd is passed through to the host. 66 hostfd int 67 } 68 69 // New creates a new event object with the supplied initial value and mode. 70 func New(ctx context.Context, initVal uint64, semMode bool) *fs.File { 71 // name matches fs/eventfd.c:eventfd_file_create. 72 dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[eventfd]") 73 // Release the initial dirent reference after NewFile takes a reference. 74 defer dirent.DecRef(ctx) 75 return fs.NewFile(ctx, dirent, fs.FileFlags{Read: true, Write: true}, &EventOperations{ 76 val: initVal, 77 semMode: semMode, 78 hostfd: -1, 79 }) 80 } 81 82 // HostFD returns the host eventfd associated with this event. 83 func (e *EventOperations) HostFD() (int, error) { 84 e.mu.Lock() 85 defer e.mu.Unlock() 86 if e.hostfd >= 0 { 87 return e.hostfd, nil 88 } 89 90 flags := linux.EFD_NONBLOCK 91 if e.semMode { 92 flags |= linux.EFD_SEMAPHORE 93 } 94 95 fd, _, err := unix.Syscall(unix.SYS_EVENTFD2, uintptr(e.val), uintptr(flags), 0) 96 if err != 0 { 97 return -1, err 98 } 99 100 if err := fdnotifier.AddFD(int32(fd), &e.wq); err != nil { 101 unix.Close(int(fd)) 102 return -1, err 103 } 104 105 e.hostfd = int(fd) 106 return e.hostfd, nil 107 } 108 109 // Release implements fs.FileOperations.Release. 110 func (e *EventOperations) Release(context.Context) { 111 e.mu.Lock() 112 defer e.mu.Unlock() 113 if e.hostfd >= 0 { 114 fdnotifier.RemoveFD(int32(e.hostfd)) 115 unix.Close(e.hostfd) 116 e.hostfd = -1 117 } 118 } 119 120 // Read implements fs.FileOperations.Read. 121 func (e *EventOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) { 122 if dst.NumBytes() < 8 { 123 return 0, unix.EINVAL 124 } 125 if err := e.read(ctx, dst); err != nil { 126 return 0, err 127 } 128 return 8, nil 129 } 130 131 // Write implements fs.FileOperations.Write. 132 func (e *EventOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) { 133 if src.NumBytes() < 8 { 134 return 0, unix.EINVAL 135 } 136 if err := e.write(ctx, src); err != nil { 137 return 0, err 138 } 139 return 8, nil 140 } 141 142 // Must be called with e.mu locked. 143 func (e *EventOperations) hostRead(ctx context.Context, dst usermem.IOSequence) error { 144 var buf [8]byte 145 146 if _, err := unix.Read(e.hostfd, buf[:]); err != nil { 147 if err == unix.EWOULDBLOCK { 148 return syserror.ErrWouldBlock 149 } 150 return err 151 } 152 153 _, err := dst.CopyOut(ctx, buf[:]) 154 return err 155 } 156 157 func (e *EventOperations) read(ctx context.Context, dst usermem.IOSequence) error { 158 e.mu.Lock() 159 160 if e.hostfd >= 0 { 161 defer e.mu.Unlock() 162 return e.hostRead(ctx, dst) 163 } 164 165 // We can't complete the read if the value is currently zero. 166 if e.val == 0 { 167 e.mu.Unlock() 168 return syserror.ErrWouldBlock 169 } 170 171 // Update the value based on the mode the event is operating in. 172 var val uint64 173 if e.semMode { 174 val = 1 175 // Consistent with Linux, this is done even if writing to memory fails. 176 e.val-- 177 } else { 178 val = e.val 179 e.val = 0 180 } 181 182 e.mu.Unlock() 183 184 // Notify writers. We do this even if we were already writable because 185 // it is possible that a writer is waiting to write the maximum value 186 // to the event. 187 e.wq.Notify(waiter.WritableEvents) 188 189 var buf [8]byte 190 hostarch.ByteOrder.PutUint64(buf[:], val) 191 _, err := dst.CopyOut(ctx, buf[:]) 192 return err 193 } 194 195 // Must be called with e.mu locked. 196 func (e *EventOperations) hostWrite(val uint64) error { 197 var buf [8]byte 198 hostarch.ByteOrder.PutUint64(buf[:], val) 199 _, err := unix.Write(e.hostfd, buf[:]) 200 if err == unix.EWOULDBLOCK { 201 return syserror.ErrWouldBlock 202 } 203 return err 204 } 205 206 func (e *EventOperations) write(ctx context.Context, src usermem.IOSequence) error { 207 var buf [8]byte 208 if _, err := src.CopyIn(ctx, buf[:]); err != nil { 209 return err 210 } 211 val := hostarch.ByteOrder.Uint64(buf[:]) 212 213 return e.Signal(val) 214 } 215 216 // Signal is an internal function to signal the event fd. 217 func (e *EventOperations) Signal(val uint64) error { 218 if val == math.MaxUint64 { 219 return unix.EINVAL 220 } 221 222 e.mu.Lock() 223 224 if e.hostfd >= 0 { 225 defer e.mu.Unlock() 226 return e.hostWrite(val) 227 } 228 229 // We only allow writes that won't cause the value to go over the max 230 // uint64 minus 1. 231 if val > math.MaxUint64-1-e.val { 232 e.mu.Unlock() 233 return syserror.ErrWouldBlock 234 } 235 236 e.val += val 237 e.mu.Unlock() 238 239 // Always trigger a notification. 240 e.wq.Notify(waiter.ReadableEvents) 241 242 return nil 243 } 244 245 // Readiness returns the ready events for the event fd. 246 func (e *EventOperations) Readiness(mask waiter.EventMask) waiter.EventMask { 247 e.mu.Lock() 248 if e.hostfd >= 0 { 249 defer e.mu.Unlock() 250 return fdnotifier.NonBlockingPoll(int32(e.hostfd), mask) 251 } 252 253 ready := waiter.EventMask(0) 254 if e.val > 0 { 255 ready |= waiter.ReadableEvents 256 } 257 258 if e.val < math.MaxUint64-1 { 259 ready |= waiter.WritableEvents 260 } 261 e.mu.Unlock() 262 263 return mask & ready 264 } 265 266 // EventRegister implements waiter.Waitable.EventRegister. 267 func (e *EventOperations) EventRegister(entry *waiter.Entry, mask waiter.EventMask) { 268 e.wq.EventRegister(entry, mask) 269 270 e.mu.Lock() 271 defer e.mu.Unlock() 272 if e.hostfd >= 0 { 273 fdnotifier.UpdateFD(int32(e.hostfd)) 274 } 275 } 276 277 // EventUnregister implements waiter.Waitable.EventUnregister. 278 func (e *EventOperations) EventUnregister(entry *waiter.Entry) { 279 e.wq.EventUnregister(entry) 280 281 e.mu.Lock() 282 defer e.mu.Unlock() 283 if e.hostfd >= 0 { 284 fdnotifier.UpdateFD(int32(e.hostfd)) 285 } 286 }