gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/fsimpl/eventfd/eventfd.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package eventfd implements event fds. 16 package eventfd 17 18 import ( 19 "fmt" 20 "math" 21 "sync" 22 23 "golang.org/x/sys/unix" 24 "gvisor.dev/gvisor/pkg/abi/linux" 25 "gvisor.dev/gvisor/pkg/context" 26 "gvisor.dev/gvisor/pkg/errors/linuxerr" 27 "gvisor.dev/gvisor/pkg/fdnotifier" 28 "gvisor.dev/gvisor/pkg/hostarch" 29 "gvisor.dev/gvisor/pkg/log" 30 "gvisor.dev/gvisor/pkg/sentry/vfs" 31 "gvisor.dev/gvisor/pkg/usermem" 32 "gvisor.dev/gvisor/pkg/waiter" 33 ) 34 35 // EventFileDescription implements vfs.FileDescriptionImpl for file-based event 36 // notification (eventfd). Eventfds are usually internal to the Sentry but in 37 // certain situations they may be converted into a host-backed eventfd. 38 // 39 // +stateify savable 40 type EventFileDescription struct { 41 vfsfd vfs.FileDescription 42 vfs.FileDescriptionDefaultImpl 43 vfs.DentryMetadataFileDescriptionImpl 44 vfs.NoLockFD 45 46 // queue is used to notify interested parties when the event object 47 // becomes readable or writable. 48 queue waiter.Queue 49 50 // mu protects the fields below. 51 mu sync.Mutex `state:"nosave"` 52 53 // val is the current value of the event counter. 54 val uint64 55 56 // semMode specifies whether the event is in "semaphore" mode. 57 semMode bool 58 59 // hostfd indicates whether this eventfd is passed through to the host. 60 hostfd int 61 } 62 63 var _ vfs.FileDescriptionImpl = (*EventFileDescription)(nil) 64 65 // New creates a new event fd. 66 func New(ctx context.Context, vfsObj *vfs.VirtualFilesystem, initVal uint64, semMode bool, flags uint32) (*vfs.FileDescription, error) { 67 vd := vfsObj.NewAnonVirtualDentry("[eventfd]") 68 defer vd.DecRef(ctx) 69 efd := &EventFileDescription{ 70 val: initVal, 71 semMode: semMode, 72 hostfd: -1, 73 } 74 if err := efd.vfsfd.Init(efd, flags, vd.Mount(), vd.Dentry(), &vfs.FileDescriptionOptions{ 75 UseDentryMetadata: true, 76 DenyPRead: true, 77 DenyPWrite: true, 78 DenySpliceIn: true, 79 }); err != nil { 80 return nil, err 81 } 82 return &efd.vfsfd, nil 83 } 84 85 // HostFD returns the host eventfd associated with this event. 86 func (efd *EventFileDescription) HostFD() (int, error) { 87 efd.mu.Lock() 88 defer efd.mu.Unlock() 89 if efd.hostfd >= 0 { 90 return efd.hostfd, nil 91 } 92 93 flags := linux.EFD_NONBLOCK 94 if efd.semMode { 95 flags |= linux.EFD_SEMAPHORE 96 } 97 98 fd, _, errno := unix.Syscall(unix.SYS_EVENTFD2, uintptr(efd.val), uintptr(flags), 0) 99 if errno != 0 { 100 return -1, errno 101 } 102 103 if err := fdnotifier.AddFD(int32(fd), &efd.queue); err != nil { 104 if closeErr := unix.Close(int(fd)); closeErr != nil { 105 log.Warningf("close(%d) eventfd failed: %v", fd, closeErr) 106 } 107 return -1, err 108 } 109 110 efd.hostfd = int(fd) 111 return efd.hostfd, nil 112 } 113 114 // Release implements vfs.FileDescriptionImpl.Release. 115 func (efd *EventFileDescription) Release(context.Context) { 116 efd.mu.Lock() 117 defer efd.mu.Unlock() 118 if efd.hostfd >= 0 { 119 fdnotifier.RemoveFD(int32(efd.hostfd)) 120 if closeErr := unix.Close(int(efd.hostfd)); closeErr != nil { 121 log.Warningf("close(%d) eventfd failed: %v", efd.hostfd, closeErr) 122 } 123 efd.hostfd = -1 124 } 125 } 126 127 // Read implements vfs.FileDescriptionImpl.Read. 128 func (efd *EventFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) { 129 if dst.NumBytes() < 8 { 130 return 0, unix.EINVAL 131 } 132 if err := efd.read(ctx, dst); err != nil { 133 return 0, err 134 } 135 return 8, nil 136 } 137 138 // Write implements vfs.FileDescriptionImpl.Write. 139 func (efd *EventFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) { 140 if src.NumBytes() < 8 { 141 return 0, unix.EINVAL 142 } 143 if err := efd.write(ctx, src); err != nil { 144 return 0, err 145 } 146 return 8, nil 147 } 148 149 // Preconditions: Must be called with efd.mu locked. 150 func (efd *EventFileDescription) hostReadLocked(ctx context.Context, dst usermem.IOSequence) error { 151 var buf [8]byte 152 if _, err := unix.Read(efd.hostfd, buf[:]); err != nil { 153 if err == unix.EWOULDBLOCK { 154 return linuxerr.ErrWouldBlock 155 } 156 return err 157 } 158 _, err := dst.CopyOut(ctx, buf[:]) 159 return err 160 } 161 162 func (efd *EventFileDescription) read(ctx context.Context, dst usermem.IOSequence) error { 163 efd.mu.Lock() 164 if efd.hostfd >= 0 { 165 defer efd.mu.Unlock() 166 return efd.hostReadLocked(ctx, dst) 167 } 168 169 // We can't complete the read if the value is currently zero. 170 if efd.val == 0 { 171 efd.mu.Unlock() 172 return linuxerr.ErrWouldBlock 173 } 174 175 // Update the value based on the mode the event is operating in. 176 var val uint64 177 if efd.semMode { 178 val = 1 179 // Consistent with Linux, this is done even if writing to memory fails. 180 efd.val-- 181 } else { 182 val = efd.val 183 efd.val = 0 184 } 185 186 efd.mu.Unlock() 187 188 // Notify writers. We do this even if we were already writable because 189 // it is possible that a writer is waiting to write the maximum value 190 // to the event. 191 efd.queue.Notify(waiter.WritableEvents) 192 193 var buf [8]byte 194 hostarch.ByteOrder.PutUint64(buf[:], val) 195 _, err := dst.CopyOut(ctx, buf[:]) 196 return err 197 } 198 199 // Preconditions: Must be called with efd.mu locked. 200 func (efd *EventFileDescription) hostWriteLocked(val uint64) error { 201 var buf [8]byte 202 hostarch.ByteOrder.PutUint64(buf[:], val) 203 _, err := unix.Write(efd.hostfd, buf[:]) 204 if err == unix.EWOULDBLOCK { 205 return linuxerr.ErrWouldBlock 206 } 207 return err 208 } 209 210 func (efd *EventFileDescription) write(ctx context.Context, src usermem.IOSequence) error { 211 var buf [8]byte 212 if _, err := src.CopyIn(ctx, buf[:]); err != nil { 213 return err 214 } 215 val := hostarch.ByteOrder.Uint64(buf[:]) 216 217 return efd.Signal(val) 218 } 219 220 // Signal is an internal function to signal the event fd. 221 func (efd *EventFileDescription) Signal(val uint64) error { 222 if val == math.MaxUint64 { 223 return unix.EINVAL 224 } 225 226 efd.mu.Lock() 227 228 if efd.hostfd >= 0 { 229 defer efd.mu.Unlock() 230 return efd.hostWriteLocked(val) 231 } 232 233 // We only allow writes that won't cause the value to go over the max 234 // uint64 minus 1. 235 if val > math.MaxUint64-1-efd.val { 236 efd.mu.Unlock() 237 return linuxerr.ErrWouldBlock 238 } 239 240 efd.val += val 241 efd.mu.Unlock() 242 243 // Always trigger a notification. 244 efd.queue.Notify(waiter.ReadableEvents) 245 246 return nil 247 } 248 249 // Readiness implements waiter.Waitable.Readiness. 250 func (efd *EventFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { 251 efd.mu.Lock() 252 defer efd.mu.Unlock() 253 254 if efd.hostfd >= 0 { 255 return fdnotifier.NonBlockingPoll(int32(efd.hostfd), mask) 256 } 257 258 ready := waiter.EventMask(0) 259 if efd.val > 0 { 260 ready |= waiter.ReadableEvents 261 } 262 263 if efd.val < math.MaxUint64-1 { 264 ready |= waiter.WritableEvents 265 } 266 267 return mask & ready 268 } 269 270 // EventRegister implements waiter.Waitable.EventRegister. 271 func (efd *EventFileDescription) EventRegister(entry *waiter.Entry) error { 272 efd.queue.EventRegister(entry) 273 274 efd.mu.Lock() 275 defer efd.mu.Unlock() 276 if efd.hostfd >= 0 { 277 if err := fdnotifier.UpdateFD(int32(efd.hostfd)); err != nil { 278 efd.queue.EventUnregister(entry) 279 return err 280 } 281 } 282 return nil 283 } 284 285 // EventUnregister implements waiter.Waitable.EventUnregister. 286 func (efd *EventFileDescription) EventUnregister(entry *waiter.Entry) { 287 efd.queue.EventUnregister(entry) 288 289 efd.mu.Lock() 290 defer efd.mu.Unlock() 291 if efd.hostfd >= 0 { 292 if err := fdnotifier.UpdateFD(int32(efd.hostfd)); err != nil { 293 panic(fmt.Sprint("UpdateFD:", err)) 294 } 295 } 296 } 297 298 // Epollable implements FileDescriptionImpl.Epollable. 299 func (efd *EventFileDescription) Epollable() bool { 300 return true 301 }