gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/syscalls/linux/sys_epoll.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package linux 16 17 import ( 18 "math" 19 "time" 20 21 "gvisor.dev/gvisor/pkg/abi/linux" 22 "gvisor.dev/gvisor/pkg/errors/linuxerr" 23 "gvisor.dev/gvisor/pkg/hostarch" 24 "gvisor.dev/gvisor/pkg/sentry/arch" 25 "gvisor.dev/gvisor/pkg/sentry/kernel" 26 ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" 27 "gvisor.dev/gvisor/pkg/sentry/vfs" 28 "gvisor.dev/gvisor/pkg/waiter" 29 ) 30 31 var sizeofEpollEvent = (*linux.EpollEvent)(nil).SizeBytes() 32 33 // EpollCreate1 implements Linux syscall epoll_create1(2). 34 func EpollCreate1(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 35 flags := args[0].Int() 36 if flags&^linux.EPOLL_CLOEXEC != 0 { 37 return 0, nil, linuxerr.EINVAL 38 } 39 40 file, err := t.Kernel().VFS().NewEpollInstanceFD(t) 41 if err != nil { 42 return 0, nil, err 43 } 44 defer file.DecRef(t) 45 46 fd, err := t.NewFDFrom(0, file, kernel.FDFlags{ 47 CloseOnExec: flags&linux.EPOLL_CLOEXEC != 0, 48 }) 49 if err != nil { 50 return 0, nil, err 51 } 52 return uintptr(fd), nil, nil 53 } 54 55 // EpollCreate implements Linux syscall epoll_create(2). 56 func EpollCreate(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 57 size := args[0].Int() 58 59 // "Since Linux 2.6.8, the size argument is ignored, but must be greater 60 // than zero" - epoll_create(2) 61 if size <= 0 { 62 return 0, nil, linuxerr.EINVAL 63 } 64 65 file, err := t.Kernel().VFS().NewEpollInstanceFD(t) 66 if err != nil { 67 return 0, nil, err 68 } 69 defer file.DecRef(t) 70 71 fd, err := t.NewFDFrom(0, file, kernel.FDFlags{}) 72 if err != nil { 73 return 0, nil, err 74 } 75 return uintptr(fd), nil, nil 76 } 77 78 // EpollCtl implements Linux syscall epoll_ctl(2). 79 func EpollCtl(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 80 epfd := args[0].Int() 81 op := args[1].Int() 82 fd := args[2].Int() 83 eventAddr := args[3].Pointer() 84 85 epfile := t.GetFile(epfd) 86 if epfile == nil { 87 return 0, nil, linuxerr.EBADF 88 } 89 defer epfile.DecRef(t) 90 ep, ok := epfile.Impl().(*vfs.EpollInstance) 91 if !ok { 92 return 0, nil, linuxerr.EINVAL 93 } 94 file := t.GetFile(fd) 95 if file == nil { 96 return 0, nil, linuxerr.EBADF 97 } 98 defer file.DecRef(t) 99 if epfile == file { 100 return 0, nil, linuxerr.EINVAL 101 } 102 103 var event linux.EpollEvent 104 switch op { 105 case linux.EPOLL_CTL_ADD: 106 if _, err := event.CopyIn(t, eventAddr); err != nil { 107 return 0, nil, err 108 } 109 return 0, nil, ep.AddInterest(file, fd, event) 110 case linux.EPOLL_CTL_DEL: 111 return 0, nil, ep.DeleteInterest(file, fd) 112 case linux.EPOLL_CTL_MOD: 113 if _, err := event.CopyIn(t, eventAddr); err != nil { 114 return 0, nil, err 115 } 116 return 0, nil, ep.ModifyInterest(file, fd, event) 117 default: 118 return 0, nil, linuxerr.EINVAL 119 } 120 } 121 122 func waitEpoll(t *kernel.Task, epfd int32, eventsAddr hostarch.Addr, maxEvents int, timeoutInNanos int64) (uintptr, *kernel.SyscallControl, error) { 123 var _EP_MAX_EVENTS = math.MaxInt32 / sizeofEpollEvent // Linux: fs/eventpoll.c:EP_MAX_EVENTS 124 if maxEvents <= 0 || maxEvents > _EP_MAX_EVENTS { 125 return 0, nil, linuxerr.EINVAL 126 } 127 128 epfile := t.GetFile(epfd) 129 if epfile == nil { 130 return 0, nil, linuxerr.EBADF 131 } 132 defer epfile.DecRef(t) 133 ep, ok := epfile.Impl().(*vfs.EpollInstance) 134 if !ok { 135 return 0, nil, linuxerr.EINVAL 136 } 137 138 // Allocate space for a few events on the stack for the common case in 139 // which we don't have too many events. 140 var ( 141 eventsArr [16]linux.EpollEvent 142 ch chan struct{} 143 haveDeadline bool 144 deadline ktime.Time 145 ) 146 for { 147 events := ep.ReadEvents(eventsArr[:0], maxEvents) 148 if len(events) != 0 { 149 copiedBytes, err := linux.CopyEpollEventSliceOut(t, eventsAddr, events) 150 copiedEvents := copiedBytes / sizeofEpollEvent // rounded down 151 if copiedEvents != 0 { 152 return uintptr(copiedEvents), nil, nil 153 } 154 return 0, nil, err 155 } 156 if timeoutInNanos == 0 { 157 return 0, nil, nil 158 } 159 // In the first iteration of this loop, register with the epoll 160 // instance for readability events, but then immediately continue the 161 // loop since we need to retry ReadEvents() before blocking. In all 162 // subsequent iterations, block until events are available, the timeout 163 // expires, or an interrupt arrives. 164 if ch == nil { 165 var w waiter.Entry 166 w, ch = waiter.NewChannelEntry(waiter.ReadableEvents) 167 if err := epfile.EventRegister(&w); err != nil { 168 return 0, nil, err 169 } 170 defer epfile.EventUnregister(&w) 171 } else { 172 // Set up the timer if a timeout was specified. 173 if timeoutInNanos > 0 && !haveDeadline { 174 timeoutDur := time.Duration(timeoutInNanos) * time.Nanosecond 175 deadline = t.Kernel().MonotonicClock().Now().Add(timeoutDur) 176 haveDeadline = true 177 } 178 if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { 179 if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { 180 err = nil 181 } 182 return 0, nil, err 183 } 184 } 185 } 186 187 } 188 189 // EpollWait implements Linux syscall epoll_wait(2). 190 func EpollWait(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 191 epfd := args[0].Int() 192 eventsAddr := args[1].Pointer() 193 maxEvents := int(args[2].Int()) 194 timeoutInNanos := int64(args[3].Int()) * 1000000 195 196 return waitEpoll(t, epfd, eventsAddr, maxEvents, timeoutInNanos) 197 } 198 199 // EpollPwait implements Linux syscall epoll_pwait(2). 200 func EpollPwait(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 201 maskAddr := args[4].Pointer() 202 maskSize := uint(args[5].Uint()) 203 204 if err := setTempSignalSet(t, maskAddr, maskSize); err != nil { 205 return 0, nil, err 206 } 207 208 return EpollWait(t, sysno, args) 209 } 210 211 // EpollPwait2 implements Linux syscall epoll_pwait(2). 212 func EpollPwait2(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 213 epfd := args[0].Int() 214 eventsAddr := args[1].Pointer() 215 maxEvents := int(args[2].Int()) 216 timeoutPtr := args[3].Pointer() 217 maskAddr := args[4].Pointer() 218 maskSize := uint(args[5].Uint()) 219 haveTimeout := timeoutPtr != 0 220 221 var timeoutInNanos int64 = -1 222 if haveTimeout { 223 var timeout linux.Timespec 224 if _, err := timeout.CopyIn(t, timeoutPtr); err != nil { 225 return 0, nil, err 226 } 227 timeoutInNanos = timeout.ToNsec() 228 } 229 230 if err := setTempSignalSet(t, maskAddr, maskSize); err != nil { 231 return 0, nil, err 232 } 233 234 return waitEpoll(t, epfd, eventsAddr, maxEvents, timeoutInNanos) 235 }