github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/vfs2/epoll.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package vfs2 16 17 import ( 18 "math" 19 "time" 20 21 "github.com/SagerNet/gvisor/pkg/abi/linux" 22 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 23 "github.com/SagerNet/gvisor/pkg/hostarch" 24 "github.com/SagerNet/gvisor/pkg/sentry/arch" 25 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 26 ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time" 27 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 28 "github.com/SagerNet/gvisor/pkg/waiter" 29 ) 30 31 var sizeofEpollEvent = (*linux.EpollEvent)(nil).SizeBytes() 32 33 // EpollCreate1 implements Linux syscall epoll_create1(2). 34 func EpollCreate1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 35 flags := args[0].Int() 36 if flags&^linux.EPOLL_CLOEXEC != 0 { 37 return 0, nil, linuxerr.EINVAL 38 } 39 40 file, err := t.Kernel().VFS().NewEpollInstanceFD(t) 41 if err != nil { 42 return 0, nil, err 43 } 44 defer file.DecRef(t) 45 46 fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{ 47 CloseOnExec: flags&linux.EPOLL_CLOEXEC != 0, 48 }) 49 if err != nil { 50 return 0, nil, err 51 } 52 return uintptr(fd), nil, nil 53 } 54 55 // EpollCreate implements Linux syscall epoll_create(2). 56 func EpollCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 57 size := args[0].Int() 58 59 // "Since Linux 2.6.8, the size argument is ignored, but must be greater 60 // than zero" - epoll_create(2) 61 if size <= 0 { 62 return 0, nil, linuxerr.EINVAL 63 } 64 65 file, err := t.Kernel().VFS().NewEpollInstanceFD(t) 66 if err != nil { 67 return 0, nil, err 68 } 69 defer file.DecRef(t) 70 71 fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{}) 72 if err != nil { 73 return 0, nil, err 74 } 75 return uintptr(fd), nil, nil 76 } 77 78 // EpollCtl implements Linux syscall epoll_ctl(2). 79 func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 80 epfd := args[0].Int() 81 op := args[1].Int() 82 fd := args[2].Int() 83 eventAddr := args[3].Pointer() 84 85 epfile := t.GetFileVFS2(epfd) 86 if epfile == nil { 87 return 0, nil, linuxerr.EBADF 88 } 89 defer epfile.DecRef(t) 90 ep, ok := epfile.Impl().(*vfs.EpollInstance) 91 if !ok { 92 return 0, nil, linuxerr.EINVAL 93 } 94 file := t.GetFileVFS2(fd) 95 if file == nil { 96 return 0, nil, linuxerr.EBADF 97 } 98 defer file.DecRef(t) 99 if epfile == file { 100 return 0, nil, linuxerr.EINVAL 101 } 102 103 var event linux.EpollEvent 104 switch op { 105 case linux.EPOLL_CTL_ADD: 106 if _, err := event.CopyIn(t, eventAddr); err != nil { 107 return 0, nil, err 108 } 109 return 0, nil, ep.AddInterest(file, fd, event) 110 case linux.EPOLL_CTL_DEL: 111 return 0, nil, ep.DeleteInterest(file, fd) 112 case linux.EPOLL_CTL_MOD: 113 if _, err := event.CopyIn(t, eventAddr); err != nil { 114 return 0, nil, err 115 } 116 return 0, nil, ep.ModifyInterest(file, fd, event) 117 default: 118 return 0, nil, linuxerr.EINVAL 119 } 120 } 121 122 func waitEpoll(t *kernel.Task, epfd int32, eventsAddr hostarch.Addr, maxEvents int, timeoutInNanos int64) (uintptr, *kernel.SyscallControl, error) { 123 var _EP_MAX_EVENTS = math.MaxInt32 / sizeofEpollEvent // Linux: fs/eventpoll.c:EP_MAX_EVENTS 124 if maxEvents <= 0 || maxEvents > _EP_MAX_EVENTS { 125 return 0, nil, linuxerr.EINVAL 126 } 127 128 epfile := t.GetFileVFS2(epfd) 129 if epfile == nil { 130 return 0, nil, linuxerr.EBADF 131 } 132 defer epfile.DecRef(t) 133 ep, ok := epfile.Impl().(*vfs.EpollInstance) 134 if !ok { 135 return 0, nil, linuxerr.EINVAL 136 } 137 138 // Allocate space for a few events on the stack for the common case in 139 // which we don't have too many events. 140 var ( 141 eventsArr [16]linux.EpollEvent 142 ch chan struct{} 143 haveDeadline bool 144 deadline ktime.Time 145 ) 146 for { 147 events := ep.ReadEvents(eventsArr[:0], maxEvents) 148 if len(events) != 0 { 149 copiedBytes, err := linux.CopyEpollEventSliceOut(t, eventsAddr, events) 150 copiedEvents := copiedBytes / sizeofEpollEvent // rounded down 151 if copiedEvents != 0 { 152 return uintptr(copiedEvents), nil, nil 153 } 154 return 0, nil, err 155 } 156 if timeoutInNanos == 0 { 157 return 0, nil, nil 158 } 159 // In the first iteration of this loop, register with the epoll 160 // instance for readability events, but then immediately continue the 161 // loop since we need to retry ReadEvents() before blocking. In all 162 // subsequent iterations, block until events are available, the timeout 163 // expires, or an interrupt arrives. 164 if ch == nil { 165 var w waiter.Entry 166 w, ch = waiter.NewChannelEntry(nil) 167 epfile.EventRegister(&w, waiter.ReadableEvents) 168 defer epfile.EventUnregister(&w) 169 } else { 170 // Set up the timer if a timeout was specified. 171 if timeoutInNanos > 0 && !haveDeadline { 172 timeoutDur := time.Duration(timeoutInNanos) * time.Nanosecond 173 deadline = t.Kernel().MonotonicClock().Now().Add(timeoutDur) 174 haveDeadline = true 175 } 176 if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { 177 if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { 178 err = nil 179 } 180 return 0, nil, err 181 } 182 } 183 } 184 185 } 186 187 // EpollWait implements Linux syscall epoll_wait(2). 188 func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 189 epfd := args[0].Int() 190 eventsAddr := args[1].Pointer() 191 maxEvents := int(args[2].Int()) 192 timeoutInNanos := int64(args[3].Int()) * 1000000 193 194 return waitEpoll(t, epfd, eventsAddr, maxEvents, timeoutInNanos) 195 } 196 197 // EpollPwait implements Linux syscall epoll_pwait(2). 198 func EpollPwait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 199 maskAddr := args[4].Pointer() 200 maskSize := uint(args[5].Uint()) 201 202 if err := setTempSignalSet(t, maskAddr, maskSize); err != nil { 203 return 0, nil, err 204 } 205 206 return EpollWait(t, args) 207 } 208 209 // EpollPwait2 implements Linux syscall epoll_pwait(2). 210 func EpollPwait2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 211 epfd := args[0].Int() 212 eventsAddr := args[1].Pointer() 213 maxEvents := int(args[2].Int()) 214 timeoutPtr := args[3].Pointer() 215 maskAddr := args[4].Pointer() 216 maskSize := uint(args[5].Uint()) 217 haveTimeout := timeoutPtr != 0 218 219 var timeoutInNanos int64 = -1 220 if haveTimeout { 221 var timeout linux.Timespec 222 if _, err := timeout.CopyIn(t, timeoutPtr); err != nil { 223 return 0, nil, err 224 } 225 timeoutInNanos = timeout.ToNsec() 226 } 227 228 if err := setTempSignalSet(t, maskAddr, maskSize); err != nil { 229 return 0, nil, err 230 } 231 232 return waitEpoll(t, epfd, eventsAddr, maxEvents, timeoutInNanos) 233 }