github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/vfs2/epoll.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package vfs2
    16  
    17  import (
    18  	"math"
    19  	"time"
    20  
    21  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    22  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    23  	"github.com/SagerNet/gvisor/pkg/hostarch"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    26  	ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    28  	"github.com/SagerNet/gvisor/pkg/waiter"
    29  )
    30  
// sizeofEpollEvent is the size in bytes of one linux.EpollEvent as reported by
// its SizeBytes method. It is computed once at package initialization (via a
// nil *linux.EpollEvent receiver) and is used by waitEpoll both to bound
// maxEvents and to convert a copied byte count into a number of events.
var sizeofEpollEvent = (*linux.EpollEvent)(nil).SizeBytes()
    32  
    33  // EpollCreate1 implements Linux syscall epoll_create1(2).
    34  func EpollCreate1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    35  	flags := args[0].Int()
    36  	if flags&^linux.EPOLL_CLOEXEC != 0 {
    37  		return 0, nil, linuxerr.EINVAL
    38  	}
    39  
    40  	file, err := t.Kernel().VFS().NewEpollInstanceFD(t)
    41  	if err != nil {
    42  		return 0, nil, err
    43  	}
    44  	defer file.DecRef(t)
    45  
    46  	fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{
    47  		CloseOnExec: flags&linux.EPOLL_CLOEXEC != 0,
    48  	})
    49  	if err != nil {
    50  		return 0, nil, err
    51  	}
    52  	return uintptr(fd), nil, nil
    53  }
    54  
    55  // EpollCreate implements Linux syscall epoll_create(2).
    56  func EpollCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    57  	size := args[0].Int()
    58  
    59  	// "Since Linux 2.6.8, the size argument is ignored, but must be greater
    60  	// than zero" - epoll_create(2)
    61  	if size <= 0 {
    62  		return 0, nil, linuxerr.EINVAL
    63  	}
    64  
    65  	file, err := t.Kernel().VFS().NewEpollInstanceFD(t)
    66  	if err != nil {
    67  		return 0, nil, err
    68  	}
    69  	defer file.DecRef(t)
    70  
    71  	fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{})
    72  	if err != nil {
    73  		return 0, nil, err
    74  	}
    75  	return uintptr(fd), nil, nil
    76  }
    77  
    78  // EpollCtl implements Linux syscall epoll_ctl(2).
    79  func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    80  	epfd := args[0].Int()
    81  	op := args[1].Int()
    82  	fd := args[2].Int()
    83  	eventAddr := args[3].Pointer()
    84  
    85  	epfile := t.GetFileVFS2(epfd)
    86  	if epfile == nil {
    87  		return 0, nil, linuxerr.EBADF
    88  	}
    89  	defer epfile.DecRef(t)
    90  	ep, ok := epfile.Impl().(*vfs.EpollInstance)
    91  	if !ok {
    92  		return 0, nil, linuxerr.EINVAL
    93  	}
    94  	file := t.GetFileVFS2(fd)
    95  	if file == nil {
    96  		return 0, nil, linuxerr.EBADF
    97  	}
    98  	defer file.DecRef(t)
    99  	if epfile == file {
   100  		return 0, nil, linuxerr.EINVAL
   101  	}
   102  
   103  	var event linux.EpollEvent
   104  	switch op {
   105  	case linux.EPOLL_CTL_ADD:
   106  		if _, err := event.CopyIn(t, eventAddr); err != nil {
   107  			return 0, nil, err
   108  		}
   109  		return 0, nil, ep.AddInterest(file, fd, event)
   110  	case linux.EPOLL_CTL_DEL:
   111  		return 0, nil, ep.DeleteInterest(file, fd)
   112  	case linux.EPOLL_CTL_MOD:
   113  		if _, err := event.CopyIn(t, eventAddr); err != nil {
   114  			return 0, nil, err
   115  		}
   116  		return 0, nil, ep.ModifyInterest(file, fd, event)
   117  	default:
   118  		return 0, nil, linuxerr.EINVAL
   119  	}
   120  }
   121  
   122  func waitEpoll(t *kernel.Task, epfd int32, eventsAddr hostarch.Addr, maxEvents int, timeoutInNanos int64) (uintptr, *kernel.SyscallControl, error) {
   123  	var _EP_MAX_EVENTS = math.MaxInt32 / sizeofEpollEvent // Linux: fs/eventpoll.c:EP_MAX_EVENTS
   124  	if maxEvents <= 0 || maxEvents > _EP_MAX_EVENTS {
   125  		return 0, nil, linuxerr.EINVAL
   126  	}
   127  
   128  	epfile := t.GetFileVFS2(epfd)
   129  	if epfile == nil {
   130  		return 0, nil, linuxerr.EBADF
   131  	}
   132  	defer epfile.DecRef(t)
   133  	ep, ok := epfile.Impl().(*vfs.EpollInstance)
   134  	if !ok {
   135  		return 0, nil, linuxerr.EINVAL
   136  	}
   137  
   138  	// Allocate space for a few events on the stack for the common case in
   139  	// which we don't have too many events.
   140  	var (
   141  		eventsArr    [16]linux.EpollEvent
   142  		ch           chan struct{}
   143  		haveDeadline bool
   144  		deadline     ktime.Time
   145  	)
   146  	for {
   147  		events := ep.ReadEvents(eventsArr[:0], maxEvents)
   148  		if len(events) != 0 {
   149  			copiedBytes, err := linux.CopyEpollEventSliceOut(t, eventsAddr, events)
   150  			copiedEvents := copiedBytes / sizeofEpollEvent // rounded down
   151  			if copiedEvents != 0 {
   152  				return uintptr(copiedEvents), nil, nil
   153  			}
   154  			return 0, nil, err
   155  		}
   156  		if timeoutInNanos == 0 {
   157  			return 0, nil, nil
   158  		}
   159  		// In the first iteration of this loop, register with the epoll
   160  		// instance for readability events, but then immediately continue the
   161  		// loop since we need to retry ReadEvents() before blocking. In all
   162  		// subsequent iterations, block until events are available, the timeout
   163  		// expires, or an interrupt arrives.
   164  		if ch == nil {
   165  			var w waiter.Entry
   166  			w, ch = waiter.NewChannelEntry(nil)
   167  			epfile.EventRegister(&w, waiter.ReadableEvents)
   168  			defer epfile.EventUnregister(&w)
   169  		} else {
   170  			// Set up the timer if a timeout was specified.
   171  			if timeoutInNanos > 0 && !haveDeadline {
   172  				timeoutDur := time.Duration(timeoutInNanos) * time.Nanosecond
   173  				deadline = t.Kernel().MonotonicClock().Now().Add(timeoutDur)
   174  				haveDeadline = true
   175  			}
   176  			if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
   177  				if linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
   178  					err = nil
   179  				}
   180  				return 0, nil, err
   181  			}
   182  		}
   183  	}
   184  
   185  }
   186  
   187  // EpollWait implements Linux syscall epoll_wait(2).
   188  func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   189  	epfd := args[0].Int()
   190  	eventsAddr := args[1].Pointer()
   191  	maxEvents := int(args[2].Int())
   192  	timeoutInNanos := int64(args[3].Int()) * 1000000
   193  
   194  	return waitEpoll(t, epfd, eventsAddr, maxEvents, timeoutInNanos)
   195  }
   196  
   197  // EpollPwait implements Linux syscall epoll_pwait(2).
   198  func EpollPwait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   199  	maskAddr := args[4].Pointer()
   200  	maskSize := uint(args[5].Uint())
   201  
   202  	if err := setTempSignalSet(t, maskAddr, maskSize); err != nil {
   203  		return 0, nil, err
   204  	}
   205  
   206  	return EpollWait(t, args)
   207  }
   208  
   209  // EpollPwait2 implements Linux syscall epoll_pwait(2).
   210  func EpollPwait2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   211  	epfd := args[0].Int()
   212  	eventsAddr := args[1].Pointer()
   213  	maxEvents := int(args[2].Int())
   214  	timeoutPtr := args[3].Pointer()
   215  	maskAddr := args[4].Pointer()
   216  	maskSize := uint(args[5].Uint())
   217  	haveTimeout := timeoutPtr != 0
   218  
   219  	var timeoutInNanos int64 = -1
   220  	if haveTimeout {
   221  		var timeout linux.Timespec
   222  		if _, err := timeout.CopyIn(t, timeoutPtr); err != nil {
   223  			return 0, nil, err
   224  		}
   225  		timeoutInNanos = timeout.ToNsec()
   226  	}
   227  
   228  	if err := setTempSignalSet(t, maskAddr, maskSize); err != nil {
   229  		return 0, nil, err
   230  	}
   231  
   232  	return waitEpoll(t, epfd, eventsAddr, maxEvents, timeoutInNanos)
   233  }