github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/syscalls/linux/sys_epoll.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package linux
    16  
    17  import (
    18  	"math"
    19  	"time"
    20  
    21  	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
    22  	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
    23  	"github.com/MerlinKodo/gvisor/pkg/hostarch"
    24  	"github.com/MerlinKodo/gvisor/pkg/sentry/arch"
    25  	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel"
    26  	ktime "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/time"
    27  	"github.com/MerlinKodo/gvisor/pkg/sentry/vfs"
    28  	"github.com/MerlinKodo/gvisor/pkg/waiter"
    29  )
    30  
// sizeofEpollEvent is the value reported by linux.EpollEvent's SizeBytes
// method, evaluated once at package initialization. It is used in this file
// to convert between byte counts and whole-event counts.
var sizeofEpollEvent = (*linux.EpollEvent)(nil).SizeBytes()
    32  
    33  // EpollCreate1 implements Linux syscall epoll_create1(2).
    34  func EpollCreate1(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    35  	flags := args[0].Int()
    36  	if flags&^linux.EPOLL_CLOEXEC != 0 {
    37  		return 0, nil, linuxerr.EINVAL
    38  	}
    39  
    40  	file, err := t.Kernel().VFS().NewEpollInstanceFD(t)
    41  	if err != nil {
    42  		return 0, nil, err
    43  	}
    44  	defer file.DecRef(t)
    45  
    46  	fd, err := t.NewFDFrom(0, file, kernel.FDFlags{
    47  		CloseOnExec: flags&linux.EPOLL_CLOEXEC != 0,
    48  	})
    49  	if err != nil {
    50  		return 0, nil, err
    51  	}
    52  	return uintptr(fd), nil, nil
    53  }
    54  
    55  // EpollCreate implements Linux syscall epoll_create(2).
    56  func EpollCreate(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    57  	size := args[0].Int()
    58  
    59  	// "Since Linux 2.6.8, the size argument is ignored, but must be greater
    60  	// than zero" - epoll_create(2)
    61  	if size <= 0 {
    62  		return 0, nil, linuxerr.EINVAL
    63  	}
    64  
    65  	file, err := t.Kernel().VFS().NewEpollInstanceFD(t)
    66  	if err != nil {
    67  		return 0, nil, err
    68  	}
    69  	defer file.DecRef(t)
    70  
    71  	fd, err := t.NewFDFrom(0, file, kernel.FDFlags{})
    72  	if err != nil {
    73  		return 0, nil, err
    74  	}
    75  	return uintptr(fd), nil, nil
    76  }
    77  
    78  // EpollCtl implements Linux syscall epoll_ctl(2).
    79  func EpollCtl(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    80  	epfd := args[0].Int()
    81  	op := args[1].Int()
    82  	fd := args[2].Int()
    83  	eventAddr := args[3].Pointer()
    84  
    85  	epfile := t.GetFile(epfd)
    86  	if epfile == nil {
    87  		return 0, nil, linuxerr.EBADF
    88  	}
    89  	defer epfile.DecRef(t)
    90  	ep, ok := epfile.Impl().(*vfs.EpollInstance)
    91  	if !ok {
    92  		return 0, nil, linuxerr.EINVAL
    93  	}
    94  	file := t.GetFile(fd)
    95  	if file == nil {
    96  		return 0, nil, linuxerr.EBADF
    97  	}
    98  	defer file.DecRef(t)
    99  	if epfile == file {
   100  		return 0, nil, linuxerr.EINVAL
   101  	}
   102  
   103  	var event linux.EpollEvent
   104  	switch op {
   105  	case linux.EPOLL_CTL_ADD:
   106  		if _, err := event.CopyIn(t, eventAddr); err != nil {
   107  			return 0, nil, err
   108  		}
   109  		return 0, nil, ep.AddInterest(file, fd, event)
   110  	case linux.EPOLL_CTL_DEL:
   111  		return 0, nil, ep.DeleteInterest(file, fd)
   112  	case linux.EPOLL_CTL_MOD:
   113  		if _, err := event.CopyIn(t, eventAddr); err != nil {
   114  			return 0, nil, err
   115  		}
   116  		return 0, nil, ep.ModifyInterest(file, fd, event)
   117  	default:
   118  		return 0, nil, linuxerr.EINVAL
   119  	}
   120  }
   121  
   122  func waitEpoll(t *kernel.Task, epfd int32, eventsAddr hostarch.Addr, maxEvents int, timeoutInNanos int64) (uintptr, *kernel.SyscallControl, error) {
   123  	var _EP_MAX_EVENTS = math.MaxInt32 / sizeofEpollEvent // Linux: fs/eventpoll.c:EP_MAX_EVENTS
   124  	if maxEvents <= 0 || maxEvents > _EP_MAX_EVENTS {
   125  		return 0, nil, linuxerr.EINVAL
   126  	}
   127  
   128  	epfile := t.GetFile(epfd)
   129  	if epfile == nil {
   130  		return 0, nil, linuxerr.EBADF
   131  	}
   132  	defer epfile.DecRef(t)
   133  	ep, ok := epfile.Impl().(*vfs.EpollInstance)
   134  	if !ok {
   135  		return 0, nil, linuxerr.EINVAL
   136  	}
   137  
   138  	// Allocate space for a few events on the stack for the common case in
   139  	// which we don't have too many events.
   140  	var (
   141  		eventsArr    [16]linux.EpollEvent
   142  		ch           chan struct{}
   143  		haveDeadline bool
   144  		deadline     ktime.Time
   145  	)
   146  	for {
   147  		events := ep.ReadEvents(eventsArr[:0], maxEvents)
   148  		if len(events) != 0 {
   149  			copiedBytes, err := linux.CopyEpollEventSliceOut(t, eventsAddr, events)
   150  			copiedEvents := copiedBytes / sizeofEpollEvent // rounded down
   151  			if copiedEvents != 0 {
   152  				return uintptr(copiedEvents), nil, nil
   153  			}
   154  			return 0, nil, err
   155  		}
   156  		if timeoutInNanos == 0 {
   157  			return 0, nil, nil
   158  		}
   159  		// In the first iteration of this loop, register with the epoll
   160  		// instance for readability events, but then immediately continue the
   161  		// loop since we need to retry ReadEvents() before blocking. In all
   162  		// subsequent iterations, block until events are available, the timeout
   163  		// expires, or an interrupt arrives.
   164  		if ch == nil {
   165  			var w waiter.Entry
   166  			w, ch = waiter.NewChannelEntry(waiter.ReadableEvents)
   167  			if err := epfile.EventRegister(&w); err != nil {
   168  				return 0, nil, err
   169  			}
   170  			defer epfile.EventUnregister(&w)
   171  		} else {
   172  			// Set up the timer if a timeout was specified.
   173  			if timeoutInNanos > 0 && !haveDeadline {
   174  				timeoutDur := time.Duration(timeoutInNanos) * time.Nanosecond
   175  				deadline = t.Kernel().MonotonicClock().Now().Add(timeoutDur)
   176  				haveDeadline = true
   177  			}
   178  			if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
   179  				if linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
   180  					err = nil
   181  				}
   182  				return 0, nil, err
   183  			}
   184  		}
   185  	}
   186  
   187  }
   188  
   189  // EpollWait implements Linux syscall epoll_wait(2).
   190  func EpollWait(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   191  	epfd := args[0].Int()
   192  	eventsAddr := args[1].Pointer()
   193  	maxEvents := int(args[2].Int())
   194  	timeoutInNanos := int64(args[3].Int()) * 1000000
   195  
   196  	return waitEpoll(t, epfd, eventsAddr, maxEvents, timeoutInNanos)
   197  }
   198  
   199  // EpollPwait implements Linux syscall epoll_pwait(2).
   200  func EpollPwait(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   201  	maskAddr := args[4].Pointer()
   202  	maskSize := uint(args[5].Uint())
   203  
   204  	if err := setTempSignalSet(t, maskAddr, maskSize); err != nil {
   205  		return 0, nil, err
   206  	}
   207  
   208  	return EpollWait(t, sysno, args)
   209  }
   210  
   211  // EpollPwait2 implements Linux syscall epoll_pwait(2).
   212  func EpollPwait2(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   213  	epfd := args[0].Int()
   214  	eventsAddr := args[1].Pointer()
   215  	maxEvents := int(args[2].Int())
   216  	timeoutPtr := args[3].Pointer()
   217  	maskAddr := args[4].Pointer()
   218  	maskSize := uint(args[5].Uint())
   219  	haveTimeout := timeoutPtr != 0
   220  
   221  	var timeoutInNanos int64 = -1
   222  	if haveTimeout {
   223  		var timeout linux.Timespec
   224  		if _, err := timeout.CopyIn(t, timeoutPtr); err != nil {
   225  			return 0, nil, err
   226  		}
   227  		timeoutInNanos = timeout.ToNsec()
   228  	}
   229  
   230  	if err := setTempSignalSet(t, maskAddr, maskSize); err != nil {
   231  		return 0, nil, err
   232  	}
   233  
   234  	return waitEpoll(t, epfd, eventsAddr, maxEvents, timeoutInNanos)
   235  }