github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/shim/epoll.go (about)

     1  // Copyright 2018 The containerd Authors.
     2  // Copyright 2018 The gVisor Authors.
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //     https://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  // +build linux
    17  
    18  package shim
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"sync"
    24  
    25  	"github.com/containerd/cgroups"
    26  	"github.com/containerd/containerd/events"
    27  	"github.com/containerd/containerd/runtime"
    28  	"golang.org/x/sys/unix"
    29  )
    30  
    31  func newOOMEpoller(publisher events.Publisher) (*epoller, error) {
    32  	fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
    33  	if err != nil {
    34  		return nil, err
    35  	}
    36  	return &epoller{
    37  		fd:        fd,
    38  		publisher: publisher,
    39  		set:       make(map[uintptr]*item),
    40  	}, nil
    41  }
    42  
// epoller watches a set of file descriptors through a single epoll instance
// and publishes a task OOM event for the owning container whenever one of
// them becomes ready.
type epoller struct {
	// mu guards set.
	mu sync.Mutex

	// fd is the epoll instance's file descriptor.
	fd int
	// publisher receives the OOM events emitted by process.
	publisher events.Publisher
	// set maps each watched file descriptor to the container it belongs to.
	set map[uintptr]*item
}
    50  
// item ties a watched file descriptor to its container.
type item struct {
	// id is the container ID reported in published OOM events.
	id string
	// cg is the cgroup the descriptor was obtained from; its state is
	// consulted to detect that the cgroup has been deleted.
	cg cgroups.Cgroup
}
    55  
// Close closes the epoll instance's file descriptor and returns the result of
// the close call. It does not close the descriptors tracked in e.set.
func (e *epoller) Close() error {
	return unix.Close(e.fd)
}
    59  
    60  func (e *epoller) run(ctx context.Context) {
    61  	var events [128]unix.EpollEvent
    62  	for {
    63  		select {
    64  		case <-ctx.Done():
    65  			e.Close()
    66  			return
    67  		default:
    68  			n, err := unix.EpollWait(e.fd, events[:], -1)
    69  			if err != nil {
    70  				if err == unix.EINTR || err == unix.EAGAIN {
    71  					continue
    72  				}
    73  				// Should not happen.
    74  				panic(fmt.Errorf("cgroups: epoll wait: %w", err))
    75  			}
    76  			for i := 0; i < n; i++ {
    77  				e.process(ctx, uintptr(events[i].Fd))
    78  			}
    79  		}
    80  	}
    81  }
    82  
    83  func (e *epoller) add(id string, cg cgroups.Cgroup) error {
    84  	e.mu.Lock()
    85  	defer e.mu.Unlock()
    86  	fd, err := cg.OOMEventFD()
    87  	if err != nil {
    88  		return err
    89  	}
    90  	e.set[fd] = &item{
    91  		id: id,
    92  		cg: cg,
    93  	}
    94  	event := unix.EpollEvent{
    95  		Fd:     int32(fd),
    96  		Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
    97  	}
    98  	return unix.EpollCtl(e.fd, unix.EPOLL_CTL_ADD, int(fd), &event)
    99  }
   100  
   101  func (e *epoller) process(ctx context.Context, fd uintptr) {
   102  	flush(fd)
   103  	e.mu.Lock()
   104  	i, ok := e.set[fd]
   105  	if !ok {
   106  		e.mu.Unlock()
   107  		return
   108  	}
   109  	e.mu.Unlock()
   110  	if i.cg.State() == cgroups.Deleted {
   111  		e.mu.Lock()
   112  		delete(e.set, fd)
   113  		e.mu.Unlock()
   114  		unix.Close(int(fd))
   115  		return
   116  	}
   117  	if err := e.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &TaskOOM{
   118  		ContainerID: i.id,
   119  	}); err != nil {
   120  		// Should not happen.
   121  		panic(fmt.Errorf("publish OOM event: %w", err))
   122  	}
   123  }
   124  
   125  func flush(fd uintptr) error {
   126  	var buf [8]byte
   127  	_, err := unix.Read(int(fd), buf[:])
   128  	return err
   129  }