github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/shim/epoll.go (about)

     1  // Copyright 2018 The containerd Authors.
     2  // Copyright 2018 The gVisor Authors.
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //     https://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  //go:build linux
    17  // +build linux
    18  
    19  package shim
    20  
    21  import (
    22  	"context"
    23  	"fmt"
    24  	"sync"
    25  
    26  	"github.com/containerd/cgroups"
    27  	"github.com/containerd/containerd/events"
    28  	"github.com/containerd/containerd/runtime"
    29  	"golang.org/x/sys/unix"
    30  )
    31  
    32  func newOOMEpoller(publisher events.Publisher) (*epoller, error) {
    33  	fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
    34  	if err != nil {
    35  		return nil, err
    36  	}
    37  	return &epoller{
    38  		fd:        fd,
    39  		publisher: publisher,
    40  		set:       make(map[uintptr]*item),
    41  	}, nil
    42  }
    43  
    44  type epoller struct {
    45  	mu sync.Mutex
    46  
    47  	fd        int
    48  	publisher events.Publisher
    49  	set       map[uintptr]*item
    50  }
    51  
    52  type item struct {
    53  	id string
    54  	cg cgroups.Cgroup
    55  }
    56  
    57  func (e *epoller) Close() error {
    58  	return unix.Close(e.fd)
    59  }
    60  
    61  func (e *epoller) run(ctx context.Context) {
    62  	var events [128]unix.EpollEvent
    63  	for {
    64  		select {
    65  		case <-ctx.Done():
    66  			e.Close()
    67  			return
    68  		default:
    69  			n, err := unix.EpollWait(e.fd, events[:], -1)
    70  			if err != nil {
    71  				if err == unix.EINTR || err == unix.EAGAIN {
    72  					continue
    73  				}
    74  				// Should not happen.
    75  				panic(fmt.Errorf("cgroups: epoll wait: %w", err))
    76  			}
    77  			for i := 0; i < n; i++ {
    78  				e.process(ctx, uintptr(events[i].Fd))
    79  			}
    80  		}
    81  	}
    82  }
    83  
    84  func (e *epoller) add(id string, cgx any) error {
    85  	e.mu.Lock()
    86  	defer e.mu.Unlock()
    87  	cg, ok := cgx.(cgroups.Cgroup)
    88  	if !ok {
    89  		return fmt.Errorf("expected cgroups.Cgroup, got: %T", cgx)
    90  	}
    91  	fd, err := cg.OOMEventFD()
    92  	if err != nil {
    93  		return err
    94  	}
    95  	e.set[fd] = &item{
    96  		id: id,
    97  		cg: cg,
    98  	}
    99  	event := unix.EpollEvent{
   100  		Fd:     int32(fd),
   101  		Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
   102  	}
   103  	return unix.EpollCtl(e.fd, unix.EPOLL_CTL_ADD, int(fd), &event)
   104  }
   105  
   106  func (e *epoller) process(ctx context.Context, fd uintptr) {
   107  	flush(fd)
   108  	e.mu.Lock()
   109  	i, ok := e.set[fd]
   110  	if !ok {
   111  		e.mu.Unlock()
   112  		return
   113  	}
   114  	e.mu.Unlock()
   115  	if i.cg.State() == cgroups.Deleted {
   116  		e.mu.Lock()
   117  		delete(e.set, fd)
   118  		e.mu.Unlock()
   119  		unix.Close(int(fd))
   120  		return
   121  	}
   122  	if err := e.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &TaskOOM{
   123  		ContainerID: i.id,
   124  	}); err != nil {
   125  		// Should not happen.
   126  		panic(fmt.Errorf("publish OOM event: %w", err))
   127  	}
   128  }
   129  
   130  func flush(fd uintptr) error {
   131  	var buf [8]byte
   132  	_, err := unix.Read(int(fd), buf[:])
   133  	return err
   134  }