github.com/demonoid81/containerd@v1.3.4/pkg/oom/epoll.go (about)

     1  // +build linux
     2  
     3  /*
     4     Copyright The containerd Authors.
     5  
     6     Licensed under the Apache License, Version 2.0 (the "License");
     7     you may not use this file except in compliance with the License.
     8     You may obtain a copy of the License at
     9  
    10         http://www.apache.org/licenses/LICENSE-2.0
    11  
    12     Unless required by applicable law or agreed to in writing, software
    13     distributed under the License is distributed on an "AS IS" BASIS,
    14     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15     See the License for the specific language governing permissions and
    16     limitations under the License.
    17  */
    18  
    19  package oom
    20  
    21  import (
    22  	"context"
    23  	"sync"
    24  
    25  	"github.com/containerd/cgroups"
    26  	eventstypes "github.com/containerd/containerd/api/events"
    27  	"github.com/containerd/containerd/runtime"
    28  	"github.com/containerd/containerd/runtime/v2/shim"
    29  	"github.com/sirupsen/logrus"
    30  	"golang.org/x/sys/unix"
    31  )
    32  
    33  // New returns an epoll implementation that listens to OOM events
    34  // from a container's cgroups.
    35  func New(publisher shim.Publisher) (*Epoller, error) {
    36  	fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
    37  	if err != nil {
    38  		return nil, err
    39  	}
    40  	return &Epoller{
    41  		fd:        fd,
    42  		publisher: publisher,
    43  		set:       make(map[uintptr]*item),
    44  	}, nil
    45  }
    46  
    47  // Epoller implementation for handling OOM events from a container's cgroup
    48  type Epoller struct {
    49  	mu sync.Mutex
    50  
    51  	fd        int
    52  	publisher shim.Publisher
    53  	set       map[uintptr]*item
    54  }
    55  
    56  type item struct {
    57  	id string
    58  	cg cgroups.Cgroup
    59  }
    60  
    61  // Close the epoll fd
    62  func (e *Epoller) Close() error {
    63  	return unix.Close(e.fd)
    64  }
    65  
    66  // Run the epoll loop
    67  func (e *Epoller) Run(ctx context.Context) {
    68  	var events [128]unix.EpollEvent
    69  	for {
    70  		select {
    71  		case <-ctx.Done():
    72  			e.Close()
    73  			return
    74  		default:
    75  			n, err := unix.EpollWait(e.fd, events[:], -1)
    76  			if err != nil {
    77  				if err == unix.EINTR {
    78  					continue
    79  				}
    80  				logrus.WithError(err).Error("cgroups: epoll wait")
    81  			}
    82  			for i := 0; i < n; i++ {
    83  				e.process(ctx, uintptr(events[i].Fd))
    84  			}
    85  		}
    86  	}
    87  }
    88  
    89  // Add the cgroup to the epoll monitor
    90  func (e *Epoller) Add(id string, cg cgroups.Cgroup) error {
    91  	e.mu.Lock()
    92  	defer e.mu.Unlock()
    93  	fd, err := cg.OOMEventFD()
    94  	if err != nil {
    95  		return err
    96  	}
    97  	e.set[fd] = &item{
    98  		id: id,
    99  		cg: cg,
   100  	}
   101  	event := unix.EpollEvent{
   102  		Fd:     int32(fd),
   103  		Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
   104  	}
   105  	return unix.EpollCtl(e.fd, unix.EPOLL_CTL_ADD, int(fd), &event)
   106  }
   107  
   108  func (e *Epoller) process(ctx context.Context, fd uintptr) {
   109  	flush(fd)
   110  	e.mu.Lock()
   111  	i, ok := e.set[fd]
   112  	if !ok {
   113  		e.mu.Unlock()
   114  		return
   115  	}
   116  	e.mu.Unlock()
   117  	if i.cg.State() == cgroups.Deleted {
   118  		e.mu.Lock()
   119  		delete(e.set, fd)
   120  		e.mu.Unlock()
   121  		unix.Close(int(fd))
   122  		return
   123  	}
   124  	if err := e.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &eventstypes.TaskOOM{
   125  		ContainerID: i.id,
   126  	}); err != nil {
   127  		logrus.WithError(err).Error("publish OOM event")
   128  	}
   129  }
   130  
   131  func flush(fd uintptr) error {
   132  	var buf [8]byte
   133  	_, err := unix.Read(int(fd), buf[:])
   134  	return err
   135  }