gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/shim/oom_v2.go (about) 1 // Copyright The containerd Authors. 2 // Copyright 2021 The gVisor Authors. 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // https://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 //go:build linux 17 // +build linux 18 19 package shim 20 21 import ( 22 "context" 23 "fmt" 24 25 cgroupsv2 "github.com/containerd/cgroups/v2" 26 "github.com/containerd/containerd/runtime" 27 "github.com/containerd/containerd/runtime/v2/shim" 28 "github.com/sirupsen/logrus" 29 ) 30 31 // newOOMv2Poller returns an implementation that listens to OOM events 32 // from a container's cgroups v2. 
This is copied from containerd to avoid 33 // having to upgrade containerd package just to get it 34 func newOOMv2Poller(publisher shim.Publisher) (oomPoller, error) { 35 return &watcherV2{ 36 itemCh: make(chan itemV2), 37 publisher: publisher, 38 }, nil 39 } 40 41 // watcher implementation for handling OOM events from a container's cgroup 42 type watcherV2 struct { 43 itemCh chan itemV2 44 publisher shim.Publisher 45 } 46 47 type itemV2 struct { 48 id string 49 ev cgroupsv2.Event 50 err error 51 } 52 53 // Close closes the watcher 54 func (w *watcherV2) Close() error { 55 return nil 56 } 57 58 // Run the loop 59 func (w *watcherV2) run(ctx context.Context) { 60 lastOOMMap := make(map[string]uint64) // key: id, value: ev.OOM 61 for { 62 select { 63 case <-ctx.Done(): 64 w.Close() 65 return 66 case i := <-w.itemCh: 67 if i.err != nil { 68 logrus.WithError(i.err).Debugf("Error listening for OOM, id: %q", i.id) 69 delete(lastOOMMap, i.id) 70 continue 71 } 72 logrus.Debugf("Received OOM event, id: %q, event: %+v", i.id, i.ev) 73 lastOOM := lastOOMMap[i.id] 74 if i.ev.OOM > lastOOM { 75 if err := w.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &TaskOOM{ 76 ContainerID: i.id, 77 }); err != nil { 78 logrus.WithError(err).Error("Publish OOM event") 79 } 80 } 81 if i.ev.OOM > 0 { 82 lastOOMMap[i.id] = i.ev.OOM 83 } 84 } 85 } 86 } 87 88 // Add cgroups.Cgroup to the epoll monitor 89 func (w *watcherV2) add(id string, cgx any) error { 90 cg, ok := cgx.(*cgroupsv2.Manager) 91 if !ok { 92 return fmt.Errorf("expected *cgroupsv2.Manager, got: %T", cgx) 93 } 94 // NOTE: containerd/cgroups/v2 does not support closing eventCh routine 95 // currently. The routine shuts down when an error happens, mostly when the 96 // cgroup is deleted. 
97 eventCh, errCh := cg.EventChan() 98 go func() { 99 for { 100 i := itemV2{id: id} 101 select { 102 case ev := <-eventCh: 103 i.ev = ev 104 w.itemCh <- i 105 case err := <-errCh: 106 i.err = err 107 w.itemCh <- i 108 // we no longer get any event/err when we got an err 109 logrus.WithError(err).Warn("error from eventChan") 110 return 111 } 112 } 113 }() 114 return nil 115 }