github.com/kata-containers/runtime@v0.0.0-20210505125100-04f29832a923/virtcontainers/monitor.go (about) 1 // Copyright (c) 2018 HyperHQ Inc. 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 package virtcontainers 7 8 import ( 9 "sync" 10 "time" 11 12 "github.com/pkg/errors" 13 ) 14 15 const ( 16 DefaultMonitorCheckInterval = 1 * time.Second 17 watcherChannelSize = 128 18 ) 19 20 type monitor struct { 21 sync.Mutex 22 23 sandbox *Sandbox 24 checkInterval time.Duration 25 watchers []chan error 26 wg sync.WaitGroup 27 running bool 28 stopCh chan bool 29 } 30 31 func newMonitor(s *Sandbox) *monitor { 32 return &monitor{ 33 sandbox: s, 34 checkInterval: DefaultMonitorCheckInterval, 35 stopCh: make(chan bool, 1), 36 } 37 } 38 39 func (m *monitor) newWatcher() (chan error, error) { 40 m.Lock() 41 defer m.Unlock() 42 43 watcher := make(chan error, watcherChannelSize) 44 m.watchers = append(m.watchers, watcher) 45 46 if !m.running { 47 m.running = true 48 m.wg.Add(1) 49 50 // create and start agent watcher 51 go func() { 52 tick := time.NewTicker(m.checkInterval) 53 for { 54 select { 55 case <-m.stopCh: 56 tick.Stop() 57 m.wg.Done() 58 return 59 case <-tick.C: 60 m.watchHypervisor() 61 m.watchAgent() 62 } 63 } 64 }() 65 } 66 67 return watcher, nil 68 } 69 70 func (m *monitor) notify(err error) { 71 m.sandbox.agent.markDead() 72 73 m.Lock() 74 defer m.Unlock() 75 76 if !m.running { 77 return 78 } 79 80 // a watcher is not supposed to close the channel 81 // but just in case... 82 defer func() { 83 if x := recover(); x != nil { 84 virtLog.Warnf("watcher closed channel: %v", x) 85 } 86 }() 87 88 for _, c := range m.watchers { 89 // throw away message can not write to channel 90 // make it not stuck, the first error is useful. 91 select { 92 case c <- err: 93 94 default: 95 virtLog.WithField("channel-size", watcherChannelSize).Warnf("watcher channel is full, throw notify message") 96 } 97 } 98 } 99 100 func (m *monitor) stop() { 101 // wait outside of monitor lock for the watcher channel to exit. 102 defer m.wg.Wait() 103 104 m.Lock() 105 defer m.Unlock() 106 107 if !m.running { 108 return 109 } 110 111 m.stopCh <- true 112 defer func() { 113 m.watchers = nil 114 m.running = false 115 }() 116 117 // a watcher is not supposed to close the channel 118 // but just in case... 119 defer func() { 120 if x := recover(); x != nil { 121 virtLog.Warnf("watcher closed channel: %v", x) 122 } 123 }() 124 125 for _, c := range m.watchers { 126 close(c) 127 } 128 } 129 130 func (m *monitor) watchAgent() { 131 err := m.sandbox.agent.check() 132 if err != nil { 133 // TODO: define and export error types 134 m.notify(errors.Wrapf(err, "failed to ping agent")) 135 } 136 } 137 138 func (m *monitor) watchHypervisor() error { 139 if err := m.sandbox.hypervisor.check(); err != nil { 140 m.notify(errors.Wrapf(err, "failed to ping hypervisor process")) 141 return err 142 } 143 return nil 144 }