github.com/kata-containers/runtime@v0.0.0-20210505125100-04f29832a923/virtcontainers/monitor.go (about)

     1  // Copyright (c) 2018 HyperHQ Inc.
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  //
     5  
     6  package virtcontainers
     7  
     8  import (
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/pkg/errors"
    13  )
    14  
    15  const (
    16  	DefaultMonitorCheckInterval = 1 * time.Second
    17  	watcherChannelSize          = 128
    18  )
    19  
// monitor periodically checks a sandbox's hypervisor and agent and reports
// check failures to registered watcher channels.
type monitor struct {
	// Mutex is held by newWatcher, notify and stop while they read or
	// modify watchers and running.
	sync.Mutex

	// sandbox is the sandbox whose hypervisor and agent are checked.
	sandbox *Sandbox
	// checkInterval is the period of the ticker driving the checks.
	checkInterval time.Duration
	// watchers are the channels that notify sends errors to; stop closes them.
	watchers []chan error
	// wg tracks the background checking goroutine so stop can wait for it.
	wg sync.WaitGroup
	// running is true while the background checking goroutine is started.
	running bool
	// stopCh tells the background checking goroutine to exit.
	stopCh chan bool
}
    30  
    31  func newMonitor(s *Sandbox) *monitor {
    32  	return &monitor{
    33  		sandbox:       s,
    34  		checkInterval: DefaultMonitorCheckInterval,
    35  		stopCh:        make(chan bool, 1),
    36  	}
    37  }
    38  
    39  func (m *monitor) newWatcher() (chan error, error) {
    40  	m.Lock()
    41  	defer m.Unlock()
    42  
    43  	watcher := make(chan error, watcherChannelSize)
    44  	m.watchers = append(m.watchers, watcher)
    45  
    46  	if !m.running {
    47  		m.running = true
    48  		m.wg.Add(1)
    49  
    50  		// create and start agent watcher
    51  		go func() {
    52  			tick := time.NewTicker(m.checkInterval)
    53  			for {
    54  				select {
    55  				case <-m.stopCh:
    56  					tick.Stop()
    57  					m.wg.Done()
    58  					return
    59  				case <-tick.C:
    60  					m.watchHypervisor()
    61  					m.watchAgent()
    62  				}
    63  			}
    64  		}()
    65  	}
    66  
    67  	return watcher, nil
    68  }
    69  
    70  func (m *monitor) notify(err error) {
    71  	m.sandbox.agent.markDead()
    72  
    73  	m.Lock()
    74  	defer m.Unlock()
    75  
    76  	if !m.running {
    77  		return
    78  	}
    79  
    80  	// a watcher is not supposed to close the channel
    81  	// but just in case...
    82  	defer func() {
    83  		if x := recover(); x != nil {
    84  			virtLog.Warnf("watcher closed channel: %v", x)
    85  		}
    86  	}()
    87  
    88  	for _, c := range m.watchers {
    89  		// throw away message can not write to channel
    90  		// make it not stuck, the first error is useful.
    91  		select {
    92  		case c <- err:
    93  
    94  		default:
    95  			virtLog.WithField("channel-size", watcherChannelSize).Warnf("watcher channel is full, throw notify message")
    96  		}
    97  	}
    98  }
    99  
   100  func (m *monitor) stop() {
   101  	// wait outside of monitor lock for the watcher channel to exit.
   102  	defer m.wg.Wait()
   103  
   104  	m.Lock()
   105  	defer m.Unlock()
   106  
   107  	if !m.running {
   108  		return
   109  	}
   110  
   111  	m.stopCh <- true
   112  	defer func() {
   113  		m.watchers = nil
   114  		m.running = false
   115  	}()
   116  
   117  	// a watcher is not supposed to close the channel
   118  	// but just in case...
   119  	defer func() {
   120  		if x := recover(); x != nil {
   121  			virtLog.Warnf("watcher closed channel: %v", x)
   122  		}
   123  	}()
   124  
   125  	for _, c := range m.watchers {
   126  		close(c)
   127  	}
   128  }
   129  
   130  func (m *monitor) watchAgent() {
   131  	err := m.sandbox.agent.check()
   132  	if err != nil {
   133  		// TODO: define and export error types
   134  		m.notify(errors.Wrapf(err, "failed to ping agent"))
   135  	}
   136  }
   137  
   138  func (m *monitor) watchHypervisor() error {
   139  	if err := m.sandbox.hypervisor.check(); err != nil {
   140  		m.notify(errors.Wrapf(err, "failed to ping hypervisor process"))
   141  		return err
   142  	}
   143  	return nil
   144  }