github.com/Cloud-Foundations/Dominator@v0.3.4/fleetmanager/hypervisors/monitor.go (about)

     1  package hypervisors
     2  
     3  import (
     4  	"flag"
     5  	"time"
     6  
     7  	"github.com/Cloud-Foundations/Dominator/lib/srpc"
     8  	hyper_proto "github.com/Cloud-Foundations/Dominator/proto/hypervisor"
     9  )
    10  
    11  var (
    12  	hypervisorProbeTimeout = flag.Duration("hypervisorProbeTimeout",
    13  		time.Second*5, "time after which a probe is sent to a quiet Hypervisor")
    14  	hypervisorResponseTimeout = flag.Duration("hypervisorResponseTimeout",
    15  		time.Second*19,
    16  		"time after which a Hypervisor is marked as unresponsive")
    17  )
    18  
    19  func (h *hypervisorType) monitorLoop(client *srpc.Client, conn *srpc.Conn,
    20  	closeClientChannel <-chan struct{}) {
    21  	pingDeferChannel := make(chan struct{})
    22  	defer close(pingDeferChannel)
    23  	go h.pingLoop(conn, pingDeferChannel)
    24  	lastReceiveTime := time.Now()
    25  	for {
    26  		timeout := *hypervisorResponseTimeout - time.Since(lastReceiveTime)
    27  		if timeout <= 0 {
    28  			timeout = time.Millisecond
    29  		}
    30  		timer := time.NewTimer(timeout)
    31  		select {
    32  		case <-closeClientChannel:
    33  			client.Close()
    34  			return
    35  		case _, ok := <-h.receiveChannel:
    36  			if !timer.Stop() {
    37  				<-timer.C
    38  			}
    39  			if !ok {
    40  				return
    41  			}
    42  			select {
    43  			case pingDeferChannel <- struct{}{}:
    44  			default:
    45  			}
    46  			lastReceiveTime = time.Now()
    47  			h.mutex.Lock()
    48  			h.probeStatus = probeStatusConnected
    49  			h.mutex.Unlock()
    50  		case <-timer.C:
    51  			h.mutex.Lock()
    52  			h.probeStatus = probeStatusUnreachable
    53  			h.mutex.Unlock()
    54  			h.logger.Debugln(0, "shutting down unresponsive client")
    55  			client.Close()
    56  			return
    57  		}
    58  	}
    59  }
    60  
    61  func (h *hypervisorType) pingLoop(conn *srpc.Conn,
    62  	pingDeferChannel <-chan struct{}) {
    63  	pingsSinceLastDefer := 0
    64  	for {
    65  		timer := time.NewTimer(*hypervisorProbeTimeout)
    66  		select {
    67  		case _, ok := <-pingDeferChannel:
    68  			if !timer.Stop() {
    69  				<-timer.C
    70  			}
    71  			if !ok {
    72  				return
    73  			}
    74  			timer.Reset(*hypervisorProbeTimeout)
    75  			h.mutex.Lock()
    76  			h.probeStatus = probeStatusConnected
    77  			h.mutex.Unlock()
    78  			pingsSinceLastDefer = 0
    79  		case <-timer.C:
    80  			pingsSinceLastDefer++
    81  			if pingsSinceLastDefer > 1 {
    82  				h.logger.Debugf(0, "sending ping #%d since last activity\n",
    83  					pingsSinceLastDefer)
    84  			} else {
    85  				h.logger.Debugln(1, "sending first ping since last activity")
    86  			}
    87  			err := conn.Encode(hyper_proto.GetUpdatesRequest{})
    88  			if err != nil {
    89  				h.logger.Printf("error sending ping: %s\n", err)
    90  			} else {
    91  				if err := conn.Flush(); err != nil {
    92  					h.logger.Printf("error flushing ping: %s\n", err)
    93  				}
    94  			}
    95  			timer.Reset(*hypervisorProbeTimeout)
    96  		}
    97  	}
    98  }