github.com/Cloud-Foundations/Dominator@v0.3.4/fleetmanager/hypervisors/monitor.go (about) 1 package hypervisors 2 3 import ( 4 "flag" 5 "time" 6 7 "github.com/Cloud-Foundations/Dominator/lib/srpc" 8 hyper_proto "github.com/Cloud-Foundations/Dominator/proto/hypervisor" 9 ) 10 11 var ( 12 hypervisorProbeTimeout = flag.Duration("hypervisorProbeTimeout", 13 time.Second*5, "time after which a probe is sent to a quiet Hypervisor") 14 hypervisorResponseTimeout = flag.Duration("hypervisorResponseTimeout", 15 time.Second*19, 16 "time after which a Hypervisor is marked as unresponsive") 17 ) 18 19 func (h *hypervisorType) monitorLoop(client *srpc.Client, conn *srpc.Conn, 20 closeClientChannel <-chan struct{}) { 21 pingDeferChannel := make(chan struct{}) 22 defer close(pingDeferChannel) 23 go h.pingLoop(conn, pingDeferChannel) 24 lastReceiveTime := time.Now() 25 for { 26 timeout := *hypervisorResponseTimeout - time.Since(lastReceiveTime) 27 if timeout <= 0 { 28 timeout = time.Millisecond 29 } 30 timer := time.NewTimer(timeout) 31 select { 32 case <-closeClientChannel: 33 client.Close() 34 return 35 case _, ok := <-h.receiveChannel: 36 if !timer.Stop() { 37 <-timer.C 38 } 39 if !ok { 40 return 41 } 42 select { 43 case pingDeferChannel <- struct{}{}: 44 default: 45 } 46 lastReceiveTime = time.Now() 47 h.mutex.Lock() 48 h.probeStatus = probeStatusConnected 49 h.mutex.Unlock() 50 case <-timer.C: 51 h.mutex.Lock() 52 h.probeStatus = probeStatusUnreachable 53 h.mutex.Unlock() 54 h.logger.Debugln(0, "shutting down unresponsive client") 55 client.Close() 56 return 57 } 58 } 59 } 60 61 func (h *hypervisorType) pingLoop(conn *srpc.Conn, 62 pingDeferChannel <-chan struct{}) { 63 pingsSinceLastDefer := 0 64 for { 65 timer := time.NewTimer(*hypervisorProbeTimeout) 66 select { 67 case _, ok := <-pingDeferChannel: 68 if !timer.Stop() { 69 <-timer.C 70 } 71 if !ok { 72 return 73 } 74 timer.Reset(*hypervisorProbeTimeout) 75 h.mutex.Lock() 76 h.probeStatus = probeStatusConnected 77 h.mutex.Unlock() 78 pingsSinceLastDefer = 0 79 case <-timer.C: 80 pingsSinceLastDefer++ 81 if pingsSinceLastDefer > 1 { 82 h.logger.Debugf(0, "sending ping #%d since last activity\n", 83 pingsSinceLastDefer) 84 } else { 85 h.logger.Debugln(1, "sending first ping since last activity") 86 } 87 err := conn.Encode(hyper_proto.GetUpdatesRequest{}) 88 if err != nil { 89 h.logger.Printf("error sending ping: %s\n", err) 90 } else { 91 if err := conn.Flush(); err != nil { 92 h.logger.Printf("error flushing ping: %s\n", err) 93 } 94 } 95 timer.Reset(*hypervisorProbeTimeout) 96 } 97 } 98 }