github.com/google/fleetspeak@v0.1.15-0.20240426164851-4f31f62c1aea/fleetspeak/src/client/watchdog/watchdog.go (about) 1 // Copyright 2018 Google Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package watchdog implements a simple watchdog timer which dumps a trace of 16 // all goroutines to a file, and then ends the program, if the timer reaches its 17 // limit. 18 package watchdog 19 20 import ( 21 "os" 22 "runtime/pprof" 23 "time" 24 25 log "github.com/golang/glog" 26 ) 27 28 type Watchdog struct { 29 dir string 30 prefix string 31 duration time.Duration 32 reset chan struct{} 33 exit bool 34 } 35 36 // MakeWatchdog creates and starts running a watchdog timer. If <duration> 37 // passes without a reset it writes stack traces to a temporary file determined 38 // by dir, prefix. Then it exits the program if exit is set. 39 func MakeWatchdog(dir, prefix string, duration time.Duration, exit bool) *Watchdog { 40 r := &Watchdog{ 41 dir: dir, 42 prefix: prefix, 43 duration: duration, 44 reset: make(chan struct{}), 45 exit: exit, 46 } 47 go r.watch() 48 return r 49 } 50 51 // Reset resets the watchdog's timer to 0. 52 func (w *Watchdog) Reset() { 53 select { 54 case w.reset <- struct{}{}: 55 default: 56 } 57 } 58 59 func (w *Watchdog) watch() { 60 var t *time.Timer 61 defer func() { 62 if t != nil { 63 t.Stop() 64 } 65 }() 66 for { 67 t = time.NewTimer(w.duration) 68 select { 69 case _, ok := <-w.reset: 70 if !ok { 71 return 72 } 73 t.Stop() 74 t = nil 75 case <-t.C: 76 // We may have just woke up from sleep, wait another 5 77 // seconds for a connection attempt. 78 t = time.NewTimer(5 * time.Second) 79 select { 80 case _, ok := <-w.reset: 81 if !ok { 82 return 83 } 84 t.Stop() 85 t = nil 86 case <-t.C: 87 log.Errorf("Watchdog expired, attempting to write goroutine traces.") 88 f, err := os.CreateTemp(w.dir, w.prefix) 89 if err != nil { 90 log.Errorf("Unable to create file for goroutine traces: %v", err) 91 } else { 92 if err := pprof.Lookup("goroutine").WriteTo(f, 2); err != nil { 93 log.Errorf("Unable to write goroutine traces to [%s]: %v", f.Name(), err) 94 } 95 if err := f.Close(); err != nil { 96 log.Errorf("Unable to close file [%s]: %v", f.Name(), err) 97 } 98 log.Infof("Wrote goroutine traces to %s", f.Name()) 99 } 100 if w.exit { 101 log.Exitf("Watchdog expired.") 102 return 103 } 104 } 105 } 106 } 107 } 108 109 // Stop stops the watchdog timer, so that it will no longer trigger. 110 func (w *Watchdog) Stop() { 111 close(w.reset) 112 }