github.com/google/fleetspeak@v0.1.15-0.20240426164851-4f31f62c1aea/fleetspeak/src/client/watchdog/watchdog.go (about)

     1  // Copyright 2018 Google Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     https://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package watchdog implements a simple watchdog timer which dumps a trace of
    16  // all goroutines to a file, and then ends the program, if the timer reaches its
    17  // limit.
    18  package watchdog
    19  
    20  import (
    21  	"os"
    22  	"runtime/pprof"
    23  	"time"
    24  
    25  	log "github.com/golang/glog"
    26  )
    27  
// Watchdog is a timer that is expected to be reset periodically. If it is not
// reset in time, it writes a trace of all goroutines to a temporary file and,
// when configured to do so, ends the program. Create one with MakeWatchdog.
type Watchdog struct {
	dir      string        // directory argument passed to os.CreateTemp for the trace file
	prefix   string        // name pattern argument passed to os.CreateTemp for the trace file
	duration time.Duration // how long to wait for a reset before the grace period starts
	reset    chan struct{} // carries reset signals to the watch goroutine; closed by Stop
	exit     bool          // whether to exit the program after the watchdog expires
}
    35  
    36  // MakeWatchdog creates and starts running a watchdog timer. If <duration>
    37  // passes without a reset it writes stack traces to a temporary file determined
    38  // by dir, prefix. Then it exits the program if exit is set.
    39  func MakeWatchdog(dir, prefix string, duration time.Duration, exit bool) *Watchdog {
    40  	r := &Watchdog{
    41  		dir:      dir,
    42  		prefix:   prefix,
    43  		duration: duration,
    44  		reset:    make(chan struct{}),
    45  		exit:     exit,
    46  	}
    47  	go r.watch()
    48  	return r
    49  }
    50  
    51  // Reset resets the watchdog's timer to 0.
    52  func (w *Watchdog) Reset() {
    53  	select {
    54  	case w.reset <- struct{}{}:
    55  	default:
    56  	}
    57  }
    58  
    59  func (w *Watchdog) watch() {
    60  	var t *time.Timer
    61  	defer func() {
    62  		if t != nil {
    63  			t.Stop()
    64  		}
    65  	}()
    66  	for {
    67  		t = time.NewTimer(w.duration)
    68  		select {
    69  		case _, ok := <-w.reset:
    70  			if !ok {
    71  				return
    72  			}
    73  			t.Stop()
    74  			t = nil
    75  		case <-t.C:
    76  			// We may have just woke up from sleep, wait another 5
    77  			// seconds for a connection attempt.
    78  			t = time.NewTimer(5 * time.Second)
    79  			select {
    80  			case _, ok := <-w.reset:
    81  				if !ok {
    82  					return
    83  				}
    84  				t.Stop()
    85  				t = nil
    86  			case <-t.C:
    87  				log.Errorf("Watchdog expired, attempting to write goroutine traces.")
    88  				f, err := os.CreateTemp(w.dir, w.prefix)
    89  				if err != nil {
    90  					log.Errorf("Unable to create file for goroutine traces: %v", err)
    91  				} else {
    92  					if err := pprof.Lookup("goroutine").WriteTo(f, 2); err != nil {
    93  						log.Errorf("Unable to write goroutine traces to [%s]: %v", f.Name(), err)
    94  					}
    95  					if err := f.Close(); err != nil {
    96  						log.Errorf("Unable to close file [%s]: %v", f.Name(), err)
    97  					}
    98  					log.Infof("Wrote goroutine traces to %s", f.Name())
    99  				}
   100  				if w.exit {
   101  					log.Exitf("Watchdog expired.")
   102  					return
   103  				}
   104  			}
   105  		}
   106  	}
   107  }
   108  
// Stop stops the watchdog timer, so that it will no longer trigger. It does so
// by closing the reset channel, which makes the watch goroutine return the
// next time it waits on that channel.
//
// Stop must be called at most once, and Reset must not be called afterwards:
// closing an already-closed channel and sending on a closed channel both
// panic.
func (w *Watchdog) Stop() {
	close(w.reset)
}