github.com/aristanetworks/goarista@v0.0.0-20240514173732-cca2755bbd44/monitor/loglevel.go (about)

     1  // Copyright (c) 2022 Arista Networks, Inc.
     2  // Use of this source code is governed by the Apache License 2.0
     3  // that can be found in the COPYING file.
     4  
     5  package monitor
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"net/http"
    11  	"regexp"
    12  	"strconv"
    13  	"sync"
    14  	"time"
    15  
    16  	"github.com/aristanetworks/glog"
    17  )
    18  
    19  type logsetSrv struct {
    20  	mu      sync.Mutex
    21  	resetTo map[string]*resetState // ongoing resets
    22  	timer   newTimerFunc           // dependency injencted timer to avoid time.Sleep in tests
    23  	wg      sync.WaitGroup         // used during testing to ensure we're not waiting
    24  }
    25  
    26  func newLogsetSrv() *logsetSrv {
    27  	return &logsetSrv{timer: realTimer, resetTo: map[string]*resetState{}}
    28  }
    29  
    30  func (ls *logsetSrv) err(w http.ResponseWriter, err string, code int) {
    31  	err = fmt.Sprintf("loglevel error: %v (code %v)", err, code)
    32  	glog.Error(err)
    33  	http.Error(w, err, code)
    34  }
    35  
    36  // ServeHTTP handles a /debug/loglevel request.
    37  //
    38  // It parses options from a HTTP form or from URL params.
    39  //
    40  // The following verbositys can be set:
    41  // - glog: set "github.com/aristanetworks/glog" verbosity.
    42  //
    43  // The following options control log resetting:
    44  //
    45  //   - timeout: A duration (e.g. "1m") for which the log should remain set at the verbosity
    46  //     passed in. it's safe to send multiple: if you send another request with a timeout,
    47  //     the ongoing timeout will be cancelled but the value will be reset to the original
    48  //     value detected by this endpoint.
    49  //
    50  // Here's a detailed example of timeout behavior with overlapping timeouts:
    51  // - User wants to increase verbosity to find bug. Lets assume it starts at 0.
    52  // - They call /debug/loglevel?glog=1&timeout=10m
    53  // - User decides this glog verbosity is not enough, so decides to increase to 10.
    54  // - They call /debug/loglevel?glog=10&timeout=5m
    55  // - 5 minutes later, the loglevel will be set back to 0.
    56  // - No further changes to verbosity occur.
    57  func (ls *logsetSrv) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    58  	change, err := parseLoglevelReq(r)
    59  	if err != nil {
    60  		ls.err(w, "could not parse form: "+err.Error(), http.StatusBadRequest)
    61  		return
    62  	}
    63  	if err := ls.handle(change); err != nil {
    64  		ls.err(w, "could not update log: "+err.Error(), http.StatusInternalServerError)
    65  		return
    66  	}
    67  	fmt.Fprint(w, "OK\n")
    68  }
    69  
    70  func (ls *logsetSrv) handle(req loglevelReq) error {
    71  	ls.mu.Lock()
    72  	defer ls.mu.Unlock()
    73  
    74  	var errs []error
    75  	for typ, change := range req.updates {
    76  		typ := typ // capture for closure
    77  
    78  		resetFn, err := change.Apply()
    79  		if err != nil {
    80  			errs = append(errs, err)
    81  		}
    82  
    83  		// reset logic is kept as simple as possible by always cancelling a waiting reset.
    84  		// The reset function is carried across in cases where the resetFn was not run.
    85  		if ongoingReset, exists := ls.resetTo[typ]; exists {
    86  			resetFn = ongoingReset.Clear()
    87  			delete(ls.resetTo, typ)
    88  		}
    89  
    90  		if !req.reset {
    91  			continue // nothing to do
    92  		}
    93  
    94  		cancel := make(chan struct{})
    95  		rt := &resetState{cancel: cancel, do: resetFn}
    96  		ls.resetTo[typ] = rt
    97  		ls.wg.Add(1) // waitgroup used for testing only
    98  		go func() {
    99  			defer ls.wg.Done()
   100  			timer := ls.timer(req.resetTimeout)
   101  			select {
   102  			case <-cancel:
   103  				if !timer.Stop() {
   104  					<-timer.C()
   105  				}
   106  				return
   107  			case <-timer.C():
   108  				ls.mu.Lock()
   109  				defer ls.mu.Unlock()
   110  
   111  				// we have to check cancel again here in case we got cancelled
   112  				// while waiting for lock
   113  				select {
   114  				case <-rt.cancel:
   115  					return
   116  				default:
   117  				}
   118  
   119  				resetFn()
   120  				delete(ls.resetTo, typ) // delete so resetFn is dropped
   121  			}
   122  		}()
   123  	}
   124  
   125  	return errors.Join(errs...)
   126  }
   127  
   128  type resetState struct {
   129  	cancel chan struct{}
   130  	do     func()
   131  }
   132  
   133  func (r *resetState) Clear() func() {
   134  	if r.cancel != nil {
   135  		close(r.cancel)
   136  	}
   137  	old := r.do
   138  	r.cancel = nil
   139  	r.do = nil
   140  	return old
   141  }
   142  
   143  // logUpdater applys a log verbosity change
   144  type logUpdater interface {
   145  	// Apply changes the verbosity to the configured value.
   146  	//
   147  	// Apply should return a reset function if error is nil. This should reset the verbosity
   148  	// to the value prior to a change.
   149  	Apply() (func(), error)
   150  }
   151  
   152  type glogUpdater struct {
   153  	v glog.Level
   154  }
   155  
   156  func (v glogUpdater) Apply() (func(), error) {
   157  	prev := glog.SetVGlobal(v.v)
   158  	return func() { glog.SetVGlobal(prev) }, nil
   159  }
   160  
   161  const glogV = "glog"
   162  const glogVModule = "glog-vmodule"
   163  
   164  type vModuleUpdater struct {
   165  	v string
   166  }
   167  
   168  func (v vModuleUpdater) Apply() (func(), error) {
   169  	prev, err := glog.SetVModule(v.v)
   170  	if err != nil {
   171  		return nil, err
   172  	}
   173  	return func() {
   174  		// nothing we can do if we error now, just log it
   175  		if _, err := glog.SetVModule(prev); err != nil {
   176  			glog.Errorf("loglevel: cannot reset VModule: %v", err)
   177  		}
   178  	}, nil
   179  }
   180  
   181  type loglevelReq struct {
   182  	reset        bool
   183  	resetTimeout time.Duration         // duration change should be active
   184  	updates      map[string]logUpdater // log type as a string -> updater to apply change
   185  }
   186  
   187  var vModuleRegexp = regexp.MustCompile(`^(.*[0-9]+,?)+$`)
   188  
   189  func parseLoglevelReq(r *http.Request) (loglevelReq, error) {
   190  	if r.Method != http.MethodPost {
   191  		return loglevelReq{}, errors.New("HTTP method must be POST")
   192  	}
   193  
   194  	if err := r.ParseForm(); err != nil {
   195  		return loglevelReq{}, err
   196  	}
   197  	opts := r.Form
   198  
   199  	ll := loglevelReq{updates: map[string]logUpdater{}}
   200  
   201  	if timeout := opts.Get("timeout"); timeout != "" {
   202  		w, err := time.ParseDuration(timeout)
   203  		if err != nil {
   204  			return loglevelReq{}, fmt.Errorf("could not parse timeout: %v", err)
   205  		}
   206  		if w < time.Second {
   207  			return loglevelReq{}, errors.New("timeout too small: valid between 1s-24h")
   208  		} else if w > (time.Hour * 24) {
   209  			return loglevelReq{}, errors.New("timeout too large: valid between 1s-24h")
   210  		}
   211  		ll.resetTimeout = w
   212  		ll.reset = true
   213  	}
   214  
   215  	// parse glog options
   216  	if setGlog := opts.Get(glogV); setGlog != "" {
   217  		v, err := strconv.Atoi(setGlog)
   218  		if err != nil {
   219  			return loglevelReq{}, fmt.Errorf("invalid glog argument: %v", err)
   220  		}
   221  		if v < 0 {
   222  			return loglevelReq{}, fmt.Errorf("invalid glog argument: %v", err)
   223  		}
   224  		ll.updates[glogV] = glogUpdater{v: glog.Level(v)}
   225  	}
   226  
   227  	// parse glog-vmodule options
   228  	if setVModule := opts.Get(glogVModule); setVModule != "" {
   229  		// would be nice to be able to parse ahead of time in glog
   230  		// for now just use a basic regex
   231  		if !vModuleRegexp.MatchString(setVModule) {
   232  			return loglevelReq{}, fmt.Errorf(
   233  				"invalid glog-vmodule argument: should match regex: %v", vModuleRegexp)
   234  		}
   235  		ll.updates[glogVModule] = vModuleUpdater{v: setVModule}
   236  	}
   237  
   238  	if len(ll.updates) == 0 {
   239  		return loglevelReq{}, errors.New("empty request")
   240  	}
   241  	return ll, nil
   242  }
   243  
   244  // newTimerFunc is an interface used to mock out time behavior for unit tests.
   245  //
   246  // this is preferred to adding a chunky time mock dependency to goarista.
   247  type newTimerFunc func(time.Duration) timer
   248  
   249  type timer interface {
   250  	C() <-chan time.Time
   251  	Stop() bool
   252  }
   253  
   254  type timerImpl struct {
   255  	*time.Timer
   256  }
   257  
   258  func (t timerImpl) C() <-chan time.Time {
   259  	return t.Timer.C
   260  }
   261  
   262  func realTimer(d time.Duration) timer {
   263  	return timerImpl{time.NewTimer(d)}
   264  }