github.com/aristanetworks/goarista@v0.0.0-20240514173732-cca2755bbd44/monitor/loglevel.go (about) 1 // Copyright (c) 2022 Arista Networks, Inc. 2 // Use of this source code is governed by the Apache License 2.0 3 // that can be found in the COPYING file. 4 5 package monitor 6 7 import ( 8 "errors" 9 "fmt" 10 "net/http" 11 "regexp" 12 "strconv" 13 "sync" 14 "time" 15 16 "github.com/aristanetworks/glog" 17 ) 18 19 type logsetSrv struct { 20 mu sync.Mutex 21 resetTo map[string]*resetState // ongoing resets 22 timer newTimerFunc // dependency injencted timer to avoid time.Sleep in tests 23 wg sync.WaitGroup // used during testing to ensure we're not waiting 24 } 25 26 func newLogsetSrv() *logsetSrv { 27 return &logsetSrv{timer: realTimer, resetTo: map[string]*resetState{}} 28 } 29 30 func (ls *logsetSrv) err(w http.ResponseWriter, err string, code int) { 31 err = fmt.Sprintf("loglevel error: %v (code %v)", err, code) 32 glog.Error(err) 33 http.Error(w, err, code) 34 } 35 36 // ServeHTTP handles a /debug/loglevel request. 37 // 38 // It parses options from a HTTP form or from URL params. 39 // 40 // The following verbositys can be set: 41 // - glog: set "github.com/aristanetworks/glog" verbosity. 42 // 43 // The following options control log resetting: 44 // 45 // - timeout: A duration (e.g. "1m") for which the log should remain set at the verbosity 46 // passed in. it's safe to send multiple: if you send another request with a timeout, 47 // the ongoing timeout will be cancelled but the value will be reset to the original 48 // value detected by this endpoint. 49 // 50 // Here's a detailed example of timeout behavior with overlapping timeouts: 51 // - User wants to increase verbosity to find bug. Lets assume it starts at 0. 52 // - They call /debug/loglevel?glog=1&timeout=10m 53 // - User decides this glog verbosity is not enough, so decides to increase to 10. 54 // - They call /debug/loglevel?glog=10&timeout=5m 55 // - 5 minutes later, the loglevel will be set back to 0. 56 // - No further changes to verbosity occur. 57 func (ls *logsetSrv) ServeHTTP(w http.ResponseWriter, r *http.Request) { 58 change, err := parseLoglevelReq(r) 59 if err != nil { 60 ls.err(w, "could not parse form: "+err.Error(), http.StatusBadRequest) 61 return 62 } 63 if err := ls.handle(change); err != nil { 64 ls.err(w, "could not update log: "+err.Error(), http.StatusInternalServerError) 65 return 66 } 67 fmt.Fprint(w, "OK\n") 68 } 69 70 func (ls *logsetSrv) handle(req loglevelReq) error { 71 ls.mu.Lock() 72 defer ls.mu.Unlock() 73 74 var errs []error 75 for typ, change := range req.updates { 76 typ := typ // capture for closure 77 78 resetFn, err := change.Apply() 79 if err != nil { 80 errs = append(errs, err) 81 } 82 83 // reset logic is kept as simple as possible by always cancelling a waiting reset. 84 // The reset function is carried across in cases where the resetFn was not run. 85 if ongoingReset, exists := ls.resetTo[typ]; exists { 86 resetFn = ongoingReset.Clear() 87 delete(ls.resetTo, typ) 88 } 89 90 if !req.reset { 91 continue // nothing to do 92 } 93 94 cancel := make(chan struct{}) 95 rt := &resetState{cancel: cancel, do: resetFn} 96 ls.resetTo[typ] = rt 97 ls.wg.Add(1) // waitgroup used for testing only 98 go func() { 99 defer ls.wg.Done() 100 timer := ls.timer(req.resetTimeout) 101 select { 102 case <-cancel: 103 if !timer.Stop() { 104 <-timer.C() 105 } 106 return 107 case <-timer.C(): 108 ls.mu.Lock() 109 defer ls.mu.Unlock() 110 111 // we have to check cancel again here in case we got cancelled 112 // while waiting for lock 113 select { 114 case <-rt.cancel: 115 return 116 default: 117 } 118 119 resetFn() 120 delete(ls.resetTo, typ) // delete so resetFn is dropped 121 } 122 }() 123 } 124 125 return errors.Join(errs...) 126 } 127 128 type resetState struct { 129 cancel chan struct{} 130 do func() 131 } 132 133 func (r *resetState) Clear() func() { 134 if r.cancel != nil { 135 close(r.cancel) 136 } 137 old := r.do 138 r.cancel = nil 139 r.do = nil 140 return old 141 } 142 143 // logUpdater applys a log verbosity change 144 type logUpdater interface { 145 // Apply changes the verbosity to the configured value. 146 // 147 // Apply should return a reset function if error is nil. This should reset the verbosity 148 // to the value prior to a change. 149 Apply() (func(), error) 150 } 151 152 type glogUpdater struct { 153 v glog.Level 154 } 155 156 func (v glogUpdater) Apply() (func(), error) { 157 prev := glog.SetVGlobal(v.v) 158 return func() { glog.SetVGlobal(prev) }, nil 159 } 160 161 const glogV = "glog" 162 const glogVModule = "glog-vmodule" 163 164 type vModuleUpdater struct { 165 v string 166 } 167 168 func (v vModuleUpdater) Apply() (func(), error) { 169 prev, err := glog.SetVModule(v.v) 170 if err != nil { 171 return nil, err 172 } 173 return func() { 174 // nothing we can do if we error now, just log it 175 if _, err := glog.SetVModule(prev); err != nil { 176 glog.Errorf("loglevel: cannot reset VModule: %v", err) 177 } 178 }, nil 179 } 180 181 type loglevelReq struct { 182 reset bool 183 resetTimeout time.Duration // duration change should be active 184 updates map[string]logUpdater // log type as a string -> updater to apply change 185 } 186 187 var vModuleRegexp = regexp.MustCompile(`^(.*[0-9]+,?)+$`) 188 189 func parseLoglevelReq(r *http.Request) (loglevelReq, error) { 190 if r.Method != http.MethodPost { 191 return loglevelReq{}, errors.New("HTTP method must be POST") 192 } 193 194 if err := r.ParseForm(); err != nil { 195 return loglevelReq{}, err 196 } 197 opts := r.Form 198 199 ll := loglevelReq{updates: map[string]logUpdater{}} 200 201 if timeout := opts.Get("timeout"); timeout != "" { 202 w, err := time.ParseDuration(timeout) 203 if err != nil { 204 return loglevelReq{}, fmt.Errorf("could not parse timeout: %v", err) 205 } 206 if w < time.Second { 207 return loglevelReq{}, errors.New("timeout too small: valid between 1s-24h") 208 } else if w > (time.Hour * 24) { 209 return loglevelReq{}, errors.New("timeout too large: valid between 1s-24h") 210 } 211 ll.resetTimeout = w 212 ll.reset = true 213 } 214 215 // parse glog options 216 if setGlog := opts.Get(glogV); setGlog != "" { 217 v, err := strconv.Atoi(setGlog) 218 if err != nil { 219 return loglevelReq{}, fmt.Errorf("invalid glog argument: %v", err) 220 } 221 if v < 0 { 222 return loglevelReq{}, fmt.Errorf("invalid glog argument: %v", err) 223 } 224 ll.updates[glogV] = glogUpdater{v: glog.Level(v)} 225 } 226 227 // parse glog-vmodule options 228 if setVModule := opts.Get(glogVModule); setVModule != "" { 229 // would be nice to be able to parse ahead of time in glog 230 // for now just use a basic regex 231 if !vModuleRegexp.MatchString(setVModule) { 232 return loglevelReq{}, fmt.Errorf( 233 "invalid glog-vmodule argument: should match regex: %v", vModuleRegexp) 234 } 235 ll.updates[glogVModule] = vModuleUpdater{v: setVModule} 236 } 237 238 if len(ll.updates) == 0 { 239 return loglevelReq{}, errors.New("empty request") 240 } 241 return ll, nil 242 } 243 244 // newTimerFunc is an interface used to mock out time behavior for unit tests. 245 // 246 // this is preferred to adding a chunky time mock dependency to goarista. 247 type newTimerFunc func(time.Duration) timer 248 249 type timer interface { 250 C() <-chan time.Time 251 Stop() bool 252 } 253 254 type timerImpl struct { 255 *time.Timer 256 } 257 258 func (t timerImpl) C() <-chan time.Time { 259 return t.Timer.C 260 } 261 262 func realTimer(d time.Duration) timer { 263 return timerImpl{time.NewTimer(d)} 264 }