github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/kbfs/libkbfs/impatient_debug_dumper.go (about) 1 package libkbfs 2 3 import ( 4 "bytes" 5 "compress/gzip" 6 "context" 7 "encoding/base64" 8 "fmt" 9 "runtime/pprof" 10 "sync" 11 "sync/atomic" 12 "time" 13 14 "github.com/keybase/client/go/logger" 15 "golang.org/x/time/rate" 16 ) 17 18 type ctxTimeTracker struct { 19 ctx context.Context 20 expiresAt time.Time 21 22 done int32 23 } 24 25 func (c *ctxTimeTracker) markDone() { 26 atomic.StoreInt32(&c.done, 1) 27 } 28 29 func (c *ctxTimeTracker) isDone() bool { 30 return atomic.LoadInt32(&c.done) == 1 31 } 32 33 func (c *ctxTimeTracker) isExpired(clock Clock) bool { 34 return clock.Now().After(c.expiresAt) 35 } 36 37 type ctxTimeTrackerListNode struct { 38 tracker *ctxTimeTracker 39 next *ctxTimeTrackerListNode 40 } 41 42 type ctxTimeTrackerList struct { 43 front *ctxTimeTrackerListNode 44 tail *ctxTimeTrackerListNode 45 } 46 47 func (c *ctxTimeTrackerList) peekFront() (t *ctxTimeTracker) { 48 if c.front == nil { 49 return nil 50 } 51 return c.front.tracker 52 } 53 54 func (c *ctxTimeTrackerList) popFront() (t *ctxTimeTracker) { 55 if c.front == nil { 56 return nil 57 } 58 t = c.front.tracker 59 c.front = c.front.next 60 if c.front == nil { 61 // last one! 62 c.tail = nil 63 } 64 return t 65 } 66 67 func (c *ctxTimeTrackerList) append(t *ctxTimeTracker) { 68 newNode := &ctxTimeTrackerListNode{ 69 tracker: t, 70 } 71 if c.tail != nil { 72 c.tail.next = newNode 73 } 74 c.tail = newNode 75 if c.front == nil { 76 c.front = newNode 77 } 78 } 79 80 // ImpatientDebugDumper dumps all running goroutines if an operation takes 81 // longer than a preset value. User of this type should call Begin() with a 82 // context associated with an operation, and call the returned function when 83 // the operation is done. If the operation finishes within the preset duration, 84 // nothing is dumped into log. Despite being impatient, it tries not to pollute 85 // the log too much by rate limit goroutine dumping based on 86 // impatientDebugDumperDumpMinInterval (at most 1 per minute). 87 type ImpatientDebugDumper struct { 88 config Config 89 log logger.Logger 90 dumpIn time.Duration 91 92 ticker *time.Ticker 93 limiter *rate.Limiter 94 shutdownFunc func() 95 shutdownDoneCh chan struct{} 96 97 lock sync.Mutex 98 chronologicalTimeTrackerList *ctxTimeTrackerList 99 } 100 101 const impatientDebugDumperCheckInterval = time.Second 102 const impatientDebugDumperDumpMinInterval = time.Minute // 1 dump per min max 103 104 // NewImpatientDebugDumper creates a new *ImpatientDebugDumper, which logs with 105 // a logger made by config.MakeLogger("IGD"), and dumps goroutines if an 106 // operation takes longer than dumpIn. 107 func NewImpatientDebugDumper(config Config, dumpIn time.Duration) *ImpatientDebugDumper { 108 ctx, cancel := context.WithCancel(context.Background()) 109 d := &ImpatientDebugDumper{ 110 config: config, 111 log: config.MakeLogger("IGD"), 112 dumpIn: dumpIn, 113 ticker: time.NewTicker(impatientDebugDumperCheckInterval), 114 limiter: rate.NewLimiter( 115 rate.Every(impatientDebugDumperDumpMinInterval), 1), 116 shutdownFunc: cancel, 117 shutdownDoneCh: make(chan struct{}), 118 chronologicalTimeTrackerList: &ctxTimeTrackerList{}, 119 } 120 go d.dumpLoop(ctx.Done()) 121 return d 122 } 123 124 // NewImpatientDebugDumperForForcedDumps creates a new 125 // *ImpatientDebugDumper, just as above, though without launching any 126 // background goroutines or allowing the user to begin any 127 // time-tracked tasks. `ForceDump` is the only way to obtain a dump 128 // from a dumper constructed with this function. 129 func NewImpatientDebugDumperForForcedDumps(config Config) *ImpatientDebugDumper { 130 return &ImpatientDebugDumper{ 131 config: config, 132 log: config.MakeLogger("IGD"), 133 limiter: rate.NewLimiter( 134 rate.Every(impatientDebugDumperDumpMinInterval), 1), 135 } 136 } 137 138 func (d *ImpatientDebugDumper) dump(ctx context.Context) { 139 if !d.limiter.Allow() { 140 // Use a limiter to avoid dumping too much into log accidently. 141 return 142 } 143 buf := &bytes.Buffer{} 144 base64er := base64.NewEncoder(base64.StdEncoding, buf) 145 gzipper := gzip.NewWriter(base64er) 146 for _, p := range pprof.Profiles() { 147 fmt.Fprintf(gzipper, 148 "\n======== START Profile: %s ========\n\n", p.Name()) 149 _ = p.WriteTo(gzipper, 2) 150 fmt.Fprintf(gzipper, 151 "\n======== END Profile: %s ========\n\n", p.Name()) 152 } 153 gzipper.Close() 154 base64er.Close() 155 d.log.CDebugf(ctx, 156 "Operation exceeded max wait time. dump>gzip>base64: %q "+ 157 "Pipe the quoted content into ` | base64 -d | gzip -d ` "+ 158 "to read as a Homosapien.", buf.String()) 159 } 160 161 // ForceDump dumps all debug info to the log, if it hasn't done so 162 // recently according to the rate-limiter. 163 func (d *ImpatientDebugDumper) ForceDump(ctx context.Context) { 164 d.dump(ctx) 165 } 166 167 func (d *ImpatientDebugDumper) dumpTick() { 168 d.lock.Lock() 169 defer d.lock.Unlock() 170 for { 171 // In each iteration we deal with the front of list: 172 // 1) If list is empty, we just return and wait for the next tick; 173 // 2) If front is done, pop front from the list, and continue into 174 // next iteration to check next one; 175 // 3) If front is not done but expired, dump routines (if rate limiter 176 // permits), pop front from the list, and continue into next 177 // iteration to check next one; 178 // 4) If front is not done yet nor expired, just return and wait for 179 // next tick when we check again. 180 // 181 // Since we either move on or return, there's no risk of infinite 182 // looping here. 183 t := d.chronologicalTimeTrackerList.peekFront() 184 if t == nil { 185 return 186 } 187 if t.isDone() { 188 // This operation is done, so just move on. 189 d.chronologicalTimeTrackerList.popFront() 190 continue 191 } 192 if t.isExpired(d.config.Clock()) { 193 // This operation isn't done, and it has expired. So dump debug 194 // information and move on. 195 d.dump(t.ctx) 196 d.chronologicalTimeTrackerList.popFront() 197 continue 198 } 199 // This operation isn't done yet, but it also hasn't expired. So 200 // just return and wait for next tick and check again. 201 return 202 } 203 } 204 205 func (d *ImpatientDebugDumper) dumpLoop(shutdownCh <-chan struct{}) { 206 defer close(d.shutdownDoneCh) 207 for { 208 select { 209 case <-d.ticker.C: 210 d.dumpTick() 211 case <-shutdownCh: 212 d.ticker.Stop() 213 d.log.Debug("shutdown") 214 return 215 } 216 } 217 } 218 219 // Begin causes d to start tracking time for ctx. The returned function should 220 // be called once the associated operation is done, likely in a defer 221 // statement. 222 func (d *ImpatientDebugDumper) Begin(ctx context.Context) (done func()) { 223 if d.chronologicalTimeTrackerList == nil { 224 return nil 225 } 226 tracker := &ctxTimeTracker{ 227 ctx: ctx, 228 expiresAt: d.config.Clock().Now().Add(d.dumpIn), 229 } 230 d.lock.Lock() 231 defer d.lock.Unlock() 232 d.chronologicalTimeTrackerList.append(tracker) 233 return tracker.markDone 234 } 235 236 // Shutdown shuts down d idempotently. 237 func (d *ImpatientDebugDumper) Shutdown() { 238 if d.shutdownFunc != nil { 239 d.shutdownFunc() 240 <-d.shutdownDoneCh 241 } 242 }