github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/profiler/profiler.go (about) 1 package profiler 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "fmt" 8 "io" 9 "math/rand" 10 "os" 11 "path/filepath" 12 "runtime" 13 "runtime/pprof" 14 "time" 15 16 "github.com/google/pprof/profile" 17 "github.com/rs/zerolog" 18 "go.uber.org/atomic" 19 "go.uber.org/multierr" 20 pb "google.golang.org/genproto/googleapis/devtools/cloudprofiler/v2" 21 22 "github.com/onflow/flow-go/engine" 23 ) 24 25 type timedProfileFunc func(io.Writer, time.Duration) error 26 type profileDef struct { 27 profileName string 28 profileType pb.ProfileType 29 profileFunc timedProfileFunc 30 } 31 32 // ProfilerConfig profiler parameters. 33 type ProfilerConfig struct { 34 Enabled bool 35 UploaderEnabled bool 36 37 Dir string 38 Interval time.Duration 39 Duration time.Duration 40 } 41 42 type AutoProfiler struct { 43 unit *engine.Unit 44 dir string 45 log zerolog.Logger 46 interval time.Duration 47 duration time.Duration 48 49 uploader Uploader 50 enabled *atomic.Bool 51 52 // used to trigger a profile run for a given duration 53 trigger chan time.Duration 54 } 55 56 // New creates a new AutoProfiler instance performing profiling every interval for duration. 57 func New(log zerolog.Logger, uploader Uploader, cfg ProfilerConfig) (*AutoProfiler, error) { 58 59 err := os.MkdirAll(cfg.Dir, os.ModePerm) 60 if err != nil { 61 return nil, fmt.Errorf("could not create profile dir %v: %w", cfg.Dir, err) 62 } 63 64 // add 50% jitter to the interval 65 jitter := time.Duration(rand.Int63n(int64(cfg.Interval))) 66 interval := cfg.Interval/2 + jitter 67 68 p := &AutoProfiler{ 69 unit: engine.NewUnit(), 70 log: log.With().Str("component", "profiler").Logger(), 71 dir: cfg.Dir, 72 interval: interval, 73 duration: cfg.Duration, 74 uploader: uploader, 75 enabled: atomic.NewBool(cfg.Enabled), 76 trigger: make(chan time.Duration), 77 } 78 79 go p.runForever() 80 81 return p, nil 82 } 83 84 // SetEnabled sets whether the profiler is active. 85 // No errors are expected during normal operation. 86 func (p *AutoProfiler) SetEnabled(enabled bool) error { 87 p.enabled.Store(enabled) 88 return nil 89 } 90 91 // Enabled returns the current enabled state of the profiler. 92 func (p *AutoProfiler) Enabled() bool { 93 return p.enabled.Load() 94 } 95 96 // TriggerRun manually triggers a profile run if one is not already running. 97 func (p *AutoProfiler) TriggerRun(d time.Duration) error { 98 select { 99 case p.trigger <- d: 100 return nil 101 default: 102 return errors.New("profiling is already in progress") 103 } 104 } 105 106 func (p *AutoProfiler) runForever() { 107 t := time.NewTicker(p.interval) 108 defer t.Stop() 109 110 for { 111 select { 112 case <-t.C: 113 if p.Enabled() { 114 p.runOnce(p.duration) 115 } 116 case d := <-p.trigger: 117 p.runOnce(d) 118 case <-p.unit.Quit(): 119 return 120 } 121 } 122 } 123 124 func (p *AutoProfiler) Ready() <-chan struct{} { 125 if p.Enabled() { 126 p.log.Info().Dur("duration", p.duration).Time("nextRunAt", time.Now().Add(p.interval)).Msg("AutoProfiler has started") 127 } else { 128 p.log.Info().Msg("AutoProfiler has started, profiler is disabled") 129 } 130 return p.unit.Ready() 131 } 132 133 func (p *AutoProfiler) Done() <-chan struct{} { 134 return p.unit.Done() 135 } 136 137 func (p *AutoProfiler) runOnce(d time.Duration) { 138 startTime := time.Now() 139 p.log.Info().Msg("starting profile trace") 140 141 for _, prof := range [...]profileDef{ 142 {profileName: "goroutine", profileType: pb.ProfileType_THREADS, profileFunc: func(w io.Writer, _ time.Duration) error { return newProfileFunc("goroutine")(w) }}, 143 {profileName: "heap", profileType: pb.ProfileType_HEAP, profileFunc: p.pprofHeap}, 144 {profileName: "allocs", profileType: pb.ProfileType_HEAP_ALLOC, profileFunc: p.pprofAllocs}, 145 {profileName: "block", profileType: pb.ProfileType_CONTENTION, profileFunc: p.pprofBlock}, 146 {profileName: "cpu", profileType: pb.ProfileType_WALL, profileFunc: p.pprofCpu}, 147 } { 148 path := filepath.Join(p.dir, fmt.Sprintf("%s-%s", prof.profileName, time.Now().Format(time.RFC3339))) 149 150 logger := p.log.With().Str("profileName", prof.profileName).Str("profilePath", path).Logger() 151 logger.Info().Str("file", path).Msg("capturing") 152 153 f, err := os.CreateTemp(p.dir, "profile") 154 if err != nil { 155 logger.Err(err).Msg("failed to create temp profile") 156 continue 157 } 158 logger = logger.With().Str("tempFile", f.Name()).Logger() 159 160 // Remove temp file if it still exists. 161 defer func(logger zerolog.Logger, tmpName string) { 162 if _, err := os.Stat(tmpName); errors.Is(err, os.ErrNotExist) { 163 return 164 } 165 if err := os.Remove(tmpName); err != nil { 166 logger.Warn().Err(err).Msg("failed to remove profile") 167 } 168 }(logger, f.Name()) 169 170 err = p.pprof(f, prof.profileFunc, d) 171 if err != nil { 172 logger.Error().Err(err).Msg("failed to generate profile") 173 continue 174 } 175 176 // default CreateTemp permissions are 0600. 177 err = os.Chmod(f.Name(), 0644) 178 if err != nil { 179 logger.Error().Err(err).Msg("failed to set profile permissions") 180 continue 181 } 182 183 err = os.Rename(f.Name(), path) 184 if err != nil { 185 logger.Error().Err(err).Msg("failed to rename profile") 186 continue 187 } 188 189 ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) 190 defer cancel() 191 192 err = p.uploader.Upload(ctx, path, prof.profileType) 193 if err != nil { 194 logger.Warn().Err(err).Msg("failed to upload profile") 195 continue 196 } 197 } 198 p.log.Info().Dur("duration", time.Since(startTime)).Msg("finished profile trace") 199 } 200 201 func (p *AutoProfiler) pprof(f *os.File, fn timedProfileFunc, d time.Duration) (err error) { 202 defer func() { 203 multierr.AppendInto(&err, f.Close()) 204 }() 205 206 return fn(f, d) 207 } 208 209 type profileFunc func(io.Writer) error 210 211 func newProfileFunc(name string) profileFunc { 212 return func(w io.Writer) error { 213 return pprof.Lookup(name).WriteTo(w, 0) 214 } 215 } 216 217 func (p *AutoProfiler) goHeapProfile(sampleTypes ...string) (*profile.Profile, error) { 218 if len(sampleTypes) == 0 { 219 return nil, fmt.Errorf("no sample types specified") 220 } 221 222 // Forces the GC before taking each of the heap profiles and improves the profile accuracy. 223 // Autoprofiler runs very infrequently so performance impact is minimal. 224 runtime.GC() 225 226 buf := &bytes.Buffer{} 227 err := newProfileFunc("heap")(buf) 228 if err != nil { 229 return nil, fmt.Errorf("failed to generate allocs profile: %w", err) 230 } 231 232 prof, err := profile.Parse(buf) 233 if err != nil { 234 return nil, fmt.Errorf("failed to parse allocs profile: %w", err) 235 } 236 prof.TimeNanos = time.Now().UnixNano() 237 238 selectedSampleTypes := make([]int, 0, len(sampleTypes)) 239 for _, name := range sampleTypes { 240 for i, sampleType := range prof.SampleType { 241 if sampleType.Type == name { 242 selectedSampleTypes = append(selectedSampleTypes, i) 243 } 244 } 245 } 246 if len(selectedSampleTypes) != len(sampleTypes) { 247 return nil, fmt.Errorf("failed to find all sample types: want: %+v, got: %+v", sampleTypes, prof.SampleType) 248 } 249 250 newSampleType := make([]*profile.ValueType, 0, len(selectedSampleTypes)) 251 for _, j := range selectedSampleTypes { 252 newSampleType = append(newSampleType, prof.SampleType[j]) 253 } 254 prof.SampleType = newSampleType 255 prof.DefaultSampleType = prof.SampleType[0].Type 256 257 for _, s := range prof.Sample { 258 newValue := make([]int64, 0, len(selectedSampleTypes)) 259 for _, j := range selectedSampleTypes { 260 newValue = append(newValue, s.Value[j]) 261 } 262 s.Value = newValue 263 } 264 265 // Merge profile with itself to remove empty samples. 266 prof, err = profile.Merge([]*profile.Profile{prof}) 267 if err != nil { 268 return nil, fmt.Errorf("failed to merge profile: %w", err) 269 } 270 271 return prof, nil 272 } 273 274 // pprofHeap produces cumulative heap profile since the program start. 275 func (p *AutoProfiler) pprofHeap(w io.Writer, _ time.Duration) error { 276 prof, err := p.goHeapProfile("inuse_objects", "inuse_space") 277 if err != nil { 278 return fmt.Errorf("failed to get heap profile: %w", err) 279 } 280 281 return prof.Write(w) 282 } 283 284 // pprofAllocs produces differential allocs profile for the given duration. 285 func (p *AutoProfiler) pprofAllocs(w io.Writer, d time.Duration) (err error) { 286 p1, err := p.goHeapProfile("alloc_objects", "alloc_space") 287 if err != nil { 288 return fmt.Errorf("failed to get allocs profile: %w", err) 289 } 290 291 select { 292 case <-time.After(d): 293 case <-p.unit.Quit(): 294 return context.Canceled 295 } 296 297 p2, err := p.goHeapProfile("alloc_objects", "alloc_space") 298 if err != nil { 299 return fmt.Errorf("failed to get allocs profile: %w", err) 300 } 301 302 // multiply values by -1 and merge to get differential profile. 303 p1.Scale(-1) 304 diff, err := profile.Merge([]*profile.Profile{p1, p2}) 305 if err != nil { 306 return fmt.Errorf("failed to merge allocs profiles: %w", err) 307 } 308 diff.TimeNanos = time.Now().UnixNano() 309 diff.DurationNanos = d.Nanoseconds() 310 311 return diff.Write(w) 312 } 313 314 func (p *AutoProfiler) pprofBlock(w io.Writer, d time.Duration) error { 315 runtime.SetBlockProfileRate(100) 316 defer runtime.SetBlockProfileRate(0) 317 318 select { 319 case <-time.After(d): 320 return newProfileFunc("block")(w) 321 case <-p.unit.Quit(): 322 return context.Canceled 323 } 324 } 325 326 func (p *AutoProfiler) pprofCpu(w io.Writer, d time.Duration) error { 327 err := pprof.StartCPUProfile(w) 328 if err != nil { 329 return fmt.Errorf("failed to start CPU profile: %w", err) 330 } 331 defer pprof.StopCPUProfile() 332 333 select { 334 case <-time.After(d): 335 return nil 336 case <-p.unit.Quit(): 337 return context.Canceled 338 } 339 }