github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/agent/session.go (about) 1 package agent 2 3 import ( 4 "fmt" 5 logger2 "github.com/pyroscope-io/pyroscope/pkg/agent/log" 6 "github.com/pyroscope-io/pyroscope/pkg/util/alignedticker" 7 "os" 8 "sync" 9 "time" 10 11 // revive:disable:blank-imports Depending on configuration these packages may or may not be used. 12 // That's why we do a blank import here and then packages themselves register with the rest of the code. 13 14 _ "github.com/pyroscope-io/pyroscope/pkg/agent/debugspy" 15 _ "github.com/pyroscope-io/pyroscope/pkg/agent/dotnetspy" 16 _ "github.com/pyroscope-io/pyroscope/pkg/agent/ebpfspy" 17 _ "github.com/pyroscope-io/pyroscope/pkg/agent/gospy" 18 _ "github.com/pyroscope-io/pyroscope/pkg/agent/phpspy" 19 "github.com/pyroscope-io/pyroscope/pkg/agent/upstream" 20 "github.com/pyroscope-io/pyroscope/pkg/flameql" 21 "github.com/pyroscope-io/pyroscope/pkg/storage/segment" 22 "github.com/pyroscope-io/pyroscope/pkg/util/process" 23 "github.com/pyroscope-io/pyroscope/pkg/util/throttle" 24 25 // revive:enable:blank-imports 26 27 "github.com/pyroscope-io/pyroscope/pkg/agent/spy" 28 "github.com/pyroscope-io/pyroscope/pkg/structs/transporttrie" 29 ) 30 31 // Each Session can deal with: 32 // * multiple processes (one main process and zero or more subprocesses) 33 // * multiple profile types (cpu, mem, etc) 34 // * multiple names (app.cpu{} or app.cpu{controller=foo}) (one at a time) 35 36 /* 37 PROCESSES 38 ┌─────┬─────┬─────┐ 39 │pid 1│pid 2│pid 3│ 40 └──┬──┴──┬──┴──┬──┘ 41 │ │ │ NAMES/TAGS 42 │ │ │ ┌─app.cpu{} 43 0 ▼ 1 ▼ 2 ▼ │ ┌─app.cpu{controller=bar} 44 ┌─────┬─────┬─────┐ ┌─────┬─────┐ ┌──────┐ 45 0 cpu │ │ │ │ ───► │ │ │ ──► │ │ 46 └─────┴─────┴─────┘ └─────┴─────┘ │ │ 47 PROFILE TYPES SPIES TRIES ──► │server│ 48 ┌─────┬─────┬─────┐ ┌─────┬─────┐ │ │ 49 1 mem │ │ │ │ ───► │ │ │ ──► │ │ 50 └─────┴─────┴─────┘ └─────┴─────┘ └──────┘ 51 */ 52 // type process struct { 53 // pid int 54 // spies []*spy.Spy 55 // errorThrottler *throttle.Throttler 56 // } 57 58 const errorThrottlerPeriod = 10 * time.Second 59 60 type ProfileSession struct { 61 // configuration, doesn't change 62 upstream upstream.Upstream 63 spyName string 64 sampleRate uint32 65 profileTypes []spy.ProfileType 66 uploadRate time.Duration 67 disableGCRuns bool 68 withSubprocesses bool 69 clibIntegration bool 70 spyFactory SpyFactory 71 noForkDetection bool 72 pid int 73 74 logger logger2.Logger 75 throttler *throttle.Throttler 76 stopOnce sync.Once 77 stopCh chan struct{} 78 trieMutex sync.Mutex 79 80 // these things do change: 81 appName string 82 startTimeTruncated time.Time 83 84 // these slices / maps keep track of processes, spies, and tries 85 // see comment about multiple dimensions above 86 spies map[int][]spy.Spy // pid, profileType 87 // string is appName, int is index in pids 88 previousTries map[string][]*transporttrie.Trie 89 tries map[string][]*transporttrie.Trie 90 } 91 92 type SpyFactory func(pid int) ([]spy.Spy, error) 93 94 type SessionConfig struct { 95 upstream.Upstream 96 logger2.Logger 97 AppName string 98 Tags map[string]string 99 ProfilingTypes []spy.ProfileType 100 DisableGCRuns bool 101 SpyName string 102 SampleRate uint32 103 UploadRate time.Duration 104 Pid int 105 WithSubprocesses bool 106 ClibIntegration bool 107 PHPSpyArgs string 108 } 109 110 func NewSession(c SessionConfig) (*ProfileSession, error) { 111 return NewSessionWithSpyFactory(c, NewGenericSpyFactory(c)) 112 } 113 114 func NewSessionWithSpyFactory(c SessionConfig, spyFactory SpyFactory) (*ProfileSession, error) { 115 appName, err := mergeTagsWithAppName(c.AppName, c.Tags) 116 if err != nil { 117 return nil, err 118 } 119 120 ps := &ProfileSession{ 121 upstream: c.Upstream, 122 appName: appName, 123 spyName: c.SpyName, 124 profileTypes: c.ProfilingTypes, 125 disableGCRuns: c.DisableGCRuns, 126 sampleRate: c.SampleRate, 127 uploadRate: c.UploadRate, 128 pid: c.Pid, 129 spies: make(map[int][]spy.Spy), 130 stopCh: make(chan struct{}), 131 withSubprocesses: c.WithSubprocesses, 132 clibIntegration: c.ClibIntegration, 133 logger: c.Logger, 134 throttler: throttle.New(errorThrottlerPeriod), 135 spyFactory: spyFactory, 136 137 // string is appName, int is index in pids 138 previousTries: make(map[string][]*transporttrie.Trie), 139 tries: make(map[string][]*transporttrie.Trie), 140 } 141 142 ps.initializeTries(ps.appName) 143 144 return ps, nil 145 } 146 147 func NewGenericSpyFactory(c SessionConfig) SpyFactory { 148 return func(pid int) ([]spy.Spy, error) { 149 var res []spy.Spy 150 151 sf, err := spy.StartFunc(c.SpyName) 152 if err != nil { 153 return res, err 154 } 155 156 for _, pt := range c.ProfilingTypes { 157 params := spy.InitParams{ 158 Pid: pid, 159 ProfileType: pt, 160 SampleRate: c.SampleRate, 161 DisableGCRuns: c.DisableGCRuns, 162 Logger: c.Logger, 163 PHPSpyArgs: c.PHPSpyArgs, 164 } 165 s, err := sf(params) 166 167 if err != nil { 168 return res, err 169 } 170 res = append(res, s) 171 } 172 return res, nil 173 } 174 } 175 176 func addSuffix(name string, ptype spy.ProfileType) (string, error) { 177 k, err := segment.ParseKey(name) 178 if err != nil { 179 return "", err 180 } 181 k.Add("__name__", k.AppName()+"."+string(ptype)) 182 return k.Normalized(), nil 183 } 184 185 // mergeTagsWithAppName validates user input and merges explicitly specified 186 // tags with tags from app name. 187 // 188 // App name may be in the full form including tags (app.name{foo=bar,baz=qux}). 189 // Returned application name is always short, any tags that were included are 190 // moved to tags map. When merged with explicitly provided tags (config/CLI), 191 // last take precedence. 192 // 193 // App name may be an empty string. Tags must not contain reserved keys, 194 // the map is modified in place. 195 func mergeTagsWithAppName(appName string, tags map[string]string) (string, error) { 196 k, err := segment.ParseKey(appName) 197 if err != nil { 198 return "", err 199 } 200 for tagKey, tagValue := range tags { 201 if flameql.IsTagKeyReserved(tagKey) { 202 continue 203 } 204 if err = flameql.ValidateTagKey(tagKey); err != nil { 205 return "", err 206 } 207 k.Add(tagKey, tagValue) 208 } 209 return k.Normalized(), nil 210 } 211 212 func (ps *ProfileSession) takeSnapshots() { 213 var samplingCh <-chan time.Time 214 if ps.areSpiesResettable() { 215 samplingCh = make(chan time.Time) // will never fire 216 } else { 217 ticker := time.NewTicker(time.Second / time.Duration(ps.sampleRate)) 218 defer ticker.Stop() 219 samplingCh = ticker.C 220 } 221 uploadTicker := alignedticker.NewAlignedTicker(ps.uploadRate) 222 defer uploadTicker.Stop() 223 for { 224 select { 225 case endTimeTruncated := <-uploadTicker.C: 226 ps.resetSpies() 227 ps.takeSnapshot() 228 ps.reset(endTimeTruncated) 229 case <-samplingCh: 230 ps.takeSnapshot() 231 case <-ps.stopCh: 232 ps.stopSpies() 233 return 234 } 235 } 236 } 237 238 func (ps *ProfileSession) stopSpies() { 239 for _, sarr := range ps.spies { 240 for _, s := range sarr { 241 s.Stop() 242 } 243 } 244 } 245 246 func (ps *ProfileSession) takeSnapshot() { 247 ps.trieMutex.Lock() 248 defer ps.trieMutex.Unlock() 249 250 pidsToRemove := []int{} 251 for pid, sarr := range ps.spies { 252 for i, s := range sarr { 253 labelsCache := map[string]string{} 254 err := s.Snapshot(func(labels *spy.Labels, stack []byte, v uint64) error { 255 appName := ps.appName 256 if labels != nil { 257 if newAppName, ok := labelsCache[labels.ID()]; ok { 258 appName = newAppName 259 } else { 260 newAppName, err := mergeTagsWithAppName(appName, labels.Tags()) 261 if err != nil { 262 return fmt.Errorf("error setting tags: %w", err) 263 } 264 appName = newAppName 265 labelsCache[labels.ID()] = appName 266 } 267 } 268 if len(stack) > 0 { 269 if _, ok := ps.tries[appName]; !ok { 270 ps.initializeTries(appName) 271 } 272 ps.tries[appName][i].Insert(stack, v, true) 273 } 274 return nil 275 }) 276 if err != nil { 277 if pid >= 0 && !process.Exists(pid) { 278 ps.logger.Debugf("error taking snapshot: PID %d: process doesn't exist?", pid) 279 pidsToRemove = append(pidsToRemove, pid) 280 } else { 281 ps.throttler.Run(func(skipped int) { 282 if skipped > 0 { 283 ps.logger.Errorf("error taking snapshot: %v, %d messages skipped due to throttling", err, skipped) 284 } else { 285 ps.logger.Errorf("error taking snapshot: %v", err) 286 } 287 }) 288 } 289 } 290 } 291 } 292 for _, pid := range pidsToRemove { 293 for _, s := range ps.spies[pid] { 294 s.Stop() 295 } 296 delete(ps.spies, pid) 297 } 298 } 299 300 func (ps *ProfileSession) areSpiesResettable() bool { 301 for _, sarr := range ps.spies { 302 for _, s := range sarr { 303 if _, ok := s.(spy.Resettable); ok { 304 return true 305 } 306 } 307 } 308 return false 309 } 310 311 func (ps *ProfileSession) resetSpies() { 312 for _, sarr := range ps.spies { 313 for _, s := range sarr { 314 if sr, ok := s.(spy.Resettable); ok { 315 sr.Reset() 316 } 317 } 318 } 319 } 320 321 func (ps *ProfileSession) initializeSpies(pid int) ([]spy.Spy, error) { 322 return ps.spyFactory(pid) 323 } 324 325 func (ps *ProfileSession) ChangeName(newName string) error { 326 ps.trieMutex.Lock() 327 defer ps.trieMutex.Unlock() 328 329 var err error 330 newName, err = mergeTagsWithAppName(newName, map[string]string{}) 331 if err != nil { 332 return err 333 } 334 335 ps.appName = newName 336 ps.initializeTries(ps.appName) 337 338 return nil 339 } 340 341 func (ps *ProfileSession) initializeTries(appName string) { 342 if _, ok := ps.previousTries[appName]; !ok { 343 // TODO Only set the trie if it's not already set 344 ps.previousTries[appName] = []*transporttrie.Trie{} 345 ps.tries[appName] = []*transporttrie.Trie{} 346 for i := 0; i < len(ps.profileTypes); i++ { 347 ps.previousTries[appName] = append(ps.previousTries[appName], nil) 348 ps.tries[appName] = append(ps.tries[appName], transporttrie.New()) 349 } 350 } 351 } 352 353 // SetTags - add new tags to the session. 354 func (ps *ProfileSession) SetTags(tags map[string]string) error { 355 newName, err := mergeTagsWithAppName(ps.appName, tags) 356 if err != nil { 357 return err 358 } 359 return ps.ChangeName(newName) 360 } 361 362 // SetTag - add a new tag to the session. 363 func (ps *ProfileSession) SetTag(key, val string) error { 364 return ps.SetTags(map[string]string{key: val}) 365 } 366 367 // RemoveTags - remove tags from the session. 368 func (ps *ProfileSession) RemoveTags(keys ...string) error { 369 removals := make(map[string]string) 370 for _, key := range keys { 371 // 'Adding' a key with an empty string triggers a key removal. 372 removals[key] = "" 373 } 374 newName, err := mergeTagsWithAppName(ps.appName, removals) 375 if err != nil { 376 return err 377 } 378 return ps.ChangeName(newName) 379 } 380 381 func (ps *ProfileSession) Start() error { 382 ps.reset(time.Now().Truncate(ps.uploadRate)) 383 384 pid := ps.pid 385 spies, err := ps.initializeSpies(pid) 386 if err != nil { 387 return err 388 } 389 390 ps.spies[pid] = spies 391 392 go ps.takeSnapshots() 393 return nil 394 } 395 396 // the difference between stop and reset is that reset stops current session 397 // and then instantly starts a new one 398 func (ps *ProfileSession) reset(endTimeTruncated time.Time) { 399 ps.trieMutex.Lock() 400 defer ps.trieMutex.Unlock() 401 402 // if the process was forked the spy will keep profiling the old process. That's usually not what you want 403 // so in that case we stop the profiling session early 404 if ps.clibIntegration && !ps.noForkDetection && ps.isForked() { 405 ps.logger.Debugf("fork detected, stopping the session") 406 ps.stopOnce.Do(func() { 407 close(ps.stopCh) 408 }) 409 return 410 } 411 412 // upload the read data to server 413 if !ps.startTimeTruncated.IsZero() { 414 ps.uploadTries(endTimeTruncated) 415 } 416 417 // reset the start time 418 ps.startTimeTruncated = endTimeTruncated 419 420 if ps.withSubprocesses { 421 ps.addSubprocesses() 422 } 423 } 424 425 func (ps *ProfileSession) Stop() { 426 ps.trieMutex.Lock() 427 defer ps.trieMutex.Unlock() 428 429 ps.stopOnce.Do(func() { 430 // TODO: wait for stopCh consumer to finish! 431 close(ps.stopCh) 432 // before stopping, upload the tries 433 if !ps.startTimeTruncated.IsZero() { 434 ps.uploadTries(ps.startTimeTruncated.Add(ps.uploadRate)) 435 } // was never started 436 }) 437 } 438 439 func (ps *ProfileSession) uploadTries(endTimeTruncated time.Time) { 440 for name, tarr := range ps.tries { 441 for i, trie := range tarr { 442 profileType := ps.profileTypes[i] 443 skipUpload := false 444 445 if trie != nil { 446 endTime := endTimeTruncated 447 startTime := endTime.Add(-ps.uploadRate) 448 449 uploadTrie := trie 450 if profileType.IsCumulative() { 451 previousTrie := ps.previousTries[name][i] 452 if previousTrie == nil { 453 skipUpload = true 454 } else { 455 // TODO: Diff doesn't remove empty branches. We need to add that at some point 456 uploadTrie = trie.Diff(previousTrie) 457 } 458 } 459 460 if !skipUpload && !uploadTrie.IsEmpty() { 461 nameWithSuffix, _ := addSuffix(name, profileType) 462 ps.upstream.Upload(&upstream.UploadJob{ 463 Name: nameWithSuffix, 464 StartTime: startTime, 465 EndTime: endTime, 466 SpyName: ps.spyName, 467 SampleRate: ps.sampleRate, 468 Units: profileType.Units(), 469 AggregationType: profileType.AggregationType(), 470 Trie: uploadTrie, 471 }) 472 } 473 if profileType.IsCumulative() { 474 ps.previousTries[name][i] = trie 475 } 476 } 477 ps.tries[name][i] = transporttrie.New() 478 } 479 } 480 } 481 482 func (ps *ProfileSession) isForked() bool { 483 return os.Getpid() != ps.pid 484 } 485 486 func (ps *ProfileSession) addSubprocesses() { 487 newPids := findAllSubprocesses(ps.pid) 488 for _, newPid := range newPids { 489 if _, ok := ps.spies[newPid]; !ok { 490 newSpies, err := ps.initializeSpies(newPid) 491 if err != nil { 492 if ps.logger != nil { 493 ps.logger.Errorf("failed to initialize a spy %d [%s]", newPid, ps.spyName) 494 } 495 } else { 496 if ps.logger != nil { 497 ps.logger.Debugf("started spy for subprocess %d [%s]", newPid, ps.spyName) 498 } 499 ps.spies[newPid] = newSpies 500 } 501 } 502 } 503 }