github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/scrape/scrape.go

// Copyright 2013 The Prometheus Authors
// Copyright 2021 The Pyroscope Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scrape

import (
	"bufio"
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"math"
	"net/http"
	"sync"
	"time"

	"github.com/sirupsen/logrus"

	"github.com/pyroscope-io/pyroscope/pkg/build"
	"github.com/pyroscope-io/pyroscope/pkg/convert/pprof"
	"github.com/pyroscope-io/pyroscope/pkg/ingestion"
	"github.com/pyroscope-io/pyroscope/pkg/scrape/config"
	"github.com/pyroscope-io/pyroscope/pkg/scrape/discovery/targetgroup"
	"github.com/pyroscope-io/pyroscope/pkg/storage/segment"
)

var UserAgent = fmt.Sprintf("Pyroscope/%s", build.Version)

var errBodySizeLimit = errors.New("body size limit exceeded")

// scrapePool manages scrapes for sets of targets.
type scrapePool struct {
	ingester ingestion.Ingester
	logger   logrus.FieldLogger

	// Global metrics shared by all pools.
	metrics *metrics
	// Job-specific metrics.
	poolMetrics *poolMetrics

	ctx    context.Context
	cancel context.CancelFunc

	// mtx must not be taken after targetMtx.
	mtx    sync.Mutex
	config *config.Config
	client *http.Client
	loops  map[uint64]*scrapeLoop

	targetMtx sync.Mutex
	// activeTargets and loops must always be synchronized to have the same
	// set of hashes.
	activeTargets  map[uint64]*Target
	droppedTargets []*Target
}

func newScrapePool(cfg *config.Config, p ingestion.Ingester, logger logrus.FieldLogger, m *metrics) (*scrapePool, error) {
	m.pools.Inc()
	client, err := config.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName)
	if err != nil {
		m.poolsFailed.Inc()
		return nil, fmt.Errorf("creating HTTP client: %w", err)
	}

	ctx, cancel := context.WithCancel(context.Background())
	sp := scrapePool{
		ctx:           ctx,
		cancel:        cancel,
		logger:        logger,
		ingester:      p,
		config:        cfg,
		client:        client,
		activeTargets: make(map[uint64]*Target),
		loops:         make(map[uint64]*scrapeLoop),

		metrics:     m,
		poolMetrics: m.poolMetrics(cfg.JobName),
	}

	return &sp, nil
}

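// newScrapeLoop wraps the given scraper in a scrapeLoop with the supplied
// interval and timeout. The delta window is taken from the target (via
// Target.deltaDuration), and the loop's context is derived from the pool's
// context so that stopping the pool cancels the loop.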
func (sp *scrapePool) newScrapeLoop(s *scraper, i, t time.Duration) *scrapeLoop {
	// TODO(kolesnikovae): Refactor.
	d, _ := s.Target.deltaDuration()
	x := scrapeLoop{
		scraper:     s,
		logger:      sp.logger,
		ingester:    sp.ingester,
		poolMetrics: sp.poolMetrics,
		stopped:     make(chan struct{}),
		delta:       d,
		interval:    i,
		timeout:     t,
	}
	x.ctx, x.cancel = context.WithCancel(sp.ctx)
	return &x
}

func (sp *scrapePool) ActiveTargets() []*Target {
	sp.targetMtx.Lock()
	defer sp.targetMtx.Unlock()
	var tActive []*Target
	for _, t := range sp.activeTargets {
		tActive = append(tActive, t)
	}
	return tActive
}

func (sp *scrapePool) DroppedTargets() []*Target {
	sp.targetMtx.Lock()
	defer sp.targetMtx.Unlock()
	return sp.droppedTargets
}

// stop terminates all scrapers and returns after they have all terminated.
func (sp *scrapePool) stop() {
	sp.mtx.Lock()
	defer sp.mtx.Unlock()
	sp.cancel()
	sp.targetMtx.Lock()
	var wg sync.WaitGroup
	wg.Add(len(sp.loops))
	for fp, l := range sp.loops {
		go func(l *scrapeLoop) {
			l.stop()
			wg.Done()
		}(l)
		delete(sp.loops, fp)
		delete(sp.activeTargets, fp)
		metricsLabels := []string{sp.config.JobName, l.scraper.Target.config.Path}
		sp.metrics.profileSize.DeleteLabelValues(metricsLabels...)
		sp.metrics.profileSamples.DeleteLabelValues(metricsLabels...)
		sp.metrics.scrapeDuration.DeleteLabelValues(metricsLabels...)
	}
	sp.targetMtx.Unlock()
	wg.Wait()
	sp.client.CloseIdleConnections()
	if sp.config == nil {
		return
	}
	sp.metrics.scrapeIntervalLength.DeleteLabelValues(sp.config.JobName)
	sp.metrics.poolReloadIntervalLength.DeleteLabelValues(sp.config.JobName)
	sp.metrics.poolSyncIntervalLength.DeleteLabelValues(sp.config.JobName)
	sp.metrics.poolSyncs.DeleteLabelValues(sp.config.JobName)
	sp.metrics.poolSyncFailed.DeleteLabelValues(sp.config.JobName)
	sp.metrics.poolTargetsAdded.DeleteLabelValues(sp.config.JobName)
	sp.metrics.scrapesFailed.DeleteLabelValues(sp.config.JobName)
}

// reload the scrape pool with the given scrape configuration. The target state is preserved
// but all scrapers are restarted with the new scrape configuration.
func (sp *scrapePool) reload(cfg *config.Config) error {
	sp.mtx.Lock()
	defer sp.mtx.Unlock()
	sp.metrics.poolReloads.Inc()
	start := time.Now()

	client, err := config.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName)
	if err != nil {
		sp.metrics.poolReloadsFailed.Inc()
		return fmt.Errorf("creating HTTP client: %w", err)
	}

	sp.config = cfg
	oldClient := sp.client
	sp.client = client

	var (
		wg            sync.WaitGroup
		interval      = sp.config.ScrapeInterval
		timeout       = sp.config.ScrapeTimeout
		bodySizeLimit = int64(sp.config.BodySizeLimit)
	)

	sp.targetMtx.Lock()
	for fp, oldLoop := range sp.loops {
		wg.Add(1)
		t := sp.activeTargets[fp]
		s := sp.newScraper(t, timeout, bodySizeLimit)
		n := sp.newScrapeLoop(s, interval, timeout)
		go func(oldLoop, newLoop *scrapeLoop) {
			oldLoop.stop()
			wg.Done()
			newLoop.run()
		}(oldLoop, n)
		sp.loops[fp] = n
	}

	sp.targetMtx.Unlock()
	wg.Wait()
	oldClient.CloseIdleConnections()
	sp.poolMetrics.poolReloadIntervalLength.Observe(time.Since(start).Seconds())
	return nil
}

func (sp *scrapePool) newScraper(t *Target, timeout time.Duration, bodySizeLimit int64) *scraper {
	return &scraper{
		Target:        t,
		client:        sp.client,
		timeout:       timeout,
		bodySizeLimit: bodySizeLimit,
		targetMetrics: sp.metrics.targetMetrics(sp.config.JobName, t.config.Path),
		ingester:      sp.ingester,
		key:           segment.NewKey(t.Labels().Map()),
		spyName:       t.SpyName(),
		cumulative:    t.IsCumulative(),
	}
}

// Sync converts target groups into actual scrape targets and synchronizes
// the currently running scrapers with the resulting set.
func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
	sp.mtx.Lock()
	defer sp.mtx.Unlock()
	start := time.Now()

	sp.targetMtx.Lock()
	var all []*Target
	sp.droppedTargets = []*Target{}
	for _, tg := range tgs {
		targets, failures := TargetsFromGroup(tg, sp.config)
		for _, err := range failures {
			sp.logger.WithError(err).Errorf("creating target")
		}
		sp.poolMetrics.poolSyncFailed.Add(float64(len(failures)))
		for _, t := range targets {
			if t.Labels().Len() > 0 {
				all = append(all, t)
			} else if t.DiscoveredLabels().Len() > 0 {
				sp.droppedTargets = append(sp.droppedTargets, t)
			}
		}
	}
	sp.targetMtx.Unlock()
	sp.sync(all)

	sp.poolMetrics.poolSyncIntervalLength.Observe(time.Since(start).Seconds())
	sp.poolMetrics.poolSyncs.Inc()
}

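// sync starts a scrape loop for every target that is not yet active and stops
// the loops whose targets are no longer present; it returns once all stopped
// loops have terminated.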
// revive:disable:confusing-naming private
// revive:disable:import-shadowing methods don't shadow imports
func (sp *scrapePool) sync(targets []*Target) {
	var (
		uniqueLoops   = make(map[uint64]*scrapeLoop)
		interval      = sp.config.ScrapeInterval
		timeout       = sp.config.ScrapeTimeout
		bodySizeLimit = int64(sp.config.BodySizeLimit)
	)

	sp.targetMtx.Lock()
	for _, t := range targets {
		hash := t.hash()
		_, ok := sp.activeTargets[hash]
		if ok {
			if _, ok := uniqueLoops[hash]; !ok {
				uniqueLoops[hash] = nil
			}
			continue
		}

		var err error
		interval, timeout, err = t.intervalAndTimeout(interval, timeout)
		if err != nil {
			sp.logger.WithError(err).Errorf("invalid target label")
		}

		s := sp.newScraper(t, timeout, bodySizeLimit)
		l := sp.newScrapeLoop(s, interval, timeout)
		sp.activeTargets[hash] = t
		sp.loops[hash] = l
		uniqueLoops[hash] = l
	}

	var wg sync.WaitGroup
	for hash := range sp.activeTargets {
		if _, ok := uniqueLoops[hash]; !ok {
			wg.Add(1)
			go func(l *scrapeLoop) {
				l.stop()
				wg.Done()
			}(sp.loops[hash])
			delete(sp.loops, hash)
			delete(sp.activeTargets, hash)
		}
	}

	sp.targetMtx.Unlock()
	sp.poolMetrics.poolTargetsAdded.Set(float64(len(uniqueLoops)))
	for _, l := range uniqueLoops {
		if l != nil {
			go l.run()
		}
	}

	wg.Wait()
}

type scrapeLoop struct {
	scraper  *scraper
	logger   logrus.FieldLogger
	ingester ingestion.Ingester

	poolMetrics *poolMetrics

	ctx     context.Context
	cancel  func()
	stopped chan struct{}

	delta    time.Duration
	interval time.Duration
	timeout  time.Duration
}

func (sl *scrapeLoop) run() {
	defer close(sl.stopped)
	select {
	case <-time.After(sl.scraper.offset(sl.interval)):
	case <-sl.ctx.Done():
		return
	}
	ticker := time.NewTicker(sl.interval)
	defer ticker.Stop()
	for {
		select {
		default:
		case <-sl.ctx.Done():
			return
		}
		if !sl.scraper.Target.lastScrape.IsZero() {
			sl.poolMetrics.scrapeIntervalLength.Observe(time.Since(sl.scraper.Target.lastScrape).Seconds())
		}
		sl.scrapeAndReport(sl.scraper.Target)
		select {
		case <-ticker.C:
		case <-sl.ctx.Done():
			return
		}
	}
}

func (sl *scrapeLoop) scrapeAndReport(t *Target) {
	now := time.Now()
	// There are two possible cases:
	// 1. A "delta" profile that is collected during the scrape. For instance,
	//    the Go CPU profile requires a "seconds" parameter. Such a profile
	//    represents the time span from now to now+delta.
	// 2. The profile is captured immediately. Even though the data represents
	//    the current moment, we still need to know when it was last scraped.
	if sl.delta == 0 && t.lastScrape.IsZero() {
		// Skip this round, as we could not reliably determine the time span
		// of the profile either way.
		t.lastScrape = now
		return
	}
	// N.B.: Although in some cases we can retrieve timings from the profile
	// itself (using the TimeNanos and DurationNanos fields), there is a big
	// chance that the period will overlap multiple segment "slots", thereby
	// producing redundant segment nodes and trees. Therefore, it's better to
	// adhere to the standard 10s period that fits the segment node size
	// (at level 0).
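	//
	// For example (illustration): with a 10s interval and no delta, a scrape
	// taken at 12:00:07 is attributed to the window 12:00:00-12:00:10
	// (endTime = now.Round(interval) = 12:00:10, startTime = endTime - interval);
	// with delta = 10s, the same scrape covers 12:00:10-12:00:20
	// (startTime = now.Round(delta), endTime = startTime + delta).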
	var startTime, endTime time.Time
	if sl.delta > 0 {
		startTime = now.Round(sl.delta)
		endTime = startTime.Add(sl.delta)
	} else {
		endTime = now.Round(sl.interval)
		startTime = endTime.Add(-1 * sl.interval)
	}
	err := sl.scrape(startTime, endTime)
	t.mtx.Lock()
	defer t.mtx.Unlock()
	if err == nil {
		t.health = HealthGood
	} else {
		t.health = HealthBad
	}
	t.lastError = err
	t.lastScrape = now
	t.lastScrapeDuration = time.Since(now)
	sl.scraper.targetMetrics.scrapeDuration.Observe(sl.scraper.Target.lastScrapeDuration.Seconds())
}

func (sl *scrapeLoop) scrape(startTime, endTime time.Time) error {
	ctx, cancel := context.WithTimeout(sl.ctx, sl.timeout)
	defer cancel()
	sl.poolMetrics.scrapes.Inc()
	buf := bytes.NewBuffer(make([]byte, 0, 64<<10))
	switch err := sl.scraper.scrape(ctx, buf); {
	case err == nil:
	case errors.Is(err, context.Canceled):
		sl.scraper.profile = nil
		return nil
	default:
		sl.poolMetrics.scrapesFailed.Inc()
		sl.logger.WithError(err).WithField("target", sl.scraper.Target.String()).Debug("scraping failed")
		sl.scraper.profile = nil
		return err
	}

	sl.scraper.targetMetrics.profileSize.Observe(float64(buf.Len()))
	if sl.scraper.profile == nil {
		sl.scraper.profile = &pprof.RawProfile{
			SampleTypeConfig: sl.scraper.config.SampleTypes,
		}
	}

	profile := sl.scraper.profile
	sl.scraper.profile = profile.Push(buf.Bytes(), sl.scraper.cumulative)
	return sl.scraper.ingester.Ingest(ctx, &ingestion.IngestInput{
		Profile: profile,
		Metadata: ingestion.Metadata{
			SpyName:   sl.scraper.spyName,
			Key:       sl.scraper.key,
			StartTime: startTime,
			EndTime:   endTime,
		},
	})
}

func (sl *scrapeLoop) stop() {
	sl.cancel()
	<-sl.stopped
}

type scraper struct {
	*Target

	ingester ingestion.Ingester
	profile  *pprof.RawProfile

	cumulative bool
	spyName    string
	key        *segment.Key

	client  *http.Client
	req     *http.Request
	timeout time.Duration

	buf           *bufio.Reader
	bodySizeLimit int64

	*targetMetrics
}

func (s *scraper) scrape(ctx context.Context, dst *bytes.Buffer) error {
	if s.req == nil {
		req, err := http.NewRequest("GET", s.URL().String(), nil)
		if err != nil {
			return err
		}
		req.Header.Set("User-Agent", UserAgent)
		s.req = req
	}

	resp, err := s.client.Do(s.req.WithContext(ctx))
	if err != nil {
		return err
	}
	defer func() {
		_ = resp.Body.Close()
	}()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("server returned HTTP status %s", resp.Status)
	}
	if s.bodySizeLimit <= 0 {
		s.bodySizeLimit = math.MaxInt64
	}
	n, err := io.Copy(dst, io.LimitReader(resp.Body, s.bodySizeLimit))
	if err != nil {
		return err
	}
	if n >= s.bodySizeLimit {
		return errBodySizeLimit
	}
	return nil
}
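
// Usage sketch (added for illustration; not part of the upstream file). The
// surrounding scrape manager is assumed to create one pool per scrape job,
// feed it discovered target groups, and stop it on shutdown; the variables
// below (cfg, ingester, logger, m, groups, newCfg) are placeholders:
//
//	sp, err := newScrapePool(cfg, ingester, logger, m)
//	if err != nil {
//		return err
//	}
//	sp.Sync(groups)       // start scrape loops for discovered targets
//	_ = sp.reload(newCfg) // restart loops with an updated configuration
//	sp.stop()             // terminate all loops and release resources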