github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/scrape/scrape.go

// Copyright 2013 The Prometheus Authors
// Copyright 2021 The Pyroscope Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scrape

import (
	"bufio"
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"math"
	"net/http"
	"sync"
	"time"

	"github.com/sirupsen/logrus"

	"github.com/pyroscope-io/pyroscope/pkg/build"
	"github.com/pyroscope-io/pyroscope/pkg/convert/pprof"
	"github.com/pyroscope-io/pyroscope/pkg/ingestion"
	"github.com/pyroscope-io/pyroscope/pkg/scrape/config"
	"github.com/pyroscope-io/pyroscope/pkg/scrape/discovery/targetgroup"
	"github.com/pyroscope-io/pyroscope/pkg/storage/segment"
)

var UserAgent = fmt.Sprintf("Pyroscope/%s", build.Version)

var errBodySizeLimit = errors.New("body size limit exceeded")

// scrapePool manages scrapes for sets of targets.
type scrapePool struct {
	ingester ingestion.Ingester
	logger   logrus.FieldLogger

	// Global metrics shared by all pools.
	metrics *metrics
	// Job-specific metrics.
	poolMetrics *poolMetrics

	ctx    context.Context
	cancel context.CancelFunc

	// mtx must not be taken after targetMtx.
	mtx    sync.Mutex
	config *config.Config
	client *http.Client
	loops  map[uint64]*scrapeLoop

	targetMtx sync.Mutex
	// activeTargets and loops must always be synchronized to have the same
	// set of hashes.
	activeTargets  map[uint64]*Target
	droppedTargets []*Target
}

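// newScrapePool creates a scrape pool for the given job configuration.
// A minimal usage sketch (illustrative only; the surrounding manager code
// and variable names are assumptions, not part of this file):
//
//	sp, err := newScrapePool(cfg, ingester, logger, m)
//	if err != nil {
//		return err
//	}
//	sp.Sync(groups) // start scraping the discovered target groups
//	defer sp.stop()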
func newScrapePool(cfg *config.Config, p ingestion.Ingester, logger logrus.FieldLogger, m *metrics) (*scrapePool, error) {
	m.pools.Inc()
	client, err := config.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName)
	if err != nil {
		m.poolsFailed.Inc()
		return nil, fmt.Errorf("creating HTTP client: %w", err)
	}

	ctx, cancel := context.WithCancel(context.Background())
	sp := scrapePool{
		ctx:           ctx,
		cancel:        cancel,
		logger:        logger,
		ingester:      p,
		config:        cfg,
		client:        client,
		activeTargets: make(map[uint64]*Target),
		loops:         make(map[uint64]*scrapeLoop),

		metrics:     m,
		poolMetrics: m.poolMetrics(cfg.JobName),
	}

	return &sp, nil
}

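// newScrapeLoop wires a scraper into a scrapeLoop that runs with the given
// interval i and timeout t, deriving the loop's context from the pool context.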
func (sp *scrapePool) newScrapeLoop(s *scraper, i, t time.Duration) *scrapeLoop {
	// TODO(kolesnikovae): Refactor.
	d, _ := s.Target.deltaDuration()
	x := scrapeLoop{
		scraper:     s,
		logger:      sp.logger,
		ingester:    sp.ingester,
		poolMetrics: sp.poolMetrics,
		stopped:     make(chan struct{}),
		delta:       d,
		interval:    i,
		timeout:     t,
	}
	x.ctx, x.cancel = context.WithCancel(sp.ctx)
	return &x
}

func (sp *scrapePool) ActiveTargets() []*Target {
	sp.targetMtx.Lock()
	defer sp.targetMtx.Unlock()
	var tActive []*Target
	for _, t := range sp.activeTargets {
		tActive = append(tActive, t)
	}
	return tActive
}

func (sp *scrapePool) DroppedTargets() []*Target {
	sp.targetMtx.Lock()
	defer sp.targetMtx.Unlock()
	return sp.droppedTargets
}

// stop terminates all scrapers and returns once they have all terminated.
func (sp *scrapePool) stop() {
	sp.mtx.Lock()
	defer sp.mtx.Unlock()
	sp.cancel()
	sp.targetMtx.Lock()
	var wg sync.WaitGroup
	wg.Add(len(sp.loops))
	for fp, l := range sp.loops {
		go func(l *scrapeLoop) {
			l.stop()
			wg.Done()
		}(l)
		delete(sp.loops, fp)
		delete(sp.activeTargets, fp)
		metricsLabels := []string{sp.config.JobName, l.scraper.Target.config.Path}
		sp.metrics.profileSize.DeleteLabelValues(metricsLabels...)
		sp.metrics.profileSamples.DeleteLabelValues(metricsLabels...)
		sp.metrics.scrapeDuration.DeleteLabelValues(metricsLabels...)
	}
	sp.targetMtx.Unlock()
	wg.Wait()
	sp.client.CloseIdleConnections()
	if sp.config == nil {
		return
	}
	sp.metrics.scrapeIntervalLength.DeleteLabelValues(sp.config.JobName)
	sp.metrics.poolReloadIntervalLength.DeleteLabelValues(sp.config.JobName)
	sp.metrics.poolSyncIntervalLength.DeleteLabelValues(sp.config.JobName)
	sp.metrics.poolSyncs.DeleteLabelValues(sp.config.JobName)
	sp.metrics.poolSyncFailed.DeleteLabelValues(sp.config.JobName)
	sp.metrics.poolTargetsAdded.DeleteLabelValues(sp.config.JobName)
	sp.metrics.scrapesFailed.DeleteLabelValues(sp.config.JobName)
}

// reload the scrape pool with the given scrape configuration. The target state is preserved
// but all scrapers are restarted with the new scrape configuration.
func (sp *scrapePool) reload(cfg *config.Config) error {
	sp.mtx.Lock()
	defer sp.mtx.Unlock()
	sp.metrics.poolReloads.Inc()
	start := time.Now()

	client, err := config.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName)
	if err != nil {
		sp.metrics.poolReloadsFailed.Inc()
		return fmt.Errorf("creating HTTP client: %w", err)
	}

	sp.config = cfg
	oldClient := sp.client
	sp.client = client

	var (
		wg            sync.WaitGroup
		interval      = sp.config.ScrapeInterval
		timeout       = sp.config.ScrapeTimeout
		bodySizeLimit = int64(sp.config.BodySizeLimit)
	)

	sp.targetMtx.Lock()
	for fp, oldLoop := range sp.loops {
		wg.Add(1)
		t := sp.activeTargets[fp]
		s := sp.newScraper(t, timeout, bodySizeLimit)
		n := sp.newScrapeLoop(s, interval, timeout)
		go func(oldLoop, newLoop *scrapeLoop) {
			oldLoop.stop()
			wg.Done()
			newLoop.run()
		}(oldLoop, n)
		sp.loops[fp] = n
	}

	sp.targetMtx.Unlock()
	wg.Wait()
	oldClient.CloseIdleConnections()
	sp.poolMetrics.poolReloadIntervalLength.Observe(time.Since(start).Seconds())
	return nil
}

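// newScraper builds a scraper for the target, bound to the pool's current
// HTTP client, ingester, and per-target metrics.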
func (sp *scrapePool) newScraper(t *Target, timeout time.Duration, bodySizeLimit int64) *scraper {
	return &scraper{
		Target:        t,
		client:        sp.client,
		timeout:       timeout,
		bodySizeLimit: bodySizeLimit,
		targetMetrics: sp.metrics.targetMetrics(sp.config.JobName, t.config.Path),
		ingester:      sp.ingester,
		key:           segment.NewKey(t.Labels().Map()),
		spyName:       t.SpyName(),
		cumulative:    t.IsCumulative(),
	}
}

// Sync converts target groups into actual scrape targets, synchronizes the
// currently running scrapers with the resulting set, and records dropped targets on the pool.
func (sp *scrapePool) Sync(tgs []*targetgroup.Group) {
	sp.mtx.Lock()
	defer sp.mtx.Unlock()
	start := time.Now()

	sp.targetMtx.Lock()
	var all []*Target
	sp.droppedTargets = []*Target{}
	for _, tg := range tgs {
		targets, failures := TargetsFromGroup(tg, sp.config)
		for _, err := range failures {
			sp.logger.WithError(err).Errorf("creating target")
		}
		sp.poolMetrics.poolSyncFailed.Add(float64(len(failures)))
		for _, t := range targets {
			if t.Labels().Len() > 0 {
				all = append(all, t)
			} else if t.DiscoveredLabels().Len() > 0 {
				sp.droppedTargets = append(sp.droppedTargets, t)
			}
		}
	}
	sp.targetMtx.Unlock()
	sp.sync(all)

	sp.poolMetrics.poolSyncIntervalLength.Observe(time.Since(start).Seconds())
	sp.poolMetrics.poolSyncs.Inc()
}

// revive:disable:confusing-naming private
// revive:disable:import-shadowing methods don't shadow imports
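// sync starts a scrape loop for every target that is not active yet and stops
// the loops of targets that are no longer present, waiting for stopped loops to exit.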
func (sp *scrapePool) sync(targets []*Target) {
	var (
		uniqueLoops   = make(map[uint64]*scrapeLoop)
		interval      = sp.config.ScrapeInterval
		timeout       = sp.config.ScrapeTimeout
		bodySizeLimit = int64(sp.config.BodySizeLimit)
	)

	sp.targetMtx.Lock()
	for _, t := range targets {
		hash := t.hash()
		_, ok := sp.activeTargets[hash]
		if ok {
			if _, ok := uniqueLoops[hash]; !ok {
				uniqueLoops[hash] = nil
			}
			continue
		}

		var err error
		interval, timeout, err = t.intervalAndTimeout(interval, timeout)
		if err != nil {
			sp.logger.WithError(err).Errorf("invalid target label")
		}

		s := sp.newScraper(t, timeout, bodySizeLimit)
		l := sp.newScrapeLoop(s, interval, timeout)
		sp.activeTargets[hash] = t
		sp.loops[hash] = l
		uniqueLoops[hash] = l
	}

	var wg sync.WaitGroup
	for hash := range sp.activeTargets {
		if _, ok := uniqueLoops[hash]; !ok {
			wg.Add(1)
			go func(l *scrapeLoop) {
				l.stop()
				wg.Done()
			}(sp.loops[hash])
			delete(sp.loops, hash)
			delete(sp.activeTargets, hash)
		}
	}

	sp.targetMtx.Unlock()
	sp.poolMetrics.poolTargetsAdded.Set(float64(len(uniqueLoops)))
	for _, l := range uniqueLoops {
		if l != nil {
			go l.run()
		}
	}

	wg.Wait()
}

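// scrapeLoop drives repeated scrapes of a single target and reports the
// results to the ingester and the pool metrics.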
type scrapeLoop struct {
	scraper  *scraper
	logger   logrus.FieldLogger
	ingester ingestion.Ingester

	poolMetrics *poolMetrics

	ctx     context.Context
	cancel  func()
	stopped chan struct{}

	delta    time.Duration
	interval time.Duration
	timeout  time.Duration
}

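// run waits for the target's initial offset to spread scrapes across the
// interval, then scrapes once per tick until the loop context is cancelled.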
func (sl *scrapeLoop) run() {
	defer close(sl.stopped)
	select {
	case <-time.After(sl.scraper.offset(sl.interval)):
	case <-sl.ctx.Done():
		return
	}
	ticker := time.NewTicker(sl.interval)
	defer ticker.Stop()
	for {
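		// Non-blocking check so a cancelled loop exits before scraping again.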
		select {
		default:
		case <-sl.ctx.Done():
			return
		}
		if !sl.scraper.Target.lastScrape.IsZero() {
			sl.poolMetrics.scrapeIntervalLength.Observe(time.Since(sl.scraper.Target.lastScrape).Seconds())
		}
		sl.scrapeAndReport(sl.scraper.Target)
		select {
		case <-ticker.C:
		case <-sl.ctx.Done():
			return
		}
	}
}

func (sl *scrapeLoop) scrapeAndReport(t *Target) {
	now := time.Now()
	// There are two possible cases:
	//  1. A "delta" profile that is collected during the scrape. For instance,
	//     the Go CPU profile requires a "seconds" parameter. Such a profile
	//     represents the time span from now to now+delta.
	//  2. The profile is captured immediately. Although the data represents
	//     the current moment, we need to know when it was scraped last time.
	if sl.delta == 0 && t.lastScrape.IsZero() {
		// Skip this round, as we could not reliably determine the time span
		// of the profile either way.
		t.lastScrape = now
		return
	}
	// N.B.: Although in some cases we can retrieve timings from
	// the profile itself (using the TimeNanos and DurationNanos fields),
	// there is a good chance that the period will overlap multiple
	// segment "slots", thereby producing redundant segment nodes and
	// trees. Therefore, it is better to adhere to the standard 10s period
	// that fits the segment node size (at level 0).
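	// For illustration only (hypothetical values): with delta = 10s and
	// now = 12:00:03, startTime rounds to 12:00:00 and endTime becomes
	// 12:00:10; with delta = 0, interval = 10s and now = 12:00:07,
	// endTime rounds to 12:00:10 and startTime becomes 12:00:00.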
	var startTime, endTime time.Time
	if sl.delta > 0 {
		startTime = now.Round(sl.delta)
		endTime = startTime.Add(sl.delta)
	} else {
		endTime = now.Round(sl.interval)
		startTime = endTime.Add(-1 * sl.interval)
	}
	err := sl.scrape(startTime, endTime)
	t.mtx.Lock()
	defer t.mtx.Unlock()
	if err == nil {
		t.health = HealthGood
	} else {
		t.health = HealthBad
	}
	t.lastError = err
	t.lastScrape = now
	t.lastScrapeDuration = time.Since(now)
	sl.scraper.targetMetrics.scrapeDuration.Observe(sl.scraper.Target.lastScrapeDuration.Seconds())
}

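// scrape performs a single scrape with the loop timeout, appends the response
// body to the accumulated raw profile, and submits it to the ingester together
// with the computed time span.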
func (sl *scrapeLoop) scrape(startTime, endTime time.Time) error {
	ctx, cancel := context.WithTimeout(sl.ctx, sl.timeout)
	defer cancel()
	sl.poolMetrics.scrapes.Inc()
	buf := bytes.NewBuffer(make([]byte, 0, 64<<10))
	switch err := sl.scraper.scrape(ctx, buf); {
	case err == nil:
	case errors.Is(err, context.Canceled):
		sl.scraper.profile = nil
		return nil
	default:
		sl.poolMetrics.scrapesFailed.Inc()
		sl.logger.WithError(err).WithField("target", sl.scraper.Target.String()).Debug("scraping failed")
		sl.scraper.profile = nil
		return err
	}

	sl.scraper.targetMetrics.profileSize.Observe(float64(buf.Len()))
	if sl.scraper.profile == nil {
		sl.scraper.profile = &pprof.RawProfile{
			SampleTypeConfig: sl.scraper.config.SampleTypes,
		}
	}

	profile := sl.scraper.profile
	sl.scraper.profile = profile.Push(buf.Bytes(), sl.scraper.cumulative)
	return sl.scraper.ingester.Ingest(ctx, &ingestion.IngestInput{
		Profile: profile,
		Metadata: ingestion.Metadata{
			SpyName:   sl.scraper.spyName,
			Key:       sl.scraper.key,
			StartTime: startTime,
			EndTime:   endTime,
		},
	})
}

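// stop cancels the loop context and blocks until run has returned.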
func (sl *scrapeLoop) stop() {
	sl.cancel()
	<-sl.stopped
}

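// scraper fetches a single target's profiling endpoint over HTTP and keeps
// the state (raw profile, request, limits) needed across scrapes.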
type scraper struct {
	*Target

	ingester ingestion.Ingester
	profile  *pprof.RawProfile

	cumulative bool
	spyName    string
	key        *segment.Key

	client  *http.Client
	req     *http.Request
	timeout time.Duration

	buf           *bufio.Reader
	bodySizeLimit int64

	*targetMetrics
}

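// scrape issues a GET request to the target URL and copies at most
// bodySizeLimit bytes of the response body into dst; reaching the limit is
// reported as errBodySizeLimit.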
func (s *scraper) scrape(ctx context.Context, dst *bytes.Buffer) error {
	if s.req == nil {
		req, err := http.NewRequest("GET", s.URL().String(), nil)
		if err != nil {
			return err
		}
		req.Header.Set("User-Agent", UserAgent)
		s.req = req
	}

	resp, err := s.client.Do(s.req.WithContext(ctx))
	if err != nil {
		return err
	}
	defer func() {
		_ = resp.Body.Close()
	}()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("server returned HTTP status %s", resp.Status)
	}
	if s.bodySizeLimit <= 0 {
		s.bodySizeLimit = math.MaxInt64
	}
	n, err := io.Copy(dst, io.LimitReader(resp.Body, s.bodySizeLimit))
	if err != nil {
		return err
	}
	if n >= s.bodySizeLimit {
		return errBodySizeLimit
	}
	return nil
}