github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/clients/pkg/promtail/targets/cloudflare/target.go

package cloudflare

import (
	"context"
	"errors"
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/buger/jsonparser"
	"github.com/cloudflare/cloudflare-go"
	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/backoff"
	"github.com/grafana/dskit/concurrency"
	"github.com/grafana/dskit/multierror"
	"github.com/prometheus/common/model"
	"go.uber.org/atomic"

	"github.com/grafana/loki/clients/pkg/promtail/api"
	"github.com/grafana/loki/clients/pkg/promtail/positions"
	"github.com/grafana/loki/clients/pkg/promtail/scrapeconfig"
	"github.com/grafana/loki/clients/pkg/promtail/targets/target"

	"github.com/grafana/loki/pkg/logproto"
)

// Cloudflare logs are not available for the most recent minute, so pulls must
// always stay at least minDelay behind now.
const minDelay = time.Minute

var cloudflareTooEarlyError = regexp.MustCompile(`too early: logs older than \S+ are not available`)

var defaultBackoff = backoff.Config{
	MinBackoff: 1 * time.Second,
	MaxBackoff: 10 * time.Second,
	MaxRetries: 5,
}

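// Target pulls logs for a single Cloudflare zone via the Logpull API and
// forwards them to the configured entry handler.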
type Target struct {
	logger    log.Logger
	handler   api.EntryHandler
	positions positions.Positions
	config    *scrapeconfig.CloudflareConfig
	metrics   *Metrics

	client  Client
	ctx     context.Context
	cancel  context.CancelFunc
	wg      sync.WaitGroup
	to      time.Time // the end of the next pull interval
	running *atomic.Bool
	err     error
}

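// NewTarget validates the config, builds a Cloudflare Logpull client, restores
// the last saved position for the zone, and starts pulling logs.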
func NewTarget(
	metrics *Metrics,
	logger log.Logger,
	handler api.EntryHandler,
	position positions.Positions,
	config *scrapeconfig.CloudflareConfig,
) (*Target, error) {
	if err := validateConfig(config); err != nil {
		return nil, err
	}
	fields, err := Fields(FieldsType(config.FieldsType))
	if err != nil {
		return nil, err
	}
	client, err := getClient(config.APIToken, config.ZoneID, fields)
	if err != nil {
		return nil, err
	}
	pos, err := position.Get(positions.CursorKey(config.ZoneID))
	if err != nil {
		return nil, err
	}
	to := time.Now()
	if pos != 0 {
		to = time.Unix(0, pos)
	}
	ctx, cancel := context.WithCancel(context.Background())
	t := &Target{
		logger:    logger,
		handler:   handler,
		positions: position,
		config:    config,
		metrics:   metrics,

		ctx:     ctx,
		cancel:  cancel,
		client:  client,
		to:      to,
		running: atomic.NewBool(false),
	}
	t.start()
	return t, nil
}

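// start runs the pull loop in the background: it repeatedly fetches the next
// window of logs (never closer to now than minDelay), splits it across the
// workers, then advances and persists the position. The loop exits on the
// first pull error or when the target is stopped.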
func (t *Target) start() {
	t.wg.Add(1)
	t.running.Store(true)
	go func() {
		defer func() {
			t.wg.Done()
			t.running.Store(false)
		}()
		for t.ctx.Err() == nil {
			end := t.to
			maxEnd := time.Now().Add(-minDelay)
			if end.After(maxEnd) {
				end = maxEnd
			}
			start := end.Add(-time.Duration(t.config.PullRange))
			requests := splitRequests(start, end, t.config.Workers)
			// Use a background context for the workers as we don't want to cancel halfway through.
			// In case of errors we stop the target; each worker has its own retry logic.
			if err := concurrency.ForEachJob(context.Background(), len(requests), t.config.Workers, func(ctx context.Context, idx int) error {
				request := requests[idx]
				return t.pull(ctx, request.start, request.end)
			}); err != nil {
				level.Error(t.logger).Log("msg", "failed to pull logs", "err", err, "start", start, "end", end)
				t.err = err
				return
			}

			// Set the current timestamp metric, move to the next interval, and save the position.
			t.metrics.LastEnd.Set(float64(end.UnixNano()) / 1e9)
			t.to = end.Add(time.Duration(t.config.PullRange))
			t.positions.Put(positions.CursorKey(t.config.ZoneID), t.to.UnixNano())

			// If the next window can be fetched, do it; otherwise sleep until it can be.
			// Cloudflare logs should never be pulled between now-1m and now.
			diff := t.to.Sub(time.Now().Add(-minDelay))
			if diff > 0 {
				select {
				case <-time.After(diff):
				case <-t.ctx.Done():
				}
			}
		}
	}()
}

// pull pulls logs from Cloudflare for the given time range,
// retrying on errors with the default backoff policy.
func (t *Target) pull(ctx context.Context, start, end time.Time) error {
	var (
		backoff = backoff.New(ctx, defaultBackoff)
		errs    = multierror.New()
		it      cloudflare.LogpullReceivedIterator
		err     error
	)

	for backoff.Ongoing() {
		it, err = t.client.LogpullReceived(ctx, start, end)
		if err != nil && cloudflareTooEarlyError.MatchString(err.Error()) {
			level.Warn(t.logger).Log("msg", "failed iterating over logs, out of cloudflare range, not retrying", "err", err, "start", start, "end", end, "retries", backoff.NumRetries())
			return nil
		} else if err != nil {
			errs.Add(err)
			backoff.Wait()
			continue
		}
		if err := func() error {
			defer it.Close()
			var lineRead int64
			for it.Next() {
				line := it.Line()
				ts, err := jsonparser.GetInt(line, "EdgeStartTimestamp")
				if err != nil {
					ts = time.Now().UnixNano()
				}
				t.handler.Chan() <- api.Entry{
					Labels: t.config.Labels.Clone(),
					Entry: logproto.Entry{
						Timestamp: time.Unix(0, ts),
						Line:      string(line),
					},
				}
				lineRead++
				t.metrics.Entries.Inc()
			}
			if it.Err() != nil {
				level.Warn(t.logger).Log("msg", "failed iterating over logs", "err", it.Err(), "start", start, "end", end, "retries", backoff.NumRetries(), "lineRead", lineRead)
				return it.Err()
			}
			return nil
		}(); err != nil {
			errs.Add(err)
			backoff.Wait()
			continue
		}
		return nil
	}
	return errs.Err()
}

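// Stop cancels the pull loop, waits for it to finish, and stops the handler.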
func (t *Target) Stop() {
	t.cancel()
	t.wg.Wait()
	t.handler.Stop()
}

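// Type returns the type of the target.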
func (t *Target) Type() target.TargetType {
	return target.CloudflareTargetType
}

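// DiscoveredLabels returns labels found via service discovery; there is no
// discovery for Cloudflare targets, so it is always nil.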
func (t *Target) DiscoveredLabels() model.LabelSet {
	return nil
}

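// Labels returns the static labels attached to every entry from this target.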
func (t *Target) Labels() model.LabelSet {
	return t.config.Labels
}

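// Ready reports whether the pull loop is currently running.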
func (t *Target) Ready() bool {
	return t.running.Load()
}

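// Details returns extra details about the target: the zone, the last error,
// the saved position, the end of the next pull interval, and the fields pulled.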
func (t *Target) Details() interface{} {
	fields, _ := Fields(FieldsType(t.config.FieldsType))
	// t.err is nil until the pull loop fails; guard before calling Error().
	var errMsg string
	if t.err != nil {
		errMsg = t.err.Error()
	}
	return map[string]string{
		"zone_id":        t.config.ZoneID,
		"error":          errMsg,
		"position":       t.positions.GetString(positions.CursorKey(t.config.ZoneID)),
		"last_timestamp": t.to.String(),
		"fields":         strings.Join(fields, ","),
	}
}

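// pullRequest is the time range assigned to a single worker.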
type pullRequest struct {
	start time.Time
	end   time.Time
}

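// splitRequests splits the range [start, end) into one equally sized
// pullRequest per worker. As an illustrative sketch (t0 is a hypothetical
// start time, not part of this file), a 3-minute range split across 3 workers
// yields three consecutive 1-minute requests:
//
//	reqs := splitRequests(t0, t0.Add(3*time.Minute), 3)
//	// reqs[0]: [t0, t0+1m)  reqs[1]: [t0+1m, t0+2m)  reqs[2]: [t0+2m, t0+3m)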
func splitRequests(start, end time.Time, workers int) []pullRequest {
	perWorker := end.Sub(start) / time.Duration(workers)
	var requests []pullRequest
	for i := 0; i < workers; i++ {
		r := pullRequest{
			start: start.Add(time.Duration(i) * perWorker),
			end:   start.Add(time.Duration(i+1) * perWorker),
		}
		// Duration division truncates, so the last request may fall short of
		// end; extend it to cover the remainder.
		if i == workers-1 && r.end != end {
			r.end = end
		}
		requests = append(requests, r)
	}
	return requests
}

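// validateConfig checks the required fields (API token and zone ID) and fills
// in defaults for the fields type, pull range, and worker count.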
func validateConfig(cfg *scrapeconfig.CloudflareConfig) error {
	if cfg.FieldsType == "" {
		cfg.FieldsType = string(FieldsTypeDefault)
	}
	if cfg.APIToken == "" {
		return errors.New("cloudflare api token is required")
	}
	if cfg.ZoneID == "" {
		return errors.New("cloudflare zone id is required")
	}
	if cfg.PullRange == 0 {
		cfg.PullRange = model.Duration(time.Minute)
	}
	if cfg.Workers == 0 {
		cfg.Workers = 3
	}
	return nil
}