github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/clients/pkg/promtail/targets/cloudflare/target.go

package cloudflare

import (
	"context"
	"errors"
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/buger/jsonparser"
	"github.com/cloudflare/cloudflare-go"
	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/backoff"
	"github.com/grafana/dskit/concurrency"
	"github.com/grafana/dskit/multierror"
	"github.com/prometheus/common/model"
	"go.uber.org/atomic"

	"github.com/grafana/loki/clients/pkg/promtail/api"
	"github.com/grafana/loki/clients/pkg/promtail/positions"
	"github.com/grafana/loki/clients/pkg/promtail/scrapeconfig"
	"github.com/grafana/loki/clients/pkg/promtail/targets/target"

	"github.com/grafana/loki/pkg/logproto"
)

// The minimum window size is 1 minute.
const minDelay = time.Minute

// cloudflareTooEarlyError matches the error returned by the Logpull API when
// the requested logs are older than Cloudflare's retention window.
var cloudflareTooEarlyError = regexp.MustCompile(`too early: logs older than \S+ are not available`)

var defaultBackoff = backoff.Config{
	MinBackoff: 1 * time.Second,
	MaxBackoff: 10 * time.Second,
	MaxRetries: 5,
}

// Target pulls logs for a single Cloudflare zone and forwards them to the
// entry handler.
type Target struct {
	logger    log.Logger
	handler   api.EntryHandler
	positions positions.Positions
	config    *scrapeconfig.CloudflareConfig
	metrics   *Metrics

	client  Client
	ctx     context.Context
	cancel  context.CancelFunc
	wg      sync.WaitGroup
	to      time.Time // the end of the next pull interval
	running *atomic.Bool
	err     error
}

func NewTarget(
	metrics *Metrics,
	logger log.Logger,
	handler api.EntryHandler,
	position positions.Positions,
	config *scrapeconfig.CloudflareConfig,
) (*Target, error) {
	if err := validateConfig(config); err != nil {
		return nil, err
	}
	fields, err := Fields(FieldsType(config.FieldsType))
	if err != nil {
		return nil, err
	}
	client, err := getClient(config.APIToken, config.ZoneID, fields)
	if err != nil {
		return nil, err
	}
	pos, err := position.Get(positions.CursorKey(config.ZoneID))
	if err != nil {
		return nil, err
	}
	// Resume from the stored cursor if there is one; otherwise start from now.
	to := time.Now()
	if pos != 0 {
		to = time.Unix(0, pos)
	}
	ctx, cancel := context.WithCancel(context.Background())
	t := &Target{
		logger:    logger,
		handler:   handler,
		positions: position,
		config:    config,
		metrics:   metrics,

		ctx:     ctx,
		cancel:  cancel,
		client:  client,
		to:      to,
		running: atomic.NewBool(false),
	}
	t.start()
	return t, nil
}

func (t *Target) start() {
	t.wg.Add(1)
	t.running.Store(true)
	go func() {
		defer func() {
			t.wg.Done()
			t.running.Store(false)
		}()
		for t.ctx.Err() == nil {
			end := t.to
			maxEnd := time.Now().Add(-minDelay)
			if end.After(maxEnd) {
				end = maxEnd
			}
			start := end.Add(-time.Duration(t.config.PullRange))
			requests := splitRequests(start, end, t.config.Workers)
			// Use a background context for the workers, as we don't want to cancel halfway through.
			// In case of errors we stop the target; each worker has its own retry logic.
			if err := concurrency.ForEachJob(context.Background(), len(requests), t.config.Workers, func(ctx context.Context, idx int) error {
				request := requests[idx]
				return t.pull(ctx, request.start, request.end)
			}); err != nil {
				level.Error(t.logger).Log("msg", "failed to pull logs", "err", err, "start", start, "end", end)
				t.err = err
				return
			}

			// Set the current timestamp metric, move to the next interval, and save the position.
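			// For example, with the default PullRange of 1m and end = 12:00:00,
			// LastEnd becomes the Unix seconds of 12:00:00, t.to advances to
			// 12:01:00, and that cursor is persisted in nanoseconds so a restart
			// resumes from the same window (see NewTarget).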
			t.metrics.LastEnd.Set(float64(end.UnixNano()) / 1e9)
			t.to = end.Add(time.Duration(t.config.PullRange))
			t.positions.Put(positions.CursorKey(t.config.ZoneID), t.to.UnixNano())

			// If the next window can be fetched, do it; if not, sleep for a while.
			// This is because Cloudflare logs should never be pulled between now-1m and now.
			diff := t.to.Sub(time.Now().Add(-minDelay))
			if diff > 0 {
				select {
				case <-time.After(diff):
				case <-t.ctx.Done():
				}
			}
		}
	}()
}

// pull pulls logs from Cloudflare for a given time range.
// It will retry on errors.
func (t *Target) pull(ctx context.Context, start, end time.Time) error {
	var (
		backoff = backoff.New(ctx, defaultBackoff)
		errs    = multierror.New()
		it      cloudflare.LogpullReceivedIterator
		err     error
	)

	for backoff.Ongoing() {
		it, err = t.client.LogpullReceived(ctx, start, end)
		if err != nil && cloudflareTooEarlyError.MatchString(err.Error()) {
			level.Warn(t.logger).Log("msg", "failed iterating over logs, out of cloudflare range, not retrying", "err", err, "start", start, "end", end, "retries", backoff.NumRetries())
			return nil
		} else if err != nil {
			errs.Add(err)
			backoff.Wait()
			continue
		}
		if err := func() error {
			defer it.Close()
			var linesRead int64
			for it.Next() {
				line := it.Line()
				// Use the edge start timestamp when present, falling back to now.
				ts, err := jsonparser.GetInt(line, "EdgeStartTimestamp")
				if err != nil {
					ts = time.Now().UnixNano()
				}
				t.handler.Chan() <- api.Entry{
					Labels: t.config.Labels.Clone(),
					Entry: logproto.Entry{
						Timestamp: time.Unix(0, ts),
						Line:      string(line),
					},
				}
				linesRead++
				t.metrics.Entries.Inc()
			}
			if it.Err() != nil {
				level.Warn(t.logger).Log("msg", "failed iterating over logs", "err", it.Err(), "start", start, "end", end, "retries", backoff.NumRetries(), "linesRead", linesRead)
				return it.Err()
			}
			return nil
		}(); err != nil {
			errs.Add(err)
			backoff.Wait()
			continue
		}
		return nil
	}
	return errs.Err()
}

// Stop shuts down the target and blocks until the pull loop has exited.
func (t *Target) Stop() {
	t.cancel()
	t.wg.Wait()
	t.handler.Stop()
}

func (t *Target) Type() target.TargetType {
	return target.CloudflareTargetType
}

func (t *Target) DiscoveredLabels() model.LabelSet {
	return nil
}

func (t *Target) Labels() model.LabelSet {
	return t.config.Labels
}

// Ready reports whether the pull loop is still running.
func (t *Target) Ready() bool {
	return t.running.Load()
}

// Details returns diagnostic information about the target.
func (t *Target) Details() interface{} {
	fields, _ := Fields(FieldsType(t.config.FieldsType))
	// t.err is nil until the pull loop fails; guard against a nil dereference.
	var lastErr string
	if t.err != nil {
		lastErr = t.err.Error()
	}
	return map[string]string{
		"zone_id":        t.config.ZoneID,
		"error":          lastErr,
		"position":       t.positions.GetString(positions.CursorKey(t.config.ZoneID)),
		"last_timestamp": t.to.String(),
		"fields":         strings.Join(fields, ","),
	}
}

type pullRequest struct {
	start time.Time
	end   time.Time
}
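// splitRequests splits the window [start, end] into `workers` contiguous
// sub-ranges of equal length. For instance (illustrative values only), a
// 3-minute window split across 3 workers yields:
//
//	splitRequests(t0, t0.Add(3*time.Minute), 3)
//	// -> [t0, t0+1m], [t0+1m, t0+2m], [t0+2m, t0+3m]
//
// Any remainder left by the integer division of the window is absorbed by the
// last request, so the full range is always covered.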
func splitRequests(start, end time.Time, workers int) []pullRequest {
	perWorker := end.Sub(start) / time.Duration(workers)
	var requests []pullRequest
	for i := 0; i < workers; i++ {
		r := pullRequest{
			start: start.Add(time.Duration(i) * perWorker),
			end:   start.Add(time.Duration(i+1) * perWorker),
		}
		// The integer division above can leave a remainder, so make sure the
		// last worker's range always ends exactly at end.
		if i == workers-1 && r.end != end {
			r.end = end
		}
		requests = append(requests, r)
	}
	return requests
}

func validateConfig(cfg *scrapeconfig.CloudflareConfig) error {
	if cfg.FieldsType == "" {
		cfg.FieldsType = string(FieldsTypeDefault)
	}
	if cfg.APIToken == "" {
		return errors.New("cloudflare api token is required")
	}
	if cfg.ZoneID == "" {
		return errors.New("cloudflare zone id is required")
	}
	if cfg.PullRange == 0 {
		cfg.PullRange = model.Duration(time.Minute)
	}
	if cfg.Workers == 0 {
		cfg.Workers = 3
	}
	return nil
}
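// A minimal sketch of building the config programmatically (hypothetical
// values; validateConfig fills in FieldsType, PullRange, and Workers when
// they are left zero):
//
//	cfg := &scrapeconfig.CloudflareConfig{
//		APIToken: "<api token>",
//		ZoneID:   "<zone id>",
//		Labels:   model.LabelSet{"job": "cloudflare"},
//	}
//	if err := validateConfig(cfg); err != nil {
//		// missing token or zone id
//	}
//	// cfg.PullRange == model.Duration(time.Minute) and cfg.Workers == 3 now hold.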