github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ingester/flush.go (about) 1 package ingester 2 3 import ( 4 "bytes" 5 "fmt" 6 "net/http" 7 "sync" 8 "time" 9 10 "github.com/go-kit/log/level" 11 "github.com/prometheus/client_golang/prometheus" 12 "github.com/prometheus/common/model" 13 "github.com/prometheus/prometheus/model/labels" 14 "github.com/weaveworks/common/user" 15 "golang.org/x/net/context" 16 17 "github.com/grafana/dskit/tenant" 18 19 "github.com/grafana/loki/pkg/chunkenc" 20 "github.com/grafana/loki/pkg/storage/chunk" 21 "github.com/grafana/loki/pkg/util" 22 loki_util "github.com/grafana/loki/pkg/util" 23 util_log "github.com/grafana/loki/pkg/util/log" 24 ) 25 26 const ( 27 // Backoff for retrying 'immediate' flushes. Only counts for queue 28 // position, not wallclock time. 29 flushBackoff = 1 * time.Second 30 31 nameLabel = "__name__" 32 logsValue = "logs" 33 34 flushReasonIdle = "idle" 35 flushReasonMaxAge = "max_age" 36 flushReasonForced = "forced" 37 flushReasonFull = "full" 38 flushReasonSynced = "synced" 39 ) 40 41 // Note: this is called both during the WAL replay (zero or more times) 42 // and then after replay as well. 43 func (i *Ingester) InitFlushQueues() { 44 i.flushQueuesDone.Add(i.cfg.ConcurrentFlushes) 45 for j := 0; j < i.cfg.ConcurrentFlushes; j++ { 46 i.flushQueues[j] = util.NewPriorityQueue(flushQueueLength) 47 go i.flushLoop(j) 48 } 49 } 50 51 // Flush triggers a flush of all the chunks and closes the flush queues. 52 // Called from the Lifecycler as part of the ingester shutdown. 53 func (i *Ingester) Flush() { 54 i.flush(true) 55 } 56 57 func (i *Ingester) flush(mayRemoveStreams bool) { 58 i.sweepUsers(true, mayRemoveStreams) 59 60 // Close the flush queues, to unblock waiting workers. 61 for _, flushQueue := range i.flushQueues { 62 flushQueue.Close() 63 } 64 65 i.flushQueuesDone.Wait() 66 level.Debug(util_log.Logger).Log("msg", "flush queues have drained") 67 } 68 69 // FlushHandler triggers a flush of all in memory chunks. Mainly used for 70 // local testing. 71 func (i *Ingester) FlushHandler(w http.ResponseWriter, _ *http.Request) { 72 i.sweepUsers(true, true) 73 w.WriteHeader(http.StatusNoContent) 74 } 75 76 type flushOp struct { 77 from model.Time 78 userID string 79 fp model.Fingerprint 80 immediate bool 81 } 82 83 func (o *flushOp) Key() string { 84 return fmt.Sprintf("%s-%s-%v", o.userID, o.fp, o.immediate) 85 } 86 87 func (o *flushOp) Priority() int64 { 88 return -int64(o.from) 89 } 90 91 // sweepUsers periodically schedules series for flushing and garbage collects users with no series 92 func (i *Ingester) sweepUsers(immediate, mayRemoveStreams bool) { 93 instances := i.getInstances() 94 95 for _, instance := range instances { 96 i.sweepInstance(instance, immediate, mayRemoveStreams) 97 } 98 } 99 100 func (i *Ingester) sweepInstance(instance *instance, immediate, mayRemoveStreams bool) { 101 _ = instance.streams.ForEach(func(s *stream) (bool, error) { 102 i.sweepStream(instance, s, immediate) 103 i.removeFlushedChunks(instance, s, mayRemoveStreams) 104 return true, nil 105 }) 106 } 107 108 func (i *Ingester) sweepStream(instance *instance, stream *stream, immediate bool) { 109 stream.chunkMtx.RLock() 110 defer stream.chunkMtx.RUnlock() 111 if len(stream.chunks) == 0 { 112 return 113 } 114 115 lastChunk := stream.chunks[len(stream.chunks)-1] 116 shouldFlush, _ := i.shouldFlushChunk(&lastChunk) 117 if len(stream.chunks) == 1 && !immediate && !shouldFlush { 118 return 119 } 120 121 flushQueueIndex := int(uint64(stream.fp) % uint64(i.cfg.ConcurrentFlushes)) 122 firstTime, _ := stream.chunks[0].chunk.Bounds() 123 i.flushQueues[flushQueueIndex].Enqueue(&flushOp{ 124 model.TimeFromUnixNano(firstTime.UnixNano()), instance.instanceID, 125 stream.fp, immediate, 126 }) 127 } 128 129 func (i *Ingester) flushLoop(j int) { 130 defer func() { 131 level.Debug(util_log.Logger).Log("msg", "Ingester.flushLoop() exited") 132 i.flushQueuesDone.Done() 133 }() 134 135 for { 136 o := i.flushQueues[j].Dequeue() 137 if o == nil { 138 return 139 } 140 op := o.(*flushOp) 141 142 err := i.flushUserSeries(op.userID, op.fp, op.immediate) 143 if err != nil { 144 level.Error(util_log.WithUserID(op.userID, util_log.Logger)).Log("msg", "failed to flush", "err", err) 145 } 146 147 // If we're exiting & we failed to flush, put the failed operation 148 // back in the queue at a later point. 149 if op.immediate && err != nil { 150 op.from = op.from.Add(flushBackoff) 151 i.flushQueues[j].Enqueue(op) 152 } 153 } 154 } 155 156 func (i *Ingester) flushUserSeries(userID string, fp model.Fingerprint, immediate bool) error { 157 instance, ok := i.getInstanceByID(userID) 158 if !ok { 159 return nil 160 } 161 162 chunks, labels, chunkMtx := i.collectChunksToFlush(instance, fp, immediate) 163 if len(chunks) < 1 { 164 return nil 165 } 166 167 lbs := labels.String() 168 level.Info(util_log.Logger).Log("msg", "flushing stream", "user", userID, "fp", fp, "immediate", immediate, "num_chunks", len(chunks), "labels", lbs) 169 170 ctx := user.InjectOrgID(context.Background(), userID) 171 ctx, cancel := context.WithTimeout(ctx, i.cfg.FlushOpTimeout) 172 defer cancel() 173 err := i.flushChunks(ctx, fp, labels, chunks, chunkMtx) 174 if err != nil { 175 return fmt.Errorf("failed to flush chunks: %w, num_chunks: %d, labels: %s", err, len(chunks), lbs) 176 } 177 178 return nil 179 } 180 181 func (i *Ingester) collectChunksToFlush(instance *instance, fp model.Fingerprint, immediate bool) ([]*chunkDesc, labels.Labels, *sync.RWMutex) { 182 var stream *stream 183 var ok bool 184 stream, ok = instance.streams.LoadByFP(fp) 185 186 if !ok { 187 return nil, nil, nil 188 } 189 190 stream.chunkMtx.Lock() 191 defer stream.chunkMtx.Unlock() 192 193 var result []*chunkDesc 194 for j := range stream.chunks { 195 shouldFlush, reason := i.shouldFlushChunk(&stream.chunks[j]) 196 if immediate || shouldFlush { 197 // Ensure no more writes happen to this chunk. 198 if !stream.chunks[j].closed { 199 stream.chunks[j].closed = true 200 } 201 // Flush this chunk if it hasn't already been successfully flushed. 202 if stream.chunks[j].flushed.IsZero() { 203 if immediate { 204 reason = flushReasonForced 205 } 206 stream.chunks[j].reason = reason 207 208 result = append(result, &stream.chunks[j]) 209 } 210 } 211 } 212 return result, stream.labels, &stream.chunkMtx 213 } 214 215 func (i *Ingester) shouldFlushChunk(chunk *chunkDesc) (bool, string) { 216 // Append should close the chunk when the a new one is added. 217 if chunk.closed { 218 if chunk.synced { 219 return true, flushReasonSynced 220 } 221 return true, flushReasonFull 222 } 223 224 if time.Since(chunk.lastUpdated) > i.cfg.MaxChunkIdle { 225 return true, flushReasonIdle 226 } 227 228 if from, to := chunk.chunk.Bounds(); to.Sub(from) > i.cfg.MaxChunkAge { 229 return true, flushReasonMaxAge 230 } 231 232 return false, "" 233 } 234 235 func (i *Ingester) removeFlushedChunks(instance *instance, stream *stream, mayRemoveStream bool) { 236 now := time.Now() 237 238 stream.chunkMtx.Lock() 239 defer stream.chunkMtx.Unlock() 240 prevNumChunks := len(stream.chunks) 241 var subtracted int 242 for len(stream.chunks) > 0 { 243 if stream.chunks[0].flushed.IsZero() || now.Sub(stream.chunks[0].flushed) < i.cfg.RetainPeriod { 244 break 245 } 246 247 subtracted += stream.chunks[0].chunk.UncompressedSize() 248 stream.chunks[0].chunk = nil // erase reference so the chunk can be garbage-collected 249 stream.chunks = stream.chunks[1:] 250 } 251 i.metrics.memoryChunks.Sub(float64(prevNumChunks - len(stream.chunks))) 252 253 // Signal how much data has been flushed to lessen any WAL replay pressure. 254 i.replayController.Sub(int64(subtracted)) 255 256 if mayRemoveStream && len(stream.chunks) == 0 { 257 // Unlock first, then lock inside streams' lock to prevent deadlock 258 stream.chunkMtx.Unlock() 259 // Only lock streamsMap when it's needed to remove a stream 260 instance.streams.WithLock(func() { 261 stream.chunkMtx.Lock() 262 // Double check length 263 if len(stream.chunks) == 0 { 264 instance.removeStream(stream) 265 } 266 }) 267 } 268 } 269 270 // flushChunks iterates over given chunkDescs, derives chunk.Chunk from them and flush them to the store, one at a time. 271 // 272 // If a chunk fails to be flushed, this operation is reinserted in the queue. Since previously flushed chunks 273 // are marked as flushed, they shouldn't be flushed again. 274 // It has to close given chunks to have have the head block included. 275 func (i *Ingester) flushChunks(ctx context.Context, fp model.Fingerprint, labelPairs labels.Labels, cs []*chunkDesc, chunkMtx sync.Locker) error { 276 userID, err := tenant.TenantID(ctx) 277 if err != nil { 278 return err 279 } 280 281 // NB(owen-d): No longer needed in TSDB (and is removed in that code path) 282 // It's required by historical index stores so we keep it for now. 283 labelsBuilder := labels.NewBuilder(labelPairs) 284 labelsBuilder.Set(nameLabel, logsValue) 285 metric := labelsBuilder.Labels() 286 287 sizePerTenant := i.metrics.chunkSizePerTenant.WithLabelValues(userID) 288 countPerTenant := i.metrics.chunksPerTenant.WithLabelValues(userID) 289 290 for j, c := range cs { 291 if err := i.closeChunk(c, chunkMtx); err != nil { 292 return fmt.Errorf("chunk close for flushing: %w", err) 293 } 294 295 firstTime, lastTime := loki_util.RoundToMilliseconds(c.chunk.Bounds()) 296 ch := chunk.NewChunk( 297 userID, fp, metric, 298 chunkenc.NewFacade(c.chunk, i.cfg.BlockSize, i.cfg.TargetChunkSize), 299 firstTime, 300 lastTime, 301 ) 302 303 // encodeChunk mutates the chunk so we must pass by reference 304 if err := i.encodeChunk(ctx, &ch, c); err != nil { 305 return err 306 } 307 308 if err := i.flushChunk(ctx, &ch); err != nil { 309 return err 310 } 311 312 i.markChunkAsFlushed(cs[j], chunkMtx) 313 314 reason := func() string { 315 chunkMtx.Lock() 316 defer chunkMtx.Unlock() 317 318 return c.reason 319 }() 320 321 i.reportFlushedChunkStatistics(&ch, c, sizePerTenant, countPerTenant, reason) 322 } 323 324 return nil 325 } 326 327 // markChunkAsFlushed mark a chunk to make sure it won't be flushed if this operation fails. 328 func (i *Ingester) markChunkAsFlushed(desc *chunkDesc, chunkMtx sync.Locker) { 329 chunkMtx.Lock() 330 defer chunkMtx.Unlock() 331 desc.flushed = time.Now() 332 } 333 334 // closeChunk closes the given chunk while locking it to ensure that new blocks are cut before flushing. 335 // 336 // If the chunk isn't closed, data in the head block isn't included. 337 func (i *Ingester) closeChunk(desc *chunkDesc, chunkMtx sync.Locker) error { 338 chunkMtx.Lock() 339 defer chunkMtx.Unlock() 340 341 return desc.chunk.Close() 342 } 343 344 // encodeChunk encodes a chunk.Chunk based on the given chunkDesc. 345 // 346 // If the encoding is unsuccessful the flush operation is reinserted in the queue which will cause 347 // the encoding for a given chunk to be evaluated again. 348 func (i *Ingester) encodeChunk(ctx context.Context, ch *chunk.Chunk, desc *chunkDesc) error { 349 if err := ctx.Err(); err != nil { 350 return err 351 } 352 start := time.Now() 353 chunkBytesSize := desc.chunk.BytesSize() + 4*1024 // size + 4kB should be enough room for cortex header 354 if err := ch.EncodeTo(bytes.NewBuffer(make([]byte, 0, chunkBytesSize))); err != nil { 355 return fmt.Errorf("chunk encoding: %w", err) 356 } 357 i.metrics.chunkEncodeTime.Observe(time.Since(start).Seconds()) 358 return nil 359 } 360 361 // flushChunk flushes the given chunk to the store. 362 // 363 // If the flush is successful, metrics for this flush are to be reported. 364 // If the flush isn't successful, the operation for this userID is requeued allowing this and all other unflushed 365 // chunk to have another opportunity to be flushed. 366 func (i *Ingester) flushChunk(ctx context.Context, ch *chunk.Chunk) error { 367 if err := i.store.Put(ctx, []chunk.Chunk{*ch}); err != nil { 368 return fmt.Errorf("store put chunk: %w", err) 369 } 370 i.metrics.flushedChunksStats.Inc(1) 371 return nil 372 } 373 374 // reportFlushedChunkStatistics calculate overall statistics of flushed chunks without compromising the flush process. 375 func (i *Ingester) reportFlushedChunkStatistics(ch *chunk.Chunk, desc *chunkDesc, sizePerTenant prometheus.Counter, countPerTenant prometheus.Counter, reason string) { 376 byt, err := ch.Encoded() 377 if err != nil { 378 level.Error(util_log.Logger).Log("msg", "failed to encode flushed wire chunk", "err", err) 379 return 380 } 381 382 i.metrics.chunksFlushedPerReason.WithLabelValues(reason).Add(1) 383 384 compressedSize := float64(len(byt)) 385 uncompressedSize, ok := chunkenc.UncompressedSize(ch.Data) 386 387 if ok && compressedSize > 0 { 388 i.metrics.chunkCompressionRatio.Observe(float64(uncompressedSize) / compressedSize) 389 } 390 391 utilization := ch.Data.Utilization() 392 i.metrics.chunkUtilization.Observe(utilization) 393 numEntries := desc.chunk.Size() 394 i.metrics.chunkEntries.Observe(float64(numEntries)) 395 i.metrics.chunkSize.Observe(compressedSize) 396 sizePerTenant.Add(compressedSize) 397 countPerTenant.Inc() 398 399 boundsFrom, boundsTo := desc.chunk.Bounds() 400 i.metrics.chunkAge.Observe(time.Since(boundsFrom).Seconds()) 401 i.metrics.chunkLifespan.Observe(boundsTo.Sub(boundsFrom).Hours()) 402 403 i.metrics.flushedChunksBytesStats.Record(compressedSize) 404 i.metrics.flushedChunksLinesStats.Record(float64(numEntries)) 405 i.metrics.flushedChunksUtilizationStats.Record(utilization) 406 i.metrics.flushedChunksAgeStats.Record(time.Since(boundsFrom).Seconds()) 407 i.metrics.flushedChunksLifespanStats.Record(boundsTo.Sub(boundsFrom).Hours()) 408 }