github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ingester/stream.go

package ingester

import (
	"bytes"
	"context"
	"fmt"
	"net/http"
	"sync"
	"time"

	"github.com/go-kit/log/level"
	"github.com/pkg/errors"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/weaveworks/common/httpgrpc"

	"github.com/grafana/loki/pkg/chunkenc"
	"github.com/grafana/loki/pkg/iter"
	"github.com/grafana/loki/pkg/logproto"
	"github.com/grafana/loki/pkg/logql/log"
	"github.com/grafana/loki/pkg/logqlmodel/stats"
	"github.com/grafana/loki/pkg/util/flagext"
	util_log "github.com/grafana/loki/pkg/util/log"
	"github.com/grafana/loki/pkg/validation"
)

var ErrEntriesExist = errors.New("duplicate push - entries already exist")

type line struct {
	ts      time.Time
	content string
}

type stream struct {
	limiter *StreamRateLimiter
	cfg     *Config
	tenant  string
	// Newest chunk at chunks[n-1].
	// Not thread-safe; assume accesses to this are locked by caller.
	chunks   []chunkDesc
	fp       model.Fingerprint // possibly remapped fingerprint, used in the streams map
	chunkMtx sync.RWMutex

	labels       labels.Labels
	labelsString string

	// most recently pushed line. This is used to prevent duplicate pushes.
	// It also determines chunk synchronization when unordered writes are disabled.
	lastLine line

	// keeps track of the highest timestamp accepted by the stream.
	// This is used when unordered writes are enabled to cap the validity window
	// of accepted writes and for chunk synchronization.
	highestTs time.Time

	metrics *ingesterMetrics

	tailers   map[uint32]*tailer
	tailerMtx sync.RWMutex

	// entryCt is a counter which is incremented on each accepted entry.
	// This allows us to discard WAL entries during replays which were
	// already recovered via checkpoints. Historically, out-of-order
	// errors were used to detect this, but this counter has been
	// introduced to facilitate removing the ordering constraint.
	entryCt int64

	unorderedWrites bool
}

type chunkDesc struct {
	chunk   *chunkenc.MemChunk
	closed  bool
	synced  bool
	flushed time.Time
	reason  string

	lastUpdated time.Time
}

type entryWithError struct {
	entry *logproto.Entry
	e     error
}

func newStream(cfg *Config, limits RateLimiterStrategy, tenant string, fp model.Fingerprint, labels labels.Labels, unorderedWrites bool, metrics *ingesterMetrics) *stream {
	return &stream{
		limiter:         NewStreamRateLimiter(limits, tenant, 10*time.Second),
		cfg:             cfg,
		fp:              fp,
		labels:          labels,
		labelsString:    labels.String(),
		tailers:         map[uint32]*tailer{},
		metrics:         metrics,
		tenant:          tenant,
		unorderedWrites: unorderedWrites,
	}
}
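// The sketch below is illustrative and not part of the original file: it shows
// how a caller might wire a stream together and push a batch. The cfg, limits,
// and metrics parameters stand in for the ingester's real dependencies, which
// are constructed elsewhere; treat this as a hedged usage sketch only.
func exampleNewStreamAndPush(cfg *Config, limits RateLimiterStrategy, metrics *ingesterMetrics) error {
	lbs := labels.FromStrings("app", "api", "env", "prod")

	// The fingerprint would normally come from the ingester's fingerprint
	// mapping; hashing the labels directly is an assumption made for brevity.
	s := newStream(cfg, limits, "tenant-1", model.Fingerprint(lbs.Hash()), lbs, true, metrics)

	entries := []logproto.Entry{{Timestamp: time.Now(), Line: "hello world"}}

	// record=nil skips WAL recording, counter=0 marks this as a live push
	// (not a WAL replay), and lockChunk=true lets Push take chunkMtx itself.
	_, err := s.Push(context.Background(), entries, nil, 0, true)
	return err
}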
// consumeChunk manually adds a chunk to the stream that was received during
// ingester chunk transfer.
// Must hold chunkMtx.
// DEPRECATED: chunk transfers are no longer suggested and remain for compatibility.
func (s *stream) consumeChunk(_ context.Context, chunk *logproto.Chunk) error {
	c, err := chunkenc.NewByteChunk(chunk.Data, s.cfg.BlockSize, s.cfg.TargetChunkSize)
	if err != nil {
		return err
	}

	s.chunks = append(s.chunks, chunkDesc{
		chunk: c,
	})
	s.metrics.chunksCreatedTotal.Inc()
	return nil
}

// setChunks is used during checkpoint recovery.
func (s *stream) setChunks(chunks []Chunk) (bytesAdded, entriesAdded int, err error) {
	s.chunkMtx.Lock()
	defer s.chunkMtx.Unlock()
	chks, err := fromWireChunks(s.cfg, chunks)
	if err != nil {
		return 0, 0, err
	}
	s.chunks = chks
	for _, c := range s.chunks {
		entriesAdded += c.chunk.Size()
		bytesAdded += c.chunk.UncompressedSize()
	}
	return bytesAdded, entriesAdded, nil
}

func (s *stream) NewChunk() *chunkenc.MemChunk {
	return chunkenc.NewMemChunk(s.cfg.parsedEncoding, headBlockType(s.unorderedWrites), s.cfg.BlockSize, s.cfg.TargetChunkSize)
}
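// Illustrative sketch (not in the original file): NewChunk derives the head
// block format from the stream's ordering mode via headBlockType (defined at
// the bottom of this file), so toggling unorderedWrites is all it takes to
// switch between ordered and unordered head blocks.
func exampleHeadBlockChoice(cfg *Config) {
	ordered := &stream{cfg: cfg, unorderedWrites: false}
	unordered := &stream{cfg: cfg, unorderedWrites: true}
	_ = ordered.NewChunk()   // head block uses chunkenc.OrderedHeadBlockFmt
	_ = unordered.NewChunk() // head block uses chunkenc.UnorderedHeadBlockFmt
}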
func (s *stream) Push(
	ctx context.Context,
	entries []logproto.Entry,
	// WAL record to add push contents to.
	// May be nil to disable this functionality.
	record *WALRecord,
	// Counter used in WAL replay to avoid duplicates.
	// If this is non-zero, the stream will reject entries
	// with a counter value less than or equal to its own.
	// It is set to zero and thus bypassed outside of WAL replays.
	counter int64,
	// Lock chunkMtx while pushing.
	// If this is false, chunkMtx must be held outside Push.
	lockChunk bool,
) (int, error) {
	if lockChunk {
		s.chunkMtx.Lock()
		defer s.chunkMtx.Unlock()
	}

	isReplay := counter > 0
	if isReplay && counter <= s.entryCt {
		var byteCt int
		for _, e := range entries {
			byteCt += len(e.Line)
		}

		s.metrics.walReplaySamplesDropped.WithLabelValues(duplicateReason).Add(float64(len(entries)))
		s.metrics.walReplayBytesDropped.WithLabelValues(duplicateReason).Add(float64(byteCt))
		return 0, ErrEntriesExist
	}

	var bytesAdded int
	prevNumChunks := len(s.chunks)
	if prevNumChunks == 0 {
		s.chunks = append(s.chunks, chunkDesc{
			chunk: s.NewChunk(),
		})
		s.metrics.chunksCreatedTotal.Inc()
		s.metrics.chunkCreatedStats.Inc(1)
	}

	var storedEntries []logproto.Entry
	failedEntriesWithError := []entryWithError{}

	var outOfOrderSamples, outOfOrderBytes int
	var rateLimitedSamples, rateLimitedBytes int
	defer func() {
		if outOfOrderSamples > 0 {
			name := validation.OutOfOrder
			if s.unorderedWrites {
				name = validation.TooFarBehind
			}
			validation.DiscardedSamples.WithLabelValues(name, s.tenant).Add(float64(outOfOrderSamples))
			validation.DiscardedBytes.WithLabelValues(name, s.tenant).Add(float64(outOfOrderBytes))
		}
		if rateLimitedSamples > 0 {
			validation.DiscardedSamples.WithLabelValues(validation.StreamRateLimit, s.tenant).Add(float64(rateLimitedSamples))
			validation.DiscardedBytes.WithLabelValues(validation.StreamRateLimit, s.tenant).Add(float64(rateLimitedBytes))
		}
	}()

	// This call uses a mutex under the hood; cache the result, since we check the limit
	// for each entry in the push (hot path) and only use this value when logging entries
	// over the rate limit.
	limit := s.limiter.lim.Limit()

	// Don't fail on the first append error - if samples are sent out of order,
	// we still want to append the later ones.
	for i := range entries {
		// If this entry matches our last appended line's timestamp and contents,
		// ignore it.
		//
		// This check is done at the stream level so it persists across cut and
		// flushed chunks.
		//
		// NOTE: it's still possible for duplicates to be appended if a stream is
		// deleted due to inactivity.
		if entries[i].Timestamp.Equal(s.lastLine.ts) && entries[i].Line == s.lastLine.content {
			continue
		}

		chunk := &s.chunks[len(s.chunks)-1]
		if chunk.closed || !chunk.chunk.SpaceFor(&entries[i]) || s.cutChunkForSynchronization(entries[i].Timestamp, s.highestTs, chunk, s.cfg.SyncPeriod, s.cfg.SyncMinUtilization) {
			chunk = s.cutChunk(ctx)
		}
		// Check whether this entry should be rate limited.
		now := time.Now()
		if !s.limiter.AllowN(now, len(entries[i].Line)) {
			failedEntriesWithError = append(failedEntriesWithError, entryWithError{&entries[i], &validation.ErrStreamRateLimit{RateLimit: flagext.ByteSize(limit), Labels: s.labelsString, Bytes: flagext.ByteSize(len(entries[i].Line))}})
			rateLimitedSamples++
			rateLimitedBytes += len(entries[i].Line)
			continue
		}

		// The validity window for unordered writes is the highest timestamp present minus 1/2 * max-chunk-age.
		cutoff := s.highestTs.Add(-s.cfg.MaxChunkAge / 2)
		if !isReplay && s.unorderedWrites && !s.highestTs.IsZero() && cutoff.After(entries[i].Timestamp) {
			failedEntriesWithError = append(failedEntriesWithError, entryWithError{&entries[i], chunkenc.ErrTooFarBehind(cutoff)})
			outOfOrderSamples++
			outOfOrderBytes += len(entries[i].Line)
		} else if err := chunk.chunk.Append(&entries[i]); err != nil {
			failedEntriesWithError = append(failedEntriesWithError, entryWithError{&entries[i], err})
			if chunkenc.IsOutOfOrderErr(err) {
				outOfOrderSamples++
				outOfOrderBytes += len(entries[i].Line)
			}
		} else {
			storedEntries = append(storedEntries, entries[i])
			s.lastLine.ts = entries[i].Timestamp
			s.lastLine.content = entries[i].Line
			if s.highestTs.Before(entries[i].Timestamp) {
				s.highestTs = entries[i].Timestamp
			}
			s.entryCt++

			// count the accepted line's length in bytes
			bytesAdded += len(entries[i].Line)
		}
		chunk.lastUpdated = time.Now()
	}

	if len(storedEntries) != 0 {
		// record will be nil when replaying the WAL (we don't want to rewrite WAL entries as we replay them).
		if record != nil {
			record.AddEntries(uint64(s.fp), s.entryCt, storedEntries...)
		} else {
			// If record is nil, this is a WAL recovery.
			s.metrics.recoveredEntriesTotal.Add(float64(len(storedEntries)))
		}

		s.tailerMtx.RLock()
		hasTailers := len(s.tailers) != 0
		s.tailerMtx.RUnlock()
		if hasTailers {
			go func() {
				stream := logproto.Stream{Labels: s.labelsString, Entries: storedEntries}

				closedTailers := []uint32{}

				s.tailerMtx.RLock()
				for _, tailer := range s.tailers {
					if tailer.isClosed() {
						closedTailers = append(closedTailers, tailer.getID())
						continue
					}
					tailer.send(stream, s.labels)
				}
				s.tailerMtx.RUnlock()

				if len(closedTailers) != 0 {
					s.tailerMtx.Lock()
					defer s.tailerMtx.Unlock()

					for _, closedTailerID := range closedTailers {
						delete(s.tailers, closedTailerID)
					}
				}
			}()
		}
	}

	if len(s.chunks) != prevNumChunks {
		s.metrics.memoryChunks.Add(float64(len(s.chunks) - prevNumChunks))
	}

	if len(failedEntriesWithError) > 0 {
		lastEntryWithErr := failedEntriesWithError[len(failedEntriesWithError)-1]
		_, ok := lastEntryWithErr.e.(*validation.ErrStreamRateLimit)
		outOfOrder := chunkenc.IsOutOfOrderErr(lastEntryWithErr.e)
		if !outOfOrder && !ok {
			return bytesAdded, lastEntryWithErr.e
		}
		var statusCode int
		if outOfOrder {
			statusCode = http.StatusBadRequest
		}
		if ok {
			statusCode = http.StatusTooManyRequests
		}
		// Return an HTTP 4xx response with all failed entries.
		buf := bytes.Buffer{}
		streamName := s.labelsString

		limitedFailedEntries := failedEntriesWithError
		if maxIgnore := s.cfg.MaxReturnedErrors; maxIgnore > 0 && len(limitedFailedEntries) > maxIgnore {
			limitedFailedEntries = limitedFailedEntries[:maxIgnore]
		}

		for _, entryWithError := range limitedFailedEntries {
			fmt.Fprintf(&buf,
				"entry with timestamp %s ignored, reason: '%s' for stream: %s,\n",
				entryWithError.entry.Timestamp.String(), entryWithError.e.Error(), streamName)
		}

		fmt.Fprintf(&buf, "total ignored: %d out of %d", len(failedEntriesWithError), len(entries))

		return bytesAdded, httpgrpc.Errorf(statusCode, buf.String())
	}

	return bytesAdded, nil
}
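// Hedged worked example (not in the original file) of the unordered-writes
// validity window computed in Push: the cutoff is highestTs - MaxChunkAge/2.
// With MaxChunkAge = 2h and the stream's highest accepted timestamp at 12:00,
// entries older than 11:00 fail with ErrTooFarBehind, while anything at or
// after 11:00 is still accepted, even out of order.
func exampleWithinValidityWindow(highestTs, entryTs time.Time, maxChunkAge time.Duration) bool {
	cutoff := highestTs.Add(-maxChunkAge / 2)
	// Mirrors the check in Push: an entry is rejected only when the cutoff is
	// strictly after its timestamp.
	return !cutoff.After(entryTs)
}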
func (s *stream) cutChunk(ctx context.Context) *chunkDesc {
	// If the chunk has no more space, call Close to make sure anything in the head block is cut and compressed.
	chunk := &s.chunks[len(s.chunks)-1]
	err := chunk.chunk.Close()
	if err != nil {
		// This should be an unlikely situation; returning an error up the stack doesn't help much here,
		// so instead log this to help debug the issue if it ever arises.
		level.Error(util_log.WithContext(ctx, util_log.Logger)).Log("msg", "failed to Close chunk", "err", err)
	}
	chunk.closed = true

	s.metrics.samplesPerChunk.Observe(float64(chunk.chunk.Size()))
	s.metrics.blocksPerChunk.Observe(float64(chunk.chunk.BlockCount()))
	s.metrics.chunksCreatedTotal.Inc()
	s.metrics.chunkCreatedStats.Inc(1)

	s.chunks = append(s.chunks, chunkDesc{
		chunk: s.NewChunk(),
	})
	return &s.chunks[len(s.chunks)-1]
}

// Returns true if the chunk should be cut before adding a new entry. This is done to make ingesters
// cut the chunk for this stream at the same moment, so that the new chunk will contain exactly the same entries.
func (s *stream) cutChunkForSynchronization(entryTimestamp, latestTs time.Time, c *chunkDesc, synchronizePeriod time.Duration, minUtilization float64) bool {
	// Never sync when it's not enabled, on the first push, or when a write isn't the latest ts,
	// to prevent syncing many unordered writes.
	if synchronizePeriod <= 0 || latestTs.IsZero() || latestTs.After(entryTimestamp) {
		return false
	}

	// We use the fingerprint as a jitter here, basically offsetting stream synchronization points to different times.
	// This breaks if streams are mapped to different fingerprints on different ingesters, which is too bad.
	cts := (uint64(entryTimestamp.UnixNano()) + uint64(s.fp)) % uint64(synchronizePeriod.Nanoseconds())
	pts := (uint64(latestTs.UnixNano()) + uint64(s.fp)) % uint64(synchronizePeriod.Nanoseconds())

	// if the current entry timestamp has rolled over the synchronization period
	if cts < pts {
		if minUtilization <= 0 {
			c.synced = true
			return true
		}

		if c.chunk.Utilization() > minUtilization {
			c.synced = true
			return true
		}
	}

	return false
}
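// Hedged worked example (not in the original file) of the synchronization
// point math above. Both timestamps are reduced modulo the sync period after
// adding the fingerprint as a per-stream jitter; a chunk is cut when the new
// entry's offset within the period is smaller than the latest timestamp's
// offset, i.e. the (jittered) period boundary was crossed. With a 10m period
// and zero jitter, latestTs at 11:59:50 (offset 9m50s) followed by an entry
// at 12:00:05 (offset 5s) triggers a cut.
func exampleSyncOffset(fp model.Fingerprint, ts time.Time, syncPeriod time.Duration) uint64 {
	return (uint64(ts.UnixNano()) + uint64(fp)) % uint64(syncPeriod.Nanoseconds())
}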
func (s *stream) Bounds() (from, to time.Time) {
	s.chunkMtx.RLock()
	defer s.chunkMtx.RUnlock()
	if len(s.chunks) > 0 {
		from, _ = s.chunks[0].chunk.Bounds()
		_, to = s.chunks[len(s.chunks)-1].chunk.Bounds()
	}
	return from, to
}

// Returns an iterator.
func (s *stream) Iterator(ctx context.Context, statsCtx *stats.Context, from, through time.Time, direction logproto.Direction, pipeline log.StreamPipeline) (iter.EntryIterator, error) {
	s.chunkMtx.RLock()
	defer s.chunkMtx.RUnlock()
	iterators := make([]iter.EntryIterator, 0, len(s.chunks))

	var lastMax time.Time
	ordered := true

	for _, c := range s.chunks {
		mint, maxt := c.chunk.Bounds()

		// skip this chunk
		if through.Before(mint) || maxt.Before(from) {
			continue
		}

		if mint.Before(lastMax) {
			ordered = false
		}
		lastMax = maxt

		itr, err := c.chunk.Iterator(ctx, from, through, direction, pipeline)
		if err != nil {
			return nil, err
		}
		if itr != nil {
			iterators = append(iterators, itr)
		}
	}

	if direction != logproto.FORWARD {
		for left, right := 0, len(iterators)-1; left < right; left, right = left+1, right-1 {
			iterators[left], iterators[right] = iterators[right], iterators[left]
		}
	}

	if statsCtx != nil {
		statsCtx.AddIngesterTotalChunkMatched(int64(len(iterators)))
	}

	if ordered {
		return iter.NewNonOverlappingIterator(iterators), nil
	}
	return iter.NewSortEntryIterator(iterators, direction), nil
}
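// Hedged sketch (not in the original file): the ordering check in Iterator and
// SampleIterator reduces to this predicate over successive chunk bounds. Any
// chunk whose minimum time precedes the previous chunk's maximum forces the
// sorting (merge) iterator; otherwise the cheaper non-overlapping
// concatenation is used.
func exampleChunksOverlap(bounds [][2]time.Time) bool {
	var lastMax time.Time
	for _, b := range bounds {
		if b[0].Before(lastMax) {
			return true // overlap: this chunk's mint is before the previous maxt
		}
		lastMax = b[1]
	}
	return false
}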
// Returns a SampleIterator.
func (s *stream) SampleIterator(ctx context.Context, statsCtx *stats.Context, from, through time.Time, extractor log.StreamSampleExtractor) (iter.SampleIterator, error) {
	s.chunkMtx.RLock()
	defer s.chunkMtx.RUnlock()
	iterators := make([]iter.SampleIterator, 0, len(s.chunks))

	var lastMax time.Time
	ordered := true

	for _, c := range s.chunks {
		mint, maxt := c.chunk.Bounds()

		// skip this chunk
		if through.Before(mint) || maxt.Before(from) {
			continue
		}

		if mint.Before(lastMax) {
			ordered = false
		}
		lastMax = maxt

		if itr := c.chunk.SampleIterator(ctx, from, through, extractor); itr != nil {
			iterators = append(iterators, itr)
		}
	}

	if statsCtx != nil {
		statsCtx.AddIngesterTotalChunkMatched(int64(len(iterators)))
	}

	if ordered {
		return iter.NewNonOverlappingSampleIterator(iterators), nil
	}
	return iter.NewSortSampleIterator(iterators), nil
}

func (s *stream) addTailer(t *tailer) {
	s.tailerMtx.Lock()
	defer s.tailerMtx.Unlock()

	s.tailers[t.getID()] = t
}

func (s *stream) resetCounter() {
	s.entryCt = 0
}

func headBlockType(unorderedWrites bool) chunkenc.HeadBlockFmt {
	if unorderedWrites {
		return chunkenc.UnorderedHeadBlockFmt
	}
	return chunkenc.OrderedHeadBlockFmt
}
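// Hedged sketch (not in the original file) of how WAL replay might use the
// counter argument to Push. Replaying with the counter stored alongside each
// WAL record lets the stream drop batches that a checkpoint already restored:
// any counter at or below the stream's entryCt yields ErrEntriesExist. record
// is nil so replayed entries are not re-written to the WAL.
func exampleReplayPush(ctx context.Context, s *stream, entries []logproto.Entry, counter int64) error {
	_, err := s.Push(ctx, entries, nil, counter, true)
	if err == ErrEntriesExist {
		// Already recovered via a checkpoint; safe to skip.
		return nil
	}
	return err
}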