github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/iter/sample_iterator.go (about) 1 package iter 2 3 import ( 4 "container/heap" 5 "context" 6 "io" 7 "sync" 8 9 "github.com/grafana/loki/pkg/logproto" 10 "github.com/grafana/loki/pkg/logqlmodel/stats" 11 "github.com/grafana/loki/pkg/util" 12 ) 13 14 // SampleIterator iterates over samples in time-order. 15 type SampleIterator interface { 16 Iterator 17 // todo(ctovena) we should add `Seek(t int64) bool` 18 // This way we can skip when ranging over samples. 19 Sample() logproto.Sample 20 } 21 22 // PeekingSampleIterator is a sample iterator that can peek sample without moving the current sample. 23 type PeekingSampleIterator interface { 24 SampleIterator 25 Peek() (string, logproto.Sample, bool) 26 } 27 28 type peekingSampleIterator struct { 29 iter SampleIterator 30 31 cache *sampleWithLabels 32 next *sampleWithLabels 33 } 34 35 type sampleWithLabels struct { 36 logproto.Sample 37 labels string 38 streamHash uint64 39 } 40 41 func NewPeekingSampleIterator(iter SampleIterator) PeekingSampleIterator { 42 // initialize the next entry so we can peek right from the start. 43 var cache *sampleWithLabels 44 next := &sampleWithLabels{} 45 if iter.Next() { 46 cache = &sampleWithLabels{ 47 Sample: iter.Sample(), 48 labels: iter.Labels(), 49 streamHash: iter.StreamHash(), 50 } 51 next.Sample = cache.Sample 52 next.labels = cache.labels 53 } 54 return &peekingSampleIterator{ 55 iter: iter, 56 cache: cache, 57 next: next, 58 } 59 } 60 61 func (it *peekingSampleIterator) Close() error { 62 return it.iter.Close() 63 } 64 65 func (it *peekingSampleIterator) Labels() string { 66 if it.next != nil { 67 return it.next.labels 68 } 69 return "" 70 } 71 72 func (it *peekingSampleIterator) StreamHash() uint64 { 73 if it.next != nil { 74 return it.next.streamHash 75 } 76 return 0 77 } 78 79 func (it *peekingSampleIterator) Next() bool { 80 if it.cache != nil { 81 it.next.Sample = it.cache.Sample 82 it.next.labels = it.cache.labels 83 it.next.streamHash = it.cache.streamHash 84 it.cacheNext() 85 return true 86 } 87 return false 88 } 89 90 // cacheNext caches the next element if it exists. 91 func (it *peekingSampleIterator) cacheNext() { 92 if it.iter.Next() { 93 it.cache.Sample = it.iter.Sample() 94 it.cache.labels = it.iter.Labels() 95 it.cache.streamHash = it.iter.StreamHash() 96 return 97 } 98 // nothing left removes the cached entry 99 it.cache = nil 100 } 101 102 func (it *peekingSampleIterator) Sample() logproto.Sample { 103 if it.next != nil { 104 return it.next.Sample 105 } 106 return logproto.Sample{} 107 } 108 109 func (it *peekingSampleIterator) Peek() (string, logproto.Sample, bool) { 110 if it.cache != nil { 111 return it.cache.labels, it.cache.Sample, true 112 } 113 return "", logproto.Sample{}, false 114 } 115 116 func (it *peekingSampleIterator) Error() error { 117 return it.iter.Error() 118 } 119 120 type sampleIteratorHeap struct { 121 its []SampleIterator 122 } 123 124 func (h sampleIteratorHeap) Len() int { return len(h.its) } 125 func (h sampleIteratorHeap) Swap(i, j int) { h.its[i], h.its[j] = h.its[j], h.its[i] } 126 func (h sampleIteratorHeap) Peek() SampleIterator { return h.its[0] } 127 func (h *sampleIteratorHeap) Push(x interface{}) { 128 h.its = append(h.its, x.(SampleIterator)) 129 } 130 131 func (h *sampleIteratorHeap) Pop() interface{} { 132 n := len(h.its) 133 x := h.its[n-1] 134 h.its = h.its[0 : n-1] 135 return x 136 } 137 138 func (h sampleIteratorHeap) Less(i, j int) bool { 139 s1, s2 := h.its[i].Sample(), h.its[j].Sample() 140 if s1.Timestamp == s2.Timestamp { 141 if h.its[i].StreamHash() == 0 { 142 return h.its[i].Labels() < h.its[j].Labels() 143 } 144 return h.its[i].StreamHash() < h.its[j].StreamHash() 145 } 146 return s1.Timestamp < s2.Timestamp 147 } 148 149 // mergeSampleIterator iterates over a heap of iterators by merging samples. 150 type mergeSampleIterator struct { 151 heap *sampleIteratorHeap 152 is []SampleIterator 153 prefetched bool 154 stats *stats.Context 155 // pushBuffer contains the list of iterators that needs to be pushed to the heap 156 // This is to avoid allocations. 157 pushBuffer []SampleIterator 158 159 // buffer of entries to be returned by Next() 160 // We buffer entries with the same timestamp to correctly dedupe them. 161 buffer []sampleWithLabels 162 curr sampleWithLabels 163 errs []error 164 } 165 166 // NewMergeSampleIterator returns a new iterator which uses a heap to merge together samples for multiple iterators and deduplicate if any. 167 // The iterator only order and merge entries across given `is` iterators, it does not merge entries within individual iterator. 168 // This means using this iterator with a single iterator will result in the same result as the input iterator. 169 // If you don't need to deduplicate sample, use `NewSortSampleIterator` instead. 170 func NewMergeSampleIterator(ctx context.Context, is []SampleIterator) SampleIterator { 171 h := sampleIteratorHeap{ 172 its: make([]SampleIterator, 0, len(is)), 173 } 174 return &mergeSampleIterator{ 175 stats: stats.FromContext(ctx), 176 is: is, 177 heap: &h, 178 buffer: make([]sampleWithLabels, 0, len(is)), 179 pushBuffer: make([]SampleIterator, 0, len(is)), 180 } 181 } 182 183 // prefetch iterates over all inner iterators to merge together, calls Next() on 184 // each of them to prefetch the first entry and pushes of them - who are not 185 // empty - to the heap 186 func (i *mergeSampleIterator) prefetch() { 187 if i.prefetched { 188 return 189 } 190 191 i.prefetched = true 192 for _, it := range i.is { 193 i.requeue(it, false) 194 } 195 196 // We can now clear the list of input iterators to merge, given they have all 197 // been processed and the non empty ones have been pushed to the heap 198 i.is = nil 199 } 200 201 // requeue pushes the input ei EntryIterator to the heap, advancing it via an ei.Next() 202 // call unless the advanced input parameter is true. In this latter case it expects that 203 // the iterator has already been advanced before calling requeue(). 204 // 205 // If the iterator has no more entries or an error occur while advancing it, the iterator 206 // is not pushed to the heap and any possible error captured, so that can be get via Error(). 207 func (i *mergeSampleIterator) requeue(ei SampleIterator, advanced bool) { 208 if advanced || ei.Next() { 209 heap.Push(i.heap, ei) 210 return 211 } 212 213 if err := ei.Error(); err != nil { 214 i.errs = append(i.errs, err) 215 } 216 util.LogError("closing iterator", ei.Close) 217 } 218 219 func (i *mergeSampleIterator) Next() bool { 220 i.prefetch() 221 222 if len(i.buffer) != 0 { 223 i.nextFromBuffer() 224 return true 225 } 226 227 if i.heap.Len() == 0 { 228 return false 229 } 230 231 // shortcut for the last iterator. 232 if i.heap.Len() == 1 { 233 i.curr.Sample = i.heap.Peek().Sample() 234 i.curr.labels = i.heap.Peek().Labels() 235 i.curr.streamHash = i.heap.Peek().StreamHash() 236 if !i.heap.Peek().Next() { 237 i.heap.Pop() 238 } 239 return true 240 } 241 242 // We support multiple entries with the same timestamp, and we want to 243 // preserve their original order. We look at all the top entries in the 244 // heap with the same timestamp, and pop the ones whose common value 245 // occurs most often. 246 Outer: 247 for i.heap.Len() > 0 { 248 next := i.heap.Peek() 249 sample := next.Sample() 250 if len(i.buffer) > 0 && (i.buffer[0].streamHash != next.StreamHash() || i.buffer[0].Timestamp != sample.Timestamp) { 251 break 252 } 253 heap.Pop(i.heap) 254 previous := i.buffer 255 var dupe bool 256 for _, t := range previous { 257 if t.Sample.Hash == sample.Hash { 258 i.stats.AddDuplicates(1) 259 dupe = true 260 break 261 } 262 } 263 if !dupe { 264 i.buffer = append(i.buffer, sampleWithLabels{ 265 Sample: sample, 266 labels: next.Labels(), 267 streamHash: next.StreamHash(), 268 }) 269 } 270 inner: 271 for { 272 if !next.Next() { 273 continue Outer 274 } 275 sample := next.Sample() 276 if next.StreamHash() != i.buffer[0].streamHash || 277 sample.Timestamp != i.buffer[0].Timestamp { 278 break 279 } 280 for _, t := range previous { 281 if t.Hash == sample.Hash { 282 i.stats.AddDuplicates(1) 283 continue inner 284 } 285 } 286 i.buffer = append(i.buffer, sampleWithLabels{ 287 Sample: sample, 288 labels: next.Labels(), 289 streamHash: next.StreamHash(), 290 }) 291 } 292 i.pushBuffer = append(i.pushBuffer, next) 293 } 294 295 for _, ei := range i.pushBuffer { 296 heap.Push(i.heap, ei) 297 } 298 i.pushBuffer = i.pushBuffer[:0] 299 300 i.nextFromBuffer() 301 302 return true 303 } 304 305 func (i *mergeSampleIterator) nextFromBuffer() { 306 i.curr.Sample = i.buffer[0].Sample 307 i.curr.labels = i.buffer[0].labels 308 i.curr.streamHash = i.buffer[0].streamHash 309 if len(i.buffer) == 1 { 310 i.buffer = i.buffer[:0] 311 return 312 } 313 i.buffer = i.buffer[1:] 314 } 315 316 func (i *mergeSampleIterator) Sample() logproto.Sample { 317 return i.curr.Sample 318 } 319 320 func (i *mergeSampleIterator) Labels() string { 321 return i.curr.labels 322 } 323 324 func (i *mergeSampleIterator) StreamHash() uint64 { 325 return i.curr.streamHash 326 } 327 328 func (i *mergeSampleIterator) Error() error { 329 switch len(i.errs) { 330 case 0: 331 return nil 332 case 1: 333 return i.errs[0] 334 default: 335 return util.MultiError(i.errs) 336 } 337 } 338 339 func (i *mergeSampleIterator) Close() error { 340 for i.heap.Len() > 0 { 341 if err := i.heap.Pop().(SampleIterator).Close(); err != nil { 342 return err 343 } 344 } 345 i.buffer = nil 346 return nil 347 } 348 349 // sortSampleIterator iterates over a heap of iterators by sorting samples. 350 type sortSampleIterator struct { 351 heap *sampleIteratorHeap 352 is []SampleIterator 353 prefetched bool 354 355 curr sampleWithLabels 356 errs []error 357 } 358 359 // NewSortSampleIterator returns a new SampleIterator that sorts samples by ascending timestamp the input iterators. 360 // The iterator only order sample across given `is` iterators, it does not sort samples within individual iterator. 361 // This means using this iterator with a single iterator will result in the same result as the input iterator. 362 // When timestamp is equal, the iterator sorts samples by their label alphabetically. 363 func NewSortSampleIterator(is []SampleIterator) SampleIterator { 364 if len(is) == 0 { 365 return NoopIterator 366 } 367 if len(is) == 1 { 368 return is[0] 369 } 370 h := sampleIteratorHeap{ 371 its: make([]SampleIterator, 0, len(is)), 372 } 373 return &sortSampleIterator{ 374 is: is, 375 heap: &h, 376 } 377 } 378 379 // init initialize the underlaying heap 380 func (i *sortSampleIterator) init() { 381 if i.prefetched { 382 return 383 } 384 385 i.prefetched = true 386 for _, it := range i.is { 387 if it.Next() { 388 i.heap.Push(it) 389 continue 390 } 391 392 if err := it.Error(); err != nil { 393 i.errs = append(i.errs, err) 394 } 395 util.LogError("closing iterator", it.Close) 396 } 397 heap.Init(i.heap) 398 399 // We can now clear the list of input iterators to merge, given they have all 400 // been processed and the non empty ones have been pushed to the heap 401 i.is = nil 402 } 403 404 func (i *sortSampleIterator) Next() bool { 405 i.init() 406 407 if i.heap.Len() == 0 { 408 return false 409 } 410 411 next := i.heap.Peek() 412 i.curr.Sample = next.Sample() 413 i.curr.labels = next.Labels() 414 i.curr.streamHash = next.StreamHash() 415 // if the top iterator is empty, we remove it. 416 if !next.Next() { 417 heap.Pop(i.heap) 418 if err := next.Error(); err != nil { 419 i.errs = append(i.errs, err) 420 } 421 util.LogError("closing iterator", next.Close) 422 return true 423 } 424 if i.heap.Len() > 1 { 425 heap.Fix(i.heap, 0) 426 } 427 return true 428 } 429 430 func (i *sortSampleIterator) Sample() logproto.Sample { 431 return i.curr.Sample 432 } 433 434 func (i *sortSampleIterator) Labels() string { 435 return i.curr.labels 436 } 437 438 func (i *sortSampleIterator) StreamHash() uint64 { 439 return i.curr.streamHash 440 } 441 442 func (i *sortSampleIterator) Error() error { 443 switch len(i.errs) { 444 case 0: 445 return nil 446 case 1: 447 return i.errs[0] 448 default: 449 return util.MultiError(i.errs) 450 } 451 } 452 453 func (i *sortSampleIterator) Close() error { 454 for i.heap.Len() > 0 { 455 if err := i.heap.Pop().(SampleIterator).Close(); err != nil { 456 return err 457 } 458 } 459 return nil 460 } 461 462 type sampleQueryClientIterator struct { 463 client QuerySampleClient 464 err error 465 curr SampleIterator 466 } 467 468 // QuerySampleClient is GRPC stream client with only method used by the SampleQueryClientIterator 469 type QuerySampleClient interface { 470 Recv() (*logproto.SampleQueryResponse, error) 471 Context() context.Context 472 CloseSend() error 473 } 474 475 // NewQueryClientIterator returns an iterator over a QueryClient. 476 func NewSampleQueryClientIterator(client QuerySampleClient) SampleIterator { 477 return &sampleQueryClientIterator{ 478 client: client, 479 } 480 } 481 482 func (i *sampleQueryClientIterator) Next() bool { 483 ctx := i.client.Context() 484 for i.curr == nil || !i.curr.Next() { 485 batch, err := i.client.Recv() 486 if err == io.EOF { 487 return false 488 } else if err != nil { 489 i.err = err 490 return false 491 } 492 stats.JoinIngesters(ctx, batch.Stats) 493 i.curr = NewSampleQueryResponseIterator(batch) 494 } 495 return true 496 } 497 498 func (i *sampleQueryClientIterator) Sample() logproto.Sample { 499 return i.curr.Sample() 500 } 501 502 func (i *sampleQueryClientIterator) Labels() string { 503 return i.curr.Labels() 504 } 505 506 func (i *sampleQueryClientIterator) StreamHash() uint64 { 507 return i.curr.StreamHash() 508 } 509 510 func (i *sampleQueryClientIterator) Error() error { 511 return i.err 512 } 513 514 func (i *sampleQueryClientIterator) Close() error { 515 return i.client.CloseSend() 516 } 517 518 // NewSampleQueryResponseIterator returns an iterator over a SampleQueryResponse. 519 func NewSampleQueryResponseIterator(resp *logproto.SampleQueryResponse) SampleIterator { 520 return NewMultiSeriesIterator(resp.Series) 521 } 522 523 type seriesIterator struct { 524 i int 525 series logproto.Series 526 } 527 528 type withCloseSampleIterator struct { 529 closeOnce sync.Once 530 closeFn func() error 531 errs []error 532 SampleIterator 533 } 534 535 func (w *withCloseSampleIterator) Close() error { 536 w.closeOnce.Do(func() { 537 if err := w.SampleIterator.Close(); err != nil { 538 w.errs = append(w.errs, err) 539 } 540 if err := w.closeFn(); err != nil { 541 w.errs = append(w.errs, err) 542 } 543 }) 544 if len(w.errs) == 0 { 545 return nil 546 } 547 return util.MultiError(w.errs) 548 } 549 550 func SampleIteratorWithClose(it SampleIterator, closeFn func() error) SampleIterator { 551 return &withCloseSampleIterator{ 552 closeOnce: sync.Once{}, 553 closeFn: closeFn, 554 SampleIterator: it, 555 } 556 } 557 558 // NewMultiSeriesIterator returns an iterator over multiple logproto.Series 559 func NewMultiSeriesIterator(series []logproto.Series) SampleIterator { 560 is := make([]SampleIterator, 0, len(series)) 561 for i := range series { 562 is = append(is, NewSeriesIterator(series[i])) 563 } 564 return NewSortSampleIterator(is) 565 } 566 567 // NewSeriesIterator iterates over sample in a series. 568 func NewSeriesIterator(series logproto.Series) SampleIterator { 569 return &seriesIterator{ 570 i: -1, 571 series: series, 572 } 573 } 574 575 func (i *seriesIterator) Next() bool { 576 i.i++ 577 return i.i < len(i.series.Samples) 578 } 579 580 func (i *seriesIterator) Error() error { 581 return nil 582 } 583 584 func (i *seriesIterator) Labels() string { 585 return i.series.Labels 586 } 587 588 func (i *seriesIterator) StreamHash() uint64 { 589 return i.series.StreamHash 590 } 591 592 func (i *seriesIterator) Sample() logproto.Sample { 593 return i.series.Samples[i.i] 594 } 595 596 func (i *seriesIterator) Close() error { 597 return nil 598 } 599 600 type nonOverlappingSampleIterator struct { 601 i int 602 iterators []SampleIterator 603 curr SampleIterator 604 } 605 606 // NewNonOverlappingSampleIterator gives a chained iterator over a list of iterators. 607 func NewNonOverlappingSampleIterator(iterators []SampleIterator) SampleIterator { 608 return &nonOverlappingSampleIterator{ 609 iterators: iterators, 610 } 611 } 612 613 func (i *nonOverlappingSampleIterator) Next() bool { 614 for i.curr == nil || !i.curr.Next() { 615 if len(i.iterators) == 0 { 616 if i.curr != nil { 617 i.curr.Close() 618 } 619 return false 620 } 621 if i.curr != nil { 622 i.curr.Close() 623 } 624 i.i++ 625 i.curr, i.iterators = i.iterators[0], i.iterators[1:] 626 } 627 628 return true 629 } 630 631 func (i *nonOverlappingSampleIterator) Sample() logproto.Sample { 632 return i.curr.Sample() 633 } 634 635 func (i *nonOverlappingSampleIterator) Labels() string { 636 if i.curr == nil { 637 return "" 638 } 639 return i.curr.Labels() 640 } 641 642 func (i *nonOverlappingSampleIterator) StreamHash() uint64 { 643 if i.curr == nil { 644 return 0 645 } 646 return i.curr.StreamHash() 647 } 648 649 func (i *nonOverlappingSampleIterator) Error() error { 650 if i.curr == nil { 651 return nil 652 } 653 return i.curr.Error() 654 } 655 656 func (i *nonOverlappingSampleIterator) Close() error { 657 if i.curr != nil { 658 i.curr.Close() 659 } 660 for _, iter := range i.iterators { 661 iter.Close() 662 } 663 i.iterators = nil 664 return nil 665 } 666 667 type timeRangedSampleIterator struct { 668 SampleIterator 669 mint, maxt int64 670 } 671 672 // NewTimeRangedSampleIterator returns an iterator which filters entries by time range. 673 func NewTimeRangedSampleIterator(it SampleIterator, mint, maxt int64) SampleIterator { 674 return &timeRangedSampleIterator{ 675 SampleIterator: it, 676 mint: mint, 677 maxt: maxt, 678 } 679 } 680 681 func (i *timeRangedSampleIterator) Next() bool { 682 ok := i.SampleIterator.Next() 683 if !ok { 684 i.SampleIterator.Close() 685 return ok 686 } 687 ts := i.SampleIterator.Sample().Timestamp 688 for ok && i.mint > ts { 689 ok = i.SampleIterator.Next() 690 if !ok { 691 continue 692 } 693 ts = i.SampleIterator.Sample().Timestamp 694 } 695 if ok { 696 if ts == i.mint { // The mint is inclusive 697 return true 698 } 699 if i.maxt < ts || i.maxt == ts { // The maxt is exclusive. 700 ok = false 701 } 702 } 703 if !ok { 704 i.SampleIterator.Close() 705 } 706 return ok 707 } 708 709 // ReadBatch reads a set of entries off an iterator. 710 func ReadSampleBatch(i SampleIterator, size uint32) (*logproto.SampleQueryResponse, uint32, error) { 711 var ( 712 series = map[uint64]map[string]*logproto.Series{} 713 respSize uint32 714 seriesCount int 715 ) 716 for ; respSize < size && i.Next(); respSize++ { 717 labels, hash, sample := i.Labels(), i.StreamHash(), i.Sample() 718 streams, ok := series[hash] 719 if !ok { 720 streams = map[string]*logproto.Series{} 721 series[hash] = streams 722 } 723 s, ok := streams[labels] 724 if !ok { 725 seriesCount++ 726 s = &logproto.Series{ 727 Labels: labels, 728 StreamHash: hash, 729 } 730 streams[labels] = s 731 } 732 s.Samples = append(s.Samples, sample) 733 } 734 735 result := logproto.SampleQueryResponse{ 736 Series: make([]logproto.Series, 0, seriesCount), 737 } 738 for _, streams := range series { 739 for _, s := range streams { 740 result.Series = append(result.Series, *s) 741 } 742 } 743 return &result, respSize, i.Error() 744 }