github.com/grafana/pyroscope@v1.18.0/pkg/iter/tee.go (about)

     1  package iter
     2  
     3  import (
     4  	"math"
     5  	"sync"
     6  )
     7  
// defaultTeeBufferSize is the buffer size Tee and TeeN pass to newTee. There
// it is used both as the initial capacity of the shared buffer and as the
// maximum number of items read from the source iterator per refill.
const defaultTeeBufferSize = 4096
     9  
    10  // Tee returns 2 independent iterators from a single iterable.
    11  //
    12  // The original iterator should not be used anywhere else, except that it's
    13  // caller responsibility to close it and handle the error, after all the
    14  // tee iterators finished.
    15  //
    16  // Tee buffers source objects, and frees them eventually: when an object
    17  // from the source iterator is consumed, the ownership is transferred to Tee.
    18  // Therefore, the caller must ensure the source iterator never reuses objects
    19  // returned with At.
    20  //
    21  // Tee never blocks the leader iterator, instead, it grows the internal buffer:
    22  // if any of the returned iterators are abandoned, all source iterator objects
    23  // will be held in the buffer.
    24  func Tee[T any](iter Iterator[T]) (a, b Iterator[T]) {
    25  	s := newTee[T](iter, 2, defaultTeeBufferSize)
    26  	return s[0], s[1]
    27  }
    28  
    29  func TeeN[T any](iter Iterator[T], n int) []Iterator[T] {
    30  	return newTee[T](iter, n, defaultTeeBufferSize)
    31  }
    32  
    33  // NOTE(kolesnikovae): The implementation design aims for simplicity.
    34  // A more efficient tee can be implemented on top of a linked
    35  // list of small arrays:
    36  //  - More efficient (de-)allocations (chunk pool).
    37  //  - Less/no mutex contention.
    38  
    39  func newTee[T any](iter Iterator[T], n, bufSize int) []Iterator[T] {
    40  	if n < 0 {
    41  		return nil
    42  	}
    43  	s := &sharedIterator[T]{
    44  		s: int64(bufSize),
    45  		i: iter,
    46  		t: make([]int64, n),
    47  		v: make([]T, 0, bufSize),
    48  	}
    49  	t := make([]Iterator[T], n)
    50  	for i := range s.t {
    51  		t[i] = &tee[T]{
    52  			s: s,
    53  			n: i,
    54  		}
    55  	}
    56  	return t
    57  }
    58  
// sharedIterator is the state shared by all tee iterators created from one
// source iterator: the source itself, the buffer of items read from it,
// and one read offset per tee.
type sharedIterator[T any] struct {
	// s is the batch size: the maximum number of items read from the
	// source per refill in next, and the offset every tee must have
	// reached before clean trims the buffer.
	s int64
	// i is the source iterator.
	i Iterator[T]
	// e is the result of the source's Err taken after the latest refill;
	// once it is non-nil, next stops reading from the source.
	e error
	// t holds one offset per tee: t[n] counts the buffered items tee n has
	// advanced past, so its current item is v[t[n]-1]. math.MaxInt64
	// marks a closed tee.
	t []int64
	// m guards the fields of this struct. Note that next and close update
	// t[n] while holding only the read lock.
	// NOTE(review): this presumably relies on each tee being driven by a
	// single goroutine, so that every slot of t has one writer; confirm.
	m sync.RWMutex
	// v is the buffer of items read from the source and not yet trimmed.
	v []T
	// w is the number of buffered items available to the tees; next and
	// clean adjust it together with v.
	w int64
}
    68  
    69  func (s *sharedIterator[T]) next(n int) bool {
    70  	s.m.RLock()
    71  	if s.t[n] < s.w {
    72  		s.t[n]++
    73  		s.m.RUnlock()
    74  		return true
    75  	}
    76  	s.m.RUnlock()
    77  	s.m.Lock()
    78  	defer s.m.Unlock()
    79  	if s.t[n] < s.w {
    80  		s.t[n]++
    81  		return true
    82  	}
    83  	// All the memoized items were consumed.
    84  	if s.e != nil {
    85  		return false
    86  	}
    87  	s.clean() // Conditionally clean consumed values.
    88  	// Fetch the next batch from the source iterator.
    89  	var i int64
    90  	for ; i < s.s; i++ {
    91  		if !s.i.Next() {
    92  			break
    93  		}
    94  		s.v = append(s.v, s.i.At())
    95  	}
    96  	s.e = s.i.Err()
    97  	s.w += i
    98  	if i != 0 {
    99  		s.t[n]++
   100  		return true
   101  	}
   102  	return false
   103  }
   104  
   105  func (s *sharedIterator[T]) clean() {
   106  	lo := int64(-1)
   107  	for _, v := range s.t {
   108  		if v < lo || lo == -1 {
   109  			lo = v
   110  		}
   111  	}
   112  	if lo < s.s {
   113  		return
   114  	}
   115  	if lo == math.MaxInt64 {
   116  		// All iterators have been closed.
   117  		return
   118  	}
   119  	// Clean values that will be removed, shift
   120  	// remaining values to the beginning and update
   121  	// iterator offsets accordingly.
   122  	lo--
   123  	var v T
   124  	for i := range s.v[:lo] {
   125  		s.v[i] = v
   126  	}
   127  	s.v = s.v[:copy(s.v, s.v[lo:])]
   128  	s.w -= lo
   129  	for i := range s.t {
   130  		if s.t[i] != math.MaxInt64 {
   131  			s.t[i] -= lo
   132  		}
   133  	}
   134  }
   135  
   136  func (s *sharedIterator[T]) at(n int) T {
   137  	s.m.RLock()
   138  	v := s.v[s.t[n]-1]
   139  	s.m.RUnlock()
   140  	return v
   141  }
   142  
   143  func (s *sharedIterator[T]) close(n int) {
   144  	s.m.RLock()
   145  	s.t[n] = math.MaxInt64
   146  	s.m.RUnlock()
   147  }
   148  
   149  func (s *sharedIterator[T]) err() error {
   150  	s.m.RLock()
   151  	e := s.e
   152  	s.m.RUnlock()
   153  	return e
   154  }
   155  
   156  type tee[T any] struct {
   157  	s *sharedIterator[T]
   158  	n int
   159  }
   160  
   161  func (t *tee[T]) Next() bool { return t.s.next(t.n) }
   162  
   163  func (t *tee[T]) At() T { return t.s.at(t.n) }
   164  
   165  func (t *tee[T]) Err() error { return t.s.err() }
   166  
   167  func (t *tee[T]) Close() error {
   168  	t.s.close(t.n)
   169  	return nil
   170  }