github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ruler/storage/wal/series.go (about)

     1  // This directory was copied and adapted from https://github.com/grafana/agent/tree/main/pkg/metrics.
     2  // We cannot vendor the agent in since the agent vendors loki in, which would cause a cyclic dependency.
     3  // NOTE: many changes have been made to the original code for our use-case.
     4  package wal
     5  
     6  import (
     7  	"sync"
     8  
     9  	"github.com/prometheus/prometheus/model/labels"
    10  	"github.com/prometheus/prometheus/tsdb/chunks"
    11  )
    12  
    13  // TODO(dannyk): add label set interning
    14  
// memSeries is the in-memory record kept for one series. Besides identifying
// the series, it carries the flags that stripeSeries.gc consults to decide
// whether the series may be removed. The embedded mutex is taken by gc and by
// the series iterator before they read or modify the series.
type memSeries struct {
	sync.Mutex

	// ref is the key the series is stored under in stripeSeries, lset is the
	// label set used to find it by hash, and lastTs is the timestamp passed to
	// the most recent updateTs call.
	ref    chunks.HeadSeriesRef
	lset   labels.Labels
	lastTs int64

	// TODO(rfratto): this solution below isn't perfect, and there's still
	// the possibility for a series to be deleted before it's
	// completely gone from the WAL. Rather, we should have gc return
	// a "should delete" map and be given a "deleted" map.
	// If a series that is going to be marked for deletion is in the
	// "deleted" map, then it should be deleted instead.
	//
	// The "deleted" map will be populated by the Truncate function.
	// It will be cleared with every call to gc.

	// willDelete marks a series as to be deleted on the next garbage
	// collection. If it receives a write (updateTs), willDelete is cleared
	// again.
	willDelete bool

	// pendingCommit reports whether this series has samples waiting to be
	// committed to the WAL. It is set by updateTs, and gc never removes a
	// series while it is set.
	pendingCommit bool
}
    39  
    40  func (s *memSeries) updateTs(ts int64) {
    41  	s.lastTs = ts
    42  	s.willDelete = false
    43  	s.pendingCommit = true
    44  }
    45  
// seriesHashmap is a simple hashmap for memSeries keyed by the hash of their
// label set. It is built on top of a regular Go map; each entry holds a slice
// of series so that label sets whose hashes collide can coexist. Its methods
// take the hash as an argument so callers can compute it once and reuse it.
//
// This code is copied from the Prometheus TSDB.
type seriesHashmap map[uint64][]*memSeries
    53  
    54  func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries {
    55  	for _, s := range m[hash] {
    56  		if labels.Equal(s.lset, lset) {
    57  			return s
    58  		}
    59  	}
    60  	return nil
    61  }
    62  
    63  func (m seriesHashmap) set(hash uint64, s *memSeries) {
    64  	//intern.InternLabels(intern.Global, s.lset)
    65  
    66  	l := m[hash]
    67  	for i, prev := range l {
    68  		if labels.Equal(prev.lset, s.lset) {
    69  			l[i] = s
    70  			return
    71  		}
    72  	}
    73  	m[hash] = append(l, s)
    74  }
    75  
    76  func (m seriesHashmap) del(hash uint64, ref chunks.HeadSeriesRef) {
    77  	var rem []*memSeries
    78  	for _, s := range m[hash] {
    79  		if s.ref != ref {
    80  			rem = append(rem, s)
    81  		}
    82  	}
    83  	if len(rem) == 0 {
    84  		delete(m, hash)
    85  	} else {
    86  		m[hash] = rem
    87  	}
    88  }
    89  
    90  const (
    91  	// defaultStripeSize is the default number of entries to allocate in the
    92  	// stripeSeries hash map.
    93  	defaultStripeSize = 1 << 14
    94  )
    95  
// stripeSeries locks modulo ranges of IDs and hashes to reduce lock contention.
// The locks are padded to not be on the same cache line. Filling the padded space
// with the maps was profiled to be slower – likely due to the additional pointer
// dereferences.
//
// Fields:
//   - size is the number of stripes; indexes are derived by masking a series
//     ref or a label hash with size-1.
//   - series[i] maps series refs that fall into stripe i to their memSeries.
//   - hashes[i] holds the label-hash lookup table for stripe i.
//   - locks[i] guards both series[i] and hashes[i].
//
// A given series generally lives in two different stripes: one selected by its
// ref and one selected by the hash of its label set.
//
// This code is copied from the Prometheus TSDB.
type stripeSeries struct {
	size   int
	series []map[chunks.HeadSeriesRef]*memSeries
	hashes []seriesHashmap
	locks  []stripeLock
}
   108  
// stripeLock is the read/write lock guarding a single stripe of stripeSeries.
type stripeLock struct {
	sync.RWMutex
	// Padding to avoid multiple locks being on the same cache line.
	// NOTE(review): 40 bytes presumably rounds the struct up to a 64-byte
	// cache line on 64-bit platforms; confirm against sync.RWMutex's size.
	_ [40]byte
}
   114  
   115  func newStripeSeries() *stripeSeries {
   116  	stripeSize := defaultStripeSize
   117  	s := &stripeSeries{
   118  		size:   stripeSize,
   119  		series: make([]map[chunks.HeadSeriesRef]*memSeries, stripeSize),
   120  		hashes: make([]seriesHashmap, stripeSize),
   121  		locks:  make([]stripeLock, stripeSize),
   122  	}
   123  
   124  	for i := range s.series {
   125  		s.series[i] = map[chunks.HeadSeriesRef]*memSeries{}
   126  	}
   127  	for i := range s.hashes {
   128  		s.hashes[i] = seriesHashmap{}
   129  	}
   130  	return s
   131  }
   132  
   133  // gc garbage collects old chunks that are strictly before mint and removes
   134  // series entirely that have no chunks left.
   135  func (s *stripeSeries) gc(mint int64) map[chunks.HeadSeriesRef]struct{} {
   136  	var (
   137  		deleted = map[chunks.HeadSeriesRef]struct{}{}
   138  	)
   139  
   140  	// Run through all series and find series that haven't been written to
   141  	// since mint. Mark those series as deleted and store their ID.
   142  	for i := 0; i < s.size; i++ {
   143  		s.locks[i].Lock()
   144  
   145  		for _, series := range s.series[i] {
   146  			series.Lock()
   147  			seriesHash := series.lset.Hash()
   148  
   149  			// If the series has received a write after mint, there's still
   150  			// data and it's not completely gone yet.
   151  			if series.lastTs >= mint || series.pendingCommit {
   152  				series.willDelete = false
   153  				series.Unlock()
   154  				continue
   155  			}
   156  
   157  			// The series hasn't received any data and *might* be gone, but
   158  			// we want to give it an opportunity to come back before marking
   159  			// it as deleted, so we wait one more GC cycle.
   160  			if !series.willDelete {
   161  				series.willDelete = true
   162  				series.Unlock()
   163  				continue
   164  			}
   165  
   166  			// The series is gone entirely. We'll need to delete the label
   167  			// hash (if one exists) so we'll obtain a lock for that too.
   168  			j := int(seriesHash) & (s.size - 1)
   169  			if i != j {
   170  				s.locks[j].Lock()
   171  			}
   172  
   173  			deleted[series.ref] = struct{}{}
   174  			delete(s.series[i], series.ref)
   175  			s.hashes[j].del(seriesHash, series.ref)
   176  
   177  			if i != j {
   178  				s.locks[j].Unlock()
   179  			}
   180  
   181  			series.Unlock()
   182  		}
   183  
   184  		s.locks[i].Unlock()
   185  	}
   186  
   187  	return deleted
   188  }
   189  
   190  func (s *stripeSeries) getByID(id chunks.HeadSeriesRef) *memSeries {
   191  	i := uint64(id) & uint64(s.size-1)
   192  
   193  	s.locks[i].RLock()
   194  	series := s.series[i][id]
   195  	s.locks[i].RUnlock()
   196  
   197  	return series
   198  }
   199  
   200  func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries {
   201  	i := hash & uint64(s.size-1)
   202  
   203  	s.locks[i].RLock()
   204  	series := s.hashes[i].get(hash, lset)
   205  	s.locks[i].RUnlock()
   206  
   207  	return series
   208  }
   209  
   210  func (s *stripeSeries) set(hash uint64, series *memSeries) {
   211  	i := hash & uint64(s.size-1)
   212  	s.locks[i].Lock()
   213  	s.hashes[i].set(hash, series)
   214  	s.locks[i].Unlock()
   215  
   216  	i = uint64(series.ref) & uint64(s.size-1)
   217  	s.locks[i].Lock()
   218  	s.series[i][series.ref] = series
   219  	s.locks[i].Unlock()
   220  }
   221  
   222  func (s *stripeSeries) iterator() *stripeSeriesIterator {
   223  	return &stripeSeriesIterator{s}
   224  }
   225  
// stripeSeriesIterator iterates over the series of a stripeSeries by sending
// them through a channel (see Channel). The channel should always be consumed
// completely; otherwise the goroutine feeding it stays blocked and leaks.
type stripeSeriesIterator struct {
	// s is the stripeSeries being iterated.
	s *stripeSeries
}
   231  
   232  func (it *stripeSeriesIterator) Channel() <-chan *memSeries {
   233  	ret := make(chan *memSeries)
   234  
   235  	go func() {
   236  		for i := 0; i < it.s.size; i++ {
   237  			it.s.locks[i].RLock()
   238  
   239  			for _, series := range it.s.series[i] {
   240  				series.Lock()
   241  
   242  				j := int(series.lset.Hash()) & (it.s.size - 1)
   243  				if i != j {
   244  					it.s.locks[j].RLock()
   245  				}
   246  
   247  				ret <- series
   248  
   249  				if i != j {
   250  					it.s.locks[j].RUnlock()
   251  				}
   252  				series.Unlock()
   253  			}
   254  
   255  			it.s.locks[i].RUnlock()
   256  		}
   257  
   258  		close(ret)
   259  	}()
   260  
   261  	return ret
   262  }