github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/tsdb/head.go (about)

     1  // Copyright 2021 The Prometheus Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  package tsdb
    14  
    15  import (
    16  	"sync"
    17  
    18  	"github.com/go-kit/log"
    19  	"github.com/prometheus/client_golang/prometheus"
    20  	"github.com/prometheus/client_golang/prometheus/promauto"
    21  	"github.com/prometheus/prometheus/model/labels"
    22  	"github.com/prometheus/prometheus/storage"
    23  	"go.uber.org/atomic"
    24  
    25  	"github.com/grafana/loki/pkg/storage/stores/tsdb/index"
    26  )
    27  
    28  /*
    29  Disclaimer: This is largely inspired from Prometheus' TSDB Head, albeit
    30  with significant changes (generally reductions rather than additions) to accommodate Loki
    31  */
    32  
    33  const (
    34  	// Note, this is significantly less than the stripe values used by Prometheus' stripeSeries.
    35  	// This is for two reasons.
    36  	// 1) Heads are per-tenant in Loki
    37  	// 2) Loki tends to have a few orders of magnitude less series per node than
    38  	// Prometheus|Cortex|Mimir.
    39  	// Do not specify without bit shifting. This allows us to
    40  	// do shard index calcuations via bitwise & rather than modulos.
    41  	defaultStripeSize = 64
    42  )
    43  
    44  /*
    45  Head is a per-tenant accumulator for index entries in memory.
    46  It can be queried as an IndexReader and consumed to generate a TSDB index.
    47  These are written to on the ingester component when chunks are flushed,
    48  then written to disk as per tenant TSDB indices at the end of the WAL checkpointing cycle.
    49  Every n cycles, they are compacted together and written to object storage.
    50  
    51  In turn, many `Head`s may be wrapped into a multi-tenant head.
    52  This allows Loki to serve `GetChunkRefs` requests for _chunks_ which have been flushed
    53  whereas the corresponding index has not yet been uploaded to object storage,
    54  guaranteeing we maintain querying consistency for the entire data lifecycle.
    55  */
    56  
// Metrics bundles the Prometheus counters used by the TSDB head and manager
// code. All fields are populated by NewMetrics.
//
// TODO(owen-d)
type Metrics struct {
	// Registered as "loki_tsdb_head_series_not_found_total".
	seriesNotFound                prometheus.Counter
	// Registered as "loki_tsdb_creations_total".
	tsdbCreationsTotal            prometheus.Counter
	// Registered as "loki_tsdb_creations_failed_total".
	tsdbCreationFailures          prometheus.Counter
	// Registered as "loki_tsdb_manager_updates_total".
	tsdbManagerUpdatesTotal       prometheus.Counter
	// Registered as "loki_tsdb_manager_updates_failed_total".
	tsdbManagerUpdatesFailedTotal prometheus.Counter
}
    65  
    66  func NewMetrics(r prometheus.Registerer) *Metrics {
    67  	return &Metrics{
    68  		seriesNotFound: promauto.With(r).NewCounter(prometheus.CounterOpts{
    69  			Name: "loki_tsdb_head_series_not_found_total",
    70  			Help: "Total number of requests for series that were not found.",
    71  		}),
    72  		tsdbCreationsTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
    73  			Name: "loki_tsdb_creations_total",
    74  			Help: "Total number of tsdb creations attempted",
    75  		}),
    76  		tsdbCreationFailures: promauto.With(r).NewCounter(prometheus.CounterOpts{
    77  			Name: "loki_tsdb_creations_failed_total",
    78  			Help: "Total number of tsdb creations failed",
    79  		}),
    80  		tsdbManagerUpdatesTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
    81  			Name: "loki_tsdb_manager_updates_total",
    82  			Help: "Total number of tsdb manager updates (loading/rotating tsdbs in mem)",
    83  		}),
    84  		tsdbManagerUpdatesFailedTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
    85  			Name: "loki_tsdb_manager_updates_failed_total",
    86  			Help: "Total number of tsdb manager update failures (loading/rotating tsdbs in mem)",
    87  		}),
    88  	}
    89  }
    90  
// Head accumulates index entries (series and their chunk metadata) in memory
// for a single tenant. Entries are added through Append.
type Head struct {
	// tenant is the tenant identifier passed to NewHead.
	tenant           string
	// numSeries is incremented once for every series newly created by Append.
	numSeries        atomic.Uint64
	minTime, maxTime atomic.Int64 // Current min and max of the samples included in the head.

	// auto incrementing counter to uniquely identify series. This is also used
	// in the MemPostings, but is eventually discarded when we create a real TSDB index.
	lastSeriesID atomic.Uint64

	metrics *Metrics
	logger  log.Logger

	// series holds the in-memory series, sharded across striped locks.
	series *stripeSeries

	postings *index.MemPostings // Postings lists for terms.
}
   107  
   108  func NewHead(tenant string, metrics *Metrics, logger log.Logger) *Head {
   109  	return &Head{
   110  		tenant:   tenant,
   111  		metrics:  metrics,
   112  		logger:   logger,
   113  		series:   newStripeSeries(),
   114  		postings: index.NewMemPostings(),
   115  	}
   116  }
   117  
   118  // MinTime returns the lowest time bound on visible data in the head.
   119  func (h *Head) MinTime() int64 {
   120  	return h.minTime.Load()
   121  }
   122  
   123  // MaxTime returns the highest timestamp seen in data of the head.
   124  func (h *Head) MaxTime() int64 {
   125  	return h.maxTime.Load()
   126  }
   127  
   128  // Will CAS until successfully updates bounds or the condition is no longer valid
   129  func updateMintMaxt(mint, maxt int64, mintSrc, maxtSrc *atomic.Int64) {
   130  	for {
   131  		lt := mintSrc.Load()
   132  		if mint >= lt && lt != 0 {
   133  			break
   134  		}
   135  		if mintSrc.CAS(lt, mint) {
   136  			break
   137  		}
   138  	}
   139  	for {
   140  		ht := maxtSrc.Load()
   141  		if maxt <= ht {
   142  			break
   143  		}
   144  		if maxtSrc.CAS(ht, maxt) {
   145  			break
   146  		}
   147  	}
   148  }
   149  
   150  // Note: chks must not be nil or zero-length
   151  func (h *Head) Append(ls labels.Labels, chks index.ChunkMetas) (created bool, refID uint64) {
   152  	from, through := chks.Bounds()
   153  	var id uint64
   154  	created, refID = h.series.Append(ls, chks, func() *memSeries {
   155  		id = h.lastSeriesID.Inc()
   156  		return newMemSeries(id, ls)
   157  	})
   158  	updateMintMaxt(int64(from), int64(through), &h.minTime, &h.maxTime)
   159  
   160  	if !created {
   161  		return
   162  	}
   163  	h.postings.Add(storage.SeriesRef(id), ls)
   164  	h.numSeries.Inc()
   165  	return
   166  }
   167  
// seriesHashmap is a simple hashmap for memSeries by their label set. It is built
// on top of a regular map keyed by label hash and holds a slice of series per
// key to resolve hash collisions.
// Its methods require the hash to be submitted with the label set to avoid
// re-computing it throughout the code.
type seriesHashmap map[uint64][]*memSeries
   173  
   174  func (m seriesHashmap) get(hash uint64, ls labels.Labels) *memSeries {
   175  	for _, s := range m[hash] {
   176  		if labels.Equal(s.ls, ls) {
   177  			return s
   178  		}
   179  	}
   180  	return nil
   181  }
   182  
   183  func (m seriesHashmap) set(hash uint64, s *memSeries) {
   184  	l := m[hash]
   185  	for i, prev := range l {
   186  		if labels.Equal(prev.ls, s.ls) {
   187  			l[i] = s
   188  			return
   189  		}
   190  	}
   191  	m[hash] = append(l, s)
   192  }
   193  
// stripeSeries stores memSeries in a fixed number of shards so that lookups
// and inserts on different shards can use different locks. The shard index is
// computed as `key & (shards-1)`, which requires shards to be a power of two.
type stripeSeries struct {
	// shards is the number of stripes; locks, hashes and series all have this length.
	shards int
	// locks[i] is held when accessing hashes[i]; getByID also holds locks[i]
	// while reading series[i].
	locks  []sync.RWMutex
	// hashes is sharded by label-set hash (fingerprint).
	hashes []seriesHashmap
	// Sharded by ref. A series ref is the ID assigned to the series by the
	// create function passed to Append when the series was newly added.
	series []map[uint64]*memSeries
}
   201  
   202  func newStripeSeries() *stripeSeries {
   203  	s := &stripeSeries{
   204  		shards: defaultStripeSize,
   205  		locks:  make([]sync.RWMutex, defaultStripeSize),
   206  		hashes: make([]seriesHashmap, defaultStripeSize),
   207  		series: make([]map[uint64]*memSeries, defaultStripeSize),
   208  	}
   209  	for i := range s.hashes {
   210  		s.hashes[i] = seriesHashmap{}
   211  	}
   212  	for i := range s.series {
   213  		s.series[i] = map[uint64]*memSeries{}
   214  	}
   215  	return s
   216  }
   217  
   218  func (s *stripeSeries) getByID(id uint64) *memSeries {
   219  	i := id & uint64(s.shards-1)
   220  
   221  	s.locks[i].RLock()
   222  	series := s.series[i][id]
   223  	s.locks[i].RUnlock()
   224  
   225  	return series
   226  }
   227  
   228  // Append adds chunks to the correct series and returns whether a new series was added
   229  func (s *stripeSeries) Append(
   230  	ls labels.Labels,
   231  	chks index.ChunkMetas,
   232  	createFn func() *memSeries,
   233  ) (created bool, refID uint64) {
   234  	fp := ls.Hash()
   235  	i := fp & uint64(s.shards-1)
   236  	mtx := &s.locks[i]
   237  
   238  	mtx.Lock()
   239  	series := s.hashes[i].get(fp, ls)
   240  	if series == nil {
   241  		series = createFn()
   242  		s.hashes[i].set(fp, series)
   243  
   244  		// the series locks are determined by the ref, not fingerprint
   245  		refIdx := series.ref & uint64(s.shards-1)
   246  		s.series[refIdx][series.ref] = series
   247  		created = true
   248  	}
   249  	mtx.Unlock()
   250  
   251  	series.Lock()
   252  	series.chks = append(series.chks, chks...)
   253  	refID = series.ref
   254  	series.Unlock()
   255  
   256  	return
   257  }
   258  
// memSeries is a single in-memory series: its label set plus the chunk
// metadata appended to it so far. The embedded RWMutex is held by
// stripeSeries.Append while chks is extended.
type memSeries struct {
	sync.RWMutex
	ref  uint64 // The unique reference within a *Head
	ls   labels.Labels
	fp   uint64 // Hash of ls, computed once in newMemSeries.
	chks index.ChunkMetas
}
   266  
   267  func newMemSeries(ref uint64, ls labels.Labels) *memSeries {
   268  	return &memSeries{
   269  		ref: ref,
   270  		ls:  ls,
   271  		fp:  ls.Hash(),
   272  	}
   273  }