// Source: github.com/thanos-io/thanos@v0.32.5/pkg/receive/multitsdb.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package receive

import (
	"context"
	"fmt"
	"os"
	"path"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/storage"
	"github.com/prometheus/prometheus/tsdb"
	"go.uber.org/atomic"
	"golang.org/x/exp/slices"
	"golang.org/x/sync/errgroup"

	"github.com/thanos-io/thanos/pkg/api/status"
	"github.com/thanos-io/thanos/pkg/info/infopb"

	"github.com/thanos-io/objstore"

	"github.com/thanos-io/thanos/pkg/block/metadata"
	"github.com/thanos-io/thanos/pkg/component"
	"github.com/thanos-io/thanos/pkg/errutil"
	"github.com/thanos-io/thanos/pkg/exemplars"
	"github.com/thanos-io/thanos/pkg/shipper"
	"github.com/thanos-io/thanos/pkg/store"
	"github.com/thanos-io/thanos/pkg/store/labelpb"
	"github.com/thanos-io/thanos/pkg/store/storepb"
)

type TSDBStats interface {
	// TenantStats returns TSDB head stats for the given tenants.
	// If no tenantIDs are provided, stats for all tenants are returned.
	TenantStats(limit int, statsByLabelName string, tenantIDs ...string) []status.TenantStats
}

type MultiTSDB struct {
	dataDir         string
	logger          log.Logger
	reg             prometheus.Registerer
	tsdbOpts        *tsdb.Options
	tenantLabelName string
	labels          labels.Labels
	bucket          objstore.Bucket

	mtx                   *sync.RWMutex
	tenants               map[string]*tenant
	allowOutOfOrderUpload bool
	hashFunc              metadata.HashFunc
	hashringConfigs       []HashringConfig
}
// NewMultiTSDB creates a new MultiTSDB.
// NOTE: Passed labels must be sorted lexicographically (alphabetically).
func NewMultiTSDB(
	dataDir string,
	l log.Logger,
	reg prometheus.Registerer,
	tsdbOpts *tsdb.Options,
	labels labels.Labels,
	tenantLabelName string,
	bucket objstore.Bucket,
	allowOutOfOrderUpload bool,
	hashFunc metadata.HashFunc,
) *MultiTSDB {
	if l == nil {
		l = log.NewNopLogger()
	}

	return &MultiTSDB{
		dataDir:               dataDir,
		logger:                log.With(l, "component", "multi-tsdb"),
		reg:                   reg,
		tsdbOpts:              tsdbOpts,
		mtx:                   &sync.RWMutex{},
		tenants:               map[string]*tenant{},
		labels:                labels,
		tenantLabelName:       tenantLabelName,
		bucket:                bucket,
		allowOutOfOrderUpload: allowOutOfOrderUpload,
		hashFunc:              hashFunc,
	}
}
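
// A minimal construction sketch (illustrative only; the option values, the
// bucket variable bkt and the logger are assumptions, not upstream defaults):
//
//	opts := &tsdb.Options{
//		MinBlockDuration:  int64(2 * time.Hour / time.Millisecond),
//		MaxBlockDuration:  int64(2 * time.Hour / time.Millisecond),
//		RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond),
//	}
//	m := NewMultiTSDB(
//		"/var/thanos/receive", // hypothetical data directory
//		logger,
//		prometheus.NewRegistry(),
//		opts,
//		labels.FromStrings("replica", "r0"), // must be sorted
//		"tenant_id",
//		bkt,   // objstore.Bucket; nil disables shipping
//		false, // allowOutOfOrderUpload
//		metadata.NoneFunc,
//	)
//	if err := m.Open(); err != nil { // loads tenants already on disk
//		// handle error
//	}
//	defer m.Close()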

type localClient struct {
	storepb.StoreClient
	store *store.TSDBStore
}

func newLocalClient(c storepb.StoreClient, store *store.TSDBStore) *localClient {
	return &localClient{
		StoreClient: c,
		store:       store,
	}
}

func (l *localClient) LabelSets() []labels.Labels {
	return labelpb.ZLabelSetsToPromLabelSets(l.store.LabelSet()...)
}

func (l *localClient) TimeRange() (mint int64, maxt int64) {
	return l.store.TimeRange()
}

func (l *localClient) TSDBInfos() []infopb.TSDBInfo {
	labelsets := l.store.LabelSet()
	if len(labelsets) == 0 {
		return []infopb.TSDBInfo{}
	}

	mint, maxt := l.store.TimeRange()
	return []infopb.TSDBInfo{
		{
			Labels:  labelsets[0],
			MinTime: mint,
			MaxTime: maxt,
		},
	}
}

func (l *localClient) String() string {
	mint, maxt := l.store.TimeRange()
	return fmt.Sprintf(
		"LabelSets: %v MinTime: %d MaxTime: %d",
		labelpb.PromLabelSetsToString(l.LabelSets()), mint, maxt,
	)
}

func (l *localClient) Addr() (string, bool) {
	return "", true
}

func (l *localClient) SupportsSharding() bool {
	return true
}

func (l *localClient) SupportsWithoutReplicaLabels() bool {
	return true
}

type tenant struct {
	readyS        *ReadyStorage
	storeTSDB     *store.TSDBStore
	exemplarsTSDB *exemplars.TSDB
	ship          *shipper.Shipper

	mtx *sync.RWMutex
}

func newTenant() *tenant {
	return &tenant{
		readyS: &ReadyStorage{},
		mtx:    &sync.RWMutex{},
	}
}

func (t *tenant) readyStorage() *ReadyStorage {
	return t.readyS
}

func (t *tenant) store() *store.TSDBStore {
	t.mtx.RLock()
	defer t.mtx.RUnlock()
	return t.storeTSDB
}

func (t *tenant) client(logger log.Logger) store.Client {
	t.mtx.RLock()
	defer t.mtx.RUnlock()

	// Read the field directly instead of calling t.store(), which would
	// recursively acquire the read lock and can deadlock if a writer is
	// queued between the two RLock calls.
	tsdbStore := t.storeTSDB
	if tsdbStore == nil {
		return nil
	}

	client := storepb.ServerAsClient(store.NewRecoverableStoreServer(logger, tsdbStore), 0)
	return newLocalClient(client, tsdbStore)
}

func (t *tenant) exemplars() *exemplars.TSDB {
	t.mtx.RLock()
	defer t.mtx.RUnlock()
	return t.exemplarsTSDB
}

func (t *tenant) shipper() *shipper.Shipper {
	t.mtx.RLock()
	defer t.mtx.RUnlock()
	return t.ship
}

func (t *tenant) set(storeTSDB *store.TSDBStore, tenantTSDB *tsdb.DB, ship *shipper.Shipper, exemplarsTSDB *exemplars.TSDB) {
	t.readyS.Set(tenantTSDB)
	t.mtx.Lock()
	t.setComponents(storeTSDB, ship, exemplarsTSDB)
	t.mtx.Unlock()
}

func (t *tenant) setComponents(storeTSDB *store.TSDBStore, ship *shipper.Shipper, exemplarsTSDB *exemplars.TSDB) {
	t.storeTSDB = storeTSDB
	t.ship = ship
	t.exemplarsTSDB = exemplarsTSDB
}

func (t *MultiTSDB) Open() error {
	if err := os.MkdirAll(t.dataDir, 0750); err != nil {
		return err
	}

	files, err := os.ReadDir(t.dataDir)
	if err != nil {
		return err
	}

	var g errgroup.Group
	for _, f := range files {
		f := f
		if !f.IsDir() {
			continue
		}

		g.Go(func() error {
			_, err := t.getOrLoadTenant(f.Name(), true)
			return err
		})
	}

	return g.Wait()
}
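
// Open expects one subdirectory per tenant under dataDir; each directory
// name is used verbatim as the tenant ID and holds a regular Prometheus
// TSDB (see defaultTenantDataDir). A sketch of the layout, with hypothetical
// tenant names:
//
//	<dataDir>/
//	├── tenant-a/
//	└── tenant-b/
//
// Non-directory entries are ignored.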

func (t *MultiTSDB) Flush() error {
	t.mtx.RLock()
	defer t.mtx.RUnlock()

	errmtx := &sync.Mutex{}
	merr := errutil.MultiError{}
	wg := &sync.WaitGroup{}
	for id, tenant := range t.tenants {
		db := tenant.readyStorage().Get()
		if db == nil {
			level.Error(t.logger).Log("msg", "flushing TSDB failed; not ready", "tenant", id)
			continue
		}
		level.Info(t.logger).Log("msg", "flushing TSDB", "tenant", id)
		wg.Add(1)
		go func() {
			head := db.Head()
			if err := db.CompactHead(tsdb.NewRangeHead(head, head.MinTime(), head.MaxTime())); err != nil {
				errmtx.Lock()
				merr.Add(err)
				errmtx.Unlock()
			}
			wg.Done()
		}()
	}

	wg.Wait()
	return merr.Err()
}

func (t *MultiTSDB) Close() error {
	t.mtx.Lock()
	defer t.mtx.Unlock()

	merr := errutil.MultiError{}
	for id, tenant := range t.tenants {
		db := tenant.readyStorage().Get()
		if db == nil {
			level.Error(t.logger).Log("msg", "closing TSDB failed; not ready", "tenant", id)
			continue
		}
		level.Info(t.logger).Log("msg", "closing TSDB", "tenant", id)
		merr.Add(db.Close())
	}
	return merr.Err()
}

// Prune flushes and closes the TSDB for tenants that haven't received
// any new samples for longer than the TSDB retention period.
func (t *MultiTSDB) Prune(ctx context.Context) error {
	// Retention of 0 means infinite retention.
	if t.tsdbOpts.RetentionDuration == 0 {
		return nil
	}

	var (
		wg   sync.WaitGroup
		merr errutil.SyncMultiError

		prunedTenants []string
		pmtx          sync.Mutex
	)

	t.mtx.RLock()
	for tenantID, tenantInstance := range t.tenants {
		wg.Add(1)
		go func(tenantID string, tenantInstance *tenant) {
			defer wg.Done()
			tlog := log.With(t.logger, "tenant", tenantID)
			pruned, err := t.pruneTSDB(ctx, tlog, tenantInstance)
			if err != nil {
				merr.Add(err)
				return
			}

			if pruned {
				pmtx.Lock()
				defer pmtx.Unlock()
				prunedTenants = append(prunedTenants, tenantID)
			}
		}(tenantID, tenantInstance)
	}
	wg.Wait()
	t.mtx.RUnlock()

	t.mtx.Lock()
	defer t.mtx.Unlock()
	for _, tenantID := range prunedTenants {
		// Check that the tenant hasn't been reinitialized in-between locks.
		if t.tenants[tenantID].readyStorage().get() != nil {
			continue
		}

		level.Info(t.logger).Log("msg", "Pruned tenant", "tenant", tenantID)
		delete(t.tenants, tenantID)
	}

	return merr.Err()
}

// pruneTSDB removes a TSDB if it is past the retention period.
// It compacts the TSDB head, uploads all remaining blocks to object storage
// and removes the TSDB from disk.
func (t *MultiTSDB) pruneTSDB(ctx context.Context, logger log.Logger, tenantInstance *tenant) (bool, error) {
	tenantTSDB := tenantInstance.readyStorage()
	if tenantTSDB == nil {
		return false, nil
	}
	tenantTSDB.mtx.RLock()
	if tenantTSDB.a == nil || tenantTSDB.a.db == nil {
		tenantTSDB.mtx.RUnlock()
		return false, nil
	}

	tdb := tenantTSDB.a.db
	head := tdb.Head()
	if head.MaxTime() < 0 {
		tenantTSDB.mtx.RUnlock()
		return false, nil
	}

	sinceLastAppendMillis := time.Since(time.UnixMilli(head.MaxTime())).Milliseconds()
	compactThreshold := int64(1.5 * float64(t.tsdbOpts.MaxBlockDuration))
	if sinceLastAppendMillis <= compactThreshold {
		tenantTSDB.mtx.RUnlock()
		return false, nil
	}
	tenantTSDB.mtx.RUnlock()

	// Acquire a write lock and check that no writes have occurred in-between locks.
	tenantTSDB.mtx.Lock()
	defer tenantTSDB.mtx.Unlock()

	// Lock the entire tenant to make sure the shipper is not running in parallel.
	tenantInstance.mtx.Lock()
	defer tenantInstance.mtx.Unlock()

	sinceLastAppendMillis = time.Since(time.UnixMilli(head.MaxTime())).Milliseconds()
	if sinceLastAppendMillis <= compactThreshold {
		return false, nil
	}

	level.Info(logger).Log("msg", "Compacting tenant")
	if err := tdb.CompactHead(tsdb.NewRangeHead(head, head.MinTime(), head.MaxTime())); err != nil {
		return false, err
	}

	if sinceLastAppendMillis <= t.tsdbOpts.RetentionDuration {
		return false, nil
	}

	level.Info(logger).Log("msg", "Pruning tenant")
	if tenantInstance.ship != nil {
		uploaded, err := tenantInstance.ship.Sync(ctx)
		if err != nil {
			return false, err
		}

		if uploaded > 0 {
			level.Info(logger).Log("msg", "Uploaded head block")
		}
	}

	if err := tdb.Close(); err != nil {
		return false, err
	}

	if err := os.RemoveAll(tdb.Dir()); err != nil {
		return false, err
	}

	tenantInstance.readyS.set(nil)
	tenantInstance.setComponents(nil, nil, nil)

	return true, nil
}

func (t *MultiTSDB) Sync(ctx context.Context) (int, error) {
	if t.bucket == nil {
		return 0, errors.New("bucket is not specified, Sync should not be invoked")
	}

	t.mtx.RLock()
	defer t.mtx.RUnlock()

	var (
		errmtx   = &sync.Mutex{}
		merr     = errutil.MultiError{}
		wg       = &sync.WaitGroup{}
		uploaded atomic.Int64
	)

	for tenantID, tenant := range t.tenants {
		level.Debug(t.logger).Log("msg", "uploading block for tenant", "tenant", tenantID)
		s := tenant.shipper()
		if s == nil {
			continue
		}
		wg.Add(1)
		go func() {
			up, err := s.Sync(ctx)
			if err != nil {
				errmtx.Lock()
				merr.Add(errors.Wrap(err, "upload"))
				errmtx.Unlock()
			}
			uploaded.Add(int64(up))
			wg.Done()
		}()
	}
	wg.Wait()
	return int(uploaded.Load()), merr.Err()
}
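
// Sync is typically driven by a periodic loop, often alongside Prune; a
// sketch (the 30s interval and the logger are assumptions, not upstream
// defaults):
//
//	tick := time.NewTicker(30 * time.Second)
//	defer tick.Stop()
//	for range tick.C {
//		if up, err := m.Sync(ctx); err != nil {
//			level.Warn(logger).Log("msg", "shipping failed", "uploaded", up, "err", err)
//		}
//		if err := m.Prune(ctx); err != nil {
//			level.Warn(logger).Log("msg", "pruning failed", "err", err)
//		}
//	}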

func (t *MultiTSDB) RemoveLockFilesIfAny() error {
	fis, err := os.ReadDir(t.dataDir)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	merr := errutil.MultiError{}
	for _, fi := range fis {
		if !fi.IsDir() {
			continue
		}
		if err := os.Remove(filepath.Join(t.defaultTenantDataDir(fi.Name()), "lock")); err != nil {
			if os.IsNotExist(err) {
				continue
			}
			merr.Add(err)
			continue
		}
		level.Info(t.logger).Log("msg", "a leftover lockfile found and removed", "tenant", fi.Name())
	}
	return merr.Err()
}

func (t *MultiTSDB) TSDBLocalClients() []store.Client {
	t.mtx.RLock()
	defer t.mtx.RUnlock()

	res := make([]store.Client, 0, len(t.tenants))
	for _, tenant := range t.tenants {
		client := tenant.client(t.logger)
		if client != nil {
			res = append(res, client)
		}
	}

	return res
}

func (t *MultiTSDB) TSDBExemplars() map[string]*exemplars.TSDB {
	t.mtx.RLock()
	defer t.mtx.RUnlock()

	res := make(map[string]*exemplars.TSDB, len(t.tenants))
	for k, tenant := range t.tenants {
		e := tenant.exemplars()
		if e != nil {
			res[k] = e
		}
	}
	return res
}

func (t *MultiTSDB) TenantStats(limit int, statsByLabelName string, tenantIDs ...string) []status.TenantStats {
	t.mtx.RLock()
	defer t.mtx.RUnlock()
	if len(tenantIDs) == 0 {
		for tenantID := range t.tenants {
			tenantIDs = append(tenantIDs, tenantID)
		}
	}

	var (
		mu     sync.Mutex
		wg     sync.WaitGroup
		result = make([]status.TenantStats, 0, len(t.tenants))
	)
	for _, tenantID := range tenantIDs {
		tenantInstance, ok := t.tenants[tenantID]
		if !ok {
			continue
		}

		wg.Add(1)
		go func(tenantID string, tenantInstance *tenant) {
			defer wg.Done()
			db := tenantInstance.readyS.Get()
			if db == nil {
				return
			}
			stats := db.Head().Stats(statsByLabelName, limit)

			mu.Lock()
			defer mu.Unlock()
			result = append(result, status.TenantStats{
				Tenant: tenantID,
				Stats:  stats,
			})
		}(tenantID, tenantInstance)
	}
	wg.Wait()

	sort.Slice(result, func(i, j int) bool {
		return result[i].Tenant < result[j].Tenant
	})
	return result
}
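
// Usage sketch: collect head stats for every tenant, keeping the ten
// highest-cardinality entries per statistic. The field access on the
// returned status.TenantStats is an assumption for illustration; see
// pkg/api/status for the exact shape:
//
//	for _, s := range m.TenantStats(10, labels.MetricName) {
//		fmt.Println(s.Tenant, s.Stats.NumSeries)
//	}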

func (t *MultiTSDB) startTSDB(logger log.Logger, tenantID string, tenant *tenant) error {
	reg := prometheus.WrapRegistererWith(prometheus.Labels{"tenant": tenantID}, t.reg)
	reg = NewUnRegisterer(reg)

	initialLset := labelpb.ExtendSortedLabels(t.labels, labels.FromStrings(t.tenantLabelName, tenantID))

	lset, err := t.extractTenantsLabels(tenantID, initialLset)
	if err != nil {
		return err
	}

	dataDir := t.defaultTenantDataDir(tenantID)

	level.Info(logger).Log("msg", "opening TSDB")
	opts := *t.tsdbOpts
	s, err := tsdb.Open(
		dataDir,
		logger,
		reg,
		&opts,
		nil,
	)
	if err != nil {
		t.mtx.Lock()
		delete(t.tenants, tenantID)
		t.mtx.Unlock()
		return err
	}
	var ship *shipper.Shipper
	if t.bucket != nil {
		ship = shipper.New(
			logger,
			reg,
			dataDir,
			t.bucket,
			func() labels.Labels { return lset },
			metadata.ReceiveSource,
			nil,
			t.allowOutOfOrderUpload,
			t.hashFunc,
		)
	}
	tenant.set(store.NewTSDBStore(logger, s, component.Receive, lset), s, ship, exemplars.NewTSDB(s, lset))
	level.Info(logger).Log("msg", "TSDB is now ready")
	return nil
}

func (t *MultiTSDB) defaultTenantDataDir(tenantID string) string {
	return path.Join(t.dataDir, tenantID)
}

func (t *MultiTSDB) getOrLoadTenant(tenantID string, blockingStart bool) (*tenant, error) {
	// Fast path, as creating tenants is a very rare operation.
	t.mtx.RLock()
	tenant, exist := t.tenants[tenantID]
	t.mtx.RUnlock()
	if exist {
		return tenant, nil
	}

	// Slow path: take the write lock and check the map again, since another
	// goroutine may have inserted the same tenant after the fast-path read.
	t.mtx.Lock()
	tenant, exist = t.tenants[tenantID]
	if exist {
		t.mtx.Unlock()
		return tenant, nil
	}

	tenant = newTenant()
	t.tenants[tenantID] = tenant
	t.mtx.Unlock()

	logger := log.With(t.logger, "tenant", tenantID)
	if !blockingStart {
		go func() {
			if err := t.startTSDB(logger, tenantID, tenant); err != nil {
				level.Error(logger).Log("msg", "failed to start tsdb asynchronously", "err", err)
			}
		}()
		return tenant, nil
	}
	return tenant, t.startTSDB(logger, tenantID, tenant)
}

func (t *MultiTSDB) TenantAppendable(tenantID string) (Appendable, error) {
	tenant, err := t.getOrLoadTenant(tenantID, false)
	if err != nil {
		return nil, err
	}
	return tenant.readyStorage(), nil
}
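
// Write-path sketch: obtain a tenant's Appendable and append one sample.
// Because startTSDB runs asynchronously here, Appender may return
// ErrNotReady and callers are expected to retry. The tenant ID, timestamp
// ts and error handling are illustrative:
//
//	app, err := m.TenantAppendable("tenant-a")
//	if err != nil {
//		return err
//	}
//	a, err := app.Appender(ctx)
//	if err != nil {
//		return err // possibly ErrNotReady
//	}
//	if _, err := a.Append(0, labels.FromStrings("__name__", "up"), ts, 1); err != nil {
//		_ = a.Rollback()
//		return err
//	}
//	return a.Commit()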

func (t *MultiTSDB) SetHashringConfig(cfg []HashringConfig) error {
	t.hashringConfigs = cfg

	// If a tenant already exists in MultiTSDB, update its label set
	// from the latest []HashringConfig.
	// If a tenant appears in multiple hashring configs, only the label set
	// from the first hashring config is applied. This mirrors the logic in
	// startTSDB.
	updatedTenants := make([]string, 0)
	for _, hc := range t.hashringConfigs {
		for _, tenantID := range hc.Tenants {
			if slices.Contains(updatedTenants, tenantID) {
				continue
			}
			if t.tenants[tenantID] != nil {
				updatedTenants = append(updatedTenants, tenantID)

				lset := labelpb.ExtendSortedLabels(t.labels, labels.FromStrings(t.tenantLabelName, tenantID))

				if hc.ExternalLabels != nil {
					extendedLset, err := extendLabels(lset, hc.ExternalLabels, t.logger)
					if err != nil {
						return errors.Wrap(err, "failed to extend external labels for tenant "+tenantID)
					}
					lset = extendedLset
				}

				if t.tenants[tenantID].ship != nil {
					t.tenants[tenantID].ship.SetLabels(lset)
				}
				t.tenants[tenantID].storeTSDB.SetExtLset(lset)
				t.tenants[tenantID].exemplarsTSDB.SetExtLabels(lset)
			}
		}
	}

	return nil
}
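
// Sketch of a hashring config that overrides external labels for a single
// tenant (values illustrative; only the fields relevant here are set):
//
//	err := m.SetHashringConfig([]HashringConfig{{
//		Tenants:        []string{"tenant-a"},
//		ExternalLabels: map[string]string{"region": "eu-west"},
//	}})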

// ErrNotReady is returned if the underlying storage is not ready yet.
var ErrNotReady = errors.New("TSDB not ready")

// ReadyStorage implements the Storage interface, allowing the actual storage
// to be set at a later point in time.
// TODO: Replace this with upstream Prometheus implementation when it is exposed.
type ReadyStorage struct {
	mtx sync.RWMutex
	a   *adapter
}

// Set the storage.
func (s *ReadyStorage) Set(db *tsdb.DB) {
	s.mtx.Lock()
	defer s.mtx.Unlock()

	s.set(&adapter{db: db})
}

func (s *ReadyStorage) set(a *adapter) {
	s.a = a
}

// Get the storage.
func (s *ReadyStorage) Get() *tsdb.DB {
	if x := s.get(); x != nil {
		return x.db
	}
	return nil
}

func (s *ReadyStorage) get() *adapter {
	s.mtx.RLock()
	x := s.a
	s.mtx.RUnlock()
	return x
}

// StartTime implements the Storage interface.
func (s *ReadyStorage) StartTime() (int64, error) {
	return 0, errors.New("not implemented")
}

// Querier implements the Storage interface.
func (s *ReadyStorage) Querier(ctx context.Context, mint, maxt int64) (storage.Querier, error) {
	if x := s.get(); x != nil {
		return x.Querier(ctx, mint, maxt)
	}
	return nil, ErrNotReady
}

// ExemplarQuerier implements the Storage interface.
func (s *ReadyStorage) ExemplarQuerier(ctx context.Context) (storage.ExemplarQuerier, error) {
	if x := s.get(); x != nil {
		return x.ExemplarQuerier(ctx)
	}
	return nil, ErrNotReady
}

// Appender implements the Storage interface.
func (s *ReadyStorage) Appender(ctx context.Context) (storage.Appender, error) {
	if x := s.get(); x != nil {
		return x.Appender(ctx)
	}
	return nil, ErrNotReady
}

// Close implements the Storage interface.
func (s *ReadyStorage) Close() error {
	if x := s.Get(); x != nil {
		return x.Close()
	}
	return nil
}
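
// Callers should treat ErrNotReady as transient; a retry sketch (rs is a
// *ReadyStorage and the backoff policy is an assumption):
//
//	for {
//		q, err := rs.Querier(ctx, mint, maxt)
//		if err != ErrNotReady {
//			return q, err
//		}
//		time.Sleep(time.Second)
//	}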

// adapter implements a storage.Storage around TSDB.
type adapter struct {
	db *tsdb.DB
}

// StartTime implements the Storage interface.
func (a adapter) StartTime() (int64, error) {
	return 0, errors.New("not implemented")
}

func (a adapter) Querier(ctx context.Context, mint, maxt int64) (storage.Querier, error) {
	return a.db.Querier(ctx, mint, maxt)
}

func (a adapter) ExemplarQuerier(ctx context.Context) (storage.ExemplarQuerier, error) {
	return a.db.ExemplarQuerier(ctx)
}

// Appender returns a new appender against the storage.
func (a adapter) Appender(ctx context.Context) (storage.Appender, error) {
	return a.db.Appender(ctx), nil
}

// Close closes the storage and all its underlying resources.
func (a adapter) Close() error {
	return a.db.Close()
}

// UnRegisterer is a Prometheus registerer that ensures that collectors can
// always be registered, by first unregistering any already-registered
// collector. FlushableStorage uses this registerer so that metric values
// are not lost between DB flushes.
//
// This type cannot embed the inner registerer, because since v2.39.0
// Prometheus wraps the Registry with prometheus.WrapRegistererWithPrefix.
// That wrapper calls the Register function of the wrapped registerer.
// If UnRegisterer were the wrapped registerer, this would end up calling the
// inner registerer's Register, which doesn't implement the "unregister"
// logic that this type intends to use.
type UnRegisterer struct {
	innerReg prometheus.Registerer
}

func NewUnRegisterer(inner prometheus.Registerer) *UnRegisterer {
	return &UnRegisterer{innerReg: inner}
}

// Register registers the given collector. If it is already registered, the
// existing collector is unregistered and the new one registered in its place.
func (u *UnRegisterer) Register(c prometheus.Collector) error {
	if err := u.innerReg.Register(c); err != nil {
		if _, ok := err.(prometheus.AlreadyRegisteredError); ok {
			if ok = u.innerReg.Unregister(c); !ok {
				panic("unable to unregister existing collector")
			}
			u.innerReg.MustRegister(c)
			return nil
		}
		return err
	}
	return nil
}

// Unregister unregisters the given collector.
func (u *UnRegisterer) Unregister(c prometheus.Collector) bool {
	return u.innerReg.Unregister(c)
}

// MustRegister registers the given collectors. It panics if an error happens.
// Note that if a collector is already registered it will be re-registered
// without panicking.
func (u *UnRegisterer) MustRegister(cs ...prometheus.Collector) {
	for _, c := range cs {
		if err := u.Register(c); err != nil {
			panic(err)
		}
	}
}
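
// Behavior sketch: registering the same collector twice succeeds, where a
// plain prometheus.Registry would return AlreadyRegisteredError (the metric
// name is hypothetical):
//
//	r := NewUnRegisterer(prometheus.NewRegistry())
//	g := prometheus.NewGauge(prometheus.GaugeOpts{Name: "demo_gauge"})
//	r.MustRegister(g)
//	r.MustRegister(g) // unregisters the old instance, then registers again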

// extractTenantsLabels extracts a tenant's external labels from the hashring
// configs. If a tenant appears in multiple hashring configs, only the
// external label set from the first hashring config is applied.
func (t *MultiTSDB) extractTenantsLabels(tenantID string, initialLset labels.Labels) (labels.Labels, error) {
	for _, hc := range t.hashringConfigs {
		for _, tenant := range hc.Tenants {
			if tenant != tenantID {
				continue
			}

			if hc.ExternalLabels != nil {
				extendedLset, err := extendLabels(initialLset, hc.ExternalLabels, t.logger)
				if err != nil {
					return nil, errors.Wrap(err, "failed to extend external labels for tenant "+tenantID)
				}
				return extendedLset, nil
			}

			return initialLset, nil
		}
	}

	return initialLset, nil
}

// extendLabels extends the initial label set with external labels.
// If an external label shares a name with a label in the initial label set,
// the label from the initial label set wins and the user is informed about it.
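//
// For example (worked sketch): merging the initial set {a="1", b="2"} with
// external labels {b="3", c="4"} yields {a="1", b="2", c="4"}; the duplicate
// name "b" keeps its initial value and a log line reports the collision.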
func extendLabels(labelSet labels.Labels, extend map[string]string, logger log.Logger) (labels.Labels, error) {
	var extendLabels labels.Labels
	for name, value := range extend {
		if !model.LabelName.IsValid(model.LabelName(name)) {
			return nil, errors.Errorf("unsupported format for label's name: %s", name)
		}
		extendLabels = append(extendLabels, labels.Label{Name: name, Value: value})
	}

	sort.Sort(labelSet)
	sort.Sort(extendLabels)

	extendedLabelSet := make(labels.Labels, 0, len(labelSet)+len(extendLabels))
	for len(labelSet) > 0 && len(extendLabels) > 0 {
		d := strings.Compare(labelSet[0].Name, extendLabels[0].Name)
		if d == 0 {
			extendedLabelSet = append(extendedLabelSet, labelSet[0])
			level.Info(logger).Log("msg", "Duplicate label found. Using initial label instead.",
				"label's name", extendLabels[0].Name)
			labelSet, extendLabels = labelSet[1:], extendLabels[1:]
		} else if d < 0 {
			extendedLabelSet = append(extendedLabelSet, labelSet[0])
			labelSet = labelSet[1:]
		} else if d > 0 {
			extendedLabelSet = append(extendedLabelSet, extendLabels[0])
			extendLabels = extendLabels[1:]
		}
	}
	extendedLabelSet = append(extendedLabelSet, labelSet...)
	extendedLabelSet = append(extendedLabelSet, extendLabels...)

	sort.Sort(extendedLabelSet)

	return extendedLabelSet, nil
}