github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/tsdb/builder.go (about)

     1  package tsdb
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"math/rand"
     7  	"os"
     8  	"path/filepath"
     9  	"sort"
    10  
    11  	"github.com/prometheus/common/model"
    12  	"github.com/prometheus/prometheus/model/labels"
    13  	"github.com/prometheus/prometheus/storage"
    14  
    15  	chunk_util "github.com/grafana/loki/pkg/storage/chunk/client/util"
    16  	"github.com/grafana/loki/pkg/storage/stores/tsdb/index"
    17  )
    18  
// Builder is a helper used to create tsdb indices.
// It can accept streams in any order and will create the tsdb
// index appropriately via `Build()`.
// It can even receive multiple writes for the same stream, with the caveat
// that chunks must be added in order and not duplicated.
type Builder struct {
	// streams holds every stream added so far, keyed by the string form of
	// its label set (see AddSeries).
	streams         map[string]*stream
	// chunksFinalized is set by FinalizeChunks. InsertChunk and DropChunk
	// return an error until it is true; Build finalizes each stream's chunks
	// itself when it is still false.
	chunksFinalized bool
}
    28  
// stream is the Builder's per-series record: the label set, the fingerprint
// supplied by the caller, and the chunk metadata accumulated for that series.
type stream struct {
	// labels is the label set passed to AddSeries when the stream was created.
	labels labels.Labels
	// fp is the caller-supplied fingerprint; Build sorts series by it and
	// passes it to the index writer.
	fp     model.Fingerprint
	// chunks are the chunk metas collected for this stream.
	chunks index.ChunkMetas
}
    34  
    35  func NewBuilder() *Builder {
    36  	return &Builder{streams: make(map[string]*stream)}
    37  }
    38  
    39  func (b *Builder) AddSeries(ls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
    40  	id := ls.String()
    41  	s, ok := b.streams[id]
    42  	if !ok {
    43  		s = &stream{
    44  			labels: ls,
    45  			fp:     fp,
    46  		}
    47  		b.streams[id] = s
    48  	}
    49  
    50  	s.chunks = append(s.chunks, chks...)
    51  }
    52  
    53  func (b *Builder) FinalizeChunks() {
    54  	for id := range b.streams {
    55  		b.streams[id].chunks = b.streams[id].chunks.Finalize()
    56  	}
    57  	b.chunksFinalized = true
    58  }
    59  
    60  func (b *Builder) InsertChunk(streamID string, chk index.ChunkMeta) error {
    61  	if !b.chunksFinalized {
    62  		return fmt.Errorf("chunk insertion is only allowed on finalized chunks")
    63  	}
    64  
    65  	s, ok := b.streams[streamID]
    66  	if !ok {
    67  		return fmt.Errorf("chunk insertion is only allowed on existing streams")
    68  	}
    69  
    70  	s.chunks = s.chunks.Add(chk)
    71  	return nil
    72  }
    73  
    74  func (b *Builder) DropChunk(streamID string, chk index.ChunkMeta) (bool, error) {
    75  	if !b.chunksFinalized {
    76  		return false, fmt.Errorf("dropping of chunk is only allowed on finalized chunks")
    77  	}
    78  
    79  	s, ok := b.streams[streamID]
    80  	if !ok {
    81  		return false, fmt.Errorf("dropping of chunk is only allowed on existing streams")
    82  	}
    83  
    84  	var chunkFound bool
    85  	s.chunks, chunkFound = s.chunks.Drop(chk)
    86  	return chunkFound, nil
    87  }
    88  
    89  func (b *Builder) Build(
    90  	ctx context.Context,
    91  	scratchDir string,
    92  	// Determines how to create the resulting Identifier and file name.
    93  	// This is variable as we use Builder for multiple reasons,
    94  	// such as building multi-tenant tsdbs on the ingester
    95  	// and per tenant ones during compaction
    96  	createFn func(from, through model.Time, checksum uint32) Identifier,
    97  ) (id Identifier, err error) {
    98  	// Ensure the parent dir exists (i.e. index/<bucket>/<tenant>/)
    99  	if scratchDir != "" {
   100  		if err := chunk_util.EnsureDirectory(scratchDir); err != nil {
   101  			return id, err
   102  		}
   103  	}
   104  
   105  	// First write tenant/index-bounds-random.staging
   106  	rng := rand.Int63()
   107  	name := fmt.Sprintf("%s-%x.staging", index.IndexFilename, rng)
   108  	tmpPath := filepath.Join(scratchDir, name)
   109  
   110  	writer, err := index.NewWriter(ctx, tmpPath)
   111  	if err != nil {
   112  		return id, err
   113  	}
   114  	// TODO(owen-d): multithread
   115  
   116  	// Sort series
   117  	streams := make([]*stream, 0, len(b.streams))
   118  	for _, s := range b.streams {
   119  		streams = append(streams, s)
   120  	}
   121  
   122  	// Use the supplied fingerprints instead of hashing labels for two reasons:
   123  	// 1) Correctness: fingerprints differ from label hashes because
   124  	// we add a synthesized __loki_tennat__ label, which is eventually compacted away.
   125  	// 2) Speed: No hashing required
   126  	sort.Slice(streams, func(i, j int) bool {
   127  		if streams[i].fp != streams[j].fp {
   128  			return streams[i].fp < streams[j].fp
   129  		}
   130  		return labels.Compare(streams[i].labels, streams[j].labels) < 0
   131  	})
   132  
   133  	// Build symbols
   134  	symbolsMap := make(map[string]struct{})
   135  	for _, s := range streams {
   136  		for _, l := range s.labels {
   137  			symbolsMap[l.Name] = struct{}{}
   138  			symbolsMap[l.Value] = struct{}{}
   139  		}
   140  	}
   141  
   142  	// Sort symbols
   143  	symbols := make([]string, 0, len(symbolsMap))
   144  	for s := range symbolsMap {
   145  		symbols = append(symbols, s)
   146  	}
   147  	sort.Strings(symbols)
   148  
   149  	// Add symbols
   150  	for _, symbol := range symbols {
   151  		if err := writer.AddSymbol(symbol); err != nil {
   152  			return id, err
   153  		}
   154  	}
   155  
   156  	// Add series
   157  	for i, s := range streams {
   158  		if !b.chunksFinalized {
   159  			s.chunks = s.chunks.Finalize()
   160  		}
   161  		if err := writer.AddSeries(storage.SeriesRef(i), s.labels, s.fp, s.chunks...); err != nil {
   162  			return id, err
   163  		}
   164  	}
   165  
   166  	if err := writer.Close(); err != nil {
   167  		return id, err
   168  	}
   169  
   170  	reader, err := index.NewFileReader(tmpPath)
   171  	if err != nil {
   172  		return id, err
   173  	}
   174  
   175  	from, through := reader.Bounds()
   176  
   177  	// load the newly compacted index to grab checksum, promptly close
   178  	dst := createFn(model.Time(from), model.Time(through), reader.Checksum())
   179  
   180  	reader.Close()
   181  	defer func() {
   182  		if err != nil {
   183  			os.RemoveAll(tmpPath)
   184  		}
   185  	}()
   186  
   187  	if err := chunk_util.EnsureDirectory(filepath.Dir(dst.Path())); err != nil {
   188  		return id, err
   189  	}
   190  	dstPath := dst.Path()
   191  	if err := os.Rename(tmpPath, dstPath); err != nil {
   192  		return id, err
   193  	}
   194  
   195  	return dst, nil
   196  }