github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/tsdb/single_file_index.go (about)

     1  package tsdb
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"io"
     7  	"io/ioutil"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/prometheus/common/model"
    12  	"github.com/prometheus/prometheus/model/labels"
    13  
    14  	"github.com/grafana/loki/pkg/chunkenc"
    15  	"github.com/grafana/loki/pkg/storage/chunk"
    16  	"github.com/grafana/loki/pkg/storage/stores/index/stats"
    17  	index_shipper "github.com/grafana/loki/pkg/storage/stores/indexshipper/index"
    18  	"github.com/grafana/loki/pkg/storage/stores/tsdb/index"
    19  )
    20  
    21  const (
    22  	gzipSuffix = ".gz"
    23  )
    24  
    25  func OpenShippableTSDB(p string) (index_shipper.Index, error) {
    26  	var gz bool
    27  	trimmed := strings.TrimSuffix(p, gzipSuffix)
    28  	if trimmed != p {
    29  		gz = true
    30  	}
    31  
    32  	id, err := identifierFromPath(trimmed)
    33  	if err != nil {
    34  		return nil, err
    35  	}
    36  
    37  	return NewShippableTSDBFile(id, gz)
    38  }
    39  
// nolint
// TSDBFile is backed by an actual file and implements the indexshipper/index.Index interface
//
// It is assembled by NewShippableTSDBFile from an Identifier, the Index read from
// that identifier's path, and an in-memory reader over the bytes returned alongside
// that Index.
type TSDBFile struct {
	// reuse Identifier for resolving locations
	Identifier

	// reuse TSDBIndex for reading
	Index

	// to satisfy Reader() and Close() methods
	r io.ReadSeeker
}
    52  
    53  func NewShippableTSDBFile(id Identifier, gzip bool) (*TSDBFile, error) {
    54  	if gzip {
    55  		id = newSuffixedIdentifier(id, gzipSuffix)
    56  	}
    57  
    58  	idx, b, err := NewTSDBIndexFromFile(id.Path(), gzip)
    59  	if err != nil {
    60  		return nil, err
    61  	}
    62  
    63  	return &TSDBFile{
    64  		Identifier: id,
    65  		Index:      idx,
    66  		r:          bytes.NewReader(b),
    67  	}, err
    68  }
    69  
// Close closes the embedded Index and returns whatever error that reports.
// The in-memory reader held in r is not touched.
func (f *TSDBFile) Close() error {
	return f.Index.Close()
}
    73  
// Reader returns the io.ReadSeeker stored on the file at construction time.
// The same reader value is handed out on every call and the error is always nil.
// NOTE(review): since one reader is shared, callers presumably need to Seek to the
// start before reading — confirm against the index shipper's usage.
func (f *TSDBFile) Reader() (io.ReadSeeker, error) {
	return f.r, nil
}
    77  
// nolint
// TSDBIndex is backed by an IndexReader
// and translates the IndexReader to an Index implementation
// It loads the file into memory and doesn't keep a file descriptor open
//
// reader is the IndexReader that every query method delegates to.
// chunkFilter is optional: it stays nil until SetChunkFilterer is called, and when
// set forSeries asks it for a per-request filter that can drop series by labels.
type TSDBIndex struct {
	reader      IndexReader
	chunkFilter chunk.RequestChunkFilterer
}
    86  
    87  // Return the index as well as the underlying []byte which isn't exposed as an index
    88  // method but is helpful for building an io.reader for the index shipper
    89  func NewTSDBIndexFromFile(location string, gzip bool) (*TSDBIndex, []byte, error) {
    90  	raw, err := ioutil.ReadFile(location)
    91  	if err != nil {
    92  		return nil, nil, err
    93  	}
    94  
    95  	cleaned := raw
    96  
    97  	// decompress if needed
    98  	if gzip {
    99  		r := chunkenc.Gzip.GetReader(bytes.NewReader(raw))
   100  		defer chunkenc.Gzip.PutReader(r)
   101  
   102  		var err error
   103  		cleaned, err = io.ReadAll(r)
   104  		if err != nil {
   105  			return nil, nil, err
   106  		}
   107  	}
   108  
   109  	reader, err := index.NewReader(index.RealByteSlice(cleaned))
   110  	if err != nil {
   111  		return nil, nil, err
   112  	}
   113  	return NewTSDBIndex(reader), cleaned, nil
   114  }
   115  
// NewTSDBIndex wraps the given IndexReader in a TSDBIndex.
// No chunk filter is installed; use SetChunkFilterer to add one.
func NewTSDBIndex(reader IndexReader) *TSDBIndex {
	return &TSDBIndex{
		reader: reader,
	}
}
   121  
// Close closes the underlying IndexReader and returns its error.
func (i *TSDBIndex) Close() error {
	return i.reader.Close()
}
   125  
// Bounds returns the (from, through) pair reported by the underlying reader,
// converted to model.Time.
func (i *TSDBIndex) Bounds() (model.Time, model.Time) {
	from, through := i.reader.Bounds()
	return model.Time(from), model.Time(through)
}
   130  
// SetChunkFilterer stores the filterer that forSeries consults when iterating
// series. While the stored value is nil, forSeries applies no label-based filtering.
func (i *TSDBIndex) SetChunkFilterer(chunkFilter chunk.RequestChunkFilterer) {
	i.chunkFilter = chunkFilter
}
   134  
   135  // fn must NOT capture it's arguments. They're reused across series iterations and returned to
   136  // a pool after completion.
   137  func (i *TSDBIndex) forSeries(
   138  	ctx context.Context,
   139  	shard *index.ShardAnnotation,
   140  	fn func(labels.Labels, model.Fingerprint, []index.ChunkMeta),
   141  	matchers ...*labels.Matcher,
   142  ) error {
   143  	p, err := PostingsForMatchers(i.reader, shard, matchers...)
   144  	if err != nil {
   145  		return err
   146  	}
   147  
   148  	var ls labels.Labels
   149  	chks := ChunkMetasPool.Get()
   150  	defer ChunkMetasPool.Put(chks)
   151  
   152  	var filterer chunk.Filterer
   153  	if i.chunkFilter != nil {
   154  		filterer = i.chunkFilter.ForRequest(ctx)
   155  	}
   156  
   157  	for p.Next() {
   158  		hash, err := i.reader.Series(p.At(), &ls, &chks)
   159  		if err != nil {
   160  			return err
   161  		}
   162  
   163  		// skip series that belong to different shards
   164  		if shard != nil && !shard.Match(model.Fingerprint(hash)) {
   165  			continue
   166  		}
   167  
   168  		if filterer != nil && filterer.ShouldFilter(ls) {
   169  			continue
   170  		}
   171  
   172  		fn(ls, model.Fingerprint(hash), chks)
   173  	}
   174  	return p.Err()
   175  }
   176  
   177  func (i *TSDBIndex) GetChunkRefs(ctx context.Context, userID string, from, through model.Time, res []ChunkRef, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error) {
   178  	queryBounds := newBounds(from, through)
   179  	if res == nil {
   180  		res = ChunkRefsPool.Get()
   181  	}
   182  	res = res[:0]
   183  
   184  	if err := i.forSeries(ctx, shard,
   185  		func(ls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
   186  			// TODO(owen-d): use logarithmic approach
   187  			for _, chk := range chks {
   188  
   189  				// current chunk is outside the range of this request
   190  				if !Overlap(queryBounds, chk) {
   191  					continue
   192  				}
   193  
   194  				res = append(res, ChunkRef{
   195  					User:        userID, // assumed to be the same, will be enforced by caller.
   196  					Fingerprint: fp,
   197  					Start:       chk.From(),
   198  					End:         chk.Through(),
   199  					Checksum:    chk.Checksum,
   200  				})
   201  			}
   202  		},
   203  		matchers...); err != nil {
   204  		return nil, err
   205  	}
   206  
   207  	return res, nil
   208  }
   209  
   210  func (i *TSDBIndex) Series(ctx context.Context, _ string, from, through model.Time, res []Series, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error) {
   211  	queryBounds := newBounds(from, through)
   212  	if res == nil {
   213  		res = SeriesPool.Get()
   214  	}
   215  	res = res[:0]
   216  
   217  	if err := i.forSeries(ctx, shard,
   218  		func(ls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
   219  			// TODO(owen-d): use logarithmic approach
   220  			for _, chk := range chks {
   221  				if Overlap(queryBounds, chk) {
   222  					// this series has at least one chunk in the desired range
   223  					res = append(res, Series{
   224  						Labels:      ls.Copy(),
   225  						Fingerprint: fp,
   226  					})
   227  					break
   228  				}
   229  			}
   230  		},
   231  		matchers...); err != nil {
   232  		return nil, err
   233  	}
   234  
   235  	return res, nil
   236  }
   237  
   238  func (i *TSDBIndex) LabelNames(_ context.Context, _ string, _, _ model.Time, matchers ...*labels.Matcher) ([]string, error) {
   239  	if len(matchers) == 0 {
   240  		return i.reader.LabelNames()
   241  	}
   242  
   243  	return labelNamesWithMatchers(i.reader, matchers...)
   244  }
   245  
   246  func (i *TSDBIndex) LabelValues(_ context.Context, _ string, _, _ model.Time, name string, matchers ...*labels.Matcher) ([]string, error) {
   247  	if len(matchers) == 0 {
   248  		return i.reader.LabelValues(name)
   249  	}
   250  	return labelValuesWithMatchers(i.reader, name, matchers...)
   251  }
   252  
// Checksum returns the checksum reported by the underlying reader.
func (i *TSDBIndex) Checksum() uint32 {
	return i.reader.Checksum()
}
   256  
   257  func (i *TSDBIndex) Identifier(string) SingleTenantTSDBIdentifier {
   258  	lower, upper := i.Bounds()
   259  	return SingleTenantTSDBIdentifier{
   260  		TS:       time.Now(),
   261  		From:     lower,
   262  		Through:  upper,
   263  		Checksum: i.Checksum(),
   264  	}
   265  }
   266  
   267  func (i *TSDBIndex) Stats(ctx context.Context, userID string, from, through model.Time, blooms *stats.Blooms, shard *index.ShardAnnotation, matchers ...*labels.Matcher) (*stats.Blooms, error) {
   268  	if blooms == nil {
   269  		blooms = stats.BloomPool.Get()
   270  	}
   271  	queryBounds := newBounds(from, through)
   272  
   273  	if err := i.forSeries(ctx, shard,
   274  		func(ls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
   275  			// TODO(owen-d): use logarithmic approach
   276  			var addedStream bool
   277  			for _, chk := range chks {
   278  				if Overlap(queryBounds, chk) {
   279  					if !addedStream {
   280  						blooms.AddStream(fp)
   281  						addedStream = true
   282  					}
   283  					blooms.AddChunk(fp, chk)
   284  				}
   285  			}
   286  		},
   287  		matchers...); err != nil {
   288  		return blooms, err
   289  	}
   290  
   291  	return blooms, nil
   292  }