github.com/thanos-io/thanos@v0.32.5/pkg/store/local.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package store
     5  
     6  import (
     7  	"bufio"
     8  	"bytes"
     9  	"context"
    10  	"io"
    11  	"math"
    12  	"sort"
    13  
    14  	"github.com/go-kit/log"
    15  	"github.com/go-kit/log/level"
    16  	"github.com/gogo/protobuf/jsonpb"
    17  	"github.com/pkg/errors"
    18  	"github.com/prometheus/prometheus/model/labels"
    19  	"github.com/prometheus/prometheus/tsdb/fileutil"
    20  	"google.golang.org/grpc/codes"
    21  	"google.golang.org/grpc/status"
    22  
    23  	"github.com/thanos-io/thanos/pkg/component"
    24  	"github.com/thanos-io/thanos/pkg/runutil"
    25  	"github.com/thanos-io/thanos/pkg/store/labelpb"
    26  	"github.com/thanos-io/thanos/pkg/store/storepb"
    27  )
    28  
    29  // LocalStore implements the store API against single file with stream of proto-based SeriesResponses in JSON format.
    30  // Inefficient implementation for quick StoreAPI view.
    31  // Chunk order is exactly the same as in a given file.
    32  type LocalStore struct {
    33  	logger    log.Logger
    34  	extLabels labels.Labels
    35  
    36  	info *storepb.InfoResponse
    37  	c    io.Closer
    38  
    39  	// TODO(bwplotka): This is very naive in-memory DB. We can support much larger files, by
    40  	// indexing labels, symbolizing strings and get chunk refs only without storing protobufs in memory.
    41  	// For small debug purposes, this is good enough.
    42  	series       []*storepb.Series
    43  	sortedChunks [][]int
    44  }
    45  
    46  // TODO(bwplotka): Add remote read so Prometheus users can use this. Potentially after streaming will be added
    47  // https://github.com/prometheus/prometheus/issues/5926.
    48  // TODO(bwplotka): Consider non mmaped version of this, as well different versions.
    49  func NewLocalStoreFromJSONMmappableFile(
    50  	logger log.Logger,
    51  	component component.StoreAPI,
    52  	extLabels labels.Labels,
    53  	path string,
    54  	split bufio.SplitFunc,
    55  ) (*LocalStore, error) {
    56  	f, err := fileutil.OpenMmapFile(path)
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  	defer func() {
    61  		if err != nil {
    62  			runutil.CloseWithErrCapture(&err, f, "json file %s close", path)
    63  		}
    64  	}()
    65  
    66  	s := &LocalStore{
    67  		logger:    logger,
    68  		extLabels: extLabels,
    69  		c:         f,
    70  		info: &storepb.InfoResponse{
    71  			LabelSets: []labelpb.ZLabelSet{
    72  				{Labels: labelpb.ZLabelsFromPromLabels(extLabels)},
    73  			},
    74  			StoreType: component.ToProto(),
    75  			MinTime:   math.MaxInt64,
    76  			MaxTime:   math.MinInt64,
    77  		},
    78  	}
    79  
    80  	// Do quick pass for in-mem index.
    81  	content := f.Bytes()
    82  	contentStart := bytes.Index(content, []byte("{"))
    83  	if contentStart != -1 {
    84  		content = content[contentStart:]
    85  	}
    86  
    87  	if idx := bytes.LastIndex(content, []byte("}")); idx != -1 {
    88  		content = content[:idx+1]
    89  	}
    90  
    91  	skanner := NewNoCopyScanner(content, split)
    92  	resp := &storepb.SeriesResponse{}
    93  	for skanner.Scan() {
    94  		if err := jsonpb.Unmarshal(bytes.NewReader(skanner.Bytes()), resp); err != nil {
    95  			return nil, errors.Wrapf(err, "unmarshal storepb.SeriesResponse frame for file %s", path)
    96  		}
    97  		series := resp.GetSeries()
    98  		if series == nil {
    99  			level.Warn(logger).Log("msg", "not a valid series", "frame", resp.String())
   100  			continue
   101  		}
   102  		chks := make([]int, 0, len(series.Chunks))
   103  		// Sort chunks in separate slice by MinTime for easier lookup. Find global max and min.
   104  		for ci, c := range series.Chunks {
   105  			if s.info.MinTime > c.MinTime {
   106  				s.info.MinTime = c.MinTime
   107  			}
   108  			if s.info.MaxTime < c.MaxTime {
   109  				s.info.MaxTime = c.MaxTime
   110  			}
   111  			chks = append(chks, ci)
   112  		}
   113  
   114  		sort.Slice(chks, func(i, j int) bool {
   115  			return series.Chunks[chks[i]].MinTime < series.Chunks[chks[j]].MinTime
   116  		})
   117  		s.series = append(s.series, series)
   118  		s.sortedChunks = append(s.sortedChunks, chks)
   119  	}
   120  
   121  	if err := skanner.Err(); err != nil {
   122  		return nil, errors.Wrapf(err, "scanning file %s", path)
   123  	}
   124  	level.Info(logger).Log("msg", "loading JSON file succeeded", "file", path, "info", s.info.String(), "series", len(s.series))
   125  	return s, nil
   126  }
   127  
   128  // ScanGRPCCurlProtoStreamMessages allows to tokenize each streamed gRPC message from grpcurl tool.
   129  func ScanGRPCCurlProtoStreamMessages(data []byte, atEOF bool) (advance int, token []byte, err error) {
   130  	var delim = []byte(`}
   131  {`)
   132  	if atEOF && len(data) == 0 {
   133  		return 0, nil, nil
   134  	}
   135  	if idx := bytes.LastIndex(data, delim); idx != -1 {
   136  		return idx + 2, data[:idx+1], nil
   137  	}
   138  	// If we're at EOF, let's return all.
   139  	if atEOF {
   140  		return len(data), data, nil
   141  	}
   142  	// Incomplete; get more bytes.
   143  	return len(delim), nil, nil
   144  }
   145  
   146  // Info returns store information about the Prometheus instance.
   147  func (s *LocalStore) Info(_ context.Context, _ *storepb.InfoRequest) (*storepb.InfoResponse, error) {
   148  	return s.info, nil
   149  }
   150  
   151  // Series returns all series for a requested time range and label matcher. The returned data may
   152  // exceed the requested time bounds.
   153  func (s *LocalStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesServer) error {
   154  	match, matchers, err := matchesExternalLabels(r.Matchers, s.extLabels)
   155  	if err != nil {
   156  		return status.Error(codes.InvalidArgument, err.Error())
   157  	}
   158  	if !match {
   159  		return nil
   160  	}
   161  	if len(matchers) == 0 {
   162  		return status.Error(codes.InvalidArgument, errors.New("no matchers specified (excluding external labels)").Error())
   163  	}
   164  
   165  	var chosen []int
   166  	for si, series := range s.series {
   167  		lbls := labelpb.ZLabelsToPromLabels(series.Labels)
   168  		var noMatch bool
   169  		for _, m := range matchers {
   170  			extValue := lbls.Get(m.Name)
   171  			if extValue == "" {
   172  				continue
   173  			}
   174  			if !m.Matches(extValue) {
   175  				noMatch = true
   176  				break
   177  			}
   178  		}
   179  		if noMatch {
   180  			continue
   181  		}
   182  
   183  		chosen = chosen[:0]
   184  		resp := &storepb.Series{
   185  			// Copy labels as in-process clients like proxy tend to work on same memory for labels.
   186  			Labels: labelpb.DeepCopy(series.Labels),
   187  			Chunks: make([]storepb.AggrChunk, 0, len(s.sortedChunks[si])),
   188  		}
   189  
   190  		for _, ci := range s.sortedChunks[si] {
   191  			if series.Chunks[ci].MaxTime < r.MinTime {
   192  				continue
   193  			}
   194  			if series.Chunks[ci].MinTime > r.MaxTime {
   195  				continue
   196  			}
   197  			chosen = append(chosen, ci)
   198  		}
   199  
   200  		sort.Ints(chosen)
   201  		for _, ci := range chosen {
   202  			resp.Chunks = append(resp.Chunks, series.Chunks[ci])
   203  		}
   204  
   205  		if err := srv.Send(storepb.NewSeriesResponse(resp)); err != nil {
   206  			return status.Error(codes.Aborted, err.Error())
   207  		}
   208  	}
   209  	return nil
   210  }
   211  
   212  // LabelNames returns all known label names.
   213  func (s *LocalStore) LabelNames(_ context.Context, _ *storepb.LabelNamesRequest) (
   214  	*storepb.LabelNamesResponse, error,
   215  ) {
   216  	// TODO(bwplotka): Consider precomputing.
   217  	names := map[string]struct{}{}
   218  	for _, series := range s.series {
   219  		for _, l := range series.Labels {
   220  			names[l.Name] = struct{}{}
   221  		}
   222  	}
   223  	resp := &storepb.LabelNamesResponse{}
   224  	for n := range names {
   225  		resp.Names = append(resp.Names, n)
   226  	}
   227  	return resp, nil
   228  }
   229  
   230  // LabelValues returns all known label values for a given label name.
   231  func (s *LocalStore) LabelValues(_ context.Context, r *storepb.LabelValuesRequest) (
   232  	*storepb.LabelValuesResponse, error,
   233  ) {
   234  	vals := map[string]struct{}{}
   235  	for _, series := range s.series {
   236  		lbls := labelpb.ZLabelsToPromLabels(series.Labels)
   237  		val := lbls.Get(r.Label)
   238  		if val == "" {
   239  			continue
   240  		}
   241  		vals[val] = struct{}{}
   242  	}
   243  	resp := &storepb.LabelValuesResponse{}
   244  	for val := range vals {
   245  		resp.Values = append(resp.Values, val)
   246  	}
   247  	return resp, nil
   248  }
   249  
   250  func (s *LocalStore) Close() (err error) {
   251  	return s.c.Close()
   252  }
   253  
   254  type noCopyScanner struct {
   255  	b         []byte
   256  	splitFunc bufio.SplitFunc
   257  
   258  	start, end int
   259  	err        error
   260  
   261  	token []byte
   262  }
   263  
   264  // NewNoCopyScanner returns bufio.Scanner-like scanner that is meant to be used on already allocated byte slice (or mmapped)
   265  // one. Returned tokens are shared.
   266  func NewNoCopyScanner(b []byte, splitFunc bufio.SplitFunc) *noCopyScanner {
   267  	return &noCopyScanner{
   268  		b:         b,
   269  		splitFunc: splitFunc,
   270  		start:     0,
   271  		end:       0,
   272  	}
   273  }
   274  
   275  func (s *noCopyScanner) Scan() bool {
   276  	if s.start >= len(s.b) {
   277  		return false
   278  	}
   279  
   280  	advance := 1
   281  	for s.end+advance < len(s.b) {
   282  		s.end += advance
   283  
   284  		advance, s.token, s.err = s.splitFunc(s.b[s.start:s.end], false)
   285  		if s.err != nil {
   286  			return false
   287  		}
   288  
   289  		if len(s.token) > 0 {
   290  			s.start += advance
   291  			s.end = s.start
   292  			return true
   293  		}
   294  	}
   295  
   296  	_, s.token, s.err = s.splitFunc(s.b[s.start:], true)
   297  	if s.err != nil {
   298  		return false
   299  	}
   300  	s.start = len(s.b)
   301  	return len(s.token) > 0
   302  }
   303  
   304  func (s *noCopyScanner) Bytes() []byte {
   305  	return s.token
   306  }
   307  
   308  func (s *noCopyScanner) Err() error {
   309  	return s.err
   310  }