github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cmd/tools/query_index_segments/main/main.go (about)

     1  // Copyright (c) 2020 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package main
    22  
    23  import (
    24  	"errors"
    25  	golog "log"
    26  	"math"
    27  	"os"
    28  	"runtime"
    29  	"sync"
    30  	"time"
    31  
    32  	"github.com/m3db/m3/src/dbnode/persist"
    33  	"github.com/m3db/m3/src/dbnode/persist/fs"
    34  	"github.com/m3db/m3/src/m3ninx/index"
    35  	"github.com/m3db/m3/src/m3ninx/index/segment/fst/encoding/docs"
    36  	"github.com/m3db/m3/src/m3ninx/search/executor"
    37  	"github.com/m3db/m3/src/query/generated/proto/prompb"
    38  	"github.com/m3db/m3/src/query/parser/promql"
    39  	"github.com/m3db/m3/src/query/storage"
    40  	"github.com/m3db/m3/src/x/context"
    41  	"github.com/m3db/m3/src/x/ident"
    42  	xsync "github.com/m3db/m3/src/x/sync"
    43  
    44  	"github.com/pborman/getopt"
    45  	"github.com/prometheus/prometheus/model/labels"
    46  	"go.uber.org/zap"
    47  )
    48  
    49  var (
    50  	halfCPUs     = int(math.Max(float64(runtime.GOMAXPROCS(0)/2), 1))
    51  	endlineBytes = []byte("\n")
    52  )
    53  
    54  func main() {
    55  
    56  	var (
    57  		optPathPrefix  = getopt.StringLong("path-prefix", 'p', "/var/lib/m3db", "Path prefix [e.g. /var/lib/m3db]")
    58  		optNamespace   = getopt.StringLong("namespace", 'n', "", "Namespace to query")
    59  		optQuery       = getopt.StringLong("query", 'q', "", "Query to issue to match time series (PromQL selector)")
    60  		optConcurrency = getopt.IntLong("concurrency", 'c', halfCPUs, "Query concurrency")
    61  		optValidate    = true
    62  	)
    63  	getopt.BoolVarLong(&optValidate, "validate", 'v', "Validate index segments before querying")
    64  	getopt.Parse()
    65  
    66  	logConfig := zap.NewDevelopmentConfig()
    67  	log, err := logConfig.Build()
    68  	if err != nil {
    69  		golog.Fatalf("unable to create logger: %+v", err)
    70  	}
    71  
    72  	if *optNamespace == "" || *optQuery == "" {
    73  		getopt.Usage()
    74  		os.Exit(1)
    75  	}
    76  
    77  	run(runOptions{
    78  		filePathPrefix: *optPathPrefix,
    79  		namespace:      *optNamespace,
    80  		query:          *optQuery,
    81  		validate:       optValidate,
    82  		concurrency:    *optConcurrency,
    83  		log:            log,
    84  	})
    85  
    86  }
    87  
    88  type runOptions struct {
    89  	filePathPrefix string
    90  	namespace      string
    91  	query          string
    92  	validate       bool
    93  	concurrency    int
    94  	log            *zap.Logger
    95  }
    96  
    97  func run(opts runOptions) {
    98  	log := opts.log
    99  	ctx := context.NewBackground()
   100  
   101  	parseOpts := promql.NewParseOptions()
   102  	parse := parseOpts.MetricSelectorFn()
   103  
   104  	matchers, err := parse(opts.query)
   105  	if err != nil {
   106  		log.Fatal("could not create matchers", zap.Error(err))
   107  	}
   108  
   109  	labelMatchers, err := toLabelMatchers(matchers)
   110  	if err != nil {
   111  		log.Fatal("could not create label matchers", zap.Error(err))
   112  	}
   113  
   114  	query, err := storage.PromReadQueryToM3(&prompb.Query{
   115  		Matchers:         labelMatchers,
   116  		StartTimestampMs: 0,
   117  		EndTimestampMs:   time.Now().UnixNano() / int64(time.Millisecond),
   118  	})
   119  	if err != nil {
   120  		log.Fatal("could not create M3 fetch query", zap.Error(err))
   121  	}
   122  
   123  	indexQuery, err := storage.FetchQueryToM3Query(query, storage.NewFetchOptions())
   124  	if err != nil {
   125  		log.Fatal("could not create M3 index query", zap.Error(err))
   126  	}
   127  
   128  	fsOpts := fs.NewOptions().
   129  		SetFilePathPrefix(opts.filePathPrefix)
   130  
   131  	if opts.validate {
   132  		// Validate checksums before reading and/or validating contents if set.
   133  		fsOpts = fsOpts.SetIndexReaderAutovalidateIndexSegments(true)
   134  	}
   135  
   136  	var (
   137  		nsID      = ident.StringID(opts.namespace)
   138  		infoFiles = fs.ReadIndexInfoFiles(fs.ReadIndexInfoFilesOptions{
   139  			FilePathPrefix:   fsOpts.FilePathPrefix(),
   140  			Namespace:        nsID,
   141  			ReaderBufferSize: fsOpts.InfoReaderBufferSize(),
   142  		})
   143  		results     = make(map[string]struct{})
   144  		resultsLock sync.Mutex
   145  		wg          sync.WaitGroup
   146  	)
   147  
   148  	log.Info("starting query",
   149  		zap.Int("concurrency", opts.concurrency),
   150  		zap.Bool("validateSegments", opts.validate))
   151  
   152  	workers := xsync.NewWorkerPool(opts.concurrency)
   153  	workers.Init()
   154  
   155  	for _, infoFile := range infoFiles {
   156  		if err := infoFile.Err.Error(); err != nil {
   157  			log.Error("unable to read index info file",
   158  				zap.Stringer("namespace", nsID),
   159  				zap.Error(err),
   160  				zap.String("filepath", infoFile.Err.Filepath()),
   161  			)
   162  			continue
   163  		}
   164  
   165  		readResult, err := fs.ReadIndexSegments(fs.ReadIndexSegmentsOptions{
   166  			ReaderOptions: fs.IndexReaderOpenOptions{
   167  				Identifier:  infoFile.ID,
   168  				FileSetType: persist.FileSetFlushType,
   169  			},
   170  			FilesystemOptions: fsOpts,
   171  		})
   172  		if err != nil {
   173  			log.Fatal("unable to read segments from index fileset", zap.Error(err))
   174  			return
   175  		}
   176  
   177  		wg.Add(1)
   178  		workers.Go(func() {
   179  			defer wg.Done()
   180  
   181  			var readers []index.Reader
   182  			for _, seg := range readResult.Segments {
   183  				reader, err := seg.Reader()
   184  				if err != nil {
   185  					log.Fatal("segment reader error", zap.Error(err))
   186  				}
   187  
   188  				readers = append(readers, reader)
   189  			}
   190  
   191  			executor := executor.NewExecutor(readers)
   192  
   193  			iter, err := executor.Execute(ctx, indexQuery.Query.SearchQuery())
   194  			if err != nil {
   195  				log.Fatal("search execute error", zap.Error(err))
   196  			}
   197  
   198  			reader := docs.NewEncodedDocumentReader()
   199  			fields := make(map[string]string)
   200  			for iter.Next() {
   201  				d := iter.Current()
   202  				m, err := docs.MetadataFromDocument(d, reader)
   203  				if err != nil {
   204  					log.Fatal("error retrieve document metadata", zap.Error(err))
   205  				}
   206  
   207  				key := string(m.ID)
   208  
   209  				resultsLock.Lock()
   210  				_, ok := results[key]
   211  				if !ok {
   212  					results[key] = struct{}{}
   213  				}
   214  				resultsLock.Unlock()
   215  
   216  				if ok {
   217  					continue // Already printed.
   218  				}
   219  
   220  				for k := range fields {
   221  					delete(fields, k)
   222  				}
   223  				for _, field := range m.Fields { // nolint:gocritic
   224  					fields[string(field.Name)] = string(field.Value)
   225  				}
   226  
   227  				log.Info("matched document",
   228  					zap.String("id", key),
   229  					zap.Any("fields", fields))
   230  			}
   231  
   232  			if err := iter.Err(); err != nil {
   233  				log.Fatal("iterate err", zap.Error(err))
   234  			}
   235  			if err := iter.Close(); err != nil {
   236  				log.Fatal("iterate close err", zap.Error(err))
   237  			}
   238  		})
   239  	}
   240  
   241  	wg.Wait()
   242  }
   243  
   244  func toLabelMatchers(matchers []*labels.Matcher) ([]*prompb.LabelMatcher, error) {
   245  	pbMatchers := make([]*prompb.LabelMatcher, 0, len(matchers))
   246  	for _, m := range matchers {
   247  		var mType prompb.LabelMatcher_Type
   248  		switch m.Type {
   249  		case labels.MatchEqual:
   250  			mType = prompb.LabelMatcher_EQ
   251  		case labels.MatchNotEqual:
   252  			mType = prompb.LabelMatcher_NEQ
   253  		case labels.MatchRegexp:
   254  			mType = prompb.LabelMatcher_RE
   255  		case labels.MatchNotRegexp:
   256  			mType = prompb.LabelMatcher_NRE
   257  		default:
   258  			return nil, errors.New("invalid matcher type")
   259  		}
   260  		pbMatchers = append(pbMatchers, &prompb.LabelMatcher{
   261  			Type:  mType,
   262  			Name:  []byte(m.Name),
   263  			Value: []byte(m.Value),
   264  		})
   265  	}
   266  	return pbMatchers, nil
   267  }