github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cmd/tools/read_index_files/main/main.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package main
    22  
    23  import (
    24  	"bufio"
    25  	"bytes"
    26  	"fmt"
    27  	"io"
    28  	"log"
    29  	"os"
    30  	"regexp"
    31  	"sort"
    32  	"strconv"
    33  
    34  	"github.com/m3db/m3/src/dbnode/persist"
    35  	"github.com/m3db/m3/src/dbnode/persist/fs"
    36  	"github.com/m3db/m3/src/m3ninx/doc"
    37  	m3ninxpersist "github.com/m3db/m3/src/m3ninx/persist"
    38  	"github.com/m3db/m3/src/x/ident"
    39  	xtime "github.com/m3db/m3/src/x/time"
    40  
    41  	"github.com/pborman/getopt"
    42  	"go.uber.org/zap"
    43  )
    44  
    45  func main() {
    46  	var (
    47  		optPathPrefix            = getopt.StringLong("path-prefix", 'p', "/var/lib/m3db", "Path prefix [e.g. /var/lib/m3db]")
    48  		optNamespace             = getopt.StringLong("namespace", 'n', "metrics", "Namespace [e.g. metrics]")
    49  		optBlockstart            = getopt.Int64Long("block-start", 'b', 0, "Block Start Time [in nsec]")
    50  		optVolumeIndex           = getopt.Int64Long("volume-index", 'v', 0, "Volume index")
    51  		optLargeFieldLimit       = getopt.Int64Long("large-field-limit", 'l', 0, "Large Field Limit (non-zero to display fields with num terms > limit)")
    52  		optOutputIdsPrefix       = getopt.StringLong("output-ids-prefix", 'o', "", "If set, it emits all terms for the _m3ninx_id field.")
    53  		optSkipValidateIntegrity = getopt.BoolLong("skip-validate-integrity", 's', "If set will skip integrity validation on segment open")
    54  	)
    55  	getopt.Parse()
    56  
    57  	rawLogger, err := zap.NewDevelopment()
    58  	if err != nil {
    59  		log.Fatalf("unable to create logger: %+v", err)
    60  	}
    61  	log := rawLogger.Sugar()
    62  
    63  	if *optPathPrefix == "" ||
    64  		*optNamespace == "" ||
    65  		*optBlockstart <= 0 {
    66  		getopt.Usage()
    67  		os.Exit(1)
    68  	}
    69  
    70  	fsOpts := fs.NewOptions().
    71  		SetFilePathPrefix(*optPathPrefix).
    72  		SetIndexReaderAutovalidateIndexSegments(!*optSkipValidateIntegrity)
    73  	reader, err := fs.NewIndexReader(fsOpts)
    74  	if err != nil {
    75  		log.Fatalf("could not create new index reader: %v", err)
    76  	}
    77  
    78  	openOpts := fs.IndexReaderOpenOptions{
    79  		Identifier: fs.FileSetFileIdentifier{
    80  			FileSetContentType: persist.FileSetIndexContentType,
    81  			Namespace:          ident.StringID(*optNamespace),
    82  			BlockStart:         xtime.UnixNano(*optBlockstart),
    83  			VolumeIndex:        int(*optVolumeIndex),
    84  		},
    85  	}
    86  
    87  	result, err := reader.Open(openOpts)
    88  	if err != nil {
    89  		log.Fatalf("unable to open reader: %v", err)
    90  	}
    91  
    92  	shards := make([]int, 0, len(result.Shards))
    93  	for shard := range result.Shards {
    94  		shards = append(shards, int(shard))
    95  	}
    96  	sort.Ints(shards)
    97  
    98  	log.Infof("shards: %v, volumeType: %s", shards, reader.IndexVolumeType())
    99  
   100  	i := 0
   101  	for {
   102  		i++
   103  		log.Info("opening index segment file set")
   104  		fileset, err := reader.ReadSegmentFileSet()
   105  		if err == io.EOF {
   106  			break
   107  		}
   108  		if err != nil {
   109  			log.Fatalf("unable to retrieve fileset: %v", err)
   110  		}
   111  
   112  		log.Info("validating index segment file set")
   113  		if err := reader.Validate(); err != nil {
   114  			log.Fatalf("error validating segment file set: %v", err)
   115  		}
   116  
   117  		seg, err := m3ninxpersist.NewSegment(fileset, fsOpts.FSTOptions())
   118  		if err != nil {
   119  			log.Fatalf("unable to open segment reader: %v", err)
   120  		}
   121  		defer seg.Close()
   122  
   123  		var (
   124  			idsFile   *os.File
   125  			idsWriter *bufio.Writer
   126  		)
   127  		if *optOutputIdsPrefix != "" {
   128  			idsFile, err = os.Create(fmt.Sprintf("%s-ids-segment-%d.out", *optOutputIdsPrefix, i))
   129  			if err != nil {
   130  				log.Fatalf("unable to create output ids file: %v", err)
   131  			}
   132  			idsWriter = bufio.NewWriter(idsFile)
   133  			defer func() {
   134  				idsWriter.Flush()
   135  				idsFile.Sync()
   136  				if err := idsFile.Close(); err != nil {
   137  					log.Fatalf("error closing ids file: %v", err)
   138  				}
   139  			}()
   140  		}
   141  
   142  		fields, err := seg.FieldsIterable().Fields()
   143  		if err != nil {
   144  			log.Fatalf("unable to retrieve segment fields: %v", err)
   145  		}
   146  
   147  		type largeField struct {
   148  			field    string
   149  			numTerms int
   150  		}
   151  		var largeFields []largeField
   152  		var termLens ints
   153  		var numFields int
   154  		for fields.Next() {
   155  			numFields++
   156  
   157  			f := fields.Current()
   158  			terms, err := seg.TermsIterable().Terms(f)
   159  			if err != nil {
   160  				log.Fatalf("unable to retrieve segment term: %v", err)
   161  			}
   162  
   163  			numTerms := 0
   164  			isNameField := bytes.Equal(doc.IDReservedFieldName, f)
   165  			for terms.Next() {
   166  				numTerms++
   167  
   168  				if isNameField && idsWriter != nil {
   169  					// ids output
   170  					t, _ := terms.Current()
   171  					idsWriter.Write(t)
   172  					idsWriter.WriteByte('\n')
   173  				}
   174  			}
   175  
   176  			// large field output
   177  			if *optLargeFieldLimit > 0 && numTerms > int(*optLargeFieldLimit) {
   178  				largeFields = append(largeFields, largeField{
   179  					field:    string(f),
   180  					numTerms: numTerms,
   181  				})
   182  			}
   183  			termLens = append(termLens, numTerms)
   184  		}
   185  
   186  		summary := termLens.summary()
   187  		log.Infof("Segment: [%v], Size: [%v], NumFields: [%v], Num Terms: [%+v]", i,
   188  			formatCommas(int(seg.Size())), formatCommas(numFields), summary)
   189  		if *optLargeFieldLimit > 0 {
   190  			log.Infof("Large fields: %+v", largeFields)
   191  		}
   192  	}
   193  }
   194  
   195  type summaryStats struct {
   196  	max     float64
   197  	min     float64
   198  	average float64
   199  	median  float64
   200  }
   201  
   202  type ints []int
   203  
   204  func (vals ints) summary() summaryStats {
   205  	res := summaryStats{}
   206  	sort.Ints(vals)
   207  	if len(vals)%2 == 1 {
   208  		res.median = float64(vals[len(vals)/2])
   209  	} else {
   210  		res.median = (float64(vals[len(vals)/2]) + float64(vals[(1+len(vals))/2])) / 2
   211  	}
   212  	res.min = float64(vals[0])
   213  	res.max = float64(vals[len(vals)-1])
   214  	sum := 0
   215  	for _, val := range vals {
   216  		sum += val
   217  	}
   218  	res.average = float64(sum) / float64(len(vals))
   219  	return res
   220  }
   221  
   222  func formatCommas(num int) string {
   223  	str := strconv.Itoa(num)
   224  	re := regexp.MustCompile(`(\d+)(\d{3})`)
   225  	for i := 0; i < (len(str)-1)/3; i++ {
   226  		str = re.ReplaceAllString(str, "$1,$2")
   227  	}
   228  	return str
   229  }