github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cmd/tools/read_data_files/main/main.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package main
    22  
    23  import (
    24  	"encoding/base64"
    25  	"fmt"
    26  	"io"
    27  	"io/ioutil"
    28  	"log"
    29  	"os"
    30  	"strconv"
    31  	"strings"
    32  	"time"
    33  
    34  	"github.com/m3db/m3/src/dbnode/encoding"
    35  	"github.com/m3db/m3/src/dbnode/encoding/m3tsz"
    36  	"github.com/m3db/m3/src/dbnode/persist"
    37  	"github.com/m3db/m3/src/dbnode/persist/fs"
    38  	"github.com/m3db/m3/src/dbnode/x/xio"
    39  	xerrors "github.com/m3db/m3/src/x/errors"
    40  	"github.com/m3db/m3/src/x/ident"
    41  	"github.com/m3db/m3/src/x/pool"
    42  	xtime "github.com/m3db/m3/src/x/time"
    43  
    44  	"github.com/pborman/getopt"
    45  	"go.uber.org/zap"
    46  )
    47  
    48  const (
    49  	snapshotType = "snapshot"
    50  	flushType    = "flush"
    51  
    52  	allShards = -1
    53  )
    54  
    55  type benchmarkMode uint8
    56  
    57  const (
    58  	// benchmarkNone prints the data read to the standard output and does not measure performance.
    59  	benchmarkNone benchmarkMode = iota
    60  
    61  	// benchmarkSeries benchmarks time series read performance (skipping datapoint decoding).
    62  	benchmarkSeries
    63  
    64  	// benchmarkDatapoints benchmarks series read, including datapoint decoding.
    65  	benchmarkDatapoints
    66  )
    67  
    68  func main() {
    69  	var (
    70  		optPathPrefix = getopt.StringLong("path-prefix", 'p', "", "Path prefix [e.g. /var/lib/m3db]")
    71  		optNamespace  = getopt.StringLong("namespace", 'n', "default", "Namespace [e.g. metrics]")
    72  		optShard      = getopt.IntLong("shard", 's', allShards,
    73  			fmt.Sprintf("Shard [expected format uint32], or %v for all shards in the directory", allShards))
    74  		optBlockstart  = getopt.Int64Long("block-start", 'b', 0, "Block Start Time [in nsec]")
    75  		volume         = getopt.Int64Long("volume", 'v', 0, "Volume number")
    76  		fileSetTypeArg = getopt.StringLong("fileset-type", 't', flushType, fmt.Sprintf("%s|%s", flushType, snapshotType))
    77  		idFilter       = getopt.StringLong("id-filter", 'f', "", "ID Contains Filter (optional)")
    78  		benchmark      = getopt.StringLong(
    79  			"benchmark", 'B', "", "benchmark mode (optional), [series|datapoints]")
    80  	)
    81  	getopt.Parse()
    82  
    83  	rawLogger, err := zap.NewDevelopment()
    84  	if err != nil {
    85  		log.Fatalf("unable to create logger: %+v", err)
    86  	}
    87  	log := rawLogger.Sugar()
    88  
    89  	if *optPathPrefix == "" ||
    90  		*optNamespace == "" ||
    91  		*optShard < allShards ||
    92  		*optBlockstart <= 0 ||
    93  		*volume < 0 ||
    94  		(*fileSetTypeArg != snapshotType && *fileSetTypeArg != flushType) {
    95  		getopt.Usage()
    96  		os.Exit(1)
    97  	}
    98  
    99  	var fileSetType persist.FileSetType
   100  	switch *fileSetTypeArg {
   101  	case flushType:
   102  		fileSetType = persist.FileSetFlushType
   103  	case snapshotType:
   104  		fileSetType = persist.FileSetSnapshotType
   105  	default:
   106  		log.Fatalf("unknown fileset type: %s", *fileSetTypeArg)
   107  	}
   108  
   109  	var benchMode benchmarkMode
   110  	switch *benchmark {
   111  	case "":
   112  	case "series":
   113  		benchMode = benchmarkSeries
   114  	case "datapoints":
   115  		benchMode = benchmarkDatapoints
   116  	default:
   117  		log.Fatalf("unknown benchmark type: %s", *benchmark)
   118  	}
   119  
   120  	// Not using bytes pool with streaming reads/writes to avoid the fixed memory overhead.
   121  	var bytesPool pool.CheckedBytesPool
   122  	encodingOpts := encoding.NewOptions().SetBytesPool(bytesPool)
   123  
   124  	fsOpts := fs.NewOptions().SetFilePathPrefix(*optPathPrefix)
   125  
   126  	shards := []uint32{uint32(*optShard)}
   127  	if *optShard == allShards {
   128  		shards, err = getShards(*optPathPrefix, fileSetType, *optNamespace)
   129  		if err != nil {
   130  			log.Fatalf("failed to resolve shards: %v", err)
   131  		}
   132  	}
   133  
   134  	reader, err := fs.NewReader(bytesPool, fsOpts)
   135  	if err != nil {
   136  		log.Fatalf("could not create new reader: %v", err)
   137  	}
   138  
   139  	for _, shard := range shards {
   140  		var (
   141  			seriesCount         = 0
   142  			datapointCount      = 0
   143  			annotationSizeTotal uint64
   144  			start               = time.Now()
   145  		)
   146  
   147  		openOpts := fs.DataReaderOpenOptions{
   148  			Identifier: fs.FileSetFileIdentifier{
   149  				Namespace:   ident.StringID(*optNamespace),
   150  				Shard:       shard,
   151  				BlockStart:  xtime.UnixNano(*optBlockstart),
   152  				VolumeIndex: int(*volume),
   153  			},
   154  			FileSetType:      fileSetType,
   155  			StreamingEnabled: true,
   156  		}
   157  
   158  		err = reader.Open(openOpts)
   159  		if err != nil {
   160  			log.Fatalf("unable to open reader for shard %v: %v", shard, err)
   161  		}
   162  
   163  		for {
   164  			entry, err := reader.StreamingRead()
   165  			if xerrors.Is(err, io.EOF) {
   166  				break
   167  			}
   168  			if err != nil {
   169  				log.Fatalf("err reading metadata: %v", err)
   170  			}
   171  
   172  			var (
   173  				id   = entry.ID
   174  				data = entry.Data
   175  			)
   176  
   177  			if *idFilter != "" && !strings.Contains(id.String(), *idFilter) {
   178  				continue
   179  			}
   180  
   181  			if benchMode != benchmarkSeries {
   182  				iter := m3tsz.NewReaderIterator(xio.NewBytesReader64(data), true, encodingOpts)
   183  				for iter.Next() {
   184  					dp, _, annotation := iter.Current()
   185  					if benchMode == benchmarkNone {
   186  						// Use fmt package so it goes to stdout instead of stderr
   187  						fmt.Printf("{id: %s, dp: %+v", id.String(), dp) // nolint: forbidigo
   188  						if len(annotation) > 0 {
   189  							fmt.Printf(", annotation: %s", // nolint: forbidigo
   190  								base64.StdEncoding.EncodeToString(annotation))
   191  						}
   192  						fmt.Println("}") // nolint: forbidigo
   193  					}
   194  					annotationSizeTotal += uint64(len(annotation))
   195  					datapointCount++
   196  				}
   197  				if err := iter.Err(); err != nil {
   198  					log.Fatalf("unable to iterate original data: %v", err)
   199  				}
   200  				iter.Close()
   201  			}
   202  
   203  			seriesCount++
   204  		}
   205  
   206  		if seriesCount != reader.Entries() && *idFilter == "" {
   207  			log.Warnf("actual time series count (%d) did not match info file data (%d)",
   208  				seriesCount, reader.Entries())
   209  		}
   210  
   211  		if benchMode != benchmarkNone {
   212  			runTime := time.Since(start)
   213  			fmt.Printf("Running time: %s\n", runTime)     // nolint: forbidigo
   214  			fmt.Printf("\n%d series read\n", seriesCount) // nolint: forbidigo
   215  			if runTime > 0 {
   216  				fmt.Printf("(%.2f series/second)\n", float64(seriesCount)/runTime.Seconds()) // nolint: forbidigo
   217  			}
   218  
   219  			if benchMode == benchmarkDatapoints {
   220  				fmt.Printf("\n%d datapoints decoded\n", datapointCount) // nolint: forbidigo
   221  				if runTime > 0 {
   222  					fmt.Printf("(%.2f datapoints/second)\n", float64(datapointCount)/runTime.Seconds()) // nolint: forbidigo
   223  				}
   224  
   225  				fmt.Printf("\nTotal annotation size: %d bytes\n", annotationSizeTotal) // nolint: forbidigo
   226  			}
   227  		}
   228  	}
   229  
   230  	if err := reader.Close(); err != nil {
   231  		log.Fatalf("unable to close reader: %v", err)
   232  	}
   233  }
   234  
   235  func getShards(pathPrefix string, fileSetType persist.FileSetType, namespace string) ([]uint32, error) {
   236  	nsID := ident.StringID(namespace)
   237  	path := fs.NamespaceDataDirPath(pathPrefix, nsID)
   238  	if fileSetType == persist.FileSetSnapshotType {
   239  		path = fs.NamespaceSnapshotsDirPath(pathPrefix, nsID)
   240  	}
   241  
   242  	files, err := ioutil.ReadDir(path)
   243  	if err != nil {
   244  		return nil, fmt.Errorf("failed reading namespace directory: %w", err)
   245  	}
   246  
   247  	shards := make([]uint32, 0)
   248  	for _, f := range files {
   249  		if !f.IsDir() {
   250  			continue
   251  		}
   252  		i, err := strconv.Atoi(f.Name())
   253  		if err != nil {
   254  			return nil, fmt.Errorf("failed extracting shard number: %w", err)
   255  		}
   256  		if i < 0 {
   257  			return nil, fmt.Errorf("negative shard number %v", i)
   258  		}
   259  		shards = append(shards, uint32(i))
   260  	}
   261  
   262  	return shards, nil
   263  }