github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cmd/tools/verify_ids/main/main.go (about)

     1  // Copyright (c) 2023 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  // Package main is the entry point for verify_ids command-line tool.
    22  package main
    23  
    24  import (
    25  	"bufio"
    26  	"encoding/json"
    27  	"errors"
    28  	"flag"
    29  	"fmt"
    30  	"io"
    31  	"log"
    32  	"math"
    33  	"os"
    34  	"strconv"
    35  	"strings"
    36  	"text/tabwriter"
    37  	"time"
    38  
    39  	"github.com/m3db/m3/src/dbnode/client"
    40  	"github.com/m3db/m3/src/dbnode/encoding"
    41  	"github.com/m3db/m3/src/dbnode/encoding/m3tsz"
    42  	"github.com/m3db/m3/src/dbnode/generated/thrift/rpc"
    43  	"github.com/m3db/m3/src/dbnode/network/server/tchannelthrift/node/channel"
    44  	"github.com/m3db/m3/src/dbnode/ts"
    45  	"github.com/m3db/m3/src/m3ninx/doc"
    46  	"github.com/m3db/m3/src/m3ninx/idx"
    47  
    48  	"github.com/uber/tchannel-go"
    49  	"github.com/uber/tchannel-go/thrift"
    50  	"golang.org/x/exp/slices"
    51  )
    52  
const (
	// maxIDs caps the total number of series IDs the tool will compare,
	// whether fetched per shard or read from stdin.
	maxIDs  = 1000
	// epsilon is the maximum absolute difference between two float values
	// before they are reported as a mismatch.
	epsilon = 0.001
)
    57  
// queryRange is the [start, end] time window applied to both ID discovery
// and data queries. A zero start or end leaves the corresponding request
// field unset.
type queryRange struct {
	start time.Time
	end   time.Time
}
    62  
    63  func main() {
    64  	var (
    65  		tchannelNodeAddrsArg = flag.String("nodes", "127.0.0.1:9000", "Node TChannel server addresses, comma separated")
    66  		namespaceArg         = flag.String("namespace", "default", "Namespace to read from")
    67  		shardsArg            = flag.String("shards", "0", "Shards to pull IDs from, comma separated")
    68  		rangeStartArg        = flag.String("start", "", "Query time range start")
    69  		rangeEndArg          = flag.String("end", "", "Query time range end")
    70  		limitArg             = flag.Int64("num", 8, "Number of IDs to fetch per shard")
    71  		fromStdInArg         = flag.Bool("stdin", false, "Read IDs from stdin instead")
    72  		dumpRawArg           = flag.Bool("raw", false, "Dump data as json to stdout only")
    73  	)
    74  	flag.Parse()
    75  
    76  	if *tchannelNodeAddrsArg == "" ||
    77  		*namespaceArg == "" ||
    78  		*shardsArg == "" ||
    79  		*limitArg < 0 {
    80  		flag.Usage()
    81  		os.Exit(1)
    82  	}
    83  
    84  	if *limitArg > maxIDs {
    85  		log.Fatalf("requested number of IDs is too high")
    86  	}
    87  
    88  	var (
    89  		rangeStart time.Time
    90  		rangeEnd   time.Time
    91  	)
    92  
    93  	if *rangeStartArg == "" {
    94  		rangeStart = time.Now().Add(-30 * time.Minute).Truncate(time.Minute)
    95  	} else if err := rangeEnd.UnmarshalText([]byte(*rangeStartArg)); err != nil {
    96  		log.Fatalf("failed to parse start time: %v", *rangeStartArg)
    97  	}
    98  
    99  	if *rangeEndArg == "" {
   100  		rangeEnd = time.Now().Add(-1 * time.Minute).Truncate(time.Minute)
   101  	} else if err := rangeEnd.UnmarshalText([]byte(*rangeEndArg)); err != nil {
   102  		log.Fatalf("failed to parse end time: %v", *rangeEndArg)
   103  	}
   104  
   105  	dumpRaw := *dumpRawArg
   106  	readInputFromStdin := *fromStdInArg
   107  	namespace := []byte(*namespaceArg)
   108  
   109  	var shards []uint32 //nolint:prealloc
   110  	for _, str := range strings.Split(*shardsArg, ",") {
   111  		value, err := strconv.Atoi(str)
   112  		if err != nil {
   113  			log.Fatalf("could not parse shard '%s': %v", str, err)
   114  		}
   115  		if value < 0 {
   116  			log.Fatalf("could not parse shard '%s': not uint", str)
   117  		}
   118  		shards = append(shards, uint32(value))
   119  	}
   120  	nodeAddrs := strings.Split(*tchannelNodeAddrsArg, ",")
   121  	resultLimit := int(*limitArg)
   122  
   123  	nodes := make([]dbnode, 0, len(nodeAddrs))
   124  	for _, v := range nodeAddrs {
   125  		nodes = append(nodes, dbnode{
   126  			client:    getClient(v),
   127  			namespace: namespace,
   128  			addr:      v,
   129  		})
   130  	}
   131  
   132  	qr := queryRange{start: rangeStart, end: rangeEnd}
   133  
   134  	var ids [][]byte
   135  
   136  	if !readInputFromStdin {
   137  		for i := range shards {
   138  			res, err := nodes[0].getIDs(shards[i], qr, resultLimit)
   139  			if err != nil {
   140  				log.Fatalf("failed to get IDs from %q for shard %v: %v", nodes[0].addr, shards[i], err)
   141  			}
   142  			ids = append(ids, res...)
   143  		}
   144  		log.Printf("read %d ids from %q", len(ids), nodes[0].addr)
   145  	} else {
   146  		scanner := bufio.NewScanner(os.Stdin)
   147  		for scanner.Scan() {
   148  			if b := scanner.Bytes(); len(b) > 0 {
   149  				ids = append(ids, append([]byte(nil), b...))
   150  			}
   151  		}
   152  		if len(ids) > maxIDs {
   153  			log.Fatalf("got more than max of %v ids", maxIDs)
   154  		}
   155  	}
   156  
   157  	resultsByIDByNode := map[string]map[string][]ts.Datapoint{}
   158  
   159  	for _, n := range nodes {
   160  		res, err := n.query(ids, qr)
   161  		if err != nil {
   162  			log.Fatalf("failed to query node %q: %v", n.addr, err)
   163  		}
   164  		dps, err := fetchTaggedResultsToDatapoints(res)
   165  		if err != nil {
   166  			log.Fatalf("could not convert results: %v", err)
   167  		}
   168  
   169  		for id, result := range dps {
   170  			if _, ok := resultsByIDByNode[id]; !ok {
   171  				resultsByIDByNode[id] = map[string][]ts.Datapoint{}
   172  			}
   173  			resultsByIDByNode[id][n.addr] = result
   174  		}
   175  	}
   176  
   177  	if dumpRaw {
   178  		if err := dumpJSON(resultsByIDByNode); err != nil {
   179  			log.Fatal(err)
   180  		}
   181  		return
   182  	}
   183  	printComparision(nodeAddrs, resultsByIDByNode)
   184  }
   185  
   186  func dumpJSON(results map[string]map[string][]ts.Datapoint) error {
   187  	b, err := json.Marshal(results)
   188  	if err != nil {
   189  		return err
   190  	}
   191  	fmt.Printf("%s", b) //nolint:forbidigo
   192  	return nil
   193  }
   194  
// printComparision renders, for each series ID, a table with one row per
// observed timestamp and one column per node. The first node in nodes is
// the reference; a value on any other node that differs from the reference
// by more than epsilon (or is NaN on exactly one side) is flagged with
// " (!) " and collected into a per-ID mismatch summary printed after the
// table. Missing datapoints render as NaN.
//
// NOTE(review): "Comparision" is a typo, kept as-is for call-site
// compatibility.
//nolint:errcheck
func printComparision(nodes []string, results map[string]map[string][]ts.Datapoint) {
	bufferedStdout := bufio.NewWriter(os.Stdout)
	defer bufferedStdout.Flush()

	for id, res := range results {
		var (
			timestamps []int64
			datapoints = make([][]float64, len(nodes))
		)

		// first collect all seen timestamps from nodes,
		// then sort and deduplicate.
		for _, node := range nodes {
			r := res[node]
			for _, dp := range r {
				timestamps = append(timestamps, int64(dp.TimestampNanos))
			}
		}
		slices.Sort(timestamps)
		timestamps = slices.Compact(timestamps)

		// Align each node's datapoints to the merged timestamp axis:
		// datapoints[i][dst] is node i's value at timestamps[dst], or NaN
		// if that node has no sample at that timestamp.
		for i, node := range nodes {
			datapoints[i] = make([]float64, len(timestamps))
			src, dst := 0, 0
			r := res[node]
			// there should be at least 1 datapoint for each timestamp from any of the nodes.
			for ; dst < len(timestamps); dst++ {
				datapoints[i][dst] = math.NaN()
				// Two-cursor merge: src only ever moves forward, which
				// assumes each node's datapoints are sorted ascending by
				// timestamp — NOTE(review): confirm the fetch path
				// guarantees ordering.
				for src < len(r) && src <= dst && int64(r[src].TimestampNanos) != timestamps[dst] {
					src++
				}
				if src < len(r) && int64(r[src].TimestampNanos) == timestamps[dst] {
					datapoints[i][dst] = r[src].Value
				}
			}
		}

		// Header row: Timestamp | node0 | node1 | ...
		fmt.Fprintf(bufferedStdout, "===\nID: %v\n", id)
		w := tabwriter.NewWriter(bufferedStdout, 0, 0, 0, ' ', tabwriter.Debug|tabwriter.AlignRight)
		w.Write([]byte("Timestamp"))
		w.Write([]byte{'\t'})
		for _, node := range nodes {
			w.Write([]byte(node))
			w.Write([]byte{'\t'})
		}
		w.Write([]byte{'\n'})

		var mismatches []mismatch
		for i := range timestamps {
			fmt.Fprint(w, time.Unix(0, timestamps[i]).Format(time.StampMilli))
			w.Write([]byte{'\t'})
			for j := range nodes {
				ne := false
				// Column 0 (first node) is the reference value; it is never
				// flagged against itself (the j > 0 check below).
				refVal := datapoints[0][i]
				curVal := datapoints[j][i]

				// Mismatch if NaN-ness differs, or if both are numbers and
				// their absolute difference exceeds epsilon.
				if math.IsNaN(refVal) && !math.IsNaN(curVal) ||
					!math.IsNaN(refVal) && math.IsNaN(curVal) {
					ne = true
				} else if math.Abs(curVal-refVal) > epsilon {
					ne = true
				}

				if j > 0 && ne {
					mismatches = append(mismatches, mismatch{
						node:      nodes[j],
						timestamp: timestamps[i],
					})
					fmt.Fprint(w, " (!) ")
				}
				fmt.Fprintf(w, "%f", datapoints[j][i])
				fmt.Fprint(w, "\t")
			}
			fmt.Fprint(w, "\n")
		}
		w.Flush()

		// Per-ID summary of every flagged (timestamp, node) pair.
		if len(mismatches) > 0 {
			fmt.Fprintf(bufferedStdout, "Mismatches (timestamp, node) for ID: %v\n", id)
			for _, m := range mismatches {
				fmt.Fprintf(bufferedStdout,
					"%v %v\n",
					time.Unix(0, m.timestamp).Format(time.StampMilli),
					m.node)
			}
		}
	}
}
   284  
// dbnode is a handle to a single M3DB node: its address (used for logging
// and result keying), the namespace to query, and a thrift RPC client
// connected to it.
type dbnode struct {
	addr      string
	namespace []byte
	client    rpc.TChanNode
}
   290  
   291  func (d *dbnode) getIDs(shard uint32, r queryRange, num int) ([][]byte, error) {
   292  	var (
   293  		results   [][]byte
   294  		pageToken []byte
   295  	)
   296  
   297  	getIDsFn := func() error {
   298  		tctx, cancel := thrift.NewContext(60 * time.Second)
   299  		defer cancel()
   300  
   301  		req := rpc.NewFetchBlocksMetadataRawV2Request()
   302  		req.NameSpace = d.namespace
   303  		req.Shard = int32(shard)
   304  		if !r.start.IsZero() {
   305  			req.RangeStart = r.start.UnixNano()
   306  		}
   307  		if !r.end.IsZero() {
   308  			req.RangeEnd = r.end.UnixNano()
   309  		}
   310  		req.Limit = int64(num)
   311  		req.PageToken = pageToken
   312  
   313  		result, err := d.client.FetchBlocksMetadataRawV2(tctx, req)
   314  		if err != nil {
   315  			return err
   316  		}
   317  
   318  		for _, elem := range result.Elements {
   319  			results = append(results, elem.ID)
   320  			if len(results) >= num {
   321  				return io.EOF
   322  			}
   323  		}
   324  
   325  		if result.NextPageToken == nil {
   326  			return io.EOF
   327  		}
   328  
   329  		pageToken = append([]byte(nil), result.NextPageToken...)
   330  		return nil
   331  	}
   332  
   333  	var err error
   334  	for err == nil {
   335  		if err = getIDsFn(); err != nil {
   336  			break
   337  		}
   338  	}
   339  
   340  	if errors.Is(err, io.EOF) {
   341  		return results, nil
   342  	}
   343  	return results, err
   344  }
   345  
   346  func (d *dbnode) query(ids [][]byte, r queryRange) (*rpc.FetchTaggedResult_, error) {
   347  	req := rpc.NewFetchTaggedRequest()
   348  	req.NameSpace = d.namespace
   349  	req.FetchData = true
   350  
   351  	if !r.start.IsZero() {
   352  		req.RangeStart = r.start.UnixNano()
   353  	}
   354  	if !r.end.IsZero() {
   355  		req.RangeEnd = r.end.UnixNano()
   356  	}
   357  
   358  	var (
   359  		termQueries = make([]idx.Query, 0, len(ids))
   360  		err         error
   361  	)
   362  	for _, id := range ids {
   363  		termQueries = append(termQueries, idx.NewTermQuery(doc.IDReservedFieldName, id))
   364  	}
   365  
   366  	req.Query, err = idx.Marshal(idx.NewDisjunctionQuery(termQueries...))
   367  	if err != nil {
   368  		return nil, err
   369  	}
   370  
   371  	tctx, cancel := thrift.NewContext(15 * time.Second)
   372  	defer cancel()
   373  
   374  	return d.client.FetchTagged(tctx, req)
   375  }
   376  
   377  func getClient(nodeAddr string) rpc.TChanNode {
   378  	ch, err := tchannel.NewChannel("Client", nil)
   379  	if err != nil {
   380  		panic(fmt.Sprintf("could not create new tchannel channel for %q: %v", nodeAddr, err))
   381  	}
   382  	endpoint := &thrift.ClientOptions{HostPort: nodeAddr}
   383  	thriftClient := thrift.NewClient(ch, channel.ChannelName, endpoint)
   384  	return rpc.NewTChanNodeClient(thriftClient)
   385  }
   386  
   387  func fetchTaggedResultsToDatapoints(result *rpc.FetchTaggedResult_) (map[string][]ts.Datapoint, error) {
   388  	res := map[string][]ts.Datapoint{}
   389  	encodingOpts := encoding.NewOptions()
   390  
   391  	for _, elem := range result.Elements {
   392  		var dps []ts.Datapoint
   393  		iter := client.NewReaderSliceOfSlicesIterator(elem.Segments, nil)
   394  		multiReader := encoding.NewMultiReaderIterator(m3tsz.DefaultReaderIteratorAllocFn(encodingOpts), nil)
   395  		multiReader.ResetSliceOfSlices(iter, nil)
   396  
   397  		for multiReader.Next() {
   398  			dp, _, _ := multiReader.Current()
   399  			dps = append(dps, dp)
   400  		}
   401  
   402  		if err := multiReader.Err(); err != nil {
   403  			return nil, err
   404  		}
   405  		res[string(elem.ID)] = dps
   406  	}
   407  
   408  	return res, nil
   409  }
   410  
// mismatch records a single disagreement found by printComparision: the
// node whose value differed from the reference, and the timestamp (in
// nanoseconds) at which it differed.
type mismatch struct {
	node      string
	timestamp int64
}