// Source: github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cmd/tools/verify_ids/main/main.go

// Copyright (c) 2023 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// Package main is the entry point for the verify_ids command-line tool.
22 package main 23 24 import ( 25 "bufio" 26 "encoding/json" 27 "errors" 28 "flag" 29 "fmt" 30 "io" 31 "log" 32 "math" 33 "os" 34 "strconv" 35 "strings" 36 "text/tabwriter" 37 "time" 38 39 "github.com/m3db/m3/src/dbnode/client" 40 "github.com/m3db/m3/src/dbnode/encoding" 41 "github.com/m3db/m3/src/dbnode/encoding/m3tsz" 42 "github.com/m3db/m3/src/dbnode/generated/thrift/rpc" 43 "github.com/m3db/m3/src/dbnode/network/server/tchannelthrift/node/channel" 44 "github.com/m3db/m3/src/dbnode/ts" 45 "github.com/m3db/m3/src/m3ninx/doc" 46 "github.com/m3db/m3/src/m3ninx/idx" 47 48 "github.com/uber/tchannel-go" 49 "github.com/uber/tchannel-go/thrift" 50 "golang.org/x/exp/slices" 51 ) 52 53 const ( 54 maxIDs = 1000 55 epsilon = 0.001 56 ) 57 58 type queryRange struct { 59 start time.Time 60 end time.Time 61 } 62 63 func main() { 64 var ( 65 tchannelNodeAddrsArg = flag.String("nodes", "127.0.0.1:9000", "Node TChannel server addresses, comma separated") 66 namespaceArg = flag.String("namespace", "default", "Namespace to read from") 67 shardsArg = flag.String("shards", "0", "Shards to pull IDs from, comma separated") 68 rangeStartArg = flag.String("start", "", "Query time range start") 69 rangeEndArg = flag.String("end", "", "Query time range end") 70 limitArg = flag.Int64("num", 8, "Number of IDs to fetch per shard") 71 fromStdInArg = flag.Bool("stdin", false, "Read IDs from stdin instead") 72 dumpRawArg = flag.Bool("raw", false, "Dump data as json to stdout only") 73 ) 74 flag.Parse() 75 76 if *tchannelNodeAddrsArg == "" || 77 *namespaceArg == "" || 78 *shardsArg == "" || 79 *limitArg < 0 { 80 flag.Usage() 81 os.Exit(1) 82 } 83 84 if *limitArg > maxIDs { 85 log.Fatalf("requested number of IDs is too high") 86 } 87 88 var ( 89 rangeStart time.Time 90 rangeEnd time.Time 91 ) 92 93 if *rangeStartArg == "" { 94 rangeStart = time.Now().Add(-30 * time.Minute).Truncate(time.Minute) 95 } else if err := rangeEnd.UnmarshalText([]byte(*rangeStartArg)); err != nil { 96 log.Fatalf("failed 
to parse start time: %v", *rangeStartArg) 97 } 98 99 if *rangeEndArg == "" { 100 rangeEnd = time.Now().Add(-1 * time.Minute).Truncate(time.Minute) 101 } else if err := rangeEnd.UnmarshalText([]byte(*rangeEndArg)); err != nil { 102 log.Fatalf("failed to parse end time: %v", *rangeEndArg) 103 } 104 105 dumpRaw := *dumpRawArg 106 readInputFromStdin := *fromStdInArg 107 namespace := []byte(*namespaceArg) 108 109 var shards []uint32 //nolint:prealloc 110 for _, str := range strings.Split(*shardsArg, ",") { 111 value, err := strconv.Atoi(str) 112 if err != nil { 113 log.Fatalf("could not parse shard '%s': %v", str, err) 114 } 115 if value < 0 { 116 log.Fatalf("could not parse shard '%s': not uint", str) 117 } 118 shards = append(shards, uint32(value)) 119 } 120 nodeAddrs := strings.Split(*tchannelNodeAddrsArg, ",") 121 resultLimit := int(*limitArg) 122 123 nodes := make([]dbnode, 0, len(nodeAddrs)) 124 for _, v := range nodeAddrs { 125 nodes = append(nodes, dbnode{ 126 client: getClient(v), 127 namespace: namespace, 128 addr: v, 129 }) 130 } 131 132 qr := queryRange{start: rangeStart, end: rangeEnd} 133 134 var ids [][]byte 135 136 if !readInputFromStdin { 137 for i := range shards { 138 res, err := nodes[0].getIDs(shards[i], qr, resultLimit) 139 if err != nil { 140 log.Fatalf("failed to get IDs from %q for shard %v: %v", nodes[0].addr, shards[i], err) 141 } 142 ids = append(ids, res...) 
143 } 144 log.Printf("read %d ids from %q", len(ids), nodes[0].addr) 145 } else { 146 scanner := bufio.NewScanner(os.Stdin) 147 for scanner.Scan() { 148 if b := scanner.Bytes(); len(b) > 0 { 149 ids = append(ids, append([]byte(nil), b...)) 150 } 151 } 152 if len(ids) > maxIDs { 153 log.Fatalf("got more than max of %v ids", maxIDs) 154 } 155 } 156 157 resultsByIDByNode := map[string]map[string][]ts.Datapoint{} 158 159 for _, n := range nodes { 160 res, err := n.query(ids, qr) 161 if err != nil { 162 log.Fatalf("failed to query node %q: %v", n.addr, err) 163 } 164 dps, err := fetchTaggedResultsToDatapoints(res) 165 if err != nil { 166 log.Fatalf("could not convert results: %v", err) 167 } 168 169 for id, result := range dps { 170 if _, ok := resultsByIDByNode[id]; !ok { 171 resultsByIDByNode[id] = map[string][]ts.Datapoint{} 172 } 173 resultsByIDByNode[id][n.addr] = result 174 } 175 } 176 177 if dumpRaw { 178 if err := dumpJSON(resultsByIDByNode); err != nil { 179 log.Fatal(err) 180 } 181 return 182 } 183 printComparision(nodeAddrs, resultsByIDByNode) 184 } 185 186 func dumpJSON(results map[string]map[string][]ts.Datapoint) error { 187 b, err := json.Marshal(results) 188 if err != nil { 189 return err 190 } 191 fmt.Printf("%s", b) //nolint:forbidigo 192 return nil 193 } 194 195 //nolint:errcheck 196 func printComparision(nodes []string, results map[string]map[string][]ts.Datapoint) { 197 bufferedStdout := bufio.NewWriter(os.Stdout) 198 defer bufferedStdout.Flush() 199 200 for id, res := range results { 201 var ( 202 timestamps []int64 203 datapoints = make([][]float64, len(nodes)) 204 ) 205 206 // first collect all seen timestamps from nodes, 207 // then sort and deduplicate. 
208 for _, node := range nodes { 209 r := res[node] 210 for _, dp := range r { 211 timestamps = append(timestamps, int64(dp.TimestampNanos)) 212 } 213 } 214 slices.Sort(timestamps) 215 timestamps = slices.Compact(timestamps) 216 217 for i, node := range nodes { 218 datapoints[i] = make([]float64, len(timestamps)) 219 src, dst := 0, 0 220 r := res[node] 221 // there should be at least 1 datapoint for each timestamp from any of the nodes. 222 for ; dst < len(timestamps); dst++ { 223 datapoints[i][dst] = math.NaN() 224 for src < len(r) && src <= dst && int64(r[src].TimestampNanos) != timestamps[dst] { 225 src++ 226 } 227 if src < len(r) && int64(r[src].TimestampNanos) == timestamps[dst] { 228 datapoints[i][dst] = r[src].Value 229 } 230 } 231 } 232 233 fmt.Fprintf(bufferedStdout, "===\nID: %v\n", id) 234 w := tabwriter.NewWriter(bufferedStdout, 0, 0, 0, ' ', tabwriter.Debug|tabwriter.AlignRight) 235 w.Write([]byte("Timestamp")) 236 w.Write([]byte{'\t'}) 237 for _, node := range nodes { 238 w.Write([]byte(node)) 239 w.Write([]byte{'\t'}) 240 } 241 w.Write([]byte{'\n'}) 242 243 var mismatches []mismatch 244 for i := range timestamps { 245 fmt.Fprint(w, time.Unix(0, timestamps[i]).Format(time.StampMilli)) 246 w.Write([]byte{'\t'}) 247 for j := range nodes { 248 ne := false 249 refVal := datapoints[0][i] 250 curVal := datapoints[j][i] 251 252 if math.IsNaN(refVal) && !math.IsNaN(curVal) || 253 !math.IsNaN(refVal) && math.IsNaN(curVal) { 254 ne = true 255 } else if math.Abs(curVal-refVal) > epsilon { 256 ne = true 257 } 258 259 if j > 0 && ne { 260 mismatches = append(mismatches, mismatch{ 261 node: nodes[j], 262 timestamp: timestamps[i], 263 }) 264 fmt.Fprint(w, " (!) 
") 265 } 266 fmt.Fprintf(w, "%f", datapoints[j][i]) 267 fmt.Fprint(w, "\t") 268 } 269 fmt.Fprint(w, "\n") 270 } 271 w.Flush() 272 273 if len(mismatches) > 0 { 274 fmt.Fprintf(bufferedStdout, "Mismatches (timestamp, node) for ID: %v\n", id) 275 for _, m := range mismatches { 276 fmt.Fprintf(bufferedStdout, 277 "%v %v\n", 278 time.Unix(0, m.timestamp).Format(time.StampMilli), 279 m.node) 280 } 281 } 282 } 283 } 284 285 type dbnode struct { 286 addr string 287 namespace []byte 288 client rpc.TChanNode 289 } 290 291 func (d *dbnode) getIDs(shard uint32, r queryRange, num int) ([][]byte, error) { 292 var ( 293 results [][]byte 294 pageToken []byte 295 ) 296 297 getIDsFn := func() error { 298 tctx, cancel := thrift.NewContext(60 * time.Second) 299 defer cancel() 300 301 req := rpc.NewFetchBlocksMetadataRawV2Request() 302 req.NameSpace = d.namespace 303 req.Shard = int32(shard) 304 if !r.start.IsZero() { 305 req.RangeStart = r.start.UnixNano() 306 } 307 if !r.end.IsZero() { 308 req.RangeEnd = r.end.UnixNano() 309 } 310 req.Limit = int64(num) 311 req.PageToken = pageToken 312 313 result, err := d.client.FetchBlocksMetadataRawV2(tctx, req) 314 if err != nil { 315 return err 316 } 317 318 for _, elem := range result.Elements { 319 results = append(results, elem.ID) 320 if len(results) >= num { 321 return io.EOF 322 } 323 } 324 325 if result.NextPageToken == nil { 326 return io.EOF 327 } 328 329 pageToken = append([]byte(nil), result.NextPageToken...) 
330 return nil 331 } 332 333 var err error 334 for err == nil { 335 if err = getIDsFn(); err != nil { 336 break 337 } 338 } 339 340 if errors.Is(err, io.EOF) { 341 return results, nil 342 } 343 return results, err 344 } 345 346 func (d *dbnode) query(ids [][]byte, r queryRange) (*rpc.FetchTaggedResult_, error) { 347 req := rpc.NewFetchTaggedRequest() 348 req.NameSpace = d.namespace 349 req.FetchData = true 350 351 if !r.start.IsZero() { 352 req.RangeStart = r.start.UnixNano() 353 } 354 if !r.end.IsZero() { 355 req.RangeEnd = r.end.UnixNano() 356 } 357 358 var ( 359 termQueries = make([]idx.Query, 0, len(ids)) 360 err error 361 ) 362 for _, id := range ids { 363 termQueries = append(termQueries, idx.NewTermQuery(doc.IDReservedFieldName, id)) 364 } 365 366 req.Query, err = idx.Marshal(idx.NewDisjunctionQuery(termQueries...)) 367 if err != nil { 368 return nil, err 369 } 370 371 tctx, cancel := thrift.NewContext(15 * time.Second) 372 defer cancel() 373 374 return d.client.FetchTagged(tctx, req) 375 } 376 377 func getClient(nodeAddr string) rpc.TChanNode { 378 ch, err := tchannel.NewChannel("Client", nil) 379 if err != nil { 380 panic(fmt.Sprintf("could not create new tchannel channel for %q: %v", nodeAddr, err)) 381 } 382 endpoint := &thrift.ClientOptions{HostPort: nodeAddr} 383 thriftClient := thrift.NewClient(ch, channel.ChannelName, endpoint) 384 return rpc.NewTChanNodeClient(thriftClient) 385 } 386 387 func fetchTaggedResultsToDatapoints(result *rpc.FetchTaggedResult_) (map[string][]ts.Datapoint, error) { 388 res := map[string][]ts.Datapoint{} 389 encodingOpts := encoding.NewOptions() 390 391 for _, elem := range result.Elements { 392 var dps []ts.Datapoint 393 iter := client.NewReaderSliceOfSlicesIterator(elem.Segments, nil) 394 multiReader := encoding.NewMultiReaderIterator(m3tsz.DefaultReaderIteratorAllocFn(encodingOpts), nil) 395 multiReader.ResetSliceOfSlices(iter, nil) 396 397 for multiReader.Next() { 398 dp, _, _ := multiReader.Current() 399 dps = 
append(dps, dp) 400 } 401 402 if err := multiReader.Err(); err != nil { 403 return nil, err 404 } 405 res[string(elem.ID)] = dps 406 } 407 408 return res, nil 409 } 410 411 type mismatch struct { 412 node string 413 timestamp int64 414 }