// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package cli

import (
	"archive/zip"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"net/url"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"time"
	"unicode"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
	"github.com/cockroachdb/cockroach/pkg/server/status/statuspb"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
	"github.com/cockroachdb/cockroach/pkg/util/contextutil"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
	"github.com/lib/pq"
	"github.com/spf13/cobra"
)

// debugZipCmd is the cobra command for `cockroach debug zip`. The
// actual collection logic lives in runDebugZip below.
var debugZipCmd = &cobra.Command{
	Use:   "zip <file>",
	Short: "gather cluster debug data into a zip file",
	Long: `

Gather cluster debug data into a zip file. Data includes cluster events, node
liveness, node status, range status, node stack traces, node engine stats, log
files, and SQL schema.

Retrieval of per-node details (status, stack traces, range status, engine stats)
requires the node to be live and operating properly. Retrieval of SQL data
requires the cluster to be live.
`,
	Args: cobra.ExactArgs(1),
	RunE: MaybeDecorateGRPCError(runDebugZip),
}

// Tables containing cluster-wide info that are collected in a debug zip.
// debugZipTablesPerCluster lists the tables that are dumped once per
// debug zip (cluster-wide data). The order here determines the order
// in which the dumps are written to the archive.
var debugZipTablesPerCluster = []string{
	"crdb_internal.cluster_queries",
	"crdb_internal.cluster_sessions",
	"crdb_internal.cluster_settings",
	"crdb_internal.cluster_transactions",

	"crdb_internal.jobs",
	"system.jobs",       // get the raw, restorable jobs records too.
	"system.descriptor", // descriptors also contain job-like mutation state.
	"system.namespace",
	"system.namespace2", // TODO(sqlexec): consider removing in 20.2 or later.

	"crdb_internal.kv_node_status",
	"crdb_internal.kv_store_status",

	"crdb_internal.schema_changes",
	"crdb_internal.partitions",
	"crdb_internal.zones",
}

// Tables collected from each node in a debug zip.
// These are dumped once per (live, included) node, against that node's
// own SQL address.
var debugZipTablesPerNode = []string{
	"crdb_internal.feature_usage",

	"crdb_internal.gossip_alerts",
	"crdb_internal.gossip_liveness",
	"crdb_internal.gossip_network",
	"crdb_internal.gossip_nodes",

	"crdb_internal.leases",

	"crdb_internal.node_build_info",
	"crdb_internal.node_metrics",
	"crdb_internal.node_queries",
	"crdb_internal.node_runtime_info",
	"crdb_internal.node_sessions",
	"crdb_internal.node_statement_statistics",
	"crdb_internal.node_transactions",
	"crdb_internal.node_txn_stats",
}

// Override for the default SELECT * when dumping the table.
// customSelectClause maps a table name (from the lists above) to the
// select clause to use instead of the default `*`.
var customSelectClause = map[string]string{
	"system.jobs":       "*, to_hex(payload) AS hex_payload, to_hex(progress) AS hex_progress",
	"system.descriptor": "*, to_hex(descriptor) AS hex_descriptor",
}

// zipper bundles a zip.Writer together with the file it writes to, so
// both can be closed in one place.
type zipper struct {
	f *os.File
	z *zip.Writer
}

// newZipper wraps the given (already open) file in a zipper.
func newZipper(f *os.File) *zipper {
	return &zipper{
		f: f,
		z: zip.NewWriter(f),
	}
}

// close finalizes the zip archive and closes the underlying file,
// combining any errors from the two steps.
func (z *zipper) close() error {
	err1 := z.z.Close()
	err2 := z.f.Close()
	return errors.CombineErrors(err1, err2)
}

// create opens a new entry in the archive with the given name and
// modification time (defaulting to "now" if mtime is zero) and returns
// a writer for its contents. It also announces the entry on stdout.
func (z *zipper) create(name string, mtime time.Time) (io.Writer, error) {
	fmt.Printf("writing: %s\n", name)
	if mtime.IsZero() {
		mtime = timeutil.Now()
	}
	return z.z.CreateHeader(&zip.FileHeader{
		Name:     name,
		Method:   zip.Deflate,
		Modified: mtime,
	})
}

// createRaw adds an entry with the given name containing exactly b.
func (z *zipper) createRaw(name string, b []byte) error {
	w, err := z.create(name, time.Time{})
	if err != nil {
		return err
	}
	_, err = w.Write(b)
	return err
}

// createJSON adds an entry containing m rendered as indented JSON.
// The name must carry a .json suffix.
func (z *zipper) createJSON(name string, m interface{}) error {
	if !strings.HasSuffix(name, ".json") {
		return errors.Errorf("%s does not have .json suffix", name)
	}
	b, err := json.MarshalIndent(m, "", "  ")
	if err != nil {
		return err
	}
	return z.createRaw(name, b)
}

// createError records the error e in an entry named <name>.err.txt,
// so that a failed data collection still leaves a trace in the zip.
func (z *zipper) createError(name string, e error) error {
	w, err := z.create(name+".err.txt", time.Time{})
	if err != nil {
		return err
	}
	fmt.Printf("  ^- resulted in %s\n", e)
	fmt.Fprintf(w, "%s\n", e)
	return nil
}

// createJSONOrError stores m as JSON under name if e is nil, and the
// error e under <name>.err.txt otherwise.
func (z *zipper) createJSONOrError(name string, m interface{}, e error) error {
	if e != nil {
		return z.createError(name, e)
	}
	return z.createJSON(name, m)
}

// createRawOrError stores b under name (which must carry a file
// extension) if e is nil, and the error e under <name>.err.txt
// otherwise.
func (z *zipper) createRawOrError(name string, b []byte, e error) error {
	if filepath.Ext(name) == "" {
		return errors.Errorf("%s has no extension", name)
	}
	if e != nil {
		return z.createError(name, e)
	}
	return z.createRaw(name, b)
}
180 } 181 return z.createRaw(name, b) 182 } 183 184 type zipRequest struct { 185 fn func(ctx context.Context) (interface{}, error) 186 pathName string 187 } 188 189 func guessNodeURL(workingURL string, hostport string) *sqlConn { 190 u, err := url.Parse(workingURL) 191 if err != nil { 192 u = &url.URL{Host: "invalid"} 193 } 194 u.Host = hostport 195 return makeSQLConn(u.String()) 196 } 197 198 func runZipRequestWithTimeout( 199 ctx context.Context, 200 requestName string, 201 timeout time.Duration, 202 fn func(ctx context.Context) error, 203 ) error { 204 fmt.Printf("%s... ", requestName) 205 return contextutil.RunWithTimeout(ctx, requestName, timeout, fn) 206 } 207 208 func runDebugZip(cmd *cobra.Command, args []string) (retErr error) { 209 const ( 210 base = "debug" 211 eventsName = base + "/events" 212 livenessName = base + "/liveness" 213 nodesPrefix = base + "/nodes" 214 rangelogName = base + "/rangelog" 215 reportsPrefix = base + "/reports" 216 schemaPrefix = base + "/schema" 217 settingsName = base + "/settings" 218 ) 219 220 baseCtx, cancel := context.WithCancel(context.Background()) 221 defer cancel() 222 223 fmt.Printf("establishing RPC connection to %s...\n", serverCfg.AdvertiseAddr) 224 conn, _, finish, err := getClientGRPCConn(baseCtx, serverCfg) 225 if err != nil { 226 return err 227 } 228 defer finish() 229 230 status := serverpb.NewStatusClient(conn) 231 admin := serverpb.NewAdminClient(conn) 232 233 fmt.Println("retrieving the node status to get the SQL address...") 234 nodeD, err := status.Details(baseCtx, &serverpb.DetailsRequest{NodeId: "local"}) 235 if err != nil { 236 return err 237 } 238 sqlAddr := nodeD.SQLAddress 239 if sqlAddr.IsEmpty() { 240 // No SQL address: either a pre-19.2 node, or same address for both 241 // SQL and RPC. 
242 sqlAddr = nodeD.Address 243 } 244 fmt.Printf("using SQL address: %s\n", sqlAddr.AddressField) 245 cliCtx.clientConnHost, cliCtx.clientConnPort, err = net.SplitHostPort(sqlAddr.AddressField) 246 if err != nil { 247 return err 248 } 249 250 // We're going to use the SQL code, but in non-interactive mode. 251 // Override whatever terminal-driven defaults there may be out there. 252 cliCtx.isInteractive = false 253 cliCtx.terminalOutput = false 254 sqlCtx.showTimes = false 255 // Use a streaming format to avoid accumulating all rows in RAM. 256 cliCtx.tableDisplayFormat = tableDisplayTSV 257 258 sqlConn, err := makeSQLClient("cockroach zip", useSystemDb) 259 if err != nil { 260 log.Warningf(baseCtx, "unable to open a SQL session. Debug information will be incomplete: %s", err) 261 } 262 defer sqlConn.Close() 263 // Note: we're not printing "connection established" because the driver we're using 264 // does late binding. 265 if sqlConn != nil { 266 fmt.Printf("using SQL connection URL: %s\n", sqlConn.url) 267 } 268 269 name := args[0] 270 out, err := os.Create(name) 271 if err != nil { 272 return err 273 } 274 fmt.Printf("writing %s\n", name) 275 276 z := newZipper(out) 277 defer func() { 278 cErr := z.close() 279 retErr = errors.CombineErrors(retErr, cErr) 280 }() 281 282 timeout := 10 * time.Second 283 if cliCtx.cmdTimeout != 0 { 284 timeout = cliCtx.cmdTimeout 285 } 286 287 var runZipRequest = func(r zipRequest) error { 288 var data interface{} 289 err = runZipRequestWithTimeout(baseCtx, "requesting data for "+r.pathName, timeout, func(ctx context.Context) error { 290 data, err = r.fn(ctx) 291 return err 292 }) 293 return z.createJSONOrError(r.pathName+".json", data, err) 294 } 295 296 for _, r := range []zipRequest{ 297 { 298 fn: func(ctx context.Context) (interface{}, error) { 299 return admin.Events(ctx, &serverpb.EventsRequest{}) 300 }, 301 pathName: eventsName, 302 }, 303 { 304 fn: func(ctx context.Context) (interface{}, error) { 305 return 
admin.RangeLog(ctx, &serverpb.RangeLogRequest{}) 306 }, 307 pathName: rangelogName, 308 }, 309 { 310 fn: func(ctx context.Context) (interface{}, error) { 311 return admin.Liveness(ctx, &serverpb.LivenessRequest{}) 312 }, 313 pathName: livenessName, 314 }, 315 { 316 fn: func(ctx context.Context) (interface{}, error) { 317 return admin.Settings(ctx, &serverpb.SettingsRequest{}) 318 }, 319 pathName: settingsName, 320 }, 321 { 322 fn: func(ctx context.Context) (interface{}, error) { 323 return status.ProblemRanges(ctx, &serverpb.ProblemRangesRequest{}) 324 }, 325 pathName: reportsPrefix + "/problemranges", 326 }, 327 } { 328 if err := runZipRequest(r); err != nil { 329 return err 330 } 331 } 332 333 for _, table := range debugZipTablesPerCluster { 334 selectClause, ok := customSelectClause[table] 335 if !ok { 336 selectClause = "*" 337 } 338 if err := dumpTableDataForZip(z, sqlConn, timeout, base, table, selectClause); err != nil { 339 return errors.Wrapf(err, "fetching %s", table) 340 } 341 } 342 343 { 344 var nodes *serverpb.NodesResponse 345 err := runZipRequestWithTimeout(baseCtx, "requesting nodes", timeout, func(ctx context.Context) error { 346 nodes, err = status.Nodes(ctx, &serverpb.NodesRequest{}) 347 return err 348 }) 349 if cErr := z.createJSONOrError(base+"/nodes.json", nodes, err); cErr != nil { 350 return cErr 351 } 352 353 // In case nodes came up back empty (the Nodes() RPC failed), we 354 // still want to inspect the per-node endpoints on the head 355 // node. As per the above, we were able to connect at least to 356 // that. 357 nodeList := []statuspb.NodeStatus{{Desc: roachpb.NodeDescriptor{ 358 NodeID: nodeD.NodeID, 359 Address: nodeD.Address, 360 SQLAddress: nodeD.SQLAddress, 361 }}} 362 if nodes != nil { 363 // If the nodes were found, use that instead. 364 nodeList = nodes.Nodes 365 } 366 367 // We'll want livenesses to decide whether a node is decommissioned. 
368 var lresponse *serverpb.LivenessResponse 369 err = runZipRequestWithTimeout(baseCtx, "requesting liveness", timeout, func(ctx context.Context) error { 370 lresponse, err = admin.Liveness(ctx, &serverpb.LivenessRequest{}) 371 return err 372 }) 373 if cErr := z.createJSONOrError(base+"/liveness.json", nodes, err); cErr != nil { 374 return cErr 375 } 376 livenessByNodeID := map[roachpb.NodeID]kvserverpb.NodeLivenessStatus{} 377 if lresponse != nil { 378 livenessByNodeID = lresponse.Statuses 379 } 380 381 for _, node := range nodeList { 382 nodeID := node.Desc.NodeID 383 384 liveness := livenessByNodeID[nodeID] 385 if liveness == kvserverpb.NodeLivenessStatus_DECOMMISSIONED { 386 // Decommissioned + process terminated. Let's not waste time 387 // on this node. 388 // 389 // NB: we still inspect DECOMMISSIONING nodes (marked as 390 // decommissioned but the process is still alive) to get a 391 // chance to collect their log files. 392 // 393 // NB: we still inspect DEAD nodes because even though they 394 // don't heartbeat their liveness record their process might 395 // still be up and willing to deliver some log files. 396 continue 397 } 398 399 id := fmt.Sprintf("%d", nodeID) 400 prefix := fmt.Sprintf("%s/%s", nodesPrefix, id) 401 402 if !zipCtx.nodes.isIncluded(nodeID) { 403 if err := z.createRaw(prefix+".skipped", 404 []byte(fmt.Sprintf("skipping excluded node %d\n", nodeID))); err != nil { 405 return err 406 } 407 continue 408 } 409 410 // Don't use sqlConn because that's only for is the node `debug 411 // zip` was pointed at, but here we want to connect to nodes 412 // individually to grab node- local SQL tables. Try to guess by 413 // replacing the host in the connection string; this may or may 414 // not work and if it doesn't, we let the invalid curSQLConn get 415 // used anyway so that anything that does *not* need it will 416 // still happen. 
417 sqlAddr := node.Desc.SQLAddress 418 if sqlAddr.IsEmpty() { 419 // No SQL address: either a pre-19.2 node, or same address for both 420 // SQL and RPC. 421 sqlAddr = node.Desc.Address 422 } 423 curSQLConn := guessNodeURL(sqlConn.url, sqlAddr.AddressField) 424 if err := z.createJSON(prefix+"/status.json", node); err != nil { 425 return err 426 } 427 fmt.Printf("using SQL connection URL for node %s: %s\n", id, curSQLConn.url) 428 429 for _, table := range debugZipTablesPerNode { 430 selectClause, ok := customSelectClause[table] 431 if !ok { 432 selectClause = "*" 433 } 434 if err := dumpTableDataForZip(z, curSQLConn, timeout, prefix, table, selectClause); err != nil { 435 return errors.Wrapf(err, "fetching %s", table) 436 } 437 } 438 439 for _, r := range []zipRequest{ 440 { 441 fn: func(ctx context.Context) (interface{}, error) { 442 return status.Details(ctx, &serverpb.DetailsRequest{NodeId: id}) 443 }, 444 pathName: prefix + "/details", 445 }, 446 { 447 fn: func(ctx context.Context) (interface{}, error) { 448 return status.Gossip(ctx, &serverpb.GossipRequest{NodeId: id}) 449 }, 450 pathName: prefix + "/gossip", 451 }, 452 { 453 fn: func(ctx context.Context) (interface{}, error) { 454 return status.EngineStats(ctx, &serverpb.EngineStatsRequest{NodeId: id}) 455 }, 456 pathName: prefix + "/enginestats", 457 }, 458 } { 459 if err := runZipRequest(r); err != nil { 460 return err 461 } 462 } 463 464 var stacksData []byte 465 err = runZipRequestWithTimeout(baseCtx, "requesting stacks for node "+id, timeout, 466 func(ctx context.Context) error { 467 stacks, err := status.Stacks(ctx, &serverpb.StacksRequest{ 468 NodeId: id, 469 Type: serverpb.StacksType_GOROUTINE_STACKS, 470 }) 471 if err == nil { 472 stacksData = stacks.Data 473 } 474 return err 475 }) 476 if err := z.createRawOrError(prefix+"/stacks.txt", stacksData, err); err != nil { 477 return err 478 } 479 480 var threadData []byte 481 err = runZipRequestWithTimeout(baseCtx, "requesting threads for node "+id, 
timeout, 482 func(ctx context.Context) error { 483 threads, err := status.Stacks(ctx, &serverpb.StacksRequest{ 484 NodeId: id, 485 Type: serverpb.StacksType_THREAD_STACKS, 486 }) 487 if err == nil { 488 threadData = threads.Data 489 } 490 return err 491 }) 492 if err := z.createRawOrError(prefix+"/threads.txt", threadData, err); err != nil { 493 return err 494 } 495 496 var heapData []byte 497 err = runZipRequestWithTimeout(baseCtx, "requesting heap profile for node "+id, timeout, 498 func(ctx context.Context) error { 499 heap, err := status.Profile(ctx, &serverpb.ProfileRequest{ 500 NodeId: id, 501 Type: serverpb.ProfileRequest_HEAP, 502 }) 503 if err == nil { 504 heapData = heap.Data 505 } 506 return err 507 }) 508 if err := z.createRawOrError(prefix+"/heap.pprof", heapData, err); err != nil { 509 return err 510 } 511 512 var profiles *serverpb.GetFilesResponse 513 if err := runZipRequestWithTimeout(baseCtx, "requesting heap files for node "+id, timeout, 514 func(ctx context.Context) error { 515 profiles, err = status.GetFiles(ctx, &serverpb.GetFilesRequest{ 516 NodeId: id, 517 Type: serverpb.FileType_HEAP, 518 Patterns: []string{"*"}, 519 }) 520 return err 521 }); err != nil { 522 if err := z.createError(prefix+"/heapprof", err); err != nil { 523 return err 524 } 525 } else { 526 fmt.Printf("%d found\n", len(profiles.Files)) 527 for _, file := range profiles.Files { 528 name := prefix + "/heapprof/" + file.Name + ".pprof" 529 if err := z.createRaw(name, file.Contents); err != nil { 530 return err 531 } 532 } 533 } 534 535 var goroutinesResp *serverpb.GetFilesResponse 536 if err := runZipRequestWithTimeout(baseCtx, "requesting goroutine files for node "+id, timeout, 537 func(ctx context.Context) error { 538 goroutinesResp, err = status.GetFiles(ctx, &serverpb.GetFilesRequest{ 539 NodeId: id, 540 Type: serverpb.FileType_GOROUTINES, 541 Patterns: []string{"*"}, 542 }) 543 return err 544 }); err != nil { 545 if err := z.createError(prefix+"/goroutines", err); err != 
nil { 546 return err 547 } 548 } else { 549 fmt.Printf("%d found\n", len(goroutinesResp.Files)) 550 for _, file := range goroutinesResp.Files { 551 // NB: the files have a .txt.gz suffix already. 552 name := prefix + "/goroutines/" + file.Name 553 if err := z.createRawOrError(name, file.Contents, err); err != nil { 554 return err 555 } 556 } 557 } 558 559 var logs *serverpb.LogFilesListResponse 560 if err := runZipRequestWithTimeout(baseCtx, "requesting log files list", timeout, 561 func(ctx context.Context) error { 562 logs, err = status.LogFilesList( 563 ctx, &serverpb.LogFilesListRequest{NodeId: id}) 564 return err 565 }); err != nil { 566 if err := z.createError(prefix+"/logs", err); err != nil { 567 return err 568 } 569 } else { 570 fmt.Printf("%d found\n", len(logs.Files)) 571 for _, file := range logs.Files { 572 name := prefix + "/logs/" + file.Name 573 var entries *serverpb.LogEntriesResponse 574 if err := runZipRequestWithTimeout(baseCtx, fmt.Sprintf("requesting log file %s", file.Name), timeout, 575 func(ctx context.Context) error { 576 entries, err = status.LogFile( 577 ctx, &serverpb.LogFileRequest{NodeId: id, File: file.Name}) 578 return err 579 }); err != nil { 580 if err := z.createError(name, err); err != nil { 581 return err 582 } 583 continue 584 } 585 logOut, err := z.create(name, timeutil.Unix(0, file.ModTimeNanos)) 586 if err != nil { 587 return err 588 } 589 for _, e := range entries.Entries { 590 if err := e.Format(logOut); err != nil { 591 return err 592 } 593 } 594 } 595 } 596 597 var ranges *serverpb.RangesResponse 598 if err := runZipRequestWithTimeout(baseCtx, "requesting ranges", timeout, func(ctx context.Context) error { 599 ranges, err = status.Ranges(ctx, &serverpb.RangesRequest{NodeId: id}) 600 return err 601 }); err != nil { 602 if err := z.createError(prefix+"/ranges", err); err != nil { 603 return err 604 } 605 } else { 606 fmt.Printf("%d found\n", len(ranges.Ranges)) 607 sort.Slice(ranges.Ranges, func(i, j int) bool { 608 
return ranges.Ranges[i].State.Desc.RangeID < 609 ranges.Ranges[j].State.Desc.RangeID 610 }) 611 for _, r := range ranges.Ranges { 612 name := fmt.Sprintf("%s/ranges/%s", prefix, r.State.Desc.RangeID) 613 if err := z.createJSON(name+".json", r); err != nil { 614 return err 615 } 616 } 617 } 618 } 619 } 620 621 { 622 var databases *serverpb.DatabasesResponse 623 if err := runZipRequestWithTimeout(baseCtx, "requesting list of SQL databases", timeout, func(ctx context.Context) error { 624 databases, err = admin.Databases(ctx, &serverpb.DatabasesRequest{}) 625 return err 626 }); err != nil { 627 if err := z.createError(schemaPrefix, err); err != nil { 628 return err 629 } 630 } else { 631 fmt.Printf("%d found\n", len(databases.Databases)) 632 var dbEscaper fileNameEscaper 633 for _, dbName := range databases.Databases { 634 prefix := schemaPrefix + "/" + dbEscaper.escape(dbName) 635 var database *serverpb.DatabaseDetailsResponse 636 requestErr := runZipRequestWithTimeout(baseCtx, fmt.Sprintf("requesting database details for %s", dbName), timeout, 637 func(ctx context.Context) error { 638 database, err = admin.DatabaseDetails(ctx, &serverpb.DatabaseDetailsRequest{Database: dbName}) 639 return err 640 }) 641 if err := z.createJSONOrError(prefix+"@details.json", database, requestErr); err != nil { 642 return err 643 } 644 if requestErr != nil { 645 continue 646 } 647 648 fmt.Printf("%d tables found\n", len(database.TableNames)) 649 var tbEscaper fileNameEscaper 650 for _, tableName := range database.TableNames { 651 name := prefix + "/" + tbEscaper.escape(tableName) 652 var table *serverpb.TableDetailsResponse 653 err := runZipRequestWithTimeout(baseCtx, fmt.Sprintf("requesting table details for %s.%s", dbName, tableName), timeout, 654 func(ctx context.Context) error { 655 table, err = admin.TableDetails(ctx, &serverpb.TableDetailsRequest{Database: dbName, Table: tableName}) 656 return err 657 }) 658 if err := z.createJSONOrError(name+".json", table, err); err != nil { 659 
return err 660 } 661 } 662 } 663 } 664 } 665 666 return nil 667 } 668 669 type fileNameEscaper struct { 670 counters map[string]int 671 } 672 673 // escape ensures that f is stripped of characters that 674 // may be invalid in file names. The characters are also lowercased 675 // to ensure proper normalization in case-insensitive filesystems. 676 func (fne *fileNameEscaper) escape(f string) string { 677 f = strings.ToLower(f) 678 var out strings.Builder 679 for _, c := range f { 680 if c < 127 && (unicode.IsLetter(c) || unicode.IsDigit(c)) { 681 out.WriteRune(c) 682 } else { 683 out.WriteByte('_') 684 } 685 } 686 objName := out.String() 687 result := objName 688 689 if fne.counters == nil { 690 fne.counters = make(map[string]int) 691 } 692 cnt := fne.counters[objName] 693 if cnt > 0 { 694 result += fmt.Sprintf("-%d", cnt) 695 } 696 cnt++ 697 fne.counters[objName] = cnt 698 return result 699 } 700 701 func dumpTableDataForZip( 702 z *zipper, conn *sqlConn, timeout time.Duration, base, table, selectClause string, 703 ) error { 704 query := fmt.Sprintf(`SET statement_timeout = '%s'; SELECT %s FROM %s`, timeout, selectClause, table) 705 baseName := base + "/" + table 706 707 fmt.Printf("retrieving SQL data for %s... ", table) 708 const maxRetries = 5 709 suffix := "" 710 for numRetries := 1; numRetries <= maxRetries; numRetries++ { 711 name := baseName + suffix + ".txt" 712 w, err := z.create(name, time.Time{}) 713 if err != nil { 714 return err 715 } 716 // Pump the SQL rows directly into the zip writer, to avoid 717 // in-RAM buffering. 718 if err := runQueryAndFormatResults(conn, w, makeQuery(query)); err != nil { 719 if cErr := z.createError(name, err); cErr != nil { 720 return cErr 721 } 722 var pqErr *pq.Error 723 if !errors.As(err, &pqErr) { 724 // Not a SQL error. Nothing to retry. 725 break 726 } 727 if pqErr.Code != pgcode.SerializationFailure { 728 // A non-retry error. We've printed the error, and 729 // there's nothing to retry. Stop here. 
730 break 731 } 732 // We've encountered a retry error. Add a suffix then loop. 733 suffix = fmt.Sprintf(".%d", numRetries) 734 continue 735 } 736 break 737 } 738 return nil 739 } 740 741 type nodeSelection struct { 742 inclusive rangeSelection 743 exclusive rangeSelection 744 includedCache map[int]struct{} 745 excludedCache map[int]struct{} 746 } 747 748 func (n *nodeSelection) isIncluded(nodeID roachpb.NodeID) bool { 749 // Avoid recomputing the maps on every call. 750 if n.includedCache == nil { 751 n.includedCache = n.inclusive.items() 752 } 753 if n.excludedCache == nil { 754 n.excludedCache = n.exclusive.items() 755 } 756 757 // If the included cache is empty, then we're assuming the node is included. 758 isIncluded := true 759 if len(n.includedCache) > 0 { 760 _, isIncluded = n.includedCache[int(nodeID)] 761 } 762 // Then filter out excluded IDs. 763 if _, excluded := n.excludedCache[int(nodeID)]; excluded { 764 isIncluded = false 765 } 766 return isIncluded 767 } 768 769 type rangeSelection struct { 770 input string 771 ranges []vrange 772 } 773 774 type vrange struct { 775 a, b int 776 } 777 778 func (r *rangeSelection) String() string { return r.input } 779 780 func (r *rangeSelection) Type() string { 781 return "a-b,c,d-e,..." 
782 } 783 784 func (r *rangeSelection) Set(v string) error { 785 r.input = v 786 for _, rs := range strings.Split(v, ",") { 787 var thisRange vrange 788 if strings.Contains(rs, "-") { 789 ab := strings.SplitN(rs, "-", 2) 790 a, err := strconv.Atoi(ab[0]) 791 if err != nil { 792 return err 793 } 794 b, err := strconv.Atoi(ab[1]) 795 if err != nil { 796 return err 797 } 798 if b < a { 799 return errors.New("invalid range") 800 } 801 thisRange = vrange{a, b} 802 } else { 803 a, err := strconv.Atoi(rs) 804 if err != nil { 805 return err 806 } 807 thisRange = vrange{a, a} 808 } 809 r.ranges = append(r.ranges, thisRange) 810 } 811 return nil 812 } 813 814 // items returns the values selected by the range selection 815 func (r *rangeSelection) items() map[int]struct{} { 816 s := map[int]struct{}{} 817 for _, vr := range r.ranges { 818 for i := vr.a; i <= vr.b; i++ { 819 s[i] = struct{}{} 820 } 821 } 822 return s 823 }