github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/server/status_test.go (about) 1 // Copyright 2015 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package server 12 13 import ( 14 "bytes" 15 "context" 16 "fmt" 17 "io/ioutil" 18 "math" 19 "os" 20 "path/filepath" 21 "reflect" 22 "regexp" 23 "sort" 24 "strconv" 25 "strings" 26 "testing" 27 "time" 28 29 "github.com/cockroachdb/cockroach/pkg/base" 30 "github.com/cockroachdb/cockroach/pkg/build" 31 "github.com/cockroachdb/cockroach/pkg/gossip" 32 "github.com/cockroachdb/cockroach/pkg/jobs" 33 "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" 34 "github.com/cockroachdb/cockroach/pkg/keys" 35 "github.com/cockroachdb/cockroach/pkg/kv/kvserver" 36 "github.com/cockroachdb/cockroach/pkg/roachpb" 37 "github.com/cockroachdb/cockroach/pkg/rpc" 38 "github.com/cockroachdb/cockroach/pkg/security" 39 "github.com/cockroachdb/cockroach/pkg/security/securitytest" 40 "github.com/cockroachdb/cockroach/pkg/server/diagnosticspb" 41 "github.com/cockroachdb/cockroach/pkg/server/serverpb" 42 "github.com/cockroachdb/cockroach/pkg/server/status/statuspb" 43 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 44 "github.com/cockroachdb/cockroach/pkg/storage/enginepb" 45 "github.com/cockroachdb/cockroach/pkg/testutils" 46 "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" 47 "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" 48 "github.com/cockroachdb/cockroach/pkg/ts" 49 "github.com/cockroachdb/cockroach/pkg/ts/catalog" 50 "github.com/cockroachdb/cockroach/pkg/util/httputil" 51 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 52 "github.com/cockroachdb/cockroach/pkg/util/log" 53 
"github.com/cockroachdb/cockroach/pkg/util/metric" 54 "github.com/cockroachdb/cockroach/pkg/util/protoutil" 55 "github.com/cockroachdb/cockroach/pkg/util/stop" 56 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 57 "github.com/cockroachdb/errors" 58 "github.com/gogo/protobuf/proto" 59 "github.com/kr/pretty" 60 "github.com/stretchr/testify/require" 61 ) 62 63 func getStatusJSONProto( 64 ts serverutils.TestServerInterface, path string, response protoutil.Message, 65 ) error { 66 return serverutils.GetJSONProto(ts, statusPrefix+path, response) 67 } 68 69 func postStatusJSONProto( 70 ts serverutils.TestServerInterface, path string, request, response protoutil.Message, 71 ) error { 72 return serverutils.PostJSONProto(ts, statusPrefix+path, request, response) 73 } 74 75 func getStatusJSONProtoWithAdminOption( 76 ts serverutils.TestServerInterface, path string, response protoutil.Message, isAdmin bool, 77 ) error { 78 return serverutils.GetJSONProtoWithAdminOption(ts, statusPrefix+path, response, isAdmin) 79 } 80 81 // TestStatusLocalStacks verifies that goroutine stack traces are available 82 // via the /_status/stacks/local endpoint. 83 func TestStatusLocalStacks(t *testing.T) { 84 defer leaktest.AfterTest(t)() 85 s, _, _ := serverutils.StartServer(t, base.TestServerArgs{}) 86 defer s.Stopper().Stop(context.Background()) 87 88 // Verify match with at least two goroutine stacks. 89 re := regexp.MustCompile("(?s)goroutine [0-9]+.*goroutine [0-9]+.*") 90 91 var stacks serverpb.JSONResponse 92 for _, nodeID := range []string{"local", "1"} { 93 if err := getStatusJSONProto(s, "stacks/"+nodeID, &stacks); err != nil { 94 t.Fatal(err) 95 } 96 if !re.Match(stacks.Data) { 97 t.Errorf("expected %s to match %s", stacks.Data, re) 98 } 99 } 100 } 101 102 // TestStatusJson verifies that status endpoints return expected Json results. 103 // The content type of the responses is always httputil.JSONContentType. 
104 func TestStatusJson(t *testing.T) { 105 defer leaktest.AfterTest(t)() 106 s, _, _ := serverutils.StartServer(t, base.TestServerArgs{}) 107 defer s.Stopper().Stop(context.Background()) 108 ts := s.(*TestServer) 109 110 nodeID := ts.Gossip().NodeID.Get() 111 addr, err := ts.Gossip().GetNodeIDAddress(nodeID) 112 if err != nil { 113 t.Fatal(err) 114 } 115 sqlAddr, err := ts.Gossip().GetNodeIDSQLAddress(nodeID) 116 if err != nil { 117 t.Fatal(err) 118 } 119 120 var nodes serverpb.NodesResponse 121 testutils.SucceedsSoon(t, func() error { 122 if err := getStatusJSONProto(s, "nodes", &nodes); err != nil { 123 t.Fatal(err) 124 } 125 126 if len(nodes.Nodes) == 0 { 127 return errors.Errorf("expected non-empty node list, got: %v", nodes) 128 } 129 return nil 130 }) 131 132 for _, path := range []string{ 133 statusPrefix + "details/local", 134 statusPrefix + "details/" + strconv.FormatUint(uint64(nodeID), 10), 135 } { 136 var details serverpb.DetailsResponse 137 if err := serverutils.GetJSONProto(s, path, &details); err != nil { 138 t.Fatal(err) 139 } 140 if a, e := details.NodeID, nodeID; a != e { 141 t.Errorf("expected: %d, got: %d", e, a) 142 } 143 if a, e := details.Address, *addr; a != e { 144 t.Errorf("expected: %v, got: %v", e, a) 145 } 146 if a, e := details.SQLAddress, *sqlAddr; a != e { 147 t.Errorf("expected: %v, got: %v", e, a) 148 } 149 if a, e := details.BuildInfo, build.GetInfo(); a != e { 150 t.Errorf("expected: %v, got: %v", e, a) 151 } 152 } 153 } 154 155 // TestHealthTelemetry confirms that hits on some status endpoints increment 156 // feature telemetry counters. 
157 func TestHealthTelemetry(t *testing.T) { 158 defer leaktest.AfterTest(t)() 159 s, db, _ := serverutils.StartServer(t, base.TestServerArgs{}) 160 defer s.Stopper().Stop(context.Background()) 161 162 rows, err := db.Query("SELECT * FROM crdb_internal.feature_usage WHERE feature_name LIKE 'monitoring%' AND usage_count > 0;") 163 defer func() { 164 if err := rows.Close(); err != nil { 165 t.Fatal(err) 166 } 167 }() 168 if err != nil { 169 t.Fatal(err) 170 } 171 172 initialCounts := make(map[string]int) 173 for rows.Next() { 174 var featureName string 175 var usageCount int 176 177 if err := rows.Scan(&featureName, &usageCount); err != nil { 178 t.Fatal(err) 179 } 180 181 initialCounts[featureName] = usageCount 182 } 183 184 var details serverpb.DetailsResponse 185 if err := serverutils.GetJSONProto(s, "/health", &details); err != nil { 186 t.Fatal(err) 187 } 188 if _, err := getText(s, s.AdminURL()+statusPrefix+"vars"); err != nil { 189 t.Fatal(err) 190 } 191 192 expectedCounts := map[string]int{ 193 "monitoring.prometheus.vars": 1, 194 "monitoring.health.details": 1, 195 } 196 197 rows2, err := db.Query("SELECT feature_name, usage_count FROM crdb_internal.feature_usage WHERE feature_name LIKE 'monitoring%' AND usage_count > 0;") 198 defer func() { 199 if err := rows2.Close(); err != nil { 200 t.Fatal(err) 201 } 202 }() 203 if err != nil { 204 t.Fatal(err) 205 } 206 207 for rows2.Next() { 208 var featureName string 209 var usageCount int 210 211 if err := rows2.Scan(&featureName, &usageCount); err != nil { 212 t.Fatal(err) 213 } 214 215 usageCount -= initialCounts[featureName] 216 if count, ok := expectedCounts[featureName]; ok { 217 if count != usageCount { 218 t.Fatalf("expected %d count for feature %s, got %d", count, featureName, usageCount) 219 } 220 delete(expectedCounts, featureName) 221 } 222 } 223 224 if len(expectedCounts) > 0 { 225 t.Fatalf("%d expected telemetry counters not emitted", len(expectedCounts)) 226 } 227 } 228 229 // TestStatusGossipJson 
ensures that the output response for the full gossip 230 // info contains the required fields. 231 func TestStatusGossipJson(t *testing.T) { 232 defer leaktest.AfterTest(t)() 233 s, _, _ := serverutils.StartServer(t, base.TestServerArgs{}) 234 defer s.Stopper().Stop(context.Background()) 235 236 var data gossip.InfoStatus 237 if err := getStatusJSONProto(s, "gossip/local", &data); err != nil { 238 t.Fatal(err) 239 } 240 if _, ok := data.Infos["first-range"]; !ok { 241 t.Errorf("no first-range info returned: %v", data) 242 } 243 if _, ok := data.Infos["cluster-id"]; !ok { 244 t.Errorf("no clusterID info returned: %v", data) 245 } 246 if _, ok := data.Infos["node:1"]; !ok { 247 t.Errorf("no node 1 info returned: %v", data) 248 } 249 if _, ok := data.Infos["system-db"]; !ok { 250 t.Errorf("no system config info returned: %v", data) 251 } 252 } 253 254 // TestStatusEngineStatsJson ensures that the output response for the engine 255 // stats contains the required fields. 256 func TestStatusEngineStatsJson(t *testing.T) { 257 defer leaktest.AfterTest(t)() 258 259 dir, cleanupFn := testutils.TempDir(t) 260 defer cleanupFn() 261 262 s, err := serverutils.StartServerRaw(base.TestServerArgs{ 263 StoreSpecs: []base.StoreSpec{{ 264 Path: dir, 265 }}, 266 }) 267 if err != nil { 268 t.Fatal(err) 269 } 270 defer s.Stopper().Stop(context.Background()) 271 272 var engineStats serverpb.EngineStatsResponse 273 if err := getStatusJSONProto(s, "enginestats/local", &engineStats); err != nil { 274 t.Fatal(err) 275 } 276 if len(engineStats.Stats) != 1 { 277 t.Fatal(errors.Errorf("expected one engine stats, got: %v", engineStats)) 278 } 279 280 if engineStats.Stats[0].EngineType == enginepb.EngineTypePebble { 281 // Pebble does not have RocksDB style TickersAnd Histogram. 
282 return 283 } 284 285 tickers := engineStats.Stats[0].TickersAndHistograms.Tickers 286 if len(tickers) == 0 { 287 t.Fatal(errors.Errorf("expected non-empty tickers list, got: %v", tickers)) 288 } 289 allTickersZero := true 290 for _, ticker := range tickers { 291 if ticker != 0 { 292 allTickersZero = false 293 } 294 } 295 if allTickersZero { 296 t.Fatal(errors.Errorf("expected some tickers nonzero, got: %v", tickers)) 297 } 298 299 histograms := engineStats.Stats[0].TickersAndHistograms.Histograms 300 if len(histograms) == 0 { 301 t.Fatal(errors.Errorf("expected non-empty histograms list, got: %v", histograms)) 302 } 303 allHistogramsZero := true 304 for _, histogram := range histograms { 305 if histogram.Max == 0 { 306 allHistogramsZero = false 307 } 308 } 309 if allHistogramsZero { 310 t.Fatal(errors.Errorf("expected some histograms nonzero, got: %v", histograms)) 311 } 312 } 313 314 // startServer will start a server with a short scan interval, wait for 315 // the scan to complete, and return the server. The caller is 316 // responsible for stopping the server. 317 func startServer(t *testing.T) *TestServer { 318 tsI, _, kvDB := serverutils.StartServer(t, base.TestServerArgs{ 319 StoreSpecs: []base.StoreSpec{ 320 base.DefaultTestStoreSpec, 321 base.DefaultTestStoreSpec, 322 base.DefaultTestStoreSpec, 323 }, 324 }) 325 326 ts := tsI.(*TestServer) 327 328 // Make sure the range is spun up with an arbitrary read command. We do not 329 // expect a specific response. 330 if _, err := kvDB.Get(context.Background(), "a"); err != nil { 331 t.Fatal(err) 332 } 333 334 // Make sure the node status is available. This is done by forcing stores to 335 // publish their status, synchronizing to the event feed with a canary 336 // event, and then forcing the server to write summaries immediately. 
337 if err := ts.node.computePeriodicMetrics(context.Background(), 0); err != nil { 338 t.Fatalf("error publishing store statuses: %s", err) 339 } 340 341 if err := ts.WriteSummaries(); err != nil { 342 t.Fatalf("error writing summaries: %s", err) 343 } 344 345 return ts 346 } 347 348 func newRPCTestContext(ts *TestServer, cfg *base.Config) *rpc.Context { 349 rpcContext := rpc.NewContext( 350 log.AmbientContext{Tracer: ts.ClusterSettings().Tracer}, cfg, ts.Clock(), ts.Stopper(), 351 ts.ClusterSettings()) 352 // Ensure that the RPC client context validates the server cluster ID. 353 // This ensures that a test where the server is restarted will not let 354 // its test RPC client talk to a server started by an unrelated concurrent test. 355 rpcContext.ClusterID.Set(context.Background(), ts.ClusterID()) 356 return rpcContext 357 } 358 359 // TestStatusGetFiles tests the GetFiles endpoint. 360 func TestStatusGetFiles(t *testing.T) { 361 defer leaktest.AfterTest(t)() 362 363 tempDir, cleanupFn := testutils.TempDir(t) 364 defer cleanupFn() 365 366 storeSpec := base.StoreSpec{Path: tempDir} 367 368 tsI, _, _ := serverutils.StartServer(t, base.TestServerArgs{ 369 StoreSpecs: []base.StoreSpec{ 370 storeSpec, 371 }, 372 }) 373 ts := tsI.(*TestServer) 374 defer ts.Stopper().Stop(context.Background()) 375 376 rootConfig := testutils.NewTestBaseContext(security.RootUser) 377 rpcContext := newRPCTestContext(ts, rootConfig) 378 379 url := ts.ServingRPCAddr() 380 nodeID := ts.NodeID() 381 conn, err := rpcContext.GRPCDialNode(url, nodeID, rpc.DefaultClass).Connect(context.Background()) 382 if err != nil { 383 t.Fatal(err) 384 } 385 client := serverpb.NewStatusClient(conn) 386 387 // Test fetching heap files. 
388 t.Run("heap", func(t *testing.T) { 389 const testFilesNo = 3 390 for i := 0; i < testFilesNo; i++ { 391 testHeapDir := filepath.Join(storeSpec.Path, "logs", base.HeapProfileDir) 392 testHeapFile := filepath.Join(testHeapDir, fmt.Sprintf("heap%d.pprof", i)) 393 if err := os.MkdirAll(testHeapDir, os.ModePerm); err != nil { 394 t.Fatal(err) 395 } 396 if err := ioutil.WriteFile(testHeapFile, []byte(fmt.Sprintf("I'm heap file %d", i)), 0644); err != nil { 397 t.Fatal(err) 398 } 399 } 400 401 request := serverpb.GetFilesRequest{ 402 NodeId: "local", Type: serverpb.FileType_HEAP, Patterns: []string{"*"}} 403 response, err := client.GetFiles(context.Background(), &request) 404 if err != nil { 405 t.Fatal(err) 406 } 407 408 if a, e := len(response.Files), testFilesNo; a != e { 409 t.Errorf("expected %d files(s), found %d", e, a) 410 } 411 412 for i, file := range response.Files { 413 expectedFileName := fmt.Sprintf("heap%d.pprof", i) 414 if file.Name != expectedFileName { 415 t.Fatalf("expected file name %s, found %s", expectedFileName, file.Name) 416 } 417 expectedFileContents := []byte(fmt.Sprintf("I'm heap file %d", i)) 418 if !bytes.Equal(file.Contents, expectedFileContents) { 419 t.Fatalf("expected file contents %s, found %s", expectedFileContents, file.Contents) 420 } 421 } 422 }) 423 424 // Test fetching goroutine files. 
425 t.Run("goroutines", func(t *testing.T) { 426 const testFilesNo = 3 427 for i := 0; i < testFilesNo; i++ { 428 testGoroutineDir := filepath.Join(storeSpec.Path, "logs", base.GoroutineDumpDir) 429 testGoroutineFile := filepath.Join(testGoroutineDir, fmt.Sprintf("goroutine_dump%d.txt.gz", i)) 430 if err := os.MkdirAll(testGoroutineDir, os.ModePerm); err != nil { 431 t.Fatal(err) 432 } 433 if err := ioutil.WriteFile(testGoroutineFile, []byte(fmt.Sprintf("Goroutine dump %d", i)), 0644); err != nil { 434 t.Fatal(err) 435 } 436 } 437 438 request := serverpb.GetFilesRequest{ 439 NodeId: "local", Type: serverpb.FileType_GOROUTINES, Patterns: []string{"*"}} 440 response, err := client.GetFiles(context.Background(), &request) 441 if err != nil { 442 t.Fatal(err) 443 } 444 445 if a, e := len(response.Files), testFilesNo; a != e { 446 t.Errorf("expected %d files(s), found %d", e, a) 447 } 448 449 for i, file := range response.Files { 450 expectedFileName := fmt.Sprintf("goroutine_dump%d.txt.gz", i) 451 if file.Name != expectedFileName { 452 t.Fatalf("expected file name %s, found %s", expectedFileName, file.Name) 453 } 454 expectedFileContents := []byte(fmt.Sprintf("Goroutine dump %d", i)) 455 if !bytes.Equal(file.Contents, expectedFileContents) { 456 t.Fatalf("expected file contents %s, found %s", expectedFileContents, file.Contents) 457 } 458 } 459 }) 460 461 // Testing path separators in pattern. 462 t.Run("path separators", func(t *testing.T) { 463 request := serverpb.GetFilesRequest{NodeId: "local", ListOnly: true, 464 Type: serverpb.FileType_HEAP, Patterns: []string{"pattern/with/separators"}} 465 _, err = client.GetFiles(context.Background(), &request) 466 if !testutils.IsError(err, "invalid pattern: cannot have path seperators") { 467 t.Errorf("GetFiles: path separators allowed in pattern") 468 } 469 }) 470 471 // Testing invalid filetypes. 
472 t.Run("filetypes", func(t *testing.T) { 473 request := serverpb.GetFilesRequest{NodeId: "local", ListOnly: true, 474 Type: -1, Patterns: []string{"*"}} 475 _, err = client.GetFiles(context.Background(), &request) 476 if !testutils.IsError(err, "unknown file type: -1") { 477 t.Errorf("GetFiles: invalid file type allowed") 478 } 479 }) 480 } 481 482 // TestStatusLocalLogs checks to ensure that local/logfiles, 483 // local/logfiles/{filename} and local/log function 484 // correctly. 485 func TestStatusLocalLogs(t *testing.T) { 486 defer leaktest.AfterTest(t)() 487 if log.V(3) { 488 t.Skip("Test only works with low verbosity levels") 489 } 490 491 s := log.ScopeWithoutShowLogs(t) 492 defer s.Close(t) 493 494 ts := startServer(t) 495 defer ts.Stopper().Stop(context.Background()) 496 497 // Log an error of each main type which we expect to be able to retrieve. 498 // The resolution of our log timestamps is such that it's possible to get 499 // two subsequent log messages with the same timestamp. This test will fail 500 // when that occurs. By adding a small sleep in here after each timestamp to 501 // ensures this isn't the case and that the log filtering doesn't filter out 502 // the log entires we're looking for. The value of 20 μs was chosen because 503 // the log timestamps have a fidelity of 10 μs and thus doubling that should 504 // be a sufficient buffer. 505 // See util/log/clog.go formatHeader() for more details. 
506 const sleepBuffer = time.Microsecond * 20 507 timestamp := timeutil.Now().UnixNano() 508 time.Sleep(sleepBuffer) 509 log.Errorf(context.Background(), "TestStatusLocalLogFile test message-Error") 510 time.Sleep(sleepBuffer) 511 timestampE := timeutil.Now().UnixNano() 512 time.Sleep(sleepBuffer) 513 log.Warningf(context.Background(), "TestStatusLocalLogFile test message-Warning") 514 time.Sleep(sleepBuffer) 515 timestampEW := timeutil.Now().UnixNano() 516 time.Sleep(sleepBuffer) 517 log.Infof(context.Background(), "TestStatusLocalLogFile test message-Info") 518 time.Sleep(sleepBuffer) 519 timestampEWI := timeutil.Now().UnixNano() 520 521 var wrapper serverpb.LogFilesListResponse 522 if err := getStatusJSONProto(ts, "logfiles/local", &wrapper); err != nil { 523 t.Fatal(err) 524 } 525 if a, e := len(wrapper.Files), 1; a != e { 526 t.Fatalf("expected %d log files; got %d", e, a) 527 } 528 529 // Check each individual log can be fetched and is non-empty. 530 var foundInfo, foundWarning, foundError bool 531 for _, file := range wrapper.Files { 532 var wrapper serverpb.LogEntriesResponse 533 if err := getStatusJSONProto(ts, "logfiles/local/"+file.Name, &wrapper); err != nil { 534 t.Fatal(err) 535 } 536 for _, entry := range wrapper.Entries { 537 switch entry.Message { 538 case "TestStatusLocalLogFile test message-Error": 539 foundError = true 540 case "TestStatusLocalLogFile test message-Warning": 541 foundWarning = true 542 case "TestStatusLocalLogFile test message-Info": 543 foundInfo = true 544 } 545 } 546 } 547 548 if !(foundInfo && foundWarning && foundError) { 549 t.Errorf("expected to find test messages in %v", wrapper.Files) 550 } 551 552 type levelPresence struct { 553 Error, Warning, Info bool 554 } 555 556 testCases := []struct { 557 MaxEntities int 558 StartTimestamp int64 559 EndTimestamp int64 560 Pattern string 561 levelPresence 562 }{ 563 // Test filtering by log severity. 564 // // Test entry limit. Ignore Info/Warning/Error filters. 
565 {1, timestamp, timestampEWI, "", levelPresence{false, false, false}}, 566 {2, timestamp, timestampEWI, "", levelPresence{false, false, false}}, 567 {3, timestamp, timestampEWI, "", levelPresence{false, false, false}}, 568 // Test filtering in different timestamp windows. 569 {0, timestamp, timestamp, "", levelPresence{false, false, false}}, 570 {0, timestamp, timestampE, "", levelPresence{true, false, false}}, 571 {0, timestampE, timestampEW, "", levelPresence{false, true, false}}, 572 {0, timestampEW, timestampEWI, "", levelPresence{false, false, true}}, 573 {0, timestamp, timestampEW, "", levelPresence{true, true, false}}, 574 {0, timestampE, timestampEWI, "", levelPresence{false, true, true}}, 575 {0, timestamp, timestampEWI, "", levelPresence{true, true, true}}, 576 // Test filtering by regexp pattern. 577 {0, 0, 0, "Info", levelPresence{false, false, true}}, 578 {0, 0, 0, "Warning", levelPresence{false, true, false}}, 579 {0, 0, 0, "Error", levelPresence{true, false, false}}, 580 {0, 0, 0, "Info|Error|Warning", levelPresence{true, true, true}}, 581 {0, 0, 0, "Nothing", levelPresence{false, false, false}}, 582 } 583 584 for i, testCase := range testCases { 585 var url bytes.Buffer 586 fmt.Fprintf(&url, "logs/local?level=") 587 if testCase.MaxEntities > 0 { 588 fmt.Fprintf(&url, "&max=%d", testCase.MaxEntities) 589 } 590 if testCase.StartTimestamp > 0 { 591 fmt.Fprintf(&url, "&start_time=%d", testCase.StartTimestamp) 592 } 593 if testCase.StartTimestamp > 0 { 594 fmt.Fprintf(&url, "&end_time=%d", testCase.EndTimestamp) 595 } 596 if len(testCase.Pattern) > 0 { 597 fmt.Fprintf(&url, "&pattern=%s", testCase.Pattern) 598 } 599 600 var wrapper serverpb.LogEntriesResponse 601 path := url.String() 602 if err := getStatusJSONProto(ts, path, &wrapper); err != nil { 603 t.Fatal(err) 604 } 605 606 if testCase.MaxEntities > 0 { 607 if a, e := len(wrapper.Entries), testCase.MaxEntities; a != e { 608 t.Errorf("%d expected %d entries, got %d: \n%+v", i, e, a, 
wrapper.Entries) 609 } 610 } else { 611 var actual levelPresence 612 var logsBuf bytes.Buffer 613 for _, entry := range wrapper.Entries { 614 fmt.Fprintln(&logsBuf, entry.Message) 615 616 switch entry.Message { 617 case "TestStatusLocalLogFile test message-Error": 618 actual.Error = true 619 case "TestStatusLocalLogFile test message-Warning": 620 actual.Warning = true 621 case "TestStatusLocalLogFile test message-Info": 622 actual.Info = true 623 } 624 } 625 626 if testCase.levelPresence != actual { 627 t.Errorf("%d: expected %+v at %s, got:\n%s", i, testCase, path, logsBuf.String()) 628 } 629 } 630 } 631 } 632 633 // TestNodeStatusResponse verifies that node status returns the expected 634 // results. 635 func TestNodeStatusResponse(t *testing.T) { 636 defer leaktest.AfterTest(t)() 637 s := startServer(t) 638 defer s.Stopper().Stop(context.Background()) 639 640 // First fetch all the node statuses. 641 wrapper := serverpb.NodesResponse{} 642 if err := getStatusJSONProto(s, "nodes", &wrapper); err != nil { 643 t.Fatal(err) 644 } 645 nodeStatuses := wrapper.Nodes 646 647 if len(nodeStatuses) != 1 { 648 t.Errorf("too many node statuses returned - expected:1 actual:%d", len(nodeStatuses)) 649 } 650 if !proto.Equal(&s.node.Descriptor, &nodeStatuses[0].Desc) { 651 t.Errorf("node status descriptors are not equal\nexpected:%+v\nactual:%+v\n", s.node.Descriptor, nodeStatuses[0].Desc) 652 } 653 654 // Now fetch each one individually. Loop through the nodeStatuses to use the 655 // ids only. 
656 for _, oldNodeStatus := range nodeStatuses { 657 nodeStatus := statuspb.NodeStatus{} 658 if err := getStatusJSONProto(s, "nodes/"+oldNodeStatus.Desc.NodeID.String(), &nodeStatus); err != nil { 659 t.Fatal(err) 660 } 661 if !proto.Equal(&s.node.Descriptor, &nodeStatus.Desc) { 662 t.Errorf("node status descriptors are not equal\nexpected:%+v\nactual:%+v\n", s.node.Descriptor, nodeStatus.Desc) 663 } 664 } 665 } 666 667 // TestMetricsRecording verifies that Node statistics are periodically recorded 668 // as time series data. 669 func TestMetricsRecording(t *testing.T) { 670 defer leaktest.AfterTest(t)() 671 672 ctx := context.Background() 673 674 s, _, kvDB := serverutils.StartServer(t, base.TestServerArgs{}) 675 defer s.Stopper().Stop(ctx) 676 677 // Verify that metrics for the current timestamp are recorded. This should 678 // be true very quickly even though DefaultMetricsSampleInterval is large, 679 // because the server writes an entry eagerly on startup. 680 testutils.SucceedsSoon(t, func() error { 681 now := s.Clock().PhysicalNow() 682 683 var data roachpb.InternalTimeSeriesData 684 for _, keyName := range []string{ 685 "cr.store.livebytes.1", 686 "cr.node.sys.go.allocbytes.1", 687 } { 688 key := ts.MakeDataKey(keyName, "", ts.Resolution10s, now) 689 if err := kvDB.GetProto(ctx, key, &data); err != nil { 690 return err 691 } 692 } 693 return nil 694 }) 695 } 696 697 // TestMetricsEndpoint retrieves the metrics endpoint, which is currently only 698 // used for development purposes. The metrics within the response are verified 699 // in other tests. 
700 func TestMetricsEndpoint(t *testing.T) { 701 defer leaktest.AfterTest(t)() 702 s := startServer(t) 703 defer s.Stopper().Stop(context.Background()) 704 705 if _, err := getText(s, s.AdminURL()+statusPrefix+"metrics/"+s.Gossip().NodeID.String()); err != nil { 706 t.Fatal(err) 707 } 708 } 709 710 // TestMetricsMetadata ensures that the server's recorder return metrics and 711 // that each metric has a Name, Help, Unit, and DisplayUnit defined. 712 func TestMetricsMetadata(t *testing.T) { 713 defer leaktest.AfterTest(t)() 714 s := startServer(t) 715 defer s.Stopper().Stop(context.Background()) 716 717 metricsMetadata := s.recorder.GetMetricsMetadata() 718 719 if len(metricsMetadata) < 200 { 720 t.Fatal("s.recorder.GetMetricsMetadata() failed sanity check; didn't return enough metrics.") 721 } 722 723 for _, v := range metricsMetadata { 724 if v.Name == "" { 725 t.Fatal("metric missing name.") 726 } 727 if v.Help == "" { 728 t.Fatalf("%s missing Help.", v.Name) 729 } 730 if v.Measurement == "" { 731 t.Fatalf("%s missing Measurement.", v.Name) 732 } 733 if v.Unit == 0 { 734 t.Fatalf("%s missing Unit.", v.Name) 735 } 736 } 737 } 738 739 // TestChartCatalog ensures that the server successfully generates the chart catalog. 740 func TestChartCatalogGen(t *testing.T) { 741 defer leaktest.AfterTest(t)() 742 s := startServer(t) 743 defer s.Stopper().Stop(context.Background()) 744 745 metricsMetadata := s.recorder.GetMetricsMetadata() 746 747 chartCatalog, err := catalog.GenerateCatalog(metricsMetadata) 748 749 if err != nil { 750 t.Fatal(err) 751 } 752 753 // Ensure each of the 7 constant sections of the chart catalog exist. 754 if len(chartCatalog) != 7 { 755 t.Fatal("Chart catalog failed to generate.") 756 } 757 758 for _, section := range chartCatalog { 759 // Ensure that one of the chartSections has defined Subsections. 
760 if len(section.Subsections) == 0 { 761 t.Fatalf(`Chart catalog has missing subsections in %v`, section) 762 } 763 } 764 } 765 766 // findUndefinedMetrics finds metrics listed in pkg/ts/catalog/chart_catalog.go 767 // that are not defined. This is most likely caused by a metric being removed. 768 func findUndefinedMetrics(c *catalog.ChartSection, metadata map[string]metric.Metadata) []string { 769 var undefinedMetrics []string 770 for _, ic := range c.Charts { 771 for _, metric := range ic.Metrics { 772 _, ok := metadata[metric.Name] 773 if !ok { 774 undefinedMetrics = append(undefinedMetrics, metric.Name) 775 } 776 } 777 } 778 779 for _, x := range c.Subsections { 780 undefinedMetrics = append(undefinedMetrics, findUndefinedMetrics(x, metadata)...) 781 } 782 783 return undefinedMetrics 784 } 785 786 // deleteSeenMetrics removes all metrics in a section from the metricMetadata map. 787 func deleteSeenMetrics(c *catalog.ChartSection, metadata map[string]metric.Metadata, t *testing.T) { 788 // if c.Title == "SQL" { 789 // t.Log(c) 790 // } 791 for _, x := range c.Charts { 792 if x.Title == "Connections" || x.Title == "Byte I/O" { 793 t.Log(x) 794 } 795 796 for _, metric := range x.Metrics { 797 if metric.Name == "sql.new_conns" || metric.Name == "sql.bytesin" { 798 t.Logf("found %v\n", metric.Name) 799 } 800 _, ok := metadata[metric.Name] 801 if ok { 802 delete(metadata, metric.Name) 803 } 804 } 805 } 806 807 for _, x := range c.Subsections { 808 deleteSeenMetrics(x, metadata, t) 809 } 810 } 811 812 // TestChartCatalogMetric ensures that all metrics are included in at least one 813 // chart, and that every metric included in a chart is still part of the metrics 814 // registry. 
815 func TestChartCatalogMetrics(t *testing.T) { 816 defer leaktest.AfterTest(t)() 817 s := startServer(t) 818 defer s.Stopper().Stop(context.Background()) 819 820 metricsMetadata := s.recorder.GetMetricsMetadata() 821 822 chartCatalog, err := catalog.GenerateCatalog(metricsMetadata) 823 824 if err != nil { 825 t.Fatal(err) 826 } 827 828 // Each metric referenced in the chartCatalog must have a definition in metricsMetadata 829 var undefinedMetrics []string 830 for _, cs := range chartCatalog { 831 undefinedMetrics = append(undefinedMetrics, findUndefinedMetrics(&cs, metricsMetadata)...) 832 } 833 834 if len(undefinedMetrics) > 0 { 835 t.Fatalf(`The following metrics need are no longer present and need to be removed 836 from the chart catalog (pkg/ts/catalog/chart_catalog.go):%v`, undefinedMetrics) 837 } 838 839 // Each metric in metricsMetadata should have at least one entry in 840 // chartCatalog, which we track by deleting the metric from metricsMetadata. 841 for _, v := range chartCatalog { 842 deleteSeenMetrics(&v, metricsMetadata, t) 843 } 844 845 if len(metricsMetadata) > 0 { 846 var metricNames []string 847 for metricName := range metricsMetadata { 848 metricNames = append(metricNames, metricName) 849 } 850 sort.Strings(metricNames) 851 t.Fatalf(`The following metrics need to be added to the chart catalog 852 (pkg/ts/catalog/chart_catalog.go): %v`, metricNames) 853 } 854 } 855 856 func TestHotRangesResponse(t *testing.T) { 857 defer leaktest.AfterTest(t)() 858 ts := startServer(t) 859 defer ts.Stopper().Stop(context.Background()) 860 861 var hotRangesResp serverpb.HotRangesResponse 862 if err := getStatusJSONProto(ts, "hotranges", &hotRangesResp); err != nil { 863 t.Fatal(err) 864 } 865 if len(hotRangesResp.HotRangesByNodeID) == 0 { 866 t.Fatalf("didn't get hot range responses from any nodes") 867 } 868 869 for nodeID, nodeResp := range hotRangesResp.HotRangesByNodeID { 870 if len(nodeResp.Stores) == 0 { 871 t.Errorf("didn't get any stores in hot range 
response from n%d: %v",
				nodeID, nodeResp.ErrorMessage)
		}
		for _, storeResp := range nodeResp.Stores {
			// Only the first store will actually have any ranges on it.
			if storeResp.StoreID != roachpb.StoreID(1) {
				continue
			}
			lastQPS := math.MaxFloat64
			if len(storeResp.HotRanges) == 0 {
				t.Errorf("didn't get any hot ranges in response from n%d,s%d: %v",
					nodeID, storeResp.StoreID, nodeResp.ErrorMessage)
			}
			for _, r := range storeResp.HotRanges {
				if r.Desc.RangeID == 0 || (len(r.Desc.StartKey) == 0 && len(r.Desc.EndKey) == 0) {
					t.Errorf("unexpected empty/unpopulated range descriptor: %+v", r.Desc)
				}
				if r.QueriesPerSecond > lastQPS {
					t.Errorf("unexpected increase in qps between ranges; prev=%.2f, current=%.2f, desc=%v",
						lastQPS, r.QueriesPerSecond, r.Desc)
				}
				lastQPS = r.QueriesPerSecond
			}
		}

	}
}

// TestRangesResponse verifies that the ranges/local status endpoint returns
// at least one range and that every returned range carries the Raft state,
// replica descriptor, lease, last index and lease history expected on a
// single-node cluster.
func TestRangesResponse(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer kvserver.EnableLeaseHistory(100)()
	ts := startServer(t)
	defer ts.Stopper().Stop(context.Background())

	// Perform a scan to ensure that all the raft groups are initialized.
	if _, err := ts.db.Scan(context.Background(), keys.LocalMax, roachpb.KeyMax, 0); err != nil {
		t.Fatal(err)
	}

	var response serverpb.RangesResponse
	if err := getStatusJSONProto(ts, "ranges/local", &response); err != nil {
		t.Fatal(err)
	}
	if len(response.Ranges) == 0 {
		t.Errorf("didn't get any ranges")
	}
	for _, ri := range response.Ranges {
		// Do some simple validation based on the fact that this is a
		// single-node cluster.
		if ri.RaftState.State != "StateLeader" && ri.RaftState.State != raftStateDormant {
			t.Errorf("expected to be Raft leader or dormant, but was '%s'", ri.RaftState.State)
		}
		expReplica := roachpb.ReplicaDescriptor{
			NodeID:    1,
			StoreID:   1,
			ReplicaID: 1,
		}
		if len(ri.State.Desc.InternalReplicas) != 1 || ri.State.Desc.InternalReplicas[0] != expReplica {
			t.Errorf("unexpected replica list %+v", ri.State.Desc.InternalReplicas)
		}
		if ri.State.Lease == nil || *ri.State.Lease == (roachpb.Lease{}) {
			t.Error("expected a nontrivial Lease")
		}
		if ri.State.LastIndex == 0 {
			t.Error("expected positive LastIndex")
		}
		if len(ri.LeaseHistory) == 0 {
			t.Error("expected at least one lease history entry")
		}
	}
}

// TestRaftDebug verifies that the raft status endpoint returns ranges when
// queried without arguments, and that a query restricted via range_ids
// parameters returns exactly the requested ranges.
func TestRaftDebug(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s := startServer(t)
	defer s.Stopper().Stop(context.Background())

	var resp serverpb.RaftDebugResponse
	if err := getStatusJSONProto(s, "raft", &resp); err != nil {
		t.Fatal(err)
	}
	if len(resp.Ranges) == 0 {
		t.Errorf("didn't get any ranges")
	}

	if len(resp.Ranges) < 3 {
		t.Errorf("expected more than 2 ranges, got %d", len(resp.Ranges))
	}

	// Build a request URI that asks for the first two range IDs encountered
	// while iterating over the unfiltered response.
	reqURI := "raft"
	requestedIDs := []roachpb.RangeID{}
	for id := range resp.Ranges {
		if len(requestedIDs) == 0 {
			reqURI += "?"
		} else {
			reqURI += "&"
		}
		reqURI += fmt.Sprintf("range_ids=%d", id)
		requestedIDs = append(requestedIDs, id)
		if len(requestedIDs) >= 2 {
			break
		}
	}

	if err := getStatusJSONProto(s, reqURI, &resp); err != nil {
		t.Fatal(err)
	}

	// Make sure we get exactly two ranges back.
	if len(resp.Ranges) != 2 {
		t.Errorf("expected exactly two ranges in response, got %d", len(resp.Ranges))
	}

	// Make sure the ranges returned are those requested.
	for _, reqID := range requestedIDs {
		if _, ok := resp.Ranges[reqID]; !ok {
			t.Errorf("request URI was %s, but range ID %d not returned: %+v", reqURI, reqID, resp.Ranges)
		}
	}
}

// TestStatusVars verifies that prometheus metrics are available via the
// /_status/vars endpoint.
func TestStatusVars(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, _, _ := serverutils.StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop(context.Background())

	body, err := getText(s, s.AdminURL()+statusPrefix+"vars")
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Contains(body, []byte("# TYPE sql_bytesout counter\nsql_bytesout")) {
		t.Errorf("expected sql_bytesout, got: %s", body)
	}
}

// TestSpanStatsResponse posts a span stats request covering
// [RKeyMin, RKeyMax) to the HTTP "span" status endpoint and checks that the
// reported range count equals the server's expected initial range count.
func TestSpanStatsResponse(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ts := startServer(t)
	defer ts.Stopper().Stop(context.Background())

	httpClient, err := ts.GetAdminAuthenticatedHTTPClient()
	if err != nil {
		t.Fatal(err)
	}

	var response serverpb.SpanStatsResponse
	request := serverpb.SpanStatsRequest{
		NodeID:   "1",
		StartKey: []byte(roachpb.RKeyMin),
		EndKey:   []byte(roachpb.RKeyMax),
	}

	url := ts.AdminURL() + statusPrefix + "span"
	if err := httputil.PostJSON(httpClient, url, &request, &response); err != nil {
		t.Fatal(err)
	}
	initialRanges, err := ts.ExpectedInitialRangeCount()
	if err != nil {
		t.Fatal(err)
	}
	if a, e := int(response.RangeCount), initialRanges; a != e {
		t.Errorf("expected %d ranges, found %d", e, a)
	}
}

// TestSpanStatsGRPCResponse issues the same span stats request as
// TestSpanStatsResponse, but through the gRPC status client, and checks the
// reported range count against the server's expected initial range count.
func TestSpanStatsGRPCResponse(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()
	ts := startServer(t)
	defer ts.Stopper().Stop(ctx)

	// NOTE(review): rpcStopper is created and stopped but not passed to
	// anything visible in this test; confirm whether it is still needed.
	rpcStopper := stop.NewStopper()
	defer rpcStopper.Stop(ctx)
	rpcContext := newRPCTestContext(ts, ts.RPCContext().Config)
	request := serverpb.SpanStatsRequest{
		NodeID:   "1",
		StartKey: []byte(roachpb.RKeyMin),
		EndKey:   []byte(roachpb.RKeyMax),
	}

	url := ts.ServingRPCAddr()
	nodeID := ts.NodeID()
	conn, err := rpcContext.GRPCDialNode(url, nodeID, rpc.DefaultClass).Connect(ctx)
	if err != nil {
		t.Fatal(err)
	}
	client := serverpb.NewStatusClient(conn)

	response, err := client.SpanStats(ctx, &request)
	if err != nil {
		t.Fatal(err)
	}
	initialRanges, err := ts.ExpectedInitialRangeCount()
	if err != nil {
		t.Fatal(err)
	}
	if a, e := int(response.RangeCount), initialRanges; a != e {
		t.Fatalf("expected %d ranges, found %d", e, a)
	}
}

// TestNodesGRPCResponse verifies that the Nodes gRPC status call, made as the
// root user, reports exactly one node.
func TestNodesGRPCResponse(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ts := startServer(t)
	defer ts.Stopper().Stop(context.Background())

	rootConfig := testutils.NewTestBaseContext(security.RootUser)
	rpcContext := newRPCTestContext(ts, rootConfig)
	var request serverpb.NodesRequest

	url := ts.ServingRPCAddr()
	nodeID := ts.NodeID()
	conn, err := rpcContext.GRPCDialNode(url, nodeID, rpc.DefaultClass).Connect(context.Background())
	if err != nil {
		t.Fatal(err)
	}
	client := serverpb.NewStatusClient(conn)

	response, err := client.Nodes(context.Background(), &request)
	if err != nil {
		t.Fatal(err)
	}

	if a, e := len(response.Nodes), 1; a != e {
		t.Errorf("expected %d node(s), found %d", e, a)
	}
}

// TestCertificatesResponse verifies that the certificates/local status
// endpoint returns four certificates and that the first two entries are the
// CA and node certificates, byte-for-byte equal to the embedded test assets.
func TestCertificatesResponse(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ts := startServer(t)
	defer ts.Stopper().Stop(context.Background())

	var response serverpb.CertificatesResponse
	if err := getStatusJSONProto(ts, "certificates/local", &response); err != nil {
		t.Fatal(err)
	}

	// We expect 4 certificates: CA, node, and client certs for root, testuser.
	// A mismatch is fatal because the checks below index into the list and
	// would otherwise panic on a short response.
	if a, e := len(response.Certificates), 4; a != e {
		t.Fatalf("expected %d certificates, found %d", e, a)
	}

	// Read the certificates from the embedded assets.
	caPath := filepath.Join(security.EmbeddedCertsDir, security.EmbeddedCACert)
	nodePath := filepath.Join(security.EmbeddedCertsDir, security.EmbeddedNodeCert)

	caFile, err := securitytest.EmbeddedAssets.ReadFile(caPath)
	if err != nil {
		t.Fatal(err)
	}

	nodeFile, err := securitytest.EmbeddedAssets.ReadFile(nodePath)
	if err != nil {
		t.Fatal(err)
	}

	// The response is ordered: CA cert followed by node cert.
	cert := response.Certificates[0]
	if a, e := cert.Type, serverpb.CertificateDetails_CA; a != e {
		t.Errorf("wrong type %s, expected %s", a, e)
	} else if cert.ErrorMessage != "" {
		t.Errorf("expected cert without error, got %v", cert.ErrorMessage)
	} else if a, e := cert.Data, caFile; !bytes.Equal(a, e) {
		t.Errorf("mismatched contents: %s vs %s", a, e)
	}

	cert = response.Certificates[1]
	if a, e := cert.Type, serverpb.CertificateDetails_NODE; a != e {
		t.Errorf("wrong type %s, expected %s", a, e)
	} else if cert.ErrorMessage != "" {
		t.Errorf("expected cert without error, got %v", cert.ErrorMessage)
	} else if a, e := cert.Data, nodeFile; !bytes.Equal(a, e) {
		t.Errorf("mismatched contents: %s vs %s", a, e)
	}
}

// TestDiagnosticsResponse verifies that the diagnostics/local status endpoint
// returns a report whose node ID matches the server's node ID.
func TestDiagnosticsResponse(t *testing.T) {
	defer leaktest.AfterTest(t)()

	s, _, _ := serverutils.StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop(context.Background())

	var resp diagnosticspb.DiagnosticReport
	if err := getStatusJSONProto(s, "diagnostics/local", &resp); err != nil {
		t.Fatal(err)
	}

	// The endpoint just serializes result of getReportingInfo() which is already
	// tested elsewhere, so simply verify that we have a non-empty reply.
	if expected, actual := s.NodeID(), resp.Node.NodeID; expected != actual {
		t.Fatalf("expected %v got %v", expected, actual)
	}
}

// TestRangeResponse verifies that the range/1 status endpoint returns a
// single, valid per-node response containing exactly one range info with the
// replica descriptor, lease, last index and lease history expected on a
// single-node cluster.
func TestRangeResponse(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer kvserver.EnableLeaseHistory(100)()
	ts := startServer(t)
	defer ts.Stopper().Stop(context.Background())

	// Perform a scan to ensure that all the raft groups are initialized.
	if _, err := ts.db.Scan(context.Background(), keys.LocalMax, roachpb.KeyMax, 0); err != nil {
		t.Fatal(err)
	}

	var response serverpb.RangeResponse
	if err := getStatusJSONProto(ts, "range/1", &response); err != nil {
		t.Fatal(err)
	}

	// This is a single node cluster, so only expect a single response.
	if e, a := 1, len(response.ResponsesByNodeID); e != a {
		t.Errorf("got the wrong number of responses, expected %d, actual %d", e, a)
	}

	node1Response := response.ResponsesByNodeID[response.NodeID]

	// The response should come back as valid.
	if !node1Response.Response {
		t.Errorf("node1's response returned as false, expected true")
	}

	// The response should include just the one range. A mismatch is fatal
	// because Infos[0] is read below and would panic on an empty list.
	if e, a := 1, len(node1Response.Infos); e != a {
		t.Fatalf("got the wrong number of ranges in the response, expected %d, actual %d", e, a)
	}

	info := node1Response.Infos[0]
	expReplica := roachpb.ReplicaDescriptor{
		NodeID:    1,
		StoreID:   1,
		ReplicaID: 1,
	}

	// Check some other values.
	if len(info.State.Desc.InternalReplicas) != 1 || info.State.Desc.InternalReplicas[0] != expReplica {
		t.Errorf("unexpected replica list %+v", info.State.Desc.InternalReplicas)
	}

	if info.State.Lease == nil || *info.State.Lease == (roachpb.Lease{}) {
		t.Error("expected a nontrivial Lease")
	}

	if info.State.LastIndex == 0 {
		t.Error("expected positive LastIndex")
	}

	if len(info.LeaseHistory) == 0 {
		t.Error("expected at least one lease history entry")
	}
}

// TestRemoteDebugModeSetting verifies that, with
// server.remote_debugging.mode set to 'off', debugging endpoints reached over
// HTTP are rejected with "403 Forbidden" while the same calls over gRPC
// succeed, and that keys are omitted from the Ranges, HotRanges and RangeLog
// responses.
func TestRemoteDebugModeSetting(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()
	s, db, _ := serverutils.StartServer(t, base.TestServerArgs{
		StoreSpecs: []base.StoreSpec{
			base.DefaultTestStoreSpec,
			base.DefaultTestStoreSpec,
			base.DefaultTestStoreSpec,
		},
	})
	ts := s.(*TestServer)
	defer ts.Stopper().Stop(ctx)

	if _, err := db.Exec(`SET CLUSTER SETTING server.remote_debugging.mode = 'off'`); err != nil {
		t.Fatal(err)
	}

	// Create a split so that there's some records in the system.rangelog table.
	// The test needs them.
	if _, err := db.Exec(
		`create table t(x int primary key);
		alter table t split at values(1);`,
	); err != nil {
		t.Fatal(err)
	}

	// Verify that the remote debugging mode is respected for HTTP requests.
	// This needs to be wrapped in SucceedsSoon because settings changes have to
	// propagate through gossip and thus don't always take effect immediately.
	testutils.SucceedsSoon(t, func() error {
		for _, tc := range []struct {
			path     string
			response protoutil.Message
		}{
			{"gossip/local", &gossip.InfoStatus{}},
			{"allocator/node/local", &serverpb.AllocatorResponse{}},
			{"allocator/range/1", &serverpb.AllocatorResponse{}},
			{"logs/local", &serverpb.LogEntriesResponse{}},
			{"logfiles/local/cockroach.log", &serverpb.LogEntriesResponse{}},
			{"local_sessions", &serverpb.ListSessionsResponse{}},
			{"sessions", &serverpb.ListSessionsResponse{}},
		} {
			err := getStatusJSONProto(ts, tc.path, tc.response)
			if !testutils.IsError(err, "403 Forbidden") {
				return fmt.Errorf("expected '403 Forbidden' error, but %q returned %+v: %v",
					tc.path, tc.response, err)
			}
		}
		return nil
	})

	// But not for grpc requests. The fact that the above gets an error but these
	// don't indicate that the grpc gateway is correctly adding the necessary
	// metadata for differentiating between the two (and that we're correctly
	// interpreting said metadata).
	rootConfig := testutils.NewTestBaseContext(security.RootUser)
	rpcContext := newRPCTestContext(ts, rootConfig)
	url := ts.ServingRPCAddr()
	nodeID := ts.NodeID()
	conn, err := rpcContext.GRPCDialNode(url, nodeID, rpc.DefaultClass).Connect(ctx)
	if err != nil {
		t.Fatal(err)
	}
	client := serverpb.NewStatusClient(conn)
	if _, err := client.Gossip(ctx, &serverpb.GossipRequest{}); err != nil {
		t.Error(err)
	}
	if _, err := client.Allocator(ctx, &serverpb.AllocatorRequest{}); err != nil {
		t.Error(err)
	}
	if _, err := client.AllocatorRange(ctx, &serverpb.AllocatorRangeRequest{}); err != nil {
		t.Error(err)
	}
	if _, err := client.Logs(ctx, &serverpb.LogsRequest{}); err != nil {
		t.Error(err)
	}
	if _, err := client.ListLocalSessions(ctx, &serverpb.ListSessionsRequest{}); err != nil {
		t.Error(err)
	}
	if _, err := client.ListSessions(ctx, &serverpb.ListSessionsRequest{}); err != nil {
		t.Error(err)
	}

	// Check that keys are properly omitted from the Ranges, HotRanges, and
	// RangeLog endpoints.
	var rangesResp serverpb.RangesResponse
	if err := getStatusJSONProto(ts, "ranges/local", &rangesResp); err != nil {
		t.Fatal(err)
	}
	if len(rangesResp.Ranges) == 0 {
		t.Errorf("didn't get any ranges")
	}
	for _, ri := range rangesResp.Ranges {
		if ri.Span.StartKey != omittedKeyStr || ri.Span.EndKey != omittedKeyStr ||
			ri.State.ReplicaState.Desc.StartKey != nil || ri.State.ReplicaState.Desc.EndKey != nil {
			t.Errorf("unexpected key value found in RangeInfo: %+v", ri)
		}
	}

	var hotRangesResp serverpb.HotRangesResponse
	if err := getStatusJSONProto(ts, "hotranges", &hotRangesResp); err != nil {
		t.Fatal(err)
	}
	if len(hotRangesResp.HotRangesByNodeID) == 0 {
		t.Errorf("didn't get hot range responses from any nodes")
	}
	for nodeID, nodeResp := range hotRangesResp.HotRangesByNodeID {
		if len(nodeResp.Stores) == 0 {
			t.Errorf("didn't get any stores in hot range response from n%d: %v",
				nodeID, nodeResp.ErrorMessage)
		}
		for _, storeResp := range nodeResp.Stores {
			// Only the first store will actually have any ranges on it.
			if storeResp.StoreID != roachpb.StoreID(1) {
				continue
			}
			if len(storeResp.HotRanges) == 0 {
				t.Errorf("didn't get any hot ranges in response from n%d,s%d: %v",
					nodeID, storeResp.StoreID, nodeResp.ErrorMessage)
			}
			for _, r := range storeResp.HotRanges {
				if r.Desc.StartKey != nil || r.Desc.EndKey != nil {
					t.Errorf("unexpected key value found in hot ranges range descriptor: %+v", r.Desc)
				}
			}
		}
	}

	var rangelogResp serverpb.RangeLogResponse
	if err := getAdminJSONProto(ts, "rangelog", &rangelogResp); err != nil {
		t.Fatal(err)
	}
	if len(rangelogResp.Events) == 0 {
		t.Errorf("didn't get any Events")
	}
	for _, event := range rangelogResp.Events {
		// NOTE(review): UpdatedDesc is only inspected when NewDesc is set, and
		// is dereferenced without its own nil check; confirm that events with
		// a NewDesc always carry an UpdatedDesc as well.
		if event.Event.Info.NewDesc != nil {
			if event.Event.Info.NewDesc.StartKey != nil || event.Event.Info.NewDesc.EndKey != nil ||
				event.Event.Info.UpdatedDesc.StartKey != nil || event.Event.Info.UpdatedDesc.EndKey != nil {
				t.Errorf("unexpected key value found in rangelog event: %+v", event)
			}
		}
		if strings.Contains(event.PrettyInfo.NewDesc, "Min-System") ||
			strings.Contains(event.PrettyInfo.UpdatedDesc, "Min-System") {
			t.Errorf("unexpected key value found in rangelog event info: %+v", event.PrettyInfo)
		}
	}
}

// TestStatusAPIStatements runs a fixed list of SQL statements against the
// third server of a three-node test cluster and verifies that the
// "statements" status endpoint on the first server reports exactly those
// statements (in fingerprinted form where one is given), ignoring failed and
// internal statements.
func TestStatusAPIStatements(t *testing.T) {
	defer leaktest.AfterTest(t)()

	testCluster := serverutils.StartTestCluster(t, 3, base.TestClusterArgs{})
	defer testCluster.Stopper().Stop(context.Background())

	firstServerProto := testCluster.Server(0)
	thirdServerSQL := sqlutils.MakeSQLRunner(testCluster.ServerConn(2))

	statements := []struct {
		stmt          string
		fingerprinted string
	}{
		{stmt: `CREATE DATABASE roachblog`},
		{stmt: `SET database = roachblog`},
		{stmt: `CREATE TABLE posts (id INT8 PRIMARY KEY, body STRING)`},
		{
			stmt:          `INSERT INTO posts VALUES (1, 'foo')`,
			fingerprinted: `INSERT INTO posts VALUES (_, _)`,
		},
		{stmt: `SELECT * FROM posts`},
	}

	for _, stmt := range statements {
		thirdServerSQL.Exec(t, stmt.stmt)
	}

	// Hit query endpoint.
	var resp serverpb.StatementsResponse
	if err := getStatusJSONProto(firstServerProto, "statements", &resp); err != nil {
		t.Fatal(err)
	}

	// See if the statements returned are what we executed.
	var expectedStatements []string
	for _, stmt := range statements {
		expectedStmt := stmt.stmt
		if stmt.fingerprinted != "" {
			expectedStmt = stmt.fingerprinted
		}
		expectedStatements = append(expectedStatements, expectedStmt)
	}

	var statementsInResponse []string
	for _, respStatement := range resp.Statements {
		if respStatement.Key.KeyData.Failed {
			// We ignore failed statements here as the INSERT statement can fail and
			// be automatically retried, confusing the test success check.
			continue
		}
		if strings.HasPrefix(respStatement.Key.KeyData.App, sqlbase.InternalAppNamePrefix) {
			// We ignore internal queries, these are not relevant for the
			// validity of this test.
			continue
		}
		statementsInResponse = append(statementsInResponse, respStatement.Key.KeyData.Query)
	}

	// Compare as sorted lists so the check does not depend on response order.
	sort.Strings(expectedStatements)
	sort.Strings(statementsInResponse)

	if !reflect.DeepEqual(expectedStatements, statementsInResponse) {
		t.Fatalf("expected queries\n\n%v\n\ngot queries\n\n%v\n%s",
			expectedStatements, statementsInResponse, pretty.Sprint(resp))
	}
}

// TestListSessionsSecurity verifies the permission checks on the session
// listing endpoints: over HTTP a non-admin user gets an error when asking for
// root's sessions while an admin does not, and over gRPC (as root) listing
// sessions succeeds for every tested username.
func TestListSessionsSecurity(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, _, _ := serverutils.StartServer(t, base.TestServerArgs{})
	ts := s.(*TestServer)
	defer ts.Stopper().Stop(context.Background())

	ctx := context.Background()

	for _, requestWithAdmin := range []bool{true, false} {
		t.Run(fmt.Sprintf("admin=%v", requestWithAdmin), func(t *testing.T) {
			myUser := authenticatedUserNameNoAdmin
			expectedErrOnListingRootSessions := "does not have permission to view sessions from user"
			if requestWithAdmin {
				myUser = authenticatedUserName
				expectedErrOnListingRootSessions = ""
			}

			// HTTP requests respect the authenticated username from the HTTP session.
			testCases := []struct {
				endpoint    string
				expectedErr string
			}{
				{"local_sessions", ""},
				{"sessions", ""},
				{fmt.Sprintf("local_sessions?username=%s", myUser), ""},
				{fmt.Sprintf("sessions?username=%s", myUser), ""},
				{"local_sessions?username=root", expectedErrOnListingRootSessions},
				{"sessions?username=root", expectedErrOnListingRootSessions},
			}
			for _, tc := range testCases {
				var response serverpb.ListSessionsResponse
				err := getStatusJSONProtoWithAdminOption(ts, tc.endpoint, &response, requestWithAdmin)
				if tc.expectedErr == "" {
					if err != nil || len(response.Errors) > 0 {
						t.Errorf("unexpected failure listing sessions from %s; error: %v; response errors: %v",
							tc.endpoint, err, response.Errors)
					}
				} else {
					// The expected error may surface either as the request
					// error or as the first error embedded in the response.
					respErr := "<no error>"
					if len(response.Errors) > 0 {
						respErr = response.Errors[0].Message
					}
					if !testutils.IsError(err, tc.expectedErr) &&
						!strings.Contains(respErr, tc.expectedErr) {
						t.Errorf("did not get expected error %q when listing sessions from %s: %v",
							tc.expectedErr, tc.endpoint, err)
					}
				}
			}
		})
	}

	// gRPC requests behave as root and thus are always allowed.
	rootConfig := testutils.NewTestBaseContext(security.RootUser)
	rpcContext := newRPCTestContext(ts, rootConfig)
	url := ts.ServingRPCAddr()
	nodeID := ts.NodeID()
	conn, err := rpcContext.GRPCDialNode(url, nodeID, rpc.DefaultClass).Connect(context.Background())
	if err != nil {
		t.Fatal(err)
	}
	client := serverpb.NewStatusClient(conn)

	for _, user := range []string{"", authenticatedUserName, "root"} {
		request := &serverpb.ListSessionsRequest{Username: user}

		// The RPC error and the response errors are reported separately so the
		// response is never dereferenced when the call itself failed.
		localResp, err := client.ListLocalSessions(ctx, request)
		if err != nil {
			t.Errorf("unexpected failure listing local sessions for %q; error: %v", user, err)
		} else if len(localResp.Errors) > 0 {
			t.Errorf("unexpected failure listing local sessions for %q; response errors: %v",
				user, localResp.Errors)
		}

		clusterResp, err := client.ListSessions(ctx, request)
		if err != nil {
			t.Errorf("unexpected failure listing sessions for %q; error: %v", user, err)
		} else if len(clusterResp.Errors) > 0 {
			t.Errorf("unexpected failure listing sessions for %q; response errors: %v",
				user, clusterResp.Errors)
		}
	}
}

// TestCreateStatementDiagnosticsReport posts a statement diagnostics request
// to the stmtdiagreports endpoint and verifies that a subsequent GET returns
// a report with the same statement fingerprint.
func TestCreateStatementDiagnosticsReport(t *testing.T) {
	defer leaktest.AfterTest(t)()

	s, _, _ := serverutils.StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop(context.Background())

	req := &serverpb.CreateStatementDiagnosticsReportRequest{
		StatementFingerprint: "INSERT INTO test VALUES (_)",
	}
	var resp serverpb.CreateStatementDiagnosticsReportResponse
	if err := postStatusJSONProto(s, "stmtdiagreports", req, &resp); err != nil {
		t.Fatal(err)
	}

	var respGet serverpb.StatementDiagnosticsReportsResponse
	if err := getStatusJSONProto(s, "stmtdiagreports", &respGet); err != nil {
		t.Fatal(err)
	}

	// Guard the index below: an empty list means the request was not stored.
	if len(respGet.Reports) == 0 {
		t.Fatal("statement diagnostics request was not persisted")
	}
	if respGet.Reports[0].StatementFingerprint != req.StatementFingerprint {
		t.Fatal("statement diagnostics request was not persisted")
	}
}

// TestStatementDiagnosticsCompleted requests diagnostics for an INSERT
// fingerprint, executes a matching statement, and verifies that the report is
// marked completed and that the diagnostics fetched by ID contain a trace.
func TestStatementDiagnosticsCompleted(t *testing.T) {
	defer leaktest.AfterTest(t)()

	s, db, _ := serverutils.StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop(context.Background())

	_, err := db.Exec("CREATE TABLE test (x int PRIMARY KEY)")
	if err != nil {
		t.Fatal(err)
	}

	req := &serverpb.CreateStatementDiagnosticsReportRequest{
		StatementFingerprint: "INSERT INTO test VALUES (_)",
	}
	var resp serverpb.CreateStatementDiagnosticsReportResponse
	if err := postStatusJSONProto(s, "stmtdiagreports", req, &resp); err != nil {
		t.Fatal(err)
	}

	_, err = db.Exec("INSERT INTO test VALUES (1)")
	if err != nil {
		t.Fatal(err)
	}

	var respGet serverpb.StatementDiagnosticsReportsResponse
	if err := getStatusJSONProto(s, "stmtdiagreports", &respGet); err != nil {
		t.Fatal(err)
	}

	// Guard the index below: without any report nothing can have been captured.
	if len(respGet.Reports) == 0 || !respGet.Reports[0].Completed {
		t.Fatal("statement diagnostics was not captured")
	}

	var diagRespGet serverpb.StatementDiagnosticsResponse
	diagPath := fmt.Sprintf("stmtdiag/%d", respGet.Reports[0].StatementDiagnosticsId)
	if err := getStatusJSONProto(s, diagPath, &diagRespGet); err != nil {
		t.Fatal(err)
	}

	trace := diagRespGet.Diagnostics.Trace
	if trace == "" ||
		!strings.Contains(trace, "traced statement") ||
		!strings.Contains(trace, "statement execution committed the txn") {
		t.Fatal("statement diagnostics did not capture a trace")
	}
}

// TestJobStatusResponse verifies the JobStatus gRPC call: a request for a
// nonexistent job ID returns an error and a nil response, and a request for a
// freshly created job returns that job's ID, payload and progress.
func TestJobStatusResponse(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ts := startServer(t)
	defer ts.Stopper().Stop(context.Background())

	rootConfig := testutils.NewTestBaseContext(security.RootUser)
	rpcContext := newRPCTestContext(ts, rootConfig)

	url := ts.ServingRPCAddr()
	nodeID := ts.NodeID()
	conn, err := rpcContext.GRPCDialNode(url, nodeID, rpc.DefaultClass).Connect(context.Background())
	if err != nil {
		t.Fatal(err)
	}
	client := serverpb.NewStatusClient(conn)

	// A job ID that cannot exist must produce an error and no response.
	request := &serverpb.JobStatusRequest{JobId: -1}
	response, err := client.JobStatus(context.Background(), request)
	require.Regexp(t, `job with ID -1 does not exist`, err)
	require.Nil(t, response)

	ctx := context.Background()
	job, err := ts.JobRegistry().(*jobs.Registry).CreateJobWithTxn(
		ctx,
		jobs.Record{
			Description: "testing",
			Statement:   "SELECT 1",
			Username:    "root",
			Details: jobspb.ImportDetails{
				Tables: []jobspb.ImportDetails_Table{
					{
						Desc: &sqlbase.TableDescriptor{
							ID: 1,
						},
					},
					{
						Desc: &sqlbase.TableDescriptor{
							ID: 2,
						},
					},
				},
				URIs: []string{"a", "b"},
			},
			Progress:      jobspb.ImportProgress{},
			DescriptorIDs: []sqlbase.ID{1, 2, 3},
		},
		nil)
	if err != nil {
		t.Fatal(err)
	}
	request.JobId = *job.ID()
	response, err = client.JobStatus(context.Background(), request)
	if err != nil {
		t.Fatal(err)
	}
	require.Equal(t, *job.ID(), response.Job.Id)
	require.Equal(t, job.Payload(), *response.Job.Payload)
	require.Equal(t, job.Progress(), *response.Job.Progress)
}