github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/trace_test.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sql_test 12 13 import ( 14 "context" 15 gosql "database/sql" 16 "fmt" 17 "net/url" 18 "sort" 19 "strings" 20 "testing" 21 22 "github.com/cockroachdb/cockroach/pkg/base" 23 "github.com/cockroachdb/cockroach/pkg/security" 24 "github.com/cockroachdb/cockroach/pkg/sql" 25 "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" 26 "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" 27 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 28 "github.com/cockroachdb/cockroach/pkg/util/log" 29 "github.com/cockroachdb/logtags" 30 ) 31 32 func TestTrace(t *testing.T) { 33 defer leaktest.AfterTest(t)() 34 35 s := log.Scope(t) 36 defer s.Close(t) 37 38 // These are always appended, even without the test specifying it. 39 alwaysOptionalSpans := []string{ 40 "[async] drain", 41 "[async] storage.pendingLeaseRequest: requesting lease", 42 "[async] storage.Store: gossip on capacity change", 43 "outbox", 44 "request range lease", 45 "range lookup", 46 } 47 48 testData := []struct { 49 name string 50 getRows func(t *testing.T, sqlDB *gosql.DB) (*gosql.Rows, error) 51 expSpans []string 52 optionalSpans []string 53 }{ 54 { 55 name: "Session", 56 getRows: func(t *testing.T, sqlDB *gosql.DB) (*gosql.Rows, error) { 57 if _, err := sqlDB.Exec("SET distsql = off"); err != nil { 58 t.Fatal(err) 59 } 60 61 // This test is specific to distsql execution. 62 if _, err := sqlDB.Exec("SET vectorize = off"); err != nil { 63 t.Fatal(err) 64 } 65 66 // Run some query with session tracing enabled. 67 if _, err := sqlDB.Exec("SET tracing = on; SELECT * FROM test.foo; SET tracing = off"); err != nil { 68 t.Fatal(err) 69 } 70 71 return sqlDB.Query( 72 "SELECT DISTINCT operation AS op FROM crdb_internal.session_trace " + 73 "WHERE operation IS NOT NULL ORDER BY op") 74 }, 75 expSpans: []string{ 76 "exec stmt", 77 "flow", 78 "session recording", 79 "sql txn", 80 "table reader", 81 "consuming rows", 82 "txn coordinator send", 83 "dist sender send", 84 "/cockroach.roachpb.Internal/Batch", 85 }, 86 }, 87 { 88 name: "SessionDistSQL", 89 getRows: func(t *testing.T, sqlDB *gosql.DB) (*gosql.Rows, error) { 90 if _, err := sqlDB.Exec("SET distsql = on"); err != nil { 91 t.Fatal(err) 92 } 93 94 // This test is specific to distsql execution. 95 if _, err := sqlDB.Exec("SET vectorize = off"); err != nil { 96 t.Fatal(err) 97 } 98 99 // Run some query with tracing enabled. 100 if _, err := sqlDB.Exec("SET tracing = on; SELECT * FROM test.foo; SET tracing = off"); err != nil { 101 t.Fatal(err) 102 } 103 104 // Check that stat collection from the above SELECT statement is output 105 // to trace. We don't insert any rows in this test, thus the expected 106 // stat value is 0. 107 rows, err := sqlDB.Query( 108 "SELECT count(message) FROM crdb_internal.session_trace " + 109 "WHERE message LIKE '%cockroach.stat.tablereader.input.rows: 0%'", 110 ) 111 if err != nil { 112 t.Fatal(err) 113 } 114 if !rows.Next() { 115 t.Fatal("unable to retrieve count") 116 } 117 118 var count int 119 if err := rows.Scan(&count); err != nil { 120 t.Fatal(err) 121 } 122 if err := rows.Close(); err != nil { 123 t.Fatal(err) 124 } 125 if count == 0 { 126 t.Fatalf("no stat messages found") 127 } 128 129 return sqlDB.Query( 130 "SELECT DISTINCT operation AS op FROM crdb_internal.session_trace " + 131 "WHERE operation IS NOT NULL ORDER BY op") 132 }, 133 expSpans: []string{ 134 "session recording", 135 "sql txn", 136 "exec stmt", 137 "flow", 138 "table reader", 139 "consuming rows", 140 "txn coordinator send", 141 "dist sender send", 142 "/cockroach.roachpb.Internal/Batch", 143 }, 144 // Depending on whether the data is local or not, we may not see these 145 // spans. 146 optionalSpans: []string{ 147 "/cockroach.sql.distsqlrun.DistSQL/SetupFlow", 148 "noop", 149 }, 150 }, 151 { 152 name: "ShowTraceFor", 153 getRows: func(_ *testing.T, sqlDB *gosql.DB) (*gosql.Rows, error) { 154 if _, err := sqlDB.Exec("SET DISTSQL = OFF"); err != nil { 155 t.Fatal(err) 156 } 157 158 // This test is specific to distsql execution. 159 if _, err := sqlDB.Exec("SET vectorize = off"); err != nil { 160 t.Fatal(err) 161 } 162 163 if _, err := sqlDB.Exec("SET tracing = on; SELECT * FROM test.foo; SET tracing = off"); err != nil { 164 t.Fatal(err) 165 } 166 return sqlDB.Query( 167 "SELECT DISTINCT operation AS op FROM [SHOW TRACE FOR SESSION] " + 168 "WHERE operation IS NOT NULL ORDER BY op") 169 }, 170 expSpans: []string{ 171 "exec stmt", 172 "flow", 173 "session recording", 174 "sql txn", 175 "table reader", 176 "consuming rows", 177 "txn coordinator send", 178 "dist sender send", 179 "/cockroach.roachpb.Internal/Batch", 180 }, 181 }, 182 { 183 name: "ShowTraceForDistSQL", 184 getRows: func(_ *testing.T, sqlDB *gosql.DB) (*gosql.Rows, error) { 185 if _, err := sqlDB.Exec("SET distsql = on"); err != nil { 186 t.Fatal(err) 187 } 188 189 // This test is specific to distsql execution. 190 if _, err := sqlDB.Exec("SET vectorize = off"); err != nil { 191 t.Fatal(err) 192 } 193 194 if _, err := sqlDB.Exec("SET tracing = on; SELECT * FROM test.foo; SET tracing = off"); err != nil { 195 t.Fatal(err) 196 } 197 return sqlDB.Query( 198 "SELECT DISTINCT operation AS op FROM [SHOW TRACE FOR SESSION] " + 199 "WHERE operation IS NOT NULL ORDER BY op") 200 }, 201 expSpans: []string{ 202 "session recording", 203 "sql txn", 204 "exec stmt", 205 "flow", 206 "table reader", 207 "consuming rows", 208 "txn coordinator send", 209 "dist sender send", 210 "/cockroach.roachpb.Internal/Batch", 211 }, 212 // Depending on whether the data is local or not, we may not see these 213 // spans. 214 optionalSpans: []string{ 215 "/cockroach.sql.distsqlrun.DistSQL/SetupFlow", 216 "noop", 217 }, 218 }, 219 { 220 name: "ShowTraceForVectorized", 221 getRows: func(_ *testing.T, sqlDB *gosql.DB) (*gosql.Rows, error) { 222 if _, err := sqlDB.Exec("SET distsql = off"); err != nil { 223 t.Fatal(err) 224 } 225 if _, err := sqlDB.Exec("SET vectorize = on; SET vectorize_row_count_threshold=0"); err != nil { 226 t.Fatal(err) 227 } 228 if _, err := sqlDB.Exec("SET tracing = on; SELECT * FROM test.foo; SET tracing = off"); err != nil { 229 t.Fatal(err) 230 } 231 return sqlDB.Query( 232 "SELECT DISTINCT operation AS op FROM [SHOW TRACE FOR SESSION] " + 233 "WHERE operation IS NOT NULL ORDER BY op") 234 }, 235 expSpans: []string{ 236 "session recording", 237 "sql txn", 238 "exec stmt", 239 "flow", 240 "materializer", 241 "*colexec.CancelChecker", 242 "consuming rows", 243 "txn coordinator send", 244 "dist sender send", 245 "/cockroach.roachpb.Internal/Batch", 246 }, 247 }, 248 } 249 250 // Create a cluster. We'll run sub-tests using each node of this cluster. 251 const numNodes = 3 252 cluster := serverutils.StartTestCluster(t, numNodes, base.TestClusterArgs{}) 253 defer cluster.Stopper().Stop(context.Background()) 254 255 clusterDB := cluster.ServerConn(0) 256 if _, err := clusterDB.Exec(` 257 CREATE DATABASE test; 258 259 --- test.foo is a single range table. 260 CREATE TABLE test.foo (id INT PRIMARY KEY); 261 262 --- test.bar is a multi-range table. 263 CREATE TABLE test.bar (id INT PRIMARY KEY); 264 ALTER TABLE test.bar SPLIT AT VALUES (5); 265 `); err != nil { 266 t.Fatal(err) 267 } 268 269 for _, test := range testData { 270 test.optionalSpans = append(test.optionalSpans, alwaysOptionalSpans...) 271 sort.Strings(test.expSpans) 272 273 t.Run(test.name, func(t *testing.T) { 274 // Session tracing needs to work regardless of whether tracing is enabled, so 275 // we're going to test both cases. 276 // 277 // We'll also check traces from all nodes. The point is to be sure that we 278 // test a node that is different than the leaseholder for the range, so that 279 // the trace contains remote spans. 280 for _, enableTr := range []bool{false, true} { 281 name := "TracingOff" 282 if enableTr { 283 name = "TracingOn" 284 } 285 t.Run(name, func(t *testing.T) { 286 for i := 0; i < numNodes; i++ { 287 t.Run(fmt.Sprintf("node-%d", i), func(t *testing.T) { 288 // Use a new "session" for each sub-test rather than 289 // cluster.ServerConn() so that per-session state has a known 290 // value. This is necessary for the check below that the 291 // session_trace starts empty. 292 // 293 // TODO(andrei): Pull the check for an empty session_trace out of 294 // the sub-tests so we can use cluster.ServerConn(i) here. 295 pgURL, cleanup := sqlutils.PGUrl( 296 t, cluster.Server(i).ServingSQLAddr(), "TestTrace", url.User(security.RootUser)) 297 defer cleanup() 298 sqlDB, err := gosql.Open("postgres", pgURL.String()) 299 if err != nil { 300 t.Fatal(err) 301 } 302 defer sqlDB.Close() 303 304 sqlDB.SetMaxOpenConns(1) 305 306 // Run a non-traced read to acquire a lease on the table, so that the 307 // traced read below doesn't need to take a lease. Tracing a lease 308 // acquisition incurs some spans that are too fragile to test here. 309 if _, err := sqlDB.Exec(`SELECT * FROM test.foo LIMIT 1`); err != nil { 310 t.Fatal(err) 311 } 312 313 if _, err := cluster.ServerConn(0).Exec( 314 fmt.Sprintf(`SET CLUSTER SETTING trace.debug.enable = %t`, enableTr), 315 ); err != nil { 316 t.Fatal(err) 317 } 318 319 // Sanity check that new sessions don't have trace info on them. 320 row := sqlDB.QueryRow("SELECT count(1) FROM crdb_internal.session_trace") 321 var count int 322 if err := row.Scan(&count); err != nil { 323 t.Fatal(err) 324 } 325 if count != 0 { 326 t.Fatalf("expected crdb_internal.session_trace to be empty "+ 327 "at the beginning of a session, but it wasn't. Count: %d.", count) 328 } 329 330 rows, err := test.getRows(t, sqlDB) 331 if err != nil { 332 t.Fatal(err) 333 } 334 defer rows.Close() 335 336 ignoreSpans := make(map[string]bool) 337 for _, s := range test.optionalSpans { 338 ignoreSpans[s] = true 339 } 340 r := 0 341 for rows.Next() { 342 var op string 343 if err := rows.Scan(&op); err != nil { 344 t.Fatal(err) 345 } 346 if ignoreSpans[op] { 347 continue 348 } 349 350 remainingErr := false 351 if r >= len(test.expSpans) { 352 t.Errorf("extra span: %s", op) 353 remainingErr = true 354 } else if op != test.expSpans[r] { 355 t.Errorf("expected span: %q, got: %q", test.expSpans[r], op) 356 remainingErr = true 357 } 358 if remainingErr { 359 for rows.Next() { 360 if err := rows.Scan(&op); err != nil { 361 t.Fatal(err) 362 } 363 if ignoreSpans[op] { 364 continue 365 } 366 t.Errorf("remaining span: %q", op) 367 } 368 return 369 } 370 r++ 371 } 372 if r < len(test.expSpans) { 373 t.Fatalf("missing expected spans: %s", test.expSpans[r:]) 374 } 375 }) 376 } 377 }) 378 } 379 }) 380 } 381 } 382 383 // TestTraceFieldDecomposition checks that SHOW TRACE is able to decompose 384 // the parts of a trace/log message into different columns properly. 385 func TestTraceFieldDecomposition(t *testing.T) { 386 defer leaktest.AfterTest(t)() 387 388 query := "SELECT 42" 389 390 params := base.TestServerArgs{ 391 Knobs: base.TestingKnobs{ 392 SQLExecutor: &sql.ExecutorTestingKnobs{ 393 BeforeExecute: func(ctx context.Context, stmt string) { 394 if strings.Contains(stmt, query) { 395 // We need to check a tag containing brackets (e.g. an 396 // IPv6 address). See #18558. 397 taggedCtx := logtags.AddTag(ctx, "hello", "[::666]") 398 // We use log.Infof here (instead of log.Event) to ensure 399 // the trace message contains also a file name prefix. See 400 // #19453/#20085. 401 log.Infof(taggedCtx, "world") 402 } 403 }, 404 }, 405 }, 406 } 407 s, sqlDB, _ := serverutils.StartServer(t, params) 408 defer s.Stopper().Stop(context.Background()) 409 410 sqlDB.SetMaxOpenConns(1) 411 412 if _, err := sqlDB.Exec("SET tracing = ON"); err != nil { 413 t.Fatal(err) 414 } 415 416 if _, err := sqlDB.Exec(query); err != nil { 417 t.Fatal(err) 418 } 419 420 if _, err := sqlDB.Exec("SET tracing = OFF"); err != nil { 421 t.Fatal(err) 422 } 423 424 t.Run("SHOW TRACE", func(t *testing.T) { 425 rows, err := sqlDB.Query(`SELECT message, tag, location FROM [SHOW TRACE FOR SESSION]`) 426 if err != nil { 427 t.Fatal(err) 428 } 429 defer rows.Close() 430 431 ok := false 432 for rows.Next() { 433 var msg, ct, loc []byte 434 if err := rows.Scan(&msg, &ct, &loc); err != nil { 435 t.Fatal(err) 436 } 437 t.Logf("received trace: %q // %q // %q", msg, ct, loc) 438 // Check that brackets are properly balanced. 439 if len(ct) > 0 && ct[0] == '[' { 440 if ct[len(ct)-1] != ']' { 441 t.Errorf("tag starts with open bracket but does not close it: %q", ct) 442 } 443 } 444 c1 := strings.Count(string(ct), "[") 445 c2 := strings.Count(string(ct), "]") 446 if c1 != c2 { 447 t.Errorf("mismatched brackets: %q", ct) 448 } 449 // Check that the expected message was received. 450 if string(msg) == "world" && 451 strings.Contains(string(ct), "hello=[::666]") && 452 strings.Contains(string(loc), ".go") { 453 ok = true 454 } 455 // Check that the fields don't have heading or trailing whitespaces. 456 for _, b := range [][]byte{msg, ct, loc} { 457 if len(b) > 0 && (b[0] == ' ' || b[len(b)-1] == ' ') { 458 t.Errorf("unexpected whitespace: %q", b) 459 } 460 } 461 } 462 if !ok { 463 t.Fatal("expected message not found in trace") 464 } 465 }) 466 467 t.Run("SHOW COMPACT TRACE", func(t *testing.T) { 468 rows, err := sqlDB.Query(`SELECT message, tag FROM [SHOW COMPACT TRACE FOR SESSION];`) 469 if err != nil { 470 t.Fatal(err) 471 } 472 defer rows.Close() 473 474 ok := false 475 for rows.Next() { 476 var msg, ct []byte 477 if err := rows.Scan(&msg, &ct); err != nil { 478 t.Fatal(err) 479 } 480 t.Logf("received trace: %q // %q", msg, ct) 481 // Check that brackets are properly balanced. 482 if len(ct) > 0 && ct[0] == '[' { 483 if ct[len(ct)-1] != ']' { 484 t.Errorf("tag starts with open bracket but does not close it: %q", ct) 485 } 486 } 487 c1 := strings.Count(string(ct), "[") 488 c2 := strings.Count(string(ct), "]") 489 if c1 != c2 { 490 t.Errorf("mismatched brackets: %q", ct) 491 } 492 // Check that the expected message was received. 493 if strings.HasSuffix(string(msg), " world") && 494 strings.Contains(string(ct), "hello=[::666]") && 495 strings.Contains(string(msg), ".go") { 496 ok = true 497 } 498 // Check that the fields don't have heading or trailing whitespaces. 499 for _, b := range [][]byte{msg, ct} { 500 if len(b) > 0 && (b[0] == ' ' || b[len(b)-1] == ' ') { 501 t.Errorf("unexpected whitespace: %q", b) 502 } 503 } 504 } 505 if !ok { 506 t.Fatal("expected message not found in trace") 507 } 508 }) 509 510 } 511 512 func TestKVTraceWithCountStar(t *testing.T) { 513 defer leaktest.AfterTest(t)() 514 515 // Test that we don't crash if we try to do a KV trace 516 // on a COUNT(*) query (#19846). 517 s, db, _ := serverutils.StartServer(t, base.TestServerArgs{}) 518 defer s.Stopper().Stop(context.Background()) 519 520 r := sqlutils.MakeSQLRunner(db) 521 r.Exec(t, "CREATE DATABASE test") 522 r.Exec(t, "CREATE TABLE test.a (a INT PRIMARY KEY, b INT)") 523 r.Exec(t, "INSERT INTO test.a VALUES (1,1), (2,2)") 524 r.Exec(t, "SET tracing = on,kv; SELECT count(*) FROM test.a; SET tracing = off") 525 } 526 527 func TestKVTraceDistSQL(t *testing.T) { 528 defer leaktest.AfterTest(t)() 529 530 // Test that kv tracing works in distsql. 531 const numNodes = 2 532 cluster := serverutils.StartTestCluster(t, numNodes, base.TestClusterArgs{ 533 ReplicationMode: base.ReplicationManual, 534 ServerArgs: base.TestServerArgs{ 535 UseDatabase: "test", 536 }, 537 }) 538 defer cluster.Stopper().Stop(context.Background()) 539 540 r := sqlutils.MakeSQLRunner(cluster.ServerConn(0)) 541 r.Exec(t, "CREATE DATABASE test") 542 r.Exec(t, "CREATE TABLE test.a (a INT PRIMARY KEY, b INT)") 543 r.Exec(t, "INSERT INTO test.a VALUES (1,1), (2,2)") 544 r.Exec(t, "ALTER TABLE a SPLIT AT VALUES(1)") 545 r.Exec(t, "SET tracing = on,kv; SELECT count(*) FROM test.a; SET tracing = off") 546 547 for node := 0; node < 2; node++ { 548 rows := r.Query(t, 549 fmt.Sprintf(`SELECT tag 550 FROM [SHOW KV TRACE FOR SESSION] 551 WHERE tag LIKE '%%%d%%'`, node)) 552 if !rows.Next() { 553 t.Fatalf("no message from n%d found", node) 554 } 555 if err := rows.Close(); err != nil { 556 t.Fatal(err) 557 } 558 } 559 560 rows := r.Query(t, `SELECT * 561 FROM [SHOW KV TRACE FOR SESSION] 562 WHERE MESSAGE LIKE '%fetched: %'`) 563 if !rows.Next() { 564 t.Fatal("No kv messages found") 565 } 566 if err := rows.Close(); err != nil { 567 t.Fatal(err) 568 } 569 }