github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/trace_test.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sql_test
    12  
    13  import (
    14  	"context"
    15  	gosql "database/sql"
    16  	"fmt"
    17  	"net/url"
    18  	"sort"
    19  	"strings"
    20  	"testing"
    21  
    22  	"github.com/cockroachdb/cockroach/pkg/base"
    23  	"github.com/cockroachdb/cockroach/pkg/security"
    24  	"github.com/cockroachdb/cockroach/pkg/sql"
    25  	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
    26  	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
    27  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    28  	"github.com/cockroachdb/cockroach/pkg/util/log"
    29  	"github.com/cockroachdb/logtags"
    30  )
    31  
    32  func TestTrace(t *testing.T) {
    33  	defer leaktest.AfterTest(t)()
    34  
    35  	s := log.Scope(t)
    36  	defer s.Close(t)
    37  
    38  	// These are always appended, even without the test specifying it.
    39  	alwaysOptionalSpans := []string{
    40  		"[async] drain",
    41  		"[async] storage.pendingLeaseRequest: requesting lease",
    42  		"[async] storage.Store: gossip on capacity change",
    43  		"outbox",
    44  		"request range lease",
    45  		"range lookup",
    46  	}
    47  
    48  	testData := []struct {
    49  		name          string
    50  		getRows       func(t *testing.T, sqlDB *gosql.DB) (*gosql.Rows, error)
    51  		expSpans      []string
    52  		optionalSpans []string
    53  	}{
    54  		{
    55  			name: "Session",
    56  			getRows: func(t *testing.T, sqlDB *gosql.DB) (*gosql.Rows, error) {
    57  				if _, err := sqlDB.Exec("SET distsql = off"); err != nil {
    58  					t.Fatal(err)
    59  				}
    60  
    61  				// This test is specific to distsql execution.
    62  				if _, err := sqlDB.Exec("SET vectorize = off"); err != nil {
    63  					t.Fatal(err)
    64  				}
    65  
    66  				// Run some query with session tracing enabled.
    67  				if _, err := sqlDB.Exec("SET tracing = on; SELECT * FROM test.foo; SET tracing = off"); err != nil {
    68  					t.Fatal(err)
    69  				}
    70  
    71  				return sqlDB.Query(
    72  					"SELECT DISTINCT operation AS op FROM crdb_internal.session_trace " +
    73  						"WHERE operation IS NOT NULL ORDER BY op")
    74  			},
    75  			expSpans: []string{
    76  				"exec stmt",
    77  				"flow",
    78  				"session recording",
    79  				"sql txn",
    80  				"table reader",
    81  				"consuming rows",
    82  				"txn coordinator send",
    83  				"dist sender send",
    84  				"/cockroach.roachpb.Internal/Batch",
    85  			},
    86  		},
    87  		{
    88  			name: "SessionDistSQL",
    89  			getRows: func(t *testing.T, sqlDB *gosql.DB) (*gosql.Rows, error) {
    90  				if _, err := sqlDB.Exec("SET distsql = on"); err != nil {
    91  					t.Fatal(err)
    92  				}
    93  
    94  				// This test is specific to distsql execution.
    95  				if _, err := sqlDB.Exec("SET vectorize = off"); err != nil {
    96  					t.Fatal(err)
    97  				}
    98  
    99  				// Run some query with tracing enabled.
   100  				if _, err := sqlDB.Exec("SET tracing = on; SELECT * FROM test.foo; SET tracing = off"); err != nil {
   101  					t.Fatal(err)
   102  				}
   103  
   104  				// Check that stat collection from the above SELECT statement is output
   105  				// to trace. We don't insert any rows in this test, thus the expected
   106  				// stat value is 0.
   107  				rows, err := sqlDB.Query(
   108  					"SELECT count(message) FROM crdb_internal.session_trace " +
   109  						"WHERE message LIKE '%cockroach.stat.tablereader.input.rows: 0%'",
   110  				)
   111  				if err != nil {
   112  					t.Fatal(err)
   113  				}
   114  				if !rows.Next() {
   115  					t.Fatal("unable to retrieve count")
   116  				}
   117  
   118  				var count int
   119  				if err := rows.Scan(&count); err != nil {
   120  					t.Fatal(err)
   121  				}
   122  				if err := rows.Close(); err != nil {
   123  					t.Fatal(err)
   124  				}
   125  				if count == 0 {
   126  					t.Fatalf("no stat messages found")
   127  				}
   128  
   129  				return sqlDB.Query(
   130  					"SELECT DISTINCT operation AS op FROM crdb_internal.session_trace " +
   131  						"WHERE operation IS NOT NULL ORDER BY op")
   132  			},
   133  			expSpans: []string{
   134  				"session recording",
   135  				"sql txn",
   136  				"exec stmt",
   137  				"flow",
   138  				"table reader",
   139  				"consuming rows",
   140  				"txn coordinator send",
   141  				"dist sender send",
   142  				"/cockroach.roachpb.Internal/Batch",
   143  			},
   144  			// Depending on whether the data is local or not, we may not see these
   145  			// spans.
   146  			optionalSpans: []string{
   147  				"/cockroach.sql.distsqlrun.DistSQL/SetupFlow",
   148  				"noop",
   149  			},
   150  		},
   151  		{
   152  			name: "ShowTraceFor",
   153  			getRows: func(_ *testing.T, sqlDB *gosql.DB) (*gosql.Rows, error) {
   154  				if _, err := sqlDB.Exec("SET DISTSQL = OFF"); err != nil {
   155  					t.Fatal(err)
   156  				}
   157  
   158  				// This test is specific to distsql execution.
   159  				if _, err := sqlDB.Exec("SET vectorize = off"); err != nil {
   160  					t.Fatal(err)
   161  				}
   162  
   163  				if _, err := sqlDB.Exec("SET tracing = on; SELECT * FROM test.foo; SET tracing = off"); err != nil {
   164  					t.Fatal(err)
   165  				}
   166  				return sqlDB.Query(
   167  					"SELECT DISTINCT operation AS op FROM [SHOW TRACE FOR SESSION] " +
   168  						"WHERE operation IS NOT NULL ORDER BY op")
   169  			},
   170  			expSpans: []string{
   171  				"exec stmt",
   172  				"flow",
   173  				"session recording",
   174  				"sql txn",
   175  				"table reader",
   176  				"consuming rows",
   177  				"txn coordinator send",
   178  				"dist sender send",
   179  				"/cockroach.roachpb.Internal/Batch",
   180  			},
   181  		},
   182  		{
   183  			name: "ShowTraceForDistSQL",
   184  			getRows: func(_ *testing.T, sqlDB *gosql.DB) (*gosql.Rows, error) {
   185  				if _, err := sqlDB.Exec("SET distsql = on"); err != nil {
   186  					t.Fatal(err)
   187  				}
   188  
   189  				// This test is specific to distsql execution.
   190  				if _, err := sqlDB.Exec("SET vectorize = off"); err != nil {
   191  					t.Fatal(err)
   192  				}
   193  
   194  				if _, err := sqlDB.Exec("SET tracing = on; SELECT * FROM test.foo; SET tracing = off"); err != nil {
   195  					t.Fatal(err)
   196  				}
   197  				return sqlDB.Query(
   198  					"SELECT DISTINCT operation AS op FROM [SHOW TRACE FOR SESSION] " +
   199  						"WHERE operation IS NOT NULL ORDER BY op")
   200  			},
   201  			expSpans: []string{
   202  				"session recording",
   203  				"sql txn",
   204  				"exec stmt",
   205  				"flow",
   206  				"table reader",
   207  				"consuming rows",
   208  				"txn coordinator send",
   209  				"dist sender send",
   210  				"/cockroach.roachpb.Internal/Batch",
   211  			},
   212  			// Depending on whether the data is local or not, we may not see these
   213  			// spans.
   214  			optionalSpans: []string{
   215  				"/cockroach.sql.distsqlrun.DistSQL/SetupFlow",
   216  				"noop",
   217  			},
   218  		},
   219  		{
   220  			name: "ShowTraceForVectorized",
   221  			getRows: func(_ *testing.T, sqlDB *gosql.DB) (*gosql.Rows, error) {
   222  				if _, err := sqlDB.Exec("SET distsql = off"); err != nil {
   223  					t.Fatal(err)
   224  				}
   225  				if _, err := sqlDB.Exec("SET vectorize = on; SET vectorize_row_count_threshold=0"); err != nil {
   226  					t.Fatal(err)
   227  				}
   228  				if _, err := sqlDB.Exec("SET tracing = on; SELECT * FROM test.foo; SET tracing = off"); err != nil {
   229  					t.Fatal(err)
   230  				}
   231  				return sqlDB.Query(
   232  					"SELECT DISTINCT operation AS op FROM [SHOW TRACE FOR SESSION] " +
   233  						"WHERE operation IS NOT NULL ORDER BY op")
   234  			},
   235  			expSpans: []string{
   236  				"session recording",
   237  				"sql txn",
   238  				"exec stmt",
   239  				"flow",
   240  				"materializer",
   241  				"*colexec.CancelChecker",
   242  				"consuming rows",
   243  				"txn coordinator send",
   244  				"dist sender send",
   245  				"/cockroach.roachpb.Internal/Batch",
   246  			},
   247  		},
   248  	}
   249  
   250  	// Create a cluster. We'll run sub-tests using each node of this cluster.
   251  	const numNodes = 3
   252  	cluster := serverutils.StartTestCluster(t, numNodes, base.TestClusterArgs{})
   253  	defer cluster.Stopper().Stop(context.Background())
   254  
   255  	clusterDB := cluster.ServerConn(0)
   256  	if _, err := clusterDB.Exec(`
   257  		CREATE DATABASE test;
   258  
   259  		--- test.foo is a single range table.
   260  		CREATE TABLE test.foo (id INT PRIMARY KEY);
   261  
   262  		--- test.bar is a multi-range table.
   263  		CREATE TABLE test.bar (id INT PRIMARY KEY);
   264  		ALTER TABLE  test.bar SPLIT AT VALUES (5);
   265  	`); err != nil {
   266  		t.Fatal(err)
   267  	}
   268  
   269  	for _, test := range testData {
   270  		test.optionalSpans = append(test.optionalSpans, alwaysOptionalSpans...)
   271  		sort.Strings(test.expSpans)
   272  
   273  		t.Run(test.name, func(t *testing.T) {
   274  			// Session tracing needs to work regardless of whether tracing is enabled, so
   275  			// we're going to test both cases.
   276  			//
   277  			// We'll also check traces from all nodes. The point is to be sure that we
   278  			// test a node that is different than the leaseholder for the range, so that
   279  			// the trace contains remote spans.
   280  			for _, enableTr := range []bool{false, true} {
   281  				name := "TracingOff"
   282  				if enableTr {
   283  					name = "TracingOn"
   284  				}
   285  				t.Run(name, func(t *testing.T) {
   286  					for i := 0; i < numNodes; i++ {
   287  						t.Run(fmt.Sprintf("node-%d", i), func(t *testing.T) {
   288  							// Use a new "session" for each sub-test rather than
   289  							// cluster.ServerConn() so that per-session state has a known
   290  							// value. This is necessary for the check below that the
   291  							// session_trace starts empty.
   292  							//
   293  							// TODO(andrei): Pull the check for an empty session_trace out of
   294  							// the sub-tests so we can use cluster.ServerConn(i) here.
   295  							pgURL, cleanup := sqlutils.PGUrl(
   296  								t, cluster.Server(i).ServingSQLAddr(), "TestTrace", url.User(security.RootUser))
   297  							defer cleanup()
   298  							sqlDB, err := gosql.Open("postgres", pgURL.String())
   299  							if err != nil {
   300  								t.Fatal(err)
   301  							}
   302  							defer sqlDB.Close()
   303  
   304  							sqlDB.SetMaxOpenConns(1)
   305  
   306  							// Run a non-traced read to acquire a lease on the table, so that the
   307  							// traced read below doesn't need to take a lease. Tracing a lease
   308  							// acquisition incurs some spans that are too fragile to test here.
   309  							if _, err := sqlDB.Exec(`SELECT * FROM test.foo LIMIT 1`); err != nil {
   310  								t.Fatal(err)
   311  							}
   312  
   313  							if _, err := cluster.ServerConn(0).Exec(
   314  								fmt.Sprintf(`SET CLUSTER SETTING trace.debug.enable = %t`, enableTr),
   315  							); err != nil {
   316  								t.Fatal(err)
   317  							}
   318  
   319  							// Sanity check that new sessions don't have trace info on them.
   320  							row := sqlDB.QueryRow("SELECT count(1) FROM crdb_internal.session_trace")
   321  							var count int
   322  							if err := row.Scan(&count); err != nil {
   323  								t.Fatal(err)
   324  							}
   325  							if count != 0 {
   326  								t.Fatalf("expected crdb_internal.session_trace to be empty "+
   327  									"at the beginning of a session, but it wasn't. Count: %d.", count)
   328  							}
   329  
   330  							rows, err := test.getRows(t, sqlDB)
   331  							if err != nil {
   332  								t.Fatal(err)
   333  							}
   334  							defer rows.Close()
   335  
   336  							ignoreSpans := make(map[string]bool)
   337  							for _, s := range test.optionalSpans {
   338  								ignoreSpans[s] = true
   339  							}
   340  							r := 0
   341  							for rows.Next() {
   342  								var op string
   343  								if err := rows.Scan(&op); err != nil {
   344  									t.Fatal(err)
   345  								}
   346  								if ignoreSpans[op] {
   347  									continue
   348  								}
   349  
   350  								remainingErr := false
   351  								if r >= len(test.expSpans) {
   352  									t.Errorf("extra span: %s", op)
   353  									remainingErr = true
   354  								} else if op != test.expSpans[r] {
   355  									t.Errorf("expected span: %q, got: %q", test.expSpans[r], op)
   356  									remainingErr = true
   357  								}
   358  								if remainingErr {
   359  									for rows.Next() {
   360  										if err := rows.Scan(&op); err != nil {
   361  											t.Fatal(err)
   362  										}
   363  										if ignoreSpans[op] {
   364  											continue
   365  										}
   366  										t.Errorf("remaining span: %q", op)
   367  									}
   368  									return
   369  								}
   370  								r++
   371  							}
   372  							if r < len(test.expSpans) {
   373  								t.Fatalf("missing expected spans: %s", test.expSpans[r:])
   374  							}
   375  						})
   376  					}
   377  				})
   378  			}
   379  		})
   380  	}
   381  }
   382  
   383  // TestTraceFieldDecomposition checks that SHOW TRACE is able to decompose
   384  // the parts of a trace/log message into different columns properly.
   385  func TestTraceFieldDecomposition(t *testing.T) {
   386  	defer leaktest.AfterTest(t)()
   387  
   388  	query := "SELECT 42"
   389  
   390  	params := base.TestServerArgs{
   391  		Knobs: base.TestingKnobs{
   392  			SQLExecutor: &sql.ExecutorTestingKnobs{
   393  				BeforeExecute: func(ctx context.Context, stmt string) {
   394  					if strings.Contains(stmt, query) {
   395  						// We need to check a tag containing brackets (e.g. an
   396  						// IPv6 address).  See #18558.
   397  						taggedCtx := logtags.AddTag(ctx, "hello", "[::666]")
   398  						// We use log.Infof here (instead of log.Event) to ensure
   399  						// the trace message contains also a file name prefix. See
   400  						// #19453/#20085.
   401  						log.Infof(taggedCtx, "world")
   402  					}
   403  				},
   404  			},
   405  		},
   406  	}
   407  	s, sqlDB, _ := serverutils.StartServer(t, params)
   408  	defer s.Stopper().Stop(context.Background())
   409  
   410  	sqlDB.SetMaxOpenConns(1)
   411  
   412  	if _, err := sqlDB.Exec("SET tracing = ON"); err != nil {
   413  		t.Fatal(err)
   414  	}
   415  
   416  	if _, err := sqlDB.Exec(query); err != nil {
   417  		t.Fatal(err)
   418  	}
   419  
   420  	if _, err := sqlDB.Exec("SET tracing = OFF"); err != nil {
   421  		t.Fatal(err)
   422  	}
   423  
   424  	t.Run("SHOW TRACE", func(t *testing.T) {
   425  		rows, err := sqlDB.Query(`SELECT message, tag, location FROM [SHOW TRACE FOR SESSION]`)
   426  		if err != nil {
   427  			t.Fatal(err)
   428  		}
   429  		defer rows.Close()
   430  
   431  		ok := false
   432  		for rows.Next() {
   433  			var msg, ct, loc []byte
   434  			if err := rows.Scan(&msg, &ct, &loc); err != nil {
   435  				t.Fatal(err)
   436  			}
   437  			t.Logf("received trace: %q // %q // %q", msg, ct, loc)
   438  			// Check that brackets are properly balanced.
   439  			if len(ct) > 0 && ct[0] == '[' {
   440  				if ct[len(ct)-1] != ']' {
   441  					t.Errorf("tag starts with open bracket but does not close it: %q", ct)
   442  				}
   443  			}
   444  			c1 := strings.Count(string(ct), "[")
   445  			c2 := strings.Count(string(ct), "]")
   446  			if c1 != c2 {
   447  				t.Errorf("mismatched brackets: %q", ct)
   448  			}
   449  			// Check that the expected message was received.
   450  			if string(msg) == "world" &&
   451  				strings.Contains(string(ct), "hello=[::666]") &&
   452  				strings.Contains(string(loc), ".go") {
   453  				ok = true
   454  			}
   455  			// Check that the fields don't have heading or trailing whitespaces.
   456  			for _, b := range [][]byte{msg, ct, loc} {
   457  				if len(b) > 0 && (b[0] == ' ' || b[len(b)-1] == ' ') {
   458  					t.Errorf("unexpected whitespace: %q", b)
   459  				}
   460  			}
   461  		}
   462  		if !ok {
   463  			t.Fatal("expected message not found in trace")
   464  		}
   465  	})
   466  
   467  	t.Run("SHOW COMPACT TRACE", func(t *testing.T) {
   468  		rows, err := sqlDB.Query(`SELECT message, tag FROM [SHOW COMPACT TRACE FOR SESSION];`)
   469  		if err != nil {
   470  			t.Fatal(err)
   471  		}
   472  		defer rows.Close()
   473  
   474  		ok := false
   475  		for rows.Next() {
   476  			var msg, ct []byte
   477  			if err := rows.Scan(&msg, &ct); err != nil {
   478  				t.Fatal(err)
   479  			}
   480  			t.Logf("received trace: %q // %q", msg, ct)
   481  			// Check that brackets are properly balanced.
   482  			if len(ct) > 0 && ct[0] == '[' {
   483  				if ct[len(ct)-1] != ']' {
   484  					t.Errorf("tag starts with open bracket but does not close it: %q", ct)
   485  				}
   486  			}
   487  			c1 := strings.Count(string(ct), "[")
   488  			c2 := strings.Count(string(ct), "]")
   489  			if c1 != c2 {
   490  				t.Errorf("mismatched brackets: %q", ct)
   491  			}
   492  			// Check that the expected message was received.
   493  			if strings.HasSuffix(string(msg), " world") &&
   494  				strings.Contains(string(ct), "hello=[::666]") &&
   495  				strings.Contains(string(msg), ".go") {
   496  				ok = true
   497  			}
   498  			// Check that the fields don't have heading or trailing whitespaces.
   499  			for _, b := range [][]byte{msg, ct} {
   500  				if len(b) > 0 && (b[0] == ' ' || b[len(b)-1] == ' ') {
   501  					t.Errorf("unexpected whitespace: %q", b)
   502  				}
   503  			}
   504  		}
   505  		if !ok {
   506  			t.Fatal("expected message not found in trace")
   507  		}
   508  	})
   509  
   510  }
   511  
   512  func TestKVTraceWithCountStar(t *testing.T) {
   513  	defer leaktest.AfterTest(t)()
   514  
   515  	// Test that we don't crash if we try to do a KV trace
   516  	// on a COUNT(*) query (#19846).
   517  	s, db, _ := serverutils.StartServer(t, base.TestServerArgs{})
   518  	defer s.Stopper().Stop(context.Background())
   519  
   520  	r := sqlutils.MakeSQLRunner(db)
   521  	r.Exec(t, "CREATE DATABASE test")
   522  	r.Exec(t, "CREATE TABLE test.a (a INT PRIMARY KEY, b INT)")
   523  	r.Exec(t, "INSERT INTO test.a VALUES (1,1), (2,2)")
   524  	r.Exec(t, "SET tracing = on,kv; SELECT count(*) FROM test.a; SET tracing = off")
   525  }
   526  
   527  func TestKVTraceDistSQL(t *testing.T) {
   528  	defer leaktest.AfterTest(t)()
   529  
   530  	// Test that kv tracing works in distsql.
   531  	const numNodes = 2
   532  	cluster := serverutils.StartTestCluster(t, numNodes, base.TestClusterArgs{
   533  		ReplicationMode: base.ReplicationManual,
   534  		ServerArgs: base.TestServerArgs{
   535  			UseDatabase: "test",
   536  		},
   537  	})
   538  	defer cluster.Stopper().Stop(context.Background())
   539  
   540  	r := sqlutils.MakeSQLRunner(cluster.ServerConn(0))
   541  	r.Exec(t, "CREATE DATABASE test")
   542  	r.Exec(t, "CREATE TABLE test.a (a INT PRIMARY KEY, b INT)")
   543  	r.Exec(t, "INSERT INTO test.a VALUES (1,1), (2,2)")
   544  	r.Exec(t, "ALTER TABLE a SPLIT AT VALUES(1)")
   545  	r.Exec(t, "SET tracing = on,kv; SELECT count(*) FROM test.a; SET tracing = off")
   546  
   547  	for node := 0; node < 2; node++ {
   548  		rows := r.Query(t,
   549  			fmt.Sprintf(`SELECT tag
   550  			 FROM [SHOW KV TRACE FOR SESSION]
   551  			 WHERE tag LIKE '%%%d%%'`, node))
   552  		if !rows.Next() {
   553  			t.Fatalf("no message from n%d found", node)
   554  		}
   555  		if err := rows.Close(); err != nil {
   556  			t.Fatal(err)
   557  		}
   558  	}
   559  
   560  	rows := r.Query(t, `SELECT *
   561             FROM [SHOW KV TRACE FOR SESSION]
   562             WHERE MESSAGE LIKE '%fetched: %'`)
   563  	if !rows.Next() {
   564  		t.Fatal("No kv messages found")
   565  	}
   566  	if err := rows.Close(); err != nil {
   567  		t.Fatal(err)
   568  	}
   569  }