github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/server/updates_test.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package server
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"net/url"
    17  	"reflect"
    18  	"runtime"
    19  	"strings"
    20  	"testing"
    21  
    22  	"github.com/cockroachdb/cockroach/pkg/base"
    23  	"github.com/cockroachdb/cockroach/pkg/build"
    24  	"github.com/cockroachdb/cockroach/pkg/clusterversion"
    25  	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
    26  	"github.com/cockroachdb/cockroach/pkg/keys"
    27  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    28  	"github.com/cockroachdb/cockroach/pkg/server/diagnosticspb"
    29  	"github.com/cockroachdb/cockroach/pkg/server/telemetry"
    30  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    31  	"github.com/cockroachdb/cockroach/pkg/sql"
    32  	"github.com/cockroachdb/cockroach/pkg/sql/catalog/lease"
    33  	"github.com/cockroachdb/cockroach/pkg/testutils"
    34  	"github.com/cockroachdb/cockroach/pkg/testutils/diagutils"
    35  	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
    36  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    37  	"github.com/cockroachdb/errors"
    38  )
    39  
    40  func TestCheckVersion(t *testing.T) {
    41  	defer leaktest.AfterTest(t)()
    42  
    43  	ctx := context.Background()
    44  
    45  	t.Run("expected-reporting", func(t *testing.T) {
    46  		r := diagutils.NewServer()
    47  		defer r.Close()
    48  
    49  		url := r.URL()
    50  		s, _, _ := serverutils.StartServer(t, base.TestServerArgs{
    51  			Knobs: base.TestingKnobs{
    52  				Server: &TestingKnobs{
    53  					DiagnosticsTestingKnobs: diagnosticspb.TestingKnobs{
    54  						OverrideUpdatesURL: &url,
    55  					},
    56  				},
    57  			},
    58  		})
    59  		defer s.Stopper().Stop(ctx)
    60  		s.CheckForUpdates(ctx)
    61  		r.Close()
    62  
    63  		if expected, actual := 1, r.NumRequests(); actual != expected {
    64  			t.Fatalf("expected %v update checks, got %v", expected, actual)
    65  		}
    66  
    67  		last := r.LastRequestData()
    68  		if expected, actual := s.(*TestServer).ClusterID().String(), last.UUID; expected != actual {
    69  			t.Errorf("expected uuid %v, got %v", expected, actual)
    70  		}
    71  
    72  		if expected, actual := build.GetInfo().Tag, last.Version; expected != actual {
    73  			t.Errorf("expected version tag %v, got %v", expected, actual)
    74  		}
    75  
    76  		if expected, actual := "OSS", last.LicenseType; expected != actual {
    77  			t.Errorf("expected license type %v, got %v", expected, actual)
    78  		}
    79  
    80  		if expected, actual := "false", last.Internal; expected != actual {
    81  			t.Errorf("expected internal to be %v, got %v", expected, actual)
    82  		}
    83  	})
    84  
    85  	t.Run("npe", func(t *testing.T) {
    86  		// Ensure nil, which happens when an empty env override URL is used, does not
    87  		// cause a crash.
    88  		var nilURL *url.URL
    89  		s, _, _ := serverutils.StartServer(t, base.TestServerArgs{
    90  			Knobs: base.TestingKnobs{
    91  				Server: &TestingKnobs{
    92  					DiagnosticsTestingKnobs: diagnosticspb.TestingKnobs{
    93  						OverrideUpdatesURL:   &nilURL,
    94  						OverrideReportingURL: &nilURL,
    95  					},
    96  				},
    97  			},
    98  		})
    99  		defer s.Stopper().Stop(ctx)
   100  		s.CheckForUpdates(ctx)
   101  		s.ReportDiagnostics(ctx)
   102  	})
   103  }
   104  
   105  func TestUsageQuantization(t *testing.T) {
   106  	defer leaktest.AfterTest(t)()
   107  
   108  	r := diagutils.NewServer()
   109  	defer r.Close()
   110  
   111  	st := cluster.MakeTestingClusterSettings()
   112  	ctx := context.Background()
   113  
   114  	url := r.URL()
   115  	s, db, _ := serverutils.StartServer(t, base.TestServerArgs{
   116  		Settings: st,
   117  		Knobs: base.TestingKnobs{
   118  			Server: &TestingKnobs{
   119  				DiagnosticsTestingKnobs: diagnosticspb.TestingKnobs{
   120  					OverrideReportingURL: &url,
   121  				},
   122  			},
   123  		},
   124  	})
   125  	defer s.Stopper().Stop(ctx)
   126  	ts := s.(*TestServer)
   127  
   128  	// Disable periodic reporting so it doesn't interfere with the test.
   129  	if _, err := db.Exec(`SET CLUSTER SETTING diagnostics.reporting.enabled = false`); err != nil {
   130  		t.Fatal(err)
   131  	}
   132  
   133  	if _, err := db.Exec(`SET application_name = 'test'`); err != nil {
   134  		t.Fatal(err)
   135  	}
   136  
   137  	// Issue some queries against the test app name.
   138  	for i := 0; i < 8; i++ {
   139  		if _, err := db.Exec(`SELECT 1`); err != nil {
   140  			t.Fatal(err)
   141  		}
   142  	}
   143  	// Between 10 and 100 queries is quantized to 10.
   144  	for i := 0; i < 30; i++ {
   145  		if _, err := db.Exec(`SELECT 1,2`); err != nil {
   146  			t.Fatal(err)
   147  		}
   148  	}
   149  	// Between 100 and 10000 gets quantized to 100.
   150  	for i := 0; i < 200; i++ {
   151  		if _, err := db.Exec(`SELECT 1,2,3`); err != nil {
   152  			t.Fatal(err)
   153  		}
   154  	}
   155  	// Above 10000 gets quantized to 10000.
   156  	for i := 0; i < 10010; i++ {
   157  		if _, err := db.Exec(`SHOW application_name`); err != nil {
   158  			t.Fatal(err)
   159  		}
   160  	}
   161  
   162  	// Flush the SQL stat pool.
   163  	ts.Server.sqlServer.pgServer.SQLServer.ResetSQLStats(ctx)
   164  
   165  	// Collect a round of statistics.
   166  	ts.ReportDiagnostics(ctx)
   167  
   168  	// The stats "hide" the application name by hashing it. To find the
   169  	// test app name, we need to hash the ref string too prior to the
   170  	// comparison.
   171  	clusterSecret := sql.ClusterSecret.Get(&st.SV)
   172  	hashedAppName := sql.HashForReporting(clusterSecret, "test")
   173  	if hashedAppName == sql.FailedHashedValue {
   174  		t.Fatalf("expected hashedAppName to not be 'unknown'")
   175  	}
   176  
   177  	testData := []struct {
   178  		query         string
   179  		expectedCount int64
   180  	}{
   181  		{`SELECT _`, 8},
   182  		{`SELECT _, _`, 10},
   183  		{`SELECT _, _, _`, 100},
   184  		{`SHOW application_name`, 10000},
   185  	}
   186  
   187  	last := r.LastRequestData()
   188  	for _, test := range testData {
   189  		found := false
   190  		for _, s := range last.SqlStats {
   191  			if s.Key.App == hashedAppName && s.Key.Query == test.query {
   192  				if s.Stats.Count != test.expectedCount {
   193  					t.Errorf("quantization incorrect for query %q: expected %d, got %d",
   194  						test.query, test.expectedCount, s.Stats.Count)
   195  				}
   196  				found = true
   197  				break
   198  			}
   199  		}
   200  		if !found {
   201  			t.Errorf("query %q missing from stats", test.query)
   202  		}
   203  	}
   204  }
   205  
   206  // This test is deprecated; it is being replaced with datadriven tests
   207  // (see sql.TestTelemetry).
   208  func TestReportUsage(t *testing.T) {
   209  	defer leaktest.AfterTest(t)()
   210  
   211  	const elemName = "somestring"
   212  	ctx := context.Background()
   213  
   214  	r := diagutils.NewServer()
   215  	defer r.Close()
   216  
   217  	st := cluster.MakeTestingClusterSettings()
   218  
   219  	url := r.URL()
   220  	storeSpec := base.DefaultTestStoreSpec
   221  	storeSpec.Attributes = roachpb.Attributes{Attrs: []string{elemName}}
   222  	params := base.TestServerArgs{
   223  		StoreSpecs: []base.StoreSpec{
   224  			storeSpec,
   225  			base.DefaultTestStoreSpec,
   226  		},
   227  		Settings: st,
   228  		Locality: roachpb.Locality{
   229  			Tiers: []roachpb.Tier{
   230  				{Key: "region", Value: "east"},
   231  				{Key: "zone", Value: elemName},
   232  				{Key: "state", Value: "ny"},
   233  				{Key: "city", Value: "nyc"},
   234  			},
   235  		},
   236  		Knobs: base.TestingKnobs{
   237  			SQLLeaseManager: &lease.ManagerTestingKnobs{
   238  				// Disable SELECT called for delete orphaned leases to keep
   239  				// query stats stable.
   240  				DisableDeleteOrphanedLeases: true,
   241  			},
   242  			Server: &TestingKnobs{
   243  				DiagnosticsTestingKnobs: diagnosticspb.TestingKnobs{
   244  					OverrideReportingURL: &url,
   245  				},
   246  			},
   247  		},
   248  	}
   249  
   250  	s, db, _ := serverutils.StartServer(t, params)
   251  	defer s.Stopper().Stop(context.Background()) // stopper will wait for the update/report loop to finish too.
   252  	ts := s.(*TestServer)
   253  
   254  	// make sure the test's generated activity is the only activity we measure.
   255  	telemetry.GetFeatureCounts(telemetry.Raw, telemetry.ResetCounts)
   256  
   257  	if _, err := db.Exec(fmt.Sprintf(`CREATE DATABASE %s`, elemName)); err != nil {
   258  		t.Fatal(err)
   259  	}
   260  	if _, err := db.Exec(`SET CLUSTER SETTING server.time_until_store_dead = '90s'`); err != nil {
   261  		t.Fatal(err)
   262  	}
   263  	// Disable periodic reporting so it doesn't interfere with the test.
   264  	if _, err := db.Exec(`SET CLUSTER SETTING diagnostics.reporting.enabled = false`); err != nil {
   265  		t.Fatal(err)
   266  	}
   267  
   268  	if _, err := db.Exec(`SET CLUSTER SETTING diagnostics.reporting.send_crash_reports = false`); err != nil {
   269  		t.Fatal(err)
   270  	}
   271  
   272  	for _, cmd := range []struct {
   273  		resource string
   274  		config   string
   275  	}{
   276  		{"TABLE system.rangelog", fmt.Sprintf(`constraints: [+zone=%[1]s, +%[1]s]`, elemName)},
   277  		{"TABLE system.rangelog", `{gc: {ttlseconds: 1}}`},
   278  		{"DATABASE system", `num_replicas: 5`},
   279  		{"DATABASE system", fmt.Sprintf(`constraints: {"+zone=%[1]s,+%[1]s": 2, +%[1]s: 1}`, elemName)},
   280  		{"DATABASE system", fmt.Sprintf(`experimental_lease_preferences: [[+zone=%[1]s,+%[1]s], [+%[1]s]]`, elemName)},
   281  	} {
   282  		testutils.SucceedsSoon(t, func() error {
   283  			if _, err := db.Exec(
   284  				fmt.Sprintf(`ALTER %s CONFIGURE ZONE = '%s'`, cmd.resource, cmd.config),
   285  			); err != nil {
   286  				// Work around gossip asynchronicity.
   287  				return errors.Errorf("error applying zone config %q to %q: %v", cmd.config, cmd.resource, err)
   288  			}
   289  			return nil
   290  		})
   291  	}
   292  
   293  	// Set cluster to an internal testing cluster
   294  	q := `SET CLUSTER SETTING cluster.organization = 'Cockroach Labs - Production Testing'`
   295  	if _, err := db.Exec(q); err != nil {
   296  		t.Fatal(err)
   297  	}
   298  
   299  	expectedUsageReports := 0
   300  
   301  	clusterSecret := sql.ClusterSecret.Get(&st.SV)
   302  	testutils.SucceedsSoon(t, func() error {
   303  		expectedUsageReports++
   304  
   305  		node := ts.node.recorder.GenerateNodeStatus(ctx)
   306  		// Clear the SQL stat pool before getting diagnostics.
   307  		ts.sqlServer.pgServer.SQLServer.ResetSQLStats(ctx)
   308  		ts.ReportDiagnostics(ctx)
   309  
   310  		keyCounts := make(map[roachpb.StoreID]int64)
   311  		rangeCounts := make(map[roachpb.StoreID]int64)
   312  		totalKeys := int64(0)
   313  		totalRanges := int64(0)
   314  
   315  		for _, store := range node.StoreStatuses {
   316  			if keys, ok := store.Metrics["keycount"]; ok {
   317  				totalKeys += int64(keys)
   318  				keyCounts[store.Desc.StoreID] = int64(keys)
   319  			} else {
   320  				t.Fatal("keycount not in metrics")
   321  			}
   322  			if replicas, ok := store.Metrics["replicas"]; ok {
   323  				totalRanges += int64(replicas)
   324  				rangeCounts[store.Desc.StoreID] = int64(replicas)
   325  			} else {
   326  				t.Fatal("replicas not in metrics")
   327  			}
   328  		}
   329  
   330  		if expected, actual := expectedUsageReports, r.NumRequests(); expected != actual {
   331  			t.Fatalf("expected %v reports, got %v", expected, actual)
   332  		}
   333  		last := r.LastRequestData()
   334  		if expected, actual := ts.ClusterID().String(), last.UUID; expected != actual {
   335  			return errors.Errorf("expected cluster id %v got %v", expected, actual)
   336  		}
   337  		if expected, actual := ts.node.Descriptor.NodeID, last.Node.NodeID; expected != actual {
   338  			return errors.Errorf("expected node id %v got %v", expected, actual)
   339  		}
   340  
   341  		if last.Node.Hardware.Mem.Total == 0 {
   342  			return errors.Errorf("expected non-zero total mem")
   343  		}
   344  		if last.Node.Hardware.Mem.Available == 0 {
   345  			return errors.Errorf("expected non-zero available mem")
   346  		}
   347  		if actual, expected := last.Node.Hardware.Cpu.Numcpu, runtime.NumCPU(); int(actual) != expected {
   348  			return errors.Errorf("expected %d num cpu, got %d", expected, actual)
   349  		}
   350  		if last.Node.Hardware.Cpu.Sockets == 0 {
   351  			return errors.Errorf("expected non-zero sockets")
   352  		}
   353  		if last.Node.Hardware.Cpu.Mhz == 0.0 {
   354  			return errors.Errorf("expected non-zero speed")
   355  		}
   356  		if last.Node.Os.Platform == "" {
   357  			return errors.Errorf("expected non-empty OS")
   358  		}
   359  
   360  		if minExpected, actual := totalKeys, last.Node.KeyCount; minExpected > actual {
   361  			return errors.Errorf("expected node keys at least %v got %v", minExpected, actual)
   362  		}
   363  		if minExpected, actual := totalRanges, last.Node.RangeCount; minExpected > actual {
   364  			return errors.Errorf("expected node ranges at least %v got %v", minExpected, actual)
   365  		}
   366  		if minExpected, actual := len(params.StoreSpecs), len(last.Stores); minExpected > actual {
   367  			return errors.Errorf("expected at least %v stores got %v", minExpected, actual)
   368  		}
   369  		if expected, actual := "true", last.Internal; expected != actual {
   370  			t.Errorf("expected internal to be %v, got %v", expected, actual)
   371  		}
   372  		if expected, actual := len(params.Locality.Tiers), len(last.Node.Locality.Tiers); expected != actual {
   373  			t.Errorf("expected locality to have %d tier, got %d", expected, actual)
   374  		}
   375  		for i := range params.Locality.Tiers {
   376  			if expected, actual := sql.HashForReporting(clusterSecret, params.Locality.Tiers[i].Key),
   377  				last.Node.Locality.Tiers[i].Key; expected != actual {
   378  				t.Errorf("expected locality tier %d key to be %s, got %s", i, expected, actual)
   379  			}
   380  			if expected, actual := sql.HashForReporting(clusterSecret, params.Locality.Tiers[i].Value),
   381  				last.Node.Locality.Tiers[i].Value; expected != actual {
   382  				t.Errorf("expected locality tier %d value to be %s, got %s", i, expected, actual)
   383  			}
   384  		}
   385  
   386  		for _, store := range last.Stores {
   387  			if minExpected, actual := keyCounts[store.StoreID], store.KeyCount; minExpected > actual {
   388  				return errors.Errorf("expected at least %v keys in store %v got %v", minExpected, store.StoreID, actual)
   389  			}
   390  			if minExpected, actual := rangeCounts[store.StoreID], store.RangeCount; minExpected > actual {
   391  				return errors.Errorf("expected at least %v ranges in store %v got %v", minExpected, store.StoreID, actual)
   392  			}
   393  		}
   394  		return nil
   395  	})
   396  
   397  	last := r.LastRequestData()
   398  	// This check isn't clean, since the body is a raw proto binary and thus could
   399  	// easily contain some encoded form of elemName, but *if* it ever does fail,
   400  	// that is probably very interesting.
   401  	if strings.Contains(last.RawReportBody, elemName) {
   402  		t.Fatalf("%q should not appear in %q", elemName, last.RawReportBody)
   403  	}
   404  
   405  	// 3 + 3 = 6: set 3 initially and org is set mid-test for 3 altered settings,
   406  	// plus version, reporting and secret settings are set in startup
   407  	// migrations.
   408  	if expected, actual := 6, len(last.AlteredSettings); expected != actual {
   409  		t.Fatalf("expected %d changed settings, got %d: %v", expected, actual, last.AlteredSettings)
   410  	}
   411  	for key, expected := range map[string]string{
   412  		"cluster.organization":                     "<redacted>",
   413  		"diagnostics.reporting.send_crash_reports": "false",
   414  		"server.time_until_store_dead":             "1m30s",
   415  		"version":                                  clusterversion.TestingBinaryVersion.String(),
   416  		"cluster.secret":                           "<redacted>",
   417  	} {
   418  		if got, ok := last.AlteredSettings[key]; !ok {
   419  			t.Fatalf("expected report of altered setting %q", key)
   420  		} else if got != expected {
   421  			t.Fatalf("expected reported value of setting %q to be %q not %q", key, expected, got)
   422  		}
   423  	}
   424  
   425  	// Verify that we receive the four auto-populated zone configs plus the two
   426  	// modified above, and that their values are as expected.
   427  	for _, expectedID := range []int64{
   428  		keys.RootNamespaceID,
   429  		keys.LivenessRangesID,
   430  		keys.MetaRangesID,
   431  		keys.RangeEventTableID,
   432  		keys.SystemDatabaseID,
   433  	} {
   434  		if _, ok := last.ZoneConfigs[expectedID]; !ok {
   435  			t.Errorf("didn't find expected ID %d in reported ZoneConfigs: %+v",
   436  				expectedID, last.ZoneConfigs)
   437  		}
   438  	}
   439  	hashedElemName := sql.HashForReporting(clusterSecret, elemName)
   440  	hashedZone := sql.HashForReporting(clusterSecret, "zone")
   441  	for id, zone := range last.ZoneConfigs {
   442  		if id == keys.RootNamespaceID {
   443  			if defZone := ts.Cfg.DefaultZoneConfig; !reflect.DeepEqual(zone, defZone) {
   444  				t.Errorf("default zone config does not match: expected\n%+v got\n%+v", defZone, zone)
   445  			}
   446  		}
   447  		if id == keys.RangeEventTableID {
   448  			if a, e := zone.GC.TTLSeconds, int32(1); a != e {
   449  				t.Errorf("expected zone %d GC.TTLSeconds = %d; got %d", id, e, a)
   450  			}
   451  			if a, e := zone.Constraints, []zonepb.ConstraintsConjunction{
   452  				{
   453  					Constraints: []zonepb.Constraint{
   454  						{Key: hashedZone, Value: hashedElemName, Type: zonepb.Constraint_REQUIRED},
   455  						{Value: hashedElemName, Type: zonepb.Constraint_REQUIRED},
   456  					},
   457  				},
   458  			}; !reflect.DeepEqual(a, e) {
   459  				t.Errorf("expected zone %d Constraints = %+v; got %+v", id, e, a)
   460  			}
   461  		}
   462  		if id == keys.SystemDatabaseID {
   463  			if a, e := zone.Constraints, []zonepb.ConstraintsConjunction{
   464  				{
   465  					NumReplicas: 1,
   466  					Constraints: []zonepb.Constraint{{Value: hashedElemName, Type: zonepb.Constraint_REQUIRED}},
   467  				},
   468  				{
   469  					NumReplicas: 2,
   470  					Constraints: []zonepb.Constraint{
   471  						{Key: hashedZone, Value: hashedElemName, Type: zonepb.Constraint_REQUIRED},
   472  						{Value: hashedElemName, Type: zonepb.Constraint_REQUIRED},
   473  					},
   474  				},
   475  			}; !reflect.DeepEqual(a, e) {
   476  				t.Errorf("expected zone %d Constraints = %+v; got %+v", id, e, a)
   477  			}
   478  			if a, e := zone.LeasePreferences, []zonepb.LeasePreference{
   479  				{
   480  					Constraints: []zonepb.Constraint{
   481  						{Key: hashedZone, Value: hashedElemName, Type: zonepb.Constraint_REQUIRED},
   482  						{Value: hashedElemName, Type: zonepb.Constraint_REQUIRED},
   483  					},
   484  				},
   485  				{
   486  					Constraints: []zonepb.Constraint{{Value: hashedElemName, Type: zonepb.Constraint_REQUIRED}},
   487  				},
   488  			}; !reflect.DeepEqual(a, e) {
   489  				t.Errorf("expected zone %d LeasePreferences = %+v; got %+v", id, e, a)
   490  			}
   491  		}
   492  	}
   493  }