google.golang.org/grpc@v1.74.2/xds/internal/clients/lrsclient/load_store_test.go (about)

     1  /*
     2   *
     3   * Copyright 2020 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  package lrsclient
    19  
    20  import (
    21  	"fmt"
    22  	"sort"
    23  	"sync"
    24  	"testing"
    25  	"time"
    26  
    27  	"github.com/google/go-cmp/cmp"
    28  	"github.com/google/go-cmp/cmp/cmpopts"
    29  	"google.golang.org/grpc/xds/internal/clients"
    30  	lrsclientinternal "google.golang.org/grpc/xds/internal/clients/lrsclient/internal"
    31  )
    32  
    33  var (
    34  	dropCategories = []string{"drop_for_real", "drop_for_fun"}
    35  	localities     = []clients.Locality{{Region: "locality-A"}, {Region: "locality-B"}}
    36  	errTest        = fmt.Errorf("test error")
    37  )
    38  
    39  // rpcData wraps the rpc counts and load data to be pushed to the store.
    40  type rpcData struct {
    41  	start, success, failure int
    42  	serverData              map[string]float64 // Will be reported with successful RPCs.
    43  }
    44  
    45  func verifyLoadStoreData(wantStoreData, gotStoreData []*loadData) error {
    46  	if diff := cmp.Diff(wantStoreData, gotStoreData, cmpopts.EquateEmpty(), cmp.AllowUnexported(loadData{}, localityData{}, requestData{}, serverLoadData{}), cmpopts.IgnoreFields(loadData{}, "reportInterval"), sortDataSlice); diff != "" {
    47  		return fmt.Errorf("store.stats() returned unexpected diff (-want +got):\n%s", diff)
    48  	}
    49  	return nil
    50  }
    51  
    52  // TestDrops spawns a bunch of goroutines which report drop data. After the
    53  // goroutines have exited, the test dumps the stats from the Store and makes
    54  // sure they are as expected.
    55  func TestDrops(t *testing.T) {
    56  	var (
    57  		drops = map[string]int{
    58  			dropCategories[0]: 30,
    59  			dropCategories[1]: 40,
    60  			"":                10,
    61  		}
    62  		wantStoreData = &loadData{
    63  			totalDrops: 80,
    64  			drops: map[string]uint64{
    65  				dropCategories[0]: 30,
    66  				dropCategories[1]: 40,
    67  			},
    68  		}
    69  	)
    70  
    71  	ls := PerClusterReporter{}
    72  	var wg sync.WaitGroup
    73  	for category, count := range drops {
    74  		for i := 0; i < count; i++ {
    75  			wg.Add(1)
    76  			go func(c string) {
    77  				ls.CallDropped(c)
    78  				wg.Done()
    79  			}(category)
    80  		}
    81  	}
    82  	wg.Wait()
    83  
    84  	gotStoreData := ls.stats()
    85  	if err := verifyLoadStoreData([]*loadData{wantStoreData}, []*loadData{gotStoreData}); err != nil {
    86  		t.Error(err)
    87  	}
    88  }
    89  
    90  // TestLocalityStats spawns a bunch of goroutines which report rpc and load
    91  // data. After the goroutines have exited, the test dumps the stats from the
    92  // Store and makes sure they are as expected.
    93  func TestLocalityStats(t *testing.T) {
    94  	var (
    95  		ld = map[clients.Locality]rpcData{
    96  			localities[0]: {
    97  				start:      40,
    98  				success:    20,
    99  				failure:    10,
   100  				serverData: map[string]float64{"net": 1, "disk": 2, "cpu": 3, "mem": 4},
   101  			},
   102  			localities[1]: {
   103  				start:      80,
   104  				success:    40,
   105  				failure:    20,
   106  				serverData: map[string]float64{"net": 1, "disk": 2, "cpu": 3, "mem": 4},
   107  			},
   108  		}
   109  		wantStoreData = &loadData{
   110  			localityStats: map[clients.Locality]localityData{
   111  				localities[0]: {
   112  					requestStats: requestData{
   113  						succeeded:  20,
   114  						errored:    10,
   115  						inProgress: 10,
   116  						issued:     40,
   117  					},
   118  					loadStats: map[string]serverLoadData{
   119  						"net":  {count: 20, sum: 20},
   120  						"disk": {count: 20, sum: 40},
   121  						"cpu":  {count: 20, sum: 60},
   122  						"mem":  {count: 20, sum: 80},
   123  					},
   124  				},
   125  				localities[1]: {
   126  					requestStats: requestData{
   127  						succeeded:  40,
   128  						errored:    20,
   129  						inProgress: 20,
   130  						issued:     80,
   131  					},
   132  					loadStats: map[string]serverLoadData{
   133  						"net":  {count: 40, sum: 40},
   134  						"disk": {count: 40, sum: 80},
   135  						"cpu":  {count: 40, sum: 120},
   136  						"mem":  {count: 40, sum: 160},
   137  					},
   138  				},
   139  			},
   140  		}
   141  	)
   142  
   143  	ls := PerClusterReporter{}
   144  	var wg sync.WaitGroup
   145  	for locality, data := range ld {
   146  		wg.Add(data.start)
   147  		for i := 0; i < data.start; i++ {
   148  			go func(l clients.Locality) {
   149  				ls.CallStarted(l)
   150  				wg.Done()
   151  			}(locality)
   152  		}
   153  		// The calls to callStarted() need to happen before the other calls are
   154  		// made. Hence the wait here.
   155  		wg.Wait()
   156  
   157  		wg.Add(data.success)
   158  		for i := 0; i < data.success; i++ {
   159  			go func(l clients.Locality, serverData map[string]float64) {
   160  				ls.CallFinished(l, nil)
   161  				for n, d := range serverData {
   162  					ls.CallServerLoad(l, n, d)
   163  				}
   164  				wg.Done()
   165  			}(locality, data.serverData)
   166  		}
   167  		wg.Add(data.failure)
   168  		for i := 0; i < data.failure; i++ {
   169  			go func(l clients.Locality) {
   170  				ls.CallFinished(l, errTest)
   171  				wg.Done()
   172  			}(locality)
   173  		}
   174  		wg.Wait()
   175  	}
   176  
   177  	gotStoreData := ls.stats()
   178  	if err := verifyLoadStoreData([]*loadData{wantStoreData}, []*loadData{gotStoreData}); err != nil {
   179  		t.Error(err)
   180  	}
   181  }
   182  
   183  func TestResetAfterStats(t *testing.T) {
   184  	// Push a bunch of drops, call stats and load stats, and leave inProgress to be non-zero.
   185  	// Dump the stats. Verify expected
   186  	// Push the same set of loads as before
   187  	// Now dump and verify the newly expected ones.
   188  	var (
   189  		drops = map[string]int{
   190  			dropCategories[0]: 30,
   191  			dropCategories[1]: 40,
   192  		}
   193  		ld = map[clients.Locality]rpcData{
   194  			localities[0]: {
   195  				start:      40,
   196  				success:    20,
   197  				failure:    10,
   198  				serverData: map[string]float64{"net": 1, "disk": 2, "cpu": 3, "mem": 4},
   199  			},
   200  			localities[1]: {
   201  				start:      80,
   202  				success:    40,
   203  				failure:    20,
   204  				serverData: map[string]float64{"net": 1, "disk": 2, "cpu": 3, "mem": 4},
   205  			},
   206  		}
   207  		wantStoreData = &loadData{
   208  			totalDrops: 70,
   209  			drops: map[string]uint64{
   210  				dropCategories[0]: 30,
   211  				dropCategories[1]: 40,
   212  			},
   213  			localityStats: map[clients.Locality]localityData{
   214  				localities[0]: {
   215  					requestStats: requestData{
   216  						succeeded:  20,
   217  						errored:    10,
   218  						inProgress: 10,
   219  						issued:     40,
   220  					},
   221  
   222  					loadStats: map[string]serverLoadData{
   223  						"net":  {count: 20, sum: 20},
   224  						"disk": {count: 20, sum: 40},
   225  						"cpu":  {count: 20, sum: 60},
   226  						"mem":  {count: 20, sum: 80},
   227  					},
   228  				},
   229  				localities[1]: {
   230  					requestStats: requestData{
   231  						succeeded:  40,
   232  						errored:    20,
   233  						inProgress: 20,
   234  						issued:     80,
   235  					},
   236  
   237  					loadStats: map[string]serverLoadData{
   238  						"net":  {count: 40, sum: 40},
   239  						"disk": {count: 40, sum: 80},
   240  						"cpu":  {count: 40, sum: 120},
   241  						"mem":  {count: 40, sum: 160},
   242  					},
   243  				},
   244  			},
   245  		}
   246  	)
   247  
   248  	reportLoad := func(ls *PerClusterReporter) {
   249  		for category, count := range drops {
   250  			for i := 0; i < count; i++ {
   251  				ls.CallDropped(category)
   252  			}
   253  		}
   254  		for locality, data := range ld {
   255  			for i := 0; i < data.start; i++ {
   256  				ls.CallStarted(locality)
   257  			}
   258  			for i := 0; i < data.success; i++ {
   259  				ls.CallFinished(locality, nil)
   260  				for n, d := range data.serverData {
   261  					ls.CallServerLoad(locality, n, d)
   262  				}
   263  			}
   264  			for i := 0; i < data.failure; i++ {
   265  				ls.CallFinished(locality, errTest)
   266  			}
   267  		}
   268  	}
   269  
   270  	ls := PerClusterReporter{}
   271  	reportLoad(&ls)
   272  	gotStoreData := ls.stats()
   273  	if err := verifyLoadStoreData([]*loadData{wantStoreData}, []*loadData{gotStoreData}); err != nil {
   274  		t.Error(err)
   275  	}
   276  
   277  	// The above call to stats() should have reset all load reports except the
   278  	// inProgress rpc count. We are now going to push the same load data into
   279  	// the store. So, we should expect to see twice the count for inProgress.
   280  	for _, l := range localities {
   281  		ls := wantStoreData.localityStats[l]
   282  		ls.requestStats.inProgress *= 2
   283  		wantStoreData.localityStats[l] = ls
   284  	}
   285  	reportLoad(&ls)
   286  	gotStoreData = ls.stats()
   287  	if err := verifyLoadStoreData([]*loadData{wantStoreData}, []*loadData{gotStoreData}); err != nil {
   288  		t.Error(err)
   289  	}
   290  }
   291  
   292  var sortDataSlice = cmp.Transformer("SortDataSlice", func(in []*loadData) []*loadData {
   293  	out := append([]*loadData(nil), in...) // Copy input to avoid mutating it
   294  	sort.Slice(out,
   295  		func(i, j int) bool {
   296  			if out[i].cluster < out[j].cluster {
   297  				return true
   298  			}
   299  			if out[i].cluster == out[j].cluster {
   300  				return out[i].service < out[j].service
   301  			}
   302  			return false
   303  		},
   304  	)
   305  	return out
   306  })
   307  
   308  // Test all load are returned for the given clusters, and all clusters are
   309  // reported if no cluster is specified.
   310  func TestStoreStats(t *testing.T) {
   311  	var (
   312  		testClusters = []string{"c0", "c1", "c2"}
   313  		testServices = []string{"s0", "s1"}
   314  		testLocality = clients.Locality{Region: "test-locality"}
   315  	)
   316  
   317  	store := newLoadStore()
   318  	for _, c := range testClusters {
   319  		for _, s := range testServices {
   320  			store.ReporterForCluster(c, s).CallStarted(testLocality)
   321  			store.ReporterForCluster(c, s).CallServerLoad(testLocality, "abc", 123)
   322  			store.ReporterForCluster(c, s).CallDropped("dropped")
   323  			store.ReporterForCluster(c, s).CallFinished(testLocality, nil)
   324  		}
   325  	}
   326  
   327  	wantC0 := []*loadData{
   328  		{
   329  			cluster: "c0", service: "s0",
   330  			totalDrops: 1, drops: map[string]uint64{"dropped": 1},
   331  			localityStats: map[clients.Locality]localityData{
   332  				testLocality: {
   333  					requestStats: requestData{succeeded: 1, issued: 1},
   334  					loadStats:    map[string]serverLoadData{"abc": {count: 1, sum: 123}},
   335  				},
   336  			},
   337  		},
   338  		{
   339  			cluster: "c0", service: "s1",
   340  			totalDrops: 1, drops: map[string]uint64{"dropped": 1},
   341  			localityStats: map[clients.Locality]localityData{
   342  				testLocality: {
   343  					requestStats: requestData{succeeded: 1, issued: 1},
   344  					loadStats:    map[string]serverLoadData{"abc": {count: 1, sum: 123}},
   345  				},
   346  			},
   347  		},
   348  	}
   349  	// Call Stats with just "c0", this should return data for "c0", and not
   350  	// touch data for other clusters.
   351  	gotC0 := store.stats([]string{"c0"})
   352  	verifyLoadStoreData(wantC0, gotC0)
   353  
   354  	wantOther := []*loadData{
   355  		{
   356  			cluster: "c1", service: "s0",
   357  			totalDrops: 1, drops: map[string]uint64{"dropped": 1},
   358  			localityStats: map[clients.Locality]localityData{
   359  				testLocality: {
   360  					requestStats: requestData{succeeded: 1, issued: 1},
   361  					loadStats:    map[string]serverLoadData{"abc": {count: 1, sum: 123}},
   362  				},
   363  			},
   364  		},
   365  		{
   366  			cluster: "c1", service: "s1",
   367  			totalDrops: 1, drops: map[string]uint64{"dropped": 1},
   368  			localityStats: map[clients.Locality]localityData{
   369  				testLocality: {
   370  					requestStats: requestData{succeeded: 1, issued: 1},
   371  					loadStats:    map[string]serverLoadData{"abc": {count: 1, sum: 123}},
   372  				},
   373  			},
   374  		},
   375  		{
   376  			cluster: "c2", service: "s0",
   377  			totalDrops: 1, drops: map[string]uint64{"dropped": 1},
   378  			localityStats: map[clients.Locality]localityData{
   379  				testLocality: {
   380  					requestStats: requestData{succeeded: 1, issued: 1},
   381  					loadStats:    map[string]serverLoadData{"abc": {count: 1, sum: 123}},
   382  				},
   383  			},
   384  		},
   385  		{
   386  			cluster: "c2", service: "s1",
   387  			totalDrops: 1, drops: map[string]uint64{"dropped": 1},
   388  			localityStats: map[clients.Locality]localityData{
   389  				testLocality: {
   390  					requestStats: requestData{succeeded: 1, issued: 1},
   391  					loadStats:    map[string]serverLoadData{"abc": {count: 1, sum: 123}},
   392  				},
   393  			},
   394  		},
   395  	}
   396  	// Call Stats with empty slice, this should return data for all the
   397  	// remaining clusters, and not include c0 (because c0 data was cleared).
   398  	gotOther := store.stats(nil)
   399  	if err := verifyLoadStoreData(wantOther, gotOther); err != nil {
   400  		t.Error(err)
   401  	}
   402  }
   403  
   404  // Test the cases that if a cluster doesn't have load to report, its data is not
   405  // appended to the slice returned by Stats().
   406  func TestStoreStatsEmptyDataNotReported(t *testing.T) {
   407  	var (
   408  		testServices = []string{"s0", "s1"}
   409  		testLocality = clients.Locality{Region: "test-locality"}
   410  	)
   411  
   412  	store := newLoadStore()
   413  	// "c0"'s RPCs all finish with success.
   414  	for _, s := range testServices {
   415  		store.ReporterForCluster("c0", s).CallStarted(testLocality)
   416  		store.ReporterForCluster("c0", s).CallFinished(testLocality, nil)
   417  	}
   418  	// "c1"'s RPCs never finish (always inprocess).
   419  	for _, s := range testServices {
   420  		store.ReporterForCluster("c1", s).CallStarted(testLocality)
   421  	}
   422  
   423  	want0 := []*loadData{
   424  		{
   425  			cluster: "c0", service: "s0",
   426  			localityStats: map[clients.Locality]localityData{
   427  				testLocality: {requestStats: requestData{succeeded: 1, issued: 1}},
   428  			},
   429  		},
   430  		{
   431  			cluster: "c0", service: "s1",
   432  			localityStats: map[clients.Locality]localityData{
   433  				testLocality: {requestStats: requestData{succeeded: 1, issued: 1}},
   434  			},
   435  		},
   436  		{
   437  			cluster: "c1", service: "s0",
   438  			localityStats: map[clients.Locality]localityData{
   439  				testLocality: {requestStats: requestData{inProgress: 1, issued: 1}},
   440  			},
   441  		},
   442  		{
   443  			cluster: "c1", service: "s1",
   444  			localityStats: map[clients.Locality]localityData{
   445  				testLocality: {requestStats: requestData{inProgress: 1, issued: 1}},
   446  			},
   447  		},
   448  	}
   449  	// Call Stats with empty slice, this should return data for all the
   450  	// clusters.
   451  	got0 := store.stats(nil)
   452  	if err := verifyLoadStoreData(want0, got0); err != nil {
   453  		t.Error(err)
   454  	}
   455  
   456  	want1 := []*loadData{
   457  		{
   458  			cluster: "c1", service: "s0",
   459  			localityStats: map[clients.Locality]localityData{
   460  				testLocality: {requestStats: requestData{inProgress: 1}},
   461  			},
   462  		},
   463  		{
   464  			cluster: "c1", service: "s1",
   465  			localityStats: map[clients.Locality]localityData{
   466  				testLocality: {requestStats: requestData{inProgress: 1}},
   467  			},
   468  		},
   469  	}
   470  	// Call Stats with empty slice again, this should return data only for "c1",
   471  	// because "c0" data was cleared, but "c1" has in-progress RPCs.
   472  	got1 := store.stats(nil)
   473  	if err := verifyLoadStoreData(want1, got1); err != nil {
   474  		t.Error(err)
   475  	}
   476  }
   477  
   478  // TestStoreReportInterval verify that the load report interval gets
   479  // calculated at every stats() call and is the duration between start of last
   480  // load reporting to next stats() call.
   481  func TestStoreReportInterval(t *testing.T) {
   482  	originalTimeNow := lrsclientinternal.TimeNow
   483  	t.Cleanup(func() { lrsclientinternal.TimeNow = originalTimeNow })
   484  
   485  	// Initial time for reporter creation
   486  	currentTime := time.Now()
   487  	lrsclientinternal.TimeNow = func() time.Time {
   488  		return currentTime
   489  	}
   490  
   491  	store := newLoadStore()
   492  	reporter := store.ReporterForCluster("test-cluster", "test-service")
   493  	// Report dummy drop to ensure stats1 is not nil.
   494  	reporter.CallDropped("dummy-category")
   495  
   496  	// Update currentTime to simulate the passage of time between the reporter
   497  	// creation and first stats() call.
   498  	currentTime = currentTime.Add(5 * time.Second)
   499  	stats1 := reporter.stats()
   500  
   501  	if stats1 == nil {
   502  		t.Fatalf("stats1 is nil after reporting a drop, want non-nil")
   503  	}
   504  	// Verify stats() call calculate the report interval from the time of
   505  	// reporter creation.
   506  	if got, want := stats1.reportInterval, 5*time.Second; got != want {
   507  		t.Errorf("stats1.reportInterval = %v, want %v", stats1.reportInterval, want)
   508  	}
   509  
   510  	// Update currentTime to simulate the passage of time between the first
   511  	// and second stats() call.
   512  	currentTime = currentTime.Add(10 * time.Second)
   513  	// Report another dummy drop to ensure stats2 is not nil.
   514  	reporter.CallDropped("dummy-category-2")
   515  	stats2 := reporter.stats()
   516  
   517  	if stats2 == nil {
   518  		t.Fatalf("stats2 is nil after reporting a drop, want non-nil")
   519  	}
   520  	// Verify stats() call calculate the report interval from the time of first
   521  	// stats() call.
   522  	if got, want := stats2.reportInterval, 10*time.Second; got != want {
   523  		t.Errorf("stats2.reportInterval = %v, want %v", stats2.reportInterval, want)
   524  	}
   525  }