vitess.io/vitess@v0.16.2/go/test/endtoend/tabletmanager/throttler/throttler_test.go (about)

     1  /*
     2  Copyright 2020 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  package throttler
    17  
    18  import (
    19  	"context"
    20  	"flag"
    21  	"fmt"
    22  	"io"
    23  	"net/http"
    24  	"os"
    25  	"testing"
    26  	"time"
    27  
    28  	"vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/base"
    29  
    30  	"vitess.io/vitess/go/test/endtoend/cluster"
    31  
    32  	"github.com/stretchr/testify/assert"
    33  	"github.com/stretchr/testify/require"
    34  )
    35  
    36  var (
    37  	clusterInstance *cluster.LocalProcessCluster
    38  	primaryTablet   *cluster.Vttablet
    39  	replicaTablet   *cluster.Vttablet
    40  	hostname        = "localhost"
    41  	keyspaceName    = "ks"
    42  	cell            = "zone1"
    43  	sqlSchema       = `
    44  	create table t1(
    45  		id bigint,
    46  		value varchar(16),
    47  		primary key(id)
    48  	) Engine=InnoDB;
    49  `
    50  
    51  	vSchema = `
    52  	{
    53      "sharded": true,
    54      "vindexes": {
    55        "hash": {
    56          "type": "hash"
    57        }
    58      },
    59      "tables": {
    60        "t1": {
    61          "column_vindexes": [
    62            {
    63              "column": "id",
    64              "name": "hash"
    65            }
    66          ]
    67        }
    68      }
    69  	}`
    70  
    71  	httpClient           = base.SetupHTTPClient(time.Second)
    72  	throttledAppsAPIPath = "throttler/throttled-apps"
    73  	checkAPIPath         = "throttler/check"
    74  	checkSelfAPIPath     = "throttler/check-self"
    75  )
    76  
    77  const (
    78  	throttlerThreshold        = 1 * time.Second // standard, tight threshold
    79  	onDemandHeartbeatDuration = 5 * time.Second
    80  	applyConfigWait           = 15 * time.Second // time after which we're sure the throttler has refreshed config and tablets
    81  )
    82  
    83  func TestMain(m *testing.M) {
    84  	defer cluster.PanicHandler(nil)
    85  	flag.Parse()
    86  
    87  	exitCode := func() int {
    88  		clusterInstance = cluster.NewCluster(cell, hostname)
    89  		defer clusterInstance.Teardown()
    90  
    91  		// Start topo server
    92  		err := clusterInstance.StartTopo()
    93  		if err != nil {
    94  			return 1
    95  		}
    96  
    97  		// Set extra tablet args for lock timeout
    98  		clusterInstance.VtTabletExtraArgs = []string{
    99  			"--lock_tables_timeout", "5s",
   100  			"--watch_replication_stream",
   101  			"--enable_replication_reporter",
   102  			"--enable-lag-throttler",
   103  			"--throttle_threshold", throttlerThreshold.String(),
   104  			"--heartbeat_enable",
   105  			"--heartbeat_interval", "250ms",
   106  			"--heartbeat_on_demand_duration", onDemandHeartbeatDuration.String(),
   107  			"--disable_active_reparents",
   108  		}
   109  
   110  		// Start keyspace
   111  		keyspace := &cluster.Keyspace{
   112  			Name:      keyspaceName,
   113  			SchemaSQL: sqlSchema,
   114  			VSchema:   vSchema,
   115  		}
   116  
   117  		if err = clusterInstance.StartUnshardedKeyspace(*keyspace, 1, false); err != nil {
   118  			return 1
   119  		}
   120  
   121  		// Collect table paths and ports
   122  		tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets
   123  		for _, tablet := range tablets {
   124  			if tablet.Type == "primary" {
   125  				primaryTablet = tablet
   126  			} else if tablet.Type != "rdonly" {
   127  				replicaTablet = tablet
   128  			}
   129  		}
   130  
   131  		return m.Run()
   132  	}()
   133  	os.Exit(exitCode)
   134  }
   135  
   136  func throttledApps(tablet *cluster.Vttablet) (resp *http.Response, respBody string, err error) {
   137  	resp, err = httpClient.Get(fmt.Sprintf("http://localhost:%d/%s", tablet.HTTPPort, throttledAppsAPIPath))
   138  	if err != nil {
   139  		return resp, respBody, err
   140  	}
   141  	b, err := io.ReadAll(resp.Body)
   142  	if err != nil {
   143  		return resp, respBody, err
   144  	}
   145  	respBody = string(b)
   146  	return resp, respBody, err
   147  }
   148  
   149  func throttleCheck(tablet *cluster.Vttablet, skipRequestHeartbeats bool) (*http.Response, error) {
   150  	return httpClient.Get(fmt.Sprintf("http://localhost:%d/%s?s=%t", tablet.HTTPPort, checkAPIPath, skipRequestHeartbeats))
   151  }
   152  
   153  func throttleCheckSelf(tablet *cluster.Vttablet) (*http.Response, error) {
   154  	return httpClient.Head(fmt.Sprintf("http://localhost:%d/%s", tablet.HTTPPort, checkSelfAPIPath))
   155  }
   156  
   157  func warmUpHeartbeat(t *testing.T) (respStatus int) {
   158  	//  because we run with -heartbeat_on_demand_duration=5s, the heartbeat is "cold" right now.
   159  	// Let's warm it up.
   160  	resp, err := throttleCheck(primaryTablet, false)
   161  	require.NoError(t, err)
   162  	defer resp.Body.Close()
   163  	time.Sleep(time.Second)
   164  	return resp.StatusCode
   165  }
   166  
   167  // waitForThrottleCheckStatus waits for the tablet to return the provided HTTP code in a throttle check
   168  func waitForThrottleCheckStatus(t *testing.T, tablet *cluster.Vttablet, wantCode int) {
   169  	_ = warmUpHeartbeat(t)
   170  	ctx, cancel := context.WithTimeout(context.Background(), onDemandHeartbeatDuration+applyConfigWait)
   171  	defer cancel()
   172  
   173  	for {
   174  		resp, err := throttleCheck(tablet, true)
   175  		require.NoError(t, err)
   176  
   177  		if wantCode == resp.StatusCode {
   178  			// Wait for any cached check values to be cleared and the new
   179  			// status value to be in effect everywhere before returning.
   180  			resp.Body.Close()
   181  			return
   182  		}
   183  		select {
   184  		case <-ctx.Done():
   185  			b, err := io.ReadAll(resp.Body)
   186  			require.NoError(t, err)
   187  			resp.Body.Close()
   188  
   189  			assert.Equal(t, wantCode, resp.StatusCode, "body: %v", string(b))
   190  			return
   191  		default:
   192  			resp.Body.Close()
   193  			time.Sleep(time.Second)
   194  		}
   195  	}
   196  }
   197  
   198  func TestThrottlerAfterMetricsCollected(t *testing.T) {
   199  	defer cluster.PanicHandler(t)
   200  
   201  	// We run with on-demand heartbeats. Immediately as the tablet manager opens, it sends a one-time
   202  	// request for heartbeats, which means the throttler is able to collect initial "good" data.
   203  	// After a few seconds, the heartbeat lease terminates. We wait for that.
   204  	// {"StatusCode":429,"Value":4.864921,"Threshold":1,"Message":"Threshold exceeded"}
   205  	t.Run("expect push back once initial heartbeat lease terminates", func(t *testing.T) {
   206  		time.Sleep(onDemandHeartbeatDuration)
   207  		waitForThrottleCheckStatus(t, primaryTablet, http.StatusTooManyRequests)
   208  	})
   209  	t.Run("requesting heartbeats", func(t *testing.T) {
   210  		respStatus := warmUpHeartbeat(t)
   211  		assert.NotEqual(t, http.StatusOK, respStatus)
   212  	})
   213  	t.Run("expect OK once heartbeats lease renewed", func(t *testing.T) {
   214  		time.Sleep(1 * time.Second)
   215  		resp, err := throttleCheck(primaryTablet, false)
   216  		require.NoError(t, err)
   217  		defer resp.Body.Close()
   218  		assert.Equal(t, http.StatusOK, resp.StatusCode)
   219  	})
   220  	t.Run("expect OK once heartbeats lease renewed, still", func(t *testing.T) {
   221  		time.Sleep(1 * time.Second)
   222  		resp, err := throttleCheck(primaryTablet, false)
   223  		require.NoError(t, err)
   224  		defer resp.Body.Close()
   225  		assert.Equal(t, http.StatusOK, resp.StatusCode)
   226  	})
   227  	t.Run("validate throttled-apps", func(t *testing.T) {
   228  		resp, body, err := throttledApps(primaryTablet)
   229  		require.NoError(t, err)
   230  		defer resp.Body.Close()
   231  		assert.Equal(t, http.StatusOK, resp.StatusCode)
   232  		assert.Contains(t, body, "always-throttled-app")
   233  	})
   234  	t.Run("validate check-self", func(t *testing.T) {
   235  		resp, err := throttleCheckSelf(primaryTablet)
   236  		require.NoError(t, err)
   237  		defer resp.Body.Close()
   238  		assert.Equal(t, http.StatusOK, resp.StatusCode)
   239  	})
   240  	t.Run("validate check-self, again", func(t *testing.T) {
   241  		resp, err := throttleCheckSelf(replicaTablet)
   242  		require.NoError(t, err)
   243  		defer resp.Body.Close()
   244  		assert.Equal(t, http.StatusOK, resp.StatusCode)
   245  	})
   246  }
   247  
   248  func TestLag(t *testing.T) {
   249  	defer cluster.PanicHandler(t)
   250  	// Stop VTOrc because we want to stop replication to increase lag.
   251  	// We don't want VTOrc to fix this.
   252  	clusterInstance.DisableVTOrcRecoveries(t)
   253  	defer clusterInstance.EnableVTOrcRecoveries(t)
   254  
   255  	t.Run("stopping replication", func(t *testing.T) {
   256  		err := clusterInstance.VtctlclientProcess.ExecuteCommand("StopReplication", replicaTablet.Alias)
   257  		assert.NoError(t, err)
   258  	})
   259  	t.Run("accumulating lag, expecting throttler push back", func(t *testing.T) {
   260  		time.Sleep(2 * throttlerThreshold)
   261  
   262  		resp, err := throttleCheck(primaryTablet, false)
   263  		require.NoError(t, err)
   264  		defer resp.Body.Close()
   265  		assert.Equal(t, http.StatusTooManyRequests, resp.StatusCode)
   266  	})
   267  	t.Run("primary self-check should still be fine", func(t *testing.T) {
   268  		resp, err := throttleCheckSelf(primaryTablet)
   269  		require.NoError(t, err)
   270  		defer resp.Body.Close()
   271  		// self (on primary) is unaffected by replication lag
   272  		assert.Equal(t, http.StatusOK, resp.StatusCode)
   273  	})
   274  	t.Run("replica self-check should show error", func(t *testing.T) {
   275  		resp, err := throttleCheckSelf(replicaTablet)
   276  		require.NoError(t, err)
   277  		defer resp.Body.Close()
   278  		assert.Equal(t, http.StatusTooManyRequests, resp.StatusCode)
   279  	})
   280  	t.Run("starting replication", func(t *testing.T) {
   281  		err := clusterInstance.VtctlclientProcess.ExecuteCommand("StartReplication", replicaTablet.Alias)
   282  		assert.NoError(t, err)
   283  	})
   284  	t.Run("expecting replication to catch up and throttler check to return OK", func(t *testing.T) {
   285  		waitForThrottleCheckStatus(t, primaryTablet, http.StatusOK)
   286  	})
   287  	t.Run("primary self-check should be fine", func(t *testing.T) {
   288  		resp, err := throttleCheckSelf(primaryTablet)
   289  		require.NoError(t, err)
   290  		defer resp.Body.Close()
   291  		// self (on primary) is unaffected by replication lag
   292  		assert.Equal(t, http.StatusOK, resp.StatusCode)
   293  	})
   294  	t.Run("replica self-check should be fine", func(t *testing.T) {
   295  		resp, err := throttleCheckSelf(replicaTablet)
   296  		require.NoError(t, err)
   297  		defer resp.Body.Close()
   298  		assert.Equal(t, http.StatusOK, resp.StatusCode)
   299  	})
   300  }
   301  
   302  func TestNoReplicas(t *testing.T) {
   303  	defer cluster.PanicHandler(t)
   304  	t.Run("changing replica to RDONLY", func(t *testing.T) {
   305  		err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", replicaTablet.Alias, "RDONLY")
   306  		assert.NoError(t, err)
   307  
   308  		// This makes no REPLICA servers available. We expect something like:
   309  		// {"StatusCode":200,"Value":0,"Threshold":1,"Message":""}
   310  		waitForThrottleCheckStatus(t, primaryTablet, http.StatusOK)
   311  	})
   312  	t.Run("restoring to REPLICA", func(t *testing.T) {
   313  
   314  		err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", replicaTablet.Alias, "REPLICA")
   315  		assert.NoError(t, err)
   316  
   317  		waitForThrottleCheckStatus(t, primaryTablet, http.StatusOK)
   318  	})
   319  }