vitess.io/vitess@v0.16.2/go/vt/discovery/replicationlag_test.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package discovery
    18  
    19  import (
    20  	"fmt"
    21  	"testing"
    22  	"time"
    23  
    24  	"vitess.io/vitess/go/test/utils"
    25  
    26  	querypb "vitess.io/vitess/go/vt/proto/query"
    27  	"vitess.io/vitess/go/vt/topo"
    28  )
    29  
    30  func init() {
    31  	lowReplicationLag = 30 * time.Second
    32  	highReplicationLagMinServing = 2 * time.Hour
    33  	minNumTablets = 2
    34  	legacyReplicationLagAlgorithm = true
    35  }
    36  
    37  // testSetLegacyReplicationLagAlgorithm overwrites the package-level legacyReplicationLagAlgorithm flag with newLegacy. It is a test-only helper and must never be called from a production code path. It does not restore the previous value; callers reset the flag themselves.
    38  func testSetLegacyReplicationLagAlgorithm(newLegacy bool) {
    39  	legacyReplicationLagAlgorithm = newLegacy
    40  }
    41  
    42  // testSetMinNumTablets overwrites the package-level minNumTablets setting with newMin. It is a test-only helper and must never be called from a production code path. It does not restore the previous value; callers reset the setting themselves.
    43  func testSetMinNumTablets(newMin int) {
    44  	minNumTablets = newMin
    45  }
    46  
    47  func TestFilterByReplicationLagUnhealthy(t *testing.T) {
    48  	// 1 healthy serving tablet, 1 not healthy
    49  	ts1 := &TabletHealth{
    50  		Tablet:  topo.NewTablet(1, "cell", "host1"),
    51  		Serving: true,
    52  		Stats:   &querypb.RealtimeStats{},
    53  	}
    54  	ts2 := &TabletHealth{
    55  		Tablet:  topo.NewTablet(2, "cell", "host2"),
    56  		Serving: false,
    57  		Stats:   &querypb.RealtimeStats{},
    58  	}
    59  	got := FilterStatsByReplicationLag([]*TabletHealth{ts1, ts2})
    60  	want := []*TabletHealth{ts1}
    61  	mustMatch(t, want, got, "FilterStatsByReplicationLag")
    62  }
    63  
    64  func TestFilterByReplicationLag(t *testing.T) {
    65  	// Use simplified logic
    66  	testSetLegacyReplicationLagAlgorithm(false)
    67  
    68  	cases := []struct {
    69  		description string
    70  		input       []uint32
    71  		output      []uint32
    72  	}{
    73  		{
    74  			"0 tablet",
    75  			[]uint32{},
    76  			[]uint32{},
    77  		},
    78  		{
    79  			"lags of (1s) - return all items with low lag.",
    80  			[]uint32{1},
    81  			[]uint32{1},
    82  		},
    83  		{
    84  			"lags of (1s, 1s, 1s, 30s) - return all items with low lag.",
    85  			[]uint32{1, 1, 1, 30},
    86  			[]uint32{1, 1, 1, 30},
    87  		},
    88  		{
    89  			"lags of (1s, 1s, 1s, 40m, 40m, 40m) - return all items with low lag.",
    90  			[]uint32{1, 1, 1, 40 * 60, 40 * 60, 40 * 60},
    91  			[]uint32{1, 1, 1},
    92  		},
    93  		{
    94  			"lags of (1s, 40m, 40m, 40m) - return at least 2 items if they don't have very high lag.",
    95  			[]uint32{1, 40 * 60, 40 * 60, 40 * 60},
    96  			[]uint32{1, 40 * 60},
    97  		},
    98  		{
    99  			"lags of (30m, 35m, 40m, 45m) - return at least 2 items if they don't have very high lag.",
   100  			[]uint32{30 * 60, 35 * 60, 40 * 60, 45 * 60},
   101  			[]uint32{30 * 60, 35 * 60},
   102  		},
   103  		{
   104  			"lags of (2h, 3h, 4h, 5h) - return <2 items if the others have very high lag.",
   105  			[]uint32{2 * 60 * 60, 3 * 60 * 60, 4 * 60 * 60, 5 * 60 * 60},
   106  			[]uint32{2 * 60 * 60},
   107  		},
   108  		{
   109  			"lags of (3h, 30h) - return nothing if all have very high lag.",
   110  			[]uint32{3 * 60 * 60, 30 * 60 * 60},
   111  			[]uint32{},
   112  		},
   113  	}
   114  
   115  	for _, tc := range cases {
   116  		lts := make([]*TabletHealth, len(tc.input))
   117  		for i, lag := range tc.input {
   118  			lts[i] = &TabletHealth{
   119  				Tablet:  topo.NewTablet(uint32(i+1), "cell", fmt.Sprintf("host-%vs-behind", lag)),
   120  				Serving: true,
   121  				Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: lag},
   122  			}
   123  		}
   124  		got := FilterStatsByReplicationLag(lts)
   125  		if len(got) != len(tc.output) {
   126  			t.Errorf("FilterStatsByReplicationLag(%v) failed: got output:\n%v\nExpected: %v", tc.description, got, tc.output)
   127  			continue
   128  		}
   129  		for i, elag := range tc.output {
   130  			if got[i].Stats.ReplicationLagSeconds != elag {
   131  				t.Errorf("FilterStatsByReplicationLag(%v) failed: got output:\n%v\nExpected value index %v to be %v", tc.description, got, i, elag)
   132  			}
   133  		}
   134  	}
   135  
   136  	// Reset to the default
   137  	testSetLegacyReplicationLagAlgorithm(true)
   138  }
   139  
   140  func TestFilterByReplicationLagThreeTabletMin(t *testing.T) {
   141  	// Use at least 3 tablets if possible
   142  	testSetMinNumTablets(3)
   143  	// lags of (1s, 1s, 10m, 11m) - returns at least32 items where the slightly delayed ones that are returned are the 10m and 11m ones.
   144  	ts1 := &TabletHealth{
   145  		Tablet:  topo.NewTablet(1, "cell", "host1"),
   146  		Serving: true,
   147  		Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: 1},
   148  	}
   149  	ts2 := &TabletHealth{
   150  		Tablet:  topo.NewTablet(2, "cell", "host2"),
   151  		Serving: true,
   152  		Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: 1},
   153  	}
   154  	ts3 := &TabletHealth{
   155  		Tablet:  topo.NewTablet(3, "cell", "host3"),
   156  		Serving: true,
   157  		Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: 10 * 60},
   158  	}
   159  	ts4 := &TabletHealth{
   160  		Tablet:  topo.NewTablet(4, "cell", "host4"),
   161  		Serving: true,
   162  		Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: 11 * 60},
   163  	}
   164  	got := FilterStatsByReplicationLag([]*TabletHealth{ts1, ts2, ts3, ts4})
   165  	want := []*TabletHealth{ts1, ts2, ts3}
   166  	mustMatch(t, want, got, "FilterStatsByReplicationLag")
   167  
   168  	// lags of (11m, 10m, 1s, 1s) - reordered tablets returns the same 3 items where the slightly delayed one that is returned is the 10m and 11m ones.
   169  	ts1 = &TabletHealth{
   170  		Tablet:  topo.NewTablet(1, "cell", "host1"),
   171  		Serving: true,
   172  		Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: 11 * 60},
   173  	}
   174  	ts2 = &TabletHealth{
   175  		Tablet:  topo.NewTablet(2, "cell", "host2"),
   176  		Serving: true,
   177  		Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: 10 * 60},
   178  	}
   179  	ts3 = &TabletHealth{
   180  		Tablet:  topo.NewTablet(3, "cell", "host3"),
   181  		Serving: true,
   182  		Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: 1},
   183  	}
   184  	ts4 = &TabletHealth{
   185  		Tablet:  topo.NewTablet(4, "cell", "host4"),
   186  		Serving: true,
   187  		Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: 1},
   188  	}
   189  	got = FilterStatsByReplicationLag([]*TabletHealth{ts1, ts2, ts3, ts4})
   190  	want = []*TabletHealth{ts3, ts4, ts2}
   191  	mustMatch(t, want, got, "FilterStatsByReplicationLag")
   192  	// Reset to the default
   193  	testSetMinNumTablets(2)
   194  }
   195  
   196  func TestFilterStatsByReplicationLagOneTabletMin(t *testing.T) {
   197  	// Use at least 1 tablets if possible
   198  	testSetMinNumTablets(1)
   199  	// lags of (1s, 100m) - return only healthy tablet if that is all that is available.
   200  	ts1 := &TabletHealth{
   201  		Tablet:  topo.NewTablet(1, "cell", "host1"),
   202  		Serving: true,
   203  		Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: 1},
   204  	}
   205  	ts2 := &TabletHealth{
   206  		Tablet:  topo.NewTablet(2, "cell", "host2"),
   207  		Serving: true,
   208  		Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: 100 * 60},
   209  	}
   210  	got := FilterStatsByReplicationLag([]*TabletHealth{ts1, ts2})
   211  	want := []*TabletHealth{ts1}
   212  	mustMatch(t, want, got, "FilterStatsByReplicationLag")
   213  
   214  	// lags of (1m, 100m) - return only healthy tablet if that is all that is healthy enough.
   215  	ts1 = &TabletHealth{
   216  		Tablet:  topo.NewTablet(1, "cell", "host1"),
   217  		Serving: true,
   218  		Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: 1 * 60},
   219  	}
   220  	ts2 = &TabletHealth{
   221  		Tablet:  topo.NewTablet(2, "cell", "host2"),
   222  		Serving: true,
   223  		Stats:   &querypb.RealtimeStats{ReplicationLagSeconds: 100 * 60},
   224  	}
   225  	got = FilterStatsByReplicationLag([]*TabletHealth{ts1, ts2})
   226  	want = []*TabletHealth{ts1}
   227  	utils.MustMatch(t, want, got, "FilterStatsByReplicationLag")
   228  	// Reset to the default
   229  	testSetMinNumTablets(2)
   230  }