github.com/matrixorigin/matrixone@v1.2.0/pkg/proxy/rebalancer_test.go

// Copyright 2021 - 2023 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"context"
	"fmt"
	"net"
	"os"
	"testing"
	"time"

	"github.com/lni/goutils/leaktest"
	"github.com/matrixorigin/matrixone/pkg/clusterservice"
	"github.com/matrixorigin/matrixone/pkg/common/log"
	"github.com/matrixorigin/matrixone/pkg/common/runtime"
	"github.com/matrixorigin/matrixone/pkg/common/stopper"
	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
	"github.com/stretchr/testify/require"
)

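// testRebalancer builds a rebalancer on the given stopper, logger and cluster
// with a short interval (200ms) and a default tolerance of 0.3; the subtests
// below overwrite the tolerance field directly where they need other values.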
func testRebalancer(
	t *testing.T, st *stopper.Stopper, logger *log.MOLogger, mc clusterservice.MOCluster,
) *rebalancer {
	var opts []rebalancerOption
	opts = append(opts,
		withRebalancerInterval(200*time.Millisecond),
		withRebalancerTolerance(0.3),
	)
	re, err := newRebalancer(st, logger, mc, opts...)
	require.NoError(t, err)
	return re
}

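// TestCollectTunnels registers three CN servers under the same label hash and
// checks how many tunnels collectTunnels picks for migration at different
// tolerance values. Each subtest builds its own rebalancer, so only the
// tunnels connected within that subtest are counted.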
func TestCollectTunnels(t *testing.T) {
	rt := runtime.DefaultRuntime()
	runtime.SetupProcessLevelRuntime(rt)
	hc := &mockHAKeeperClient{}
	mc := clusterservice.NewMOCluster(hc, 3*time.Second)
	defer mc.Close()
	rt.SetGlobalVariables(runtime.ClusterService, mc)
	logger := rt.Logger()
	st := stopper.NewStopper("test-proxy", stopper.WithLogger(rt.Logger().RawLogger()))
	defer st.Stop()
	ha := LabelHash("hash1")
	reqLabel := newLabelInfo("t1", map[string]string{
		"k1": "v1",
		"k2": "v2",
	})
	cnLabels := map[string]metadata.LabelList{
		tenantLabelKey: {Labels: []string{"t1"}},
		"k1":           {Labels: []string{"v1"}},
		"k2":           {Labels: []string{"v2"}},
	}

	cn11 := prepareCN("cn11", hc, ha, reqLabel, cnLabels)
	cn12 := prepareCN("cn12", hc, ha, reqLabel, cnLabels)
	_ = prepareCN("cn13", hc, ha, reqLabel, cnLabels)
	mc.ForceRefresh(true)

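	// Three CNs match the hash; 4 connections go to cn11 and 1 to cn12, so the
	// average load is about 1.67. With the tight tolerance of 0.1, two tunnels
	// are expected to be collected from cn11. (The exact threshold is an
	// implementation detail of collectTunnels; the comments here only restate
	// what the assertions expect.)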
	t.Run("tolerance-0.1", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0.1
		tu1 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu1)
		tu2 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu2)
		tu3 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu3)
		tu4 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu4)
		tu5 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn12, tu5)
		require.Equal(t, 2, len(re.collectTunnels(ha)))
	})

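	// Only cn11 holds connections here (4 in total across 3 CNs, average about
	// 1.33); at tolerance 0.3 two tunnels are expected to be collected.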
	t.Run("tolerance-0.3", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0.3
		tu1 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu1)
		tu2 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu2)
		tu3 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu3)
		tu4 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu4)
		require.Equal(t, 2, len(re.collectTunnels(ha)))
	})

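	// With the looser tolerance of 0.8 the same distribution is acceptable up
	// to a higher per-CN bound, so only one tunnel is expected to be collected.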
	t.Run("tolerance-0.8", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0.8
		tu1 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu1)
		tu2 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu2)
		tu3 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu3)
		tu4 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu4)
		require.Equal(t, 1, len(re.collectTunnels(ha)))
	})
}

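// TestCollectTunnels_Mixed covers clusters that mix shared CNs (registered
// without tenant labels) and tenant-labeled CNs: connections should be
// balanced within the shared group, balanced within the selected (labeled)
// group, and migrated from shared CNs once labeled CNs are available.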
func TestCollectTunnels_Mixed(t *testing.T) {
	rt := runtime.DefaultRuntime()
	runtime.SetupProcessLevelRuntime(rt)
	logger := rt.Logger()
	st := stopper.NewStopper("test-proxy", stopper.WithLogger(rt.Logger().RawLogger()))
	defer st.Stop()
	ha := LabelHash("hash1")
	cs := newCounterSet()
	reqLabel := newLabelInfo("t1", map[string]string{
		"k1": "v1",
	})
	cnLabels := map[string]metadata.LabelList{
		tenantLabelKey: {Labels: []string{"t1"}},
		"k1":           {Labels: []string{"v1"}},
	}

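	// Two shared CNs only: three connections on shared01 and one on shared02.
	// With zero tolerance, one tunnel is expected to be collected from
	// shared01 to even out the load.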
	t.Run("balance-in-shared", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()
		hc := &mockHAKeeperClient{}
		mc := clusterservice.NewMOCluster(hc, 3*time.Second)
		defer mc.Close()
		rt.SetGlobalVariables(runtime.ClusterService, mc)

		shared01 := prepareCN("shared01", hc, ha, reqLabel, nil)
		shared02 := prepareCN("shared02", hc, ha, reqLabel, nil)
		mc.ForceRefresh(true)

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0
		tu1 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared01, tu1)
		tu2 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared01, tu2)
		tu3 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared01, tu3)
		tu4 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared02, tu4)
		require.Equal(t, 1, len(re.collectTunnels(ha)))
	})

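	// Same distribution as above, but the connections live on the two
	// tenant-labeled CNs while two shared CNs also exist; balancing happens
	// within the selected (labeled) group, so one tunnel is collected from
	// tenant01.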
	t.Run("balance-in-selected", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()
		hc := &mockHAKeeperClient{}
		mc := clusterservice.NewMOCluster(hc, 3*time.Second)
		defer mc.Close()
		rt.SetGlobalVariables(runtime.ClusterService, mc)

		_ = prepareCN("shared01", hc, ha, reqLabel, nil)
		_ = prepareCN("shared02", hc, ha, reqLabel, nil)
		tenant01 := prepareCN("tenant01", hc, ha, reqLabel, cnLabels)
		tenant02 := prepareCN("tenant02", hc, ha, reqLabel, cnLabels)
		mc.ForceRefresh(true)

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0
		tu1 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant01, tu1)
		tu2 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant01, tu2)
		tu3 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant01, tu3)
		tu4 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant02, tu4)
		require.Equal(t, 1, len(re.collectTunnels(ha)))
	})

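	// All four connections sit on the shared CNs while a tenant-labeled CN is
	// available, so every tunnel is expected to be collected for migration to
	// tenant01.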
	t.Run("migrate-tunnels-to-selected", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()
		hc := &mockHAKeeperClient{}
		mc := clusterservice.NewMOCluster(hc, 3*time.Second)
		defer mc.Close()
		rt.SetGlobalVariables(runtime.ClusterService, mc)

		shared01 := prepareCN("shared01", hc, ha, reqLabel, nil)
		shared02 := prepareCN("shared02", hc, ha, reqLabel, nil)
		_ = prepareCN("tenant01", hc, ha, reqLabel, cnLabels)
		mc.ForceRefresh(true)

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0
		tu1 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared01, tu1)
		tu2 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared01, tu2)
		tu3 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared02, tu3)
		tu4 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared02, tu4)
		require.Equal(t, 4, len(re.collectTunnels(ha)))
	})

	t.Run("mixed-tunnels", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()
		hc := &mockHAKeeperClient{}
		mc := clusterservice.NewMOCluster(hc, 3*time.Second)
		defer mc.Close()
		rt.SetGlobalVariables(runtime.ClusterService, mc)

		shared01 := prepareCN("shared01", hc, ha, reqLabel, nil)
		_ = prepareCN("shared02", hc, ha, reqLabel, nil)
		tenant01 := prepareCN("tenant01", hc, ha, reqLabel, cnLabels)
		tenant02 := prepareCN("tenant02", hc, ha, reqLabel, cnLabels)
		mc.ForceRefresh(true)

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0
		tu1 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant01, tu1)
		tu2 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant01, tu2)
		tu3 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant02, tu3)
		tu4 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared01, tu4)
		// tenant01: 2 connections
		// tenant02: 1 connection
		// shared01: 1 connection
		// shared02: 0 connections
		// In this case, tu4 on shared01 is expected to be collected so that it
		// can be migrated to tenant02.
		tunnels := re.collectTunnels(ha)
		require.Equal(t, 1, len(tunnels))
		require.Equal(t, tu4, tunnels[0])
	})
}

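// TestDoRebalance runs the full rebalancing loop against two real test CN
// servers listening on unix sockets. All four client connections start on
// cn11; with the default tolerance of 0.3 the rebalancer is expected to move
// one of them to cn12.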
func TestDoRebalance(t *testing.T) {
	defer leaktest.AfterTest(t)()

	var err error
	tp := newTestProxyHandler(t)
	defer tp.closeFn()

	temp := os.TempDir()
	// Construct backend CN servers.
	addr1 := fmt.Sprintf("%s/%d.sock", temp, time.Now().Nanosecond())
	require.NoError(t, os.RemoveAll(addr1))
	cn11 := testMakeCNServer("cn11", addr1, 0, "",
		newLabelInfo("t1", map[string]string{
			"k1": "v1",
			"k2": "v2",
		}),
	)
	li := labelInfo{
		Tenant: "t1",
		Labels: map[string]string{
			"k1": "v1",
			"k2": "v2",
		},
	}
	cn11.hash, err = li.getHash()
	require.NoError(t, err)
	tp.hc.updateCN("cn11", cn11.addr, map[string]metadata.LabelList{
		tenantLabelKey: {Labels: []string{"t1"}},
		"k1":           {Labels: []string{"v1"}},
		"k2":           {Labels: []string{"v2"}},
	})
	stopFn11 := startTestCNServer(t, tp.ctx, addr1, nil)
	defer func() {
		require.NoError(t, stopFn11())
	}()

	addr2 := fmt.Sprintf("%s/%d.sock", temp, time.Now().Nanosecond())
	require.NoError(t, os.RemoveAll(addr2))
	cn12 := testMakeCNServer("cn12", addr2, 0, "",
		newLabelInfo("t1", map[string]string{
			"k1": "v1",
			"k2": "v2",
		}),
	)
	cn12.hash, err = li.getHash()
	require.NoError(t, err)
	tp.hc.updateCN("cn12", cn12.addr, map[string]metadata.LabelList{
		tenantLabelKey: {Labels: []string{"t1"}},
		"k1":           {Labels: []string{"v1"}},
		"k2":           {Labels: []string{"v2"}},
	})
	stopFn12 := startTestCNServer(t, tp.ctx, addr2, nil)
	defer func() {
		require.NoError(t, stopFn12())
	}()
	tp.mc.ForceRefresh(true)

	ctx, cancel := context.WithTimeout(tp.ctx, 10*time.Second)
	defer cancel()

	ci := clientInfo{
		labelInfo: li,
		username:  "test",
		originIP:  net.ParseIP("127.0.0.1"),
	}
	// There are two servers, cn11 and cn12. All 4 connections start on cn11
	// and the tolerance is 0.3, so in the end there should be 3 connections
	// on cn11 and 1 connection on cn12.
	cleanup := testStartNClients(t, tp, ci, cn11, 4)
	defer cleanup()

	tick := time.NewTicker(time.Millisecond * 200)
	defer tick.Stop()
	for {
		select {
		case <-ctx.Done():
			require.Fail(t, "rebalance failed")
		case <-tick.C:
			tunnels := tp.re.connManager.getCNTunnels(cn11.hash)
			tp.re.connManager.Lock()
			if tunnels["cn11"].count() == 3 && tunnels["cn12"].count() == 1 {
				tp.re.connManager.Unlock()
				return
			}
			tp.re.connManager.Unlock()
		}
	}
}

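// prepareCN constructs a CNServer with a fresh socket path under the temp
// directory and registers it with the mock HAKeeper client under the given
// labels; it does not start an actual backend listener.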
func prepareCN(uid string, hc *mockHAKeeperClient, ha LabelHash, reLabels labelInfo, cnLabels map[string]metadata.LabelList) *CNServer {
	temp := os.TempDir()
	addr := fmt.Sprintf("%s/%d.sock", temp, time.Now().Nanosecond())
	_ = os.RemoveAll(addr)
	cn := testMakeCNServer(uid, addr, 0, ha, reLabels)
	hc.updateCN(uid, cn.addr, cnLabels)
	return cn
}