github.com/matrixorigin/matrixone@v1.2.0/pkg/proxy/rebalancer_test.go

// Copyright 2021 - 2023 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"context"
	"fmt"
	"net"
	"os"
	"testing"
	"time"

	"github.com/lni/goutils/leaktest"
	"github.com/matrixorigin/matrixone/pkg/clusterservice"
	"github.com/matrixorigin/matrixone/pkg/common/log"
	"github.com/matrixorigin/matrixone/pkg/common/runtime"
	"github.com/matrixorigin/matrixone/pkg/common/stopper"
	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
	"github.com/stretchr/testify/require"
)

// testRebalancer creates a rebalancer for tests with a short interval
// (200ms) and a default tolerance of 0.3.
func testRebalancer(
	t *testing.T, st *stopper.Stopper, logger *log.MOLogger, mc clusterservice.MOCluster,
) *rebalancer {
	var opts []rebalancerOption
	opts = append(opts,
		withRebalancerInterval(200*time.Millisecond),
		withRebalancerTolerance(0.3),
	)
	re, err := newRebalancer(st, logger, mc, opts...)
	require.NoError(t, err)
	return re
}

func TestCollectTunnels(t *testing.T) {
	rt := runtime.DefaultRuntime()
	runtime.SetupProcessLevelRuntime(rt)
	hc := &mockHAKeeperClient{}
	mc := clusterservice.NewMOCluster(hc, 3*time.Second)
	defer mc.Close()
	rt.SetGlobalVariables(runtime.ClusterService, mc)
	logger := rt.Logger()
	st := stopper.NewStopper("test-proxy", stopper.WithLogger(rt.Logger().RawLogger()))
	defer st.Stop()
	ha := LabelHash("hash1")
	reqLabel := newLabelInfo("t1", map[string]string{
		"k1": "v1",
		"k2": "v2",
	})
	cnLabels := map[string]metadata.LabelList{
		tenantLabelKey: {Labels: []string{"t1"}},
		"k1":           {Labels: []string{"v1"}},
		"k2":           {Labels: []string{"v2"}},
	}

	cn11 := prepareCN("cn11", hc, ha, reqLabel, cnLabels)
	cn12 := prepareCN("cn12", hc, ha, reqLabel, cnLabels)
	_ = prepareCN("cn13", hc, ha, reqLabel, cnLabels)
	mc.ForceRefresh(true)

	t.Run("tolerance-0.1", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0.1
		tu1 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu1)
		tu2 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu2)
		tu3 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu3)
		tu4 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu4)
		tu5 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn12, tu5)
		require.Equal(t, 2, len(re.collectTunnels(ha)))
	})

	t.Run("tolerance-0.3", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0.3
		tu1 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu1)
		tu2 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu2)
		tu3 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu3)
		tu4 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu4)
		require.Equal(t, 2, len(re.collectTunnels(ha)))
	})

	t.Run("tolerance-0.8", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0.8
		tu1 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu1)
		tu2 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu2)
		tu3 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu3)
		tu4 := newTunnel(ctx, logger, nil)
		re.connManager.connect(cn11, tu4)
		require.Equal(t, 1, len(re.collectTunnels(ha)))
	})
}

func TestCollectTunnels_Mixed(t *testing.T) {
	rt := runtime.DefaultRuntime()
	runtime.SetupProcessLevelRuntime(rt)
	logger := rt.Logger()
	st := stopper.NewStopper("test-proxy", stopper.WithLogger(rt.Logger().RawLogger()))
	defer st.Stop()
	ha := LabelHash("hash1")
	cs := newCounterSet()
	reqLabel := newLabelInfo("t1", map[string]string{
		"k1": "v1",
	})
	cnLabels := map[string]metadata.LabelList{
		tenantLabelKey: {Labels: []string{"t1"}},
		"k1":           {Labels: []string{"v1"}},
	}

	t.Run("balance-in-shared", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()
		hc := &mockHAKeeperClient{}
		mc := clusterservice.NewMOCluster(hc, 3*time.Second)
		defer mc.Close()
		rt.SetGlobalVariables(runtime.ClusterService, mc)

		shared01 := prepareCN("shared01", hc, ha, reqLabel, nil)
		shared02 := prepareCN("shared02", hc, ha, reqLabel, nil)
		mc.ForceRefresh(true)

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0
		tu1 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared01, tu1)
		tu2 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared01, tu2)
		tu3 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared01, tu3)
		tu4 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared02, tu4)
		require.Equal(t, 1, len(re.collectTunnels(ha)))
	})

	t.Run("balance-in-selected", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()
		hc := &mockHAKeeperClient{}
		mc := clusterservice.NewMOCluster(hc, 3*time.Second)
		defer mc.Close()
		rt.SetGlobalVariables(runtime.ClusterService, mc)

		_ = prepareCN("shared01", hc, ha, reqLabel, nil)
		_ = prepareCN("shared02", hc, ha, reqLabel, nil)
		tenant01 := prepareCN("tenant01", hc, ha, reqLabel, cnLabels)
		tenant02 := prepareCN("tenant02", hc, ha, reqLabel, cnLabels)
		mc.ForceRefresh(true)

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0
		tu1 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant01, tu1)
		tu2 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant01, tu2)
		tu3 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant01, tu3)
		tu4 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant02, tu4)
		require.Equal(t, 1, len(re.collectTunnels(ha)))
	})

	t.Run("migrate-tunnels-to-selected", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()
		hc := &mockHAKeeperClient{}
		mc := clusterservice.NewMOCluster(hc, 3*time.Second)
		defer mc.Close()
		rt.SetGlobalVariables(runtime.ClusterService, mc)

		shared01 := prepareCN("shared01", hc, ha, reqLabel, nil)
		shared02 := prepareCN("shared02", hc, ha, reqLabel, nil)
		_ = prepareCN("tenant01", hc, ha, reqLabel, cnLabels)
		mc.ForceRefresh(true)

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0
		tu1 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared01, tu1)
		tu2 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared01, tu2)
		tu3 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared02, tu3)
		tu4 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared02, tu4)
		require.Equal(t, 4, len(re.collectTunnels(ha)))
	})

	t.Run("mixed-tunnels", func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.TODO())
		defer cancel()
		hc := &mockHAKeeperClient{}
		mc := clusterservice.NewMOCluster(hc, 3*time.Second)
		defer mc.Close()
		rt.SetGlobalVariables(runtime.ClusterService, mc)

		shared01 := prepareCN("shared01", hc, ha, reqLabel, nil)
		_ = prepareCN("shared02", hc, ha, reqLabel, nil)
		tenant01 := prepareCN("tenant01", hc, ha, reqLabel, cnLabels)
		tenant02 := prepareCN("tenant02", hc, ha, reqLabel, cnLabels)
		mc.ForceRefresh(true)

		re := testRebalancer(t, st, logger, mc)
		re.tolerance = 0
		tu1 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant01, tu1)
		tu2 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant01, tu2)
		tu3 := newTunnel(ctx, logger, cs)
		re.connManager.connect(tenant02, tu3)
		tu4 := newTunnel(ctx, logger, cs)
		re.connManager.connect(shared01, tu4)
		// tenant01: 2 connections
		// tenant02: 1 connection
		// shared01: 1 connection
		// shared02: 0 connections
		// In this case we expect tu4 to be migrated to tenant02.
		tunnels := re.collectTunnels(ha)
		require.Equal(t, 1, len(tunnels))
		require.Equal(t, tu4, tunnels[0])
	})
}

func TestDoRebalance(t *testing.T) {
	defer leaktest.AfterTest(t)()

	var err error
	tp := newTestProxyHandler(t)
	defer tp.closeFn()

	temp := os.TempDir()
	// Construct backend CN servers.
	addr1 := fmt.Sprintf("%s/%d.sock", temp, time.Now().Nanosecond())
	require.NoError(t, os.RemoveAll(addr1))
	cn11 := testMakeCNServer("cn11", addr1, 0, "",
		newLabelInfo("t1", map[string]string{
			"k1": "v1",
			"k2": "v2",
		}),
	)
	li := labelInfo{
		Tenant: "t1",
		Labels: map[string]string{
			"k1": "v1",
			"k2": "v2",
		},
	}
	cn11.hash, err = li.getHash()
	require.NoError(t, err)
	tp.hc.updateCN("cn11", cn11.addr, map[string]metadata.LabelList{
		tenantLabelKey: {Labels: []string{"t1"}},
		"k1":           {Labels: []string{"v1"}},
		"k2":           {Labels: []string{"v2"}},
	})
	stopFn11 := startTestCNServer(t, tp.ctx, addr1, nil)
	defer func() {
		require.NoError(t, stopFn11())
	}()

	addr2 := fmt.Sprintf("%s/%d.sock", temp, time.Now().Nanosecond())
	require.NoError(t, os.RemoveAll(addr2))
	cn12 := testMakeCNServer("cn12", addr2, 0, "",
		newLabelInfo("t1", map[string]string{
			"k1": "v1",
			"k2": "v2",
		}),
	)
	cn12.hash, err = li.getHash()
	require.NoError(t, err)
	tp.hc.updateCN("cn12", cn12.addr, map[string]metadata.LabelList{
		tenantLabelKey: {Labels: []string{"t1"}},
		"k1":           {Labels: []string{"v1"}},
		"k2":           {Labels: []string{"v2"}},
	})
	stopFn12 := startTestCNServer(t, tp.ctx, addr2, nil)
	defer func() {
		require.NoError(t, stopFn12())
	}()
	tp.mc.ForceRefresh(true)

	ctx, cancel := context.WithTimeout(tp.ctx, 10*time.Second)
	defer cancel()

	ci := clientInfo{
		labelInfo: li,
		username:  "test",
		originIP:  net.ParseIP("127.0.0.1"),
	}
	// There are 2 servers, cn11 and cn12. All 4 connections start on cn11, and
	// the tolerance is 0.3, so eventually there should be 3 connections on cn11
	// and 1 connection on cn12.
	cleanup := testStartNClients(t, tp, ci, cn11, 4)
	defer cleanup()

	tick := time.NewTicker(time.Millisecond * 200)
	for {
		select {
		case <-ctx.Done():
			require.Fail(t, "rebalance failed")
		case <-tick.C:
			tunnels := tp.re.connManager.getCNTunnels(cn11.hash)
			tp.re.connManager.Lock()
			if tunnels["cn11"].count() == 3 && tunnels["cn12"].count() == 1 {
				tp.re.connManager.Unlock()
				return
			}
			tp.re.connManager.Unlock()
		}
	}
}

// prepareCN builds a CNServer that listens on a temporary unix socket address
// and registers its labels with the mock HAKeeper client.
func prepareCN(uid string, hc *mockHAKeeperClient, ha LabelHash, reLabels labelInfo, cnLabels map[string]metadata.LabelList) *CNServer {
	temp := os.TempDir()
	addr := fmt.Sprintf("%s/%d.sock", temp, time.Now().Nanosecond())
	_ = os.RemoveAll(addr)
	cn := testMakeCNServer(uid, addr, 0, ha, reLabels)
	hc.updateCN(uid, cn.addr, cnLabels)
	return cn
}