github.com/matrixorigin/matrixone@v1.2.0/pkg/proxy/rebalancer.go (about) 1 // Copyright 2021 - 2023 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package proxy 16 17 import ( 18 "context" 19 "math" 20 "sync" 21 "time" 22 23 "github.com/matrixorigin/matrixone/pkg/clusterservice" 24 "github.com/matrixorigin/matrixone/pkg/common/log" 25 "github.com/matrixorigin/matrixone/pkg/common/moerr" 26 "github.com/matrixorigin/matrixone/pkg/common/stopper" 27 "github.com/matrixorigin/matrixone/pkg/pb/metadata" 28 v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2" 29 "github.com/matrixorigin/matrixone/pkg/vm/engine/disttae/route" 30 "go.uber.org/zap" 31 ) 32 33 const ( 34 // The default rebalancer queue size is 1024. 35 defaultQueueSize = 1024 36 ) 37 38 type rebalancer struct { 39 stopper *stopper.Stopper 40 logger *log.MOLogger 41 // mc is MO-Cluster instance, which is used to get CN servers. 42 mc clusterservice.MOCluster 43 // connManager is used to track the connections on the CN servers. 44 connManager *connManager 45 // scaling is used to scale in the CN servers gracefully. 46 scaling *scaling 47 // queue takes the tunnels which need to do migration. 48 queue chan *tunnel 49 mu struct { 50 sync.Mutex 51 // inflight is the tunnels which are in the queue or is being 52 // rebalanced. The same tunnel should not be added to the queue. 53 inflight map[*tunnel]struct{} 54 } 55 // If disabled is true, rebalance does nothing. 56 disabled bool 57 // interval indicates that how often the rebalance is act. 58 interval time.Duration 59 // tolerance is the tolerance that is used to calculate tunnels need 60 // to migrate to other CN servers. For example, if tolerance is 0.3, 61 // and the average of tunnels is 10, then if there are 15 tunnels on 62 // a CN server, 2 tunnels will do migration. 63 tolerance float64 64 } 65 66 // rebalancerOption defines the function to set options of rebalancer. 67 type rebalancerOption func(*rebalancer) 68 69 // withRebalancerDisabled sets if rebalancer is disabled. 70 func withRebalancerDisabled() rebalancerOption { 71 return func(r *rebalancer) { 72 r.disabled = true 73 } 74 } 75 76 // withRebalancerInterval sets the interval 77 func withRebalancerInterval(interval time.Duration) rebalancerOption { 78 return func(r *rebalancer) { 79 r.interval = interval 80 } 81 } 82 83 // withRebalancerTolerance sets the tolerance of rebalancer. 84 func withRebalancerTolerance(tolerance float64) rebalancerOption { 85 return func(r *rebalancer) { 86 r.tolerance = tolerance 87 } 88 } 89 90 // newRebalancer creates a new rebalancer. 91 func newRebalancer( 92 stopper *stopper.Stopper, logger *log.MOLogger, mc clusterservice.MOCluster, opts ...rebalancerOption, 93 ) (*rebalancer, error) { 94 r := &rebalancer{ 95 stopper: stopper, 96 logger: logger, 97 connManager: newConnManager(), 98 mc: mc, 99 queue: make(chan *tunnel, defaultQueueSize), 100 } 101 r.mu.inflight = make(map[*tunnel]struct{}) 102 for _, opt := range opts { 103 opt(r) 104 } 105 r.scaling = newScaling(r.connManager, r.queue, mc, logger, r.disabled) 106 107 // Starts the transfer go-routine to handle the transfer request. 108 if err := r.stopper.RunNamedTask("rebalaner-transfer", r.handleTransfer); err != nil { 109 return nil, err 110 } 111 // Starts the runner go-routine to check the tunnels that need to transfer. 112 if err := r.stopper.RunNamedTask("rebalancer-runner", r.run); err != nil { 113 return nil, err 114 } 115 // Starts the scaling go-routine to check the CN service that need to do scaling. 116 if err := r.stopper.RunNamedTask("scaling", r.scaling.run); err != nil { 117 return nil, err 118 } 119 return r, nil 120 } 121 122 // run begins the loop to check if there are any connections need to 123 // be rebalanced. 124 func (r *rebalancer) run(ctx context.Context) { 125 ticker := time.NewTicker(r.interval) 126 defer ticker.Stop() 127 for { 128 select { 129 case <-ticker.C: 130 r.doRebalance() 131 case <-ctx.Done(): 132 r.logger.Info("rebalancer runner ended") 133 return 134 } 135 } 136 } 137 138 // doRebalance do the real rebalance work by tenants. 139 func (r *rebalancer) doRebalance() { 140 // Re-balance is disabled, nothing to do. 141 if r.disabled { 142 return 143 } 144 hashes := r.connManager.getLabelHashes() 145 for _, h := range hashes { 146 r.rebalanceByHash(h) 147 } 148 } 149 150 func (r *rebalancer) rebalanceByHash(hash LabelHash) { 151 // Collect the tunnels that need to migrate. 152 tuns := r.collectTunnels(hash) 153 v2.ProxyConnectionsNeedToTransferGauge.Set(float64(len(tuns))) 154 155 r.mu.Lock() 156 defer r.mu.Unlock() 157 // Put the tunnels to the queue. 158 for _, t := range tuns { 159 // If the tunnel is inflight, do NOT enqueue it. 160 _, ok := r.mu.inflight[t] 161 if ok { 162 continue 163 } 164 165 select { 166 case r.queue <- t: 167 r.mu.inflight[t] = struct{}{} 168 default: 169 r.logger.Info("rebalance queue is full") 170 } 171 } 172 } 173 174 func (r *rebalancer) collectTunnels(hash LabelHash) []*tunnel { 175 // get CN servers from mocluster for this label. 176 li := r.connManager.getLabelInfo(hash) 177 // CNs are the CN server UUIDs that match the given labelHash 178 cns := make(map[string]struct{}) 179 // emptyCNs are the fallback CN UUIDs that used to serve the connections when there is no CN match the label hash 180 emptyCNs := make(map[string]struct{}) 181 182 notEmptyCns := make(map[string]struct{}) 183 184 selector := li.genSelector(clusterservice.EQ_Globbing) 185 appendFn := func(s *metadata.CNService) { 186 cns[s.ServiceID] = struct{}{} 187 if len(s.Labels) > 0 { 188 notEmptyCns[s.ServiceID] = struct{}{} 189 } 190 } 191 if li.isSuperTenant() { 192 route.RouteForSuperTenant(selector, "", nil, appendFn) 193 } else { 194 route.RouteForCommonTenant(selector, nil, appendFn) 195 } 196 197 r.mc.GetCNService(selector, func(s metadata.CNService) bool { 198 if len(s.Labels) == 0 { 199 emptyCNs[s.ServiceID] = struct{}{} 200 } 201 return true 202 }) 203 204 // we expect all conns are served by the selected CNs 205 desiredCnCount := len(cns) 206 if desiredCnCount == 0 { 207 // no CN selected, fallback to re-balance session across empty CNs 208 desiredCnCount = len(emptyCNs) 209 } 210 if desiredCnCount == 0 { 211 return nil 212 } 213 214 // Here we get the tunnels on each CN server for the tenant. 215 tuns := r.connManager.getCNTunnels(hash) 216 if tuns == nil { 217 return nil 218 } 219 220 // Calculate the upper limit of tunnels that each CN server could take 221 r.connManager.Lock() 222 defer r.connManager.Unlock() 223 tunnelCount := tuns.count() 224 avg := float64(tunnelCount) / float64(desiredCnCount) 225 upperLimit := int(math.Max(1, math.Ceil(avg*(1+r.tolerance)))) 226 227 var ret []*tunnel 228 // For each CN server, pick the tunnels that need to move to other 229 // CN servers. 230 for uuid, ts := range tuns { 231 num := ts.countWithoutIntent() 232 if num > upperLimit { 233 ret = append(ret, pickTunnels(ts, num-upperLimit)...) 234 } 235 if _, ok := emptyCNs[uuid]; ok && len(notEmptyCns) > 0 { 236 // when there ARE selected CNs, migrate tunnels (if any) in empty CNs to the selected CNs 237 ret = append(ret, pickTunnels(ts, ts.count())...) 238 } 239 } 240 return ret 241 } 242 243 // handlerTransfer gets the tunnel transfer request from queue and handles it. 244 func (r *rebalancer) handleTransfer(ctx context.Context) { 245 for { 246 select { 247 case tun := <-r.queue: 248 v2.ProxyTransferQueueSizeGauge.Set(float64(len(r.queue))) 249 if err := tun.transfer(ctx); err != nil { 250 if !moerr.IsMoErrCode(err, moerr.OkExpectedNotSafeToStartTransfer) { 251 r.logger.Error("failed to do transfer", zap.Error(err)) 252 } 253 } 254 255 // After transfer the tunnel, remove it from the inflight map. 256 r.mu.Lock() 257 delete(r.mu.inflight, tun) 258 r.mu.Unlock() 259 case <-ctx.Done(): 260 r.logger.Info("rebalancer transfer ended.") 261 return 262 } 263 } 264 }