github.com/matrixorigin/matrixone@v1.2.0/pkg/proxy/rebalancer.go (about)

     1  // Copyright 2021 - 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package proxy
    16  
    17  import (
    18  	"context"
    19  	"math"
    20  	"sync"
    21  	"time"
    22  
    23  	"github.com/matrixorigin/matrixone/pkg/clusterservice"
    24  	"github.com/matrixorigin/matrixone/pkg/common/log"
    25  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    26  	"github.com/matrixorigin/matrixone/pkg/common/stopper"
    27  	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
    28  	v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2"
    29  	"github.com/matrixorigin/matrixone/pkg/vm/engine/disttae/route"
    30  	"go.uber.org/zap"
    31  )
    32  
    33  const (
    34  	// The default rebalancer queue size is 1024.
    35  	defaultQueueSize = 1024
    36  )
    37  
    38  type rebalancer struct {
    39  	stopper *stopper.Stopper
    40  	logger  *log.MOLogger
    41  	// mc is the MO-Cluster instance, which is used to get CN servers.
    42  	mc clusterservice.MOCluster
    43  	// connManager is used to track the connections on the CN servers.
    44  	connManager *connManager
    45  	// scaling is used to scale in the CN servers gracefully.
    46  	scaling *scaling
    47  	// queue holds the tunnels that need to be migrated.
    48  	queue chan *tunnel
    49  	mu    struct {
    50  		sync.Mutex
    51  		// inflight contains the tunnels that are in the queue or are being
    52  		// rebalanced. The same tunnel should not be enqueued again.
    53  		inflight map[*tunnel]struct{}
    54  	}
    55  	// If disabled is true, rebalance does nothing.
    56  	disabled bool
    57  	// interval indicates how often the rebalance action runs.
    58  	interval time.Duration
    59  	// tolerance is used to calculate how many tunnels need to migrate
    60  	// to other CN servers. For example, if tolerance is 0.3 and the
    61  	// average number of tunnels per CN is 10, the upper limit is
    62  	// ceil(10*1.3) = 13, so a CN server with 15 tunnels migrates 2 of them.
    63  	tolerance float64
    64  }
    65  
    66  // rebalancerOption defines a function that sets options of the rebalancer.
    67  type rebalancerOption func(*rebalancer)
    68  
    69  // withRebalancerDisabled sets if rebalancer is disabled.
    70  func withRebalancerDisabled() rebalancerOption {
    71  	return func(r *rebalancer) {
    72  		r.disabled = true
    73  	}
    74  }
    75  
    76  // withRebalancerInterval sets the interval of the rebalancer.
    77  func withRebalancerInterval(interval time.Duration) rebalancerOption {
    78  	return func(r *rebalancer) {
    79  		r.interval = interval
    80  	}
    81  }
    82  
    83  // withRebalancerTolerance sets the tolerance of rebalancer.
    84  func withRebalancerTolerance(tolerance float64) rebalancerOption {
    85  	return func(r *rebalancer) {
    86  		r.tolerance = tolerance
    87  	}
    88  }
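
        // A minimal usage sketch of the options above (st, logger and moCluster are
        // placeholders for values the proxy server already holds; the numbers are
        // illustrative, not defaults):
        //
        //	re, err := newRebalancer(
        //		st, logger, moCluster,
        //		withRebalancerInterval(30*time.Second),
        //		withRebalancerTolerance(0.3),
        //	)
        //	if err != nil {
        //		// handle the construction error
        //	}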
    89  
    90  // newRebalancer creates a new rebalancer.
    91  func newRebalancer(
    92  	stopper *stopper.Stopper, logger *log.MOLogger, mc clusterservice.MOCluster, opts ...rebalancerOption,
    93  ) (*rebalancer, error) {
    94  	r := &rebalancer{
    95  		stopper:     stopper,
    96  		logger:      logger,
    97  		connManager: newConnManager(),
    98  		mc:          mc,
    99  		queue:       make(chan *tunnel, defaultQueueSize),
   100  	}
   101  	r.mu.inflight = make(map[*tunnel]struct{})
   102  	for _, opt := range opts {
   103  		opt(r)
   104  	}
   105  	r.scaling = newScaling(r.connManager, r.queue, mc, logger, r.disabled)
   106  
   107  	// Starts the transfer go-routine to handle the transfer request.
   108  	if err := r.stopper.RunNamedTask("rebalaner-transfer", r.handleTransfer); err != nil {
   109  		return nil, err
   110  	}
   111  	// Starts the runner go-routine to check the tunnels that need to transfer.
   112  	if err := r.stopper.RunNamedTask("rebalancer-runner", r.run); err != nil {
   113  		return nil, err
   114  	}
   115  	// Starts the scaling go-routine to check the CN services that need to do scaling.
   116  	if err := r.stopper.RunNamedTask("scaling", r.scaling.run); err != nil {
   117  		return nil, err
   118  	}
   119  	return r, nil
   120  }
   121  
   122  // run begins the loop that checks whether there are any connections that
   123  // need to be rebalanced.
   124  func (r *rebalancer) run(ctx context.Context) {
   125  	ticker := time.NewTicker(r.interval)
   126  	defer ticker.Stop()
   127  	for {
   128  		select {
   129  		case <-ticker.C:
   130  			r.doRebalance()
   131  		case <-ctx.Done():
   132  			r.logger.Info("rebalancer runner ended")
   133  			return
   134  		}
   135  	}
   136  }
   137  
   138  // doRebalance does the real rebalance work, tenant by tenant.
   139  func (r *rebalancer) doRebalance() {
   140  	// Re-balance is disabled, nothing to do.
   141  	if r.disabled {
   142  		return
   143  	}
   144  	hashes := r.connManager.getLabelHashes()
   145  	for _, h := range hashes {
   146  		r.rebalanceByHash(h)
   147  	}
   148  }
   149  
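        // rebalanceByHash collects the tunnels that need to migrate for one label
        // hash and enqueues them, skipping any tunnel that is already inflight.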
   150  func (r *rebalancer) rebalanceByHash(hash LabelHash) {
   151  	// Collect the tunnels that need to migrate.
   152  	tuns := r.collectTunnels(hash)
   153  	v2.ProxyConnectionsNeedToTransferGauge.Set(float64(len(tuns)))
   154  
   155  	r.mu.Lock()
   156  	defer r.mu.Unlock()
   157  	// Put the tunnels into the queue.
   158  	for _, t := range tuns {
   159  		// If the tunnel is inflight, do NOT enqueue it.
   160  		_, ok := r.mu.inflight[t]
   161  		if ok {
   162  			continue
   163  		}
   164  
   165  		select {
   166  		case r.queue <- t:
   167  			r.mu.inflight[t] = struct{}{}
   168  		default:
   169  			r.logger.Info("rebalance queue is full")
   170  		}
   171  	}
   172  }
   173  
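        // collectTunnels returns the tunnels that should be migrated for the given
        // label hash: CN servers holding more than the computed upper limit give up
        // the excess, and label-less CNs give up all of their tunnels once labeled
        // CNs are available to take the load.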
   174  func (r *rebalancer) collectTunnels(hash LabelHash) []*tunnel {
   175  	// Get the CN servers from the MO cluster for this label.
   176  	li := r.connManager.getLabelInfo(hash)
   177  	// cns are the CN server UUIDs that match the given label hash.
   178  	cns := make(map[string]struct{})
   179  	// emptyCNs are the fallback CN UUIDs that are used to serve the connections when no CN matches the label hash.
   180  	emptyCNs := make(map[string]struct{})
   181  
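        	// notEmptyCns are the selected CNs that carry labels; when any exist,
        	// connections left on the empty CNs are migrated to them below.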
   182  	notEmptyCns := make(map[string]struct{})
   183  
   184  	selector := li.genSelector(clusterservice.EQ_Globbing)
   185  	appendFn := func(s *metadata.CNService) {
   186  		cns[s.ServiceID] = struct{}{}
   187  		if len(s.Labels) > 0 {
   188  			notEmptyCns[s.ServiceID] = struct{}{}
   189  		}
   190  	}
   191  	if li.isSuperTenant() {
   192  		route.RouteForSuperTenant(selector, "", nil, appendFn)
   193  	} else {
   194  		route.RouteForCommonTenant(selector, nil, appendFn)
   195  	}
   196  
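        	// Record the CNs that carry no labels at all; they act as the fallback
        	// servers when no CN matches the label hash.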
   197  	r.mc.GetCNService(selector, func(s metadata.CNService) bool {
   198  		if len(s.Labels) == 0 {
   199  			emptyCNs[s.ServiceID] = struct{}{}
   200  		}
   201  		return true
   202  	})
   203  
   204  	// We expect all connections to be served by the selected CNs.
   205  	desiredCnCount := len(cns)
   206  	if desiredCnCount == 0 {
   207  		// No CN is selected, fall back to re-balancing sessions across the empty CNs.
   208  		desiredCnCount = len(emptyCNs)
   209  	}
   210  	if desiredCnCount == 0 {
   211  		return nil
   212  	}
   213  
   214  	// Here we get the tunnels on each CN server for the tenant.
   215  	tuns := r.connManager.getCNTunnels(hash)
   216  	if tuns == nil {
   217  		return nil
   218  	}
   219  
   220  	// Calculate the upper limit of tunnels that each CN server could take
   221  	r.connManager.Lock()
   222  	defer r.connManager.Unlock()
   223  	tunnelCount := tuns.count()
   224  	avg := float64(tunnelCount) / float64(desiredCnCount)
   225  	upperLimit := int(math.Max(1, math.Ceil(avg*(1+r.tolerance))))
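        	// For example, with 25 tunnels spread over 2 desired CNs and a tolerance
        	// of 0.3: avg = 12.5 and upperLimit = ceil(12.5*1.3) = 17, so a CN
        	// currently holding 20 tunnels gives up 3 of them.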
   226  
   227  	var ret []*tunnel
   228  	// For each CN server, pick the tunnels that need to move to other
   229  	// CN servers.
   230  	for uuid, ts := range tuns {
   231  		num := ts.countWithoutIntent()
   232  		if num > upperLimit {
   233  			ret = append(ret, pickTunnels(ts, num-upperLimit)...)
   234  		}
   235  		if _, ok := emptyCNs[uuid]; ok && len(notEmptyCns) > 0 {
   236  			// When there are labeled CNs available, migrate all tunnels (if any) on the empty CNs to them.
   237  			ret = append(ret, pickTunnels(ts, ts.count())...)
   238  		}
   239  	}
   240  	return ret
   241  }
   242  
   243  // handleTransfer gets tunnel transfer requests from the queue and handles them.
   244  func (r *rebalancer) handleTransfer(ctx context.Context) {
   245  	for {
   246  		select {
   247  		case tun := <-r.queue:
   248  			v2.ProxyTransferQueueSizeGauge.Set(float64(len(r.queue)))
   249  			if err := tun.transfer(ctx); err != nil {
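        				// OkExpectedNotSafeToStartTransfer is expected when the connection
        				// is not at a safe point to start a transfer, so it is not logged
        				// as an error.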
   250  				if !moerr.IsMoErrCode(err, moerr.OkExpectedNotSafeToStartTransfer) {
   251  					r.logger.Error("failed to do transfer", zap.Error(err))
   252  				}
   253  			}
   254  
   255  			// After transferring the tunnel, remove it from the inflight map.
   256  			r.mu.Lock()
   257  			delete(r.mu.inflight, tun)
   258  			r.mu.Unlock()
   259  		case <-ctx.Done():
   260  			r.logger.Info("rebalancer transfer ended.")
   261  			return
   262  		}
   263  	}
   264  }