github.com/matrixorigin/matrixone@v1.2.0/pkg/clusterservice/cluster.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package clusterservice
    16  
    17  import (
    18  	"context"
    19  	"sync"
    20  	"sync/atomic"
    21  	"time"
    22  
    23  	"github.com/matrixorigin/matrixone/pkg/common/log"
    24  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    25  	"github.com/matrixorigin/matrixone/pkg/common/stopper"
    26  	logpb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    27  	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
    28  	"go.uber.org/zap"
    29  )
    30  
    31  // GetMOCluster get mo cluster from process level runtime
    32  func GetMOCluster() MOCluster {
    33  	timeout := time.Second * 10
    34  	now := time.Now()
    35  	for {
    36  		v, ok := runtime.ProcessLevelRuntime().GetGlobalVariables(runtime.ClusterService)
    37  		if !ok {
    38  			if time.Since(now) > timeout {
    39  				panic("no mocluster service")
    40  			}
    41  			time.Sleep(time.Second)
    42  			continue
    43  		}
    44  		return v.(MOCluster)
    45  	}
    46  }
    47  
// Option customizes a cluster while it is being created. NewMOCluster applies
// every given Option, in order, before it decides whether to start the refresh
// task.
type Option func(*cluster)
    50  
    51  // WithServices set init cn and tn services
    52  func WithServices(
    53  	cnServices []metadata.CNService,
    54  	tnServices []metadata.TNService) Option {
    55  	return func(c *cluster) {
    56  		new := c.copyServices()
    57  		new.addCN(cnServices)
    58  		new.addTN(tnServices)
    59  		c.services.Store(new)
    60  	}
    61  }
    62  
    63  // WithDisableRefresh disable refresh from hakeeper
    64  func WithDisableRefresh() Option {
    65  	return func(c *cluster) {
    66  		c.options.disableRefresh = true
    67  	}
    68  }
    69  
// cluster is the MOCluster implementation returned by NewMOCluster. It keeps a
// snapshot of the known CN and TN services and, unless refresh is disabled,
// rebuilds that snapshot from the ClusterClient.
type cluster struct {
	// logger is the "mo-cluster" named logger.
	logger *log.MOLogger
	// stopper runs the background refresh task and is stopped by Close.
	stopper *stopper.Stopper
	// client is queried for cluster details on every refresh.
	client ClusterClient
	// refreshInterval is both the period of the background refresh and the
	// timeout applied to a single refresh call.
	refreshInterval time.Duration
	// forceRefreshC (buffer size 1) asks the refresh task for an immediate
	// refresh.
	forceRefreshC chan struct{}
	// readyOnce guards the single close of readyC.
	readyOnce sync.Once
	// readyC is closed after the first successful refresh, or right away when
	// refresh is disabled; the read methods and Close block on it via waitReady.
	readyC chan struct{}
	// services is the current snapshot; writers store a new *services value
	// instead of modifying the loaded one.
	services atomic.Pointer[services]
	options  struct {
		// disableRefresh is set by WithDisableRefresh.
		disableRefresh bool
	}
}
    83  
    84  // NewMOCluster create a MOCluter by HAKeeperClient. MoCluster synchronizes
    85  // information from HAKeeper and forcibly refreshes the information once every
    86  // refreshInterval.
    87  //
    88  // TODO(fagongzi): extend hakeeper to support event-driven original message changes
    89  func NewMOCluster(
    90  	client ClusterClient,
    91  	refreshInterval time.Duration,
    92  	opts ...Option) MOCluster {
    93  	logger := runtime.ProcessLevelRuntime().Logger().Named("mo-cluster")
    94  	c := &cluster{
    95  		logger:          logger,
    96  		stopper:         stopper.NewStopper("mo-cluster", stopper.WithLogger(logger.RawLogger())),
    97  		client:          client,
    98  		forceRefreshC:   make(chan struct{}, 1),
    99  		readyC:          make(chan struct{}),
   100  		refreshInterval: refreshInterval,
   101  	}
   102  
   103  	c.services.Store(&services{})
   104  
   105  	for _, opt := range opts {
   106  		opt(c)
   107  	}
   108  	if !c.options.disableRefresh {
   109  		if err := c.stopper.RunTask(c.refreshTask); err != nil {
   110  			panic(err)
   111  		}
   112  	} else {
   113  		c.readyOnce.Do(func() {
   114  			close(c.readyC)
   115  		})
   116  	}
   117  	return c
   118  }
   119  
   120  func (c *cluster) GetCNService(selector Selector, apply func(metadata.CNService) bool) {
   121  	c.waitReady()
   122  
   123  	s := c.services.Load()
   124  	for _, cn := range s.cn {
   125  		// If the all field is false, the work state of CN service MUST be
   126  		// working, and then we could do the filter job. If the state is not
   127  		// working, means that the CN may be marked as draining and is going
   128  		// to be removed, or has been removed.
   129  		// The state Unknown is allowed here to make many test cases pass, and
   130  		// it does not affect the function.
   131  		if (selector.all || cn.WorkState == metadata.WorkState_Working ||
   132  			cn.WorkState == metadata.WorkState_Unknown) &&
   133  			selector.filterCN(cn) {
   134  			if !apply(cn) {
   135  				return
   136  			}
   137  		}
   138  	}
   139  }
   140  
   141  func (c *cluster) GetCNServiceWithoutWorkingState(selector Selector, apply func(metadata.CNService) bool) {
   142  	c.waitReady()
   143  
   144  	s := c.services.Load()
   145  	for _, cn := range s.cn {
   146  		if selector.filterCN(cn) {
   147  			if !apply(cn) {
   148  				return
   149  			}
   150  		}
   151  	}
   152  }
   153  
   154  func (c *cluster) GetTNService(selector Selector, apply func(metadata.TNService) bool) {
   155  	c.waitReady()
   156  
   157  	s := c.services.Load()
   158  	for _, tn := range s.tn {
   159  		if selector.filterTN(tn) {
   160  			if !apply(tn) {
   161  				return
   162  			}
   163  		}
   164  	}
   165  }
   166  
   167  func (c *cluster) GetAllTNServices() []metadata.TNService {
   168  	c.waitReady()
   169  	s := c.services.Load()
   170  	return s.tn
   171  }
   172  
   173  func (c *cluster) ForceRefresh(sync bool) {
   174  	if c.options.disableRefresh {
   175  		return
   176  	}
   177  	if sync {
   178  		c.refresh()
   179  		return
   180  	}
   181  
   182  	select {
   183  	case c.forceRefreshC <- struct{}{}:
   184  	default:
   185  	}
   186  }
   187  
   188  func (c *cluster) Close() {
   189  	c.waitReady()
   190  	c.stopper.Stop()
   191  	close(c.forceRefreshC)
   192  }
   193  
   194  // DebugUpdateCNLabel implements the MOCluster interface.
   195  func (c *cluster) DebugUpdateCNLabel(uuid string, kvs map[string][]string) error {
   196  	ctx, cancel := context.WithTimeout(context.TODO(), time.Second*3)
   197  	defer cancel()
   198  	convert := make(map[string]metadata.LabelList)
   199  	for k, v := range kvs {
   200  		convert[k] = metadata.LabelList{Labels: v}
   201  	}
   202  	label := logpb.CNStoreLabel{
   203  		UUID:   uuid,
   204  		Labels: convert,
   205  	}
   206  	proxyClient := c.client.(labelSupportedClient)
   207  	if err := proxyClient.UpdateCNLabel(ctx, label); err != nil {
   208  		return err
   209  	}
   210  	return nil
   211  }
   212  
   213  func (c *cluster) DebugUpdateCNWorkState(uuid string, state int) error {
   214  	ctx, cancel := context.WithTimeout(context.TODO(), time.Second*3)
   215  	defer cancel()
   216  	wstate := logpb.CNWorkState{
   217  		UUID:  uuid,
   218  		State: metadata.WorkState(state),
   219  	}
   220  	proxyClient := c.client.(labelSupportedClient)
   221  	if err := proxyClient.UpdateCNWorkState(ctx, wstate); err != nil {
   222  		return err
   223  	}
   224  	return nil
   225  }
   226  
   227  func (c *cluster) RemoveCN(id string) {
   228  	new := c.copyServices()
   229  	values := new.cn[:0]
   230  	for _, s := range new.cn {
   231  		if s.ServiceID != id {
   232  			values = append(values, s)
   233  		}
   234  	}
   235  	new.cn = values
   236  	c.services.Store(new)
   237  }
   238  
   239  func (c *cluster) AddCN(s metadata.CNService) {
   240  	new := c.copyServices()
   241  	new.cn = append(new.cn, s)
   242  	c.services.Store(new)
   243  }
   244  
// waitReady blocks until readyC has been closed, which happens after the first
// successful refresh, or in NewMOCluster when refresh is disabled.
func (c *cluster) waitReady() {
	<-c.readyC
}
   248  
   249  func (c *cluster) refreshTask(ctx context.Context) {
   250  	c.ForceRefresh(false)
   251  
   252  	timer := time.NewTimer(c.refreshInterval)
   253  	defer timer.Stop()
   254  
   255  	for {
   256  		select {
   257  		case <-ctx.Done():
   258  			c.logger.Info("refresh cluster details task stopped")
   259  			return
   260  		case <-timer.C:
   261  			c.refresh()
   262  			timer.Reset(c.refreshInterval)
   263  		case <-c.forceRefreshC:
   264  			c.refresh()
   265  		}
   266  	}
   267  }
   268  
   269  func (c *cluster) refresh() {
   270  	defer c.logger.LogAction("refresh from hakeeper",
   271  		log.DefaultLogOptions().WithLevel(zap.DebugLevel))()
   272  
   273  	ctx, cancel := context.WithTimeout(context.Background(), c.refreshInterval)
   274  	defer cancel()
   275  
   276  	details, err := c.client.GetClusterDetails(ctx)
   277  	if err != nil {
   278  		c.logger.Error("failed to refresh cluster details from hakeeper",
   279  			zap.Error(err))
   280  		return
   281  	}
   282  
   283  	c.logger.Debug("refresh cluster details from hakeeper",
   284  		zap.Int("cn-count", len(details.CNStores)),
   285  		zap.Int("dn-count", len(details.TNStores)))
   286  
   287  	new := &services{}
   288  	for _, cn := range details.CNStores {
   289  		v := newCNService(cn)
   290  		new.addCN([]metadata.CNService{v})
   291  		if c.logger.Enabled(zap.DebugLevel) {
   292  			c.logger.Debug("cn service added", zap.String("cn", v.DebugString()))
   293  		}
   294  	}
   295  	for _, tn := range details.TNStores {
   296  		v := newTNService(tn)
   297  		new.addTN([]metadata.TNService{v})
   298  		if c.logger.Enabled(zap.DebugLevel) {
   299  			c.logger.Debug("dn service added", zap.String("dn", v.DebugString()))
   300  		}
   301  	}
   302  	c.services.Store(new)
   303  	c.readyOnce.Do(func() {
   304  		close(c.readyC)
   305  	})
   306  }
   307  
   308  func (c *cluster) copyServices() *services {
   309  	new := &services{}
   310  	old := c.services.Load()
   311  	if old != nil {
   312  		new.addCN(old.cn)
   313  		new.addTN(old.tn)
   314  	}
   315  	return new
   316  }
   317  
   318  func newCNService(cn logpb.CNStore) metadata.CNService {
   319  	return metadata.CNService{
   320  		ServiceID:              cn.UUID,
   321  		PipelineServiceAddress: cn.ServiceAddress,
   322  		SQLAddress:             cn.SQLAddress,
   323  		LockServiceAddress:     cn.LockServiceAddress,
   324  		WorkState:              cn.WorkState,
   325  		Labels:                 cn.Labels,
   326  		QueryAddress:           cn.QueryAddress,
   327  	}
   328  }
   329  
   330  func newTNService(tn logpb.TNStore) metadata.TNService {
   331  	v := metadata.TNService{
   332  		ServiceID:             tn.UUID,
   333  		TxnServiceAddress:     tn.ServiceAddress,
   334  		LogTailServiceAddress: tn.LogtailServerAddress,
   335  		LockServiceAddress:    tn.LockServiceAddress,
   336  		QueryAddress:          tn.QueryAddress,
   337  	}
   338  	v.Shards = make([]metadata.TNShard, 0, len(tn.Shards))
   339  	for _, s := range tn.Shards {
   340  		v.Shards = append(v.Shards, metadata.TNShard{
   341  			TNShardRecord: metadata.TNShardRecord{ShardID: s.ShardID},
   342  			ReplicaID:     s.ReplicaID,
   343  		})
   344  	}
   345  	return v
   346  }
   347  
// services is one snapshot of the CN and TN services known to the cluster.
type services struct {
	// cn holds the CN services of this snapshot.
	cn []metadata.CNService
	// tn holds the TN services of this snapshot.
	tn []metadata.TNService
}

// addCN appends values to the CN services of s.
func (s *services) addCN(values []metadata.CNService) {
	s.cn = append(s.cn, values...)
}

// addTN appends values to the TN services of s.
func (s *services) addTN(values []metadata.TNService) {
	s.tn = append(s.tn, values...)
}