github.com/matrixorigin/matrixone@v0.7.0/pkg/dnservice/store.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dnservice
    16  
    17  import (
    18  	"context"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    23  	"github.com/matrixorigin/matrixone/pkg/common/morpc"
    24  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    25  	"github.com/matrixorigin/matrixone/pkg/common/stopper"
    26  	"github.com/matrixorigin/matrixone/pkg/defines"
    27  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    28  	"github.com/matrixorigin/matrixone/pkg/logservice"
    29  	logservicepb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    30  	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
    31  	"github.com/matrixorigin/matrixone/pkg/pb/txn"
    32  	"github.com/matrixorigin/matrixone/pkg/taskservice"
    33  	"github.com/matrixorigin/matrixone/pkg/txn/rpc"
    34  	"github.com/matrixorigin/matrixone/pkg/txn/service"
    35  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    36  	"go.uber.org/multierr"
    37  	"go.uber.org/zap"
    38  )
    39  
    40  var (
    41  	retryCreateStorageInterval = time.Second * 5
    42  )
    43  
    44  // WithConfigAdjust set adjust config func
    45  func WithConfigAdjust(adjustConfigFunc func(c *Config)) Option {
    46  	return func(s *store) {
    47  		s.options.adjustConfigFunc = adjustConfigFunc
    48  	}
    49  }
    50  
    51  // WithBackendFilter set filtering txn.TxnRequest sent to other DNShard
    52  func WithBackendFilter(filter func(morpc.Message, string) bool) Option {
    53  	return func(s *store) {
    54  		s.options.backendFilter = filter
    55  	}
    56  }
    57  
    58  // WithHAKeeperClientFactory set hakeeper client factory
    59  func WithHAKeeperClientFactory(factory func() (logservice.DNHAKeeperClient, error)) Option {
    60  	return func(s *store) {
    61  		s.options.hakeekerClientFactory = factory
    62  	}
    63  }
    64  
    65  // WithLogServiceClientFactory set log service client factory
    66  func WithLogServiceClientFactory(factory func(metadata.DNShard) (logservice.Client, error)) Option {
    67  	return func(s *store) {
    68  		s.options.logServiceClientFactory = factory
    69  	}
    70  }
    71  
    72  // WithTaskStorageFactory setup the special task strorage factory
    73  func WithTaskStorageFactory(factory taskservice.TaskStorageFactory) Option {
    74  	return func(s *store) {
    75  		s.task.storageFactory = factory
    76  	}
    77  }
    78  
// store is the Service implementation returned by NewService. It owns the
// txn RPC sender and server, the HAKeeper client, and the replicas of the
// DNShards started on it.
type store struct {
	// cfg is the configuration passed to NewService, possibly modified by
	// the adjustConfigFunc option.
	cfg *Config
	// rt supplies the logger and clock used by the store.
	rt runtime.Runtime
	// sender is created by initTxnSender and closed by Close.
	sender rpc.TxnSender
	// server is created by initTxnServer, started by Start and closed by Close.
	server rpc.TxnServer
	// hakeeperClient is created by initHAKeeperClient and closed by Close.
	hakeeperClient logservice.DNHAKeeperClient
	// fileService is the file service passed to NewService.
	fileService fileservice.FileService
	// metadataFileService is the file service registered under
	// defines.LocalFileServiceName inside fileService.
	metadataFileService fileservice.ReplaceableFileService
	// replicas maps a DNShard's ShardID (uint64) to its *replica.
	replicas *sync.Map
	// stopper runs the store's background tasks (heartbeat, replica start-up)
	// and is stopped first by Close.
	stopper *stopper.Stopper

	// options holds the hooks installed through Option functions; any of
	// them may be nil.
	options struct {
		logServiceClientFactory func(metadata.DNShard) (logservice.Client, error)
		hakeekerClientFactory   func() (logservice.DNHAKeeperClient, error)
		backendFilter           func(msg morpc.Message, backendAddr string) bool
		adjustConfigFunc        func(c *Config)
	}

	// mu holds the store metadata (UUID and shards); startDNShards takes the
	// embedded lock while iterating over the shards.
	mu struct {
		sync.RWMutex
		metadata metadata.DNStore
	}

	// task holds the task service state; Close reads serviceHolder under the
	// embedded read lock.
	// NOTE(review): serviceCreated is not used in this part of the file —
	// presumably it records whether the task service was created; confirm.
	task struct {
		sync.RWMutex
		serviceCreated bool
		serviceHolder  taskservice.TaskServiceHolder
		storageFactory taskservice.TaskStorageFactory
	}
}
   109  
   110  // NewService create DN Service
   111  func NewService(cfg *Config,
   112  	rt runtime.Runtime,
   113  	fileService fileservice.FileService,
   114  	opts ...Option) (Service, error) {
   115  	if err := cfg.Validate(); err != nil {
   116  		return nil, err
   117  	}
   118  
   119  	// start common stuff
   120  	common.InitTAEMPool()
   121  
   122  	// get metadata fs
   123  	metadataFS, err := fileservice.Get[fileservice.ReplaceableFileService](fileService, defines.LocalFileServiceName)
   124  	if err != nil {
   125  		return nil, err
   126  	}
   127  
   128  	s := &store{
   129  		cfg:                 cfg,
   130  		rt:                  rt,
   131  		fileService:         fileService,
   132  		metadataFileService: metadataFS,
   133  	}
   134  	for _, opt := range opts {
   135  		opt(s)
   136  	}
   137  	s.replicas = &sync.Map{}
   138  	s.stopper = stopper.NewStopper("dn-store",
   139  		stopper.WithLogger(s.rt.Logger().RawLogger()))
   140  	s.mu.metadata = metadata.DNStore{UUID: cfg.UUID}
   141  	if s.options.adjustConfigFunc != nil {
   142  		s.options.adjustConfigFunc(s.cfg)
   143  	}
   144  
   145  	if err := s.initClocker(); err != nil {
   146  		return nil, err
   147  	}
   148  	if err := s.initHAKeeperClient(); err != nil {
   149  		return nil, err
   150  	}
   151  	if err := s.initTxnSender(); err != nil {
   152  		return nil, err
   153  	}
   154  	if err := s.initTxnServer(); err != nil {
   155  		return nil, err
   156  	}
   157  	if err := s.initMetadata(); err != nil {
   158  		return nil, err
   159  	}
   160  	s.initTaskHolder()
   161  	return s, nil
   162  }
   163  
   164  func (s *store) Start() error {
   165  	if err := s.startDNShards(); err != nil {
   166  		return err
   167  	}
   168  	if err := s.server.Start(); err != nil {
   169  		return err
   170  	}
   171  	s.rt.SubLogger(runtime.SystemInit).Info("dn heartbeat task started")
   172  	return s.stopper.RunTask(s.heartbeatTask)
   173  }
   174  
   175  func (s *store) Close() error {
   176  	s.stopper.Stop()
   177  	var err error
   178  	if e := s.hakeeperClient.Close(); e != nil {
   179  		err = multierr.Append(e, err)
   180  	}
   181  	if e := s.sender.Close(); e != nil {
   182  		err = multierr.Append(e, err)
   183  	}
   184  	if e := s.server.Close(); e != nil {
   185  		err = multierr.Append(e, err)
   186  	}
   187  	s.replicas.Range(func(_, value any) bool {
   188  		r := value.(*replica)
   189  		if e := r.close(false); e != nil {
   190  			err = multierr.Append(e, err)
   191  		}
   192  		return true
   193  	})
   194  	s.task.RLock()
   195  	ts := s.task.serviceHolder
   196  	s.task.RUnlock()
   197  	if ts != nil {
   198  		err = ts.Close()
   199  	}
   200  	return err
   201  }
   202  
   203  func (s *store) StartDNReplica(shard metadata.DNShard) error {
   204  	return s.createReplica(shard)
   205  }
   206  
   207  func (s *store) CloseDNReplica(shard metadata.DNShard) error {
   208  	return s.removeReplica(shard.ShardID)
   209  }
   210  
   211  func (s *store) startDNShards() error {
   212  	s.mu.Lock()
   213  	defer s.mu.Unlock()
   214  
   215  	for _, shard := range s.mu.metadata.Shards {
   216  		if err := s.createReplica(shard); err != nil {
   217  			return err
   218  		}
   219  	}
   220  	return nil
   221  }
   222  
   223  func (s *store) getDNShardInfo() []logservicepb.DNShardInfo {
   224  	var shards []logservicepb.DNShardInfo
   225  	s.replicas.Range(func(_, value any) bool {
   226  		r := value.(*replica)
   227  		shards = append(shards, logservicepb.DNShardInfo{
   228  			ShardID:   r.shard.ShardID,
   229  			ReplicaID: r.shard.ReplicaID,
   230  		})
   231  		return true
   232  	})
   233  	return shards
   234  }
   235  
   236  func (s *store) createReplica(shard metadata.DNShard) error {
   237  	r := newReplica(shard, s.rt)
   238  	v, ok := s.replicas.LoadOrStore(shard.ShardID, r)
   239  	if ok {
   240  		s.rt.Logger().Debug("DNShard already created",
   241  			zap.String("new", shard.DebugString()),
   242  			zap.String("exist", v.(*replica).shard.DebugString()))
   243  		return nil
   244  	}
   245  
   246  	err := s.stopper.RunTask(func(ctx context.Context) {
   247  		for {
   248  			select {
   249  			case <-ctx.Done():
   250  				return
   251  			default:
   252  				storage, err := s.createTxnStorage(ctx, shard)
   253  				if err != nil {
   254  					r.logger.Error("start DNShard failed",
   255  						zap.Error(err))
   256  					time.Sleep(retryCreateStorageInterval)
   257  					continue
   258  				}
   259  
   260  				err = r.start(service.NewTxnService(
   261  					r.rt,
   262  					shard,
   263  					storage,
   264  					s.sender,
   265  					s.cfg.Txn.ZombieTimeout.Duration))
   266  				if err != nil {
   267  					r.logger.Fatal("start DNShard failed",
   268  						zap.Error(err))
   269  				}
   270  				return
   271  			}
   272  		}
   273  	})
   274  	if err != nil {
   275  		return err
   276  	}
   277  
   278  	s.addDNShardLocked(shard)
   279  	return nil
   280  }
   281  
   282  func (s *store) removeReplica(dnShardID uint64) error {
   283  	if r := s.getReplica(dnShardID); r != nil {
   284  		err := r.close(true)
   285  		s.replicas.Delete(dnShardID)
   286  		s.removeDNShard(dnShardID)
   287  		return err
   288  	}
   289  	return nil
   290  }
   291  
   292  func (s *store) getReplica(id uint64) *replica {
   293  	v, ok := s.replicas.Load(id)
   294  	if !ok {
   295  		return nil
   296  	}
   297  	return v.(*replica)
   298  }
   299  
   300  func (s *store) initTxnSender() error {
   301  	sender, err := rpc.NewSenderWithConfig(
   302  		s.cfg.RPC,
   303  		s.rt,
   304  		rpc.WithSenderBackendOptions(morpc.WithBackendFilter(func(m morpc.Message, backendAddr string) bool {
   305  			return s.options.backendFilter == nil || s.options.backendFilter(m.(*txn.TxnRequest), backendAddr)
   306  		})),
   307  		rpc.WithSenderLocalDispatch(s.dispatchLocalRequest))
   308  	if err != nil {
   309  		return err
   310  	}
   311  	s.sender = sender
   312  	return nil
   313  }
   314  
   315  func (s *store) initTxnServer() error {
   316  	server, err := rpc.NewTxnServer(
   317  		s.cfg.ListenAddress,
   318  		s.rt,
   319  		rpc.WithServerMaxMessageSize(int(s.cfg.RPC.MaxMessageSize)),
   320  		rpc.WithServerEnableCompress(s.cfg.RPC.EnableCompress))
   321  	if err != nil {
   322  		return err
   323  	}
   324  	s.server = server
   325  	s.registerRPCHandlers()
   326  	return nil
   327  }
   328  
   329  func (s *store) initClocker() error {
   330  	if s.rt.Clock() == nil {
   331  		return moerr.NewBadConfigNoCtx("missing txn clock")
   332  	}
   333  	return nil
   334  }
   335  
   336  func (s *store) initHAKeeperClient() error {
   337  	if s.options.hakeekerClientFactory != nil {
   338  		client, err := s.options.hakeekerClientFactory()
   339  		if err != nil {
   340  			return err
   341  		}
   342  		s.hakeeperClient = client
   343  		return nil
   344  	}
   345  
   346  	ctx, cancel := context.WithTimeout(context.Background(), s.cfg.HAKeeper.DiscoveryTimeout.Duration)
   347  	defer cancel()
   348  	client, err := logservice.NewDNHAKeeperClient(ctx, s.cfg.HAKeeper.ClientConfig)
   349  	if err != nil {
   350  		return err
   351  	}
   352  	s.hakeeperClient = client
   353  	return nil
   354  }