github.com/matrixorigin/matrixone@v1.2.0/pkg/tnservice/store_heartbeat.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tnservice
    16  
    17  import (
    18  	"context"
    19  	"time"
    20  
    21  	logservicepb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    22  	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
    23  	v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2"
    24  	"go.uber.org/zap"
    25  )
    26  
    27  func (s *store) heartbeatTask(ctx context.Context) {
    28  	if s.cfg.HAKeeper.HeatbeatInterval.Duration == 0 {
    29  		panic("invalid heartbeat interval")
    30  	}
    31  	defer func() {
    32  		s.rt.Logger().Info("dn heartbeat task stopped")
    33  	}()
    34  
    35  	ticker := time.NewTicker(s.cfg.HAKeeper.HeatbeatInterval.Duration)
    36  	defer ticker.Stop()
    37  
    38  	for {
    39  		select {
    40  		case <-ctx.Done():
    41  			return
    42  		case <-ticker.C:
    43  			s.heartbeat(ctx)
    44  			// see pkg/logservice/service_commands.go#130
    45  			select {
    46  			case <-ctx.Done():
    47  				return
    48  			default:
    49  			}
    50  		}
    51  	}
    52  }
    53  
    54  func (s *store) heartbeat(ctx context.Context) {
    55  	start := time.Now()
    56  	defer func() {
    57  		v2.TNHeartbeatHistogram.Observe(time.Since(start).Seconds())
    58  	}()
    59  	ctx2, cancel := context.WithTimeout(ctx, s.cfg.HAKeeper.HeatbeatTimeout.Duration)
    60  	defer cancel()
    61  
    62  	hb := logservicepb.TNStoreHeartbeat{
    63  		UUID:                 s.cfg.UUID,
    64  		ServiceAddress:       s.txnServiceServiceAddr(),
    65  		Shards:               s.getTNShardInfo(),
    66  		TaskServiceCreated:   s.taskServiceCreated(),
    67  		LogtailServerAddress: s.logtailServiceServiceAddr(),
    68  		LockServiceAddress:   s.lockServiceServiceAddr(),
    69  		ConfigData:           s.config.GetData(),
    70  	}
    71  
    72  	if s.queryService != nil {
    73  		hb.QueryAddress = s.queryServiceServiceAddr()
    74  	}
    75  
    76  	cb, err := s.hakeeperClient.SendTNHeartbeat(ctx2, hb)
    77  	if err != nil {
    78  		v2.TNHeartbeatFailureCounter.Inc()
    79  		s.rt.Logger().Error("failed to send tn heartbeat", zap.Error(err))
    80  		return
    81  	}
    82  
    83  	s.config.DecrCount()
    84  	s.handleCommands(cb.Commands)
    85  }
    86  
    87  func (s *store) handleCommands(cmds []logservicepb.ScheduleCommand) {
    88  	for _, cmd := range cmds {
    89  		if cmd.ServiceType != logservicepb.TNService {
    90  			s.rt.Logger().Fatal("received invalid command", zap.String("command", cmd.LogString()))
    91  		}
    92  		s.rt.Logger().Debug("applying schedule command:", zap.String("command", cmd.LogString()))
    93  		if cmd.ConfigChange != nil {
    94  			switch cmd.ConfigChange.ChangeType {
    95  			case logservicepb.AddReplica, logservicepb.StartReplica:
    96  				s.handleAddReplica(cmd)
    97  			case logservicepb.RemoveReplica, logservicepb.StopReplica:
    98  				s.handleRemoveReplica(cmd)
    99  			}
   100  		} else if cmd.GetShutdownStore() != nil {
   101  			s.handleShutdownStore(cmd)
   102  		} else if cmd.CreateTaskService != nil {
   103  			s.createTaskService(cmd.CreateTaskService)
   104  			s.createSQLLogger(cmd.CreateTaskService)
   105  		}
   106  	}
   107  }
   108  
   109  func (s *store) handleAddReplica(cmd logservicepb.ScheduleCommand) {
   110  	shardID := cmd.ConfigChange.Replica.ShardID
   111  	logShardID := cmd.ConfigChange.Replica.LogShardID
   112  	replicaID := cmd.ConfigChange.Replica.ReplicaID
   113  	address := s.cfg.ServiceAddress
   114  	if err := s.createReplica(metadata.TNShard{
   115  		TNShardRecord: metadata.TNShardRecord{
   116  			ShardID:    shardID,
   117  			LogShardID: logShardID,
   118  		},
   119  		ReplicaID: replicaID,
   120  		Address:   address,
   121  	}); err != nil {
   122  		s.rt.Logger().Error("failed to add replica", zap.Error(err))
   123  	}
   124  }
   125  
   126  func (s *store) handleRemoveReplica(cmd logservicepb.ScheduleCommand) {
   127  	shardID := cmd.ConfigChange.Replica.ShardID
   128  	if err := s.removeReplica(shardID); err != nil {
   129  		s.rt.Logger().Error("failed to remove replica", zap.Error(err))
   130  	}
   131  }
   132  
   133  func (s *store) handleShutdownStore(_ logservicepb.ScheduleCommand) {
   134  	// notify main routine that have received shutdown cmd
   135  	select {
   136  	case s.shutdownC <- struct{}{}:
   137  	default:
   138  	}
   139  }