github.com/matrixorigin/matrixone@v1.2.0/pkg/logservice/service_commands.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package logservice
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"reflect"
    21  	"time"
    22  
    23  	"go.uber.org/zap"
    24  
    25  	"github.com/matrixorigin/matrixone/pkg/hakeeper"
    26  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    27  	v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2"
    28  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    29  )
    30  
    31  func (s *Service) handleCommands(cmds []pb.ScheduleCommand) {
    32  	for _, cmd := range cmds {
    33  		s.runtime.Logger().Info(fmt.Sprintf("%s applying cmd: %s", s.ID(), cmd.LogString()))
    34  		if cmd.GetConfigChange() != nil {
    35  			s.runtime.Logger().Debug("applying schedule command:", zap.String("command", cmd.LogString()))
    36  			switch cmd.ConfigChange.ChangeType {
    37  			case pb.AddReplica:
    38  				s.handleAddReplica(cmd)
    39  			case pb.RemoveReplica:
    40  				s.handleRemoveReplica(cmd)
    41  			case pb.StartReplica:
    42  				s.handleStartReplica(cmd)
    43  			case pb.StopReplica:
    44  				s.handleStopReplica(cmd)
    45  			case pb.KillZombie:
    46  				s.handleKillZombie(cmd)
    47  			default:
    48  				panic("unknown config change cmd type")
    49  			}
    50  		} else if cmd.GetShutdownStore() != nil {
    51  			s.handleShutdownStore(cmd)
    52  		} else if cmd.GetCreateTaskService() != nil {
    53  			s.createTaskService(cmd.CreateTaskService)
    54  			s.createSQLLogger(cmd.CreateTaskService)
    55  		} else {
    56  			panic("unknown schedule command type")
    57  		}
    58  	}
    59  }
    60  
    61  func (s *Service) handleAddReplica(cmd pb.ScheduleCommand) {
    62  	shardID := cmd.ConfigChange.Replica.ShardID
    63  	replicaID := cmd.ConfigChange.Replica.ReplicaID
    64  	epoch := cmd.ConfigChange.Replica.Epoch
    65  	target := cmd.ConfigChange.Replica.UUID
    66  	if err := s.store.addReplica(shardID, replicaID, target, epoch); err != nil {
    67  		s.runtime.Logger().Error("failed to add replica", zap.Error(err))
    68  	}
    69  }
    70  
    71  func (s *Service) handleRemoveReplica(cmd pb.ScheduleCommand) {
    72  	shardID := cmd.ConfigChange.Replica.ShardID
    73  	replicaID := cmd.ConfigChange.Replica.ReplicaID
    74  	epoch := cmd.ConfigChange.Replica.Epoch
    75  	if err := s.store.removeReplica(shardID, replicaID, epoch); err != nil {
    76  		s.runtime.Logger().Error("failed to remove replica", zap.Error(err))
    77  	}
    78  }
    79  
    80  func (s *Service) handleStartReplica(cmd pb.ScheduleCommand) {
    81  	shardID := cmd.ConfigChange.Replica.ShardID
    82  	replicaID := cmd.ConfigChange.Replica.ReplicaID
    83  	join := len(cmd.ConfigChange.InitialMembers) == 0
    84  	if shardID == hakeeper.DefaultHAKeeperShardID {
    85  		if err := s.store.startHAKeeperReplica(replicaID,
    86  			cmd.ConfigChange.InitialMembers, join); err != nil {
    87  			s.runtime.Logger().Error("failed to start HAKeeper replica", zap.Error(err))
    88  		}
    89  	} else {
    90  		if err := s.store.startReplica(shardID,
    91  			replicaID, cmd.ConfigChange.InitialMembers, join); err != nil {
    92  			s.runtime.Logger().Error("failed to start log replica", zap.Error(err))
    93  		}
    94  	}
    95  }
    96  
    97  func (s *Service) handleStopReplica(cmd pb.ScheduleCommand) {
    98  	shardID := cmd.ConfigChange.Replica.ShardID
    99  	replicaID := cmd.ConfigChange.Replica.ReplicaID
   100  	if err := s.store.stopReplica(shardID, replicaID); err != nil {
   101  		s.runtime.Logger().Error("failed to stop replica", zap.Error(err))
   102  	}
   103  }
   104  
   105  func (s *Service) handleKillZombie(cmd pb.ScheduleCommand) {
   106  	shardID := cmd.ConfigChange.Replica.ShardID
   107  	replicaID := cmd.ConfigChange.Replica.ReplicaID
   108  	s.handleStopReplica(cmd)
   109  	s.store.removeMetadata(shardID, replicaID)
   110  }
   111  
   112  func (s *Service) handleShutdownStore(_ pb.ScheduleCommand) {
   113  	// notify main routine that have received shutdown cmd
   114  	select {
   115  	case s.shutdownC <- struct{}{}:
   116  	default:
   117  	}
   118  }
   119  
   120  func (s *Service) heartbeatWorker(ctx context.Context) {
   121  	// TODO: check tick interval
   122  	if s.cfg.HeartbeatInterval.Duration == 0 {
   123  		panic("invalid heartbeat interval")
   124  	}
   125  	defer func() {
   126  		s.runtime.Logger().Info("heartbeat worker stopped")
   127  	}()
   128  	ticker := time.NewTicker(s.cfg.HeartbeatInterval.Duration)
   129  	defer ticker.Stop()
   130  	ctx, span := trace.Start(ctx, "heartbeatWorker")
   131  	defer span.End()
   132  
   133  	for {
   134  		select {
   135  		case <-ctx.Done():
   136  			return
   137  		case <-ticker.C:
   138  			s.heartbeat(ctx)
   139  			// I'd call this an ugly hack to just workaround select's
   140  			// policy of randomly picking a ready channel from the case list.
   141  			select {
   142  			case <-ctx.Done():
   143  				return
   144  			default:
   145  			}
   146  		}
   147  	}
   148  }
   149  
   150  func (s *Service) heartbeat(ctx context.Context) {
   151  	start := time.Now()
   152  	defer func() {
   153  		v2.LogHeartbeatHistogram.Observe(time.Since(start).Seconds())
   154  	}()
   155  	ctx2, cancel := context.WithTimeout(ctx, 3*time.Second)
   156  	defer cancel()
   157  
   158  	if s.haClient == nil {
   159  		if reflect.DeepEqual(s.cfg.HAKeeperClientConfig, HAKeeperClientConfig{}) {
   160  			panic("empty HAKeeper client config")
   161  		}
   162  		cc, err := NewLogHAKeeperClient(ctx2, s.cfg.GetHAKeeperClientConfig())
   163  		if err != nil {
   164  			s.runtime.Logger().Error("failed to create HAKeeper client", zap.Error(err))
   165  			return
   166  		}
   167  		s.haClient = cc
   168  	}
   169  
   170  	hb := s.store.getHeartbeatMessage()
   171  	hb.TaskServiceCreated = s.taskServiceCreated()
   172  	hb.ConfigData = s.config.GetData()
   173  
   174  	cb, err := s.haClient.SendLogHeartbeat(ctx2, hb)
   175  	if err != nil {
   176  		v2.LogHeartbeatFailureCounter.Inc()
   177  		s.runtime.Logger().Error("failed to send log service heartbeat", zap.Error(err))
   178  		return
   179  	}
   180  
   181  	s.config.DecrCount()
   182  
   183  	s.handleCommands(cb.Commands)
   184  }