github.com/matrixorigin/matrixone@v1.2.0/pkg/logservice/service_commands.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package logservice 16 17 import ( 18 "context" 19 "fmt" 20 "reflect" 21 "time" 22 23 "go.uber.org/zap" 24 25 "github.com/matrixorigin/matrixone/pkg/hakeeper" 26 pb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 27 v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2" 28 "github.com/matrixorigin/matrixone/pkg/util/trace" 29 ) 30 31 func (s *Service) handleCommands(cmds []pb.ScheduleCommand) { 32 for _, cmd := range cmds { 33 s.runtime.Logger().Info(fmt.Sprintf("%s applying cmd: %s", s.ID(), cmd.LogString())) 34 if cmd.GetConfigChange() != nil { 35 s.runtime.Logger().Debug("applying schedule command:", zap.String("command", cmd.LogString())) 36 switch cmd.ConfigChange.ChangeType { 37 case pb.AddReplica: 38 s.handleAddReplica(cmd) 39 case pb.RemoveReplica: 40 s.handleRemoveReplica(cmd) 41 case pb.StartReplica: 42 s.handleStartReplica(cmd) 43 case pb.StopReplica: 44 s.handleStopReplica(cmd) 45 case pb.KillZombie: 46 s.handleKillZombie(cmd) 47 default: 48 panic("unknown config change cmd type") 49 } 50 } else if cmd.GetShutdownStore() != nil { 51 s.handleShutdownStore(cmd) 52 } else if cmd.GetCreateTaskService() != nil { 53 s.createTaskService(cmd.CreateTaskService) 54 s.createSQLLogger(cmd.CreateTaskService) 55 } else { 56 panic("unknown schedule command type") 57 } 58 } 59 } 60 61 func (s *Service) handleAddReplica(cmd pb.ScheduleCommand) { 62 shardID := cmd.ConfigChange.Replica.ShardID 63 replicaID := cmd.ConfigChange.Replica.ReplicaID 64 epoch := cmd.ConfigChange.Replica.Epoch 65 target := cmd.ConfigChange.Replica.UUID 66 if err := s.store.addReplica(shardID, replicaID, target, epoch); err != nil { 67 s.runtime.Logger().Error("failed to add replica", zap.Error(err)) 68 } 69 } 70 71 func (s *Service) handleRemoveReplica(cmd pb.ScheduleCommand) { 72 shardID := cmd.ConfigChange.Replica.ShardID 73 replicaID := cmd.ConfigChange.Replica.ReplicaID 74 epoch := cmd.ConfigChange.Replica.Epoch 75 if err := s.store.removeReplica(shardID, replicaID, epoch); err != nil { 76 s.runtime.Logger().Error("failed to remove replica", zap.Error(err)) 77 } 78 } 79 80 func (s *Service) handleStartReplica(cmd pb.ScheduleCommand) { 81 shardID := cmd.ConfigChange.Replica.ShardID 82 replicaID := cmd.ConfigChange.Replica.ReplicaID 83 join := len(cmd.ConfigChange.InitialMembers) == 0 84 if shardID == hakeeper.DefaultHAKeeperShardID { 85 if err := s.store.startHAKeeperReplica(replicaID, 86 cmd.ConfigChange.InitialMembers, join); err != nil { 87 s.runtime.Logger().Error("failed to start HAKeeper replica", zap.Error(err)) 88 } 89 } else { 90 if err := s.store.startReplica(shardID, 91 replicaID, cmd.ConfigChange.InitialMembers, join); err != nil { 92 s.runtime.Logger().Error("failed to start log replica", zap.Error(err)) 93 } 94 } 95 } 96 97 func (s *Service) handleStopReplica(cmd pb.ScheduleCommand) { 98 shardID := cmd.ConfigChange.Replica.ShardID 99 replicaID := cmd.ConfigChange.Replica.ReplicaID 100 if err := s.store.stopReplica(shardID, replicaID); err != nil { 101 s.runtime.Logger().Error("failed to stop replica", zap.Error(err)) 102 } 103 } 104 105 func (s *Service) handleKillZombie(cmd pb.ScheduleCommand) { 106 shardID := cmd.ConfigChange.Replica.ShardID 107 replicaID := cmd.ConfigChange.Replica.ReplicaID 108 s.handleStopReplica(cmd) 109 s.store.removeMetadata(shardID, replicaID) 110 } 111 112 func (s *Service) handleShutdownStore(_ pb.ScheduleCommand) { 113 // notify main routine that have received shutdown cmd 114 select { 115 case s.shutdownC <- struct{}{}: 116 default: 117 } 118 } 119 120 func (s *Service) heartbeatWorker(ctx context.Context) { 121 // TODO: check tick interval 122 if s.cfg.HeartbeatInterval.Duration == 0 { 123 panic("invalid heartbeat interval") 124 } 125 defer func() { 126 s.runtime.Logger().Info("heartbeat worker stopped") 127 }() 128 ticker := time.NewTicker(s.cfg.HeartbeatInterval.Duration) 129 defer ticker.Stop() 130 ctx, span := trace.Start(ctx, "heartbeatWorker") 131 defer span.End() 132 133 for { 134 select { 135 case <-ctx.Done(): 136 return 137 case <-ticker.C: 138 s.heartbeat(ctx) 139 // I'd call this an ugly hack to just workaround select's 140 // policy of randomly picking a ready channel from the case list. 141 select { 142 case <-ctx.Done(): 143 return 144 default: 145 } 146 } 147 } 148 } 149 150 func (s *Service) heartbeat(ctx context.Context) { 151 start := time.Now() 152 defer func() { 153 v2.LogHeartbeatHistogram.Observe(time.Since(start).Seconds()) 154 }() 155 ctx2, cancel := context.WithTimeout(ctx, 3*time.Second) 156 defer cancel() 157 158 if s.haClient == nil { 159 if reflect.DeepEqual(s.cfg.HAKeeperClientConfig, HAKeeperClientConfig{}) { 160 panic("empty HAKeeper client config") 161 } 162 cc, err := NewLogHAKeeperClient(ctx2, s.cfg.GetHAKeeperClientConfig()) 163 if err != nil { 164 s.runtime.Logger().Error("failed to create HAKeeper client", zap.Error(err)) 165 return 166 } 167 s.haClient = cc 168 } 169 170 hb := s.store.getHeartbeatMessage() 171 hb.TaskServiceCreated = s.taskServiceCreated() 172 hb.ConfigData = s.config.GetData() 173 174 cb, err := s.haClient.SendLogHeartbeat(ctx2, hb) 175 if err != nil { 176 v2.LogHeartbeatFailureCounter.Inc() 177 s.runtime.Logger().Error("failed to send log service heartbeat", zap.Error(err)) 178 return 179 } 180 181 s.config.DecrCount() 182 183 s.handleCommands(cb.Commands) 184 }