github.com/matrixorigin/matrixone@v1.2.0/pkg/tnservice/store_heartbeat.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tnservice 16 17 import ( 18 "context" 19 "time" 20 21 logservicepb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 22 "github.com/matrixorigin/matrixone/pkg/pb/metadata" 23 v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2" 24 "go.uber.org/zap" 25 ) 26 27 func (s *store) heartbeatTask(ctx context.Context) { 28 if s.cfg.HAKeeper.HeatbeatInterval.Duration == 0 { 29 panic("invalid heartbeat interval") 30 } 31 defer func() { 32 s.rt.Logger().Info("dn heartbeat task stopped") 33 }() 34 35 ticker := time.NewTicker(s.cfg.HAKeeper.HeatbeatInterval.Duration) 36 defer ticker.Stop() 37 38 for { 39 select { 40 case <-ctx.Done(): 41 return 42 case <-ticker.C: 43 s.heartbeat(ctx) 44 // see pkg/logservice/service_commands.go#130 45 select { 46 case <-ctx.Done(): 47 return 48 default: 49 } 50 } 51 } 52 } 53 54 func (s *store) heartbeat(ctx context.Context) { 55 start := time.Now() 56 defer func() { 57 v2.TNHeartbeatHistogram.Observe(time.Since(start).Seconds()) 58 }() 59 ctx2, cancel := context.WithTimeout(ctx, s.cfg.HAKeeper.HeatbeatTimeout.Duration) 60 defer cancel() 61 62 hb := logservicepb.TNStoreHeartbeat{ 63 UUID: s.cfg.UUID, 64 ServiceAddress: s.txnServiceServiceAddr(), 65 Shards: s.getTNShardInfo(), 66 TaskServiceCreated: s.taskServiceCreated(), 67 LogtailServerAddress: s.logtailServiceServiceAddr(), 68 LockServiceAddress: s.lockServiceServiceAddr(), 69 ConfigData: s.config.GetData(), 70 } 71 72 if s.queryService != nil { 73 hb.QueryAddress = s.queryServiceServiceAddr() 74 } 75 76 cb, err := s.hakeeperClient.SendTNHeartbeat(ctx2, hb) 77 if err != nil { 78 v2.TNHeartbeatFailureCounter.Inc() 79 s.rt.Logger().Error("failed to send tn heartbeat", zap.Error(err)) 80 return 81 } 82 83 s.config.DecrCount() 84 s.handleCommands(cb.Commands) 85 } 86 87 func (s *store) handleCommands(cmds []logservicepb.ScheduleCommand) { 88 for _, cmd := range cmds { 89 if cmd.ServiceType != logservicepb.TNService { 90 s.rt.Logger().Fatal("received invalid command", zap.String("command", cmd.LogString())) 91 } 92 s.rt.Logger().Debug("applying schedule command:", zap.String("command", cmd.LogString())) 93 if cmd.ConfigChange != nil { 94 switch cmd.ConfigChange.ChangeType { 95 case logservicepb.AddReplica, logservicepb.StartReplica: 96 s.handleAddReplica(cmd) 97 case logservicepb.RemoveReplica, logservicepb.StopReplica: 98 s.handleRemoveReplica(cmd) 99 } 100 } else if cmd.GetShutdownStore() != nil { 101 s.handleShutdownStore(cmd) 102 } else if cmd.CreateTaskService != nil { 103 s.createTaskService(cmd.CreateTaskService) 104 s.createSQLLogger(cmd.CreateTaskService) 105 } 106 } 107 } 108 109 func (s *store) handleAddReplica(cmd logservicepb.ScheduleCommand) { 110 shardID := cmd.ConfigChange.Replica.ShardID 111 logShardID := cmd.ConfigChange.Replica.LogShardID 112 replicaID := cmd.ConfigChange.Replica.ReplicaID 113 address := s.cfg.ServiceAddress 114 if err := s.createReplica(metadata.TNShard{ 115 TNShardRecord: metadata.TNShardRecord{ 116 ShardID: shardID, 117 LogShardID: logShardID, 118 }, 119 ReplicaID: replicaID, 120 Address: address, 121 }); err != nil { 122 s.rt.Logger().Error("failed to add replica", zap.Error(err)) 123 } 124 } 125 126 func (s *store) handleRemoveReplica(cmd logservicepb.ScheduleCommand) { 127 shardID := cmd.ConfigChange.Replica.ShardID 128 if err := s.removeReplica(shardID); err != nil { 129 s.rt.Logger().Error("failed to remove replica", zap.Error(err)) 130 } 131 } 132 133 func (s *store) handleShutdownStore(_ logservicepb.ScheduleCommand) { 134 // notify main routine that have received shutdown cmd 135 select { 136 case s.shutdownC <- struct{}{}: 137 default: 138 } 139 }