github.com/matrixorigin/matrixone@v0.7.0/pkg/dnservice/store.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package dnservice 16 17 import ( 18 "context" 19 "sync" 20 "time" 21 22 "github.com/matrixorigin/matrixone/pkg/common/moerr" 23 "github.com/matrixorigin/matrixone/pkg/common/morpc" 24 "github.com/matrixorigin/matrixone/pkg/common/runtime" 25 "github.com/matrixorigin/matrixone/pkg/common/stopper" 26 "github.com/matrixorigin/matrixone/pkg/defines" 27 "github.com/matrixorigin/matrixone/pkg/fileservice" 28 "github.com/matrixorigin/matrixone/pkg/logservice" 29 logservicepb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 30 "github.com/matrixorigin/matrixone/pkg/pb/metadata" 31 "github.com/matrixorigin/matrixone/pkg/pb/txn" 32 "github.com/matrixorigin/matrixone/pkg/taskservice" 33 "github.com/matrixorigin/matrixone/pkg/txn/rpc" 34 "github.com/matrixorigin/matrixone/pkg/txn/service" 35 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common" 36 "go.uber.org/multierr" 37 "go.uber.org/zap" 38 ) 39 40 var ( 41 retryCreateStorageInterval = time.Second * 5 42 ) 43 44 // WithConfigAdjust set adjust config func 45 func WithConfigAdjust(adjustConfigFunc func(c *Config)) Option { 46 return func(s *store) { 47 s.options.adjustConfigFunc = adjustConfigFunc 48 } 49 } 50 51 // WithBackendFilter set filtering txn.TxnRequest sent to other DNShard 52 func WithBackendFilter(filter func(morpc.Message, string) bool) Option { 53 return func(s *store) { 54 s.options.backendFilter = filter 55 } 56 } 57 58 // WithHAKeeperClientFactory set hakeeper client factory 59 func WithHAKeeperClientFactory(factory func() (logservice.DNHAKeeperClient, error)) Option { 60 return func(s *store) { 61 s.options.hakeekerClientFactory = factory 62 } 63 } 64 65 // WithLogServiceClientFactory set log service client factory 66 func WithLogServiceClientFactory(factory func(metadata.DNShard) (logservice.Client, error)) Option { 67 return func(s *store) { 68 s.options.logServiceClientFactory = factory 69 } 70 } 71 72 // WithTaskStorageFactory setup the special task strorage factory 73 func WithTaskStorageFactory(factory taskservice.TaskStorageFactory) Option { 74 return func(s *store) { 75 s.task.storageFactory = factory 76 } 77 } 78 79 type store struct { 80 cfg *Config 81 rt runtime.Runtime 82 sender rpc.TxnSender 83 server rpc.TxnServer 84 hakeeperClient logservice.DNHAKeeperClient 85 fileService fileservice.FileService 86 metadataFileService fileservice.ReplaceableFileService 87 replicas *sync.Map 88 stopper *stopper.Stopper 89 90 options struct { 91 logServiceClientFactory func(metadata.DNShard) (logservice.Client, error) 92 hakeekerClientFactory func() (logservice.DNHAKeeperClient, error) 93 backendFilter func(msg morpc.Message, backendAddr string) bool 94 adjustConfigFunc func(c *Config) 95 } 96 97 mu struct { 98 sync.RWMutex 99 metadata metadata.DNStore 100 } 101 102 task struct { 103 sync.RWMutex 104 serviceCreated bool 105 serviceHolder taskservice.TaskServiceHolder 106 storageFactory taskservice.TaskStorageFactory 107 } 108 } 109 110 // NewService create DN Service 111 func NewService(cfg *Config, 112 rt runtime.Runtime, 113 fileService fileservice.FileService, 114 opts ...Option) (Service, error) { 115 if err := cfg.Validate(); err != nil { 116 return nil, err 117 } 118 119 // start common stuff 120 common.InitTAEMPool() 121 122 // get metadata fs 123 metadataFS, err := fileservice.Get[fileservice.ReplaceableFileService](fileService, defines.LocalFileServiceName) 124 if err != nil { 125 return nil, err 126 } 127 128 s := &store{ 129 cfg: cfg, 130 rt: rt, 131 fileService: fileService, 132 metadataFileService: metadataFS, 133 } 134 for _, opt := range opts { 135 opt(s) 136 } 137 s.replicas = &sync.Map{} 138 s.stopper = stopper.NewStopper("dn-store", 139 stopper.WithLogger(s.rt.Logger().RawLogger())) 140 s.mu.metadata = metadata.DNStore{UUID: cfg.UUID} 141 if s.options.adjustConfigFunc != nil { 142 s.options.adjustConfigFunc(s.cfg) 143 } 144 145 if err := s.initClocker(); err != nil { 146 return nil, err 147 } 148 if err := s.initHAKeeperClient(); err != nil { 149 return nil, err 150 } 151 if err := s.initTxnSender(); err != nil { 152 return nil, err 153 } 154 if err := s.initTxnServer(); err != nil { 155 return nil, err 156 } 157 if err := s.initMetadata(); err != nil { 158 return nil, err 159 } 160 s.initTaskHolder() 161 return s, nil 162 } 163 164 func (s *store) Start() error { 165 if err := s.startDNShards(); err != nil { 166 return err 167 } 168 if err := s.server.Start(); err != nil { 169 return err 170 } 171 s.rt.SubLogger(runtime.SystemInit).Info("dn heartbeat task started") 172 return s.stopper.RunTask(s.heartbeatTask) 173 } 174 175 func (s *store) Close() error { 176 s.stopper.Stop() 177 var err error 178 if e := s.hakeeperClient.Close(); e != nil { 179 err = multierr.Append(e, err) 180 } 181 if e := s.sender.Close(); e != nil { 182 err = multierr.Append(e, err) 183 } 184 if e := s.server.Close(); e != nil { 185 err = multierr.Append(e, err) 186 } 187 s.replicas.Range(func(_, value any) bool { 188 r := value.(*replica) 189 if e := r.close(false); e != nil { 190 err = multierr.Append(e, err) 191 } 192 return true 193 }) 194 s.task.RLock() 195 ts := s.task.serviceHolder 196 s.task.RUnlock() 197 if ts != nil { 198 err = ts.Close() 199 } 200 return err 201 } 202 203 func (s *store) StartDNReplica(shard metadata.DNShard) error { 204 return s.createReplica(shard) 205 } 206 207 func (s *store) CloseDNReplica(shard metadata.DNShard) error { 208 return s.removeReplica(shard.ShardID) 209 } 210 211 func (s *store) startDNShards() error { 212 s.mu.Lock() 213 defer s.mu.Unlock() 214 215 for _, shard := range s.mu.metadata.Shards { 216 if err := s.createReplica(shard); err != nil { 217 return err 218 } 219 } 220 return nil 221 } 222 223 func (s *store) getDNShardInfo() []logservicepb.DNShardInfo { 224 var shards []logservicepb.DNShardInfo 225 s.replicas.Range(func(_, value any) bool { 226 r := value.(*replica) 227 shards = append(shards, logservicepb.DNShardInfo{ 228 ShardID: r.shard.ShardID, 229 ReplicaID: r.shard.ReplicaID, 230 }) 231 return true 232 }) 233 return shards 234 } 235 236 func (s *store) createReplica(shard metadata.DNShard) error { 237 r := newReplica(shard, s.rt) 238 v, ok := s.replicas.LoadOrStore(shard.ShardID, r) 239 if ok { 240 s.rt.Logger().Debug("DNShard already created", 241 zap.String("new", shard.DebugString()), 242 zap.String("exist", v.(*replica).shard.DebugString())) 243 return nil 244 } 245 246 err := s.stopper.RunTask(func(ctx context.Context) { 247 for { 248 select { 249 case <-ctx.Done(): 250 return 251 default: 252 storage, err := s.createTxnStorage(ctx, shard) 253 if err != nil { 254 r.logger.Error("start DNShard failed", 255 zap.Error(err)) 256 time.Sleep(retryCreateStorageInterval) 257 continue 258 } 259 260 err = r.start(service.NewTxnService( 261 r.rt, 262 shard, 263 storage, 264 s.sender, 265 s.cfg.Txn.ZombieTimeout.Duration)) 266 if err != nil { 267 r.logger.Fatal("start DNShard failed", 268 zap.Error(err)) 269 } 270 return 271 } 272 } 273 }) 274 if err != nil { 275 return err 276 } 277 278 s.addDNShardLocked(shard) 279 return nil 280 } 281 282 func (s *store) removeReplica(dnShardID uint64) error { 283 if r := s.getReplica(dnShardID); r != nil { 284 err := r.close(true) 285 s.replicas.Delete(dnShardID) 286 s.removeDNShard(dnShardID) 287 return err 288 } 289 return nil 290 } 291 292 func (s *store) getReplica(id uint64) *replica { 293 v, ok := s.replicas.Load(id) 294 if !ok { 295 return nil 296 } 297 return v.(*replica) 298 } 299 300 func (s *store) initTxnSender() error { 301 sender, err := rpc.NewSenderWithConfig( 302 s.cfg.RPC, 303 s.rt, 304 rpc.WithSenderBackendOptions(morpc.WithBackendFilter(func(m morpc.Message, backendAddr string) bool { 305 return s.options.backendFilter == nil || s.options.backendFilter(m.(*txn.TxnRequest), backendAddr) 306 })), 307 rpc.WithSenderLocalDispatch(s.dispatchLocalRequest)) 308 if err != nil { 309 return err 310 } 311 s.sender = sender 312 return nil 313 } 314 315 func (s *store) initTxnServer() error { 316 server, err := rpc.NewTxnServer( 317 s.cfg.ListenAddress, 318 s.rt, 319 rpc.WithServerMaxMessageSize(int(s.cfg.RPC.MaxMessageSize)), 320 rpc.WithServerEnableCompress(s.cfg.RPC.EnableCompress)) 321 if err != nil { 322 return err 323 } 324 s.server = server 325 s.registerRPCHandlers() 326 return nil 327 } 328 329 func (s *store) initClocker() error { 330 if s.rt.Clock() == nil { 331 return moerr.NewBadConfigNoCtx("missing txn clock") 332 } 333 return nil 334 } 335 336 func (s *store) initHAKeeperClient() error { 337 if s.options.hakeekerClientFactory != nil { 338 client, err := s.options.hakeekerClientFactory() 339 if err != nil { 340 return err 341 } 342 s.hakeeperClient = client 343 return nil 344 } 345 346 ctx, cancel := context.WithTimeout(context.Background(), s.cfg.HAKeeper.DiscoveryTimeout.Duration) 347 defer cancel() 348 client, err := logservice.NewDNHAKeeperClient(ctx, s.cfg.HAKeeper.ClientConfig) 349 if err != nil { 350 return err 351 } 352 s.hakeeperClient = client 353 return nil 354 }