github.com/matrixorigin/matrixone@v1.2.0/pkg/logservice/service_bootstrap.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package logservice 16 17 import ( 18 "context" 19 "time" 20 21 "github.com/lni/dragonboat/v4" 22 "github.com/matrixorigin/matrixone/pkg/common/moerr" 23 "github.com/matrixorigin/matrixone/pkg/common/runtime" 24 "github.com/matrixorigin/matrixone/pkg/defines" 25 "github.com/matrixorigin/matrixone/pkg/fileservice" 26 pb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 27 "go.uber.org/zap" 28 ) 29 30 const restoredTagFile = "./RESTORED" 31 32 func (s *Service) BootstrapHAKeeper(ctx context.Context, cfg Config) error { 33 replicaID, bootstrapping := cfg.Bootstrapping() 34 if !bootstrapping { 35 return nil 36 } 37 members, err := cfg.GetInitHAKeeperMembers() 38 if err != nil { 39 return err 40 } 41 if err := s.store.startHAKeeperReplica(replicaID, members, false); err != nil { 42 // let's be a little less strict, when HAKeeper replica is already 43 // running as a result of store.startReplicas(), we just ignore the 44 // dragonboat.ErrShardAlreadyExist error below. 45 if err != dragonboat.ErrShardAlreadyExist { 46 s.runtime.SubLogger(runtime.SystemInit).Error("failed to start hakeeper replica", zap.Error(err)) 47 return err 48 } 49 } 50 numOfLogShards := cfg.BootstrapConfig.NumOfLogShards 51 numOfTNShards := cfg.BootstrapConfig.NumOfTNShards 52 numOfLogReplicas := cfg.BootstrapConfig.NumOfLogShardReplicas 53 54 fs, err := fileservice.Get[fileservice.FileService](s.fileService, defines.LocalFileServiceName) 55 if err != nil { 56 s.runtime.SubLogger(runtime.SystemInit).Error("failed to get file service instance", zap.Error(err)) 57 return err 58 } 59 60 var nextID uint64 61 var nextIDByKey map[string]uint64 62 backup, err := s.getBackupData(ctx) 63 if err != nil { 64 s.runtime.SubLogger(runtime.SystemInit).Error("failed to get backup data", zap.Error(err)) 65 return err 66 } 67 if backup != nil { // We are trying to restore from a backup. 68 // If a backup has already been issued, ignore this time. 69 _, err := fs.StatFile(ctx, restoredTagFile) 70 if s.cfg.BootstrapConfig.Restore.Force || // force is true, we do restore whatever. 71 (err != nil && moerr.IsMoErrCode(err, moerr.ErrFileNotFound)) { 72 s.runtime.SubLogger(runtime.SystemInit).Info("restore hakeeper data", 73 zap.Uint64("next ID", backup.NextID), 74 zap.Any("next ID by key", backup.NextIDByKey), 75 ) 76 // Restored tag file does not exist, we can do backup. 77 nextID = backup.NextID 78 nextIDByKey = backup.NextIDByKey 79 80 // After backup, create a restore file. 81 if err := fs.Write(ctx, fileservice.IOVector{ 82 FilePath: restoredTagFile, 83 Entries: []fileservice.IOEntry{ 84 { 85 Offset: 0, 86 Size: 1, 87 Data: []byte{1}, 88 }, 89 }, 90 }); err != nil { 91 s.runtime.SubLogger(runtime.SystemInit).Error("failed to write restore tag file", 92 zap.Error(err)) 93 return err 94 } 95 } 96 } 97 for i := 0; i < checkBootstrapCycles; i++ { 98 select { 99 case <-ctx.Done(): 100 return nil 101 default: 102 } 103 if err := s.store.setInitialClusterInfo(numOfLogShards, 104 numOfTNShards, numOfLogReplicas, nextID, nextIDByKey); err != nil { 105 s.runtime.SubLogger(runtime.SystemInit).Error("failed to set initial cluster info", zap.Error(err)) 106 if err == dragonboat.ErrShardNotFound { 107 return nil 108 } 109 time.Sleep(time.Second) 110 continue 111 } 112 s.runtime.SubLogger(runtime.SystemInit).Info("initial cluster info set") 113 break 114 } 115 return nil 116 } 117 118 func (s *Service) getBackupData(ctx context.Context) (*pb.BackupData, error) { 119 filePath := s.cfg.BootstrapConfig.Restore.FilePath 120 if filePath == "" { 121 return nil, nil 122 } 123 124 path, err := fileservice.ParsePath(filePath) 125 if err != nil { 126 return nil, err 127 } 128 129 fsName := defines.LocalFileServiceName 130 if path.Service != "" { 131 fsName = path.Service 132 } 133 134 fs, err := fileservice.Get[fileservice.FileService](s.fileService, fsName) 135 if err != nil { 136 s.runtime.SubLogger(runtime.SystemInit).Error("failed to get file service instance %s", 137 zap.String("fileservice name", fsName), 138 zap.Error(err)) 139 return nil, err 140 } 141 142 st, err := fs.StatFile(ctx, filePath) 143 if err != nil { 144 if moerr.IsMoErrCode(err, moerr.ErrFileNotFound) { 145 return nil, nil 146 } 147 return nil, err 148 } 149 150 ioVec := &fileservice.IOVector{ 151 FilePath: filePath, 152 Entries: make([]fileservice.IOEntry, 1), 153 } 154 155 // Read the whole file to one entry. 156 ioVec.Entries[0] = fileservice.IOEntry{ 157 Offset: 0, 158 Size: st.Size, 159 } 160 if err := fs.Read(ctx, ioVec); err != nil { 161 return nil, err 162 } 163 164 var data pb.BackupData 165 if err := data.Unmarshal(ioVec.Entries[0].Data); err != nil { 166 return nil, err 167 } 168 return &data, nil 169 }