github.com/matrixorigin/matrixone@v1.2.0/pkg/logservice/service_bootstrap.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package logservice
    16  
    17  import (
    18  	"context"
    19  	"time"
    20  
    21  	"github.com/lni/dragonboat/v4"
    22  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    23  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    24  	"github.com/matrixorigin/matrixone/pkg/defines"
    25  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    26  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    27  	"go.uber.org/zap"
    28  )
    29  
// restoredTagFile is the path, in the local file service, of the marker file
// that BootstrapHAKeeper writes after taking HAKeeper data from a backup.
// When the file can be found, later bootstraps skip the restore unless the
// restore is forced by configuration.
const restoredTagFile = "./RESTORED"
    31  
    32  func (s *Service) BootstrapHAKeeper(ctx context.Context, cfg Config) error {
    33  	replicaID, bootstrapping := cfg.Bootstrapping()
    34  	if !bootstrapping {
    35  		return nil
    36  	}
    37  	members, err := cfg.GetInitHAKeeperMembers()
    38  	if err != nil {
    39  		return err
    40  	}
    41  	if err := s.store.startHAKeeperReplica(replicaID, members, false); err != nil {
    42  		// let's be a little less strict, when HAKeeper replica is already
    43  		// running as a result of store.startReplicas(), we just ignore the
    44  		// dragonboat.ErrShardAlreadyExist error below.
    45  		if err != dragonboat.ErrShardAlreadyExist {
    46  			s.runtime.SubLogger(runtime.SystemInit).Error("failed to start hakeeper replica", zap.Error(err))
    47  			return err
    48  		}
    49  	}
    50  	numOfLogShards := cfg.BootstrapConfig.NumOfLogShards
    51  	numOfTNShards := cfg.BootstrapConfig.NumOfTNShards
    52  	numOfLogReplicas := cfg.BootstrapConfig.NumOfLogShardReplicas
    53  
    54  	fs, err := fileservice.Get[fileservice.FileService](s.fileService, defines.LocalFileServiceName)
    55  	if err != nil {
    56  		s.runtime.SubLogger(runtime.SystemInit).Error("failed to get file service instance", zap.Error(err))
    57  		return err
    58  	}
    59  
    60  	var nextID uint64
    61  	var nextIDByKey map[string]uint64
    62  	backup, err := s.getBackupData(ctx)
    63  	if err != nil {
    64  		s.runtime.SubLogger(runtime.SystemInit).Error("failed to get backup data", zap.Error(err))
    65  		return err
    66  	}
    67  	if backup != nil { // We are trying to restore from a backup.
    68  		// If a backup has already been issued, ignore this time.
    69  		_, err := fs.StatFile(ctx, restoredTagFile)
    70  		if s.cfg.BootstrapConfig.Restore.Force || // force is true, we do restore whatever.
    71  			(err != nil && moerr.IsMoErrCode(err, moerr.ErrFileNotFound)) {
    72  			s.runtime.SubLogger(runtime.SystemInit).Info("restore hakeeper data",
    73  				zap.Uint64("next ID", backup.NextID),
    74  				zap.Any("next ID by key", backup.NextIDByKey),
    75  			)
    76  			// Restored tag file does not exist, we can do backup.
    77  			nextID = backup.NextID
    78  			nextIDByKey = backup.NextIDByKey
    79  
    80  			// After backup, create a restore file.
    81  			if err := fs.Write(ctx, fileservice.IOVector{
    82  				FilePath: restoredTagFile,
    83  				Entries: []fileservice.IOEntry{
    84  					{
    85  						Offset: 0,
    86  						Size:   1,
    87  						Data:   []byte{1},
    88  					},
    89  				},
    90  			}); err != nil {
    91  				s.runtime.SubLogger(runtime.SystemInit).Error("failed to write restore tag file",
    92  					zap.Error(err))
    93  				return err
    94  			}
    95  		}
    96  	}
    97  	for i := 0; i < checkBootstrapCycles; i++ {
    98  		select {
    99  		case <-ctx.Done():
   100  			return nil
   101  		default:
   102  		}
   103  		if err := s.store.setInitialClusterInfo(numOfLogShards,
   104  			numOfTNShards, numOfLogReplicas, nextID, nextIDByKey); err != nil {
   105  			s.runtime.SubLogger(runtime.SystemInit).Error("failed to set initial cluster info", zap.Error(err))
   106  			if err == dragonboat.ErrShardNotFound {
   107  				return nil
   108  			}
   109  			time.Sleep(time.Second)
   110  			continue
   111  		}
   112  		s.runtime.SubLogger(runtime.SystemInit).Info("initial cluster info set")
   113  		break
   114  	}
   115  	return nil
   116  }
   117  
   118  func (s *Service) getBackupData(ctx context.Context) (*pb.BackupData, error) {
   119  	filePath := s.cfg.BootstrapConfig.Restore.FilePath
   120  	if filePath == "" {
   121  		return nil, nil
   122  	}
   123  
   124  	path, err := fileservice.ParsePath(filePath)
   125  	if err != nil {
   126  		return nil, err
   127  	}
   128  
   129  	fsName := defines.LocalFileServiceName
   130  	if path.Service != "" {
   131  		fsName = path.Service
   132  	}
   133  
   134  	fs, err := fileservice.Get[fileservice.FileService](s.fileService, fsName)
   135  	if err != nil {
   136  		s.runtime.SubLogger(runtime.SystemInit).Error("failed to get file service instance %s",
   137  			zap.String("fileservice name", fsName),
   138  			zap.Error(err))
   139  		return nil, err
   140  	}
   141  
   142  	st, err := fs.StatFile(ctx, filePath)
   143  	if err != nil {
   144  		if moerr.IsMoErrCode(err, moerr.ErrFileNotFound) {
   145  			return nil, nil
   146  		}
   147  		return nil, err
   148  	}
   149  
   150  	ioVec := &fileservice.IOVector{
   151  		FilePath: filePath,
   152  		Entries:  make([]fileservice.IOEntry, 1),
   153  	}
   154  
   155  	// Read the whole file to one entry.
   156  	ioVec.Entries[0] = fileservice.IOEntry{
   157  		Offset: 0,
   158  		Size:   st.Size,
   159  	}
   160  	if err := fs.Read(ctx, ioVec); err != nil {
   161  		return nil, err
   162  	}
   163  
   164  	var data pb.BackupData
   165  	if err := data.Unmarshal(ioVec.Entries[0].Data); err != nil {
   166  		return nil, err
   167  	}
   168  	return &data, nil
   169  }