github.com/matrixorigin/matrixone@v1.2.0/pkg/txn/service/service_recovery.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package service
    16  
    17  import (
    18  	"context"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/pb/txn"
    21  	"github.com/matrixorigin/matrixone/pkg/txn/util"
    22  	"go.uber.org/zap"
    23  )
    24  
    25  func (s *service) startRecovery() {
    26  	if err := s.stopper.RunTask(s.doRecovery); err != nil {
    27  		s.logger.Fatal("start recover task failed",
    28  			zap.Error(err))
    29  	}
    30  	s.storage.StartRecovery(context.TODO(), s.txnC)
    31  	s.waitRecoveryCompleted()
    32  }
    33  
    34  func (s *service) doRecovery(ctx context.Context) {
    35  	for {
    36  		select {
    37  		case <-ctx.Done():
    38  			return
    39  		case txn, ok := <-s.txnC:
    40  			if !ok {
    41  				s.end()
    42  				return
    43  			}
    44  			s.addLog(txn)
    45  		}
    46  	}
    47  }
    48  
    49  func (s *service) addLog(txnMeta txn.TxnMeta) {
    50  	if len(txnMeta.TNShards) <= 1 {
    51  		return
    52  	}
    53  
    54  	switch txnMeta.Status {
    55  	case txn.TxnStatus_Committing:
    56  		s.checkRecoveryStatus(txnMeta)
    57  		txnCtx := s.getTxnContext(txnMeta.ID)
    58  		if txnCtx == nil {
    59  			s.maybeAddTxn(txnMeta)
    60  		} else {
    61  			if txnCtx.getTxn().Status != txn.TxnStatus_Prepared &&
    62  				txnCtx.getTxn().Status != txn.TxnStatus_Committing {
    63  				s.logger.Fatal("invalid txn status before committing",
    64  					zap.String("prev-status", txnCtx.getTxn().Status.String()),
    65  					util.TxnField(txnMeta))
    66  			}
    67  			txnCtx.updateTxn(txnMeta)
    68  		}
    69  	case txn.TxnStatus_Prepared:
    70  		s.checkRecoveryStatus(txnMeta)
    71  		txnCtx := s.getTxnContext(txnMeta.ID)
    72  		if txnCtx == nil {
    73  			s.maybeAddTxn(txnMeta)
    74  			break
    75  		}
    76  
    77  		if txnCtx.getTxn().Status != txn.TxnStatus_Prepared {
    78  			s.logger.Fatal("invalid txn status before prepare status",
    79  				zap.String("prev-status", txnCtx.getTxn().Status.String()),
    80  				util.TxnField(txnMeta))
    81  		}
    82  		txnCtx.updateTxn(txnMeta)
    83  	case txn.TxnStatus_Committed:
    84  		s.checkRecoveryStatus(txnMeta)
    85  		s.removeTxn(txnMeta.ID)
    86  	default:
    87  		s.logger.Fatal("invalid recovery status",
    88  			util.TxnField(txnMeta))
    89  	}
    90  }
    91  
    92  func (s *service) end() {
    93  	defer close(s.recoveryC)
    94  	s.transactions.Range(func(_, value any) bool {
    95  		txnCtx := value.(*txnContext)
    96  		txnMeta := txnCtx.getTxn()
    97  		if !s.shard.Equal(txnMeta.TNShards[0]) {
    98  			return true
    99  		}
   100  
   101  		switch txnMeta.Status {
   102  		case txn.TxnStatus_Prepared:
   103  			if err := s.startAsyncCheckCommitTask(txnCtx); err != nil {
   104  				panic(err)
   105  			}
   106  		case txn.TxnStatus_Committing:
   107  			s.removeTxn(txnMeta.ID)
   108  			if err := s.startAsyncCommitTask(txnCtx); err != nil {
   109  				panic(err)
   110  			}
   111  		}
   112  		return true
   113  	})
   114  }
   115  
// waitRecoveryCompleted blocks until a receive on s.recoveryC succeeds.
// In this file the channel is closed by end once the recovered
// transactions have been processed.
func (s *service) waitRecoveryCompleted() {
	<-s.recoveryC
}
   119  
   120  func (s *service) startAsyncCheckCommitTask(txnCtx *txnContext) error {
   121  	return s.stopper.RunTask(func(ctx context.Context) {
   122  		txnMeta := txnCtx.getTxn()
   123  
   124  		requests := make([]txn.TxnRequest, 0, len(txnMeta.TNShards)-1)
   125  		for _, tn := range txnMeta.TNShards[1:] {
   126  			requests = append(requests, txn.TxnRequest{
   127  				Txn:              txnMeta,
   128  				Method:           txn.TxnMethod_GetStatus,
   129  				GetStatusRequest: &txn.TxnGetStatusRequest{TNShard: tn},
   130  			})
   131  		}
   132  
   133  		result := s.parallelSendWithRetry(ctx, requests, prepareIgnoreErrorCodes)
   134  		if result == nil {
   135  			return
   136  		}
   137  		defer result.Release()
   138  
   139  		prepared := 1
   140  		txnMeta.CommitTS = txnMeta.PreparedTS
   141  		for _, resp := range result.Responses {
   142  			if resp.Txn != nil && resp.Txn.Status == txn.TxnStatus_Prepared {
   143  				prepared++
   144  				if txnMeta.CommitTS.Less(resp.Txn.PreparedTS) {
   145  					txnMeta.PreparedTS = resp.Txn.PreparedTS
   146  				}
   147  			}
   148  		}
   149  
   150  		if prepared == len(txnMeta.TNShards) {
   151  			txnCtx.updateTxnLocked(txnMeta)
   152  			s.removeTxn(txnMeta.ID)
   153  			if err := s.startAsyncCommitTask(txnCtx); err != nil {
   154  				s.logger.Error("start commit task failed",
   155  					zap.Error(err),
   156  					util.TxnField(txnMeta))
   157  			}
   158  		} else {
   159  			s.startAsyncRollbackTask(txnMeta)
   160  		}
   161  	})
   162  }
   163  
   164  func (s *service) checkRecoveryStatus(txnMeta txn.TxnMeta) {
   165  	if txnMeta.PreparedTS.IsEmpty() ||
   166  		(txnMeta.Status != txn.TxnStatus_Prepared &&
   167  			txnMeta.CommitTS.IsEmpty()) {
   168  		s.logger.Fatal("invalid preparedTS or commitTS",
   169  			util.TxnField(txnMeta))
   170  	}
   171  
   172  	if txnMeta.Status == txn.TxnStatus_Committing {
   173  		s.validTNShard(txnMeta.TNShards[0])
   174  	}
   175  }