github.com/matrixorigin/matrixone@v1.2.0/pkg/txn/service/service_recovery.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package service 16 17 import ( 18 "context" 19 20 "github.com/matrixorigin/matrixone/pkg/pb/txn" 21 "github.com/matrixorigin/matrixone/pkg/txn/util" 22 "go.uber.org/zap" 23 ) 24 25 func (s *service) startRecovery() { 26 if err := s.stopper.RunTask(s.doRecovery); err != nil { 27 s.logger.Fatal("start recover task failed", 28 zap.Error(err)) 29 } 30 s.storage.StartRecovery(context.TODO(), s.txnC) 31 s.waitRecoveryCompleted() 32 } 33 34 func (s *service) doRecovery(ctx context.Context) { 35 for { 36 select { 37 case <-ctx.Done(): 38 return 39 case txn, ok := <-s.txnC: 40 if !ok { 41 s.end() 42 return 43 } 44 s.addLog(txn) 45 } 46 } 47 } 48 49 func (s *service) addLog(txnMeta txn.TxnMeta) { 50 if len(txnMeta.TNShards) <= 1 { 51 return 52 } 53 54 switch txnMeta.Status { 55 case txn.TxnStatus_Committing: 56 s.checkRecoveryStatus(txnMeta) 57 txnCtx := s.getTxnContext(txnMeta.ID) 58 if txnCtx == nil { 59 s.maybeAddTxn(txnMeta) 60 } else { 61 if txnCtx.getTxn().Status != txn.TxnStatus_Prepared && 62 txnCtx.getTxn().Status != txn.TxnStatus_Committing { 63 s.logger.Fatal("invalid txn status before committing", 64 zap.String("prev-status", txnCtx.getTxn().Status.String()), 65 util.TxnField(txnMeta)) 66 } 67 txnCtx.updateTxn(txnMeta) 68 } 69 case txn.TxnStatus_Prepared: 70 s.checkRecoveryStatus(txnMeta) 71 txnCtx := s.getTxnContext(txnMeta.ID) 72 if txnCtx == nil { 73 s.maybeAddTxn(txnMeta) 74 break 75 } 76 77 if txnCtx.getTxn().Status != txn.TxnStatus_Prepared { 78 s.logger.Fatal("invalid txn status before prepare status", 79 zap.String("prev-status", txnCtx.getTxn().Status.String()), 80 util.TxnField(txnMeta)) 81 } 82 txnCtx.updateTxn(txnMeta) 83 case txn.TxnStatus_Committed: 84 s.checkRecoveryStatus(txnMeta) 85 s.removeTxn(txnMeta.ID) 86 default: 87 s.logger.Fatal("invalid recovery status", 88 util.TxnField(txnMeta)) 89 } 90 } 91 92 func (s *service) end() { 93 defer close(s.recoveryC) 94 s.transactions.Range(func(_, value any) bool { 95 txnCtx := value.(*txnContext) 96 txnMeta := txnCtx.getTxn() 97 if !s.shard.Equal(txnMeta.TNShards[0]) { 98 return true 99 } 100 101 switch txnMeta.Status { 102 case txn.TxnStatus_Prepared: 103 if err := s.startAsyncCheckCommitTask(txnCtx); err != nil { 104 panic(err) 105 } 106 case txn.TxnStatus_Committing: 107 s.removeTxn(txnMeta.ID) 108 if err := s.startAsyncCommitTask(txnCtx); err != nil { 109 panic(err) 110 } 111 } 112 return true 113 }) 114 } 115 116 func (s *service) waitRecoveryCompleted() { 117 <-s.recoveryC 118 } 119 120 func (s *service) startAsyncCheckCommitTask(txnCtx *txnContext) error { 121 return s.stopper.RunTask(func(ctx context.Context) { 122 txnMeta := txnCtx.getTxn() 123 124 requests := make([]txn.TxnRequest, 0, len(txnMeta.TNShards)-1) 125 for _, tn := range txnMeta.TNShards[1:] { 126 requests = append(requests, txn.TxnRequest{ 127 Txn: txnMeta, 128 Method: txn.TxnMethod_GetStatus, 129 GetStatusRequest: &txn.TxnGetStatusRequest{TNShard: tn}, 130 }) 131 } 132 133 result := s.parallelSendWithRetry(ctx, requests, prepareIgnoreErrorCodes) 134 if result == nil { 135 return 136 } 137 defer result.Release() 138 139 prepared := 1 140 txnMeta.CommitTS = txnMeta.PreparedTS 141 for _, resp := range result.Responses { 142 if resp.Txn != nil && resp.Txn.Status == txn.TxnStatus_Prepared { 143 prepared++ 144 if txnMeta.CommitTS.Less(resp.Txn.PreparedTS) { 145 txnMeta.PreparedTS = resp.Txn.PreparedTS 146 } 147 } 148 } 149 150 if prepared == len(txnMeta.TNShards) { 151 txnCtx.updateTxnLocked(txnMeta) 152 s.removeTxn(txnMeta.ID) 153 if err := s.startAsyncCommitTask(txnCtx); err != nil { 154 s.logger.Error("start commit task failed", 155 zap.Error(err), 156 util.TxnField(txnMeta)) 157 } 158 } else { 159 s.startAsyncRollbackTask(txnMeta) 160 } 161 }) 162 } 163 164 func (s *service) checkRecoveryStatus(txnMeta txn.TxnMeta) { 165 if txnMeta.PreparedTS.IsEmpty() || 166 (txnMeta.Status != txn.TxnStatus_Prepared && 167 txnMeta.CommitTS.IsEmpty()) { 168 s.logger.Fatal("invalid preparedTS or commitTS", 169 util.TxnField(txnMeta)) 170 } 171 172 if txnMeta.Status == txn.TxnStatus_Committing { 173 s.validTNShard(txnMeta.TNShards[0]) 174 } 175 }