// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package unit

import (
	"context"
	"strings"
	"time"

	"github.com/pingcap/errors"
	"github.com/pingcap/tiflow/dm/config"
	"github.com/pingcap/tiflow/dm/pb"
	"github.com/pingcap/tiflow/dm/pkg/binlog"
	"github.com/pingcap/tiflow/dm/pkg/retry"
	"github.com/pingcap/tiflow/dm/pkg/terror"
)

const (
	// DefaultInitTimeout represents the default timeout value when initializing a process unit.
	DefaultInitTimeout = time.Minute
)

// Unit defines the interface for subtask process units, like syncer, loader, relay, etc.
// The Unit is not responsible for maintaining its own status such as "pausing"/"paused".
// The caller should maintain the status, for example, know that the Unit is "paused" and
// avoid calling Pause again.
// All methods of the Unit interface can expect no concurrent invocation; the caller should guarantee this.
type Unit interface {
	// Init initializes the dm process unit.
	// Every unit does its base initialization in `Init`, and this must pass before the subtask starts running.
	// Other setup can be done at the beginning of `Process`, but this should be treated carefully to keep it
	// compatible with Pause / Resume.
	// If initialization succeeds, the outer caller should call `Close` when the unit (or the task) has finished,
	// stopped or been canceled (because the Init of another unit failed).
	// If initialization fails, Init itself should release the resources it acquired before (rolling itself back).
	Init(ctx context.Context) error
	// Process does the main logic; before it returns it must send a result to the pr channel.
	// When ctx is done, it must stop processing and return, otherwise the DM-worker will be blocked forever.
	// When not processing, call Process to continue or resume the process.
	Process(ctx context.Context, pr chan pb.ProcessResult)
	// Close shuts down the process and closes the unit; after that Process can not be called to resume.
	// The implementation should not block for a long time.
	Close()
	// Kill shuts down the process and closes the unit without a graceful shutdown.
	Kill()
	// Pause does some cleanups and the unit can be resumed later. The caller will make sure Process has returned.
	// The implementation should not block for a long time.
	Pause()
	// Resume resumes the paused process; before it returns it must send a result to the pr channel.
	Resume(ctx context.Context, pr chan pb.ProcessResult)
	// Update updates the configuration.
	Update(ctx context.Context, cfg *config.SubTaskConfig) error

	// Status returns the unit's current status. The result may need calculation with the source status, like the
	// estimated time to catch up. If sourceStatus is nil, the calculation should be skipped.
	Status(sourceStatus *binlog.SourceStatus) interface{}
	// Type returns the unit's type.
	Type() pb.UnitType
	// IsFreshTask returns whether this is a fresh task (not processed before).
	// It will be used to decide whether the task should become restoring.
	IsFreshTask(ctx context.Context) (bool, error)
}

// NewProcessError creates a new ProcessError from err.
// When err is a *terror.Error its code, class, scope, level, message, raw cause and
// workaround are copied; for any other error those fields come from terror.ErrNotSet,
// except Message, which is taken from err.
75 func NewProcessError(err error) *pb.ProcessError { 76 if e, ok := err.(*terror.Error); ok { 77 return &pb.ProcessError{ 78 ErrCode: int32(e.Code()), 79 ErrClass: e.Class().String(), 80 ErrScope: e.Scope().String(), 81 ErrLevel: e.Level().String(), 82 Message: terror.Message(e), 83 RawCause: terror.Message(e.Cause()), 84 Workaround: e.Workaround(), 85 } 86 } 87 88 return &pb.ProcessError{ 89 ErrCode: int32(terror.ErrNotSet.Code()), 90 ErrClass: terror.ErrNotSet.Class().String(), 91 ErrScope: terror.ErrNotSet.Scope().String(), 92 ErrLevel: terror.ErrNotSet.Level().String(), 93 Message: terror.Message(err), 94 RawCause: terror.Message(terror.ErrNotSet.Cause()), 95 Workaround: terror.ErrNotSet.Workaround(), 96 } 97 } 98 99 // IsCtxCanceledProcessErr returns true if the err's context canceled. 100 func IsCtxCanceledProcessErr(err *pb.ProcessError) bool { 101 return strings.Contains(err.Message, "context canceled") 102 } 103 104 // JoinProcessErrors return the string of pb.ProcessErrors joined by ", ". 105 func JoinProcessErrors(errors []*pb.ProcessError) string { 106 serrs := make([]string, 0, len(errors)) 107 for _, serr := range errors { 108 serrs = append(serrs, serr.String()) 109 } 110 return strings.Join(serrs, ", ") 111 } 112 113 // IsResumableError checks the error message and returns whether we need to 114 // resume the task unit and retry. 
115 func IsResumableError(err *pb.ProcessError) bool { 116 if err == nil { 117 return true 118 } 119 120 // not elegant code, because TiDB doesn't expose some error 121 for _, msg := range retry.UnsupportedDDLMsgs { 122 if strings.Contains(strings.ToLower(err.RawCause), strings.ToLower(msg)) { 123 return false 124 } 125 } 126 for _, msg := range retry.UnsupportedDMLMsgs { 127 if strings.Contains(strings.ToLower(err.RawCause), strings.ToLower(msg)) { 128 return false 129 } 130 } 131 for _, msg := range retry.ReplicationErrMsgs { 132 if strings.Contains(strings.ToLower(err.RawCause), strings.ToLower(msg)) { 133 return false 134 } 135 } 136 if err.ErrCode == int32(terror.ErrParserParseRelayLog.Code()) { 137 for _, msg := range retry.ParseRelayLogErrMsgs { 138 if strings.Contains(strings.ToLower(err.Message), strings.ToLower(msg)) { 139 return false 140 } 141 } 142 } 143 if _, ok := retry.UnresumableErrCodes[err.ErrCode]; ok { 144 return false 145 } 146 147 return true 148 } 149 150 // IsResumableDBError checks whether the error is resumable DB error. 151 // this is a simplified version of IsResumableError. 152 // we use a blacklist to filter out some errors which can not be resumed, 153 // all other errors is resumable. 154 func IsResumableDBError(err error) bool { 155 if err == nil { 156 return true 157 } 158 159 err = errors.Cause(err) 160 if err == context.Canceled { 161 return false 162 } 163 164 // not elegant code, because TiDB doesn't expose some error 165 errStr := strings.ToLower(err.Error()) 166 for _, msg := range retry.UnsupportedDDLMsgs { 167 if strings.Contains(errStr, strings.ToLower(msg)) { 168 return false 169 } 170 } 171 for _, msg := range retry.UnsupportedDMLMsgs { 172 if strings.Contains(errStr, strings.ToLower(msg)) { 173 return false 174 } 175 } 176 return true 177 } 178 179 // IsResumableRelayError return whether we need resume relay on error 180 // since relay impl unit interface too, so we put it here. 
181 func IsResumableRelayError(err *pb.ProcessError) bool { 182 if _, ok := retry.UnresumableRelayErrCodes[err.ErrCode]; ok { 183 return false 184 } 185 return true 186 }