// Source: go.temporal.io/server@v1.23.0/common/persistence/cassandra/errors.go

// The MIT License
//
// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved.
//
// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package cassandra

import (
	"encoding/json"
	"fmt"
	"math"
	"reflect"
	"sort"

	persistencespb "go.temporal.io/server/api/persistence/v1"
	p "go.temporal.io/server/common/persistence"
	"go.temporal.io/server/common/persistence/nosql/nosqlplugin/cassandra/gocql"
	"go.temporal.io/server/common/persistence/serialization"
)

var (
	// errorPriority ranks the condition-failure error types by their dynamic type.
	// sortErrors orders errors by ascending value, so a lower number is reported
	// in preference to a higher one.
	errorPriority = map[reflect.Type]int{
		reflect.TypeOf(&p.ShardOwnershipLostError{}):             0,
		reflect.TypeOf(&p.CurrentWorkflowConditionFailedError{}): 1,
		reflect.TypeOf(&p.WorkflowConditionFailedError{}):        2,
		reflect.TypeOf(&p.ConditionFailedError{}):                3,
	}

	// errorDefaultPriority is the rank sortErrors gives to error types that are not
	// listed in errorPriority, sorting them after every listed type.
	// NOTE(review): initialised from the untyped constant math.MaxInt64, this
	// variable is an int, which can only hold that value on 64-bit platforms.
	errorDefaultPriority = math.MaxInt64
)

type (
	// executionCASCondition describes the values a request expects to find on the
	// workflow execution row whose run ID is runID. extractWorkflowConflictError
	// compares dbVersion with the row's db_record_version, or, when dbVersion is 0,
	// nextEventID with the row's next_event_id.
	executionCASCondition struct {
		runID       string
		dbVersion   int64
		nextEventID int64 // TODO deprecate this variable once DB version comparison is the default
	}
)

// ScyllaDB will return rows with null values to match # of queries in a batch query (see #2683).
// To support null values, the field's type should be a pointer to pointer of the underlying type (i.e. **int).
// The resulting value will be converted to a pointer of the underlying type (i.e. *int) and stored in the map.
// We do it only for the "type" field, which is checked for a `nil` value.
// All other fields are created automatically by gocql with non-pointer types (i.e. int).
64 func newConflictRecord() map[string]interface{} { 65 t := new(int) 66 return map[string]interface{}{ 67 "type": &t, 68 } 69 } 70 71 func convertErrors( 72 conflictRecord map[string]interface{}, 73 conflictIter gocql.Iter, 74 requestShardID int32, 75 requestRangeID int64, 76 requestCurrentRunID string, 77 requestExecutionCASConditions []executionCASCondition, 78 ) error { 79 80 conflictRecords := []map[string]interface{}{conflictRecord} 81 errors := extractErrors( 82 conflictRecord, 83 requestShardID, 84 requestRangeID, 85 requestCurrentRunID, 86 requestExecutionCASConditions, 87 ) 88 89 conflictRecord = newConflictRecord() 90 for conflictIter.MapScan(conflictRecord) { 91 if conflictRecord["[applied]"].(bool) { 92 // Should never happen. All records in batch should have [applied]=false. 93 continue 94 } 95 96 conflictRecords = append(conflictRecords, conflictRecord) 97 errors = append(errors, extractErrors( 98 conflictRecord, 99 requestShardID, 100 requestRangeID, 101 requestCurrentRunID, 102 requestExecutionCASConditions, 103 )...) 104 105 conflictRecord = newConflictRecord() 106 } 107 108 if len(errors) == 0 { 109 // This means that extractErrors wasn't able to extract error from the conflicting records. 110 // Most likely record to update is not found in the DB by WHERE clause and is NOT in conflictRecords slice. 111 // Unfortunately, there is no way to get the missing record w/o extra call to DB. 112 // Most likely it is current workflow execution record. 
113 return &p.ConditionFailedError{ 114 Msg: fmt.Sprintf("Encounter unknown condition update error: shard ID: %v, range ID: %v, possibly conflicting records:%v", 115 requestShardID, 116 requestRangeID, 117 printRecords(conflictRecords), 118 ), 119 } 120 } 121 122 errors = sortErrors(errors) 123 return errors[0] 124 } 125 126 func extractErrors( 127 conflictRecord map[string]interface{}, 128 requestShardID int32, 129 requestRangeID int64, 130 requestCurrentRunID string, 131 requestExecutionCASConditions []executionCASCondition, 132 ) []error { 133 134 var errors []error 135 if err := extractShardOwnershipLostError( 136 conflictRecord, 137 requestShardID, 138 requestRangeID, 139 ); err != nil { 140 errors = append(errors, err) 141 } 142 143 if err := extractCurrentWorkflowConflictError( 144 conflictRecord, 145 requestCurrentRunID, 146 ); err != nil { 147 errors = append(errors, err) 148 } 149 150 for _, condition := range requestExecutionCASConditions { 151 if err := extractWorkflowConflictError( 152 conflictRecord, 153 condition.runID, 154 condition.dbVersion, 155 condition.nextEventID, 156 ); err != nil { 157 errors = append(errors, err) 158 } 159 } 160 161 return errors 162 } 163 164 func sortErrors( 165 errors []error, 166 ) []error { 167 sort.Slice(errors, func(i int, j int) bool { 168 leftPriority, ok := errorPriority[reflect.TypeOf(errors[i])] 169 if !ok { 170 leftPriority = errorDefaultPriority 171 } 172 rightPriority, ok := errorPriority[reflect.TypeOf(errors[j])] 173 if !ok { 174 rightPriority = errorDefaultPriority 175 } 176 return leftPriority < rightPriority 177 }) 178 return errors 179 } 180 181 func extractShardOwnershipLostError( 182 conflictRecord map[string]interface{}, 183 requestShardID int32, 184 requestRangeID int64, 185 ) error { 186 rowType, ok := conflictRecord["type"].(*int) 187 if !ok || rowType == nil { 188 // This can happen on ScyllaDB. 
189 return nil 190 } 191 if *rowType != rowTypeShard { 192 return nil 193 } 194 195 actualRangeID := conflictRecord["range_id"].(int64) 196 if actualRangeID != requestRangeID { 197 return &p.ShardOwnershipLostError{ 198 ShardID: requestShardID, 199 Msg: fmt.Sprintf("Encounter shard ownership lost, request range ID: %v, actual range ID: %v", 200 requestRangeID, 201 actualRangeID, 202 ), 203 } 204 } 205 return nil 206 } 207 208 func extractCurrentWorkflowConflictError( 209 conflictRecord map[string]interface{}, 210 requestCurrentRunID string, 211 ) error { 212 rowType, ok := conflictRecord["type"].(*int) 213 if !ok || rowType == nil { 214 // This can happen on ScyllaDB. 215 return nil 216 } 217 if *rowType != rowTypeExecution { 218 return nil 219 } 220 if runID := gocql.UUIDToString(conflictRecord["run_id"]); runID != permanentRunID { 221 return nil 222 } 223 224 actualCurrentRunID := gocql.UUIDToString(conflictRecord["current_run_id"]) 225 if actualCurrentRunID != requestCurrentRunID { 226 binary, _ := conflictRecord["execution_state"].([]byte) 227 encoding, _ := conflictRecord["execution_state_encoding"].(string) 228 executionState := &persistencespb.WorkflowExecutionState{} 229 if state, err := serialization.WorkflowExecutionStateFromBlob( 230 binary, 231 encoding, 232 ); err == nil { 233 executionState = state 234 } 235 // if err != nil, this means execution state cannot be parsed, just use default values 236 237 lastWriteVersion, _ := conflictRecord["workflow_last_write_version"].(int64) 238 239 // TODO maybe assert actualCurrentRunID == executionState.RunId ? 
240 241 return &p.CurrentWorkflowConditionFailedError{ 242 Msg: fmt.Sprintf("Encounter current workflow error, request run ID: %v, actual run ID: %v", 243 requestCurrentRunID, 244 actualCurrentRunID, 245 ), 246 RequestID: executionState.CreateRequestId, 247 RunID: executionState.RunId, 248 State: executionState.State, 249 Status: executionState.Status, 250 LastWriteVersion: lastWriteVersion, 251 } 252 } 253 return nil 254 } 255 256 func extractWorkflowConflictError( 257 conflictRecord map[string]interface{}, 258 requestRunID string, 259 requestDBVersion int64, 260 requestNextEventID int64, // TODO deprecate this variable once DB version comparison is the default 261 ) error { 262 rowType, ok := conflictRecord["type"].(*int) 263 if !ok || rowType == nil { 264 // This can happen on ScyllaDB. 265 return nil 266 } 267 if *rowType != rowTypeExecution { 268 return nil 269 } 270 if runID := gocql.UUIDToString(conflictRecord["run_id"]); runID != requestRunID { 271 return nil 272 } 273 274 actualNextEventID, _ := conflictRecord["next_event_id"].(int64) 275 actualDBVersion, _ := conflictRecord["db_record_version"].(int64) 276 277 // TODO remove this block once DB version comparison is the default 278 if requestDBVersion == 0 { 279 if actualNextEventID != requestNextEventID { 280 return &p.WorkflowConditionFailedError{ 281 Msg: fmt.Sprintf("Encounter workflow next event ID mismatch, request next event ID: %v, actual next event ID: %v", 282 requestNextEventID, 283 actualNextEventID, 284 ), 285 NextEventID: actualNextEventID, 286 DBRecordVersion: actualDBVersion, 287 } 288 } 289 return nil 290 } 291 292 if actualDBVersion != requestDBVersion { 293 return &p.WorkflowConditionFailedError{ 294 Msg: fmt.Sprintf("Encounter workflow db version mismatch, request db version: %v, actual db version: %v", 295 requestDBVersion, 296 actualDBVersion, 297 ), 298 NextEventID: actualNextEventID, 299 DBRecordVersion: actualDBVersion, 300 } 301 } 302 return nil 303 } 304 305 func printRecords( 
306 records []map[string]interface{}, 307 ) string { 308 binary, _ := json.MarshalIndent(records, "", " ") 309 return string(binary) 310 }