go.temporal.io/server@v1.23.0/common/persistence/cassandra/errors.go (about)

     1  // The MIT License
     2  //
     3  // Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
     4  //
     5  // Copyright (c) 2020 Uber Technologies, Inc.
     6  //
     7  // Permission is hereby granted, free of charge, to any person obtaining a copy
     8  // of this software and associated documentation files (the "Software"), to deal
     9  // in the Software without restriction, including without limitation the rights
    10  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    11  // copies of the Software, and to permit persons to whom the Software is
    12  // furnished to do so, subject to the following conditions:
    13  //
    14  // The above copyright notice and this permission notice shall be included in
    15  // all copies or substantial portions of the Software.
    16  //
    17  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    18  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    19  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    20  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    21  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    22  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    23  // THE SOFTWARE.
    24  
    25  package cassandra
    26  
    27  import (
    28  	"encoding/json"
    29  	"fmt"
    30  	"math"
    31  	"reflect"
    32  	"sort"
    33  
    34  	persistencespb "go.temporal.io/server/api/persistence/v1"
    35  	p "go.temporal.io/server/common/persistence"
    36  	"go.temporal.io/server/common/persistence/nosql/nosqlplugin/cassandra/gocql"
    37  	"go.temporal.io/server/common/persistence/serialization"
    38  )
    39  
    40  var (
    41  	errorPriority = map[reflect.Type]int{
    42  		reflect.TypeOf(&p.ShardOwnershipLostError{}):             0,
    43  		reflect.TypeOf(&p.CurrentWorkflowConditionFailedError{}): 1,
    44  		reflect.TypeOf(&p.WorkflowConditionFailedError{}):        2,
    45  		reflect.TypeOf(&p.ConditionFailedError{}):                3,
    46  	}
    47  
    48  	errorDefaultPriority = math.MaxInt64
    49  )
    50  
    51  type (
    52  	executionCASCondition struct {
    53  		runID       string
    54  		dbVersion   int64
    55  		nextEventID int64 // TODO deprecate this variable once DB version comparison is the default
    56  	}
    57  )
    58  
    59  // ScyllaDB will return rows with null values to match # of queries in a batch query (see #2683).
    60  // To support null values, fields type should be a pointer to pointer of underlying type (i.e. **int).
    61  // Resulting value will be converted to a pointer of underlying type (i.e. *int) and stored in the map.
    62  // We do it only for "type" field which is checked for `nil` value.
    63  // All other fields are created automatically by gocql with non-pointer types (i.e. int).
    64  func newConflictRecord() map[string]interface{} {
    65  	t := new(int)
    66  	return map[string]interface{}{
    67  		"type": &t,
    68  	}
    69  }
    70  
    71  func convertErrors(
    72  	conflictRecord map[string]interface{},
    73  	conflictIter gocql.Iter,
    74  	requestShardID int32,
    75  	requestRangeID int64,
    76  	requestCurrentRunID string,
    77  	requestExecutionCASConditions []executionCASCondition,
    78  ) error {
    79  
    80  	conflictRecords := []map[string]interface{}{conflictRecord}
    81  	errors := extractErrors(
    82  		conflictRecord,
    83  		requestShardID,
    84  		requestRangeID,
    85  		requestCurrentRunID,
    86  		requestExecutionCASConditions,
    87  	)
    88  
    89  	conflictRecord = newConflictRecord()
    90  	for conflictIter.MapScan(conflictRecord) {
    91  		if conflictRecord["[applied]"].(bool) {
    92  			// Should never happen. All records in batch should have [applied]=false.
    93  			continue
    94  		}
    95  
    96  		conflictRecords = append(conflictRecords, conflictRecord)
    97  		errors = append(errors, extractErrors(
    98  			conflictRecord,
    99  			requestShardID,
   100  			requestRangeID,
   101  			requestCurrentRunID,
   102  			requestExecutionCASConditions,
   103  		)...)
   104  
   105  		conflictRecord = newConflictRecord()
   106  	}
   107  
   108  	if len(errors) == 0 {
   109  		// This means that extractErrors wasn't able to extract error from the conflicting records.
   110  		// Most likely record to update is not found in the DB by WHERE clause and is NOT in conflictRecords slice.
   111  		// Unfortunately, there is no way to get the missing record w/o extra call to DB.
   112  		// Most likely it is current workflow execution record.
   113  		return &p.ConditionFailedError{
   114  			Msg: fmt.Sprintf("Encounter unknown condition update error: shard ID: %v, range ID: %v, possibly conflicting records:%v",
   115  				requestShardID,
   116  				requestRangeID,
   117  				printRecords(conflictRecords),
   118  			),
   119  		}
   120  	}
   121  
   122  	errors = sortErrors(errors)
   123  	return errors[0]
   124  }
   125  
   126  func extractErrors(
   127  	conflictRecord map[string]interface{},
   128  	requestShardID int32,
   129  	requestRangeID int64,
   130  	requestCurrentRunID string,
   131  	requestExecutionCASConditions []executionCASCondition,
   132  ) []error {
   133  
   134  	var errors []error
   135  	if err := extractShardOwnershipLostError(
   136  		conflictRecord,
   137  		requestShardID,
   138  		requestRangeID,
   139  	); err != nil {
   140  		errors = append(errors, err)
   141  	}
   142  
   143  	if err := extractCurrentWorkflowConflictError(
   144  		conflictRecord,
   145  		requestCurrentRunID,
   146  	); err != nil {
   147  		errors = append(errors, err)
   148  	}
   149  
   150  	for _, condition := range requestExecutionCASConditions {
   151  		if err := extractWorkflowConflictError(
   152  			conflictRecord,
   153  			condition.runID,
   154  			condition.dbVersion,
   155  			condition.nextEventID,
   156  		); err != nil {
   157  			errors = append(errors, err)
   158  		}
   159  	}
   160  
   161  	return errors
   162  }
   163  
   164  func sortErrors(
   165  	errors []error,
   166  ) []error {
   167  	sort.Slice(errors, func(i int, j int) bool {
   168  		leftPriority, ok := errorPriority[reflect.TypeOf(errors[i])]
   169  		if !ok {
   170  			leftPriority = errorDefaultPriority
   171  		}
   172  		rightPriority, ok := errorPriority[reflect.TypeOf(errors[j])]
   173  		if !ok {
   174  			rightPriority = errorDefaultPriority
   175  		}
   176  		return leftPriority < rightPriority
   177  	})
   178  	return errors
   179  }
   180  
   181  func extractShardOwnershipLostError(
   182  	conflictRecord map[string]interface{},
   183  	requestShardID int32,
   184  	requestRangeID int64,
   185  ) error {
   186  	rowType, ok := conflictRecord["type"].(*int)
   187  	if !ok || rowType == nil {
   188  		// This can happen on ScyllaDB.
   189  		return nil
   190  	}
   191  	if *rowType != rowTypeShard {
   192  		return nil
   193  	}
   194  
   195  	actualRangeID := conflictRecord["range_id"].(int64)
   196  	if actualRangeID != requestRangeID {
   197  		return &p.ShardOwnershipLostError{
   198  			ShardID: requestShardID,
   199  			Msg: fmt.Sprintf("Encounter shard ownership lost, request range ID: %v, actual range ID: %v",
   200  				requestRangeID,
   201  				actualRangeID,
   202  			),
   203  		}
   204  	}
   205  	return nil
   206  }
   207  
   208  func extractCurrentWorkflowConflictError(
   209  	conflictRecord map[string]interface{},
   210  	requestCurrentRunID string,
   211  ) error {
   212  	rowType, ok := conflictRecord["type"].(*int)
   213  	if !ok || rowType == nil {
   214  		// This can happen on ScyllaDB.
   215  		return nil
   216  	}
   217  	if *rowType != rowTypeExecution {
   218  		return nil
   219  	}
   220  	if runID := gocql.UUIDToString(conflictRecord["run_id"]); runID != permanentRunID {
   221  		return nil
   222  	}
   223  
   224  	actualCurrentRunID := gocql.UUIDToString(conflictRecord["current_run_id"])
   225  	if actualCurrentRunID != requestCurrentRunID {
   226  		binary, _ := conflictRecord["execution_state"].([]byte)
   227  		encoding, _ := conflictRecord["execution_state_encoding"].(string)
   228  		executionState := &persistencespb.WorkflowExecutionState{}
   229  		if state, err := serialization.WorkflowExecutionStateFromBlob(
   230  			binary,
   231  			encoding,
   232  		); err == nil {
   233  			executionState = state
   234  		}
   235  		// if err != nil, this means execution state cannot be parsed, just use default values
   236  
   237  		lastWriteVersion, _ := conflictRecord["workflow_last_write_version"].(int64)
   238  
   239  		// TODO maybe assert actualCurrentRunID == executionState.RunId ?
   240  
   241  		return &p.CurrentWorkflowConditionFailedError{
   242  			Msg: fmt.Sprintf("Encounter current workflow error, request run ID: %v, actual run ID: %v",
   243  				requestCurrentRunID,
   244  				actualCurrentRunID,
   245  			),
   246  			RequestID:        executionState.CreateRequestId,
   247  			RunID:            executionState.RunId,
   248  			State:            executionState.State,
   249  			Status:           executionState.Status,
   250  			LastWriteVersion: lastWriteVersion,
   251  		}
   252  	}
   253  	return nil
   254  }
   255  
   256  func extractWorkflowConflictError(
   257  	conflictRecord map[string]interface{},
   258  	requestRunID string,
   259  	requestDBVersion int64,
   260  	requestNextEventID int64, // TODO deprecate this variable once DB version comparison is the default
   261  ) error {
   262  	rowType, ok := conflictRecord["type"].(*int)
   263  	if !ok || rowType == nil {
   264  		// This can happen on ScyllaDB.
   265  		return nil
   266  	}
   267  	if *rowType != rowTypeExecution {
   268  		return nil
   269  	}
   270  	if runID := gocql.UUIDToString(conflictRecord["run_id"]); runID != requestRunID {
   271  		return nil
   272  	}
   273  
   274  	actualNextEventID, _ := conflictRecord["next_event_id"].(int64)
   275  	actualDBVersion, _ := conflictRecord["db_record_version"].(int64)
   276  
   277  	// TODO remove this block once DB version comparison is the default
   278  	if requestDBVersion == 0 {
   279  		if actualNextEventID != requestNextEventID {
   280  			return &p.WorkflowConditionFailedError{
   281  				Msg: fmt.Sprintf("Encounter workflow next event ID mismatch, request next event ID: %v, actual next event ID: %v",
   282  					requestNextEventID,
   283  					actualNextEventID,
   284  				),
   285  				NextEventID:     actualNextEventID,
   286  				DBRecordVersion: actualDBVersion,
   287  			}
   288  		}
   289  		return nil
   290  	}
   291  
   292  	if actualDBVersion != requestDBVersion {
   293  		return &p.WorkflowConditionFailedError{
   294  			Msg: fmt.Sprintf("Encounter workflow db version mismatch, request db version: %v, actual db version: %v",
   295  				requestDBVersion,
   296  				actualDBVersion,
   297  			),
   298  			NextEventID:     actualNextEventID,
   299  			DBRecordVersion: actualDBVersion,
   300  		}
   301  	}
   302  	return nil
   303  }
   304  
   305  func printRecords(
   306  	records []map[string]interface{},
   307  ) string {
   308  	binary, _ := json.MarshalIndent(records, "", "  ")
   309  	return string(binary)
   310  }