github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/dbs/rollingback.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package dbs
    15  
    16  import (
    17  	"fmt"
    18  
    19  	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
    20  	"github.com/whtcorpsinc/BerolinaSQL/ast"
    21  	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
    22  	"github.com/whtcorpsinc/BerolinaSQL/terror"
    23  	"github.com/whtcorpsinc/errors"
    24  	"github.com/whtcorpsinc/milevadb/ekv"
    25  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    26  	"github.com/whtcorpsinc/milevadb/spacetime"
    27  	"go.uber.org/zap"
    28  )
    29  
    30  func uFIDelateDefCaussNull2NotNull(tblInfo *perceptron.BlockInfo, indexInfo *perceptron.IndexInfo) error {
    31  	nullDefCauss, err := getNullDefCausInfos(tblInfo, indexInfo)
    32  	if err != nil {
    33  		return errors.Trace(err)
    34  	}
    35  
    36  	for _, defCaus := range nullDefCauss {
    37  		defCaus.Flag |= allegrosql.NotNullFlag
    38  		defCaus.Flag = defCaus.Flag &^ allegrosql.PreventNullInsertFlag
    39  	}
    40  	return nil
    41  }
    42  
    43  func convertAddIdxJob2RollbackJob(t *spacetime.Meta, job *perceptron.Job, tblInfo *perceptron.BlockInfo, indexInfo *perceptron.IndexInfo, err error) (int64, error) {
    44  	job.State = perceptron.JobStateRollingback
    45  
    46  	if indexInfo.Primary {
    47  		nullDefCauss, err := getNullDefCausInfos(tblInfo, indexInfo)
    48  		if err != nil {
    49  			return 0, errors.Trace(err)
    50  		}
    51  		for _, defCaus := range nullDefCauss {
    52  			// Field PreventNullInsertFlag flag reset.
    53  			defCaus.Flag = defCaus.Flag &^ allegrosql.PreventNullInsertFlag
    54  		}
    55  	}
    56  
    57  	// the second args will be used in onDropIndex.
    58  	job.Args = []interface{}{indexInfo.Name, getPartitionIDs(tblInfo)}
    59  	// If add index job rollbacks in write reorganization state, its need to delete all keys which has been added.
    60  	// Its work is the same as drop index job do.
    61  	// The write reorganization state in add index job that likes write only state in drop index job.
    62  	// So the next state is delete only state.
    63  	originalState := indexInfo.State
    64  	indexInfo.State = perceptron.StateDeleteOnly
    65  	// Change dependent hidden defCausumns if necessary.
    66  	uFIDelateHiddenDeferredCausets(tblInfo, indexInfo, perceptron.StateDeleteOnly)
    67  	job.SchemaState = perceptron.StateDeleteOnly
    68  	ver, err1 := uFIDelateVersionAndBlockInfo(t, job, tblInfo, originalState != indexInfo.State)
    69  	if err1 != nil {
    70  		return ver, errors.Trace(err1)
    71  	}
    72  
    73  	if ekv.ErrKeyExists.Equal(err) {
    74  		return ver, ekv.ErrKeyExists.GenWithStackByArgs("", indexInfo.Name.O)
    75  	}
    76  
    77  	return ver, errors.Trace(err)
    78  }
    79  
    80  // convertNotStartAddIdxJob2RollbackJob converts the add index job that are not started workers to rollingbackJob,
    81  // to rollback add index operations. job.SnapshotVer == 0 indicates the workers are not started.
    82  func convertNotStartAddIdxJob2RollbackJob(t *spacetime.Meta, job *perceptron.Job, occuredErr error) (ver int64, err error) {
    83  	schemaID := job.SchemaID
    84  	tblInfo, err := getBlockInfoAndCancelFaultJob(t, job, schemaID)
    85  	if err != nil {
    86  		return ver, errors.Trace(err)
    87  	}
    88  
    89  	var (
    90  		unique                  bool
    91  		indexName               perceptron.CIStr
    92  		indexPartSpecifications []*ast.IndexPartSpecification
    93  		indexOption             *ast.IndexOption
    94  	)
    95  	err = job.DecodeArgs(&unique, &indexName, &indexPartSpecifications, &indexOption)
    96  	if err != nil {
    97  		job.State = perceptron.JobStateCancelled
    98  		return ver, errors.Trace(err)
    99  	}
   100  
   101  	indexInfo := tblInfo.FindIndexByName(indexName.L)
   102  	if indexInfo == nil {
   103  		job.State = perceptron.JobStateCancelled
   104  		return ver, errCancelledDBSJob
   105  	}
   106  	return convertAddIdxJob2RollbackJob(t, job, tblInfo, indexInfo, occuredErr)
   107  }
   108  
   109  // rollingbackModifyDeferredCauset change the modifying-defCausumn job into rolling back state.
   110  // Since modifying defCausumn job has two types: normal-type and reorg-type, we should handle it respectively.
   111  // normal-type has only two states:    None -> Public
   112  // reorg-type has five states:         None -> Delete-only -> Write-only -> Write-org -> Public
   113  func rollingbackModifyDeferredCauset(t *spacetime.Meta, job *perceptron.Job) (ver int64, err error) {
   114  	_, tblInfo, oldDefCaus, jp, err := getModifyDeferredCausetInfo(t, job)
   115  	if err != nil {
   116  		return ver, err
   117  	}
   118  	if !needChangeDeferredCausetData(oldDefCaus, jp.newDefCaus) {
   119  		// Normal-type rolling back
   120  		if job.SchemaState == perceptron.StateNone {
   121  			// When change null to not null, although state is unchanged with none, the oldDefCaus flag's has been changed to preNullInsertFlag.
   122  			// To roll back this HoTT of normal job, it is necessary to mark the state as JobStateRollingback to restore the old defCaus's flag.
   123  			if jp.modifyDeferredCausetTp == allegrosql.TypeNull && tblInfo.DeferredCausets[oldDefCaus.Offset].Flag|allegrosql.PreventNullInsertFlag != 0 {
   124  				job.State = perceptron.JobStateRollingback
   125  				return ver, errCancelledDBSJob
   126  			}
   127  			// Normal job with stateNone can be cancelled directly.
   128  			job.State = perceptron.JobStateCancelled
   129  			return ver, errCancelledDBSJob
   130  		}
   131  		// StatePublic couldn't be cancelled.
   132  		job.State = perceptron.JobStateRunning
   133  		return ver, nil
   134  	}
   135  	// reorg-type rolling back
   136  	if jp.changingDefCaus == nil {
   137  		// The job hasn't been handled and we cancel it directly.
   138  		job.State = perceptron.JobStateCancelled
   139  		return ver, errCancelledDBSJob
   140  	}
   141  	// The job has been in it's midbse state and we roll it back.
   142  	job.State = perceptron.JobStateRollingback
   143  	return ver, errCancelledDBSJob
   144  }
   145  
   146  func rollingbackAddDeferredCauset(t *spacetime.Meta, job *perceptron.Job) (ver int64, err error) {
   147  	job.State = perceptron.JobStateRollingback
   148  	tblInfo, defCausumnInfo, defCaus, _, _, err := checkAddDeferredCauset(t, job)
   149  	if err != nil {
   150  		return ver, errors.Trace(err)
   151  	}
   152  	if defCausumnInfo == nil {
   153  		job.State = perceptron.JobStateCancelled
   154  		return ver, errCancelledDBSJob
   155  	}
   156  
   157  	originalState := defCausumnInfo.State
   158  	defCausumnInfo.State = perceptron.StateDeleteOnly
   159  	job.SchemaState = perceptron.StateDeleteOnly
   160  
   161  	job.Args = []interface{}{defCaus.Name}
   162  	ver, err = uFIDelateVersionAndBlockInfo(t, job, tblInfo, originalState != defCausumnInfo.State)
   163  	if err != nil {
   164  		return ver, errors.Trace(err)
   165  	}
   166  	return ver, errCancelledDBSJob
   167  }
   168  
   169  func rollingbackAddDeferredCausets(t *spacetime.Meta, job *perceptron.Job) (ver int64, err error) {
   170  	job.State = perceptron.JobStateRollingback
   171  	tblInfo, defCausumnInfos, _, _, _, _, err := checkAddDeferredCausets(t, job)
   172  	if err != nil {
   173  		return ver, errors.Trace(err)
   174  	}
   175  	if len(defCausumnInfos) == 0 {
   176  		job.State = perceptron.JobStateCancelled
   177  		return ver, errCancelledDBSJob
   178  	}
   179  
   180  	defCausNames := make([]perceptron.CIStr, len(defCausumnInfos))
   181  	originalState := defCausumnInfos[0].State
   182  	for i, defCausumnInfo := range defCausumnInfos {
   183  		defCausumnInfos[i].State = perceptron.StateDeleteOnly
   184  		defCausNames[i] = defCausumnInfo.Name
   185  	}
   186  	ifExists := make([]bool, len(defCausumnInfos))
   187  
   188  	job.SchemaState = perceptron.StateDeleteOnly
   189  	job.Args = []interface{}{defCausNames, ifExists}
   190  	ver, err = uFIDelateVersionAndBlockInfo(t, job, tblInfo, originalState != defCausumnInfos[0].State)
   191  	if err != nil {
   192  		return ver, errors.Trace(err)
   193  	}
   194  	return ver, errCancelledDBSJob
   195  }
   196  
   197  func rollingbackDropDeferredCauset(t *spacetime.Meta, job *perceptron.Job) (ver int64, err error) {
   198  	tblInfo, defCausInfo, idxInfos, err := checkDropDeferredCauset(t, job)
   199  	if err != nil {
   200  		return ver, errors.Trace(err)
   201  	}
   202  
   203  	for _, indexInfo := range idxInfos {
   204  		switch indexInfo.State {
   205  		case perceptron.StateWriteOnly, perceptron.StateDeleteOnly, perceptron.StateDeleteReorganization, perceptron.StateNone:
   206  			// We can not rollback now, so just continue to drop index.
   207  			// In function isJobRollbackable will let job rollback when state is StateNone.
   208  			// When there is no index related to the drop defCausumn job it is OK, but when there has indices, we should
   209  			// make sure the job is not rollback.
   210  			job.State = perceptron.JobStateRunning
   211  			return ver, nil
   212  		case perceptron.StatePublic:
   213  		default:
   214  			return ver, ErrInvalidDBSState.GenWithStackByArgs("index", indexInfo.State)
   215  		}
   216  	}
   217  
   218  	// StatePublic means when the job is not running yet.
   219  	if defCausInfo.State == perceptron.StatePublic {
   220  		job.State = perceptron.JobStateCancelled
   221  		job.FinishBlockJob(perceptron.JobStateRollbackDone, perceptron.StatePublic, ver, tblInfo)
   222  		return ver, errCancelledDBSJob
   223  	}
   224  	// In the state of drop defCausumn `write only -> delete only -> reorganization`,
   225  	// We can not rollback now, so just continue to drop defCausumn.
   226  	job.State = perceptron.JobStateRunning
   227  	return ver, nil
   228  }
   229  
   230  func rollingbackDropDeferredCausets(t *spacetime.Meta, job *perceptron.Job) (ver int64, err error) {
   231  	tblInfo, defCausInfos, _, idxInfos, err := checkDropDeferredCausets(t, job)
   232  	if err != nil {
   233  		return ver, errors.Trace(err)
   234  	}
   235  
   236  	for _, indexInfo := range idxInfos {
   237  		switch indexInfo.State {
   238  		case perceptron.StateWriteOnly, perceptron.StateDeleteOnly, perceptron.StateDeleteReorganization, perceptron.StateNone:
   239  			// We can not rollback now, so just continue to drop index.
   240  			// In function isJobRollbackable will let job rollback when state is StateNone.
   241  			// When there is no index related to the drop defCausumns job it is OK, but when there has indices, we should
   242  			// make sure the job is not rollback.
   243  			job.State = perceptron.JobStateRunning
   244  			return ver, nil
   245  		case perceptron.StatePublic:
   246  		default:
   247  			return ver, ErrInvalidDBSState.GenWithStackByArgs("index", indexInfo.State)
   248  		}
   249  	}
   250  
   251  	// StatePublic means when the job is not running yet.
   252  	if defCausInfos[0].State == perceptron.StatePublic {
   253  		job.State = perceptron.JobStateCancelled
   254  		job.FinishBlockJob(perceptron.JobStateRollbackDone, perceptron.StatePublic, ver, tblInfo)
   255  		return ver, errCancelledDBSJob
   256  	}
   257  	// In the state of drop defCausumns `write only -> delete only -> reorganization`,
   258  	// We can not rollback now, so just continue to drop defCausumns.
   259  	job.State = perceptron.JobStateRunning
   260  	return ver, nil
   261  }
   262  
   263  func rollingbackDropIndex(t *spacetime.Meta, job *perceptron.Job) (ver int64, err error) {
   264  	tblInfo, indexInfo, err := checkDropIndex(t, job)
   265  	if err != nil {
   266  		return ver, errors.Trace(err)
   267  	}
   268  
   269  	originalState := indexInfo.State
   270  	switch indexInfo.State {
   271  	case perceptron.StateWriteOnly, perceptron.StateDeleteOnly, perceptron.StateDeleteReorganization, perceptron.StateNone:
   272  		// We can not rollback now, so just continue to drop index.
   273  		// Normally won't fetch here, because there is check when cancel dbs jobs. see function: isJobRollbackable.
   274  		job.State = perceptron.JobStateRunning
   275  		return ver, nil
   276  	case perceptron.StatePublic:
   277  		job.State = perceptron.JobStateRollbackDone
   278  		indexInfo.State = perceptron.StatePublic
   279  	default:
   280  		return ver, ErrInvalidDBSState.GenWithStackByArgs("index", indexInfo.State)
   281  	}
   282  
   283  	job.SchemaState = indexInfo.State
   284  	job.Args = []interface{}{indexInfo.Name}
   285  	ver, err = uFIDelateVersionAndBlockInfo(t, job, tblInfo, originalState != indexInfo.State)
   286  	if err != nil {
   287  		return ver, errors.Trace(err)
   288  	}
   289  	job.FinishBlockJob(perceptron.JobStateRollbackDone, perceptron.StatePublic, ver, tblInfo)
   290  	return ver, errCancelledDBSJob
   291  }
   292  
   293  func rollingbackAddIndex(w *worker, d *dbsCtx, t *spacetime.Meta, job *perceptron.Job, isPK bool) (ver int64, err error) {
   294  	// If the value of SnapshotVer isn't zero, it means the work is backfilling the indexes.
   295  	if job.SchemaState == perceptron.StateWriteReorganization && job.SnapshotVer != 0 {
   296  		// add index workers are started. need to ask them to exit.
   297  		logutil.Logger(w.logCtx).Info("[dbs] run the cancelling DBS job", zap.String("job", job.String()))
   298  		w.reorgCtx.notifyReorgCancel()
   299  		ver, err = w.onCreateIndex(d, t, job, isPK)
   300  	} else {
   301  		// add index workers are not started, remove the indexInfo in blockInfo.
   302  		ver, err = convertNotStartAddIdxJob2RollbackJob(t, job, errCancelledDBSJob)
   303  	}
   304  	return
   305  }
   306  
   307  func convertAddBlockPartitionJob2RollbackJob(t *spacetime.Meta, job *perceptron.Job, otherwiseErr error, tblInfo *perceptron.BlockInfo) (ver int64, err error) {
   308  	job.State = perceptron.JobStateRollingback
   309  	addingDefinitions := tblInfo.Partition.AddingDefinitions
   310  	partNames := make([]string, 0, len(addingDefinitions))
   311  	for _, fidel := range addingDefinitions {
   312  		partNames = append(partNames, fidel.Name.L)
   313  	}
   314  	job.Args = []interface{}{partNames}
   315  	ver, err = uFIDelateVersionAndBlockInfo(t, job, tblInfo, true)
   316  	if err != nil {
   317  		return ver, errors.Trace(err)
   318  	}
   319  	return ver, errors.Trace(otherwiseErr)
   320  }
   321  
   322  func rollingbackAddBlockPartition(t *spacetime.Meta, job *perceptron.Job) (ver int64, err error) {
   323  	tblInfo, _, addingDefinitions, err := checkAddPartition(t, job)
   324  	if err != nil {
   325  		return ver, errors.Trace(err)
   326  	}
   327  	// addingDefinitions' len = 0 means the job hasn't reached the replica-only state.
   328  	if len(addingDefinitions) == 0 {
   329  		job.State = perceptron.JobStateCancelled
   330  		return ver, errors.Trace(errCancelledDBSJob)
   331  	}
   332  	// addingDefinitions is also in tblInfo, here pass the tblInfo as parameter directly.
   333  	return convertAddBlockPartitionJob2RollbackJob(t, job, errCancelledDBSJob, tblInfo)
   334  }
   335  
   336  func rollingbackDropBlockOrView(t *spacetime.Meta, job *perceptron.Job) error {
   337  	tblInfo, err := checkBlockExistAndCancelNonExistJob(t, job, job.SchemaID)
   338  	if err != nil {
   339  		return errors.Trace(err)
   340  	}
   341  	// To simplify the rollback logic, cannot be canceled after job start to run.
   342  	// Normally won't fetch here, because there is check when cancel dbs jobs. see function: isJobRollbackable.
   343  	if tblInfo.State == perceptron.StatePublic {
   344  		job.State = perceptron.JobStateCancelled
   345  		return errCancelledDBSJob
   346  	}
   347  	job.State = perceptron.JobStateRunning
   348  	return nil
   349  }
   350  
   351  func rollingbackDropBlockPartition(t *spacetime.Meta, job *perceptron.Job) (ver int64, err error) {
   352  	_, err = getBlockInfoAndCancelFaultJob(t, job, job.SchemaID)
   353  	if err != nil {
   354  		return ver, errors.Trace(err)
   355  	}
   356  	return cancelOnlyNotHandledJob(job)
   357  }
   358  
   359  func rollingbackDropSchema(t *spacetime.Meta, job *perceptron.Job) error {
   360  	dbInfo, err := checkSchemaExistAndCancelNotExistJob(t, job)
   361  	if err != nil {
   362  		return errors.Trace(err)
   363  	}
   364  	// To simplify the rollback logic, cannot be canceled after job start to run.
   365  	// Normally won't fetch here, because there is check when cancel dbs jobs. see function: isJobRollbackable.
   366  	if dbInfo.State == perceptron.StatePublic {
   367  		job.State = perceptron.JobStateCancelled
   368  		return errCancelledDBSJob
   369  	}
   370  	job.State = perceptron.JobStateRunning
   371  	return nil
   372  }
   373  
   374  func rollingbackRenameIndex(t *spacetime.Meta, job *perceptron.Job) (ver int64, err error) {
   375  	tblInfo, from, _, err := checkRenameIndex(t, job)
   376  	if err != nil {
   377  		return ver, errors.Trace(err)
   378  	}
   379  	// Here rename index is done in a transaction, if the job is not completed, it can be canceled.
   380  	idx := tblInfo.FindIndexByName(from.L)
   381  	if idx.State == perceptron.StatePublic {
   382  		job.State = perceptron.JobStateCancelled
   383  		return ver, errCancelledDBSJob
   384  	}
   385  	job.State = perceptron.JobStateRunning
   386  	return ver, errors.Trace(err)
   387  }
   388  
   389  func cancelOnlyNotHandledJob(job *perceptron.Job) (ver int64, err error) {
   390  	// We can only cancel the not handled job.
   391  	if job.SchemaState == perceptron.StateNone {
   392  		job.State = perceptron.JobStateCancelled
   393  		return ver, errCancelledDBSJob
   394  	}
   395  
   396  	job.State = perceptron.JobStateRunning
   397  
   398  	return ver, nil
   399  }
   400  
   401  func rollingbackTruncateBlock(t *spacetime.Meta, job *perceptron.Job) (ver int64, err error) {
   402  	_, err = getBlockInfoAndCancelFaultJob(t, job, job.SchemaID)
   403  	if err != nil {
   404  		return ver, errors.Trace(err)
   405  	}
   406  	return cancelOnlyNotHandledJob(job)
   407  }
   408  
   409  func convertJob2RollbackJob(w *worker, d *dbsCtx, t *spacetime.Meta, job *perceptron.Job) (ver int64, err error) {
   410  	switch job.Type {
   411  	case perceptron.CausetActionAddDeferredCauset:
   412  		ver, err = rollingbackAddDeferredCauset(t, job)
   413  	case perceptron.CausetActionAddDeferredCausets:
   414  		ver, err = rollingbackAddDeferredCausets(t, job)
   415  	case perceptron.CausetActionAddIndex:
   416  		ver, err = rollingbackAddIndex(w, d, t, job, false)
   417  	case perceptron.CausetActionAddPrimaryKey:
   418  		ver, err = rollingbackAddIndex(w, d, t, job, true)
   419  	case perceptron.CausetActionAddBlockPartition:
   420  		ver, err = rollingbackAddBlockPartition(t, job)
   421  	case perceptron.CausetActionDropDeferredCauset:
   422  		ver, err = rollingbackDropDeferredCauset(t, job)
   423  	case perceptron.CausetActionDropDeferredCausets:
   424  		ver, err = rollingbackDropDeferredCausets(t, job)
   425  	case perceptron.CausetActionDropIndex, perceptron.CausetActionDropPrimaryKey:
   426  		ver, err = rollingbackDropIndex(t, job)
   427  	case perceptron.CausetActionDropBlock, perceptron.CausetActionDropView, perceptron.CausetActionDropSequence:
   428  		err = rollingbackDropBlockOrView(t, job)
   429  	case perceptron.CausetActionDropBlockPartition:
   430  		ver, err = rollingbackDropBlockPartition(t, job)
   431  	case perceptron.CausetActionDropSchema:
   432  		err = rollingbackDropSchema(t, job)
   433  	case perceptron.CausetActionRenameIndex:
   434  		ver, err = rollingbackRenameIndex(t, job)
   435  	case perceptron.CausetActionTruncateBlock:
   436  		ver, err = rollingbackTruncateBlock(t, job)
   437  	case perceptron.CausetActionModifyDeferredCauset:
   438  		ver, err = rollingbackModifyDeferredCauset(t, job)
   439  	case perceptron.CausetActionRebaseAutoID, perceptron.CausetActionShardRowID, perceptron.CausetActionAddForeignKey,
   440  		perceptron.CausetActionDropForeignKey, perceptron.CausetActionRenameBlock,
   441  		perceptron.CausetActionModifyBlockCharsetAndDefCauslate, perceptron.CausetActionTruncateBlockPartition,
   442  		perceptron.CausetActionModifySchemaCharsetAndDefCauslate, perceptron.CausetActionRepairBlock,
   443  		perceptron.CausetActionModifyBlockAutoIdCache, perceptron.CausetActionAlterIndexVisibility,
   444  		perceptron.CausetActionExchangeBlockPartition:
   445  		ver, err = cancelOnlyNotHandledJob(job)
   446  	default:
   447  		job.State = perceptron.JobStateCancelled
   448  		err = errCancelledDBSJob
   449  	}
   450  
   451  	if err != nil {
   452  		if job.Error == nil {
   453  			job.Error = toTError(err)
   454  		}
   455  		if !job.Error.Equal(errCancelledDBSJob) {
   456  			job.Error = terror.GetErrClass(job.Error).Synthesize(terror.ErrCode(job.Error.Code()),
   457  				fmt.Sprintf("DBS job rollback, error msg: %s", terror.ToALLEGROSQLError(job.Error).Message))
   458  		}
   459  		job.ErrorCount++
   460  
   461  		if job.State != perceptron.JobStateRollingback && job.State != perceptron.JobStateCancelled {
   462  			logutil.Logger(w.logCtx).Error("[dbs] run DBS job failed", zap.String("job", job.String()), zap.Error(err))
   463  		} else {
   464  			logutil.Logger(w.logCtx).Info("[dbs] the DBS job is cancelled normally", zap.String("job", job.String()), zap.Error(err))
   465  			// If job is cancelled, we shouldn't return an error.
   466  			return ver, nil
   467  		}
   468  	}
   469  
   470  	return
   471  }