github.com/cloudberrydb/gpbackup@v1.0.3-0.20240118031043-5410fd45eed6/restore/parallel.go (about)

     1  package restore
     2  
     3  /*
     4   * This file contains functions related to executing multiple SQL statements in parallel.
     5   */
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  	"sync"
    11  	"sync/atomic"
    12  
    13  	"github.com/cloudberrydb/gp-common-go-libs/gplog"
    14  	"github.com/cloudberrydb/gpbackup/options"
    15  	"github.com/cloudberrydb/gpbackup/toc"
    16  	"github.com/cloudberrydb/gpbackup/utils"
    17  )
    18  
var (
	// mutex serializes access to shared restore state — the
	// errorTablesMetadata map and (in parallel mode) the shared fatal
	// error — when statements are executed by multiple goroutines.
	mutex = &sync.Mutex{}
)
    22  
    23  func executeStatementsForConn(statements chan toc.StatementWithType, fatalErr *error, numErrors *int32, progressBar utils.ProgressBar, whichConn int, executeInParallel bool) {
    24  	for statement := range statements {
    25  		if wasTerminated || *fatalErr != nil {
    26  			return
    27  		}
    28  		_, err := connectionPool.Exec(statement.Statement, whichConn)
    29  		if err != nil {
    30  			gplog.Verbose("Error encountered when executing statement: %s Error was: %s", strings.TrimSpace(statement.Statement), err.Error())
    31  			if MustGetFlagBool(options.ON_ERROR_CONTINUE) {
    32  				if executeInParallel {
    33  					atomic.AddInt32(numErrors, 1)
    34  					mutex.Lock()
    35  					errorTablesMetadata[statement.Schema+"."+statement.Name] = Empty{}
    36  					mutex.Unlock()
    37  				} else {
    38  					*numErrors = *numErrors + 1
    39  					errorTablesMetadata[statement.Schema+"."+statement.Name] = Empty{}
    40  				}
    41  			} else {
    42  				*fatalErr = err
    43  			}
    44  		}
    45  		progressBar.Increment()
    46  	}
    47  }
    48  
    49  /*
    50   * This function creates a worker pool of N goroutines to be able to execute up
    51   * to N statements in parallel.
    52   */
    53  func ExecuteStatements(statements []toc.StatementWithType, progressBar utils.ProgressBar, executeInParallel bool, whichConn ...int) int32 {
    54  	var workerPool sync.WaitGroup
    55  	var fatalErr error
    56  	var numErrors int32
    57  	tasks := make(chan toc.StatementWithType, len(statements))
    58  	for _, statement := range statements {
    59  		tasks <- statement
    60  	}
    61  	close(tasks)
    62  
    63  	if !executeInParallel {
    64  		connNum := connectionPool.ValidateConnNum(whichConn...)
    65  		executeStatementsForConn(tasks, &fatalErr, &numErrors, progressBar, connNum, executeInParallel)
    66  	} else {
    67  		for i := 0; i < connectionPool.NumConns; i++ {
    68  			workerPool.Add(1)
    69  			go func(connNum int) {
    70  				defer workerPool.Done()
    71  				connNum = connectionPool.ValidateConnNum(connNum)
    72  				executeStatementsForConn(tasks, &fatalErr, &numErrors, progressBar, connNum, executeInParallel)
    73  			}(i)
    74  		}
    75  		workerPool.Wait()
    76  	}
    77  	if fatalErr != nil {
    78  		fmt.Println("")
    79  		gplog.Fatal(fatalErr, "")
    80  	} else if numErrors > 0 {
    81  		fmt.Println("")
    82  		gplog.Error("Encountered %d errors during metadata restore; see log file %s for a list of failed statements.", numErrors, gplog.GetLogFilePath())
    83  	}
    84  
    85  	return numErrors
    86  }
    87  
    88  func ExecuteStatementsAndCreateProgressBar(statements []toc.StatementWithType, objectsTitle string, showProgressBar int, executeInParallel bool, whichConn ...int) int32 {
    89  	progressBar := utils.NewProgressBar(len(statements), fmt.Sprintf("%s restored: ", objectsTitle), showProgressBar)
    90  	progressBar.Start()
    91  	numErrors := ExecuteStatements(statements, progressBar, executeInParallel, whichConn...)
    92  	progressBar.Finish()
    93  
    94  	return numErrors
    95  }
    96  
    97  /*
    98   *   There is an existing bug in Greenplum where creating indexes in parallel
    99   *   on an AO table that didn't have any indexes previously can cause
   100   *   deadlock.
   101   *
   102   *   We work around this issue by restoring post data objects in
   103   *   two batches. The first batch takes one index from each table and
   104   *   restores them in parallel (which has no possibility of deadlock) and
   105   *   then the second restores all other postdata objects in parallel. After
   106   *   each table has at least one index, there is no more risk of deadlock.
   107   *
   108   *   A third batch is created specifically for postdata metadata
   109   *   (e.g. ALTER INDEX, ALTER EVENT TRIGGER, COMMENT ON). These
   110   *   statements cannot be concurrently run with batch two since that
   111   *   is where the dependent postdata objects are being created.
   112   */
   113  func BatchPostdataStatements(statements []toc.StatementWithType) ([]toc.StatementWithType, []toc.StatementWithType, []toc.StatementWithType) {
   114  	indexMap := make(map[string]bool)
   115  	firstBatch := make([]toc.StatementWithType, 0)
   116  	secondBatch := make([]toc.StatementWithType, 0)
   117  	thirdBatch := make([]toc.StatementWithType, 0)
   118  	for _, statement := range statements {
   119  		_, tableIndexPresent := indexMap[statement.ReferenceObject]
   120  		if statement.ObjectType == "INDEX" && !tableIndexPresent {
   121  			indexMap[statement.ReferenceObject] = true
   122  			firstBatch = append(firstBatch, statement)
   123  		} else if strings.Contains(statement.ObjectType, " METADATA") {
   124  			thirdBatch = append(thirdBatch, statement)
   125  		} else {
   126  			secondBatch = append(secondBatch, statement)
   127  		}
   128  	}
   129  	return firstBatch, secondBatch, thirdBatch
   130  }