package restore

/*
 * This file contains functions related to executing multiple SQL statements in parallel.
 */

import (
	"fmt"
	"strings"
	"sync"
	"sync/atomic"

	"github.com/cloudberrydb/gp-common-go-libs/gplog"
	"github.com/cloudberrydb/gpbackup/options"
	"github.com/cloudberrydb/gpbackup/toc"
	"github.com/cloudberrydb/gpbackup/utils"
)

var (
	// mutex guards concurrent writes to the shared errorTablesMetadata map
	// when statements are executed in parallel.
	mutex = &sync.Mutex{}
)

// executeStatementsForConn drains the statements channel, executing each
// statement on the connection identified by whichConn. On an execution
// error, behavior depends on the --on-error-continue flag: when set, the
// error is counted in numErrors and the failed object is recorded in
// errorTablesMetadata; otherwise the error is stored in fatalErr, which
// causes this worker (and any others polling fatalErr) to stop. When
// executeInParallel is true, numErrors is updated atomically and the map
// write is mutex-protected, because multiple goroutines run this function
// against the same channel and counters.
func executeStatementsForConn(statements chan toc.StatementWithType, fatalErr *error, numErrors *int32, progressBar utils.ProgressBar, whichConn int, executeInParallel bool) {
	for statement := range statements {
		// Bail out early if the restore was terminated or another worker
		// already hit a fatal error.
		// NOTE(review): in parallel mode *fatalErr is read here and written
		// below without synchronization — this looks like a benign-intent
		// "best effort" early exit, but it is technically a data race;
		// confirm against the race detector.
		if wasTerminated || *fatalErr != nil {
			return
		}
		_, err := connectionPool.Exec(statement.Statement, whichConn)
		if err != nil {
			gplog.Verbose("Error encountered when executing statement: %s Error was: %s", strings.TrimSpace(statement.Statement), err.Error())
			if MustGetFlagBool(options.ON_ERROR_CONTINUE) {
				if executeInParallel {
					atomic.AddInt32(numErrors, 1)
					mutex.Lock()
					errorTablesMetadata[statement.Schema+"."+statement.Name] = Empty{}
					mutex.Unlock()
				} else {
					// Serial execution: no other goroutine touches these,
					// so plain increments/writes are safe.
					*numErrors = *numErrors + 1
					errorTablesMetadata[statement.Schema+"."+statement.Name] = Empty{}
				}
			} else {
				*fatalErr = err
			}
		}
		// The bar is advanced whether the statement succeeded or failed, so
		// it always reaches its total.
		progressBar.Increment()
	}
}

/*
 * This function creates a worker pool of N goroutines to be able to execute up
 * to N statements in parallel.
 */
// ExecuteStatements runs the given statements either serially on a single
// connection (validated from the optional whichConn argument) or in parallel
// across every connection in the pool. All statements are preloaded into a
// buffered channel so workers never block on send; closing the channel lets
// each worker's range loop terminate once the work is drained. A fatal error
// (i.e. any error without --on-error-continue) aborts the process via
// gplog.Fatal; otherwise the number of non-fatal errors is logged and
// returned.
func ExecuteStatements(statements []toc.StatementWithType, progressBar utils.ProgressBar, executeInParallel bool, whichConn ...int) int32 {
	var workerPool sync.WaitGroup
	var fatalErr error
	var numErrors int32
	// Buffer sized to hold every statement up front.
	tasks := make(chan toc.StatementWithType, len(statements))
	for _, statement := range statements {
		tasks <- statement
	}
	close(tasks)

	if !executeInParallel {
		connNum := connectionPool.ValidateConnNum(whichConn...)
		executeStatementsForConn(tasks, &fatalErr, &numErrors, progressBar, connNum, executeInParallel)
	} else {
		// One worker goroutine per pooled connection; all share the same
		// task channel, error slot, and counter.
		for i := 0; i < connectionPool.NumConns; i++ {
			workerPool.Add(1)
			go func(connNum int) {
				defer workerPool.Done()
				connNum = connectionPool.ValidateConnNum(connNum)
				executeStatementsForConn(tasks, &fatalErr, &numErrors, progressBar, connNum, executeInParallel)
			}(i)
		}
		workerPool.Wait()
	}
	if fatalErr != nil {
		// Newline so the fatal message is not appended to the progress bar line.
		fmt.Println("")
		gplog.Fatal(fatalErr, "")
	} else if numErrors > 0 {
		fmt.Println("")
		gplog.Error("Encountered %d errors during metadata restore; see log file %s for a list of failed statements.", numErrors, gplog.GetLogFilePath())
	}

	return numErrors
}

// ExecuteStatementsAndCreateProgressBar is a convenience wrapper around
// ExecuteStatements that creates, starts, and finishes a progress bar titled
// with objectsTitle, returning the error count from the underlying call.
func ExecuteStatementsAndCreateProgressBar(statements []toc.StatementWithType, objectsTitle string, showProgressBar int, executeInParallel bool, whichConn ...int) int32 {
	progressBar := utils.NewProgressBar(len(statements), fmt.Sprintf("%s restored: ", objectsTitle), showProgressBar)
	progressBar.Start()
	numErrors := ExecuteStatements(statements, progressBar, executeInParallel, whichConn...)
	progressBar.Finish()

	return numErrors
}

/*
 * There is an existing bug in Greenplum where creating indexes in parallel
 * on an AO table that didn't have any indexes previously can cause
 * deadlock.
 *
 * We work around this issue by restoring post data objects in
 * two batches.
The first batch takes one index from each table and 104 * restores them in parallel (which has no possibility of deadlock) and 105 * then the second restores all other postdata objects in parallel. After 106 * each table has at least one index, there is no more risk of deadlock. 107 * 108 * A third batch is created specifically for postdata metadata 109 * (e.g. ALTER INDEX, ALTER EVENT TRIGGER, COMMENT ON). These 110 * statements cannot be concurrently run with batch two since that 111 * is where the dependent postdata objects are being created. 112 */ 113 func BatchPostdataStatements(statements []toc.StatementWithType) ([]toc.StatementWithType, []toc.StatementWithType, []toc.StatementWithType) { 114 indexMap := make(map[string]bool) 115 firstBatch := make([]toc.StatementWithType, 0) 116 secondBatch := make([]toc.StatementWithType, 0) 117 thirdBatch := make([]toc.StatementWithType, 0) 118 for _, statement := range statements { 119 _, tableIndexPresent := indexMap[statement.ReferenceObject] 120 if statement.ObjectType == "INDEX" && !tableIndexPresent { 121 indexMap[statement.ReferenceObject] = true 122 firstBatch = append(firstBatch, statement) 123 } else if strings.Contains(statement.ObjectType, " METADATA") { 124 thirdBatch = append(thirdBatch, statement) 125 } else { 126 secondBatch = append(secondBatch, statement) 127 } 128 } 129 return firstBatch, secondBatch, thirdBatch 130 }