github.com/cloudberrydb/gpbackup@v1.0.3-0.20240118031043-5410fd45eed6/restore/data.go (about) 1 package restore 2 3 /* 4 * This file contains structs and functions related to backing up data on the segments. 5 */ 6 7 import ( 8 "fmt" 9 "sync" 10 "sync/atomic" 11 12 "github.com/cloudberrydb/gp-common-go-libs/cluster" 13 "github.com/cloudberrydb/gp-common-go-libs/dbconn" 14 "github.com/cloudberrydb/gp-common-go-libs/gplog" 15 "github.com/cloudberrydb/gpbackup/filepath" 16 "github.com/cloudberrydb/gpbackup/options" 17 "github.com/cloudberrydb/gpbackup/toc" 18 "github.com/cloudberrydb/gpbackup/utils" 19 "github.com/jackc/pgconn" 20 "github.com/pkg/errors" 21 "gopkg.in/cheggaaa/pb.v1" 22 ) 23 24 var ( 25 tableDelim = "," 26 ) 27 28 func CopyTableIn(connectionPool *dbconn.DBConn, tableName string, tableAttributes string, destinationToRead string, singleDataFile bool, whichConn int) (int64, error) { 29 whichConn = connectionPool.ValidateConnNum(whichConn) 30 copyCommand := "" 31 readFromDestinationCommand := "cat" 32 customPipeThroughCommand := utils.GetPipeThroughProgram().InputCommand 33 origSize, destSize, resizeCluster := GetResizeClusterInfo() 34 35 if singleDataFile || resizeCluster { 36 //helper.go handles compression, so we don't want to set it here 37 customPipeThroughCommand = "cat -" 38 } else if MustGetFlagString(options.PLUGIN_CONFIG) != "" { 39 readFromDestinationCommand = fmt.Sprintf("%s restore_data %s", pluginConfig.ExecutablePath, pluginConfig.ConfigPath) 40 } 41 42 copyCommand = fmt.Sprintf("PROGRAM '%s %s | %s'", readFromDestinationCommand, destinationToRead, customPipeThroughCommand) 43 44 query := fmt.Sprintf("COPY %s%s FROM %s WITH CSV DELIMITER '%s' ON SEGMENT;", tableName, tableAttributes, copyCommand, tableDelim) 45 46 var numRows int64 47 var err error 48 49 // During a larger-to-smaller restore, we need multiple COPY passes to load all the data. 50 // One pass is sufficient for smaller-to-larger and normal restores. 51 batches := 1 52 if resizeCluster && origSize > destSize { 53 batches = origSize / destSize 54 if origSize%destSize != 0 { 55 batches += 1 56 } 57 } 58 for i := 0; i < batches; i++ { 59 gplog.Verbose(`Executing "%s" on coordinator`, query) 60 result, err := connectionPool.Exec(query, whichConn) 61 if err != nil { 62 errStr := fmt.Sprintf("Error loading data into table %s", tableName) 63 64 // The COPY ON SEGMENT error might contain useful CONTEXT output 65 if pgErr, ok := err.(*pgconn.PgError); ok && pgErr.Where != "" { 66 errStr = fmt.Sprintf("%s: %s", errStr, pgErr.Where) 67 } 68 69 return 0, errors.Wrap(err, errStr) 70 } 71 rowsLoaded, _ := result.RowsAffected() 72 numRows += rowsLoaded 73 } 74 75 return numRows, err 76 } 77 78 func restoreSingleTableData(fpInfo *filepath.FilePathInfo, entry toc.CoordinatorDataEntry, tableName string, whichConn int, origSize int, destSize int) error { 79 resizeCluster := MustGetFlagBool(options.RESIZE_CLUSTER) 80 destinationToRead := "" 81 if backupConfig.SingleDataFile || resizeCluster { 82 destinationToRead = fmt.Sprintf("%s_%d", fpInfo.GetSegmentPipePathForCopyCommand(), entry.Oid) 83 } else { 84 destinationToRead = fpInfo.GetTableBackupFilePathForCopyCommand(entry.Oid, utils.GetPipeThroughProgram().Extension, backupConfig.SingleDataFile) 85 } 86 gplog.Debug("Reading from %s", destinationToRead) 87 numRowsRestored, err := CopyTableIn(connectionPool, tableName, entry.AttributeString, destinationToRead, backupConfig.SingleDataFile, whichConn) 88 if err != nil { 89 return err 90 } 91 numRowsBackedUp := entry.RowsCopied 92 93 // For replicated tables, we don't restore second and subsequent batches of data in the larger-to-smaller case, 94 // as that would duplicate data, so we have to "scale down" the values to determine whether the correct number 95 // of rows was restored 96 if entry.IsReplicated && origSize > destSize { 97 numRowsBackedUp /= int64(origSize) 98 numRowsRestored /= int64(destSize) 99 } 100 101 err = CheckRowsRestored(numRowsRestored, numRowsBackedUp, tableName) 102 if err != nil { 103 return err 104 } 105 106 if resizeCluster { 107 // replicated tables cannot be redistributed, so instead expand them if needed 108 if entry.IsReplicated && (origSize < destSize) { 109 err = ExpandReplicatedTable(origSize, tableName, whichConn) 110 } else { 111 err = RedistributeTableData(tableName, whichConn) 112 } 113 if err != nil { 114 return err 115 } 116 } 117 return err 118 } 119 120 func ExpandReplicatedTable(origSize int, tableName string, whichConn int) error { 121 // Replicated tables will only be initially restored to the segments backup was run from, and 122 // redistributing does not cause the data to be replicated to the new segments. 123 // To work around this, update the distribution policy entry for those tables to the original cluster size 124 // and then explicitly expand them to cause the data to be replicated to all new segments. 125 gplog.Debug("Distributing replicated data for %s", tableName) 126 alterDistPolQuery := fmt.Sprintf("UPDATE gp_distribution_policy SET numsegments=%d WHERE localoid = '%s'::regclass::oid", origSize, tableName) 127 _, err := connectionPool.Exec(alterDistPolQuery, whichConn) 128 if err != nil { 129 return err 130 } 131 132 expandTableQuery := fmt.Sprintf("ALTER TABLE %s EXPAND TABLE;", tableName) 133 _, err = connectionPool.Exec(expandTableQuery, whichConn) 134 if err != nil { 135 return err 136 } 137 138 return nil 139 } 140 141 func CheckRowsRestored(rowsRestored int64, rowsBackedUp int64, tableName string) error { 142 if rowsRestored != rowsBackedUp { 143 rowsErrMsg := fmt.Sprintf("Expected to restore %d rows to table %s, but restored %d instead", rowsBackedUp, tableName, rowsRestored) 144 return errors.New(rowsErrMsg) 145 } 146 return nil 147 } 148 149 func RedistributeTableData(tableName string, whichConn int) error { 150 gplog.Debug("Redistributing data for %s", tableName) 151 query := fmt.Sprintf("ALTER TABLE %s SET WITH (REORGANIZE=true)", tableName) 152 _, err := connectionPool.Exec(query, whichConn) 153 return err 154 } 155 156 func restoreDataFromTimestamp(fpInfo filepath.FilePathInfo, dataEntries []toc.CoordinatorDataEntry, 157 gucStatements []toc.StatementWithType, dataProgressBar utils.ProgressBar) int32 { 158 totalTables := len(dataEntries) 159 if totalTables == 0 { 160 gplog.Verbose("No data to restore for timestamp = %s", fpInfo.Timestamp) 161 return 0 162 } 163 164 origSize, destSize, resizeCluster := GetResizeClusterInfo() 165 if backupConfig.SingleDataFile || resizeCluster { 166 msg := "" 167 if backupConfig.SingleDataFile { 168 msg += "single data file " 169 } 170 if resizeCluster { 171 msg += "resize " 172 } 173 gplog.Verbose("Initializing pipes and gpbackup_helper on segments for %srestore", msg) 174 utils.VerifyHelperVersionOnSegments(version, globalCluster) 175 oidList := make([]string, totalTables) 176 replicatedOidList := make([]string, 0) 177 for i, entry := range dataEntries { 178 oidString := fmt.Sprintf("%d", entry.Oid) 179 oidList[i] = oidString 180 if entry.IsReplicated { 181 replicatedOidList = append(replicatedOidList, oidString) 182 } 183 } 184 utils.WriteOidListToSegments(oidList, globalCluster, fpInfo, "oid") 185 if len(replicatedOidList) > 0 { 186 utils.WriteOidListToSegments(replicatedOidList, globalCluster, fpInfo, "replicated_oid") 187 } 188 initialPipes := CreateInitialSegmentPipes(oidList, globalCluster, connectionPool, fpInfo) 189 if wasTerminated { 190 return 0 191 } 192 isFilter := false 193 if len(opts.IncludedRelations) > 0 || len(opts.ExcludedRelations) > 0 || len(opts.IncludedSchemas) > 0 || len(opts.ExcludedSchemas) > 0 { 194 isFilter = true 195 } 196 compressStr := "" 197 if backupConfig.Compressed { 198 compressStr = fmt.Sprintf(" --compression-type %s ", utils.GetPipeThroughProgram().Name) 199 } 200 utils.StartGpbackupHelpers(globalCluster, fpInfo, "--restore-agent", MustGetFlagString(options.PLUGIN_CONFIG), compressStr, MustGetFlagBool(options.ON_ERROR_CONTINUE), isFilter, &wasTerminated, initialPipes, backupConfig.SingleDataFile, resizeCluster, origSize, destSize) 201 } 202 /* 203 * We break when an interrupt is received and rely on 204 * TerminateHangingCopySessions to stop any COPY 205 * statements in progress if they don't finish on their own. 206 */ 207 var tableNum int64 = 0 208 tasks := make(chan toc.CoordinatorDataEntry, totalTables) 209 var workerPool sync.WaitGroup 210 var numErrors int32 211 var mutex = &sync.Mutex{} 212 213 for i := 0; i < connectionPool.NumConns; i++ { 214 workerPool.Add(1) 215 go func(whichConn int) { 216 defer workerPool.Done() 217 218 setGUCsForConnection(gucStatements, whichConn) 219 for entry := range tasks { 220 if wasTerminated { 221 dataProgressBar.(*pb.ProgressBar).NotPrint = true 222 return 223 } 224 tableName := utils.MakeFQN(entry.Schema, entry.Name) 225 if opts.RedirectSchema != "" { 226 tableName = utils.MakeFQN(opts.RedirectSchema, entry.Name) 227 } 228 // Truncate table before restore, if needed 229 var err error 230 if MustGetFlagBool(options.INCREMENTAL) || MustGetFlagBool(options.TRUNCATE_TABLE) { 231 err = TruncateTable(tableName, whichConn) 232 } 233 if err == nil { 234 err = restoreSingleTableData(&fpInfo, entry, tableName, whichConn, origSize, destSize) 235 236 if gplog.GetVerbosity() > gplog.LOGINFO { 237 // No progress bar at this log level, so we note table count here 238 gplog.Verbose("Restored data to table %s from file (table %d of %d)", tableName, atomic.AddInt64(&tableNum, 1), totalTables) 239 } else { 240 gplog.Verbose("Restored data to table %s from file", tableName) 241 } 242 } 243 244 if err != nil { 245 gplog.Error(err.Error()) 246 atomic.AddInt32(&numErrors, 1) 247 if !MustGetFlagBool(options.ON_ERROR_CONTINUE) { 248 dataProgressBar.(*pb.ProgressBar).NotPrint = true 249 return 250 } else if backupConfig.SingleDataFile { 251 // inform segment helpers to skip this entry 252 utils.CreateSkipFileOnSegments(fmt.Sprintf("%d", entry.Oid), tableName, globalCluster, globalFPInfo) 253 } 254 mutex.Lock() 255 errorTablesData[tableName] = Empty{} 256 mutex.Unlock() 257 } 258 259 if backupConfig.SingleDataFile { 260 agentErr := utils.CheckAgentErrorsOnSegments(globalCluster, globalFPInfo) 261 if agentErr != nil { 262 gplog.Error(agentErr.Error()) 263 return 264 } 265 } 266 267 dataProgressBar.Increment() 268 } 269 }(i) 270 } 271 for _, entry := range dataEntries { 272 tasks <- entry 273 } 274 close(tasks) 275 workerPool.Wait() 276 277 if numErrors > 0 { 278 fmt.Println("") 279 gplog.Error("Encountered %d error(s) during table data restore; see log file %s for a list of table errors.", numErrors, gplog.GetLogFilePath()) 280 } 281 282 return numErrors 283 } 284 285 func CreateInitialSegmentPipes(oidList []string, c *cluster.Cluster, connectionPool *dbconn.DBConn, fpInfo filepath.FilePathInfo) int { 286 // Create min(connections, tables) segment pipes on each host 287 var maxPipes int 288 if connectionPool.NumConns < len(oidList) { 289 maxPipes = connectionPool.NumConns 290 } else { 291 maxPipes = len(oidList) 292 } 293 for i := 0; i < maxPipes; i++ { 294 utils.CreateSegmentPipeOnAllHosts(oidList[i], c, fpInfo) 295 } 296 return maxPipes 297 }