github.com/greenplum-db/gpbackup@v0.0.0-20240517212602-89daab1885b3/restore/wrappers.go

package restore

import (
	"fmt"
	path "path/filepath"
	"strconv"
	"strings"

	"github.com/greenplum-db/gp-common-go-libs/dbconn"
	"github.com/greenplum-db/gp-common-go-libs/gplog"
	"github.com/greenplum-db/gp-common-go-libs/iohelper"
	"github.com/greenplum-db/gp-common-go-libs/operating"
	"github.com/greenplum-db/gpbackup/filepath"
	"github.com/greenplum-db/gpbackup/history"
	"github.com/greenplum-db/gpbackup/options"
	"github.com/greenplum-db/gpbackup/report"
	"github.com/greenplum-db/gpbackup/toc"
	"github.com/greenplum-db/gpbackup/utils"
)

/*
 * This file contains wrapper functions that group together functions relating
 * to querying and restoring metadata, so that the logic for each object type
 * can all be in one place and restore.go can serve as a high-level look at the
 * overall restore flow.
 */

/*
 * Setup and validation wrapper functions
 */

/*
 * Filters holds the schema and relation include/exclude lists used to
 * restrict a restore to matching objects
 */
type Filters struct {
	includeSchemas   []string
	excludeSchemas   []string
	includeRelations []string
	excludeRelations []string
}

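// NewFilters constructs a Filters struct from the given schema and relation
// include/exclude lists.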
func NewFilters(inSchemas []string, exSchemas []string, inRelations []string, exRelations []string) Filters {
	return Filters{
		includeSchemas:   inSchemas,
		excludeSchemas:   exSchemas,
		includeRelations: inRelations,
		excludeRelations: exRelations,
	}
}

func filtersEmpty(filters Filters) bool {
	return len(filters.includeSchemas) == 0 && len(filters.excludeSchemas) == 0 &&
		len(filters.includeRelations) == 0 && len(filters.excludeRelations) == 0
}

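// SetLoggerVerbosity adjusts console and log file verbosity based on the
// --quiet, --debug, and --verbose flags, in that order of precedence; if none
// are set, the log file defaults to INFO verbosity.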
func SetLoggerVerbosity() {
	gplog.SetLogFileVerbosity(gplog.LOGINFO)
	if MustGetFlagBool(options.QUIET) {
		gplog.SetVerbosity(gplog.LOGERROR)
		gplog.SetLogFileVerbosity(gplog.LOGERROR)
	} else if MustGetFlagBool(options.DEBUG) {
		gplog.SetVerbosity(gplog.LOGDEBUG)
		gplog.SetLogFileVerbosity(gplog.LOGDEBUG)
	} else if MustGetFlagBool(options.VERBOSE) {
		gplog.SetVerbosity(gplog.LOGVERBOSE)
		gplog.SetLogFileVerbosity(gplog.LOGVERBOSE)
	}
}

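// CreateConnectionPool initializes the global connection pool against the
// given database, sized by --copy-queue-size if that flag was passed and by
// --jobs otherwise, and validates that the cluster's GPDB version is
// supported.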
func CreateConnectionPool(unquotedDBName string) {
	connectionPool = dbconn.NewDBConnFromEnvironment(unquotedDBName)
	if FlagChanged(options.COPY_QUEUE_SIZE) {
		connectionPool.MustConnect(MustGetFlagInt(options.COPY_QUEUE_SIZE))
	} else {
		connectionPool.MustConnect(MustGetFlagInt(options.JOBS))
	}
	utils.ValidateGPDBVersionCompatibility(connectionPool)
}

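// InitializeConnectionPool creates the global connection pool and applies the
// session-level settings needed for a restore (search_path, timeouts, hash
// operator and resize-cluster GUCs, and so on) to every connection in the
// pool.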
func InitializeConnectionPool(backupTimestamp string, restoreTimestamp string, unquotedDBName string) {
	CreateConnectionPool(unquotedDBName)
	resizeRestore := MustGetFlagBool(options.RESIZE_CLUSTER)
	setupQuery := fmt.Sprintf("SET application_name TO 'gprestore_%s_%s';", backupTimestamp, restoreTimestamp)
	setupQuery += `
SET search_path TO pg_catalog;
SET gp_default_storage_options='';
SET statement_timeout = 0;
SET check_function_bodies = false;
SET client_min_messages = error;
SET standard_conforming_strings = on;
SET default_with_oids = off;
`

	// External table DDL from older backups may include the legacy
	// "INTO error_table" clause, which this GUC tells the server to ignore
	setupQuery += "SET gp_ignore_error_table = on;\n"
	if connectionPool.Version.Before("6") {
		setupQuery += "SET allow_system_table_mods = 'DML';\n"
	}

	if connectionPool.Version.AtLeast("6") {
		setupQuery += "SET allow_system_table_mods = true;\n"
		setupQuery += "SET lock_timeout = 0;\n"
		setupQuery += "SET default_transaction_read_only = off;\n"
		setupQuery += "SET xmloption = content;\n"

		// If the backup is from a GPDB version less than 6.0,
		// we need to use legacy hash operators when restoring
		// the tables, unless we're restoring to a cluster of
		// a different size, since in that case the data will be
		// redistributed during the restore process.
		backupConfigMajorVer, _ := strconv.Atoi(strings.Split(backupConfig.DatabaseVersion, ".")[0])
		if backupConfigMajorVer < 6 && !resizeRestore {
			setupQuery += "SET gp_use_legacy_hashops = on;\n"
			gplog.Warn("This backup set was taken on a version of Greenplum prior to 6.x. This restore will use the legacy hash operators when loading data.")
			gplog.Warn("To use the new Greenplum 6.x default hash operators, these tables will need to be redistributed.")
			gplog.Warn("For more information, refer to the migration guide located at https://docs.greenplum.org/latest/install_guide/migrate.html.")
		}
	}

	// If we're restoring to a different-sized cluster, disable the
	// distribution key check because the data won't necessarily
	// match initially and will be redistributed after the restore.
	if resizeRestore {
		setupQuery += "SET gp_enable_segment_copy_checking TO off;\n"
	}

	setupQuery += SetMaxCsvLineLengthQuery(connectionPool)

	// Always disable gp_autostats_mode to prevent automatic ANALYZE
	// during COPY FROM SEGMENT. ANALYZE should be run separately.
	setupQuery += "SET gp_autostats_mode = 'none';\n"

	// GPDB7 removed support for QuickLZ. To support creating tables from
	// backups taken with QuickLZ, a GUC was added to allow a silent
	// fallback to zstd.
	if connectionPool.Version.AtLeast("7") {
		setupQuery += "SET gp_quicklz_fallback = on;\n"
	}

	for i := 0; i < connectionPool.NumConns; i++ {
		connectionPool.MustExec(setupQuery, i)
	}
}

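// SetMaxCsvLineLengthQuery returns a statement setting gp_max_csv_line_length
// for clusters older than GPDB 6; on GPDB 6 and later it returns an empty
// string. For example, on a GPDB 5.11.0 or later cluster it returns
// "SET gp_max_csv_line_length = 1073741824;\n" (1GB).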
func SetMaxCsvLineLengthQuery(connectionPool *dbconn.DBConn) string {
	if connectionPool.Version.AtLeast("6") {
		return ""
	}

	var maxLineLength int
	if connectionPool.Version.Is("5") && connectionPool.Version.AtLeast("5.11.0") {
		maxLineLength = 1024 * 1024 * 1024 // 1GB
	} else {
		maxLineLength = 4 * 1024 * 1024 // 4MB
	}

	return fmt.Sprintf("SET gp_max_csv_line_length = %d;\n", maxLineLength)
}

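// InitializeBackupConfig reads the backup's config file, initializes the
// pipe-through (compression) parameters to match it, and verifies that the
// backup's gpbackup and GPDB versions are compatible with this restore.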
func InitializeBackupConfig() {
	backupConfig = history.ReadConfigFile(globalFPInfo.GetConfigFilePath())
	utils.InitializePipeThroughParameters(backupConfig.Compressed, backupConfig.CompressionType, 0)
	report.EnsureBackupVersionCompatibility(backupConfig.BackupVersion, version)
	report.EnsureDatabaseVersionCompatibility(backupConfig.DatabaseVersion, connectionPool.Version)
}

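// BackupConfigurationValidation verifies that the backup directories and
// metadata files exist, loads the table of contents, backfills a restore plan
// for legacy backups, and validates flag combinations and filter lists
// against the backup set.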
func BackupConfigurationValidation() {
	if !backupConfig.MetadataOnly {
		gplog.Verbose("Gathering information on backup directories")
		VerifyBackupDirectoriesExistOnAllHosts()
	}

	VerifyMetadataFilePaths(MustGetFlagBool(options.WITH_STATS))

	tocFilename := globalFPInfo.GetTOCFilePath()
	globalTOC = toc.NewTOC(tocFilename)
	globalTOC.InitializeMetadataEntryMap()

	// Legacy backups from before the incremental feature have no restore plan
	// element in the config file, so synthesize one
	if isLegacyBackup := backupConfig.RestorePlan == nil; isLegacyBackup {
		SetRestorePlanForLegacyBackup(globalTOC, globalFPInfo.Timestamp, backupConfig)
	}

	ValidateBackupFlagCombinations()

	validateFilterListsInBackupSet()
}

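// SetRestorePlanForLegacyBackup synthesizes a single-entry restore plan from
// the TOC's data entries for backups taken before restore plans existed.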
func SetRestorePlanForLegacyBackup(toc *toc.TOC, backupTimestamp string, backupConfig *history.BackupConfig) {
	tableFQNs := make([]string, 0, len(toc.DataEntries))
	for _, entry := range toc.DataEntries {
		entryFQN := utils.MakeFQN(entry.Schema, entry.Name)
		tableFQNs = append(tableFQNs, entryFQN)
	}
	backupConfig.RestorePlan = []history.RestorePlanEntry{
		{Timestamp: backupTimestamp, TableFQNs: tableFQNs},
	}
}

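// RecoverMetadataFilesUsingPlugin retrieves the config, metadata, report, and
// (with --with-stats) statistics files through the storage plugin, then
// restores the TOC files for each backup in the restore plan so the restore
// can proceed as if those files were local.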
func RecoverMetadataFilesUsingPlugin() {
	var err error
	pluginConfig, err = utils.ReadPluginConfig(MustGetFlagString(options.PLUGIN_CONFIG))
	gplog.FatalOnError(err)
	configFilename := path.Base(pluginConfig.ConfigPath)
	configDirname := path.Dir(pluginConfig.ConfigPath)
	pluginConfig.ConfigPath = path.Join(configDirname, history.CurrentTimestamp()+"_"+configFilename)
	_ = cmdFlags.Set(options.PLUGIN_CONFIG, pluginConfig.ConfigPath)
	gplog.Info("plugin config path: %s", pluginConfig.ConfigPath)

	pluginConfig.CheckPluginExistsOnAllHosts(globalCluster)

	timestamp := MustGetFlagString(options.TIMESTAMP)
	historicalPluginVersion := FindHistoricalPluginVersion(timestamp)
	pluginConfig.SetBackupPluginVersion(timestamp, historicalPluginVersion)

	pluginConfig.CopyPluginConfigToAllHosts(globalCluster)
	pluginConfig.SetupPluginForRestore(globalCluster, globalFPInfo)

	metadataFiles := []string{globalFPInfo.GetConfigFilePath(), globalFPInfo.GetMetadataFilePath(),
		globalFPInfo.GetBackupReportFilePath()}
	if MustGetFlagBool(options.WITH_STATS) {
		metadataFiles = append(metadataFiles, globalFPInfo.GetStatisticsFilePath())
	}
	for _, filename := range metadataFiles {
		pluginConfig.MustRestoreFile(filename)
	}

	InitializeBackupConfig()

	var fpInfoList []filepath.FilePathInfo
	if backupConfig.MetadataOnly {
		fpInfoList = []filepath.FilePathInfo{globalFPInfo}
	} else {
		fpInfoList = GetBackupFPInfoListFromRestorePlan()
	}

	for _, fpInfo := range fpInfoList {
		pluginConfig.MustRestoreFile(fpInfo.GetTOCFilePath())
		if backupConfig.SingleDataFile {
			origSize, destSize, _, batches := GetResizeClusterInfo()
			pluginConfig.RestoreSegmentTOCs(globalCluster, fpInfo, origSize, destSize, batches)
		}
	}
}

// FindHistoricalPluginVersion reads the backup history on the coordinator and
// returns the version of the plugin that was used to create the original
// backup, so that plugins can implement backwards compatibility. It returns
// an empty string if no history entry exists for the given timestamp.
func FindHistoricalPluginVersion(timestamp string) string {
	var historicalPluginVersion string

	historyDBPath := globalFPInfo.GetBackupHistoryDatabasePath()
	_, err := operating.System.Stat(historyDBPath)
	if err == nil {
		historyDB, err := history.InitializeHistoryDatabase(historyDBPath)
		if err != nil {
			return historicalPluginVersion
		}
		defer historyDB.Close()

		foundBackupConfig, err := history.GetBackupConfig(timestamp, historyDB)
		if err != nil && err.Error() != "timestamp doesn't match any existing backups" {
			gplog.FatalOnError(err)
		}
		if err == nil {
			historicalPluginVersion = foundBackupConfig.PluginVersion
		}
	}

	return historicalPluginVersion
}

/*
 * Metadata and/or data restore wrapper functions
 */

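// GetRestoreMetadataStatements is a convenience wrapper around
// GetRestoreMetadataStatementsFiltered with an empty filter set.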
func GetRestoreMetadataStatements(section string, filename string, includeObjectTypes []string, excludeObjectTypes []string) []toc.StatementWithType {
	return GetRestoreMetadataStatementsFiltered(section, filename, includeObjectTypes, excludeObjectTypes, Filters{})
}

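// GetRestoreMetadataStatementsFiltered reads the metadata file and returns
// the SQL statements for the requested section and object types, applying the
// given schema and relation include/exclude filters. Partition roots of
// included leaf partitions are added to the include list so that their DDL is
// also restored.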
func GetRestoreMetadataStatementsFiltered(section string, filename string, includeObjectTypes []string, excludeObjectTypes []string, filters Filters) []toc.StatementWithType {
	metadataFile := iohelper.MustOpenFileForReading(filename)
	var statements []toc.StatementWithType
	var inSchemas, exSchemas, inRelations, exRelations []string
	if !filtersEmpty(filters) {
		inSchemas = filters.includeSchemas
		exSchemas = filters.excludeSchemas
		inRelations = filters.includeRelations
		exRelations = filters.excludeRelations
		fpInfoList := GetBackupFPInfoListFromRestorePlan()
		for _, fpInfo := range fpInfoList {
			tocFilename := fpInfo.GetTOCFilePath()
			tocfile := toc.NewTOC(tocFilename)
			inRelations = append(inRelations, toc.GetIncludedPartitionRoots(tocfile.DataEntries, inRelations)...)
		}
		// If schemas are being restored and an include-relation list is set,
		// add each included relation's schema to the include-schema list
		if utils.Exists(includeObjectTypes, toc.OBJ_SCHEMA) {
			for _, inRelation := range inRelations {
				schema := inRelation[:strings.Index(inRelation, ".")]
				if !utils.Exists(inSchemas, schema) {
					inSchemas = append(inSchemas, schema)
				}
			}
			// Reset the relation lists, as they were needed only to derive
			// the schema list above
			inRelations = nil
			exRelations = nil
		}
	}
	statements = globalTOC.GetSQLStatementForObjectTypes(section, metadataFile, includeObjectTypes, excludeObjectTypes, inSchemas, exSchemas, inRelations, exRelations)
	return statements
}

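// ExecuteRestoreMetadataStatements executes the given statements, using the
// predata-specific execution path for the "predata" section, and returns the
// number of errors encountered. The objectsTitle and showProgressBar
// parameters are currently unused.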
func ExecuteRestoreMetadataStatements(section string, statements []toc.StatementWithType, objectsTitle string, progressBar utils.ProgressBar, showProgressBar int, executeInParallel bool) int32 {
	var numErrors int32
	if section == "predata" {
		numErrors = ExecutePredataStatements(statements, progressBar, executeInParallel)
	} else {
		numErrors = ExecuteStatements(statements, progressBar, executeInParallel)
	}
	return numErrors
}

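// GetBackupFPInfoListFromRestorePlan builds a FilePathInfo for each timestamp
// in the restore plan, allowing incremental restores to locate the files of
// every backup in the set.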
func GetBackupFPInfoListFromRestorePlan() []filepath.FilePathInfo {
	fpInfoList := make([]filepath.FilePathInfo, 0)
	for _, entry := range backupConfig.RestorePlan {
		segPrefix, singleBackupDir, err := filepath.ParseSegPrefix(MustGetFlagString(options.BACKUP_DIR), entry.Timestamp)
		gplog.FatalOnError(err)

		fpInfo := filepath.NewFilePathInfo(globalCluster, MustGetFlagString(options.BACKUP_DIR), entry.Timestamp, segPrefix, singleBackupDir)
		fpInfoList = append(fpInfoList, fpInfo)
	}

	return fpInfoList
}

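// GetBackupFPInfoForTimestamp builds a FilePathInfo for a single backup
// timestamp.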
func GetBackupFPInfoForTimestamp(timestamp string) filepath.FilePathInfo {
	segPrefix, singleBackupDir, err := filepath.ParseSegPrefix(MustGetFlagString(options.BACKUP_DIR), timestamp)
	gplog.FatalOnError(err)
	fpInfo := filepath.NewFilePathInfo(globalCluster, MustGetFlagString(options.BACKUP_DIR), timestamp, segPrefix, singleBackupDir)
	return fpInfo
}

/*
 * The first time this function is called, it retrieves the session GUCs from
 * the metadata file and processes them appropriately, then it returns them so
 * they can be used in later calls without the file access and processing
 * overhead.
 */
func setGUCsForConnection(gucStatements []toc.StatementWithType, whichConn int) []toc.StatementWithType {
	if gucStatements == nil {
		objectTypes := []string{toc.OBJ_SESSION_GUC}
		gucStatements = GetRestoreMetadataStatements("global", globalFPInfo.GetMetadataFilePath(), objectTypes, []string{})
	}
	ExecuteStatements(gucStatements, nil, false, whichConn)
	return gucStatements
}

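// RestoreSchemas executes the given CREATE SCHEMA statements on a single
// connection, downgrading "already exists" errors to warnings. Other errors
// are fatal unless --on-error-continue is set, in which case they are counted
// and reported once all statements have run.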
func RestoreSchemas(schemaStatements []toc.StatementWithType, progressBar utils.ProgressBar) {
	numErrors := 0
	for _, schema := range schemaStatements {
		_, err := connectionPool.Exec(schema.Statement, 0)
		if err != nil {
			if strings.Contains(err.Error(), "already exists") {
				gplog.Warn("Schema %s already exists", schema.Name)
			} else {
				errMsg := fmt.Sprintf("Error encountered while creating schema %s", schema.Name)
				if MustGetFlagBool(options.ON_ERROR_CONTINUE) {
					gplog.Verbose(fmt.Sprintf("%s: %s", errMsg, err.Error()))
					numErrors++
				} else {
					gplog.Fatal(err, errMsg)
				}
			}
		}
		progressBar.Increment()
	}
	if numErrors > 0 {
		gplog.Error("Encountered %d errors during schema restore; see log file %s for a list of errors.", numErrors, gplog.GetLogFilePath())
	}
}

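// GetExistingTableFQNs returns the quoted FQNs of all user relations in the
// target database that a restore could conflict with: tables and sequences on
// all versions, plus foreign tables on GPDB 6+ and partitioned tables on
// GPDB 7+.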
func GetExistingTableFQNs() ([]string, error) {
	existingTableFQNs := make([]string, 0)
	var relkindFilter string

	if connectionPool.Version.Before("6") {
		relkindFilter = "'r', 'S'"
	} else if connectionPool.Version.Is("6") {
		relkindFilter = "'r', 'S', 'f'"
	} else if connectionPool.Version.AtLeast("7") {
		relkindFilter = "'r', 'S', 'f', 'p'"
	}
	query := fmt.Sprintf(`SELECT quote_ident(n.nspname) || '.' || quote_ident(c.relname)
			  FROM pg_catalog.pg_class c
				LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
			  WHERE c.relkind IN (%s)
				 AND n.nspname !~ '^pg_'
				 AND n.nspname !~ '^gp_'
				 AND n.nspname <> 'information_schema'
			  ORDER BY 1;`, relkindFilter)

	err := connectionPool.Select(&existingTableFQNs, query)
	return existingTableFQNs, err
}

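// GetExistingSchemas returns the names of all schemas in the target database
// other than the system schemas.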
func GetExistingSchemas() ([]string, error) {
	existingSchemas := make([]string, 0)

	query := `SELECT n.nspname AS "Name"
			  FROM pg_catalog.pg_namespace n
			  WHERE n.nspname !~ '^pg_' AND n.nspname <> 'information_schema'
			  ORDER BY 1;`

	err := connectionPool.Select(&existingSchemas, query)
	return existingSchemas, err
}