github.com/greenplum-db/gpbackup@v0.0.0-20240517212602-89daab1885b3/ci/scripts/scale-perf-tests.bash (about)

     1  #!/bin/bash
     2  
     3  set -ex
     4  
     5  # retrieve cluster set up by previous job, and set up SSH to it
     6  tar -xvf "cluster-metadata/cluster-metadata.tar.gz"
     7  ccp_src/scripts/setup_ssh_to_cluster.sh
     8  
     9  cat <<SCRIPT > /tmp/run_tests.bash
    10  #!/bin/bash
    11  
    12  source env.sh
    13  # set format for logging
    14  export TIMEFORMAT="TEST RUNTIME: %E"
    15  export RESULTS_LOG_FILE=${RESULTS_LOG_FILE}
    16  
    17  # set parameters for reference time DB
    18  export RESULTS_DATABASE_HOST=${RESULTS_DATABASE_HOST}
    19  export RESULTS_DATABASE_USER=${RESULTS_DATABASE_USER}
    20  export RESULTS_DATABASE_NAME=${RESULTS_DATABASE_NAME}
    21  export RESULTS_DATABASE_PASSWORD=${RESULTS_DATABASE_PASSWORD}
    22  
    23  # capture installed versions for later storage in run stats
    24  gpstart --version > /home/gpadmin/gpversion.txt
    25  gpbackup --version > /home/gpadmin/gpbversion.txt
    26  export GPDB_VERSION=\$(cat /home/gpadmin/gpversion.txt)
    27  export GPB_VERSION=\$(cat /home/gpadmin/gpbversion.txt)
    28  
    29  echo "## Capturing row counts for comparison ##"
    30  psql -d scaletestdb -f /home/gpadmin/pull_rowcount.sql -o /home/gpadmin/rowcounts_orig.txt
    31  
    32  #####################################################################
    33  ##################################################################### 
    34  echo "## Performing single-data-file, --no-compression, --copy-queue-size 8 backup test ##"
    35  # BACKUP
    36  rm -f $RESULTS_LOG_FILE
    37  (time gpbackup --dbname scaletestdb --include-schema big --backup-dir /data/gpdata/ --single-data-file --no-compression --copy-queue-size 8) > $RESULTS_LOG_FILE 2>&1
    38  timestamp=\$(head -10 "\$RESULTS_LOG_FILE" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
    39  echo "gpb_single_data_file_copy_q8 timestamp backed up: \$timestamp"
    40  
    41  # conduct runtime analysis
    42  python /home/gpadmin/analyze_run.py gpb_single_data_file_copy_q8
    43  #####################################################################
    44  
    45  #####################################################################
    46  echo "## Performing single-data-file, --no-compression, --copy-queue-size 8 restore test ##"
    47  # RESTORE
    48  rm -f $RESULTS_LOG_FILE
    49  (time gprestore --timestamp "\$timestamp" --include-schema big --backup-dir /data/gpdata/ --create-db --redirect-db copyqueuerestore8 --copy-queue-size 8) > $RESULTS_LOG_FILE 2>&1
    50  echo "gpr_single_data_file_copy_q8 timestamp restored: \$timestamp"
    51  
    52  # compare round-trip row counts
    53  psql -d copyqueuerestore8 -f /home/gpadmin/pull_rowcount.sql -o /home/gpadmin/rowcounts_gpr_single_data_file_copy_q8.txt
    54  ROWCOUNTS_DIFF=\$(diff -w /home/gpadmin/rowcounts_orig.txt /home/gpadmin/rowcounts_gpr_single_data_file_copy_q8.txt)
    55  if [ "\$ROWCOUNTS_DIFF" != "" ] 
    56  then
    57    echo "Failed result from gpr_single_data_file_copy_q8 -- mismatched row counts.  Exiting early with failure code."
    58    exit 1
    59  fi
    60  
    61  # conduct runtime analysis
    62  python /home/gpadmin/analyze_run.py gpr_single_data_file_copy_q8
    63  
    64  # clean out redirected database before proceeding further
    65  yes y | gpbackup_manager delete-backup "\$timestamp"
    66  dropdb copyqueuerestore8
    67  #####################################################################
    68  #####################################################################
    69  
    70  #####################################################################
    71  ##################################################################### 
    72  echo "## Performing backup for data scale test ##"
    73  # BACKUP
    74  rm -f $RESULTS_LOG_FILE
    75  (time gpbackup --dbname scaletestdb --include-schema big --backup-dir /data/gpdata/) > $RESULTS_LOG_FILE 2>&1
    76  timestamp=\$(head -10 "\$RESULTS_LOG_FILE" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
    77  echo "gpb_scale_multi_data_file timestamp backed up: \$timestamp"
    78  
    79  # conduct runtime analysis
    80  python /home/gpadmin/analyze_run.py gpb_scale_multi_data_file
    81  #####################################################################
    82  
    83  #####################################################################
    84  echo "## Performing restore for data scale test ##"
    85  # RESTORE
    86  rm -f $RESULTS_LOG_FILE
    87  (time gprestore --timestamp "\$timestamp" --include-schema big --backup-dir /data/gpdata/ --create-db --redirect-db scalemultifile --jobs=4) > $RESULTS_LOG_FILE 2>&1
    88  echo "gpr_scale_multi_data_file timestamp restored: \$timestamp"
    89  
    90  # compare round-trip row counts
    91  psql -d scalemultifile -f /home/gpadmin/pull_rowcount.sql -o /home/gpadmin/rowcounts_gpr_scale_multi_data_file.txt
    92  ROWCOUNTS_DIFF=\$(diff -w /home/gpadmin/rowcounts_orig.txt /home/gpadmin/rowcounts_gpr_scale_multi_data_file.txt)
    93  if [ "\$ROWCOUNTS_DIFF" != "" ] 
    94  then
    95    echo "Failed result from gpr_scale_multi_data_file -- mismatched row counts.  Exiting early with failure code."
    96    exit 1
    97  fi
    98  
    99  # conduct runtime analysis
   100  python /home/gpadmin/analyze_run.py gpr_scale_multi_data_file
   101  
   102  # clean out redirected database before proceeding further
   103  yes y | gpbackup_manager delete-backup "\$timestamp"
   104  dropdb scalemultifile
   105  #####################################################################
   106  #####################################################################
   107  
   108  #####################################################################
   109  ##################################################################### 
   110  echo "## Performing backup for data scale test with zstd ##"
   111  # BACKUP
   112  rm -f $RESULTS_LOG_FILE
   113  (time gpbackup --dbname scaletestdb --include-schema big --backup-dir /data/gpdata/ --compression-type zstd) > $RESULTS_LOG_FILE 2>&1
   114  timestamp=\$(head -10 "\$RESULTS_LOG_FILE" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
   115  echo "gpb_scale_multi_data_file_zstd timestamp backed up: \$timestamp"
   116  
   117  # conduct runtime analysis
   118  python /home/gpadmin/analyze_run.py gpb_scale_multi_data_file_zstd
   119  #####################################################################
   120  
   121  #####################################################################
   122  echo "## Performing restore for data scale test with zstd ##"
   123  # RESTORE
   124  rm -f $RESULTS_LOG_FILE
   125  (time gprestore --timestamp "\$timestamp" --include-schema big --backup-dir /data/gpdata/ --create-db --redirect-db scalemultifilezstd --jobs=4) > $RESULTS_LOG_FILE 2>&1
   126  echo "gpr_scale_multi_data_file_zstd timestamp restored: \$timestamp"
   127  
   128  # compare round-trip row counts
   129  psql -d scalemultifilezstd -f /home/gpadmin/pull_rowcount.sql -o /home/gpadmin/rowcounts_gpr_scale_multi_data_file_zstd.txt
   130  ROWCOUNTS_DIFF=\$(diff -w /home/gpadmin/rowcounts_orig.txt /home/gpadmin/rowcounts_gpr_scale_multi_data_file_zstd.txt)
   131  if [ "\$ROWCOUNTS_DIFF" != "" ] 
   132  then
   133    echo "Failed result from gpr_scale_multi_data_file_zstd -- mismatched row counts.  Exiting early with failure code."
   134    exit 1
   135  fi
   136  
   137  # conduct runtime analysis
   138  python /home/gpadmin/analyze_run.py gpr_scale_multi_data_file_zstd
   139  
   140  # clean out redirected database before proceeding further
   141  yes y | gpbackup_manager delete-backup "\$timestamp"
   142  dropdb scalemultifilezstd
   143  #####################################################################
   144  #####################################################################
   145  
   146  #####################################################################
   147  ##################################################################### 
   148  echo "## Performing single-data-file backup for data scale test ##"
   149  # BACKUP
   150  rm -f $RESULTS_LOG_FILE
   151  (time gpbackup --dbname scaletestdb --include-schema big --backup-dir /data/gpdata/ --single-data-file) > $RESULTS_LOG_FILE 2>&1
   152  timestamp=\$(head -10 "\$RESULTS_LOG_FILE" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
   153  echo "gpb_scale_single_data_file timestamp backed up: \$timestamp"
   154  
   155  # conduct runtime analysis
   156  python /home/gpadmin/analyze_run.py gpb_scale_single_data_file
   157  #####################################################################
   158  
   159  #####################################################################
   160  echo "## Performing single-data-file restore for data scale test ##"
   161  # RESTORE
   162  rm -f $RESULTS_LOG_FILE
   163  (time gprestore --timestamp "\$timestamp" --include-schema big --backup-dir /data/gpdata/ --create-db --redirect-db scalesinglefile) > $RESULTS_LOG_FILE 2>&1
   164  echo "gpr_scale_single_data_file timestamp restored: \$timestamp"
   165  
   166  # compare round-trip row counts
   167  psql -d scalesinglefile -f /home/gpadmin/pull_rowcount.sql -o /home/gpadmin/rowcounts_gpr_scale_single_data_file.txt
   168  ROWCOUNTS_DIFF=\$(diff -w /home/gpadmin/rowcounts_orig.txt /home/gpadmin/rowcounts_gpr_scale_single_data_file.txt)
   169  if [ "\$ROWCOUNTS_DIFF" != "" ] 
   170  then
   171    echo "Failed result from gpr_scale_single_data_file -- mismatched row counts.  Exiting early with failure code."
   172    exit 1
   173  fi
   174  
   175  # conduct runtime analysis
   176  python /home/gpadmin/analyze_run.py gpr_scale_single_data_file
   177  
   178  # clean out redirected database before proceeding further
   179  yes y | gpbackup_manager delete-backup "\$timestamp"
   180  dropdb scalesinglefile
   181  #####################################################################
   182  #####################################################################
   183  
   184  #####################################################################
   185  ##################################################################### 
   186  echo "## Performing single-data-file backup for data scale test with zstd ##"
   187  # BACKUP
   188  rm -f $RESULTS_LOG_FILE
   189  (time gpbackup --dbname scaletestdb --include-schema big --backup-dir /data/gpdata/ --single-data-file --compression-type zstd) > $RESULTS_LOG_FILE 2>&1
   190  timestamp=\$(head -10 "\$RESULTS_LOG_FILE" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
   191  echo "gpb_scale_single_data_file_zstd timestamp backed up: \$timestamp"
   192  
   193  # conduct runtime analysis
   194  python /home/gpadmin/analyze_run.py gpb_scale_single_data_file_zstd
   195  #####################################################################
   196  
   197  #####################################################################
   198  echo "## Performing single-data-file restore for data scale test with zstd ##"
   199  # RESTORE
   200  rm -f $RESULTS_LOG_FILE
   201  (time gprestore --timestamp "\$timestamp" --include-schema big --backup-dir /data/gpdata/ --create-db --redirect-db scalesinglefilezstd) > $RESULTS_LOG_FILE 2>&1
   202  echo "gpr_scale_single_data_file_zstd timestamp restored: \$timestamp"
   203  
   204  # compare round-trip row counts
   205  psql -d scalesinglefilezstd -f /home/gpadmin/pull_rowcount.sql -o /home/gpadmin/rowcounts_gpr_scale_single_data_file_zstd.txt
   206  ROWCOUNTS_DIFF=\$(diff -w /home/gpadmin/rowcounts_orig.txt /home/gpadmin/rowcounts_gpr_scale_single_data_file_zstd.txt)
   207  if [ "\$ROWCOUNTS_DIFF" != "" ] 
   208  then
   209    echo "Failed result from gpr_scale_single_data_file_zstd -- mismatched row counts.  Exiting early with failure code."
   210    exit 1
   211  fi
   212  
   213  # conduct runtime analysis
   214  python /home/gpadmin/analyze_run.py gpr_scale_single_data_file_zstd
   215  
   216  # clean out redirected database before proceeding further
   217  yes y | gpbackup_manager delete-backup "\$timestamp"
   218  dropdb scalesinglefilezstd
   219  #####################################################################
   220  #####################################################################
   221  
   222  #####################################################################
   223  ##################################################################### 
   224  # TEST GPBACKUP UNDER VARIOUS PRESSURES
   225  #####################################################################
   226  ##################################################################### 
   227  
   228  #####################################################################
   229  #####################################################################
   230  echo "## Performing backup with moderate number of jobs while database is being edited ##"
   231  # BACKUP
   232  rm -f $RESULTS_LOG_FILE
   233  echo "RESULTS_LOG_FILE: \$RESULTS_LOG_FILE"
   234  (time gpbackup --dbname scaletestdb --include-schema big --backup-dir /data/gpdata --jobs=16 ) > \$RESULTS_LOG_FILE 2>&1 &
   235  echo "Backup initiated in the background."
   236  # check log for lock acquisition before proceeding
   237  set +e # turn off exit on error so grep doesn't halt the whole script
   238  TIMEOUT_COUNTER=0
   239  while true
   240  do
   241      sleep 1
   242      LOCKSGREP=\$(grep "Locks acquired: .* 100\.00\%" \$RESULTS_LOG_FILE)
   243      if [ "\$LOCKSGREP" != "" ]; then
   244          echo "All locks acquired.  Proceeding with ETL job."
   245          break
   246      fi
   247  
   248      if ((\$TIMEOUT_COUNTER > 100)); then
   249          echo "Test timed out waiting for lock acquisition"
   250          exit 1
   251      fi
   252      echo "\$TIMEOUT_COUNTER"
   253      ((TIMEOUT_COUNTER=\$TIMEOUT_COUNTER+1))
   254  done
   255  
   256  # begin ETL job
   257  psql -d scaletestdb -f /home/gpadmin/etl_job.sql > /dev/null
   258  
   259  # check log for backup completion before proceeding
   260  TIMEOUT_COUNTER=0
   261  while true
   262  do
   263      sleep 1
   264      COMPGREP=\$(grep "Backup completed successfully" \$RESULTS_LOG_FILE)
   265      if [ "\$COMPGREP" != "" ]; then
   266          break
   267      fi
   268  
   269      if ((\$TIMEOUT_COUNTER > 10000)); then
   270          echo "Test timed out waiting for backup completion"
   271          exit 1
   272      fi
   273      ((TIMEOUT_COUNTER=\$TIMEOUT_COUNTER+1))
   274  done
   275  set -e
   276  
   277  timestamp=\$(head -10 "\$RESULTS_LOG_FILE" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
   278  echo "gpb_distr_snap_edit_data timestamp backed up: \$timestamp"
   279  
   280  # conduct runtime analysis
   281  python /home/gpadmin/analyze_run.py gpb_distr_snap_edit_data
   282  #####################################################################
   283  
   284  #####################################################################
   285  echo "## Performing restore with moderate number of jobs on backup done while database is edited ##"
   286  # RESTORE
   287  rm -f $RESULTS_LOG_FILE
   288  dropdb scaletestdb
   289  (time gprestore --timestamp "\$timestamp" --include-schema big --backup-dir /data/gpdata --create-db --redirect-db newscaletestdb --jobs=16) > \$RESULTS_LOG_FILE 2>&1
   290  echo "gpr_distr_snap_edit_data timestamp restored: \$timestamp"
   291  
   292  # compare round-trip row counts
   293  psql -d newscaletestdb -f /home/gpadmin/pull_rowcount.sql -o /home/gpadmin/rowcounts_gpr_distr_snap_edit_data.txt
   294  ROWCOUNTS_DIFF=\$(diff -w /home/gpadmin/rowcounts_orig.txt /home/gpadmin/rowcounts_gpr_distr_snap_edit_data.txt)
   295  if [ "\$ROWCOUNTS_DIFF" != "" ] 
   296  then
   297    echo "Failed result from gpr_distr_snap_edit_data -- mismatched row counts.  Exiting early with failure code."
   298    exit 1
   299  fi
   300  
   301  # conduct runtime analysis
   302  python /home/gpadmin/analyze_run.py gpr_distr_snap_edit_data
   303  
   304  # clean out redirected database before proceeding further
   305  yes y | gpbackup_manager delete-backup "\$timestamp"
   306  #####################################################################
   307  #####################################################################
   308  
   309  #####################################################################
   310  ##################################################################### 
   311  echo "## Performing backup with high number of jobs on cluster with high-concurrency load ##"
   312  # BACKUP
   313  rm -f $RESULTS_LOG_FILE
   314  (time gpbackup --dbname newscaletestdb --include-schema big --backup-dir /data/gpdata --jobs=32 ) > \$RESULTS_LOG_FILE 2>&1 &
   315  # check log for lock acquisition before proceeding
   316  set +e set +e # turn off exit on error so grep doesn't halt the whole script
   317  TIMEOUT_COUNTER=0
   318  while true
   319  do
   320      sleep 1
   321      LOCKSGREP=\$(grep "Locks acquired: .* 100\.00\%" \$RESULTS_LOG_FILE)
   322      if [ "\$LOCKSGREP" != "" ]; then
   323          echo "All locks acquired.  Proceeding with data load"
   324          break
   325      fi
   326  
   327      if ((\$TIMEOUT_COUNTER > 100)); then
   328          echo "Test timed out waiting for lock acquisition"
   329          exit 1
   330      fi
   331      ((TIMEOUT_COUNTER=\$TIMEOUT_COUNTER+1))
   332  done
   333  
   334  # load data into a separate database to apply high concurrent load to cluster
   335  createdb scaletestdb
   336  psql -d scaletestdb -q -f scaletestdb_bigschema_ddl.sql
   337  gpload -f /home/gpadmin/gpload_yaml/lineitem.yml
   338  gpload -f /home/gpadmin/gpload_yaml/orders_3.yml
   339  
   340  # check log for backup completion before proceeding
   341  TIMEOUT_COUNTER=0
   342  while true
   343  do
   344      sleep 1
   345      COMPGREP=\$(grep "Backup completed successfully" \$RESULTS_LOG_FILE)
   346      if [ "\$COMPGREP" != "" ]; then
   347          break
   348      fi
   349  
   350      if ((\$TIMEOUT_COUNTER > 10000)); then
   351          echo "Test timed out waiting for backup completion"
   352          exit 1
   353      fi
   354      ((TIMEOUT_COUNTER=\$TIMEOUT_COUNTER+1))
   355  done
   356  set -e
   357  
   358  timestamp=\$(head -10 "\$RESULTS_LOG_FILE" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
   359  echo "gpb_distr_snap_high_conc timestamp backed up: \$timestamp"
   360  
   361  # conduct runtime analysis
   362  python /home/gpadmin/analyze_run.py gpb_distr_snap_high_conc
   363  #####################################################################
   364  
   365  #####################################################################
   366  echo "## Performing restore with high number of jobs on backup done while cluster had high-concurrency load ##"
   367  # RESTORE
   368  rm -f $RESULTS_LOG_FILE
   369  dropdb scaletestdb
   370  (time gprestore --timestamp "\$timestamp" --include-schema big --backup-dir /data/gpdata --create-db --redirect-db scaletestdb --jobs=32) > \$RESULTS_LOG_FILE 2>&1
   371  echo "gpr_distr_snap_high_conc timestamp restored: \$timestamp"
   372  
   373  # compare round-trip row counts
   374  psql -d scaletestdb -f /home/gpadmin/pull_rowcount.sql -o /home/gpadmin/rowcounts_gpr_distr_snap_high_conc.txt
   375  ROWCOUNTS_DIFF=\$(diff -w /home/gpadmin/rowcounts_orig.txt /home/gpadmin/rowcounts_gpr_distr_snap_high_conc.txt)
   376  if [ "\$ROWCOUNTS_DIFF" != "" ] 
   377  then
   378    echo "Failed result from gpb_distr_snap_high_conc -- mismatched row counts.  Exiting early with failure code."
   379    exit 1
   380  fi
   381  
   382  # conduct runtime analysis
   383  python /home/gpadmin/analyze_run.py gpr_distr_snap_high_conc
   384  
   385  # clean out redirected database before proceeding further
   386  yes y | gpbackup_manager delete-backup "\$timestamp"
   387  dropdb newscaletestdb
   388  #####################################################################
   389  #####################################################################
   390  
   391  #####################################################################
   392  ##################################################################### 
   393  # METADATA-ONLY FROM HERE ON
   394  echo "## Loading wide schema for metadata tests"
   395  psql -d scaletestdb -q -f scaletestdb_wideschema_ddl.sql
   396  #####################################################################
   397  ##################################################################### 
   398  
   399  #####################################################################
   400  ##################################################################### 
   401  echo "## Performing first backup with metadata-only ##"
   402  # BACKUP
   403  rm -f $RESULTS_LOG_FILE
   404  (time gpbackup --dbname scaletestdb --include-schema wide --backup-dir /data/gpdata/ --metadata-only --verbose) > $RESULTS_LOG_FILE 2>&1
   405  timestamp=\$(head -10 "\$RESULTS_LOG_FILE" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
   406  echo "gpb_scale_metadata timestamp backed up: \$timestamp"
   407  test_metadata=\$(find /data/gpdata/ -name *\$timestamp*_metadata.sql)
   408  
   409  METADATA_DIFF=\$(diff -w /home/gpadmin/valid_metadata.sql \$test_metadata)
   410  echo "got past metadata diff"
   411  if [ "\$METADATA_DIFF" != "" ] 
   412  then
   413    echo "Failed result from gpb_scale_metadata -- mismatched metadata output.  Exiting early with failure code."
   414    exit 1
   415  fi
   416  
   417  # conduct runtime analysis
   418  python /home/gpadmin/analyze_run.py gpb_scale_metadata
   419  #####################################################################
   420  
   421  #####################################################################
   422  echo "## Performing restore on metadata-only ##"
   423  # RESTORE
   424  rm -f $RESULTS_LOG_FILE
   425  dropdb scaletestdb
   426  (time gprestore --timestamp "\$timestamp" --include-schema wide --backup-dir /data/gpdata/ --create-db --redirect-db scaletestdb) > $RESULTS_LOG_FILE 2>&1
   427  echo "gpr_scale_metadata timestamp restored: \$timestamp"
   428  
   429  echo "## Performing second backup with metadata-only ##"
   430  rm -f $RESULTS_LOG_FILE
   431  (time gpbackup --dbname scaletestdb --include-schema wide --backup-dir /data/gpdata/ --metadata-only --verbose) > $RESULTS_LOG_FILE 2>&1
   432  timestamp=\$(head -10 "\$RESULTS_LOG_FILE" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
   433  test_metadata=\$(find /data/gpdata/ -name *\$timestamp*_metadata.sql)
   434  
   435  METADATA_DIFF=\$(diff -w /home/gpadmin/valid_metadata.sql \$test_metadata)
   436  if [ "\$METADATA_DIFF" != "" ] 
   437  then
   438    echo "Failed result from gpr_scale_metadata -- mismatched metadata output.  Exiting early with failure code."
   439    exit 1
   440  fi
   441  
   442  # conduct runtime analysis
   443  python /home/gpadmin/analyze_run.py gpr_scale_metadata
   444  
   445  # clean out redirected database before proceeding further
   446  yes y | gpbackup_manager delete-backup "\$timestamp"
   447  #####################################################################
   448  #####################################################################
   449  
   450  SCRIPT
   451  
   452  chmod +x /tmp/run_tests.bash
   453  scp /tmp/run_tests.bash cdw:/home/gpadmin/run_tests.bash
   454  ssh -t cdw "/home/gpadmin/run_tests.bash"