#!/bin/bash
#
# Scale/performance test driver for gpbackup and gprestore.
#
# Unpacks the metadata of a cluster set up by a previous job, sets up SSH to
# it, generates /tmp/run_tests.bash, copies that script to host "cdw" and runs
# it there.
#
# Environment baked into the generated script:
#   RESULTS_LOG_FILE            file every timed run is redirected into
#   RESULTS_DATABASE_HOST       \
#   RESULTS_DATABASE_USER        | parameters for the reference time DB
#   RESULTS_DATABASE_NAME        |
#   RESULTS_DATABASE_PASSWORD   /

set -ex

# Every timed run redirects into this file; an empty value would generate a
# remote script with broken redirections, so fail before touching the cluster.
: "${RESULTS_LOG_FILE:?RESULTS_LOG_FILE must be set}"

# retrieve cluster set up by previous job, and set up SSH to it
tar -xvf "cluster-metadata/cluster-metadata.tar.gz"
ccp_src/scripts/setup_ssh_to_cluster.sh

# Part 1 of the generated script: the delimiter is unquoted, so the values
# below are expanded HERE and written into the script as literals.
cat <<SCRIPT > /tmp/run_tests.bash
#!/bin/bash

source env.sh
# set format for logging
export TIMEFORMAT="TEST RUNTIME: %E"
export RESULTS_LOG_FILE=${RESULTS_LOG_FILE}

# set parameters for reference time DB
export RESULTS_DATABASE_HOST=${RESULTS_DATABASE_HOST}
export RESULTS_DATABASE_USER=${RESULTS_DATABASE_USER}
export RESULTS_DATABASE_NAME=${RESULTS_DATABASE_NAME}
export RESULTS_DATABASE_PASSWORD=${RESULTS_DATABASE_PASSWORD}
SCRIPT

# Part 2 of the generated script: the delimiter is quoted, so nothing below is
# expanded here; every variable is resolved when the script runs remotely.
cat <<'SCRIPT' >> /tmp/run_tests.bash

#####################################################################
# HELPERS
#####################################################################

# Print the 14-digit backup timestamp found in the first ten lines of the
# results log.
backup_timestamp() {
  head -10 "$RESULTS_LOG_FILE" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}"
}

# Pull row counts from a restored database and compare them with the baseline
# captured from scaletestdb. Exits the script with status 1 on any difference
# or diff error; the diff itself is printed to aid debugging.
# Arguments: $1 - test name (also names the row-count file)
#            $2 - database to pull row counts from
verify_rowcounts() {
  local test_name=$1
  local database=$2
  local rowcounts_file="/home/gpadmin/rowcounts_${test_name}.txt"

  psql -d "$database" -f /home/gpadmin/pull_rowcount.sql -o "$rowcounts_file"
  # diff is evaluated as a condition so that errexit cannot abort the script
  # before the failure message is printed
  if ! diff -w /home/gpadmin/rowcounts_orig.txt "$rowcounts_file"; then
    echo "Failed result from ${test_name} -- mismatched row counts. Exiting early with failure code."
    exit 1
  fi
}

# Compare the metadata file of the backup identified by the global
# "timestamp" with the known-good metadata. Exits with status 1 on mismatch.
# Arguments: $1 - test name used in the failure message
verify_metadata() {
  local test_name=$1
  local metadata_file

  # the pattern is quoted so that find, not the shell, expands the wildcards
  metadata_file=$(find /data/gpdata/ -name "*${timestamp}*_metadata.sql")
  if ! diff -w /home/gpadmin/valid_metadata.sql "$metadata_file"; then
    echo "Failed result from ${test_name} -- mismatched metadata output. Exiting early with failure code."
    exit 1
  fi
}

# Poll the results log once per second until a pattern shows up.
# Exits the script with status 1 once the poll counter exceeds the limit.
# Arguments: $1 - grep pattern to wait for
#            $2 - poll counter limit
#            $3 - description used in the timeout message
wait_for_log() {
  local pattern=$1
  local max_polls=$2
  local description=$3
  local polls=0

  while true; do
    sleep 1
    if grep -q -- "$pattern" "$RESULTS_LOG_FILE"; then
      return 0
    fi
    if ((polls > max_polls)); then
      echo "Test timed out waiting for ${description}"
      exit 1
    fi
    polls=$((polls + 1))
  done
}

# Run a timed gpbackup of schema "big" in scaletestdb, record its timestamp in
# the global "timestamp" and hand the log to the runtime analysis.
# Arguments: $1 - test name; remaining arguments - extra gpbackup flags
run_backup() {
  local test_name=$1
  shift

  rm -f "$RESULTS_LOG_FILE"
  (time gpbackup --dbname scaletestdb --include-schema big --backup-dir /data/gpdata/ "$@") > "$RESULTS_LOG_FILE" 2>&1
  timestamp=$(backup_timestamp)
  echo "${test_name} timestamp backed up: $timestamp"

  # conduct runtime analysis
  python /home/gpadmin/analyze_run.py "$test_name"
}

# Run a timed gprestore of the backup identified by the global "timestamp"
# into a redirected database, verify row counts, run the runtime analysis and
# delete the backup. Dropping databases is left to the caller.
# Arguments: $1 - test name; $2 - database to restore into;
#            remaining arguments - extra gprestore flags
run_restore() {
  local test_name=$1
  local target_db=$2
  shift 2

  rm -f "$RESULTS_LOG_FILE"
  (time gprestore --timestamp "$timestamp" --include-schema big --backup-dir /data/gpdata/ --create-db --redirect-db "$target_db" "$@") > "$RESULTS_LOG_FILE" 2>&1
  echo "${test_name} timestamp restored: $timestamp"

  # compare round-trip row counts
  verify_rowcounts "$test_name" "$target_db"

  # conduct runtime analysis
  python /home/gpadmin/analyze_run.py "$test_name"

  # remove the backup before proceeding further
  yes y | gpbackup_manager delete-backup "$timestamp"
}

# Run a timed metadata-only backup of schema "wide" in scaletestdb and record
# its timestamp in the global "timestamp".
backup_wide_metadata() {
  rm -f "$RESULTS_LOG_FILE"
  (time gpbackup --dbname scaletestdb --include-schema wide --backup-dir /data/gpdata/ --metadata-only --verbose) > "$RESULTS_LOG_FILE" 2>&1
  timestamp=$(backup_timestamp)
}

#####################################################################
# SETUP
#####################################################################

# capture installed versions for later storage in run stats
gpstart --version > /home/gpadmin/gpversion.txt
gpbackup --version > /home/gpadmin/gpbversion.txt
export GPDB_VERSION=$(cat /home/gpadmin/gpversion.txt)
export GPB_VERSION=$(cat /home/gpadmin/gpbversion.txt)

echo "## Capturing row counts for comparison ##"
psql -d scaletestdb -f /home/gpadmin/pull_rowcount.sql -o /home/gpadmin/rowcounts_orig.txt

#####################################################################
# DATA SCALE TESTS
#####################################################################

echo "## Performing single-data-file, --no-compression, --copy-queue-size 8 backup test ##"
run_backup gpb_single_data_file_copy_q8 --single-data-file --no-compression --copy-queue-size 8

echo "## Performing single-data-file, --no-compression, --copy-queue-size 8 restore test ##"
run_restore gpr_single_data_file_copy_q8 copyqueuerestore8 --copy-queue-size 8
dropdb copyqueuerestore8

#####################################################################

echo "## Performing backup for data scale test ##"
run_backup gpb_scale_multi_data_file

echo "## Performing restore for data scale test ##"
run_restore gpr_scale_multi_data_file scalemultifile --jobs=4
dropdb scalemultifile

#####################################################################

echo "## Performing backup for data scale test with zstd ##"
run_backup gpb_scale_multi_data_file_zstd --compression-type zstd

echo "## Performing restore for data scale test with zstd ##"
run_restore gpr_scale_multi_data_file_zstd scalemultifilezstd --jobs=4
dropdb scalemultifilezstd

#####################################################################

echo "## Performing single-data-file backup for data scale test ##"
run_backup gpb_scale_single_data_file --single-data-file

echo "## Performing single-data-file restore for data scale test ##"
run_restore gpr_scale_single_data_file scalesinglefile
dropdb scalesinglefile

#####################################################################

echo "## Performing single-data-file backup for data scale test with zstd ##"
run_backup gpb_scale_single_data_file_zstd --single-data-file --compression-type zstd

echo "## Performing single-data-file restore for data scale test with zstd ##"
run_restore gpr_scale_single_data_file_zstd scalesinglefilezstd
dropdb scalesinglefilezstd

#####################################################################
# TEST GPBACKUP UNDER VARIOUS PRESSURES
#####################################################################

echo "## Performing backup with moderate number of jobs while database is being edited ##"
# BACKUP
rm -f "$RESULTS_LOG_FILE"
echo "RESULTS_LOG_FILE: $RESULTS_LOG_FILE"
(time gpbackup --dbname scaletestdb --include-schema big --backup-dir /data/gpdata --jobs=16) > "$RESULTS_LOG_FILE" 2>&1 &
echo "Backup initiated in the background."

# errexit is off between here and "set -e" below, so a failing command in
# this phase does not abort the script while the backup is still running
set +e

# check log for lock acquisition before proceeding
wait_for_log 'Locks acquired: .* 100\.00%' 100 "lock acquisition"
echo "All locks acquired. Proceeding with ETL job."

# begin ETL job
psql -d scaletestdb -f /home/gpadmin/etl_job.sql > /dev/null

# check log for backup completion before proceeding
wait_for_log "Backup completed successfully" 10000 "backup completion"

# NOTE: this is the first point at which errexit is enabled in this script;
# everything below runs with it on unless explicitly disabled again.
set -e

timestamp=$(backup_timestamp)
echo "gpb_distr_snap_edit_data timestamp backed up: $timestamp"

# conduct runtime analysis
python /home/gpadmin/analyze_run.py gpb_distr_snap_edit_data

echo "## Performing restore with moderate number of jobs on backup done while database is edited ##"
dropdb scaletestdb
# newscaletestdb is kept: it is the source database of the next backup
run_restore gpr_distr_snap_edit_data newscaletestdb --jobs=16

#####################################################################

echo "## Performing backup with high number of jobs on cluster with high-concurrency load ##"
# BACKUP
rm -f "$RESULTS_LOG_FILE"
(time gpbackup --dbname newscaletestdb --include-schema big --backup-dir /data/gpdata --jobs=32) > "$RESULTS_LOG_FILE" 2>&1 &

# errexit is off between here and "set -e" below (see the previous test)
set +e

# check log for lock acquisition before proceeding
wait_for_log 'Locks acquired: .* 100\.00%' 100 "lock acquisition"
echo "All locks acquired. Proceeding with data load"

# load data into a separate database to apply high concurrent load to cluster
createdb scaletestdb
psql -d scaletestdb -q -f scaletestdb_bigschema_ddl.sql
gpload -f /home/gpadmin/gpload_yaml/lineitem.yml
gpload -f /home/gpadmin/gpload_yaml/orders_3.yml

# check log for backup completion before proceeding
wait_for_log "Backup completed successfully" 10000 "backup completion"
set -e

timestamp=$(backup_timestamp)
echo "gpb_distr_snap_high_conc timestamp backed up: $timestamp"

# conduct runtime analysis
python /home/gpadmin/analyze_run.py gpb_distr_snap_high_conc

echo "## Performing restore with high number of jobs on backup done while cluster had high-concurrency load ##"
dropdb scaletestdb
run_restore gpr_distr_snap_high_conc scaletestdb --jobs=32
dropdb newscaletestdb

#####################################################################
# METADATA-ONLY FROM HERE ON
#####################################################################

echo "## Loading wide schema for metadata tests"
psql -d scaletestdb -q -f scaletestdb_wideschema_ddl.sql

echo "## Performing first backup with metadata-only ##"
backup_wide_metadata
echo "gpb_scale_metadata timestamp backed up: $timestamp"
verify_metadata gpb_scale_metadata

# conduct runtime analysis
python /home/gpadmin/analyze_run.py gpb_scale_metadata

echo "## Performing restore on metadata-only ##"
# RESTORE
rm -f "$RESULTS_LOG_FILE"
dropdb scaletestdb
(time gprestore --timestamp "$timestamp" --include-schema wide --backup-dir /data/gpdata/ --create-db --redirect-db scaletestdb) > "$RESULTS_LOG_FILE" 2>&1
echo "gpr_scale_metadata timestamp restored: $timestamp"

# the restore is validated by backing the restored database up again and
# comparing that metadata with the known-good file
echo "## Performing second backup with metadata-only ##"
backup_wide_metadata
verify_metadata gpr_scale_metadata

# conduct runtime analysis
# NOTE(review): at this point the results log holds the output of the second
# backup, not of the gprestore run above -- confirm that is what
# analyze_run.py is meant to see for gpr_scale_metadata.
python /home/gpadmin/analyze_run.py gpr_scale_metadata

# remove the second backup
yes y | gpbackup_manager delete-backup "$timestamp"
SCRIPT

chmod +x /tmp/run_tests.bash
scp /tmp/run_tests.bash cdw:/home/gpadmin/run_tests.bash
ssh -t cdw "/home/gpadmin/run_tests.bash"