github.com/greenplum-db/gpbackup@v0.0.0-20240517212602-89daab1885b3/ci/scripts/scale-tests.bash (about)

     1  #!/bin/bash
     2  
     3  set -ex
     4  
     5  # setup cluster and install gpbackup tools using gppkg
     6  ccp_src/scripts/setup_ssh_to_cluster.sh
     7  out=$(ssh -t cdw 'source env.sh && psql postgres -c "select version();"')
     8  GPDB_VERSION=$(echo ${out} | sed -n 's/.*Greenplum Database \([0-9]\).*/\1/p')
     9  mkdir -p /tmp/untarred
    10  tar -xzf gppkgs/gpbackup-gppkgs.tar.gz -C /tmp/untarred
    11  scp /tmp/untarred/gpbackup_tools*gp${GPDB_VERSION}*${OS}*.gppkg cdw:/home/gpadmin
    12  
    13  tar -xzf gppkgs/gpbackup-gppkgs.tar.gz -C /tmp/untarred
    14  
    15  if [[ -d gp-pkg ]] ; then
    16    mkdir /tmp/gppkgv2
    17    tar -xzf gp-pkg/gppkg* -C /tmp/gppkgv2
    18  
    19    # install gppkgv2 onto all segments
    20    while read -r host; do
    21      ssh -n "$host" "mkdir -p /home/gpadmin/.local/bin"
    22      scp /tmp/gppkgv2/gppkg "$host":/home/gpadmin/.local/bin
    23    done <cluster_env_files/hostfile_all
    24  fi
    25  
    26  scp cluster_env_files/hostfile_all cdw:/tmp
    27  
    28  cat <<SCRIPT > /tmp/run_tests.bash
    29  #!/bin/bash
    30  
    31  source env.sh
    32  
    33  # Double the vmem protect limit default on the coordinator segment to
    34  # prevent query cancels on large table creations (e.g. scale_db1.sql)
    35  gpconfig -c gp_vmem_protect_limit -v 16384 --masteronly
    36  gpstop -air
    37  
    38  # only install if not installed already
    39  is_installed_output=\$(source env.sh; gppkg -q gpbackup*gp*.gppkg)
    40  set +e
    41  echo \$is_installed_output | grep 'is installed'
    42  if [ \$? -ne 0 ] ; then
    43    set -e
    44    if [[ -f /home/gpadmin/.local/bin/gppkg ]] ; then
    45      # gppkg v2 is installed here
    46      gppkg install -a gpbackup*gp*.gppkg
    47    else
    48      gppkg -i gpbackup*gp*.gppkg
    49    fi
    50  fi
    51  set -e
    52  
### Data scale tests ###
# Each backup's stdout is tee'd here so the 14-digit backup timestamp can be
# scraped back out for the corresponding restore.
log_file=/tmp/gpbackup.log

echo "## Populating database for copy queue test ##"
createdb copyqueuedb
for j in {1..20000}
do
  psql -d copyqueuedb -q -c "CREATE TABLE tbl_1k_\$j(i int) DISTRIBUTED BY (i);"
  psql -d copyqueuedb -q -c "INSERT INTO tbl_1k_\$j SELECT generate_series(1,1000)"
done

echo "## Performing single-data-file, --no-compression, --copy-queue-size 2 backup for copy queue test ##"
time gpbackup --dbname copyqueuedb --backup-dir /data/gpdata/ --single-data-file --no-compression --copy-queue-size 2 | tee "\$log_file"
timestamp=\$(head -10 "\$log_file" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
gpbackup_manager display-report \$timestamp

echo "## Performing single-data-file, --no-compression, --copy-queue-size 4 backup for copy queue test ##"
time gpbackup --dbname copyqueuedb --backup-dir /data/gpdata/ --single-data-file --no-compression --copy-queue-size 4 | tee "\$log_file"
timestamp=\$(head -10 "\$log_file" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
gpbackup_manager display-report \$timestamp

echo "## Performing single-data-file, --no-compression, --copy-queue-size 8 backup for copy queue test ##"
time gpbackup --dbname copyqueuedb --backup-dir /data/gpdata/ --single-data-file --no-compression --copy-queue-size 8 | tee "\$log_file"
timestamp=\$(head -10 "\$log_file" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
gpbackup_manager display-report \$timestamp

# Both restores below reuse \$timestamp from the last (copy-queue-size 8)
# backup; each restores into its own freshly created database.
echo "## Performing single-data-file, --no-compression, --copy-queue-size 2 restore for copy queue test ##"
time gprestore --timestamp "\$timestamp" --backup-dir /data/gpdata/ --create-db --redirect-db copyqueuerestore2 --copy-queue-size 2

echo "## Performing single-data-file, --no-compression, --copy-queue-size 8 restore for copy queue test ##"
time gprestore --timestamp "\$timestamp" --backup-dir /data/gpdata/ --create-db --redirect-db copyqueuerestore8 --copy-queue-size 8

echo "## Populating database for data scale test ##"
createdb datascaledb
for j in {1..3000}
do
  psql -d datascaledb -q -c "CREATE TABLE tbl_1k_\$j(i int) DISTRIBUTED BY (i);"
  psql -d datascaledb -q -c "INSERT INTO tbl_1k_\$j SELECT generate_series(1,1000)"
done
# One large table loaded in 1M-row batches (1B rows total).
psql -d datascaledb -q -c "CREATE TABLE tbl_1B(i int) DISTRIBUTED BY(i);"
for j in {1..1000}
do
  psql -d datascaledb -q -c "INSERT INTO tbl_1B SELECT generate_series(1,1000000)"
done
### Create a partition table with varying amounts per partition to exercise writer polling for COPY to connect
psql -d datascaledb -c "CREATE TABLE big_partition(a int, b int, c int) DISTRIBUTED BY (a) PARTITION BY RANGE (b) (START (1) END (101) EVERY (1))"
psql -d datascaledb -c "INSERT INTO big_partition SELECT i, i, i FROM generate_series(1,100) i"
# Doubling 20 times, then skewing with the modulo inserts below.
for j in {1..20}
do
	psql -d datascaledb -c "INSERT INTO big_partition (SELECT * FROM big_partition)"
done
psql -d datascaledb -c "INSERT INTO big_partition (SELECT * FROM big_partition WHERE a % 2 = 0)"
psql -d datascaledb -c "INSERT INTO big_partition (SELECT * FROM big_partition WHERE a % 2 = 0)"
psql -d datascaledb -c "INSERT INTO big_partition (SELECT * FROM big_partition WHERE a % 3 = 0)"

echo "## Performing backup for data scale test ##"
### Multiple data file test ###
time gpbackup --dbname datascaledb --backup-dir /data/gpdata/ --leaf-partition-data | tee "\$log_file"
timestamp=\$(head -10 "\$log_file" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
dropdb datascaledb
echo "## Performing restore for data scale test ##"
time gprestore --timestamp "\$timestamp" --backup-dir /data/gpdata/ --create-db --jobs=4
rm "\$log_file"

echo "## Performing backup for data scale test with zstd ##"
### Multiple data file test with zstd ###
time gpbackup --dbname datascaledb --backup-dir /data/gpdata/ --leaf-partition-data --compression-type zstd | tee "\$log_file"
timestamp=\$(head -10 "\$log_file" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
dropdb datascaledb
echo "## Performing restore for data scale test with zstd ##"
time gprestore --timestamp "\$timestamp" --backup-dir /data/gpdata/ --create-db --jobs=4
rm "\$log_file"

echo "## Performing single-data-file backup for data scale test ##"
### Single data file test ###
time gpbackup --dbname datascaledb --backup-dir /data/gpdata/ --leaf-partition-data --single-data-file | tee "\$log_file"
timestamp=\$(head -10 "\$log_file" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
dropdb datascaledb
echo "## Performing single-data-file restore for data scale test ##"
time gprestore --timestamp "\$timestamp" --backup-dir /data/gpdata/  --create-db
rm "\$log_file"

echo "## Performing single-data-file backup for data scale test with zstd ##"
### Single data file test with zstd ###
time gpbackup --dbname datascaledb --backup-dir /data/gpdata/ --leaf-partition-data --single-data-file --compression-type zstd | tee "\$log_file"
timestamp=\$(head -10 "\$log_file" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
dropdb datascaledb
echo "## Performing single-data-file restore for data scale test with zstd ##"
time gprestore --timestamp "\$timestamp" --backup-dir /data/gpdata/  --create-db
dropdb datascaledb
rm "\$log_file"

### Metadata scale test ###
echo "## Populating database for metadata scale test ##"
# scale_db1.tgz is copied into the gpadmin home dir by the outer script.
tar -xvf scale_db1.tgz
createdb metadatascaledb -T template0

psql -f scale_db1.sql -d metadatascaledb -v client_min_messages=error -q

# pg_dump is timed alongside gpbackup as a baseline comparison.
echo "## Performing pg_dump with metadata-only ##"
time pg_dump -s metadatascaledb > /data/gpdata/pg_dump.sql
echo "## Performing gpbackup with metadata-only ##"
time gpbackup --dbname metadatascaledb --backup-dir /data/gpdata/ --metadata-only --verbose | tee "\$log_file"

timestamp=\$(head -10 "\$log_file" | grep "Backup Timestamp " | grep -Eo "[[:digit:]]{14}")
echo "## Performing gprestore with metadata-only ##"
time gprestore --timestamp "\$timestamp" --backup-dir /data/gpdata/ --redirect-db=metadatascaledb_res --jobs=4 --create-db

SCRIPT
   162  
   163  chmod +x /tmp/run_tests.bash
   164  scp /tmp/run_tests.bash cdw:/home/gpadmin/run_tests.bash
   165  scp -r scale_schema/scale_db1.tgz cdw:/home/gpadmin/
   166  ssh -t cdw "/home/gpadmin/run_tests.bash"