github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/scripts/s3-mpt-large-files.sh (about)

     1  #!/bin/bash
     2  
     3  # Prerequisites: ########################################################################################
     4  # - aistore cluster
     5  # - s3cmd
     6  # - locally accessible (source) directory that MAY contain large files (see examples below)
     7  # - any aistore bucket
     8  #
     9  ## Examples:
    10  ## 1. use existing large files and run 8 iterations
    11  ##    s3-mpt-large-files.sh /tmp/largefiles s3://abc 8
    12  #
    13  ## 2. generate (the default number of) large files, run 16 iterations
    14  ##    s3-mpt-large-files.sh /tmp/largefiles s3://abc 16 true
    15  #
    16  ## 3. same as above w/ 10 generated large files
    17  ##    s3-mpt-large-files.sh /tmp/largefiles s3://abc 16 true 10
    18  #
    19  # #######################################################################################################
    20  
    21  # command line
    22  srcdir=$1
    23  bucket=$2
    24  
    25  # runtime defaults
    26  iterations=4
    27  generate="false"
    28  numfiles=3
    29  
    30  dstdir="mpt-$RANDOM"
    31  
    32  # NOTE: to have s3cmd working directly with ais:// bucket
    33  # overriding AWS defaults (incl. "--host=s3.amazonaws.com")
    34  s3endpoint="localhost:8080/s3"
    35  host="--host=$s3endpoint"
    36  host_bucket="--host-bucket=$s3endpoint/%(bucket)"
    37  
    38  if ! [ -x "$(command -v s3cmd)" ]; then
    39    echo "Error: s3cmd not installed" >&2
    40    exit 1
    41  fi
    42  
    43  if [ $# -le 1 ]; then
    44      echo "Usage: $0 DIR BUCKET [NUM-ITERATIONS [GENERATE [NUM-FILES]]], where:"
    45      echo "  DIR: source directory containing (large) files"
    46      echo "  BUCKET: destination aistore bucket, e.g., s3://abc (where 's3://abc' may, in fact, be 'ais://abc' etc.)"
    47      echo "and optionally:"
    48      echo "  ITERATIONS: number of iterations to run (default: 4)"
    49      echo "  GENERATE:   generate large files (default: false)"
    50      echo "  NUM-FILES:  number of large files to generate (default: 3)"
    51      exit 1
    52  fi
    53  
    54  ## command line
    55  if ! [ -d $srcdir ]; then
    56    echo "Error: directory '$srcdir' does not exist" >&2
    57    exit 1
    58  fi
    59  if ! [ -z "$3" ]; then
    60    iterations=$3
    61  fi
    62  if ! [ -z "$4" ]; then
    63    generate=$4
    64  fi
    65  if ! [ -z "$5" ]; then
    66    numfiles=$5
    67  fi
    68  
    69  ## uncomment for verbose output
    70  ## set -x
    71  
    72  s3cmd info $bucket $host $host_bucket --no-ssl 1> /dev/null | exit $?
    73  
    74  cleanup() {
    75    rc=$?
    76    if [[ "$generate" == "true" ]]; then
    77      rm -f $srcdir/mpt-*
    78    fi
    79    s3cmd del "$bucket/$dstdir/mpt*" $host $host_bucket --no-ssl 2> /dev/null
    80    exit $rc
    81  }
    82  
    83  trap cleanup EXIT INT TERM
    84  
    85  if [[ "$generate" == "true" ]]; then  ## generate
    86    echo "Generating large files ..."
    87    count=499
    88    for i in $(seq 1 1 $numfiles); do
    89      dd if=/dev/random of="$srcdir/mpt-$count" bs=4024k count=$count | exit $?
    90      count=`expr $count + 500`
    91    done
    92  fi
    93  
    94  files=`ls $srcdir`
    95  for i in $(seq 1 1 $iterations); do
    96    echo "Iteration #$i -------------------------------------------------------------------------"
    97    for f in $files; do
    98      filesize=$(stat --printf="%s" $srcdir/$f)
    99      mbs=$(echo "$filesize / 1000 / 1000" | bc)
   100      parts=$(shuf -i 2-100 -n 1)
   101      partsize=$(echo "$mbs / $parts" | bc)
   102  
   103      if [ $partsize -le 5 ]; then
   104        partsize=$(shuf -i 5-10 -n 1)
   105      fi
   106      cmd="s3cmd put $srcdir/$f $bucket/$dstdir/$f --multipart-chunk-size-mb=$partsize $host $host_bucket --no-ssl"
   107      echo "Running '$cmd' ..."
   108      $cmd || exit $?
   109    done
   110    # cleanup
   111    s3cmd ls $bucket/$dstdir/* $host $host_bucket --no-ssl || exit $?
   112    echo "Cleaning up ..."
   113    for f in $files; do
   114      s3cmd del $bucket/$dstdir/$f $host $host_bucket --no-ssl
   115    done
   116  done